pitrou commented on a change in pull request #8302:
URL: https://github.com/apache/arrow/pull/8302#discussion_r497538522



##########
File path: cpp/src/arrow/ipc/writer.cc
##########
@@ -1020,13 +1014,30 @@ class ARROW_EXPORT IpcFormatWriter : public 
RecordBatchWriter {
   Status WriteDictionaries(const RecordBatch& batch) {
     ARROW_ASSIGN_OR_RAISE(const auto dictionaries, CollectDictionaries(batch, 
mapper_));
 
+    // TODO: Check for delta dictionaries. Can we scan for deltas while 
computing
+    // the RecordBatch payload to save time?
+
     for (const auto& pair : dictionaries) {
       IpcPayload payload;
       int64_t dictionary_id = pair.first;
       const auto& dictionary = pair.second;
 
+      // If a dictionary with this id was already emitted, check if it was the 
same.
+      // Note we don't compare dictionaries by value because it may be costly.
+      auto* last_dictionary = &last_dictionaries_[dictionary_id];
+      {
+        const auto maybe_last_dict = last_dictionary->lock();
+        if (maybe_last_dict && maybe_last_dict->data() == dictionary->data()) {
+          // Same dictionary data by pointer => no need to emit it again
+          continue;
+        }
+      }
+
       RETURN_NOT_OK(GetDictionaryPayload(dictionary_id, dictionary, options_, 
&payload));
       RETURN_NOT_OK(payload_writer_->WritePayload(payload));

Review comment:
       Done.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to