This is an automated email from the ASF dual-hosted git repository.
htowaileb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git
The following commit(s) were added to refs/heads/master by this push:
new 9340d8c44d Avoid writing empty files when COPYing TO S3
9340d8c44d is described below
commit 9340d8c44d594829c1761700c24436b70ec9241a
Author: Hussain Towaileb <[email protected]>
AuthorDate: Mon Apr 8 20:51:02 2024 +0300
Avoid writing empty files when COPYing TO S3
Change-Id: Iee18cb7458495471fe38e5f6f2f3721d8ce9c12f
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/18230
Integration-Tests: Jenkins <[email protected]>
Tested-by: Jenkins <[email protected]>
Reviewed-by: Hussain Towaileb <[email protected]>
---
.../org/apache/asterix/runtime/writer/ExternalFileWriter.java | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/writer/ExternalFileWriter.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/writer/ExternalFileWriter.java
index f9f98dae29..95dc9623f2 100644
--- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/writer/ExternalFileWriter.java
+++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/writer/ExternalFileWriter.java
@@ -57,11 +57,17 @@ final class ExternalFileWriter implements IExternalWriter {
// Ignore writing values for unresolvable partition paths
return;
}
- writer.write(value);
- tupleCounter++;
+
+ // create a new file only when we reach the maximum tuples and we know a new tuple is incoming
+ // e.g., if max is 1000, we hit tuple 1001, we will upload and create a new file, if we only have 1000
+ // we will stop here, and calling the close/finish will upload whatever is written. This is to avoid
+ // creating and uploading empty files
if (tupleCounter >= maxResultPerFile) {
newFile();
}
+
+ writer.write(value);
+ tupleCounter++;
}
@Override