echauchot commented on a change in pull request #15381:
URL: https://github.com/apache/beam/pull/15381#discussion_r725913649



##########
File path: 
sdks/java/io/elasticsearch/src/main/java/org/apache/beam/sdk/io/elasticsearch/ElasticsearchIO.java
##########
@@ -2214,7 +2502,40 @@ private void flushBatch() throws IOException, 
InterruptedException {
           }
           responseEntity = handleRetry("POST", endPoint, 
Collections.emptyMap(), requestBody);
         }
-        checkForErrors(responseEntity, spec.getAllowedResponseErrors());
+
+        List<WriteSummary> responses =
+            checkForErrors(
+                responseEntity, spec.getAllowedResponseErrors(), 
spec.getThrowWriteErrors());
+
+        return mergeInputsAndResponses(inputEntries, responses);
+      }
+
+      private static Multimap<BoundedWindow, WriteSummary> 
mergeInputsAndResponses(
+          List<Entry<BoundedWindow, WriteSummary>> inputs, List<WriteSummary> 
responses) {
+
+        checkArgument(
+            inputs.size() == responses.size(), "inputs and responses must be 
of same size");
+
+        Multimap<BoundedWindow, WriteSummary> results = 
ArrayListMultimap.create();
+
+        // N.B. the order of responses must always match the order of inputs
+        for (int i = 0; i < inputs.size(); i++) {
+          BoundedWindow outputWindow = inputs.get(i).getKey();
+
+          // Contains raw input document and Bulk directive counterpart only
+          WriteSummary inputDoc = inputs.get(i).getValue();
+
+          // Contains stringified JSON response from Elasticsearch and error 
status only
+          WriteSummary outputDoc = responses.get(i);
+
+          WriteSummary merged =

Review comment:
       Plkease add a comment: contains, all the `WriteSummary` fields set 
matching inputDoc and write response

##########
File path: 
sdks/java/io/elasticsearch/src/main/java/org/apache/beam/sdk/io/elasticsearch/ElasticsearchIO.java
##########
@@ -237,49 +250,64 @@ static JsonNode parseResponse(HttpEntity responseEntity) 
throws IOException {
     return mapper.readValue(responseEntity.getContent(), JsonNode.class);
   }
 
-  static void checkForErrors(HttpEntity responseEntity, @Nullable Set<String> 
allowedErrorTypes)
+  static List<WriteSummary> checkForErrors(
+      HttpEntity responseEntity, @Nullable Set<String> allowedErrorTypes, 
boolean throwWriteErrors)
       throws IOException {
 
+    List<WriteSummary> responses = new ArrayList<>();
+    int numErrors = 0;
     JsonNode searchResult = parseResponse(responseEntity);
-    boolean errors = searchResult.path("errors").asBoolean();
-    if (errors) {
-      int numErrors = 0;
-
-      StringBuilder errorMessages =
-          new StringBuilder("Error writing to Elasticsearch, some elements 
could not be inserted:");
-      JsonNode items = searchResult.path("items");
-      if (items.isMissingNode() || items.size() == 0) {
-        errorMessages.append(searchResult.toString());
-      }
-      // some items present in bulk might have errors, concatenate error 
messages
-      for (JsonNode item : items) {
-        JsonNode error = item.findValue("error");
-        if (error != null) {
-          // N.B. An empty-string within the allowedErrorTypes Set implies all 
errors are allowed.
-          String type = error.path("type").asText();
-          String reason = error.path("reason").asText();
-          String docId = item.findValue("_id").asText();
-          JsonNode causedBy = error.path("caused_by"); // May not be present
-          String cbReason = causedBy.path("reason").asText();
-          String cbType = causedBy.path("type").asText();
-
-          if (allowedErrorTypes == null
-              || (!allowedErrorTypes.contains(type) && 
!allowedErrorTypes.contains(cbType))) {
-            // 'error' and 'causedBy` fields are not null, and the error is 
not being ignored.
-            numErrors++;
-
-            errorMessages.append(String.format("%nDocument id %s: %s (%s)", 
docId, reason, type));
-
-            if (!causedBy.isMissingNode()) {
-              errorMessages.append(String.format("%nCaused by: %s (%s)", 
cbReason, cbType));
-            }
+    StringBuilder errorMessages =
+        new StringBuilder("Error writing to Elasticsearch, some elements could 
not be inserted:");
+    JsonNode items = searchResult.path("items");
+
+    if (items.isMissingNode() || items.size() == 0) {
+      // This would only be expected in cases like connectivity issues or 
similar
+      errorMessages.append(searchResult.toString());
+      throw new RuntimeException(

Review comment:
       ping ?

##########
File path: 
sdks/java/io/elasticsearch-tests/elasticsearch-tests-5/src/test/java/org/apache/beam/sdk/io/elasticsearch/ElasticsearchIOIT.java
##########
@@ -141,6 +141,18 @@ public void testWriteWithAllowableErrors() throws 
Exception {
     elasticsearchIOTestCommon.testWriteWithAllowedErrors();
   }
 
+  @Test

Review comment:
       ping ?

##########
File path: 
sdks/java/io/elasticsearch/src/main/java/org/apache/beam/sdk/io/elasticsearch/ElasticsearchIO.java
##########
@@ -237,49 +250,64 @@ static JsonNode parseResponse(HttpEntity responseEntity) 
throws IOException {
     return mapper.readValue(responseEntity.getContent(), JsonNode.class);
   }
 
-  static void checkForErrors(HttpEntity responseEntity, @Nullable Set<String> 
allowedErrorTypes)
+  static List<WriteSummary> checkForErrors(

Review comment:
       ping ?

##########
File path: 
sdks/java/io/elasticsearch/src/main/java/org/apache/beam/sdk/io/elasticsearch/ElasticsearchIO.java
##########
@@ -1623,11 +1651,153 @@ public void setup() throws IOException {
 
       @ProcessElement
       public void processElement(ProcessContext c) throws IOException {
-        c.output(createBulkApiEntity(spec, c.element(), backendVersion));
+        String inputDoc = c.element();
+        String bulkDirective = createBulkApiEntity(spec, inputDoc, 
backendVersion);
+        c.output(
+            WriteSummary.create()
+                .withInputDoc(inputDoc)
+                .withBulkDirective(bulkDirective)
+                // N.B. Saving the element timestamp for later use allows for 
exactly emulating
+                // c.output(...) because c.output is equivalent to
+                // c.outputWithTimestamp(..., c.timestamp())
+                .withTimestamp(c.timestamp()));
       }
     }
   }
 
+  public static class WriteSummaryCoder extends AtomicCoder<WriteSummary> 
implements Serializable {
+    private static final WriteSummaryCoder INSTANCE = new WriteSummaryCoder();
+
+    private WriteSummaryCoder() {}
+
+    public static WriteSummaryCoder of() {
+      return INSTANCE;
+    }
+
+    @Override
+    public void encode(WriteSummary value, OutputStream outStream) throws 
IOException {
+      NullableCoder.of(StringUtf8Coder.of()).encode(value.getInputDoc(), 
outStream);
+      NullableCoder.of(StringUtf8Coder.of()).encode(value.getBulkDirective(), 
outStream);
+      BooleanCoder.of().encode(value.getHasError(), outStream);
+      
NullableCoder.of(StringUtf8Coder.of()).encode(value.getResponseItemJson(), 
outStream);
+      NullableCoder.of(InstantCoder.of()).encode(value.getTimestamp(), 
outStream);
+    }
+
+    @Override
+    public WriteSummary decode(InputStream inStream) throws IOException {
+      String inputDoc = 
NullableCoder.of(StringUtf8Coder.of()).decode(inStream);
+      String bulkDirective = 
NullableCoder.of(StringUtf8Coder.of()).decode(inStream);
+      boolean hasError = BooleanCoder.of().decode(inStream);
+      String responseItemJson = 
NullableCoder.of(StringUtf8Coder.of()).decode(inStream);
+      Instant timestamp = NullableCoder.of(InstantCoder.of()).decode(inStream);
+
+      return WriteSummary.create()
+          .withInputDoc(inputDoc)
+          .withBulkDirective(bulkDirective)
+          .withHasError(hasError)
+          .withResponseItemJson(responseItemJson)
+          .withTimestamp(timestamp);
+    }
+  }
+
+  // Immutable POJO for maintaining various states of documents and their bulk 
representation, plus
+  // response from ES for the given document and the timestamp of the data
+  @DefaultCoder(WriteSummaryCoder.class)
+  @AutoValue
+  public abstract static class WriteSummary implements Serializable {

Review comment:
       ping ?

##########
File path: 
sdks/java/io/elasticsearch/src/main/java/org/apache/beam/sdk/io/elasticsearch/ElasticsearchIO.java
##########
@@ -1789,8 +1964,14 @@ public Write withAllowableResponseErrors(@Nullable 
Set<String> allowableResponse
       return this;
     }
 
+    /** Refer to {@link BulkIO#withThrowWriteErrors}. */
+    public Write withThrowWriteErrors(boolean throwWriteErrors) {

Review comment:
       ping ?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to