Consider reaching out to [email protected]. On Sat, Dec 16, 2017 at 11:39 PM, Joshua Fox <[email protected]> wrote:
> Thank you for fixing BEAM-991 > <https://issues.apache.org/jira/browse/BEAM-991>. > > 1. The special Datastore library bundled in Dataflow/Beam gives "datastore > transaction or write too big" for some Entities. > > - See below for stacktrace > - We have no transaction here, just org.apache.beam.sdk.io. > gcp.datastore.DatastoreV1.Write. > - I am familiar with various exceptions in the Datastore Cloud API > that indicate that the entity or the index is too big. But, if we > do the same put action in the Datastore Cloud API, we get > com.google.cloud.datastore.DatastoreException: I/O error, strangely > with no stacktrace. > > How do we get more diagnostic info? > > > 2. A single failure of a batch-put by Write means that all puts in the > batch (~500) fail. > > > Does Write have a fallback mechanism? > > For example: Retry in batches of 250, recursively splitting in two on > failure. Eventually only one will fail, and the others will succeed. > > > > > ------------------------------------------------------------ > ------------------------------------------------------------ > ---------------- > > (c3f1654ffadc5b23): java.lang.RuntimeException: > org.apache.beam.sdk.util.UserCodeException: > com.google.datastore.v1.client.DatastoreException: > datastore transaction or write too big., code=INVALID_ARGUMENT > at com.google.cloud.dataflow.worker.GroupAlsoByWindowsParDoFn$1 > .output(GroupAlsoByWindowsParDoFn.java:182) > at com.google.cloud.dataflow.worker.GroupAlsoByWindowFnRunner$1 > .outputWindowedValue(GroupAlsoByWindowFnRunner.java:104) > at com.google.cloud.dataflow.worker.util.BatchGroupAlsoByWindow > ReshuffleFn.processElement(BatchGroupAlsoByWindowReshuffleFn.java:54) > at com.google.cloud.dataflow.worker.util.BatchGroupAlsoByWindow > ReshuffleFn.processElement(BatchGroupAlsoByWindowReshuffleFn.java:37) > at com.google.cloud.dataflow.worker.GroupAlsoByWindowFnRunner.i > nvokeProcessElement(GroupAlsoByWindowFnRunner.java:117) > at 
com.google.cloud.dataflow.worker.GroupAlsoByWindowFnRunner.p > rocessElement(GroupAlsoByWindowFnRunner.java:74) > at com.google.cloud.dataflow.worker.GroupAlsoByWindowsParDoFn.p > rocessElement(GroupAlsoByWindowsParDoFn.java:113) > at com.google.cloud.dataflow.worker.util.common.worker.ParDoOpe > ration.process(ParDoOperation.java:48) > at com.google.cloud.dataflow.worker.util.common.worker.OutputRe > ceiver.process(OutputReceiver.java:52) > at com.google.cloud.dataflow.worker.util.common.worker.ReadOper > ation.runReadLoop(ReadOperation.java:187) > at com.google.cloud.dataflow.worker.util.common.worker.ReadOper > ation.start(ReadOperation.java:148) > at com.google.cloud.dataflow.worker.util.common.worker.MapTaskE > xecutor.execute(MapTaskExecutor.java:68) > at com.google.cloud.dataflow.worker.DataflowWorker.executeWork( > DataflowWorker.java:330) > at com.google.cloud.dataflow.worker.DataflowWorker.doWork(Dataf > lowWorker.java:302) > at com.google.cloud.dataflow.worker.DataflowWorker.getAndPerfor > mWork(DataflowWorker.java:251) > at com.google.cloud.dataflow.worker.DataflowBatchWorkerHarness$ > WorkerThread.doWork(DataflowBatchWorkerHarness.java:135) > at com.google.cloud.dataflow.worker.DataflowBatchWorkerHarness$ > WorkerThread.call(DataflowBatchWorkerHarness.java:115) > at com.google.cloud.dataflow.worker.DataflowBatchWorkerHarness$ > WorkerThread.call(DataflowBatchWorkerHarness.java:102) > at java.util.concurrent.FutureTask.run(FutureTask.java:266) > at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPool > Executor.java:1142) > at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoo > lExecutor.java:617) > at java.lang.Thread.run(Thread.java:745) > Caused by: org.apache.beam.sdk.util.UserCodeException: > com.google.datastore.v1.client.DatastoreException: datastore transaction > or write too big., code=INVALID_ARGUMENT > at org.apache.beam.sdk.util.UserCodeException.wrap(UserCodeExce > ption.java:36) > at 
org.apache.beam.sdk.io.gcp.datastore.DatastoreV1$DatastoreWr > iterFn$DoFnInvoker.invokeProcessElement(Unknown Source) > at org.apache.beam.runners.core.SimpleDoFnRunner.invokeProcessE > lement(SimpleDoFnRunner.java:177) > at org.apache.beam.runners.core.SimpleDoFnRunner.processElement > (SimpleDoFnRunner.java:141) > at com.google.cloud.dataflow.worker.SimpleParDoFn.processElemen > t(SimpleParDoFn.java:324) > at com.google.cloud.dataflow.worker.util.common.worker.ParDoOpe > ration.process(ParDoOperation.java:48) > at com.google.cloud.dataflow.worker.util.common.worker.OutputRe > ceiver.process(OutputReceiver.java:52) > at com.google.cloud.dataflow.worker.SimpleParDoFn$1.output(Simp > leParDoFn.java:272) > at org.apache.beam.runners.core.SimpleDoFnRunner.outputWindowed > Value(SimpleDoFnRunner.java:211) > at org.apache.beam.runners.core.SimpleDoFnRunner.access$700(Sim > pleDoFnRunner.java:66) > at org.apache.beam.runners.core.SimpleDoFnRunner$DoFnProcessCon > text.output(SimpleDoFnRunner.java:436) > at org.apache.beam.runners.core.SimpleDoFnRunner$DoFnProcessCon > text.output(SimpleDoFnRunner.java:424) > at org.apache.beam.sdk.transforms.MapElements$1.processElement( > MapElements.java:122) > at > org.apache.beam.sdk.transforms.MapElements$1$DoFnInvoker.invokeProcessElement(Unknown > Source) > at org.apache.beam.runners.core.SimpleDoFnRunner.invokeProcessE > lement(SimpleDoFnRunner.java:177) > at org.apache.beam.runners.core.SimpleDoFnRunner.processElement > (SimpleDoFnRunner.java:141) > at com.google.cloud.dataflow.worker.SimpleParDoFn.processElemen > t(SimpleParDoFn.java:324) > at com.google.cloud.dataflow.worker.util.common.worker.ParDoOpe > ration.process(ParDoOperation.java:48) > at com.google.cloud.dataflow.worker.util.common.worker.OutputRe > ceiver.process(OutputReceiver.java:52) > at com.google.cloud.dataflow.worker.SimpleParDoFn$1.output(Simp > leParDoFn.java:272) > at org.apache.beam.runners.core.SimpleDoFnRunner.outputWindowed > 
Value(SimpleDoFnRunner.java:211) > at org.apache.beam.runners.core.SimpleDoFnRunner.access$700(Sim > pleDoFnRunner.java:66) > at org.apache.beam.runners.core.SimpleDoFnRunner$DoFnProcessCon > text.output(SimpleDoFnRunner.java:436) > at org.apache.beam.runners.core.SimpleDoFnRunner$DoFnProcessCon > text.output(SimpleDoFnRunner.java:424) > at com.freightos.backup.datastore.beam.EntityDoFn.processElemen > t(EntityDoFn.java:59) > at > com.freightos.backup.datastore.beam.EntityDoFn$DoFnInvoker.invokeProcessElement(Unknown > Source) > at org.apache.beam.runners.core.SimpleDoFnRunner.invokeProcessE > lement(SimpleDoFnRunner.java:177) > at org.apache.beam.runners.core.SimpleDoFnRunner.processElement > (SimpleDoFnRunner.java:141) > at com.google.cloud.dataflow.worker.SimpleParDoFn.processElemen > t(SimpleParDoFn.java:324) > at com.google.cloud.dataflow.worker.util.common.worker.ParDoOpe > ration.process(ParDoOperation.java:48) > at com.google.cloud.dataflow.worker.util.common.worker.OutputRe > ceiver.process(OutputReceiver.java:52) > at com.google.cloud.dataflow.worker.SimpleParDoFn$1.output(Simp > leParDoFn.java:272) > at org.apache.beam.runners.core.SimpleDoFnRunner.outputWindowed > Value(SimpleDoFnRunner.java:211) > at org.apache.beam.runners.core.SimpleDoFnRunner.access$700(Sim > pleDoFnRunner.java:66) > at org.apache.beam.runners.core.SimpleDoFnRunner$DoFnProcessCon > text.output(SimpleDoFnRunner.java:436) > at org.apache.beam.runners.core.SimpleDoFnRunner$DoFnProcessCon > text.output(SimpleDoFnRunner.java:424) > at org.apache.beam.sdk.io.gcp.datastore.DatastoreV1$Read$ReadFn > .processElement(DatastoreV1.java:919) > at org.apache.beam.sdk.io.gcp.datastore.DatastoreV1$Read$ReadFn > $DoFnInvoker.invokeProcessElement(Unknown Source) > at org.apache.beam.runners.core.SimpleDoFnRunner.invokeProcessE > lement(SimpleDoFnRunner.java:177) > at org.apache.beam.runners.core.SimpleDoFnRunner.processElement > (SimpleDoFnRunner.java:141) > at 
com.google.cloud.dataflow.worker.SimpleParDoFn.processElemen > t(SimpleParDoFn.java:324) > at com.google.cloud.dataflow.worker.util.common.worker.ParDoOpe > ration.process(ParDoOperation.java:48) > at com.google.cloud.dataflow.worker.util.common.worker.OutputRe > ceiver.process(OutputReceiver.java:52) > at com.google.cloud.dataflow.worker.SimpleParDoFn$1.output(Simp > leParDoFn.java:272) > at org.apache.beam.runners.core.SimpleDoFnRunner.outputWindowed > Value(SimpleDoFnRunner.java:211) > at org.apache.beam.runners.core.SimpleDoFnRunner.access$700(Sim > pleDoFnRunner.java:66) > at org.apache.beam.runners.core.SimpleDoFnRunner$DoFnProcessCon > text.output(SimpleDoFnRunner.java:436) > at org.apache.beam.runners.core.SimpleDoFnRunner$DoFnProcessCon > text.output(SimpleDoFnRunner.java:424) > at org.apache.beam.sdk.transforms.MapElements$1.processElement( > MapElements.java:122) > at > org.apache.beam.sdk.transforms.MapElements$1$DoFnInvoker.invokeProcessElement(Unknown > Source) > at org.apache.beam.runners.core.SimpleDoFnRunner.invokeProcessE > lement(SimpleDoFnRunner.java:177) > at org.apache.beam.runners.core.SimpleDoFnRunner.processElement > (SimpleDoFnRunner.java:141) > at com.google.cloud.dataflow.worker.SimpleParDoFn.processElemen > t(SimpleParDoFn.java:324) > at com.google.cloud.dataflow.worker.util.common.worker.ParDoOpe > ration.process(ParDoOperation.java:48) > at com.google.cloud.dataflow.worker.util.common.worker.OutputRe > ceiver.process(OutputReceiver.java:52) > at com.google.cloud.dataflow.worker.SimpleParDoFn$1.output(Simp > leParDoFn.java:272) > at org.apache.beam.runners.core.SimpleDoFnRunner.outputWindowed > Value(SimpleDoFnRunner.java:211) > at org.apache.beam.runners.core.SimpleDoFnRunner.access$700(Sim > pleDoFnRunner.java:66) > at org.apache.beam.runners.core.SimpleDoFnRunner$DoFnProcessCon > text.output(SimpleDoFnRunner.java:436) > at org.apache.beam.runners.core.SimpleDoFnRunner$DoFnProcessCon > text.output(SimpleDoFnRunner.java:424) > at 
org.apache.beam.runners.dataflow.ReshuffleOverrideFactory$Re > shuffleWithOnlyTrigger$1.processElement(ReshuffleOverrideFactory.java:84) > at org.apache.beam.runners.dataflow.ReshuffleOverrideFactory$Re > shuffleWithOnlyTrigger$1$DoFnInvoker.invokeProcessElement(Unknown Source) > at org.apache.beam.runners.core.SimpleDoFnRunner.invokeProcessE > lement(SimpleDoFnRunner.java:177) > at org.apache.beam.runners.core.SimpleDoFnRunner.processElement > (SimpleDoFnRunner.java:141) > at com.google.cloud.dataflow.worker.SimpleParDoFn.processElemen > t(SimpleParDoFn.java:324) > at com.google.cloud.dataflow.worker.util.common.worker.ParDoOpe > ration.process(ParDoOperation.java:48) > at com.google.cloud.dataflow.worker.util.common.worker.OutputRe > ceiver.process(OutputReceiver.java:52) > at com.google.cloud.dataflow.worker.GroupAlsoByWindowsParDoFn$1 > .output(GroupAlsoByWindowsParDoFn.java:180) > ... 21 more > Caused by: com.google.datastore.v1.client.DatastoreException: datastore > transaction or write too big., code=INVALID_ARGUMENT > at com.google.datastore.v1.client.RemoteRpc.makeException(Remot > eRpc.java:226) > at com.google.datastore.v1.client.RemoteRpc.makeException(Remot > eRpc.java:275) > at com.google.datastore.v1.client.RemoteRpc.call(RemoteRpc.java:186) > at com.google.datastore.v1.client.Datastore.commit(Datastore.java:87) > at org.apache.beam.sdk.io.gcp.datastore.DatastoreV1$DatastoreWr > iterFn.flushBatch(DatastoreV1.java:1326) > > >
