[ https://issues.apache.org/jira/browse/BEAM-2761?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Fallon updated BEAM-2761: ------------------------- Description: In 2.1.0-SNAPSHOT and 2.2.0-SNAPSHOT, jobs writing an empty PCollection to a BigQuery partition fail with "java.lang.RuntimeException: Failed to create load job with id prefix". This is associated with a message "No schema specified on job or table" even though a schema is provided. See attached stack trace for more detail on the error. Command to run job: {code} mvn compile exec:java -Dexec.mainClass=org.apache.beam.examples.EmptyPCollection \ -Dexec.args="--runner=DataflowRunner --project=<GCP project> \ --gcpTempLocation=<tmp location>" \ -Pdataflow-runner {code} Code to reproduce the problem: {code:title=EmptyPCollection.java|borderStyle=solid} public class EmptyPCollection { public static void main(String[] args) { PipelineOptions options = PipelineOptionsFactory.fromArgs(args).create(); options.setTempLocation("<your tmp location>"); Pipeline pipeline = Pipeline.create(options); String schema = "{\"fields\": [{\"name\": \"pet\", \"type\": \"string\", \"mode\": \"required\"}]}"; String table = "mydataset.pets"; List<String> pets = Arrays.asList("Dog", "Cat", "Goldfish"); PCollection<String> inputText = pipeline.apply(Create.of(pets)).setCoder(StringUtf8Coder.of()); PCollection<TableRow> rows = inputText.apply(ParDo.of(new DoFn<String, TableRow>() { @ProcessElement public void processElement(ProcessContext c) { String text = c.element(); if (text.startsWith("X")) { // change to (D)og and works fine TableRow row = new TableRow(); row.set("pet", text); c.output(row); } } })); rows.apply(BigQueryIO.writeTableRows().to(table).withJsonSchema(schema) .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_APPEND) .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)); pipeline.run().waitUntilFinish(); } } {code} was: In 2.1.0-SNAPSHOT and 2.2.0-SNAPSHOT, jobs writing an empty PCollection to a BigQuery partition fail with "java.lang.RuntimeException: Failed to create load 
job with id prefix". This is associated with a message "No schema specified on job or table" even though a schema is provided. Command to run job: {code} mvn compile exec:java -Dexec.mainClass=org.apache.beam.examples.EmptyPCollection \ -Dexec.args="--runner=DataflowRunner --project=<GCP project> \ --gcpTempLocation=<tmp location>" \ -Pdataflow-runner {code} Code to reproduce the problem: {code:title=EmptyPCollection.java|borderStyle=solid} public class EmptyPCollection { public static void main(String[] args) { PipelineOptions options = PipelineOptionsFactory.fromArgs(args).create(); options.setTempLocation("<your tmp location>"); Pipeline pipeline = Pipeline.create(options); String schema = "{\"fields\": [{\"name\": \"pet\", \"type\": \"string\", \"mode\": \"required\"}]}"; String table = "mydataset.pets"; List<String> pets = Arrays.asList("Dog", "Cat", "Goldfish"); PCollection<String> inputText = pipeline.apply(Create.of(pets)).setCoder(StringUtf8Coder.of()); PCollection<TableRow> rows = inputText.apply(ParDo.of(new DoFn<String, TableRow>() { @ProcessElement public void processElement(ProcessContext c) { String text = c.element(); if (text.startsWith("X")) { // change to (D)og and works fine TableRow row = new TableRow(); row.set("pet", text); c.output(row); } } })); rows.apply(BigQueryIO.writeTableRows().to(table).withJsonSchema(schema) .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_APPEND) .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)); pipeline.run().waitUntilFinish(); } } {code} > Write to empty BigQuery partition fails with "No schema specified on job or > table." 
despite having provided schema > ------------------------------------------------------------------------------------------------------------------ > > Key: BEAM-2761 > URL: https://issues.apache.org/jira/browse/BEAM-2761 > Project: Beam > Issue Type: Bug > Components: runner-dataflow > Affects Versions: 2.1.0, 2.2.0 > Reporter: Fallon > Assignee: Thomas Groh > Priority: Minor > Attachments: beam-2761-stacktrace.txt > > > In 2.1.0-SNAPSHOT and 2.2.0-SNAPSHOT, jobs writing an empty PCollection to a > BigQuery partition fail with "java.lang.RuntimeException: Failed to create > load job with id prefix". This is associated with a message "No schema > specified on job or table" even though a schema is provided. See attached > stack trace for more detail on the error. > Command to run job: > {code} > mvn compile exec:java > -Dexec.mainClass=org.apache.beam.examples.EmptyPCollection \ > -Dexec.args="--runner=DataflowRunner --project=<GCP project> \ > --gcpTempLocation=<tmp location>" \ > -Pdataflow-runner > {code} > Code to reproduce the problem: > {code:title=EmptyPCollection.java|borderStyle=solid} > public class EmptyPCollection { > public static void main(String[] args) { > PipelineOptions options = PipelineOptionsFactory.fromArgs(args).create(); > options.setTempLocation("<your tmp location>"); > Pipeline pipeline = Pipeline.create(options); > String schema = "{\"fields\": [{\"name\": \"pet\", \"type\": \"string\", > \"mode\": \"required\"}]}"; > String table = "mydataset.pets"; > List<String> pets = Arrays.asList("Dog", "Cat", "Goldfish"); > PCollection<String> inputText = > pipeline.apply(Create.of(pets)).setCoder(StringUtf8Coder.of()); > PCollection<TableRow> rows = inputText.apply(ParDo.of(new DoFn<String, > TableRow>() { > @ProcessElement > public void processElement(ProcessContext c) { > String text = c.element(); > if (text.startsWith("X")) { // change to (D)og and works fine > TableRow row = new TableRow(); > row.set("pet", text); > c.output(row); > } > } 
> })); > rows.apply(BigQueryIO.writeTableRows().to(table).withJsonSchema(schema) > .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_APPEND) > > .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)); > pipeline.run().waitUntilFinish(); > } > } > {code} -- This message was sent by Atlassian JIRA (v6.4.14#64029)