arnavarora2004 commented on code in PR #35696: URL: https://github.com/apache/beam/pull/35696#discussion_r2236927390
########## sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigtable/BigtableReadSchemaTransformProviderIT.java: ########## @@ -136,95 +135,94 @@ public void tearDown() { tableAdminClient.close(); } - public List<Row> writeToTable(int numRows) { - List<Row> expectedRows = new ArrayList<>(); - - try { - for (int i = 1; i <= numRows; i++) { - String key = "key" + i; - String valueA = "value a" + i; - String valueB = "value b" + i; - String valueC = "value c" + i; - String valueD = "value d" + i; - long timestamp = 1000L * i; - - RowMutation rowMutation = - RowMutation.create(tableId, key) - .setCell(COLUMN_FAMILY_NAME_1, "a", timestamp, valueA) - .setCell(COLUMN_FAMILY_NAME_1, "b", timestamp, valueB) - .setCell(COLUMN_FAMILY_NAME_2, "c", timestamp, valueC) - .setCell(COLUMN_FAMILY_NAME_2, "d", timestamp, valueD); - dataClient.mutateRow(rowMutation); - - // Set up expected Beam Row - Map<String, List<Row>> columns1 = new HashMap<>(); - columns1.put( - "a", - Arrays.asList( - Row.withSchema(CELL_SCHEMA) - .withFieldValue( - "value", ByteBuffer.wrap(valueA.getBytes(StandardCharsets.UTF_8))) - .withFieldValue("timestamp_micros", timestamp) - .build())); - columns1.put( - "b", - Arrays.asList( - Row.withSchema(CELL_SCHEMA) - .withFieldValue( - "value", ByteBuffer.wrap(valueB.getBytes(StandardCharsets.UTF_8))) - .withFieldValue("timestamp_micros", timestamp) - .build())); - - Map<String, List<Row>> columns2 = new HashMap<>(); - columns2.put( - "c", - Arrays.asList( - Row.withSchema(CELL_SCHEMA) - .withFieldValue( - "value", ByteBuffer.wrap(valueC.getBytes(StandardCharsets.UTF_8))) - .withFieldValue("timestamp_micros", timestamp) - .build())); - columns2.put( - "d", - Arrays.asList( - Row.withSchema(CELL_SCHEMA) - .withFieldValue( - "value", ByteBuffer.wrap(valueD.getBytes(StandardCharsets.UTF_8))) - .withFieldValue("timestamp_micros", timestamp) - .build())); - - Map<String, Map<String, List<Row>>> families = new HashMap<>(); - 
families.put(COLUMN_FAMILY_NAME_1, columns1); - families.put(COLUMN_FAMILY_NAME_2, columns2); - - Row expectedRow = - Row.withSchema(ROW_SCHEMA) - .withFieldValue("key", ByteBuffer.wrap(key.getBytes(StandardCharsets.UTF_8))) - .withFieldValue("column_families", families) - .build(); - - expectedRows.add(expectedRow); - } - LOG.info("Finished writing {} rows to table {}", numRows, tableId); - } catch (NotFoundException e) { - throw new RuntimeException("Failed to write to table", e); - } - return expectedRows; - } - @Test public void testRead() { - List<Row> expectedRows = writeToTable(20); + int numRows = 20; + List<Row> expectedRows = new ArrayList<>(); + for (int i = 1; i <= numRows; i++) { + String key = "key" + i; + byte[] keyBytes = key.getBytes(StandardCharsets.UTF_8); + String valueA = "value a" + i; + byte[] valueABytes = valueA.getBytes(StandardCharsets.UTF_8); + String valueB = "value b" + i; + byte[] valueBBytes = valueB.getBytes(StandardCharsets.UTF_8); + String valueC = "value c" + i; + byte[] valueCBytes = valueC.getBytes(StandardCharsets.UTF_8); + String valueD = "value d" + i; + byte[] valueDBytes = valueD.getBytes(StandardCharsets.UTF_8); + long timestamp = 1000L * i; + + RowMutation rowMutation = + RowMutation.create(tableId, key) + .setCell(COLUMN_FAMILY_NAME_1, "a", timestamp, valueA) + .setCell(COLUMN_FAMILY_NAME_1, "b", timestamp, valueB) + .setCell(COLUMN_FAMILY_NAME_2, "c", timestamp, valueC) + .setCell(COLUMN_FAMILY_NAME_2, "d", timestamp, valueD); + dataClient.mutateRow(rowMutation); + + // Set up expected Beam Row + // FIX: Use byte[] instead of ByteBuffer + Map<String, List<Row>> columns1 = new HashMap<>(); + columns1.put( + "a", + Arrays.asList( + Row.withSchema(CELL_SCHEMA) + .withFieldValue("value", valueABytes) + .withFieldValue("timestamp_micros", timestamp) + .build())); + columns1.put( + "b", + Arrays.asList( + Row.withSchema(CELL_SCHEMA) + .withFieldValue("value", valueBBytes) + .withFieldValue("timestamp_micros", timestamp) + 
.build())); + + Map<String, List<Row>> columns2 = new HashMap<>(); + columns2.put( + "c", + Arrays.asList( + Row.withSchema(CELL_SCHEMA) + .withFieldValue("value", valueCBytes) + .withFieldValue("timestamp_micros", timestamp) + .build())); + columns2.put( + "d", + Arrays.asList( + Row.withSchema(CELL_SCHEMA) + .withFieldValue("value", valueDBytes) + .withFieldValue("timestamp_micros", timestamp) + .build())); + + Map<String, Map<String, List<Row>>> families = new HashMap<>(); + families.put(COLUMN_FAMILY_NAME_1, columns1); + families.put(COLUMN_FAMILY_NAME_2, columns2); + + Row expectedRow = + Row.withSchema(ROW_SCHEMA) + .withFieldValue("key", keyBytes) + .withFieldValue("column_families", families) + .build(); + + expectedRows.add(expectedRow); + } + LOG.info("Finished writing {} rows to table {}", numRows, tableId); + // FIX: Explicitly set flatten to false to match the expected nested rows. BigtableReadSchemaTransformConfiguration config = BigtableReadSchemaTransformConfiguration.builder() .setTableId(tableId) .setInstanceId(instanceId) .setProjectId(projectId) + .setFlatten(false) Review Comment: yes did that, ########## sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/BigtableReadSchemaTransformProvider.java: ########## @@ -152,45 +166,129 @@ public PCollectionRowTuple expand(PCollectionRowTuple input) { .withInstanceId(configuration.getInstanceId()) .withProjectId(configuration.getProjectId())); + // ParDo function implements fork logic if flatten == true + + // Determine the output schema based on the flatten configuration. + // The default for flatten is true. Review Comment: sounds good -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@beam.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org