diegomez17 commented on code in PR #16939:
URL: https://github.com/apache/beam/pull/16939#discussion_r853529078
##########
sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/BigtableServiceImpl.java:
##########
@@ -210,6 +234,246 @@ public Row getCurrentRow() throws NoSuchElementException {
}
}
+ @VisibleForTesting
+ static class BigtableMiniBatchReaderImpl implements Reader {
+ private BigtableSession session;
+ private final BigtableSource source;
+ private Row currentRow;
+ private Queue<FlatRow> buffer;
+ private RowSet rowSet;
+ private ServiceCallMetric serviceCallMetric;
+ private Future<List<FlatRow>> future;
+ private ByteString lastFetchedRow;
+ private boolean lastFillComplete;
+
+ private int miniBatchLimit = DEFAULT_MINI_BATCH_SIZE;
+ private long bufferSizeLimit = DEFAULT_BYTE_LIMIT;
+ private int miniBatchWaterMark;
+ private final String tableName;
+
+ @VisibleForTesting
+ BigtableMiniBatchReaderImpl(BigtableSession session, BigtableSource source) {
+ this.session = session;
+ if (source.getMaxBufferElementCount() != null && source.getMaxBufferElementCount() != 0) {
+ this.miniBatchLimit = source.getMaxBufferElementCount();
+ this.bufferSizeLimit = (long) miniBatchLimit * 100 * 1024 * 1024;
+ }
+ this.miniBatchWaterMark = miniBatchLimit / 10;
Review Comment:
Done.
##########
sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/BigtableServiceImpl.java:
##########
@@ -210,6 +234,246 @@ public Row getCurrentRow() throws NoSuchElementException {
}
}
+ @VisibleForTesting
+ static class BigtableMiniBatchReaderImpl implements Reader {
+ private BigtableSession session;
+ private final BigtableSource source;
+ private Row currentRow;
+ private Queue<FlatRow> buffer;
+ private RowSet rowSet;
+ private ServiceCallMetric serviceCallMetric;
+ private Future<List<FlatRow>> future;
+ private ByteString lastFetchedRow;
+ private boolean lastFillComplete;
+
+ private int miniBatchLimit = DEFAULT_MINI_BATCH_SIZE;
+ private long bufferSizeLimit = DEFAULT_BYTE_LIMIT;
+ private int miniBatchWaterMark;
+ private final String tableName;
+
+ @VisibleForTesting
+ BigtableMiniBatchReaderImpl(BigtableSession session, BigtableSource source) {
+ this.session = session;
+ if (source.getMaxBufferElementCount() != null && source.getMaxBufferElementCount() != 0) {
+ this.miniBatchLimit = source.getMaxBufferElementCount();
+ this.bufferSizeLimit = (long) miniBatchLimit * 100 * 1024 * 1024;
+ }
+ this.miniBatchWaterMark = miniBatchLimit / 10;
+ tableName =
+ session.getOptions().getInstanceName().toTableNameStr(source.getTableId().get());
+ this.source = source;
+ }
+
+ @Override
+ public boolean start() throws IOException {
+ buffer = new ArrayDeque<>();
+ lastFillComplete = false;
+ RowRange[] rowRanges = new RowRange[source.getRanges().size()];
+ for (int i = 0; i < source.getRanges().size(); i++) {
+ rowRanges[i] =
+ RowRange.newBuilder()
+ .setStartKeyClosed(
+
ByteString.copyFrom(source.getRanges().get(i).getStartKey().getValue()))
+ .setEndKeyOpen(
+
ByteString.copyFrom(source.getRanges().get(i).getEndKey().getValue()))
+ .build();
+ }
+ // Presort the ranges so that future segmentation can exit early when splitting the row set
+ Arrays.sort(rowRanges, RANGE_START_COMPARATOR);
+ rowSet =
+ RowSet.newBuilder()
+
.addAllRowRanges(Arrays.stream(rowRanges).collect(Collectors.toList()))
+ .build();
+
+ HashMap<String, String> baseLabels = new HashMap<>();
+ baseLabels.put(MonitoringInfoConstants.Labels.PTRANSFORM, "");
+ baseLabels.put(MonitoringInfoConstants.Labels.SERVICE, "BigTable");
+ baseLabels.put(MonitoringInfoConstants.Labels.METHOD, "google.bigtable.v2.ReadRows");
+ baseLabels.put(
+ MonitoringInfoConstants.Labels.RESOURCE,
+ GcpResourceIdentifiers.bigtableResource(
+ session.getOptions().getProjectId(),
+ session.getOptions().getInstanceId(),
+ source.getTableId().get()));
+ baseLabels.put(
+ MonitoringInfoConstants.Labels.BIGTABLE_PROJECT_ID,
session.getOptions().getProjectId());
+ baseLabels.put(
+ MonitoringInfoConstants.Labels.INSTANCE_ID,
session.getOptions().getInstanceId());
+ baseLabels.put(
+ MonitoringInfoConstants.Labels.TABLE_ID,
+ GcpResourceIdentifiers.bigtableTableID(
+ session.getOptions().getProjectId(),
+ session.getOptions().getInstanceId(),
+ source.getTableId().get()));
+ serviceCallMetric =
+ new
ServiceCallMetric(MonitoringInfoConstants.Urns.API_REQUEST_COUNT, baseLabels);
+
+ //future = session.getDataClient().readRowsAsync(buildReadRowsRequest());
+ future = createFuture();
Review Comment:
Done.
##########
sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/BigtableServiceImpl.java:
##########
@@ -210,6 +234,246 @@ public Row getCurrentRow() throws NoSuchElementException {
}
}
+ @VisibleForTesting
+ static class BigtableMiniBatchReaderImpl implements Reader {
+ private BigtableSession session;
+ private final BigtableSource source;
+ private Row currentRow;
+ private Queue<FlatRow> buffer;
+ private RowSet rowSet;
+ private ServiceCallMetric serviceCallMetric;
+ private Future<List<FlatRow>> future;
+ private ByteString lastFetchedRow;
+ private boolean lastFillComplete;
+
+ private int miniBatchLimit = DEFAULT_MINI_BATCH_SIZE;
+ private long bufferSizeLimit = DEFAULT_BYTE_LIMIT;
+ private int miniBatchWaterMark;
+ private final String tableName;
+
+ @VisibleForTesting
+ BigtableMiniBatchReaderImpl(BigtableSession session, BigtableSource source) {
+ this.session = session;
+ if (source.getMaxBufferElementCount() != null && source.getMaxBufferElementCount() != 0) {
+ this.miniBatchLimit = source.getMaxBufferElementCount();
+ this.bufferSizeLimit = (long) miniBatchLimit * 100 * 1024 * 1024;
+ }
+ this.miniBatchWaterMark = miniBatchLimit / 10;
+ tableName =
+
session.getOptions().getInstanceName().toTableNameStr(source.getTableId().get());
+ this.source = source;
+ }
+
+ @Override
+ public boolean start() throws IOException {
+ buffer = new ArrayDeque<>();
+ lastFillComplete = false;
+ RowRange[] rowRanges = new RowRange[source.getRanges().size()];
+ for (int i = 0; i < source.getRanges().size(); i++) {
+ rowRanges[i] =
+ RowRange.newBuilder()
+ .setStartKeyClosed(
+
ByteString.copyFrom(source.getRanges().get(i).getStartKey().getValue()))
+ .setEndKeyOpen(
+
ByteString.copyFrom(source.getRanges().get(i).getEndKey().getValue()))
+ .build();
+ }
+ // Presort the ranges so that future segmentation can exit early when splitting the row set
+ Arrays.sort(rowRanges, RANGE_START_COMPARATOR);
+ rowSet =
+ RowSet.newBuilder()
+
.addAllRowRanges(Arrays.stream(rowRanges).collect(Collectors.toList()))
+ .build();
+
+ HashMap<String, String> baseLabels = new HashMap<>();
+ baseLabels.put(MonitoringInfoConstants.Labels.PTRANSFORM, "");
+ baseLabels.put(MonitoringInfoConstants.Labels.SERVICE, "BigTable");
+ baseLabels.put(MonitoringInfoConstants.Labels.METHOD, "google.bigtable.v2.ReadRows");
+ baseLabels.put(
+ MonitoringInfoConstants.Labels.RESOURCE,
+ GcpResourceIdentifiers.bigtableResource(
+ session.getOptions().getProjectId(),
+ session.getOptions().getInstanceId(),
+ source.getTableId().get()));
+ baseLabels.put(
+ MonitoringInfoConstants.Labels.BIGTABLE_PROJECT_ID,
session.getOptions().getProjectId());
+ baseLabels.put(
+ MonitoringInfoConstants.Labels.INSTANCE_ID,
session.getOptions().getInstanceId());
+ baseLabels.put(
+ MonitoringInfoConstants.Labels.TABLE_ID,
+ GcpResourceIdentifiers.bigtableTableID(
+ session.getOptions().getProjectId(),
+ session.getOptions().getInstanceId(),
+ source.getTableId().get()));
+ serviceCallMetric =
+ new
ServiceCallMetric(MonitoringInfoConstants.Urns.API_REQUEST_COUNT, baseLabels);
+
+ //future = session.getDataClient().readRowsAsync(buildReadRowsRequest());
+ future = createFuture();
+ return advance();
+ }
+
+ @Override
+ public boolean advance() throws IOException {
+ if (buffer.size() <= miniBatchWaterMark && future == null && !lastFillComplete) {
+ startNextSegmentRead();
+ }
+ if (buffer.isEmpty()) {
+ if (future == null || lastFillComplete)
+ return false;
+ waitReadRowsFuture();
+ }
+ currentRow = FlatRowConverter.convert(buffer.remove());
+ return currentRow != null;
+ }
+
+ private SettableFuture<List<FlatRow>> createFuture() {
+ SettableFuture<List<FlatRow>> f = SettableFuture.create();
+
+ AtomicReference<ScanHandler> atomic = new AtomicReference<>();
Review Comment:
Done.
##########
sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/BigtableServiceImpl.java:
##########
@@ -210,6 +234,246 @@ public Row getCurrentRow() throws NoSuchElementException {
}
}
+ @VisibleForTesting
+ static class BigtableMiniBatchReaderImpl implements Reader {
+ private BigtableSession session;
+ private final BigtableSource source;
+ private Row currentRow;
+ private Queue<FlatRow> buffer;
+ private RowSet rowSet;
+ private ServiceCallMetric serviceCallMetric;
+ private Future<List<FlatRow>> future;
+ private ByteString lastFetchedRow;
+ private boolean lastFillComplete;
+
+ private int miniBatchLimit = DEFAULT_MINI_BATCH_SIZE;
+ private long bufferSizeLimit = DEFAULT_BYTE_LIMIT;
+ private int miniBatchWaterMark;
+ private final String tableName;
+
+ @VisibleForTesting
+ BigtableMiniBatchReaderImpl(BigtableSession session, BigtableSource source) {
+ this.session = session;
+ if (source.getMaxBufferElementCount() != null && source.getMaxBufferElementCount() != 0) {
+ this.miniBatchLimit = source.getMaxBufferElementCount();
+ this.bufferSizeLimit = (long) miniBatchLimit * 100 * 1024 * 1024;
+ }
+ this.miniBatchWaterMark = miniBatchLimit / 10;
+ tableName =
+
session.getOptions().getInstanceName().toTableNameStr(source.getTableId().get());
+ this.source = source;
+ }
+
+ @Override
+ public boolean start() throws IOException {
+ buffer = new ArrayDeque<>();
+ lastFillComplete = false;
+ RowRange[] rowRanges = new RowRange[source.getRanges().size()];
+ for (int i = 0; i < source.getRanges().size(); i++) {
+ rowRanges[i] =
+ RowRange.newBuilder()
+ .setStartKeyClosed(
+
ByteString.copyFrom(source.getRanges().get(i).getStartKey().getValue()))
+ .setEndKeyOpen(
+
ByteString.copyFrom(source.getRanges().get(i).getEndKey().getValue()))
+ .build();
+ }
+ // Presort the ranges so that future segmentation can exit early when splitting the row set
+ Arrays.sort(rowRanges, RANGE_START_COMPARATOR);
+ rowSet =
+ RowSet.newBuilder()
+
.addAllRowRanges(Arrays.stream(rowRanges).collect(Collectors.toList()))
+ .build();
+
+ HashMap<String, String> baseLabels = new HashMap<>();
+ baseLabels.put(MonitoringInfoConstants.Labels.PTRANSFORM, "");
+ baseLabels.put(MonitoringInfoConstants.Labels.SERVICE, "BigTable");
+ baseLabels.put(MonitoringInfoConstants.Labels.METHOD, "google.bigtable.v2.ReadRows");
+ baseLabels.put(
+ MonitoringInfoConstants.Labels.RESOURCE,
+ GcpResourceIdentifiers.bigtableResource(
+ session.getOptions().getProjectId(),
+ session.getOptions().getInstanceId(),
+ source.getTableId().get()));
+ baseLabels.put(
+ MonitoringInfoConstants.Labels.BIGTABLE_PROJECT_ID,
session.getOptions().getProjectId());
+ baseLabels.put(
+ MonitoringInfoConstants.Labels.INSTANCE_ID,
session.getOptions().getInstanceId());
+ baseLabels.put(
+ MonitoringInfoConstants.Labels.TABLE_ID,
+ GcpResourceIdentifiers.bigtableTableID(
+ session.getOptions().getProjectId(),
+ session.getOptions().getInstanceId(),
+ source.getTableId().get()));
+ serviceCallMetric =
+ new
ServiceCallMetric(MonitoringInfoConstants.Urns.API_REQUEST_COUNT, baseLabels);
+
+ //future = session.getDataClient().readRowsAsync(buildReadRowsRequest());
+ future = createFuture();
+ return advance();
+ }
+
+ @Override
+ public boolean advance() throws IOException {
+ if (buffer.size() <= miniBatchWaterMark && future == null && !lastFillComplete) {
+ startNextSegmentRead();
+ }
+ if (buffer.isEmpty()) {
+ if (future == null || lastFillComplete)
+ return false;
+ waitReadRowsFuture();
+ }
+ currentRow = FlatRowConverter.convert(buffer.remove());
+ return currentRow != null;
+ }
+
+ private SettableFuture<List<FlatRow>> createFuture() {
+ SettableFuture<List<FlatRow>> f = SettableFuture.create();
+
+ AtomicReference<ScanHandler> atomic = new AtomicReference<>();
+ ScanHandler handler;
+
+ handler = session.getDataClient().readFlatRows(buildReadRowsRequest(), new StreamObserver<FlatRow>() {
+ List<FlatRow> rows = new ArrayList<>();
+ long currentByteSize = 0;
+ @Override
+ public void onNext(FlatRow flatRow) {
+ rows.add(flatRow);
+ currentByteSize += flatRow.getRowKey().size() + flatRow.getCells().stream()
+ .mapToLong(c -> c.getQualifier().size() + c.getValue().size()).sum();
+ if (currentByteSize > bufferSizeLimit) {
+ atomic.get().cancel();
+ return;
+ }
+ }
+
+ @Override
+ public void onError(Throwable e) {
+ if (e instanceof CancellationException) {
Review Comment:
Done.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]