This is an automated email from the ASF dual-hosted git repository. mblow pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/asterixdb.git
commit 1113903ec39cfa267b824550aafd2621e5cdb9fe Author: Murtadha Hubail <[email protected]> AuthorDate: Fri May 8 05:03:25 2020 +0300 [NO ISSUE][RT] Collect Tuple Stats in External Scan - user model changes: no - storage format changes: no - interface changes: yes Details: - Report number of procssed tuples in external scan operator. - Add test case. Change-Id: I5dda25f1fc53581dcc5663f2516e79b9b66fd0a5 Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/6224 Reviewed-by: Murtadha Hubail <[email protected]> Reviewed-by: Ali Alsuliman <[email protected]> Integration-Tests: Jenkins <[email protected]> Tested-by: Jenkins <[email protected]> --- .../src/test/resources/runtimets/metrics.xml | 5 +++++ .../external-dataset/external-dataset.1.ddl.sqlpp} | 18 +++++++++--------- .../external-dataset/external-dataset.2.metrics.sqlpp} | 17 ++++++----------- .../external-dataset/external-datasett.3.ddl.sqlpp} | 17 +++++------------ .../external-dataset/external-dataset.2.regexadm | 1 + .../asterix/common/external/IDataSourceAdapter.java | 7 +++++++ .../asterix/external/api/IDataFlowController.java | 7 +++++++ .../external/dataflow/AbstractDataFlowController.java | 6 ++++++ .../external/dataflow/RecordDataFlowController.java | 2 ++ .../external/dataflow/StreamDataFlowController.java | 2 ++ .../asterix/external/dataset/adapter/FeedAdapter.java | 5 +++++ .../external/dataset/adapter/GenericAdapter.java | 5 +++++ .../operators/ExternalScanOperatorDescriptor.java | 11 ++++++++++- 13 files changed, 70 insertions(+), 33 deletions(-) diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/metrics.xml b/asterixdb/asterix-app/src/test/resources/runtimets/metrics.xml index da7ba31..a2c3ae5 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/metrics.xml +++ b/asterixdb/asterix-app/src/test/resources/runtimets/metrics.xml @@ -40,5 +40,10 @@ <output-dir compare="Text">secondary-index-index-only</output-dir> </compilation-unit> </test-case> + <test-case FilePath="metrics"> + <compilation-unit name="external-dataset"> + <output-dir compare="Text">external-dataset</output-dir> + </compilation-unit> + </test-case> </test-group> </test-suite> \ No newline at end of file diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/AbstractDataFlowController.java b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/metrics/external-dataset/external-dataset.1.ddl.sqlpp similarity index 67% copy from asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/AbstractDataFlowController.java copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/metrics/external-dataset/external-dataset.1.ddl.sqlpp index a324496..a105dfe 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/AbstractDataFlowController.java +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/metrics/external-dataset/external-dataset.1.ddl.sqlpp @@ -16,16 +16,16 @@ * specific language governing permissions and limitations * under the License. */ -package org.apache.asterix.external.dataflow; +/* + * Description : Processed objects metrics on external dataset + * Expected Res : Success + */ -import org.apache.asterix.external.api.IDataFlowController; -import org.apache.hyracks.api.context.IHyracksTaskContext; +DROP DATAVERSE test IF EXISTS; +CREATE DATAVERSE test; -public abstract class AbstractDataFlowController implements IDataFlowController { +USE test; - protected final IHyracksTaskContext ctx; +CREATE TYPE t1 AS {f1: string, f2: string, f3: string, f4: string, f5: string}; - public AbstractDataFlowController(IHyracksTaskContext ctx) { - this.ctx = ctx; - } -} +CREATE EXTERNAL DATASET ds1(t1) USING localfs(("path"="asterix_nc1://data/csv/sample_09.csv"), ("format"="CSV"), ("header"="FALSE")); \ No newline at end of file diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/AbstractDataFlowController.java b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/metrics/external-dataset/external-dataset.2.metrics.sqlpp similarity index 67% copy from asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/AbstractDataFlowController.java copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/metrics/external-dataset/external-dataset.2.metrics.sqlpp index a324496..e57e938 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/AbstractDataFlowController.java +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/metrics/external-dataset/external-dataset.2.metrics.sqlpp @@ -16,16 +16,11 @@ * specific language governing permissions and limitations * under the License. */ -package org.apache.asterix.external.dataflow; - -import org.apache.asterix.external.api.IDataFlowController; -import org.apache.hyracks.api.context.IHyracksTaskContext; - -public abstract class AbstractDataFlowController implements IDataFlowController { +/* + * Description : Processed objects metrics on external dataset + * Expected Res : Success + */ - protected final IHyracksTaskContext ctx; +USE test; - public AbstractDataFlowController(IHyracksTaskContext ctx) { - this.ctx = ctx; - } -} +SELECT COUNT(*) from ds1; \ No newline at end of file diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/AbstractDataFlowController.java b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/metrics/external-dataset/external-datasett.3.ddl.sqlpp similarity index 67% copy from asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/AbstractDataFlowController.java copy to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/metrics/external-dataset/external-datasett.3.ddl.sqlpp index a324496..0bf95e4 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/AbstractDataFlowController.java +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/metrics/external-dataset/external-datasett.3.ddl.sqlpp @@ -16,16 +16,9 @@ * specific language governing permissions and limitations * under the License. */ -package org.apache.asterix.external.dataflow; - -import org.apache.asterix.external.api.IDataFlowController; -import org.apache.hyracks.api.context.IHyracksTaskContext; - -public abstract class AbstractDataFlowController implements IDataFlowController { - - protected final IHyracksTaskContext ctx; +/* + * Description : Processed objects metrics on external dataset + * Expected Res : Success + */ - public AbstractDataFlowController(IHyracksTaskContext ctx) { - this.ctx = ctx; - } -} +DROP DATAVERSE test; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/metrics/external-dataset/external-dataset.2.regexadm b/asterixdb/asterix-app/src/test/resources/runtimets/results/metrics/external-dataset/external-dataset.2.regexadm new file mode 100644 index 0000000..ae84a71 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/metrics/external-dataset/external-dataset.2.regexadm @@ -0,0 +1 @@ +.*"processedObjects":15.* \ No newline at end of file diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/external/IDataSourceAdapter.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/external/IDataSourceAdapter.java index 18f59f2..8fc70b8 100644 --- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/external/IDataSourceAdapter.java +++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/external/IDataSourceAdapter.java @@ -49,4 +49,11 @@ public interface IDataSourceAdapter { * @throws Exception */ public void start(int partition, IFrameWriter writer) throws HyracksDataException, InterruptedException; + + /** + * @return The number of processed tuples by this adapter + */ + default long getProcessedTuples() { + throw new UnsupportedOperationException(); + } } diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/api/IDataFlowController.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/api/IDataFlowController.java index f59b82e..ccc420b 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/api/IDataFlowController.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/api/IDataFlowController.java @@ -43,4 +43,11 @@ public interface IDataFlowController { public default boolean stop(long timeout) throws HyracksDataException { throw new RuntimeDataException(ErrorCode.OPERATION_NOT_SUPPORTED); } + + /** + * @return The number of processed tuples by this controller + */ + default long getProcessedTuples() { + throw new UnsupportedOperationException(); + } } diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/AbstractDataFlowController.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/AbstractDataFlowController.java index a324496..95024e1 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/AbstractDataFlowController.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/AbstractDataFlowController.java @@ -24,8 +24,14 @@ import org.apache.hyracks.api.context.IHyracksTaskContext; public abstract class AbstractDataFlowController implements IDataFlowController { protected final IHyracksTaskContext ctx; + protected long processedTuples = 0; public AbstractDataFlowController(IHyracksTaskContext ctx) { this.ctx = ctx; } + + @Override + public long getProcessedTuples() { + return processedTuples; + } } diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/RecordDataFlowController.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/RecordDataFlowController.java index 34379e9..2c19f9d 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/RecordDataFlowController.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/RecordDataFlowController.java @@ -44,6 +44,7 @@ public class RecordDataFlowController<T> extends AbstractDataFlowController { @Override public void start(IFrameWriter writer) throws HyracksDataException { try { + processedTuples = 0; ArrayTupleBuilder tb = new ArrayTupleBuilder(numOfTupleFields); TupleForwarder tupleForwarder = new TupleForwarder(ctx, writer); while (recordReader.hasNext()) { @@ -53,6 +54,7 @@ public class RecordDataFlowController<T> extends AbstractDataFlowController { tb.addFieldEndOffset(); appendOtherTupleFields(tb); tupleForwarder.addTuple(tb); + processedTuples++; } } tupleForwarder.complete(); diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/StreamDataFlowController.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/StreamDataFlowController.java index 8275953..9c11c97 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/StreamDataFlowController.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataflow/StreamDataFlowController.java @@ -35,6 +35,7 @@ public class StreamDataFlowController extends AbstractDataFlowController { @Override public void start(IFrameWriter writer) throws HyracksDataException { try { + processedTuples = 0; ArrayTupleBuilder tb = new ArrayTupleBuilder(1); TupleForwarder tupleForwarder = new TupleForwarder(ctx, writer); while (true) { @@ -44,6 +45,7 @@ public class StreamDataFlowController extends AbstractDataFlowController { } tb.addFieldEndOffset(); tupleForwarder.addTuple(tb); + processedTuples++; } tupleForwarder.complete(); } catch (Exception e) { diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/FeedAdapter.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/FeedAdapter.java index 0ab59fe..123a552 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/FeedAdapter.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/FeedAdapter.java @@ -38,6 +38,11 @@ public class FeedAdapter implements IDataSourceAdapter, Closeable { controller.start(writer); } + @Override + public long getProcessedTuples() { + return controller.getProcessedTuples(); + } + public boolean stop(long timeout) throws HyracksDataException { return controller.stop(timeout); } diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/GenericAdapter.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/GenericAdapter.java index 0904384..17a134b 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/GenericAdapter.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/dataset/adapter/GenericAdapter.java @@ -35,4 +35,9 @@ public class GenericAdapter implements IDataSourceAdapter { public void start(int partition, IFrameWriter writer) throws HyracksDataException, InterruptedException { controller.start(writer); } + + @Override + public long getProcessedTuples() { + return controller.getProcessedTuples(); + } } diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/operators/ExternalScanOperatorDescriptor.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/operators/ExternalScanOperatorDescriptor.java index 4fd5151..1d7623d 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/operators/ExternalScanOperatorDescriptor.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/operators/ExternalScanOperatorDescriptor.java @@ -26,6 +26,7 @@ import org.apache.hyracks.api.dataflow.value.IRecordDescriptorProvider; import org.apache.hyracks.api.dataflow.value.RecordDescriptor; import org.apache.hyracks.api.exceptions.HyracksDataException; import org.apache.hyracks.api.job.JobSpecification; +import org.apache.hyracks.api.job.profiling.IOperatorStats; import org.apache.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor; import org.apache.hyracks.dataflow.std.base.AbstractUnaryOutputSourceOperatorNodePushable; @@ -53,13 +54,21 @@ public class ExternalScanOperatorDescriptor extends AbstractSingleActivityOperat return new AbstractUnaryOutputSourceOperatorNodePushable() { + private IOperatorStats stats; + @Override public void initialize() throws HyracksDataException { - IDataSourceAdapter adapter = null; + IDataSourceAdapter adapter; + if (ctx.getStatsCollector() != null) { + stats = ctx.getStatsCollector().getOrAddOperatorStats(getDisplayName()); + } try { writer.open(); adapter = adapterFactory.createAdapter(ctx, partition); adapter.start(partition, writer); + if (stats != null) { + stats.getTupleCounter().update(adapter.getProcessedTuples()); + } } catch (Exception e) { writer.fail(); throw HyracksDataException.create(e);
