KUDU-1684 - [python] Add Scan Resource Metrics Capabilities. Currently, the Python client doesn't expose scanner resource metrics. This patch adds that capability and includes tests.
Change-Id: Ib6c4057bd2644e46bdbf8bae0d4a768306e2dbd9 Reviewed-on: http://gerrit.cloudera.org:8080/4675 Tested-by: Kudu Jenkins Reviewed-by: Todd Lipcon <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/kudu/repo Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/4b5425aa Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/4b5425aa Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/4b5425aa Branch: refs/heads/master Commit: 4b5425aa3cfa70e6ec20e2b67c7976b36ca5d2d9 Parents: 2426ef3 Author: Jordan Birdsell <[email protected]> Authored: Sat Oct 8 17:49:43 2016 -0400 Committer: Todd Lipcon <[email protected]> Committed: Fri Oct 21 21:35:35 2016 +0000 ---------------------------------------------------------------------- python/kudu/client.pyx | 17 +++++++++++++++++ python/kudu/libkudu_client.pxd | 10 ++++++++++ python/kudu/tests/test_scanner.py | 15 +++++++++++++++ 3 files changed, 42 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kudu/blob/4b5425aa/python/kudu/client.pyx ---------------------------------------------------------------------- diff --git a/python/kudu/client.pyx b/python/kudu/client.pyx index 261fdbf..ce897a2 100644 --- a/python/kudu/client.pyx +++ b/python/kudu/client.pyx @@ -20,6 +20,7 @@ from libcpp.string cimport string from libcpp cimport bool as c_bool +from libcpp.map cimport map cimport cpython from cython.operator cimport dereference as deref @@ -1453,6 +1454,22 @@ cdef class Scanner: result.schema = schema return result + def get_resource_metrics(self): + """ + Return the cumulative resource metrics since the scan was started. 
+ + Returns + ------- + metrics : Dictionary + """ + _map = self.scanner.GetResourceMetrics().Get() + + # Convert map to python dictionary + result = {} + for it in _map: + result[frombytes(it.first)] = it.second + return result + def open(self): """ Returns a reference to itself to facilitate chaining http://git-wip-us.apache.org/repos/asf/kudu/blob/4b5425aa/python/kudu/libkudu_client.pxd ---------------------------------------------------------------------- diff --git a/python/kudu/libkudu_client.pxd b/python/kudu/libkudu_client.pxd index 9c9899f..83a9b03 100644 --- a/python/kudu/libkudu_client.pxd +++ b/python/kudu/libkudu_client.pxd @@ -21,6 +21,7 @@ from libc.stdint cimport * from libcpp cimport bool as c_bool from libcpp.string cimport string from libcpp.vector cimport vector +from libcpp.map cimport map # This must be included for cerr and other things to work cdef extern from "<iostream>": @@ -631,6 +632,7 @@ cdef extern from "kudu/client/client.h" namespace "kudu::client" nogil: Status AddExclusiveUpperBound(const KuduPartialRow& key) KuduSchema GetProjectionSchema() + const ResourceMetrics& GetResourceMetrics() string ToString() cdef cppclass KuduScanToken: @@ -681,3 +683,11 @@ cdef extern from "kudu/client/client.h" namespace "kudu::client" nogil: KuduWriteOperation* release_failed_op() c_bool was_possibly_successful() + +cdef extern from "kudu/client/resource_metrics.h" namespace "kudu::client" nogil: + + cdef cppclass ResourceMetrics: + ResourceMetrics() + + map[string, int64_t] Get() + int64_t GetMetric(const string& name) http://git-wip-us.apache.org/repos/asf/kudu/blob/4b5425aa/python/kudu/tests/test_scanner.py ---------------------------------------------------------------------- diff --git a/python/kudu/tests/test_scanner.py b/python/kudu/tests/test_scanner.py index 72a22a4..f010f36 100644 --- a/python/kudu/tests/test_scanner.py +++ b/python/kudu/tests/test_scanner.py @@ -215,6 +215,21 @@ class TestScanner(TestScanBase): # Avoid tight looping 
time.sleep(0.05) + def test_resource_metrics(self): + """ + Test getting the resource metrics after scanning. + """ + + # Build scanner and read through all batches and retrieve metrics. + scanner = self.table.scanner() + scanner.set_fault_tolerant().open() + scanner.read_all_tuples() + metrics = scanner.get_resource_metrics() + + # Confirm that the scanner returned cache hit and miss values. + self.assertTrue('cfile_cache_hit_bytes' in metrics) + self.assertTrue('cfile_cache_miss_bytes' in metrics) + def verify_pred_type_scans(self, preds, row_indexes, count_only=False): # Using the incoming list of predicates, verify that the row returned # matches the inserted tuple at the row indexes specified in a
