[
https://issues.apache.org/jira/browse/BEAM-11984?focusedWorklogId=611990&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-611990
]
ASF GitHub Bot logged work on BEAM-11984:
-----------------------------------------
Author: ASF GitHub Bot
Created on: 18/Jun/21 21:06
Start Date: 18/Jun/21 21:06
Worklog Time Spent: 10m
Work Description: TheNeuralBit commented on a change in pull request
#14770:
URL: https://github.com/apache/beam/pull/14770#discussion_r653965379
##########
File path: sdks/python/apache_beam/io/gcp/gcsio_test.py
##########
@@ -751,6 +768,53 @@ def test_mime_binary_encoding(self):
generator._handle_text(message)
self.assertEqual(test_msg.encode('ascii'), output_buffer.getvalue())
+ def test_downloader_monitoring_info(self):
+ file_name = 'gs://gcsio-metrics-test/dummy_mode_file'
+ file_size = 5 * 1024 * 1024 + 100
+ random_file = self._insert_random_file(self.client, file_name, file_size)
+ self.gcs.open(file_name, 'r')
+
+ resource =
resource_identifiers.GoogleCloudStorageBucket(random_file.bucket)
+ labels = {
+ monitoring_infos.SERVICE_LABEL: 'Storage',
+ monitoring_infos.METHOD_LABEL: 'Objects.get',
+ monitoring_infos.RESOURCE_LABEL: resource,
+ monitoring_infos.GCS_BUCKET_LABEL: random_file.bucket,
+ monitoring_infos.GCS_PROJECT_ID_LABEL: DEFAULT_PROJECT_NUMBER,
+ monitoring_infos.STATUS_LABEL: 'ok'
+ }
+
+ metric_name = MetricName(
+ None, None, urn=monitoring_infos.API_REQUEST_COUNT_URN, labels=labels)
+ metric_value = MetricsEnvironment.process_wide_container().get_counter(
+ metric_name).get_cumulative()
+
+ self.assertEqual(metric_value, 2)
+
+ def test_uploader_monitoring_info(self):
+ file_name = 'gs://gcsio-metrics-test/dummy_mode_file'
+ file_size = 5 * 1024 * 1024 + 100
+ random_file = self._insert_random_file(self.client, file_name, file_size)
+ f = self.gcs.open(file_name, 'w')
+
+ resource =
resource_identifiers.GoogleCloudStorageBucket(random_file.bucket)
+ labels = {
+ monitoring_infos.SERVICE_LABEL: 'Storage',
+ monitoring_infos.METHOD_LABEL: 'Objects.insert',
+ monitoring_infos.RESOURCE_LABEL: resource,
+ monitoring_infos.GCS_BUCKET_LABEL: random_file.bucket,
+ monitoring_infos.GCS_PROJECT_ID_LABEL: DEFAULT_PROJECT_NUMBER,
Review comment:
Are these labels supposed to map to
[`MonitoringInfo.labels`](https://github.com/apache/beam/blob/2a86ae963a748328dd671f5cfc8eb9c8399664ac/model/pipeline/src/main/proto/metrics.proto#L432)?
That field is a `map<string, string>`, but this value is an integer.
##########
File path: sdks/python/apache_beam/io/gcp/gcsio_test.py
##########
@@ -751,6 +768,53 @@ def test_mime_binary_encoding(self):
generator._handle_text(message)
self.assertEqual(test_msg.encode('ascii'), output_buffer.getvalue())
+ def test_downloader_monitoring_info(self):
+ file_name = 'gs://gcsio-metrics-test/dummy_mode_file'
+ file_size = 5 * 1024 * 1024 + 100
+ random_file = self._insert_random_file(self.client, file_name, file_size)
+ self.gcs.open(file_name, 'r')
+
+ resource =
resource_identifiers.GoogleCloudStorageBucket(random_file.bucket)
+ labels = {
+ monitoring_infos.SERVICE_LABEL: 'Storage',
+ monitoring_infos.METHOD_LABEL: 'Objects.get',
+ monitoring_infos.RESOURCE_LABEL: resource,
+ monitoring_infos.GCS_BUCKET_LABEL: random_file.bucket,
+ monitoring_infos.GCS_PROJECT_ID_LABEL: DEFAULT_PROJECT_NUMBER,
+ monitoring_infos.STATUS_LABEL: 'ok'
+ }
+
+ metric_name = MetricName(
+ None, None, urn=monitoring_infos.API_REQUEST_COUNT_URN, labels=labels)
+ metric_value = MetricsEnvironment.process_wide_container().get_counter(
+ metric_name).get_cumulative()
+
+ self.assertEqual(metric_value, 2)
+
+ def test_uploader_monitoring_info(self):
+ file_name = 'gs://gcsio-metrics-test/dummy_mode_file'
+ file_size = 5 * 1024 * 1024 + 100
+ random_file = self._insert_random_file(self.client, file_name, file_size)
+ f = self.gcs.open(file_name, 'w')
+
+ resource =
resource_identifiers.GoogleCloudStorageBucket(random_file.bucket)
+ labels = {
+ monitoring_infos.SERVICE_LABEL: 'Storage',
+ monitoring_infos.METHOD_LABEL: 'Objects.insert',
+ monitoring_infos.RESOURCE_LABEL: resource,
+ monitoring_infos.GCS_BUCKET_LABEL: random_file.bucket,
+ monitoring_infos.GCS_PROJECT_ID_LABEL: DEFAULT_PROJECT_NUMBER,
+ monitoring_infos.STATUS_LABEL: 'ok'
+ }
+
+ f.close()
+ metric_name = MetricName(
+ None, None, urn=monitoring_infos.API_REQUEST_COUNT_URN, labels=labels)
+ metric_value = MetricsEnvironment.process_wide_container().get_counter(
Review comment:
I'm assuming this process_wide_container is intended to be a global
container that ends up being shared between tests, because we often run many
tests in one process.
##########
File path: sdks/python/apache_beam/io/gcp/gcsio_test.py
##########
@@ -751,6 +768,53 @@ def test_mime_binary_encoding(self):
generator._handle_text(message)
self.assertEqual(test_msg.encode('ascii'), output_buffer.getvalue())
+ def test_downloader_monitoring_info(self):
+ file_name = 'gs://gcsio-metrics-test/dummy_mode_file'
+ file_size = 5 * 1024 * 1024 + 100
+ random_file = self._insert_random_file(self.client, file_name, file_size)
+ self.gcs.open(file_name, 'r')
+
+ resource =
resource_identifiers.GoogleCloudStorageBucket(random_file.bucket)
+ labels = {
+ monitoring_infos.SERVICE_LABEL: 'Storage',
+ monitoring_infos.METHOD_LABEL: 'Objects.get',
+ monitoring_infos.RESOURCE_LABEL: resource,
+ monitoring_infos.GCS_BUCKET_LABEL: random_file.bucket,
+ monitoring_infos.GCS_PROJECT_ID_LABEL: DEFAULT_PROJECT_NUMBER,
+ monitoring_infos.STATUS_LABEL: 'ok'
+ }
+
+ metric_name = MetricName(
+ None, None, urn=monitoring_infos.API_REQUEST_COUNT_URN, labels=labels)
+ metric_value = MetricsEnvironment.process_wide_container().get_counter(
+ metric_name).get_cumulative()
+
+ self.assertEqual(metric_value, 2)
+
+ def test_uploader_monitoring_info(self):
+ file_name = 'gs://gcsio-metrics-test/dummy_mode_file'
+ file_size = 5 * 1024 * 1024 + 100
+ random_file = self._insert_random_file(self.client, file_name, file_size)
+ f = self.gcs.open(file_name, 'w')
+
+ resource =
resource_identifiers.GoogleCloudStorageBucket(random_file.bucket)
+ labels = {
+ monitoring_infos.SERVICE_LABEL: 'Storage',
+ monitoring_infos.METHOD_LABEL: 'Objects.insert',
+ monitoring_infos.RESOURCE_LABEL: resource,
+ monitoring_infos.GCS_BUCKET_LABEL: random_file.bucket,
+ monitoring_infos.GCS_PROJECT_ID_LABEL: DEFAULT_PROJECT_NUMBER,
+ monitoring_infos.STATUS_LABEL: 'ok'
+ }
+
+ f.close()
+ metric_name = MetricName(
+ None, None, urn=monitoring_infos.API_REQUEST_COUNT_URN, labels=labels)
+ metric_value = MetricsEnvironment.process_wide_container().get_counter(
Review comment:
I'm assuming this process_wide_container is a global container that ends
up being shared between tests, because we often run many tests in one process.
##########
File path: sdks/python/apache_beam/io/gcp/gcsio_test.py
##########
@@ -751,6 +768,53 @@ def test_mime_binary_encoding(self):
generator._handle_text(message)
self.assertEqual(test_msg.encode('ascii'), output_buffer.getvalue())
+ def test_downloader_monitoring_info(self):
+ file_name = 'gs://gcsio-metrics-test/dummy_mode_file'
+ file_size = 5 * 1024 * 1024 + 100
+ random_file = self._insert_random_file(self.client, file_name, file_size)
+ self.gcs.open(file_name, 'r')
+
+ resource =
resource_identifiers.GoogleCloudStorageBucket(random_file.bucket)
+ labels = {
+ monitoring_infos.SERVICE_LABEL: 'Storage',
+ monitoring_infos.METHOD_LABEL: 'Objects.get',
+ monitoring_infos.RESOURCE_LABEL: resource,
+ monitoring_infos.GCS_BUCKET_LABEL: random_file.bucket,
+ monitoring_infos.GCS_PROJECT_ID_LABEL: DEFAULT_PROJECT_NUMBER,
+ monitoring_infos.STATUS_LABEL: 'ok'
+ }
+
+ metric_name = MetricName(
+ None, None, urn=monitoring_infos.API_REQUEST_COUNT_URN, labels=labels)
+ metric_value = MetricsEnvironment.process_wide_container().get_counter(
+ metric_name).get_cumulative()
+
+ self.assertEqual(metric_value, 2)
+
+ def test_uploader_monitoring_info(self):
+ file_name = 'gs://gcsio-metrics-test/dummy_mode_file'
+ file_size = 5 * 1024 * 1024 + 100
+ random_file = self._insert_random_file(self.client, file_name, file_size)
+ f = self.gcs.open(file_name, 'w')
+
+ resource =
resource_identifiers.GoogleCloudStorageBucket(random_file.bucket)
+ labels = {
+ monitoring_infos.SERVICE_LABEL: 'Storage',
+ monitoring_infos.METHOD_LABEL: 'Objects.insert',
+ monitoring_infos.RESOURCE_LABEL: resource,
+ monitoring_infos.GCS_BUCKET_LABEL: random_file.bucket,
+ monitoring_infos.GCS_PROJECT_ID_LABEL: DEFAULT_PROJECT_NUMBER,
+ monitoring_infos.STATUS_LABEL: 'ok'
+ }
+
+ f.close()
+ metric_name = MetricName(
+ None, None, urn=monitoring_infos.API_REQUEST_COUNT_URN, labels=labels)
+ metric_value = MetricsEnvironment.process_wide_container().get_counter(
Review comment:
The failing test does have a reset() call:
https://github.com/apache/beam/blob/f053948ccea7c26588e17435842ad02b6b23d975/sdks/python/apache_beam/runners/worker/sdk_worker_test.py#L225
Shouldn't that have prevented this flake?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
Issue Time Tracking
-------------------
Worklog Id: (was: 611990)
Time Spent: 10h 40m (was: 10.5h)
> Python GCS - Implement IO Request Count metrics
> -----------------------------------------------
>
> Key: BEAM-11984
> URL: https://issues.apache.org/jira/browse/BEAM-11984
> Project: Beam
> Issue Type: Test
> Components: io-py-gcp
> Reporter: Alex Amato
> Assignee: Rogelio Miguel Hernandez Sandoval
> Priority: P2
> Time Spent: 10h 40m
> Remaining Estimate: 0h
>
> Reference PRs (see the BigQuery IO example) and a detailed explanation of what's
> needed to instrument this IO with Request Count metrics can be found in this
> handoff doc:
> [https://docs.google.com/document/d/1lrz2wE5Dl4zlUfPAenjXIQyleZvqevqoxhyE85aj4sc/edit|https://docs.google.com/document/d/1lrz2wE5Dl4zlUfPAenjXIQyleZvqevqoxhyE85aj4sc/edit?authuser=0]
--
This message was sent by Atlassian Jira
(v8.3.4#803005)