ConverJens commented on pull request #13723:
URL: https://github.com/apache/beam/pull/13723#issuecomment-764664002
@dandy10 Sorry, I forgot to check out your branch from your fork. Pipeline
options now seem to be passed on, but I still get an error.
I've passed endpoint args, but Beam is still trying to connect to AWS rather
than the specified endpoint. Do you know why that is?
My supplied args:
```
'--direct_running_mode=multi_processing',
f'--direct_num_workers={NR_OF_CPUS}',
'--s3_endpoint_url=minio-service.kubeflow:9000',
f'--s3_access_key={ACCESS_KEY}',
f'--s3_secret_access_key={SECRET_ACCESS_KEY},
'--s3_verify=False'
```
The error I'm getting:
```
INFO:apache_beam.runners.portability.local_job_service:Worker: severity:
ERROR timestamp { seconds: 1611236675 nanos: 354464530 } message: "Error in
_start_upload while inserting file
s3://pipelines/tfx/trace_pipeline_e2e/FileBasedExampleGenWithDate/examples/1414/eval/beam-temp-data_tfrecord-178216b45bee11ebab6f6a446dded339/8bc9a559-3429-408f-b93d-e3a64174ec56.data_tfrecord.gz:
Traceback (most recent call last):\n File
\"/usr/local/lib/python3.7/dist-packages/urllib3/connection.py\", line 170, in
_new_conn\n (self._dns_host, self.port), self.timeout, **extra_kw\n File
\"/usr/local/lib/python3.7/dist-packages/urllib3/util/connection.py\", line 96,
in create_connection\n raise err\n File
\"/usr/local/lib/python3.7/dist-packages/urllib3/util/connection.py\", line 86,
in create_connection\n sock.connect(sa)\nsocket.timeout: timed out\n\nDuring
handling of the above exception, another exception occurred:\n\nTraceback (most
recent call last):\n File
\"/usr/local/lib/python3.7/dist-packages/botocore/httpsession.py\", line 317, in send\n
chunked=self._chunked(request.headers),\n File
\"/usr/local/lib/python3.7/dist-packages/urllib3/connectionpool.py\", line 756,
in urlopen\n method, url, error=e, _pool=self,
_stacktrace=sys.exc_info()[2]\n File
\"/usr/local/lib/python3.7/dist-packages/urllib3/util/retry.py\", line 506, in
increment\n raise six.reraise(type(error), error, _stacktrace)\n File
\"/usr/local/lib/python3.7/dist-packages/urllib3/packages/six.py\", line 735,
in reraise\n raise value\n File
\"/usr/local/lib/python3.7/dist-packages/urllib3/connectionpool.py\", line 706,
in urlopen\n chunked=chunked,\n File
\"/usr/local/lib/python3.7/dist-packages/urllib3/connectionpool.py\", line 382,
in _make_request\n self._validate_conn(conn)\n File
\"/usr/local/lib/python3.7/dist-packages/urllib3/connectionpool.py\", line
1010, in _validate_conn\n conn.connect()\n File
\"/usr/local/lib/python3.7/dist-packages/urllib3/connection.py\", line 353, in
connect\n conn = self._new_conn()\n File
\"/usr/local/lib/python3.7/dist-packages/urllib3/connection.py\", line 177, in
_new_conn\n % (self.host,
self.timeout),\nurllib3.exceptions.ConnectTimeoutError:
(<botocore.awsrequest.AWSHTTPSConnection object at 0x7fd2ee57f950>,
\'Connection to pipelines.s3.amazonaws.com timed out. (connect
timeout=60)\')\n\nDuring handling of the above exception, another exception
occurred:\n\nTraceback (most recent call last):\n File
\"/usr/local/lib/python3.7/dist-packages/apache_beam/io/aws/clients/s3/boto3_client.py\",
line 171, in create_multipart_upload\n ContentType=request.mime_type)\n
File \"/usr/local/lib/python3.7/dist-packages/botocore/client.py\", line 357,
in _api_call\n return self._make_api_call(operation_name, kwargs)\n File
\"/usr/local/lib/python3.7/dist-packages/botocore/client.py\", line 663, in
_make_api_call\n operation_model, request_dict, request_context)\n File
\"/usr/local/lib/python3.7/dist-packages/botocore/client.py\", line 682, in
_make_request\n return
self._endpoint.make_request(operation_model, request_dict)\n File
\"/usr/local/lib/python3.7/dist-packages/botocore/endpoint.py\", line 102, in
make_request\n return self._send_request(request_dict, operation_model)\n
File \"/usr/local/lib/python3.7/dist-packages/botocore/endpoint.py\", line 137,
in _send_request\n success_response, exception):\n File
\"/usr/local/lib/python3.7/dist-packages/botocore/endpoint.py\", line 256, in
_needs_retry\n caught_exception=caught_exception,
request_dict=request_dict)\n File
\"/usr/local/lib/python3.7/dist-packages/botocore/hooks.py\", line 356, in
emit\n return self._emitter.emit(aliased_event_name, **kwargs)\n File
\"/usr/local/lib/python3.7/dist-packages/botocore/hooks.py\", line 228, in
emit\n return self._emit(event_name, kwargs)\n File
\"/usr/local/lib/python3.7/dist-packages/botocore/hooks.py\", line 211, in
_emit\n response = handler(**kwargs)\n
File \"/usr/local/lib/python3.7/dist-packages/botocore/retryhandler.py\", line
183, in __call__\n if self._checker(attempts, response, caught_exception):\n
File \"/usr/local/lib/python3.7/dist-packages/botocore/retryhandler.py\", line
251, in __call__\n caught_exception)\n File
\"/usr/local/lib/python3.7/dist-packages/botocore/retryhandler.py\", line 277,
in _should_retry\n return self._checker(attempt_number, response,
caught_exception)\n File
\"/usr/local/lib/python3.7/dist-packages/botocore/retryhandler.py\", line 317,
in __call__\n caught_exception)\n File
\"/usr/local/lib/python3.7/dist-packages/botocore/retryhandler.py\", line 223,
in __call__\n attempt_number, caught_exception)\n File
\"/usr/local/lib/python3.7/dist-packages/botocore/retryhandler.py\", line 359,
in _check_caught_exception\n raise caught_exception\n File
\"/usr/local/lib/python3.7/dist-packages/botocore/endpoint.py\", line 200, in
_do_get_response\n http_response = self._send(request)\n File
\"/usr/local/lib/python3.7/dist-packages/botocore/endpoint.py\",
line 269, in _send\n return self.http_session.send(request)\n File
\"/usr/local/lib/python3.7/dist-packages/botocore/httpsession.py\", line 341,
in send\n raise ConnectTimeoutError(endpoint_url=request.url,
error=e)\nbotocore.exceptions.ConnectTimeoutError: Connect timeout on endpoint
URL: \"
https://pipelines.s3.amazonaws.com/tfx/trace_pipeline_e2e/FileBasedExampleGenWithDate/examples/1414/eval/beam-temp-data_tfrecord-178216b45bee11ebab6f6a446dded339/8bc9a559-3429-408f-b93d-e3a64174ec56.data_tfrecord.gz?uploads
\"\n\nDuring handling of the above exception, another exception
occurred:\n\nTraceback (most recent call last):\n File
\"/usr/local/lib/python3.7/dist-packages/apache_beam/io/aws/s3io.py\", line
566, in _start_upload\n response =
self._client.create_multipart_upload(request)\n File
\"/usr/local/lib/python3.7/dist-packages/apache_beam/io/aws/clients/s3/boto3_client.py\",
line 174, in create_multipart_upload\n message =
e.response[\'Error\'][\'Message\']\nAttributeError: \'ConnectTimeoutError\'
object has no attribute \'response\'\n" instruction_id: "bundle_39"
transform_id: "WriteSplit[eval]/Write/Write/WriteImpl/WriteBundles"
log_location:
"/usr/local/lib/python3.7/dist-packages/apache_beam/io/aws/s3io.py:572" thread:
"Thread-14"
Traceback (most recent call last):
File "/usr/local/lib/python3.7/dist-packages/urllib3/connection.py", line
170, in _new_conn
(self._dns_host, self.port), self.timeout, **extra_kw
File "/usr/local/lib/python3.7/dist-packages/urllib3/util/connection.py",
line 96, in create_connection
raise err
File "/usr/local/lib/python3.7/dist-packages/urllib3/util/connection.py",
line 86, in create_connection
sock.connect(sa)
socket.timeout: timed out
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/lib/python3.7/dist-packages/botocore/httpsession.py",
line 317, in send
chunked=self._chunked(request.headers),
File "/usr/local/lib/python3.7/dist-packages/urllib3/connectionpool.py",
line 756, in urlopen
method, url, error=e, _pool=self, _stacktrace=sys.exc_info()[2]
File "/usr/local/lib/python3.7/dist-packages/urllib3/util/retry.py", line
506, in increment
raise six.reraise(type(error), error, _stacktrace)
File "/usr/local/lib/python3.7/dist-packages/urllib3/packages/six.py",
line 735, in reraise
raise value
File "/usr/local/lib/python3.7/dist-packages/urllib3/connectionpool.py",
line 706, in urlopen
chunked=chunked,
File "/usr/local/lib/python3.7/dist-packages/urllib3/connectionpool.py",
line 382, in _make_request
self._validate_conn(conn)
File "/usr/local/lib/python3.7/dist-packages/urllib3/connectionpool.py",
line 1010, in _validate_conn
conn.connect()
File "/usr/local/lib/python3.7/dist-packages/urllib3/connection.py", line
353, in connect
conn = self._new_conn()
File "/usr/local/lib/python3.7/dist-packages/urllib3/connection.py", line
177, in _new_conn
% (self.host, self.timeout),
urllib3.exceptions.ConnectTimeoutError:
(<botocore.awsrequest.AWSHTTPSConnection object at 0x7fd2ee57f950>, 'Connection
to pipelines.s3.amazonaws.com timed out. (connect timeout=60)')
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File
"/usr/local/lib/python3.7/dist-packages/apache_beam/io/aws/clients/s3/boto3_client.py",
line 171, in create_multipart_upload
ContentType=request.mime_type)
File "/usr/local/lib/python3.7/dist-packages/botocore/client.py", line
357, in _api_call
return self._make_api_call(operation_name, kwargs)
File "/usr/local/lib/python3.7/dist-packages/botocore/client.py", line
663, in _make_api_call
operation_model, request_dict, request_context)
File "/usr/local/lib/python3.7/dist-packages/botocore/client.py", line
682, in _make_request
return self._endpoint.make_request(operation_model, request_dict)
File "/usr/local/lib/python3.7/dist-packages/botocore/endpoint.py", line
102, in make_request
return self._send_request(request_dict, operation_model)
File "/usr/local/lib/python3.7/dist-packages/botocore/endpoint.py", line
137, in _send_request
success_response, exception):
File "/usr/local/lib/python3.7/dist-packages/botocore/endpoint.py", line
256, in _needs_retry
caught_exception=caught_exception, request_dict=request_dict)
File "/usr/local/lib/python3.7/dist-packages/botocore/hooks.py", line 356,
in emit
return self._emitter.emit(aliased_event_name, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/botocore/hooks.py", line 228,
in emit
return self._emit(event_name, kwargs)
File "/usr/local/lib/python3.7/dist-packages/botocore/hooks.py", line 211,
in _emit
response = handler(**kwargs)
File "/usr/local/lib/python3.7/dist-packages/botocore/retryhandler.py",
line 183, in __call__
if self._checker(attempts, response, caught_exception):
File "/usr/local/lib/python3.7/dist-packages/botocore/retryhandler.py",
line 251, in __call__
caught_exception)
File "/usr/local/lib/python3.7/dist-packages/botocore/retryhandler.py",
line 277, in _should_retry
return self._checker(attempt_number, response, caught_exception)
File "/usr/local/lib/python3.7/dist-packages/botocore/retryhandler.py",
line 317, in __call__
caught_exception)
File "/usr/local/lib/python3.7/dist-packages/botocore/retryhandler.py",
line 223, in __call__
attempt_number, caught_exception)
File "/usr/local/lib/python3.7/dist-packages/botocore/retryhandler.py",
line 359, in _check_caught_exception
raise caught_exception
File "/usr/local/lib/python3.7/dist-packages/botocore/endpoint.py", line
200, in _do_get_response
http_response = self._send(request)
File "/usr/local/lib/python3.7/dist-packages/botocore/endpoint.py", line
269, in _send
return self.http_session.send(request)
File "/usr/local/lib/python3.7/dist-packages/botocore/httpsession.py",
line 341, in send
raise ConnectTimeoutError(endpoint_url=request.url, error=e)
botocore.exceptions.ConnectTimeoutError: Connect timeout on endpoint URL: "
https://pipelines.s3.amazonaws.com/tfx/trace_pipeline_e2e/FileBasedExampleGenWithDate/examples/1414/eval/beam-temp-data_tfrecord-178216b45bee11ebab6f6a446dded339/8bc9a559-3429-408f-b93d-e3a64174ec56.data_tfrecord.gz?uploads
"
```
Do I need to specify s3_region, tokens, etc. as well, even when I try to access
a local MinIO instance?
Which Beam args are you testing with?
Any help would be appreciated!
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]