It seems like we only mention the need for pip 7.0.0 on the Python
quickstart page: https://beam.apache.org/get-started/quickstart-py/

Would you like to submit a change to update it?
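
For context, the failure below comes from the SDK staging step, which shells out
to pip using the removed --download flag. On current pip the rough equivalent is
pip download; a minimal sketch of that call (the destination directory here is
just a placeholder, not the temp dir Beam actually picks):

import subprocess
import sys

# Rough equivalent, on a modern pip, of the staging command that fails below:
# `pip download` replaces the removed `pip install --download` flag.
cmd = [
    sys.executable, '-m', 'pip', 'download',
    '--dest', '/tmp/sdk-staging',  # placeholder download directory
    'google-cloud-dataflow==2.0.0',
    '--no-binary', ':all:', '--no-deps',
]
subprocess.check_call(cmd)

As far as I can tell, newer SDK releases already use pip download, so upgrading
the SDK (or pinning pip, as you did) both work around it.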

On Wed, Aug 22, 2018 at 9:31 AM OrielResearch Eila Arich-Landkof <
[email protected]> wrote:

> The issue was with the pip version: --download was deprecated. I don't know
> where this needs to be mentioned / fixed.
> Running
> pip install pip==9.0.3
> solved the issue.
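>
> In case it helps anyone else hitting this, a quick check of the installed pip
> (just a sketch; it assumes pip is importable in the environment that submits
> the job):
>
> import pip
>
> # pip 10 dropped the --download flag that this SDK version's staging code
> # relies on, so any pip >= 10 fails at the command shown in the traceback.
> if int(pip.__version__.split('.')[0]) >= 10:
>     print('pip %s is too new for this SDK; pin it, e.g. pip==9.0.3'
>           % pip.__version__)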
>
> Thanks,
> eila
>
> On Wed, Aug 22, 2018 at 11:20 AM OrielResearch Eila Arich-Landkof <
> [email protected]> wrote:
>
>> I tried a simple pipeline that runs perfectly on the local runner but hits the
>> same issue on Dataflow; see below. Is there anything in the environment that
>> needs to be updated that I am not aware of?
>>
>> Many thanks for any reference.
>> Eila
>>
>> import apache_beam as beam
>> from apache_beam.options.pipeline_options import (
>>     PipelineOptions, GoogleCloudOptions, StandardOptions)
>>
>> BUCKET_URL = 'gs://archs4'  # bucket used for the staging/temp locations below
>>
>> options = PipelineOptions()
>> google_cloud_options = options.view_as(GoogleCloudOptions)
>> google_cloud_options.project = 'PROJECT-ID'
>> google_cloud_options.job_name = 'try-debug'
>> google_cloud_options.staging_location = '%s/staging' % BUCKET_URL  # 'gs://archs4/staging'
>> google_cloud_options.temp_location = '%s/tmp' % BUCKET_URL  # 'gs://archs4/temp'
>> options.view_as(StandardOptions).runner = 'DataflowRunner'
>>
>> p1 = beam.Pipeline(options=options)
>>
>> (p1 | 'read' >> beam.io.ReadFromText('gs://dataflow-samples/shakespeare/kinglear.txt')
>>     | 'write' >> beam.io.WriteToText('gs://bucket/test.txt', num_shards=1)
>> )
>>
>> p1.run().wait_until_finish()
>>
>> This fires the following error:
>>
>> CalledProcessErrorTraceback (most recent call last)
>> <ipython-input-17-b4be63f7802f> in <module>()
>>       5  )
>>       6
>> ----> 7 p1.run().wait_until_finish()
>>
>> /usr/local/envs/py2env/lib/python2.7/site-packages/apache_beam/pipeline.pyc 
>> in run(self, test_runner_api)
>>     174       finally:
>>     175         shutil.rmtree(tmpdir)
>> --> 176     return self.runner.run(self)
>>     177
>>     178   def __enter__(self):
>>
>> /usr/local/envs/py2env/lib/python2.7/site-packages/apache_beam/runners/dataflow/dataflow_runner.pyc
>>  in run(self, pipeline)
>>     250     # Create the job
>>     251     result = DataflowPipelineResult(
>> --> 252         self.dataflow_client.create_job(self.job), self)
>>     253
>>     254     self._metrics = DataflowMetrics(self.dataflow_client, result, 
>> self.job)
>>
>> /usr/local/envs/py2env/lib/python2.7/site-packages/apache_beam/utils/retry.pyc
>>  in wrapper(*args, **kwargs)
>>     166       while True:
>>     167         try:
>> --> 168           return fun(*args, **kwargs)
>>     169         except Exception as exn:  # pylint: disable=broad-except
>>     170           if not retry_filter(exn):
>>
>> /usr/local/envs/py2env/lib/python2.7/site-packages/apache_beam/runners/dataflow/internal/apiclient.pyc
>>  in create_job(self, job)
>>     423   def create_job(self, job):
>>     424     """Creates job description. May stage and/or submit for remote 
>> execution."""
>> --> 425     self.create_job_description(job)
>>     426
>>     427     # Stage and submit the job when necessary
>>
>> /usr/local/envs/py2env/lib/python2.7/site-packages/apache_beam/runners/dataflow/internal/apiclient.pyc
>>  in create_job_description(self, job)
>>     446     """Creates a job described by the workflow proto."""
>>     447     resources = dependency.stage_job_resources(
>> --> 448         job.options, file_copy=self._gcs_file_copy)
>>     449     job.proto.environment = Environment(
>>     450         packages=resources, options=job.options,
>>
>> /usr/local/envs/py2env/lib/python2.7/site-packages/apache_beam/runners/dataflow/internal/dependency.pyc
>>  in stage_job_resources(options, file_copy, build_setup_args, temp_dir, 
>> populate_requirements_cache)
>>     377       else:
>>     378         sdk_remote_location = setup_options.sdk_location
>> --> 379       _stage_beam_sdk_tarball(sdk_remote_location, staged_path, 
>> temp_dir)
>>     380       resources.append(names.DATAFLOW_SDK_TARBALL_FILE)
>>     381     else:
>>
>> /usr/local/envs/py2env/lib/python2.7/site-packages/apache_beam/runners/dataflow/internal/dependency.pyc
>>  in _stage_beam_sdk_tarball(sdk_remote_location, staged_path, temp_dir)
>>     462   elif sdk_remote_location == 'pypi':
>>     463     logging.info('Staging the SDK tarball from PyPI to %s', 
>> staged_path)
>> --> 464     _dependency_file_copy(_download_pypi_sdk_package(temp_dir), 
>> staged_path)
>>     465   else:
>>     466     raise RuntimeError(
>>
>> /usr/local/envs/py2env/lib/python2.7/site-packages/apache_beam/runners/dataflow/internal/dependency.pyc
>>  in _download_pypi_sdk_package(temp_dir)
>>     525       '--no-binary', ':all:', '--no-deps']
>>     526   logging.info('Executing command: %s', cmd_args)
>> --> 527   processes.check_call(cmd_args)
>>     528   zip_expected = os.path.join(
>>     529       temp_dir, '%s-%s.zip' % (package_name, version))
>>
>> /usr/local/envs/py2env/lib/python2.7/site-packages/apache_beam/utils/processes.pyc
>>  in check_call(*args, **kwargs)
>>      42   if force_shell:
>>      43     kwargs['shell'] = True
>> ---> 44   return subprocess.check_call(*args, **kwargs)
>>      45
>>      46
>>
>> /usr/local/envs/py2env/lib/python2.7/subprocess.pyc in 
>> check_call(*popenargs, **kwargs)
>>     188         if cmd is None:
>>     189             cmd = popenargs[0]
>> --> 190         raise CalledProcessError(retcode, cmd)
>>     191     return 0
>>     192
>>
>> CalledProcessError: Command '['/usr/local/envs/py2env/bin/python', '-m', 
>> 'pip', 'install', '--download', '/tmp/tmpyyiizo', 
>> 'google-cloud-dataflow==2.0.0', '--no-binary', ':all:', '--no-deps']' 
>> returned non-zero exit status 2
>>
>>
>>
>> On Wed, Aug 22, 2018 at 10:39 AM OrielResearch Eila Arich-Landkof <
>> [email protected]> wrote:
>>
>>> Hello all,
>>>
>>> I am running a pipeline that used to execute on Dataflow with no issues. I am
>>> using the Datalab environment. See the error below. To my understanding, it
>>> happens before the pipeline code is executed.
>>> Any idea what went wrong?
>>>
>>> Thanks,
>>> Eila
>>>
>>>
>>> Executing the pipeline:
>>>
>>> p.run().wait_until_finish()
>>>
>>> The following error is raised:
>>>
>>> INFO:root:Executing command: ['/usr/local/envs/py2env/bin/python', 
>>> 'setup.py', 'sdist', '--dist-dir', '/tmp/tmp_B0gnK']
>>> INFO:root:Starting GCS upload to 
>>> gs://archs4/staging/label-archs4-annotation-15.1534948236.075799/workflow.tar.gz...
>>> INFO:oauth2client.client:Attempting refresh to obtain initial access_token
>>> INFO:root:Completed GCS upload to 
>>> gs://archs4/staging/label-archs4-annotation-15.1534948236.075799/workflow.tar.gz
>>> INFO:root:Staging the SDK tarball from PyPI to 
>>> gs://archs4/staging/label-archs4-annotation-15.1534948236.075799/dataflow_python_sdk.tar
>>> INFO:root:Executing command: ['/usr/local/envs/py2env/bin/python', '-m', 
>>> 'pip', 'install', '--download', '/tmp/tmp_B0gnK', 
>>> 'google-cloud-dataflow==2.0.0', '--no-binary', ':all:', '--no-deps']
>>>
>>> CalledProcessErrorTraceback (most recent call last)
>>> <ipython-input-27-1e5aeb8b7d9b> in <module>()
>>> ----> 1 p.run().wait_until_finish()
>>>
>>> /usr/local/envs/py2env/lib/python2.7/site-packages/apache_beam/pipeline.pyc in run(self, test_runner_api)
>>>     174       finally:
>>>     175         shutil.rmtree(tmpdir)
>>> --> 176     return self.runner.run(self)
>>>     177
>>>     178   def __enter__(self):
>>>
>>> /usr/local/envs/py2env/lib/python2.7/site-packages/apache_beam/runners/dataflow/dataflow_runner.pyc in run(self, pipeline)
>>>     250     # Create the job
>>>     251     result = DataflowPipelineResult(
>>> --> 252         self.dataflow_client.create_job(self.job), self)
>>>     253
>>>     254     self._metrics = DataflowMetrics(self.dataflow_client, result, self.job)
>>>
>>> /usr/local/envs/py2env/lib/python2.7/site-packages/apache_beam/utils/retry.pyc in wrapper(*args, **kwargs)
>>>     166       while True:
>>>     167         try:
>>> --> 168           return fun(*args, **kwargs)
>>>     169         except Exception as exn:  # pylint: disable=broad-except
>>>     170           if not retry_filter(exn):
>>>
>>> /usr/local/envs/py2env/lib/python2.7/site-packages/apache_beam/runners/dataflow/internal/apiclient.pyc in create_job(self, job)
>>>     423   def create_job(self, job):
>>>     424     """Creates job description. May stage and/or submit for remote execution."""
>>> --> 425     self.create_job_description(job)
>>>     426
>>>     427     # Stage and submit the job when necessary
>>>
>>> /usr/local/envs/py2env/lib/python2.7/site-packages/apache_beam/runners/dataflow/internal/apiclient.pyc in create_job_description(self, job)
>>>     446     """Creates a job described by the workflow proto."""
>>>     447     resources = dependency.stage_job_resources(
>>> --> 448         job.options, file_copy=self._gcs_file_copy)
>>>     449     job.proto.environment = Environment(
>>>     450         packages=resources, options=job.options,
>>>
>>> /usr/local/envs/py2env/lib/python2.7/site-packages/apache_beam/runners/dataflow/internal/dependency.pyc in stage_job_resources(options, file_copy, build_setup_args, temp_dir, populate_requirements_cache)
>>>     377       else:
>>>     378         sdk_remote_location = setup_options.sdk_location
>>> --> 379       _stage_beam_sdk_tarball(sdk_remote_location, staged_path, temp_dir)
>>>     380       resources.append(names.DATAFLOW_SDK_TARBALL_FILE)
>>>     381     else:
>>>
>>> /usr/local/envs/py2env/lib/python2.7/site-packages/apache_beam/runners/dataflow/internal/dependency.pyc in _stage_beam_sdk_tarball(sdk_remote_location, staged_path, temp_dir)
>>>     462   elif sdk_remote_location == 'pypi':
>>>     463     logging.info('Staging the SDK tarball from PyPI to %s', staged_path)
>>> --> 464     _dependency_file_copy(_download_pypi_sdk_package(temp_dir), staged_path)
>>>     465   else:
>>>     466     raise RuntimeError(
>>>
>>> /usr/local/envs/py2env/lib/python2.7/site-packages/apache_beam/runners/dataflow/internal/dependency.pyc in _download_pypi_sdk_package(temp_dir)
>>>     525       '--no-binary', ':all:', '--no-deps']
>>>     526   logging.info('Executing command: %s', cmd_args)
>>> --> 527   processes.check_call(cmd_args)
>>>     528   zip_expected = os.path.join(
>>>     529       temp_dir, '%s-%s.zip' % (package_name, version))
>>>
>>> /usr/local/envs/py2env/lib/python2.7/site-packages/apache_beam/utils/processes.pyc in check_call(*args, **kwargs)
>>>      42   if force_shell:
>>>      43     kwargs['shell'] = True
>>> ---> 44   return subprocess.check_call(*args, **kwargs)
>>>      45
>>>      46
>>>
>>> /usr/local/envs/py2env/lib/python2.7/subprocess.pyc in check_call(*popenargs, **kwargs)
>>>     188         if cmd is None:
>>>     189             cmd = popenargs[0]
>>> --> 190         raise CalledProcessError(retcode, cmd)
>>>     191     return 0
>>>     192
>>>
>>> CalledProcessError: Command '['/usr/local/envs/py2env/bin/python', '-m', 'pip', 'install', '--download', '/tmp/tmp_B0gnK', 'google-cloud-dataflow==2.0.0', '--no-binary', ':all:', '--no-deps']' returned non-zero exit status 2
>>>
>>>
>>>
>>> --
>>> Eila
>>> www.orielresearch.org
>>> https://www.meetup.com/Deep-Learning-In-Production/
>>>
>>>
>>>
>>
>> --
>> Eila
>> www.orielresearch.org
>> https://www.meetup.com/Deep-Learning-In-Production/
>>
>>
>>
>
> --
> Eila
> www.orielresearch.org
> https://www.meetup.com/Deep-Learning-In-Production/
>
>
>
