paleolimbot commented on pull request #11691:
URL: https://github.com/apache/arrow/pull/11691#issuecomment-971637002


   Ok...both R and Python appear to work with a proxy!
   
   ``` r
   # Evaluate `expr` while a local squid proxy is running.
   #
   # Starts `squid --foreground` as a child process, evaluates `expr`, and
   # shuts squid down again when the function exits (normally or on error).
   #
   # expr: expression to evaluate; because of lazy evaluation it is only run
   #   by force(expr), i.e. after squid has been started and checked.
   # Returns the value of `expr`.
   with_squid_proxy <- function(expr) {
     # brew install squid
     # apt install squid
     squid_proc <- processx::process$new("squid", args = "--foreground")

     # Register cleanup right after the process is created so it can never be
     # leaked, and wait for squid to really exit so that a following call
     # does not race with the old instance for the listening port.
     on.exit(
       {
         if (squid_proc$is_alive()) {
           squid_proc$interrupt()
           squid_proc$wait(timeout = 5000)
         }
         # fall back to a hard kill if squid ignored the interrupt
         if (squid_proc$is_alive()) {
           squid_proc$kill()
         }
       },
       add = TRUE
     )

     # give squid a moment to start, then fail early if it did not come up
     Sys.sleep(1)
     stopifnot(squid_proc$is_alive())

     force(expr)
   }
   
   # make sure proxy works with httr: request a plain-HTTP URL through the
   # squid instance on localhost:3128, with verbose output to show the exchange
   with_squid_proxy({
     httr::GET(
       "http://httpbin.org/get",
       httr::use_proxy("localhost", 3128), httr::verbose()
     )
   })
   #> Response [http://httpbin.org/get]
   #>   Date: 2021-11-17 14:29
   #>   Status: 200
   #>   Content-Type: application/json
   #>   Size: 417 B
   #> {
   #>   "args": {}, 
   #>   "headers": {
   #>     "Accept": "application/json, text/xml, application/xml, */*", 
   #>     "Accept-Encoding": "deflate, gzip", 
   #>     "Cache-Control": "max-age=259200", 
   #>     "Host": "httpbin.org", 
   #>     "User-Agent": "libcurl/7.64.1 r-curl/4.3.2 httr/1.4.2", 
   #>     "X-Amzn-Trace-Id": "Root=1-619511bc-29ddd2da0df9bec865ef42ec"
   #>   }, 
   #> ...
   
   # To reproduce, install the arrow R package from the pull request first:
   # remotes::install_github("apache/arrow/r#11691")
   library(arrow, warn.conflicts = FALSE)
   
   # Baseline: make sure r/arrow works without proxy (no proxy_options given)
   # by listing the top level of the bucket.
   arrow::s3_bucket("ursa-labs-taxi-data")$ls()
   #>  [1] "2009" "2010" "2011" "2012" "2013" "2014" "2015" "2016" "2017" "2018"
   #> [11] "2019"
   
   # Baseline: make sure pyarrow works without proxy. The Python snippet is
   # run through reticulate and lists the bucket without any proxy_options.
   reticulate::py_run_string("
   from pyarrow.fs import S3FileSystem
   from pyarrow import fs
   s3 = S3FileSystem(region='us-east-2')
   print(s3.get_file_info(fs.FileSelector('ursa-labs-taxi-data')))")
   
   # make sure arrow fails when proxy_options is set but no proxy is running
   # (this call is made outside with_squid_proxy(), so nothing listens on 3128)
   s3_bucket(
     "ursa-labs-taxi-data",
     proxy_options = "http://localhost:3128"
   )$ls()
   #> Error: IOError: When listing objects under key '' in bucket 'ursa-labs-taxi-data': AWS Error [code 99]: curlCode: 7, Couldn't connect to server
   #> /Users/deweydunnington/Desktop/rscratch/arrow/cpp/src/arrow/filesystem/s3fs.cc:1861  TreeWalker::Walk(client_, io_context_, bucket, key, kListObjectsMaxKeys, handle_results, handle_error, handle_recursion)
   #> /Users/deweydunnington/Desktop/rscratch/arrow/cpp/src/arrow/filesystem/s3fs.cc:2217  impl_->Walk(select, base_path.bucket, base_path.key, &results)
   #> /Users/deweydunnington/Desktop/rscratch/arrow/cpp/src/arrow/filesystem/filesystem.cc:323  base_fs_->GetFileInfo(selector)
   
   # make sure arrow succeeds with a good proxy: same call as the failing
   # case, but evaluated while with_squid_proxy() keeps squid running
   with_squid_proxy({
     s3_bucket(
       "ursa-labs-taxi-data",
       proxy_options = "http://localhost:3128"
     )$ls()
   })
   #>  [1] "2009" "2010" "2011" "2012" "2013" "2014" "2015" "2016" "2017" "2018"
   #> [11] "2019"
   
   # make sure pyarrow fails with no proxy running: proxy_options points at
   # localhost:3128, but this call is made outside with_squid_proxy()
   reticulate::py_run_string("
   from pyarrow.fs import S3FileSystem
   from pyarrow import fs
   s3 = S3FileSystem(region='us-east-2', proxy_options='http://localhost:3128')
   print(s3.get_file_info(fs.FileSelector('ursa-labs-taxi-data')))")
   #> Error in py_run_string_impl(code, local, convert): OSError: When listing objects under key '' in bucket 'ursa-labs-taxi-data': AWS Error [code 99]: curlCode: 7, Couldn't connect to server
   #> 
   #> Detailed traceback:
   #>   File "<string>", line 5, in <module>
   #>   File "pyarrow/_fs.pyx", line 431, in pyarrow._fs.FileSystem.get_file_info
   #>   File "pyarrow/error.pxi", line 143, in pyarrow.lib.pyarrow_internal_check_status
   #>   File "pyarrow/error.pxi", line 114, in pyarrow.lib.check_status
   
   # make sure pyarrow succeeds with a good proxy: the Python snippet passes
   # proxy_options for localhost:3128 and is evaluated inside
   # with_squid_proxy()
   with_squid_proxy({
     reticulate::py_run_string("
   from pyarrow.fs import S3FileSystem
   from pyarrow import fs
   s3 = S3FileSystem(region='us-east-2', proxy_options='http://localhost:3128')
   print(s3.get_file_info(fs.FileSelector('ursa-labs-taxi-data')))")
   })
   ```
   
   <sup>Created on 2021-11-17 by the [reprex package](https://reprex.tidyverse.org) (v2.0.1)</sup>


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to