paleolimbot commented on pull request #11691:
URL: https://github.com/apache/arrow/pull/11691#issuecomment-971637002
Ok... both R and Python appear to work with a proxy!
``` r
# run `expr` with a local squid proxy listening on its default port (3128)
with_squid_proxy <- function(expr) {
  # brew install squid
  # apt install squid
  squid_proc <- processx::process$new("squid", args = "--foreground")
  Sys.sleep(1)
  stopifnot(squid_proc$is_alive())
  on.exit({
    squid_proc$interrupt()
  })
  force(expr)
}
# make sure proxy works with httr
with_squid_proxy({
  httr::GET(
    "http://httpbin.org/get",
    httr::use_proxy("localhost", 3128), httr::verbose()
  )
})
#> Response [http://httpbin.org/get]
#>   Date: 2021-11-17 14:29
#>   Status: 200
#>   Content-Type: application/json
#>   Size: 417 B
#> {
#>   "args": {},
#>   "headers": {
#>     "Accept": "application/json, text/xml, application/xml, */*",
#>     "Accept-Encoding": "deflate, gzip",
#>     "Cache-Control": "max-age=259200",
#>     "Host": "httpbin.org",
#>     "User-Agent": "libcurl/7.64.1 r-curl/4.3.2 httr/1.4.2",
#>     "X-Amzn-Trace-Id": "Root=1-619511bc-29ddd2da0df9bec865ef42ec"
#>   },
#> ...
# remotes::install_github("apache/arrow/r#11691")
library(arrow, warn.conflicts = FALSE)
# make sure r/arrow works without proxy
arrow::s3_bucket("ursa-labs-taxi-data")$ls()
#> [1] "2009" "2010" "2011" "2012" "2013" "2014" "2015" "2016" "2017" "2018"
#> [11] "2019"
# make sure pyarrow works without proxy
reticulate::py_run_string("
from pyarrow.fs import S3FileSystem
from pyarrow import fs
s3 = S3FileSystem(region='us-east-2')
print(s3.get_file_info(fs.FileSelector('ursa-labs-taxi-data')))")
# make sure arrow fails when the proxy is configured but not running
s3_bucket("ursa-labs-taxi-data", proxy_options = "http://localhost:3128")$ls()
#> Error: IOError: When listing objects under key '' in bucket 'ursa-labs-taxi-data': AWS Error [code 99]: curlCode: 7, Couldn't connect to server
#> /Users/deweydunnington/Desktop/rscratch/arrow/cpp/src/arrow/filesystem/s3fs.cc:1861  TreeWalker::Walk(client_, io_context_, bucket, key, kListObjectsMaxKeys, handle_results, handle_error, handle_recursion)
#> /Users/deweydunnington/Desktop/rscratch/arrow/cpp/src/arrow/filesystem/s3fs.cc:2217  impl_->Walk(select, base_path.bucket, base_path.key, &results)
#> /Users/deweydunnington/Desktop/rscratch/arrow/cpp/src/arrow/filesystem/filesystem.cc:323  base_fs_->GetFileInfo(selector)
# make sure arrow succeeds with a running proxy
with_squid_proxy({
  s3_bucket("ursa-labs-taxi-data", proxy_options = "http://localhost:3128")$ls()
})
#> [1] "2009" "2010" "2011" "2012" "2013" "2014" "2015" "2016" "2017" "2018"
#> [11] "2019"
# make sure pyarrow fails when the proxy is configured but not running
reticulate::py_run_string("
from pyarrow.fs import S3FileSystem
from pyarrow import fs
s3 = S3FileSystem(region='us-east-2', proxy_options='http://localhost:3128')
print(s3.get_file_info(fs.FileSelector('ursa-labs-taxi-data')))")
#> Error in py_run_string_impl(code, local, convert): OSError: When listing objects under key '' in bucket 'ursa-labs-taxi-data': AWS Error [code 99]: curlCode: 7, Couldn't connect to server
#>
#> Detailed traceback:
#>   File "<string>", line 5, in <module>
#>   File "pyarrow/_fs.pyx", line 431, in pyarrow._fs.FileSystem.get_file_info
#>   File "pyarrow/error.pxi", line 143, in pyarrow.lib.pyarrow_internal_check_status
#>   File "pyarrow/error.pxi", line 114, in pyarrow.lib.check_status
# make sure pyarrow succeeds with a running proxy
with_squid_proxy({
  reticulate::py_run_string("
from pyarrow.fs import S3FileSystem
from pyarrow import fs
s3 = S3FileSystem(region='us-east-2', proxy_options='http://localhost:3128')
print(s3.get_file_info(fs.FileSelector('ursa-labs-taxi-data')))")
})
```
<sup>Created on 2021-11-17 by the [reprex package](https://reprex.tidyverse.org) (v2.0.1)</sup>
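
For completeness, the same pyarrow check can also be run directly from Python rather than through reticulate. This is just a sketch that reuses the calls shown above and assumes a squid proxy is already listening on `localhost:3128`:

``` python
# Sketch: the pyarrow proxy check from the reprex above, run outside reticulate.
# Assumes a squid proxy is already running on localhost:3128.
from pyarrow.fs import S3FileSystem, FileSelector

s3 = S3FileSystem(region="us-east-2", proxy_options="http://localhost:3128")
print(s3.get_file_info(FileSelector("ursa-labs-taxi-data")))
```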