pitrou commented on code in PR #13582: URL: https://github.com/apache/arrow/pull/13582#discussion_r918967631
########## python/pyarrow/fs.py: ########## @@ -227,16 +227,34 @@ def copy_files(source, destination, Examples -------- - Copy an S3 bucket's files to a local directory: + Inspect an S3 bucket's files: - >>> copy_files("s3://your-bucket-name", - ... "local-directory") # doctest: +SKIP + >>> s3, path = fs.FileSystem.from_uri( + ... "s3://registry.opendata.aws/roda/ndjson/") + >>> selector = fs.FileSelector("registry.opendata.aws/roda/ndjson") + >>> s3.get_file_info(selector) + [<FileInfo for 'registry.opendata.aws/roda/ndjson/index.ndjson':...] - Using a FileSystem object: + Create a LocalFIleSystem object and a new directory: - >>> copy_files("your-bucket-name", "local-directory", - ... source_filesystem=S3FileSystem(...)) # doctest: +SKIP + >>> local = fs.LocalFileSystem() + >>> local.create_dir('/tmp/copy-dir') Review Comment: Is all this boilerplate really useful? Perhaps it's not important to make the examples executable, as long as they are informative? Here we want to showcase usage of `copy_files`, not the rest of the filesystem API. ########## python/pyarrow/_s3fs.pyx: ########## @@ -44,13 +44,22 @@ def initialize_s3(S3LogLevel log_level=S3LogLevel.Fatal): ---------- log_level : S3LogLevel level of logging + + Examples + -------- + >>> fs.initialize_s3(fs.S3LogLevel(5)) # doctest: +SKIP """ cdef CS3GlobalOptions options options.log_level = <CS3LogLevel> log_level check_status(CInitializeS3(options)) def finalize_s3(): + """ Review Comment: To be honest, I'm not sure how useful this function is, so perhaps we don't want to document it for now. (for example, it isn't called anywhere in our source tree currently, even in tests) ########## python/pyarrow/fs.py: ########## @@ -227,16 +227,34 @@ def copy_files(source, destination, Examples -------- - Copy an S3 bucket's files to a local directory: + Inspect an S3 bucket's files: - >>> copy_files("s3://your-bucket-name", - ... "local-directory") # doctest: +SKIP + >>> s3, path = fs.FileSystem.from_uri( + ... "s3://registry.opendata.aws/roda/ndjson/") + >>> selector = fs.FileSelector("registry.opendata.aws/roda/ndjson") Review Comment: Perhaps `path` should be used here? ########## python/pyarrow/conftest.py: ########## @@ -241,3 +242,26 @@ def _docdir(request): else: yield + + +# Define doctest_namespace for fs module docstring import +@pytest.fixture(autouse=True) +def add_fs(doctest_namespace, request): + + # Trigger ONLY for the doctests + doctest_m = request.config.option.doctestmodules + doctest_c = getattr(request.config.option, "doctest_cython", False) + + if doctest_m or doctest_c: + # fs import + doctest_namespace["fs"] = fs + + # Creation of an object and file with data + local = fs.LocalFileSystem() + with local.open_output_stream('/tmp/fileinfo.dat') as stream: + stream.write(b'data') + doctest_namespace["local"] = local + yield + + else: + yield Review Comment: Perhaps simply ```suggestion yield ``` ########## python/pyarrow/conftest.py: ########## @@ -241,3 +242,26 @@ def _docdir(request): else: yield + + +# Define doctest_namespace for fs module docstring import +@pytest.fixture(autouse=True) +def add_fs(doctest_namespace, request): + + # Trigger ONLY for the doctests + doctest_m = request.config.option.doctestmodules + doctest_c = getattr(request.config.option, "doctest_cython", False) + + if doctest_m or doctest_c: + # fs import + doctest_namespace["fs"] = fs + + # Creation of an object and file with data + local = fs.LocalFileSystem() + with local.open_output_stream('/tmp/fileinfo.dat') as stream: Review Comment: Hmm, it would be nice if we could avoid creating files without deleting them at the end... ########## python/pyarrow/_s3fs.pyx: ########## @@ -44,13 +44,22 @@ def initialize_s3(S3LogLevel log_level=S3LogLevel.Fatal): ---------- log_level : S3LogLevel level of logging + + Examples + -------- + >>> fs.initialize_s3(fs.S3LogLevel(5)) # doctest: +SKIP Review Comment: It would be much better to showcase a symbolic constant, for example: ```suggestion >>> fs.initialize_s3(fs.S3LogLevel.Error) # doctest: +SKIP ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org