pitrou commented on code in PR #12701:
URL: https://github.com/apache/arrow/pull/12701#discussion_r854086918
##########
python/pyarrow/tests/test_dataset.py:
##########
@@ -4286,6 +4286,66 @@ def test_write_dataset_s3(s3_example_simple):
assert result.equals(table)
+_minio_put_only_policy = """{
+ "Version": "2012-10-17",
+ "Statement": [
+ {
+ "Effect": "Allow",
+ "Action": [
+ "s3:PutObject",
+ "s3:ListBucket",
+ "s3:GetObjectVersion"
+ ],
+ "Resource": [
+ "arn:aws:s3:::*"
+ ]
+ }
+ ]
+}"""
+
+
+@pytest.mark.parquet
+@pytest.mark.s3
+def test_write_dataset_s3_put_only(s3_server):
+ # [ARROW-15892] Testing the create_dir flag which will restrict
+ # creating a new directory for writing a dataset. This is
+ # required while writing a dataset in s3 where we have very
+ # limited permissions and thus we can directly write the dataset
+ # without creating a directory.
+ from pyarrow.fs import S3FileSystem
+
+ # write dataset with s3 filesystem
+ host, port, _, _ = s3_server['connection']
+ fs = S3FileSystem(
+ access_key='limited',
+ secret_key='limited123',
+ endpoint_override='{}:{}'.format(host, port),
+ scheme='http'
+ )
+ limited_s3_user(s3_server, _minio_put_only_policy)
+ table = pa.table([
+ pa.array(range(20)), pa.array(np.random.randn(20)),
+ pa.array(np.repeat(['a', 'b'], 10))],
+ names=["f1", "f2", "part"]
+ )
+ # writing with filesystem object with create_dir flag set to false
+ ds.write_dataset(
+ table, "existing-bucket", filesystem=fs,
+ format="feather", create_dir=False
+ )
Review Comment:
Hmm, if we can't do anything other than filename partitioning, then is it
worth fixing this issue? @westonpace What do you think?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]