This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new e68852c8a Port `object_store` integration tests, use github actions
(#2148)
e68852c8a is described below
commit e68852c8a9c08da2d1e4857c0e811bd0f687f7c5
Author: Andrew Lamb <[email protected]>
AuthorDate: Mon Jul 25 09:24:01 2022 -0400
Port `object_store` integration tests, use github actions (#2148)
* Add github test skeleton
* Cleanups and fmt
* Run on changes to object_store
* Update name
* Broken yaml?
* Remove unneeded lint job
* Run only object store tests
* Add local gcp test instructions
* Allow custom http client for gcs
* remove unused error
* Also run clippy
* Update object_store/src/gcp.rs
Co-authored-by: Raphael Taylor-Davies
<[email protected]>
* rename more
* Fixup test
Co-authored-by: Raphael Taylor-Davies
<[email protected]>
---
.github/workflows/object_store.yml | 114 +++++++++++++++++++++++++++++++++++++
object_store/CONTRIBUTING.md | 23 +++++++-
object_store/src/azure.rs | 50 +++++++++++++++-
object_store/src/gcp.rs | 34 ++++++++---
object_store/src/lib.rs | 4 +-
5 files changed, 213 insertions(+), 12 deletions(-)
diff --git a/.github/workflows/object_store.yml
b/.github/workflows/object_store.yml
new file mode 100644
index 000000000..1b84f3ef0
--- /dev/null
+++ b/.github/workflows/object_store.yml
@@ -0,0 +1,114 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+---
+name: "Object Store"
+
+on:
+ pull_request:
+ paths:
+ # Only run when object store files or github workflows change
+ - object_store/**
+ - .github/**
+
+jobs:
+ clippy:
+ name: Clippy
+ runs-on: ubuntu-latest
+ container:
+ image: amd64/rust
+ steps:
+ - uses: actions/checkout@v2
+ - name: Setup Rust toolchain with clippy
+ run: |
+ rustup toolchain install stable
+ rustup default stable
+ rustup component add clippy
+ - name: Run clippy
+ run: |
+ cargo clippy -p object_store --all-features
+
+ # test the crate
+ linux-test:
+ name: Emulator Tests
+ runs-on: ubuntu-latest
+ services:
+ fake-gcs:
+ image: fsouza/fake-gcs-server
+ ports:
+ - 4443:4443
+ localstack:
+ image: localstack/localstack:0.14.4
+ ports:
+ - 4566:4566
+ azurite:
+ image: mcr.microsoft.com/azure-storage/azurite
+ ports:
+ - 10000:10000
+ container:
+ image: amd64/rust
+ env:
+ # Disable full debug symbol generation to speed up CI build and keep
memory down
+ # "1" means line tables only, which is useful for panic tracebacks.
+ RUSTFLAGS: "-C debuginfo=1"
+ # https://github.com/rust-lang/cargo/issues/10280
+ CARGO_NET_GIT_FETCH_WITH_CLI: "true"
+ RUST_BACKTRACE: "1"
+ # Run integration tests
+ TEST_INTEGRATION: 1
+ AWS_DEFAULT_REGION: "us-east-1"
+ AWS_ACCESS_KEY_ID: test
+ AWS_SECRET_ACCESS_KEY: test
+ AWS_ENDPOINT: http://localstack:4566
+ AZURE_USE_EMULATOR: "1"
+ AZURITE_BLOB_STORAGE_URL: "http://azurite:10000"
+ AZURITE_QUEUE_STORAGE_URL: "http://azurite:10001"
+ GOOGLE_SERVICE_ACCOUNT: "/tmp/gcs.json"
+ OBJECT_STORE_BUCKET: test-bucket
+
+ steps:
+ - uses: actions/checkout@v2
+
+ - name: Configure Fake GCS Server (GCP emulation)
+ run: |
+ curl --insecure -v -X POST --data-binary '{"name":"test-bucket"}' -H
"Content-Type: application/json" "https://fake-gcs:4443/storage/v1/b"
+ echo '{"gcs_base_url": "https://fake-gcs:4443", "disable_oauth":
true, "client_email": "", "private_key": ""}' > "$GOOGLE_SERVICE_ACCOUNT"
+
+ - name: Setup LocalStack (AWS emulation)
+ run: |
+ cd /tmp
+ curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o
"awscliv2.zip"
+ unzip awscliv2.zip
+ ./aws/install
+ aws --endpoint-url=http://localstack:4566 s3 mb s3://test-bucket
+
+ - name: Configure Azurite (Azure emulation)
+ # the magical connection string is from
+ #
https://docs.microsoft.com/en-us/azure/storage/common/storage-use-azurite?tabs=visual-studio#http-connection-strings
+ run: |
+ curl -sL https://aka.ms/InstallAzureCLIDeb | bash
+ az storage container create -n test-bucket --connection-string
'DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://azurite:10000/devstoreaccount1;QueueEndpoint=http://azurite:10001/devstoreaccount1;'
+
+ - name: Setup Rust toolchain
+ run: |
+ rustup toolchain install stable
+ rustup default stable
+
+ - name: Run object_store tests
+ run: |
+ # run tests
+ cargo test -p object_store --features=aws,azure,azure_test,gcp
diff --git a/object_store/CONTRIBUTING.md b/object_store/CONTRIBUTING.md
index 2e216dd48..7c2832cf7 100644
--- a/object_store/CONTRIBUTING.md
+++ b/object_store/CONTRIBUTING.md
@@ -90,5 +90,24 @@ $ cargo test --features azure
### GCP
-We don't have a good story yet for testing the GCP integration locally. You
will need to create a GCS bucket, a
-service account that has access to it, and use this to run the tests.
+To test the GCS integration, we use [Fake GCS
Server](https://github.com/fsouza/fake-gcs-server)
+
+Startup the fake server:
+
+```shell
+docker run -p 4443:4443 fsouza/fake-gcs-server
+```
+
+Configure the account:
+```shell
+curl --insecure -v -X POST --data-binary '{"name":"test-bucket"}' -H
"Content-Type: application/json" "https://localhost:4443/storage/v1/b"
+echo '{"gcs_base_url": "https://localhost:4443", "disable_oauth": true,
"client_email": "", "private_key": ""}' > /tmp/gcs.json
+```
+
+Now run the tests:
+```shell
+TEST_INTEGRATION=1 \
+OBJECT_STORE_BUCKET=test-bucket \
+GOOGLE_SERVICE_ACCOUNT=/tmp/gcs.json \
+cargo test -p object_store --features=gcp
+```
diff --git a/object_store/src/azure.rs b/object_store/src/azure.rs
index 5f4327982..75dafef86 100644
--- a/object_store/src/azure.rs
+++ b/object_store/src/azure.rs
@@ -38,6 +38,7 @@ use futures::{
use snafu::{ResultExt, Snafu};
use std::collections::BTreeSet;
use std::{convert::TryInto, sync::Arc};
+use url::Url;
/// A specialized `Error` for Azure object store-related errors
#[derive(Debug, Snafu)]
@@ -158,6 +159,18 @@ enum Error {
"Azurite (azure emulator) support not compiled in, please add
`azure_test` feature"
))]
NoEmulatorFeature,
+
+ #[snafu(display(
+ "Unable parse emulator url {}={}, Error: {}",
+ env_name,
+ env_value,
+ source
+ ))]
+ UnableToParseEmulatorUrl {
+ env_name: String,
+ env_value: String,
+ source: url::ParseError,
+ },
}
impl From<Error> for super::Error {
@@ -507,6 +520,21 @@ fn check_if_emulator_works() -> Result<()> {
Err(Error::NoEmulatorFeature.into())
}
+/// Parses the contents of the environment variable `env_name` as a URL
+/// if present, otherwise falls back to default_url
+fn url_from_env(env_name: &str, default_url: &str) -> Result<Url> {
+ let url = match std::env::var(env_name) {
+ Ok(env_value) => {
+ Url::parse(&env_value).context(UnableToParseEmulatorUrlSnafu {
+ env_name,
+ env_value,
+ })?
+ }
+ Err(_) => Url::parse(default_url).expect("Failed to parse default
URL"),
+ };
+ Ok(url)
+}
+
/// Configure a connection to container with given name on Microsoft Azure
/// Blob store.
///
@@ -524,7 +552,27 @@ pub fn new_azure(
let (is_emulator, storage_account_client) = if use_emulator {
check_if_emulator_works()?;
- (true, StorageAccountClient::new_emulator_default())
+ // Allow overriding defaults. Values taken from
+ // from
https://docs.rs/azure_storage/0.2.0/src/azure_storage/core/clients/storage_account_client.rs.html#129-141
+ let http_client = azure_core::new_http_client();
+ let blob_storage_url =
+ url_from_env("AZURITE_BLOB_STORAGE_URL",
"http://127.0.0.1:10000")?;
+ let queue_storage_url =
+ url_from_env("AZURITE_QUEUE_STORAGE_URL",
"http://127.0.0.1:10001")?;
+ let table_storage_url =
+ url_from_env("AZURITE_TABLE_STORAGE_URL",
"http://127.0.0.1:10002")?;
+ let filesystem_url =
+ url_from_env("AZURITE_FILESYSTEM_STORAGE_URL",
"http://127.0.0.1:10004")?;
+
+ let storage_client = StorageAccountClient::new_emulator(
+ http_client,
+ &blob_storage_url,
+ &table_storage_url,
+ &queue_storage_url,
+ &filesystem_url,
+ );
+
+ (true, storage_client)
} else {
(
false,
diff --git a/object_store/src/gcp.rs b/object_store/src/gcp.rs
index 84fb572bd..e836caba7 100644
--- a/object_store/src/gcp.rs
+++ b/object_store/src/gcp.rs
@@ -502,9 +502,17 @@ fn reader_credentials_file(
pub fn new_gcs(
service_account_path: impl AsRef<std::path::Path>,
bucket_name: impl Into<String>,
+) -> Result<GoogleCloudStorage> {
+ new_gcs_with_client(service_account_path, bucket_name, Client::new())
+}
+
+/// Configure a connection to Google Cloud Storage with the specified HTTP
client.
+pub fn new_gcs_with_client(
+ service_account_path: impl AsRef<std::path::Path>,
+ bucket_name: impl Into<String>,
+ client: Client,
) -> Result<GoogleCloudStorage> {
let credentials = reader_credentials_file(service_account_path)?;
- let client = Client::new();
// TODO: https://cloud.google.com/storage/docs/authentication#oauth-scopes
let scope = "https://www.googleapis.com/auth/devstorage.full_control";
@@ -575,6 +583,18 @@ mod test {
service_account: String,
}
+ impl GoogleCloudConfig {
+ fn build_test(self) -> Result<GoogleCloudStorage> {
+ // ignore HTTPS errors in tests so we can use fake-gcs server
+ let client = Client::builder()
+ .danger_accept_invalid_certs(true)
+ .build()
+ .expect("Error creating http client for testing");
+
+ new_gcs_with_client(self.service_account, self.bucket, client)
+ }
+ }
+
// Helper macro to skip tests if TEST_INTEGRATION and the GCP environment
variables are not set.
macro_rules! maybe_skip_integration {
() => {{
@@ -622,7 +642,7 @@ mod test {
#[tokio::test]
async fn gcs_test() {
let config = maybe_skip_integration!();
- let integration = new_gcs(config.service_account,
config.bucket).unwrap();
+ let integration = config.build_test().unwrap();
put_get_delete_list(&integration).await.unwrap();
list_uses_directories_correctly(&integration).await.unwrap();
@@ -633,7 +653,7 @@ mod test {
#[tokio::test]
async fn gcs_test_get_nonexistent_location() {
let config = maybe_skip_integration!();
- let integration = new_gcs(config.service_account,
&config.bucket).unwrap();
+ let integration = config.build_test().unwrap();
let location = Path::from_iter([NON_EXISTENT_NAME]);
@@ -650,7 +670,7 @@ mod test {
async fn gcs_test_get_nonexistent_bucket() {
let mut config = maybe_skip_integration!();
config.bucket = NON_EXISTENT_NAME.into();
- let integration = new_gcs(config.service_account,
&config.bucket).unwrap();
+ let integration = config.build_test().unwrap();
let location = Path::from_iter([NON_EXISTENT_NAME]);
@@ -668,7 +688,7 @@ mod test {
#[tokio::test]
async fn gcs_test_delete_nonexistent_location() {
let config = maybe_skip_integration!();
- let integration = new_gcs(config.service_account,
&config.bucket).unwrap();
+ let integration = config.build_test().unwrap();
let location = Path::from_iter([NON_EXISTENT_NAME]);
@@ -684,7 +704,7 @@ mod test {
async fn gcs_test_delete_nonexistent_bucket() {
let mut config = maybe_skip_integration!();
config.bucket = NON_EXISTENT_NAME.into();
- let integration = new_gcs(config.service_account,
&config.bucket).unwrap();
+ let integration = config.build_test().unwrap();
let location = Path::from_iter([NON_EXISTENT_NAME]);
@@ -700,7 +720,7 @@ mod test {
async fn gcs_test_put_nonexistent_bucket() {
let mut config = maybe_skip_integration!();
config.bucket = NON_EXISTENT_NAME.into();
- let integration = new_gcs(config.service_account,
&config.bucket).unwrap();
+ let integration = config.build_test().unwrap();
let location = Path::from_iter([NON_EXISTENT_NAME]);
let data = Bytes::from("arbitrary data");
diff --git a/object_store/src/lib.rs b/object_store/src/lib.rs
index 4a56b03bf..2dc65069a 100644
--- a/object_store/src/lib.rs
+++ b/object_store/src/lib.rs
@@ -695,8 +695,8 @@ mod tests {
#[tokio::test]
async fn test_list_lifetimes() {
let store = memory::InMemory::new();
- let stream = list_store(&store, "path").await.unwrap();
- assert_eq!(stream.count().await, 0);
+ let mut stream = list_store(&store, "path").await.unwrap();
+ assert!(stream.next().await.is_none());
}
// Tests TODO: