This is an automated email from the ASF dual-hosted git repository.
xuanwo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-opendal.git
The following commit(s) were added to refs/heads/main by this push:
new bb1d3281b feat(test): add Huggingface behavior test (#3712)
bb1d3281b is described below
commit bb1d3281b1c05bcaa7f8c18fb65dfe288875a0c6
Author: Morris Tai <[email protected]>
AuthorDate: Thu Dec 7 09:11:13 2023 +0900
feat(test): add Huggingface behavior test (#3712)
* feat: add huggingface fs behavior support
* feat: add huggingface scheme to builder
* chore: change root dir
* feat: add huggingface to java/nodejs/python binding
* chore: fix typo
* chore: rename folder
* feat: disable random root of huggingface
* feat: allow StatusCode 206 in read operation
* chore: drop duplicated status code checking
* feat: fix content length not being set from the response body
* chore: remove unused import.
* feat: use relative path in lister
---
.env.example | 7 ++++-
.../services/huggingface/huggingface/action.yml | 33 ++++++++++++++++++++++
.github/workflows/ci.yml | 1 +
bindings/java/Cargo.toml | 2 ++
bindings/nodejs/Cargo.toml | 2 ++
bindings/python/Cargo.toml | 2 ++
core/src/services/huggingface/backend.rs | 4 ++-
core/src/services/huggingface/core.rs | 12 ++------
core/src/services/huggingface/lister.rs | 5 +++-
core/src/types/operator/builder.rs | 2 ++
10 files changed, 57 insertions(+), 13 deletions(-)
diff --git a/.env.example b/.env.example
index 450c734f8..6277e05ae 100644
--- a/.env.example
+++ b/.env.example
@@ -174,4 +174,9 @@ OPENDAL_B2_ROOT=/path/to/dir
OPENDAL_B2_BUCKET=<bucket>
OPENDAL_B2_BUCKET_ID=<bucket_id>
OPENDAL_B2_APPLICATION_KEY_ID=<key_id>
-OPENDAL_B2_APPLICATION_KEY=<application_key>
\ No newline at end of file
+OPENDAL_B2_APPLICATION_KEY=<application_key>
+# huggingface
+OPENDAL_HUGGINGFACE_REPO_TYPE=dataset
+OPENDAL_HUGGINGFACE_REPO_ID=opendal/huggingface-testdata
+OPENDAL_HUGGINGFACE_REVISION=main
+OPENDAL_HUGGINGFACE_ROOT=/testdata/
diff --git a/.github/services/huggingface/huggingface/action.yml b/.github/services/huggingface/huggingface/action.yml
new file mode 100644
index 000000000..2ee74d3bc
--- /dev/null
+++ b/.github/services/huggingface/huggingface/action.yml
@@ -0,0 +1,33 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: huggingface
+description: "Behavior test for Huggingface File System"
+
+runs:
+ using: "composite"
+ steps:
+ - name: Setup
+ shell: bash
+ run: |
+ cat << EOF >> $GITHUB_ENV
+ OPENDAL_HUGGINGFACE_REPO_TYPE=dataset
+ OPENDAL_HUGGINGFACE_REPO_ID=opendal/huggingface-testdata
+ OPENDAL_HUGGINGFACE_REVISION=main
+ OPENDAL_HUGGINGFACE_ROOT=/testdata/
+ OPENDAL_DISABLE_RANDOM_ROOT=true
+ EOF
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 3db0e4c3c..5e9a4ccb4 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -220,6 +220,7 @@ jobs:
# FIXME how to support HDFS services in other platforms?
# services-hdfs
services-http
+ services-huggingface
services-ipfs
services-ipmfs
services-memcached
diff --git a/bindings/java/Cargo.toml b/bindings/java/Cargo.toml
index 36d5b3285..b5e986235 100644
--- a/bindings/java/Cargo.toml
+++ b/bindings/java/Cargo.toml
@@ -64,6 +64,7 @@ services-all = [
"services-gdrive",
# FIXME how to support HDFS services in bindings?
# "services-hdfs",
+ "services-huggingface",
"services-ipfs",
"services-memcached",
"services-mini-moka",
@@ -120,6 +121,7 @@ services-ftp = ["opendal/services-ftp"]
services-gdrive = ["opendal/services-gdrive"]
services-gridfs = ["opendal/services-gridfs"]
services-hdfs = ["opendal/services-hdfs"]
+services-huggingface = ["opendal/services-huggingface"]
services-ipfs = ["opendal/services-ipfs"]
services-libsql = ["opendal/services-libsql"]
services-memcached = ["opendal/services-memcached"]
diff --git a/bindings/nodejs/Cargo.toml b/bindings/nodejs/Cargo.toml
index 62d45d3bb..4a1a1f638 100644
--- a/bindings/nodejs/Cargo.toml
+++ b/bindings/nodejs/Cargo.toml
@@ -61,6 +61,7 @@ services-all = [
"services-gdrive",
# FIXME how to support HDFS services in bindings?
# "services-hdfs",
+ "services-huggingface",
"services-ipfs",
"services-memcached",
"services-mini-moka",
@@ -115,6 +116,7 @@ services-ftp = ["opendal/services-ftp"]
services-gdrive = ["opendal/services-gdrive"]
services-gridfs = ["opendal/services-gridfs"]
services-hdfs = ["opendal/services-hdfs"]
+services-huggingface = ["opendal/services-huggingface"]
services-ipfs = ["opendal/services-ipfs"]
services-libsql = ["opendal/services-libsql"]
services-memcached = ["opendal/services-memcached"]
diff --git a/bindings/python/Cargo.toml b/bindings/python/Cargo.toml
index 329e75304..81156c355 100644
--- a/bindings/python/Cargo.toml
+++ b/bindings/python/Cargo.toml
@@ -60,6 +60,7 @@ services-all = [
"services-gdrive",
# FIXME how to support HDFS services in bindings?
# "services-hdfs",
+ "services-huggingface",
"services-ipfs",
"services-memcached",
"services-mini-moka",
@@ -114,6 +115,7 @@ services-ftp = ["opendal/services-ftp"]
services-gdrive = ["opendal/services-gdrive"]
services-gridfs = ["opendal/services-gridfs"]
services-hdfs = ["opendal/services-hdfs"]
+services-huggingface = ["opendal/services-huggingface"]
services-ipfs = ["opendal/services-ipfs"]
services-libsql = ["opendal/services-libsql"]
services-memcached = ["opendal/services-memcached"]
diff --git a/core/src/services/huggingface/backend.rs b/core/src/services/huggingface/backend.rs
index 49502680e..37c0b5064 100644
--- a/core/src/services/huggingface/backend.rs
+++ b/core/src/services/huggingface/backend.rs
@@ -275,7 +275,7 @@ impl Accessor for HuggingfaceBackend {
let status = resp.status();
match status {
- StatusCode::OK => {
+ StatusCode::OK | StatusCode::PARTIAL_CONTENT => {
let size = parse_content_length(resp.headers())?;
Ok((RpRead::new().with_size(size), resp.into_body()))
}
@@ -310,6 +310,8 @@ impl Accessor for HuggingfaceBackend {
)?);
}
+ meta.set_content_length(status.size);
+
match status.type_.as_str() {
"directory" => meta.set_mode(EntryMode::DIR),
"file" => meta.set_mode(EntryMode::FILE),
diff --git a/core/src/services/huggingface/core.rs b/core/src/services/huggingface/core.rs
index 45ec528ed..67963082e 100644
--- a/core/src/services/huggingface/core.rs
+++ b/core/src/services/huggingface/core.rs
@@ -18,13 +18,12 @@
use std::fmt::Debug;
use bytes::Bytes;
+use http::header;
use http::Request;
use http::Response;
-use http::{header, StatusCode};
use serde::Deserialize;
use super::backend::RepoType;
-use super::error::parse_error;
use crate::raw::*;
use crate::*;
@@ -162,14 +161,7 @@ impl HuggingfaceCore {
.body(AsyncBody::Empty)
.map_err(new_request_build_error)?;
- let resp = self.client.send(req).await?;
-
- let status = resp.status();
-
- match status {
- StatusCode::OK => Ok(resp),
- _ => Err(parse_error(resp).await?),
- }
+ self.client.send(req).await
}
}
diff --git a/core/src/services/huggingface/lister.rs b/core/src/services/huggingface/lister.rs
index 5cb591f82..e291dd3cd 100644
--- a/core/src/services/huggingface/lister.rs
+++ b/core/src/services/huggingface/lister.rs
@@ -81,7 +81,10 @@ impl oio::PageList for HuggingfaceLister {
status.path.clone()
};
- ctx.entries.push_back(oio::Entry::new(&path, meta));
+ ctx.entries.push_back(oio::Entry::new(
+ &build_rel_path(&self.core.root, &path),
+ meta,
+ ));
}
Ok(())
diff --git a/core/src/types/operator/builder.rs b/core/src/types/operator/builder.rs
index 278a08e11..785b86229 100644
--- a/core/src/types/operator/builder.rs
+++ b/core/src/types/operator/builder.rs
@@ -193,6 +193,8 @@ impl Operator {
Scheme::Hdfs => Self::from_map::<services::Hdfs>(map)?.finish(),
#[cfg(feature = "services-http")]
Scheme::Http => Self::from_map::<services::Http>(map)?.finish(),
+ #[cfg(feature = "services-huggingface")]
+ Scheme::Huggingface => Self::from_map::<services::Huggingface>(map)?.finish(),
#[cfg(feature = "services-ipfs")]
Scheme::Ipfs => Self::from_map::<services::Ipfs>(map)?.finish(),
#[cfg(feature = "services-ipmfs")]