kou commented on code in PR #36835:
URL: https://github.com/apache/arrow/pull/36835#discussion_r1298078549
##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -105,15 +111,41 @@ AzuriteEnv* GetAzuriteEnv() {
return ::arrow::internal::checked_cast<AzuriteEnv*>(azurite_env);
}
-// Placeholder tests for file structure
+// Placeholder tests
// TODO: GH-18014 Remove once a proper test is added
-TEST(AzureFileSystem, InitialiseAzurite) {
+TEST(AzureFileSystem, UploadThenDownload) {
+ const std::string container_name = "sample-container";
+ const std::string blob_name = "sample-blob.txt";
+ const std::string blob_content = "Hello Azure!";
+
const std::string& account_name = GetAzuriteEnv()->account_name();
const std::string& account_key = GetAzuriteEnv()->account_key();
- EXPECT_EQ(account_name, "devstoreaccount1");
- EXPECT_EQ(account_key,
- "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/"
- "K1SZFPTOtr/KBHBeksoGMGw==");
+
+ auto credential =
std::make_shared<Azure::Storage::StorageSharedKeyCredential>(
+ account_name, account_key);
+
+ auto service_client = Azure::Storage::Blobs::BlobServiceClient(
+ "http://127.0.0.1:10000/devstoreaccount1", credential);
+ auto container_client =
service_client.GetBlobContainerClient(container_name);
+ container_client.CreateIfNotExists();
+ auto blob_client = container_client.GetBlockBlobClient(blob_name);
+
+ std::vector<uint8_t> buffer(blob_content.begin(), blob_content.end());
+ blob_client.UploadFrom(buffer.data(), buffer.size());
+
+ std::vector<uint8_t> buffer2(blob_content.size());
+ blob_client.DownloadTo(buffer2.data(), buffer2.size());
+
+ EXPECT_EQ(std::string(buffer2.begin(), buffer2.end()), blob_content);
Review Comment:
Can we use more meaningful name?
```suggestion
std::vector<uint8_t> downloaded_content(blob_content.size());
blob_client.DownloadTo(downloaded_content.data(),
downloaded_content.size());
EXPECT_EQ(std::string(downloaded_content.begin(),
downloaded_content.end()), blob_content);
```
##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -105,15 +111,41 @@ AzuriteEnv* GetAzuriteEnv() {
return ::arrow::internal::checked_cast<AzuriteEnv*>(azurite_env);
}
-// Placeholder tests for file structure
+// Placeholder tests
// TODO: GH-18014 Remove once a proper test is added
-TEST(AzureFileSystem, InitialiseAzurite) {
+TEST(AzureFileSystem, UploadThenDownload) {
+ const std::string containerName = "sample-container";
+ const std::string blobName = "sample-blob.txt";
+ const std::string blobContent = "Hello Azure!";
+
const std::string& account_name = GetAzuriteEnv()->account_name();
const std::string& account_key = GetAzuriteEnv()->account_key();
- EXPECT_EQ(account_name, "devstoreaccount1");
- EXPECT_EQ(account_key,
- "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/"
- "K1SZFPTOtr/KBHBeksoGMGw==");
+
+ auto credential =
std::make_shared<Azure::Storage::StorageSharedKeyCredential>(
+ account_name, account_key);
+
+ auto serviceClient = Azure::Storage::Blobs::BlobServiceClient(
+ "http://127.0.0.1:10000/devstoreaccount1", credential);
Review Comment:
Ah, sorry.
I wanted to say that can we use `account_name` "local variable" (to avoid
duplication)?
```suggestion
std::string("http://127.0.0.1:10000/") + account_name, credential);
```
##########
cpp/cmake_modules/FindAzure.cmake:
##########
@@ -0,0 +1,56 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+if(Azure_FOUND)
+ return()
+endif()
+
+set(find_package_args)
+list(APPEND find_package_args
+ CONFIG
+ # Avoid finding cmake files in local copies of the Azure SDK for C++.
+ # e.g. the extracted copy from the previous build.
+ NO_CMAKE_PACKAGE_REGISTRY
Review Comment:
Oh. I could confirmed this. Thanks.
Azure SDK for C++ uses `export(PACKAGE)`
https://github.com/Azure/azure-sdk-for-cpp/blob/f6f26172018e9c682133ea061669c288712e90f0/cmake-modules/AzureVcpkg.cmake#L168
and it creates `~/.cmake/packages/*` by default:
https://cmake.org/cmake/help/latest/command/export.html#exporting-packages
How about disabling this explicitly instead of using
`NO_CMAKE_PACKAGE_REGISTRY` for `find_package`?
```diff
diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake
b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index 840e0d267..9055da2c7 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -928,7 +928,7 @@ set(EP_COMMON_CMAKE_ARGS
-DCMAKE_C_FLAGS_MISIZEREL=${EP_C_FLAGS_MINSIZEREL}
-DCMAKE_C_FLAGS_RELEASE=${EP_C_FLAGS_RELEASE}
-DCMAKE_C_FLAGS_RELWITHDEBINFO=${EP_C_FLAGS_RELWITHDEBINFO}
- -DCMAKE_EXPORT_NO_PACKAGE_REGISTRY=${CMAKE_EXPORT_NO_PACKAGE_REGISTRY}
+ -DCMAKE_EXPORT_NO_PACKAGE_REGISTRY=ON
-DCMAKE_FIND_PACKAGE_NO_PACKAGE_REGISTRY=${CMAKE_FIND_PACKAGE_NO_PACKAGE_REGISTRY}
-DCMAKE_INSTALL_LIBDIR=lib
-DCMAKE_OSX_SYSROOT=${CMAKE_OSX_SYSROOT}
```
##########
cpp/cmake_modules/ThirdpartyToolchain.cmake:
##########
@@ -5039,6 +5055,114 @@ if(ARROW_S3)
endif()
endif()
+# ----------------------------------------------------------------------
+# Azure SDK for C++
+
+macro(build_azure_sdk)
+ message(STATUS "Building Azure SDK for C++ from source")
+
+ find_curl()
+ find_package(LibXml2 REQUIRED)
+ add_custom_target(azure_sdk_dependencies)
+
+ set(AZURE_SDK_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/azure-sdk_ep-install")
+ set(AZURE_SDK_INCLUDE_DIR "${AZURE_SDK_PREFIX}/include")
+
+ set(AZURE_SDK_COMMON_CMAKE_ARGS
+ ${EP_COMMON_CMAKE_ARGS}
+ "-DCMAKE_INSTALL_PREFIX=<INSTALL_DIR>"
+ "-DCMAKE_PREFIX_PATH=${AZURE_SDK_PREFIX}"
+ -DDISABLE_AZURE_CORE_OPENTELEMETRY=ON
+ -DWARNINGS_AS_ERRORS=OFF
+ -DVCPKG_OVERLAY_PORTS=${CMAKE_SOURCE_DIR}/overlays)
+
+ file(MAKE_DIRECTORY ${AZURE_SDK_INCLUDE_DIR})
+ set(AZURE_CORE_STATIC_LIBRARY
+
"${AZURE_SDK_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}azure-core${CMAKE_STATIC_LIBRARY_SUFFIX}"
+ )
+ set(AZURE_IDENTITY_STATIC_LIBRARY
+
"${AZURE_SDK_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}azure-identity${CMAKE_STATIC_LIBRARY_SUFFIX}"
+ )
+ set(AZURE_STORAGE_BLOBS_STATIC_LIBRARY
+
"${AZURE_SDK_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}azure-storage-blobs${CMAKE_STATIC_LIBRARY_SUFFIX}"
+ )
+ set(AZURE_STORAGE_COMMON_STATIC_LIBRARY
+
"${AZURE_SDK_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}azure-storage-common${CMAKE_STATIC_LIBRARY_SUFFIX}"
+ )
+ set(AZURE_STORAGE_FILES_DATALAKE_STATIC_LIBRARY
+
"${AZURE_SDK_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}azure-storage-files-datalake${CMAKE_STATIC_LIBRARY_SUFFIX}"
+ )
+ externalproject_add(azure-sdk_ep
+ ${EP_COMMON_OPTIONS}
+ INSTALL_DIR ${AZURE_SDK_PREFIX}
+ URL ${ARROW_AZURE_SDK_URL}
+ URL_HASH
"SHA256=${ARROW_AZURE_SDK_BUILD_SHA256_CHECKSUM}"
+ CMAKE_ARGS ${AZURE_SDK_COMMON_CMAKE_ARGS}
+ BUILD_BYPRODUCTS ${AZURE_CORE_STATIC_LIBRARY}
+ ${AZURE_IDENTITY_STATIC_LIBRARY}
+ ${AZURE_STORAGE_BLOBS_STATIC_LIBRARY}
+ ${AZURE_STORAGE_COMMON_STATIC_LIBRARY}
+
${AZURE_STORAGE_FILES_DATALAKE_STATIC_LIBRARY}
+ DEPENDS azure_sdk_dependencies)
+ add_library(Azure::azure-core STATIC IMPORTED)
+ set_target_properties(Azure::azure-core
+ PROPERTIES IMPORTED_LOCATION
${AZURE_CORE_STATIC_LIBRARY}
+ INTERFACE_INCLUDE_DIRECTORIES
+ ${AZURE_SDK_INCLUDE_DIR})
+ set_property(TARGET Azure::azure-core
+ PROPERTY INTERFACE_LINK_LIBRARIES CURL::libcurl OpenSSL::SSL)
+ add_dependencies(Azure::azure-core azure-sdk_ep)
Review Comment:
Hmm. It may be better that we use `FetchContent`
https://cmake.org/cmake/help/latest/module/FetchContent.html instead of
`ExternalProject` because we can avoid this manual CMake targets creation and
Azure SDK for C++ recommends it
https://github.com/Azure/azure-sdk-for-cpp#cmake-project--fetch-content .
```diff
diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake
b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index fcf6d5609..ae320f5fc 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -975,6 +975,8 @@ else()
set(MAKE_BUILD_ARGS "-j${NPROC}")
endif()
+include(FetchContent)
+
# ----------------------------------------------------------------------
# Find pthreads
@@ -5058,92 +5060,27 @@ endif()
# ----------------------------------------------------------------------
# Azure SDK for C++
-macro(build_azure_sdk)
+function(build_azure_sdk)
message(STATUS "Building Azure SDK for C++ from source")
- find_curl()
- find_package(LibXml2 REQUIRED)
+ fetchcontent_declare(azure_sdk
+ URL ${ARROW_AZURE_SDK_URL}
+ URL_HASH
"SHA256=${ARROW_AZURE_SDK_BUILD_SHA256_CHECKSUM}")
+ fetchcontent_getproperties(azure_sdk)
+ if(NOT azure_sdk_POPULATED)
+ fetchcontent_populate(azure_sdk)
+ set(BUILD_PERFORMANCE_TESTS FALSE)
+ set(BUILD_TESTING FALSE)
+ set(BUILD_WINDOWS_UWP TRUE)
+ set(CMAKE_EXPORT_NO_PACKAGE_REGISTRY TRUE)
+ set(DISABLE_AZURE_CORE_OPENTELEMETRY TRUE)
+ add_subdirectory(${azure_sdk_SOURCE_DIR} ${azure_sdk_BINARY_DIR}
EXCLUDE_FROM_ALL)
- set(AZURE_SDK_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/azure-sdk_ep-install")
- set(AZURE_SDK_INCLUDE_DIR "${AZURE_SDK_PREFIX}/include")
+ endif()
- set(AZURE_SDK_COMMON_CMAKE_ARGS
- ${EP_COMMON_CMAKE_ARGS}
- "-DCMAKE_INSTALL_PREFIX=<INSTALL_DIR>"
- "-DCMAKE_PREFIX_PATH=${AZURE_SDK_PREFIX}"
- -DDISABLE_AZURE_CORE_OPENTELEMETRY=ON
- -DWARNINGS_AS_ERRORS=OFF
- -DVCPKG_OVERLAY_PORTS=${CMAKE_SOURCE_DIR}/overlays)
-
- file(MAKE_DIRECTORY ${AZURE_SDK_INCLUDE_DIR})
- set(AZURE_CORE_STATIC_LIBRARY
-
"${AZURE_SDK_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}azure-core${CMAKE_STATIC_LIBRARY_SUFFIX}"
- )
- set(AZURE_IDENTITY_STATIC_LIBRARY
-
"${AZURE_SDK_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}azure-identity${CMAKE_STATIC_LIBRARY_SUFFIX}"
- )
- set(AZURE_STORAGE_BLOBS_STATIC_LIBRARY
-
"${AZURE_SDK_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}azure-storage-blobs${CMAKE_STATIC_LIBRARY_SUFFIX}"
- )
- set(AZURE_STORAGE_COMMON_STATIC_LIBRARY
-
"${AZURE_SDK_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}azure-storage-common${CMAKE_STATIC_LIBRARY_SUFFIX}"
- )
- set(AZURE_STORAGE_FILES_DATALAKE_STATIC_LIBRARY
-
"${AZURE_SDK_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}azure-storage-files-datalake${CMAKE_STATIC_LIBRARY_SUFFIX}"
- )
- externalproject_add(azure-sdk_ep
- ${EP_COMMON_OPTIONS}
- INSTALL_DIR ${AZURE_SDK_PREFIX}
- URL ${ARROW_AZURE_SDK_URL}
- URL_HASH
"SHA256=${ARROW_AZURE_SDK_BUILD_SHA256_CHECKSUM}"
- CMAKE_ARGS ${AZURE_SDK_COMMON_CMAKE_ARGS}
- BUILD_BYPRODUCTS ${AZURE_CORE_STATIC_LIBRARY}
- ${AZURE_IDENTITY_STATIC_LIBRARY}
- ${AZURE_STORAGE_BLOBS_STATIC_LIBRARY}
-
${AZURE_STORAGE_COMMON_STATIC_LIBRARY}
-
${AZURE_STORAGE_FILES_DATALAKE_STATIC_LIBRARY})
- add_library(Azure::azure-core STATIC IMPORTED)
- set_target_properties(Azure::azure-core
- PROPERTIES IMPORTED_LOCATION
${AZURE_CORE_STATIC_LIBRARY}
- INTERFACE_INCLUDE_DIRECTORIES
- ${AZURE_SDK_INCLUDE_DIR})
- set_property(TARGET Azure::azure-core
- PROPERTY INTERFACE_LINK_LIBRARIES CURL::libcurl OpenSSL::SSL)
- add_dependencies(Azure::azure-core azure-sdk_ep)
-
- add_library(Azure::azure-identity STATIC IMPORTED)
- set_target_properties(Azure::azure-identity
- PROPERTIES IMPORTED_LOCATION
${AZURE_IDENTITY_STATIC_LIBRARY})
- set_property(TARGET Azure::azure-identity
- PROPERTY INTERFACE_LINK_LIBRARIES OpenSSL::Crypto
Azure::azure-core)
-
- add_library(Azure::azure-storage-common STATIC IMPORTED)
- set_target_properties(Azure::azure-storage-common
- PROPERTIES IMPORTED_LOCATION
${AZURE_STORAGE_COMMON_STATIC_LIBRARY})
- set_property(TARGET Azure::azure-storage-common
- PROPERTY INTERFACE_LINK_LIBRARIES OpenSSL::SSL
OpenSSL::Crypto LibXml2::LibXml2 Azure::azure-core)
-
- add_library(Azure::azure-storage-blobs STATIC IMPORTED)
- set_target_properties(Azure::azure-storage-blobs
- PROPERTIES IMPORTED_LOCATION
${AZURE_STORAGE_BLOBS_STATIC_LIBRARY})
- set_property(TARGET Azure::azure-storage-blobs
- PROPERTY INTERFACE_LINK_LIBRARIES
Azure::azure-storage-common)
-
- add_library(Azure::azure-storage-files-datalake STATIC IMPORTED)
- set_target_properties(Azure::azure-storage-files-datalake
- PROPERTIES IMPORTED_LOCATION
${AZURE_STORAGE_FILES_DATALAKE_STATIC_LIBRARY})
- set_property(TARGET Azure::azure-storage-files-datalake
- PROPERTY INTERFACE_LINK_LIBRARIES Azure::azure-storage-blobs)
-
- set(AZURE_SDK_VENDORED TRUE)
- set(AZURE_SDK_LIBRARIES)
- list(APPEND
- AZURE_SDK_LIBRARIES
- Azure::azure-core
- Azure::azure-identity
- Azure::azure-storage-blobs
- Azure::azure-storage-common
- Azure::azure-storage-files-datalake)
+ set(AZURE_SDK_VENDORED
+ TRUE
+ PARENT_SCOPE)
list(APPEND
ARROW_BUNDLED_STATIC_LIBS
Azure::azure-core
@@ -5151,14 +5088,19 @@ macro(build_azure_sdk)
Azure::azure-storage-blobs
Azure::azure-storage-common
Azure::azure-storage-files-datalake)
-
- set(AZURE_SDK_LINK_LIBRARIES ${AZURE_SDK_LIBRARIES})
-endmacro()
+ set(ARROW_BUNDLED_STATIC_LIBS
+ ${ARROW_BUNDLED_STATIC_LIBS}
+ PARENT_SCOPE)
+endfunction()
if(ARROW_WITH_AZURE_SDK)
resolve_dependency(Azure)
- message(STATUS "Found Azure SDK headers: ${AZURE_SDK_INCLUDE_DIR}")
- message(STATUS "Found Azure SDK libraries: ${AZURE_SDK_LINK_LIBRARIES}")
+ set(AZURE_SDK_LINK_LIBRARIES
+ Azure::azure-storage-files-datalake
+ Azure::azure-storage-common
+ Azure::azure-storage-blobs
+ Azure::azure-identity
+ Azure::azure-core)
endif()
# ----------------------------------------------------------------------
```
##########
cpp/overlays/openssl/vcpkg.json:
##########
@@ -0,0 +1,4 @@
+{
+ "name": "openssl",
+ "version": "1.0.2"
Review Comment:
It may caused by `-DVCPKG_OVERLAY_PORTS=${CMAKE_SOURCE_DIR}/overlays`.
`CMAKE_SOURCE_DIR` is the `cpp` directory in this context.
##########
ci/scripts/python_wheel_manylinux_build.sh:
##########
@@ -87,6 +88,8 @@ pushd /tmp/arrow-build
cmake \
-DARROW_ACERO=${ARROW_ACERO} \
+ -DARROW_AZURE=${ARROW_AZURE} \
+ -DAZURE_SDK_SOURCE=BUNDLED \
Review Comment:
Could you update vcpkg to avoid using vendored Azure SDK for C++ with vcpkg
build?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]