This is an automated email from the ASF dual-hosted git repository.
hansva pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/hop.git
The following commit(s) were added to refs/heads/main by this push:
new 530541907a Upgrade Azure Blob Storage to support Azure Data Lake Storage gen2 (#4144)
530541907a is described below
commit 530541907ab8acf624e012585ee56d706a026651
Author: Sergio De Lorenzis <[email protected]>
AuthorDate: Fri Sep 6 11:45:10 2024 +0200
Upgrade Azure Blob Storage to support Azure Data Lake Storage gen2 (#4144)
* ADLS gen2 support
* Disable tests for now
---------
Co-authored-by: sramazzina <[email protected]>
Co-authored-by: Hans Van Akelyen <[email protected]>
---
.../integration-tests/integration-tests-azure.yaml | 15 -
.../0001-read_parquet_from_azure_azfs_scheme.hpl | 2 +-
.../0001-write_parquet_to_azure_azfs_scheme.hpl | 2 +-
.../0002-read_parquet_from_azure_azure_scheme.hpl | 10 +-
...0005-read_parquet_from_azure_multi_account.hpl} | 10 +-
... 0005-write_parquet_to_azure_multi_account.hpl} | 16 +-
.../azure/0006-create-file-from-csv.hpl | 548 +++++++++++++++++++++
.../azure/0007-files-manipulation.hwf | 139 ++++++
...re_azure_scheme.hpl => 0007-get-file-names.hpl} | 110 +++--
.../azure/datasets/golden-get-file-names.csv | 4 +
integration-tests/azure/dev-env-config.json | 5 +-
integration-tests/azure/disabled.txt | 17 +
integration-tests/azure/hop-config.json | 5 +-
...ain-0001-write-and-read-parquet-azfs-scheme.hwf | 54 +-
integration-tests/azure/main-0003-move-files.hwf | 14 +-
...-0005-write-and-read-parquet-multi-account.hwf} | 38 +-
.../azure/main-0006-smoke-test-multi-account.hwf | 286 +++++++++++
...scheme.hwf => main-0007-files-manipulation.hwf} | 172 ++++---
.../metadata/AzureConnectionDefinition/azit.json | 7 +
.../azure/metadata/dataset/Get File Names.json | 16 +
.../unit-test/0007-get-file-names UNIT.json | 28 ++
plugins/tech/azure/pom.xml | 57 ++-
plugins/tech/azure/src/assembly/assembly.xml | 7 -
.../apache/hop/vfs/azure/AzureFileNameParser.java | 27 +-
.../org/apache/hop/vfs/azure/AzureFileObject.java | 370 +++++++-------
.../apache/hop/vfs/azure/AzureFileProvider.java | 61 ++-
.../org/apache/hop/vfs/azure/AzureFileSystem.java | 18 +-
.../org/apache/hop/vfs/azure/BlobInputStream.java | 8 +-
.../hop/vfs/azure/AzureFileNameParserTest.java | 130 +++--
29 files changed, 1681 insertions(+), 495 deletions(-)
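
In short, the patch drops the Azurite-only emulator wiring (the fixed devstoreaccount1 account and emulatorUrl) and introduces an AzureConnectionDefinition metadata type, so the integration tests can address one or more real storage accounts through named VFS connections. As an illustrative sketch only, taken from the azit.json added further down (the ${AZURE_ACCOUNT} and ${AZURE_KEY} values are placeholders resolved from the project configuration, not real credentials), such a connection definition looks like:

{
  "name": "azit",
  "description": "Azure Integration Tests",
  "storageAccountEndpoint": "",
  "storageAccountName": "${AZURE_ACCOUNT}",
  "storageAccountKey": "${AZURE_KEY}"
}

Files behind that connection are then addressed with the connection name as the URI scheme, for example azit://mycontainer/copy/staging/ingest-3.csv, while the existing azfs:// scheme keeps working with an explicit account, e.g. azfs://${AZURE_ACCOUNT}/mycontainer/azfs-scheme.parquet.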
diff --git a/docker/integration-tests/integration-tests-azure.yaml b/docker/integration-tests/integration-tests-azure.yaml
index 1738f93257..b7b2dd9f12 100644
--- a/docker/integration-tests/integration-tests-azure.yaml
+++ b/docker/integration-tests/integration-tests-azure.yaml
@@ -20,18 +20,3 @@ services:
extends:
file: integration-tests-base.yaml
service: integration_test
- depends_on:
- azurite-blob:
- condition: service_healthy
- links:
- - azurite-blob
-
- azurite-blob:
- ports:
- - "10000"
- image: mcr.microsoft.com/azure-storage/azurite:3.29.0
- command: azurite-blob --blobHost 0.0.0.0
- healthcheck:
- test: nc 127.0.0.1 10000 -z
- interval: 1s
- retries: 30
diff --git a/integration-tests/azure/0001-read_parquet_from_azure_azfs_scheme.hpl b/integration-tests/azure/0001-read_parquet_from_azure_azfs_scheme.hpl
index dc5c4e3134..ac8d940d9d 100644
--- a/integration-tests/azure/0001-read_parquet_from_azure_azfs_scheme.hpl
+++ b/integration-tests/azure/0001-read_parquet_from_azure_azfs_scheme.hpl
@@ -85,7 +85,7 @@ limitations under the License.
<group/>
<length>-1</length>
<name>filename</name>
- <nullif>azfs://devstoreaccount1/mycontainer/azfs-scheme.parquet</nullif>
+ <nullif>azfs://knowbistorage/mycontainer/azfs-scheme.parquet</nullif>
<precision>-1</precision>
<set_empty_string>N</set_empty_string>
<type>String</type>
diff --git a/integration-tests/azure/0001-write_parquet_to_azure_azfs_scheme.hpl b/integration-tests/azure/0001-write_parquet_to_azure_azfs_scheme.hpl
index 5c68d2074b..9fb28a2d98 100644
--- a/integration-tests/azure/0001-write_parquet_to_azure_azfs_scheme.hpl
+++ b/integration-tests/azure/0001-write_parquet_to_azure_azfs_scheme.hpl
@@ -220,7 +220,7 @@ limitations under the License.
<target_field>industry</target_field>
</field>
</fields>
- <filename_base>azfs://devstoreaccount1/mycontainer/azfs-scheme</filename_base>
+ <filename_base>azfs://${AZURE_ACCOUNT}/mycontainer/azfs-scheme</filename_base>
<filename_create_parent_folders>Y</filename_create_parent_folders>
<filename_datetime_format>yyyyMMdd-HHmmss</filename_datetime_format>
<filename_ext>parquet</filename_ext>
diff --git a/integration-tests/azure/0002-read_parquet_from_azure_azure_scheme.hpl b/integration-tests/azure/0002-read_parquet_from_azure_azure_scheme.hpl
index 39657d356c..4f3352de6c 100644
--- a/integration-tests/azure/0002-read_parquet_from_azure_azure_scheme.hpl
+++ b/integration-tests/azure/0002-read_parquet_from_azure_azure_scheme.hpl
@@ -44,7 +44,7 @@ limitations under the License.
<enabled>Y</enabled>
</hop>
<hop>
- <from>Generate rows Azurite (azfs scheme)</from>
+ <from>Generate rows Azurite (azure scheme)</from>
<to>Parquet File Input</to>
<enabled>Y</enabled>
</hop>
@@ -67,7 +67,7 @@ limitations under the License.
</GUI>
</transform>
<transform>
- <name>Generate rows Azurite (azfs scheme)</name>
+ <name>Generate rows Azurite (azure scheme)</name>
<type>RowGenerator</type>
<description/>
<distribute>Y</distribute>
@@ -79,9 +79,13 @@ limitations under the License.
</partitioning>
<fields>
<field>
+ <currency/>
+ <decimal/>
+ <format/>
+ <group/>
<length>-1</length>
<name>filename</name>
- <nullif>azfs://devstoreaccount1/mycontainer/azfs-scheme.parquet</nullif>
+ <nullif>azure:///mycontainer/azure-scheme.parquet</nullif>
<precision>-1</precision>
<set_empty_string>N</set_empty_string>
<type>String</type>
diff --git a/integration-tests/azure/0001-read_parquet_from_azure_azfs_scheme.hpl b/integration-tests/azure/0005-read_parquet_from_azure_multi_account.hpl
similarity index 93%
copy from integration-tests/azure/0001-read_parquet_from_azure_azfs_scheme.hpl
copy to integration-tests/azure/0005-read_parquet_from_azure_multi_account.hpl
index dc5c4e3134..071883da9e 100644
--- a/integration-tests/azure/0001-read_parquet_from_azure_azfs_scheme.hpl
+++ b/integration-tests/azure/0005-read_parquet_from_azure_multi_account.hpl
@@ -19,7 +19,7 @@ limitations under the License.
-->
<pipeline>
<info>
- <name>0001-read_parquet_from_azure_azfs_scheme</name>
+ <name>0005-read_parquet_from_azure_multi_account</name>
<name_sync_with_filename>Y</name_sync_with_filename>
<description/>
<extended_description/>
@@ -44,7 +44,7 @@ limitations under the License.
<enabled>Y</enabled>
</hop>
<hop>
- <from>Generate rows Azurite (azfs scheme)</from>
+ <from>Generate rows Azurite</from>
<to>Parquet File Input</to>
<enabled>Y</enabled>
</hop>
@@ -67,7 +67,7 @@ limitations under the License.
</GUI>
</transform>
<transform>
- <name>Generate rows Azurite (azfs scheme)</name>
+ <name>Generate rows Azurite</name>
<type>RowGenerator</type>
<description/>
<distribute>Y</distribute>
@@ -85,7 +85,7 @@ limitations under the License.
<group/>
<length>-1</length>
<name>filename</name>
- <nullif>azfs://devstoreaccount1/mycontainer/azfs-scheme.parquet</nullif>
+ <nullif>azit:///mycontainer/azit-scheme.parquet</nullif>
<precision>-1</precision>
<set_empty_string>N</set_empty_string>
<type>String</type>
@@ -138,7 +138,7 @@ limitations under the License.
<filename_field>filename</filename_field>
<attributes/>
<GUI>
- <xloc>688</xloc>
+ <xloc>680</xloc>
<yloc>160</yloc>
</GUI>
</transform>
diff --git a/integration-tests/azure/0001-write_parquet_to_azure_azfs_scheme.hpl b/integration-tests/azure/0005-write_parquet_to_azure_multi_account.hpl
similarity index 95%
copy from integration-tests/azure/0001-write_parquet_to_azure_azfs_scheme.hpl
copy to integration-tests/azure/0005-write_parquet_to_azure_multi_account.hpl
index 5c68d2074b..b0b0bd30d5 100644
--- a/integration-tests/azure/0001-write_parquet_to_azure_azfs_scheme.hpl
+++ b/integration-tests/azure/0005-write_parquet_to_azure_multi_account.hpl
@@ -19,7 +19,7 @@ limitations under the License.
-->
<pipeline>
<info>
- <name>0001-write_parquet_to_azure_azfs_scheme</name>
+ <name>0005-write_parquet_to_azure_multi_account</name>
<name_sync_with_filename>Y</name_sync_with_filename>
<description/>
<extended_description/>
@@ -41,7 +41,7 @@ limitations under the License.
<order>
<hop>
<from>Read employees data from dataset</from>
- <to>Write to Azurite a Parquet file (azfs)</to>
+ <to>Write to Azure a Parquet file</to>
<enabled>Y</enabled>
</hop>
</order>
@@ -184,12 +184,12 @@ limitations under the License.
<sizeFieldName/>
<attributes/>
<GUI>
- <xloc>448</xloc>
- <yloc>160</yloc>
+ <xloc>240</xloc>
+ <yloc>144</yloc>
</GUI>
</transform>
<transform>
- <name>Write to Azurite a Parquet file (azfs)</name>
+ <name>Write to Azure a Parquet file</name>
<type>ParquetFileOutput</type>
<description/>
<distribute>Y</distribute>
@@ -220,7 +220,7 @@ limitations under the License.
<target_field>industry</target_field>
</field>
</fields>
- <filename_base>azfs://devstoreaccount1/mycontainer/azfs-scheme</filename_base>
+ <filename_base>azit://mycontainer/azit-scheme</filename_base>
<filename_create_parent_folders>Y</filename_create_parent_folders>
<filename_datetime_format>yyyyMMdd-HHmmss</filename_datetime_format>
<filename_ext>parquet</filename_ext>
@@ -234,8 +234,8 @@ limitations under the License.
<version>2.0</version>
<attributes/>
<GUI>
- <xloc>736</xloc>
- <yloc>160</yloc>
+ <xloc>528</xloc>
+ <yloc>144</yloc>
</GUI>
</transform>
<transform_error_handling>
diff --git a/integration-tests/azure/0006-create-file-from-csv.hpl b/integration-tests/azure/0006-create-file-from-csv.hpl
new file mode 100644
index 0000000000..a66b0dc70b
--- /dev/null
+++ b/integration-tests/azure/0006-create-file-from-csv.hpl
@@ -0,0 +1,548 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements. See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+-->
+<pipeline>
+ <info>
+ <name>0006-create-file-from-csv</name>
+ <name_sync_with_filename>Y</name_sync_with_filename>
+ <description/>
+ <extended_description/>
+ <pipeline_version/>
+ <pipeline_type>Normal</pipeline_type>
+ <parameters>
+ </parameters>
+ <capture_transform_performance>N</capture_transform_performance>
+ <transform_performance_capturing_delay>1000</transform_performance_capturing_delay>
+ <transform_performance_capturing_size_limit>100</transform_performance_capturing_size_limit>
+ <created_user>-</created_user>
+ <created_date>2024/08/27 13:00:54.747</created_date>
+ <modified_user>-</modified_user>
+ <modified_date>2024/08/27 13:00:54.747</modified_date>
+ </info>
+ <notepads>
+ </notepads>
+ <order>
+ <hop>
+ <from>Read from csv</from>
+ <to>Output to container root</to>
+ <enabled>Y</enabled>
+ </hop>
+ <hop>
+ <from>Read from csv</from>
+ <to>Output to container subfolder - level 1</to>
+ <enabled>Y</enabled>
+ </hop>
+ <hop>
+ <from>Read from csv</from>
+ <to>Output to container subfolder - level 2</to>
+ <enabled>Y</enabled>
+ </hop>
+ </order>
+ <transform>
+ <name>Output to container root</name>
+ <type>TextFileOutput</type>
+ <description/>
+ <distribute>Y</distribute>
+ <custom_distribution/>
+ <copies>1</copies>
+ <partitioning>
+ <method>none</method>
+ <schema_name/>
+ </partitioning>
+ <schema_definition/>
+ <separator>;</separator>
+ <enclosure>"</enclosure>
+ <enclosure_forced>N</enclosure_forced>
+ <enclosure_fix_disabled>N</enclosure_fix_disabled>
+ <header>Y</header>
+ <footer>N</footer>
+ <format>UNIX</format>
+ <compression>None</compression>
+ <encoding>UTF-8</encoding>
+ <endedLine/>
+ <fileNameInField>N</fileNameInField>
+ <fileNameField/>
+ <create_parent_folder>Y</create_parent_folder>
+ <file>
+ <name>azit://mycontainer/${output_file}</name>
+ <servlet_output>N</servlet_output>
+ <do_not_open_new_file_init>Y</do_not_open_new_file_init>
+ <extention>csv</extention>
+ <append>N</append>
+ <split>N</split>
+ <haspartno>N</haspartno>
+ <add_date>N</add_date>
+ <add_time>N</add_time>
+ <SpecifyFormat>N</SpecifyFormat>
+ <date_time_format/>
+ <add_to_result_filenames>Y</add_to_result_filenames>
+ <pad>N</pad>
+ <fast_dump>N</fast_dump>
+ <splitevery/>
+ </file>
+ <fields>
+ <field>
+ <name>id</name>
+ <type>Integer</type>
+ <format>#</format>
+ <currency>$</currency>
+ <decimal>.</decimal>
+ <group>,</group>
+ <nullif/>
+ <trim_type>none</trim_type>
+ <length>15</length>
+ <precision>0</precision>
+ </field>
+ <field>
+ <name>name</name>
+ <type>String</type>
+ <format/>
+ <currency/>
+ <decimal/>
+ <group/>
+ <nullif/>
+ <trim_type>none</trim_type>
+ <length>255</length>
+ <precision>-1</precision>
+ </field>
+ <field>
+ <name>birthday</name>
+ <type>Date</type>
+ <format>yyyy/MM/dd HH:mm:ss.SSS</format>
+ <currency/>
+ <decimal/>
+ <group/>
+ <nullif/>
+ <trim_type>none</trim_type>
+ <length>-1</length>
+ <precision>-1</precision>
+ </field>
+ <field>
+ <name>quote</name>
+ <type>String</type>
+ <format/>
+ <currency/>
+ <decimal/>
+ <group/>
+ <nullif/>
+ <trim_type>none</trim_type>
+ <length>255</length>
+ <precision>-1</precision>
+ </field>
+ <field>
+ <name>instrument</name>
+ <type>String</type>
+ <format/>
+ <currency/>
+ <decimal/>
+ <group/>
+ <nullif/>
+ <trim_type>none</trim_type>
+ <length>50</length>
+ <precision>-1</precision>
+ </field>
+ </fields>
+ <attributes/>
+ <GUI>
+ <xloc>608</xloc>
+ <yloc>176</yloc>
+ </GUI>
+ </transform>
+ <transform>
+ <name>Output to container subfolder - level 1</name>
+ <type>TextFileOutput</type>
+ <description/>
+ <distribute>Y</distribute>
+ <custom_distribution/>
+ <copies>1</copies>
+ <partitioning>
+ <method>none</method>
+ <schema_name/>
+ </partitioning>
+ <schema_definition/>
+ <separator>;</separator>
+ <enclosure>"</enclosure>
+ <enclosure_forced>N</enclosure_forced>
+ <enclosure_fix_disabled>N</enclosure_fix_disabled>
+ <header>Y</header>
+ <footer>N</footer>
+ <format>UNIX</format>
+ <compression>None</compression>
+ <encoding>UTF-8</encoding>
+ <endedLine/>
+ <fileNameInField>N</fileNameInField>
+ <fileNameField/>
+ <create_parent_folder>Y</create_parent_folder>
+ <file>
+ <name>azit://mycontainer/lvl1/${output_file}</name>
+ <servlet_output>N</servlet_output>
+ <do_not_open_new_file_init>Y</do_not_open_new_file_init>
+ <extention>csv</extention>
+ <append>N</append>
+ <split>N</split>
+ <haspartno>N</haspartno>
+ <add_date>N</add_date>
+ <add_time>N</add_time>
+ <SpecifyFormat>N</SpecifyFormat>
+ <date_time_format/>
+ <add_to_result_filenames>Y</add_to_result_filenames>
+ <pad>N</pad>
+ <fast_dump>N</fast_dump>
+ <splitevery/>
+ </file>
+ <fields>
+ <field>
+ <name>id</name>
+ <type>Integer</type>
+ <format>#</format>
+ <currency>$</currency>
+ <decimal>.</decimal>
+ <group>,</group>
+ <nullif/>
+ <trim_type>none</trim_type>
+ <length>15</length>
+ <precision>0</precision>
+ </field>
+ <field>
+ <name>name</name>
+ <type>String</type>
+ <format/>
+ <currency/>
+ <decimal/>
+ <group/>
+ <nullif/>
+ <trim_type>none</trim_type>
+ <length>255</length>
+ <precision>-1</precision>
+ </field>
+ <field>
+ <name>birthday</name>
+ <type>Date</type>
+ <format>yyyy/MM/dd HH:mm:ss.SSS</format>
+ <currency/>
+ <decimal/>
+ <group/>
+ <nullif/>
+ <trim_type>none</trim_type>
+ <length>-1</length>
+ <precision>-1</precision>
+ </field>
+ <field>
+ <name>quote</name>
+ <type>String</type>
+ <format/>
+ <currency/>
+ <decimal/>
+ <group/>
+ <nullif/>
+ <trim_type>none</trim_type>
+ <length>255</length>
+ <precision>-1</precision>
+ </field>
+ <field>
+ <name>instrument</name>
+ <type>String</type>
+ <format/>
+ <currency/>
+ <decimal/>
+ <group/>
+ <nullif/>
+ <trim_type>none</trim_type>
+ <length>50</length>
+ <precision>-1</precision>
+ </field>
+ </fields>
+ <attributes/>
+ <GUI>
+ <xloc>704</xloc>
+ <yloc>352</yloc>
+ </GUI>
+ </transform>
+ <transform>
+ <name>Output to container subfolder - level 2</name>
+ <type>TextFileOutput</type>
+ <description/>
+ <distribute>Y</distribute>
+ <custom_distribution/>
+ <copies>1</copies>
+ <partitioning>
+ <method>none</method>
+ <schema_name/>
+ </partitioning>
+ <schema_definition/>
+ <separator>;</separator>
+ <enclosure>"</enclosure>
+ <enclosure_forced>N</enclosure_forced>
+ <enclosure_fix_disabled>N</enclosure_fix_disabled>
+ <header>Y</header>
+ <footer>N</footer>
+ <format>UNIX</format>
+ <compression>None</compression>
+ <encoding>UTF-8</encoding>
+ <endedLine/>
+ <fileNameInField>N</fileNameInField>
+ <fileNameField/>
+ <create_parent_folder>Y</create_parent_folder>
+ <file>
+ <name>azit://mycontainer/lvl1/lvl2/${output_file}</name>
+ <servlet_output>N</servlet_output>
+ <do_not_open_new_file_init>Y</do_not_open_new_file_init>
+ <extention>csv</extention>
+ <append>N</append>
+ <split>N</split>
+ <haspartno>N</haspartno>
+ <add_date>N</add_date>
+ <add_time>N</add_time>
+ <SpecifyFormat>N</SpecifyFormat>
+ <date_time_format/>
+ <add_to_result_filenames>Y</add_to_result_filenames>
+ <pad>N</pad>
+ <fast_dump>N</fast_dump>
+ <splitevery/>
+ </file>
+ <fields>
+ <field>
+ <name>id</name>
+ <type>Integer</type>
+ <format>#</format>
+ <currency>$</currency>
+ <decimal>.</decimal>
+ <group>,</group>
+ <nullif/>
+ <trim_type>none</trim_type>
+ <length>15</length>
+ <precision>0</precision>
+ </field>
+ <field>
+ <name>name</name>
+ <type>String</type>
+ <format/>
+ <currency/>
+ <decimal/>
+ <group/>
+ <nullif/>
+ <trim_type>none</trim_type>
+ <length>255</length>
+ <precision>-1</precision>
+ </field>
+ <field>
+ <name>birthday</name>
+ <type>Date</type>
+ <format>yyyy/MM/dd HH:mm:ss.SSS</format>
+ <currency/>
+ <decimal/>
+ <group/>
+ <nullif/>
+ <trim_type>none</trim_type>
+ <length>-1</length>
+ <precision>-1</precision>
+ </field>
+ <field>
+ <name>quote</name>
+ <type>String</type>
+ <format/>
+ <currency/>
+ <decimal/>
+ <group/>
+ <nullif/>
+ <trim_type>none</trim_type>
+ <length>255</length>
+ <precision>-1</precision>
+ </field>
+ <field>
+ <name>instrument</name>
+ <type>String</type>
+ <format/>
+ <currency/>
+ <decimal/>
+ <group/>
+ <nullif/>
+ <trim_type>none</trim_type>
+ <length>50</length>
+ <precision>-1</precision>
+ </field>
+ </fields>
+ <attributes/>
+ <GUI>
+ <xloc>512</xloc>
+ <yloc>496</yloc>
+ </GUI>
+ </transform>
+ <transform>
+ <name>Read from csv</name>
+ <type>TextFileInput2</type>
+ <description/>
+ <distribute>N</distribute>
+ <custom_distribution/>
+ <copies>1</copies>
+ <partitioning>
+ <method>none</method>
+ <schema_name/>
+ </partitioning>
+ <accept_filenames>N</accept_filenames>
+ <passing_through_fields>N</passing_through_fields>
+ <accept_field/>
+ <accept_transform_name/>
+ <separator>,</separator>
+ <enclosure>"</enclosure>
+ <enclosure_breaks>N</enclosure_breaks>
+ <escapechar/>
+ <header>Y</header>
+ <nr_headerlines>1</nr_headerlines>
+ <footer>N</footer>
+ <nr_footerlines>1</nr_footerlines>
+ <line_wrapped>N</line_wrapped>
+ <nr_wraps>1</nr_wraps>
+ <layout_paged>N</layout_paged>
+ <nr_lines_per_page>80</nr_lines_per_page>
+ <nr_lines_doc_header>0</nr_lines_doc_header>
+ <noempty>Y</noempty>
+ <include>N</include>
+ <include_field/>
+ <rownum>N</rownum>
+ <rownumByFile>N</rownumByFile>
+ <rownum_field/>
+ <format>mixed</format>
+ <encoding/>
+ <length>Characters</length>
+ <add_to_result_filenames>Y</add_to_result_filenames>
+ <file>
+ <name>${PROJECT_HOME}/datasets/ingest-300.csv</name>
+ <filemask/>
+ <exclude_filemask/>
+ <file_required>N</file_required>
+ <include_subfolders>N</include_subfolders>
+ <type>CSV</type>
+ <compression>None</compression>
+ </file>
+ <filters>
+ </filters>
+ <fields>
+ <field>
+ <name>id</name>
+ <type>Integer</type>
+ <format>#</format>
+ <currency>$</currency>
+ <decimal>.</decimal>
+ <group>,</group>
+ <nullif>-</nullif>
+ <ifnull/>
+ <position>-1</position>
+ <length>15</length>
+ <precision>0</precision>
+ <trim_type>none</trim_type>
+ <repeat>N</repeat>
+ </field>
+ <field>
+ <name>name</name>
+ <type>String</type>
+ <format/>
+ <currency>$</currency>
+ <decimal>.</decimal>
+ <group>,</group>
+ <nullif>-</nullif>
+ <ifnull/>
+ <position>-1</position>
+ <length>255</length>
+ <precision>-1</precision>
+ <trim_type>none</trim_type>
+ <repeat>N</repeat>
+ </field>
+ <field>
+ <name>birthday</name>
+ <type>Date</type>
+ <format>yyyy/MM/dd HH:mm:ss.SSS</format>
+ <currency>$</currency>
+ <decimal>.</decimal>
+ <group>,</group>
+ <nullif>-</nullif>
+ <ifnull/>
+ <position>-1</position>
+ <length>-1</length>
+ <precision>-1</precision>
+ <trim_type>none</trim_type>
+ <repeat>N</repeat>
+ </field>
+ <field>
+ <name>quote</name>
+ <type>String</type>
+ <format/>
+ <currency>$</currency>
+ <decimal>.</decimal>
+ <group>,</group>
+ <nullif>-</nullif>
+ <ifnull/>
+ <position>-1</position>
+ <length>255</length>
+ <precision>-1</precision>
+ <trim_type>none</trim_type>
+ <repeat>N</repeat>
+ </field>
+ <field>
+ <name>instrument</name>
+ <type>String</type>
+ <format/>
+ <currency>$</currency>
+ <decimal>.</decimal>
+ <group>,</group>
+ <nullif>-</nullif>
+ <ifnull/>
+ <position>-1</position>
+ <length>50</length>
+ <precision>-1</precision>
+ <trim_type>none</trim_type>
+ <repeat>N</repeat>
+ </field>
+ </fields>
+ <limit>0</limit>
+ <error_ignored>N</error_ignored>
+ <skip_bad_files>N</skip_bad_files>
+ <file_error_field/>
+ <file_error_message_field/>
+ <error_line_skipped>N</error_line_skipped>
+ <error_count_field/>
+ <error_fields_field/>
+ <error_text_field/>
+ <schema_definition/>
+ <bad_line_files_destination_directory/>
+ <bad_line_files_extension>warning</bad_line_files_extension>
+ <error_line_files_destination_directory/>
+ <error_line_files_extension>error</error_line_files_extension>
+ <line_number_files_destination_directory/>
+ <line_number_files_extension>line</line_number_files_extension>
+ <date_format_lenient>Y</date_format_lenient>
+ <date_format_locale>en_US</date_format_locale>
+ <shortFileFieldName/>
+ <pathFieldName/>
+ <hiddenFieldName/>
+ <lastModificationTimeFieldName/>
+ <uriNameFieldName/>
+ <rootUriNameFieldName/>
+ <extensionFieldName/>
+ <sizeFieldName/>
+ <attributes/>
+ <GUI>
+ <xloc>368</xloc>
+ <yloc>176</yloc>
+ </GUI>
+ </transform>
+ <transform_error_handling>
+ </transform_error_handling>
+ <attributes/>
+</pipeline>
diff --git a/integration-tests/azure/0007-files-manipulation.hwf b/integration-tests/azure/0007-files-manipulation.hwf
new file mode 100644
index 0000000000..12cf00bf71
--- /dev/null
+++ b/integration-tests/azure/0007-files-manipulation.hwf
@@ -0,0 +1,139 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements. See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+-->
+<workflow>
+ <name>0007-files-manipulation</name>
+ <name_sync_with_filename>Y</name_sync_with_filename>
+ <description/>
+ <extended_description/>
+ <workflow_version/>
+ <created_user>-</created_user>
+ <created_date>2024/08/27 14:56:12.718</created_date>
+ <modified_user>-</modified_user>
+ <modified_date>2024/08/27 14:56:12.718</modified_date>
+ <parameters>
+ </parameters>
+ <actions>
+ <action>
+ <name>Start</name>
+ <description/>
+ <type>SPECIAL</type>
+ <attributes/>
+ <DayOfMonth>1</DayOfMonth>
+ <hour>12</hour>
+ <intervalMinutes>60</intervalMinutes>
+ <intervalSeconds>0</intervalSeconds>
+ <minutes>0</minutes>
+ <repeat>N</repeat>
+ <schedulerType>0</schedulerType>
+ <weekDay>1</weekDay>
+ <parallel>N</parallel>
+ <xloc>112</xloc>
+ <yloc>192</yloc>
+ <attributes_hac/>
+ </action>
+ <action>
+ <name>Copy files to azit subfolder</name>
+ <description/>
+ <type>COPY_FILES</type>
+ <attributes/>
+ <copy_empty_folders>Y</copy_empty_folders>
+ <arg_from_previous>N</arg_from_previous>
+ <overwrite_files>N</overwrite_files>
+ <include_subfolders>N</include_subfolders>
+ <remove_source_files>N</remove_source_files>
+ <add_result_filesname>N</add_result_filesname>
+ <destination_is_a_file>N</destination_is_a_file>
+ <create_destination_folder>Y</create_destination_folder>
+ <fields>
+ <field>
+ <source_filefolder>EMPTY_SOURCE_URL-0-${PROJECT_HOME}/datasets/</source_filefolder>
+ <source_configuration_name>LOCAL-SOURCE-FILE-0</source_configuration_name>
+ <destination_filefolder>EMPTY_DEST_URL-0-azit:/0006/copy/staging</destination_filefolder>
+ <destination_configuration_name/>
+ <wildcard>ingest.*\.csv</wildcard>
+ </field>
+ </fields>
+ <parallel>N</parallel>
+ <xloc>528</xloc>
+ <yloc>192</yloc>
+ <attributes_hac/>
+ </action>
+ <action>
+ <name>Delete files</name>
+ <description/>
+ <type>DELETE_FILES</type>
+ <attributes/>
+ <arg_from_previous>N</arg_from_previous>
+ <include_subfolders>N</include_subfolders>
+ <fields>
+ <field>
+ <name>azit://0006/copy/staging</name>
+ <filemask>ingest.*\.csv</filemask>
+ </field>
+ </fields>
+ <parallel>N</parallel>
+ <xloc>1232</xloc>
+ <yloc>192</yloc>
+ <attributes_hac/>
+ </action>
+ <action>
+ <name>Checks if files exist</name>
+ <description/>
+ <type>FILES_EXIST</type>
+ <attributes/>
+ <filename/>
+ <fields>
+ <field>
+ <name>azit://0006/copy/staging/ingest-3.csv</name>
+ </field>
+ </fields>
+ <parallel>N</parallel>
+ <xloc>768</xloc>
+ <yloc>192</yloc>
+ <attributes_hac/>
+ </action>
+ </actions>
+ <hops>
+ <hop>
+ <from>Start</from>
+ <to>Copy files to azit subfolder</to>
+ <enabled>Y</enabled>
+ <evaluation>Y</evaluation>
+ <unconditional>Y</unconditional>
+ </hop>
+ <hop>
+ <from>Copy files to azit subfolder</from>
+ <to>Checks if files exist</to>
+ <enabled>Y</enabled>
+ <evaluation>Y</evaluation>
+ <unconditional>N</unconditional>
+ </hop>
+ <hop>
+ <from>Checks if files exist</from>
+ <to>Delete files</to>
+ <enabled>N</enabled>
+ <evaluation>Y</evaluation>
+ <unconditional>Y</unconditional>
+ </hop>
+ </hops>
+ <notepads>
+ </notepads>
+ <attributes/>
+</workflow>
diff --git a/integration-tests/azure/0002-read_parquet_from_azure_azure_scheme.hpl b/integration-tests/azure/0007-get-file-names.hpl
similarity index 64%
copy from integration-tests/azure/0002-read_parquet_from_azure_azure_scheme.hpl
copy to integration-tests/azure/0007-get-file-names.hpl
index 39657d356c..c9d00f5da3 100644
--- a/integration-tests/azure/0002-read_parquet_from_azure_azure_scheme.hpl
+++ b/integration-tests/azure/0007-get-file-names.hpl
@@ -19,7 +19,7 @@ limitations under the License.
-->
<pipeline>
<info>
- <name>0002-read_parquet_from_azure_azure_scheme</name>
+ <name>0007-get-file-names</name>
<name_sync_with_filename>Y</name_sync_with_filename>
<description/>
<extended_description/>
@@ -31,21 +31,26 @@ limitations under the License.
<transform_performance_capturing_delay>1000</transform_performance_capturing_delay>
<transform_performance_capturing_size_limit>100</transform_performance_capturing_size_limit>
<created_user>-</created_user>
- <created_date>2024/02/28 17:03:34.971</created_date>
+ <created_date>2024/08/27 15:45:30.287</created_date>
<modified_user>-</modified_user>
- <modified_date>2024/02/28 17:03:34.971</modified_date>
+ <modified_date>2024/08/27 15:45:30.287</modified_date>
</info>
<notepads>
</notepads>
<order>
<hop>
- <from>Parquet File Input</from>
- <to>Dummy (do nothing)</to>
+ <from>Sparkle</from>
+ <to>Get file names</to>
<enabled>Y</enabled>
</hop>
<hop>
- <from>Generate rows Azurite (azfs scheme)</from>
- <to>Parquet File Input</to>
+ <from>Get file names</from>
+ <to>Select values</to>
+ <enabled>Y</enabled>
+ </hop>
+ <hop>
+ <from>Select values</from>
+ <to>Dummy (do nothing)</to>
<enabled>Y</enabled>
</hop>
</order>
@@ -63,12 +68,44 @@ limitations under the License.
<attributes/>
<GUI>
<xloc>928</xloc>
- <yloc>160</yloc>
+ <yloc>128</yloc>
</GUI>
</transform>
<transform>
- <name>Generate rows Azurite (azfs scheme)</name>
- <type>RowGenerator</type>
+ <name>Get file names</name>
+ <type>GetFileNames</type>
+ <description/>
+ <distribute>Y</distribute>
+ <custom_distribution/>
+ <copies>1</copies>
+ <partitioning>
+ <method>none</method>
+ <schema_name/>
+ </partitioning>
+ <doNotFailIfNoFile>N</doNotFailIfNoFile>
+ <dynamic_include_subfolders>N</dynamic_include_subfolders>
+ <file>
+ <file_required>N</file_required>
+ <include_subfolders>N</include_subfolders>
+ <name>azit://mycontainer/copy/staging</name>
+ </file>
+ <filefield>N</filefield>
+ <filter>
+ <filterfiletype>all_files</filterfiletype>
+ </filter>
+ <isaddresult>Y</isaddresult>
+ <limit>0</limit>
+ <raiseAnExceptionIfNoFile>N</raiseAnExceptionIfNoFile>
+ <rownum>N</rownum>
+ <attributes/>
+ <GUI>
+ <xloc>528</xloc>
+ <yloc>128</yloc>
+ </GUI>
+ </transform>
+ <transform>
+ <name>Select values</name>
+ <type>SelectValues</type>
<description/>
<distribute>Y</distribute>
<custom_distribution/>
@@ -79,28 +116,19 @@ limitations under the License.
</partitioning>
<fields>
<field>
- <length>-1</length>
- <name>filename</name>
- <nullif>azfs://devstoreaccount1/mycontainer/azfs-scheme.parquet</nullif>
- <precision>-1</precision>
- <set_empty_string>N</set_empty_string>
- <type>String</type>
+ <name>short_filename</name>
</field>
+ <select_unspecified>N</select_unspecified>
</fields>
- <interval_in_ms>5000</interval_in_ms>
- <last_time_field>FiveSecondsAgo</last_time_field>
- <limit>1</limit>
- <never_ending>N</never_ending>
- <row_time_field>now</row_time_field>
<attributes/>
<GUI>
- <xloc>432</xloc>
- <yloc>160</yloc>
+ <xloc>736</xloc>
+ <yloc>128</yloc>
</GUI>
</transform>
<transform>
- <name>Parquet File Input</name>
- <type>ParquetFileInput</type>
+ <name>Sparkle</name>
+ <type>RowGenerator</type>
<description/>
<distribute>Y</distribute>
<custom_distribution/>
@@ -110,32 +138,16 @@ limitations under the License.
<schema_name/>
</partitioning>
<fields>
- <field>
- <source_field>id</source_field>
- <target_field>id</target_field>
- <target_type>Integer</target_type>
- </field>
- <field>
- <source_field>name</source_field>
- <target_field>name</target_field>
- <target_type>String</target_type>
- </field>
- <field>
- <source_field>company</source_field>
- <target_field>company</target_field>
- <target_type>String</target_type>
- </field>
- <field>
- <source_field>industry</source_field>
- <target_field>industry</target_field>
- <target_type>String</target_type>
- </field>
- </fields>
- <filename_field>filename</filename_field>
+</fields>
+ <interval_in_ms>5000</interval_in_ms>
+ <last_time_field>FiveSecondsAgo</last_time_field>
+ <limit>1</limit>
+ <never_ending>N</never_ending>
+ <row_time_field>now</row_time_field>
<attributes/>
<GUI>
- <xloc>680</xloc>
- <yloc>160</yloc>
+ <xloc>272</xloc>
+ <yloc>128</yloc>
</GUI>
</transform>
<transform_error_handling>
diff --git a/integration-tests/azure/datasets/golden-get-file-names.csv b/integration-tests/azure/datasets/golden-get-file-names.csv
new file mode 100644
index 0000000000..c534e773a9
--- /dev/null
+++ b/integration-tests/azure/datasets/golden-get-file-names.csv
@@ -0,0 +1,4 @@
+short_filename
+ingest-3.csv
+ingest-30k.csv
+ingest-300.csv
diff --git a/integration-tests/azure/dev-env-config.json b/integration-tests/azure/dev-env-config.json
index 6367db7992..427957b7f0 100644
--- a/integration-tests/azure/dev-env-config.json
+++ b/integration-tests/azure/dev-env-config.json
@@ -1,3 +1,6 @@
{
- "variables" : []
+ "variables" : [
+ { "name": "AZURE_ACCOUNT", "value": "<<<REPLACE_AZURE_ACCOUNT>>>" },
+ { "name": "AZURE_KEY", "value": "<<<REPLACE_AZURE_KEY>>>" }
+ ]
}
diff --git a/integration-tests/azure/disabled.txt b/integration-tests/azure/disabled.txt
new file mode 100644
index 0000000000..7560e5b3bc
--- /dev/null
+++ b/integration-tests/azure/disabled.txt
@@ -0,0 +1,17 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#
\ No newline at end of file
diff --git a/integration-tests/azure/hop-config.json b/integration-tests/azure/hop-config.json
index be6315624b..cde970cca2 100644
--- a/integration-tests/azure/hop-config.json
+++ b/integration-tests/azure/hop-config.json
@@ -288,8 +288,7 @@
"projectLifecycles": []
},
"azure" : {
- "account" : "devstoreaccount1",
- "key" :
"Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==",
- "emulatorUrl" : "http://azurite-blob:10000"
+ "account" : "<<<REPLACE_AZURE_ACCOUNT>>>",
+ "key" : "<<<REPLACE_AZURE_KEY>>>"
}
}
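
With the emulator block gone from hop-config.json, the default account and key are expected to come from the project's dev-env-config.json (shown above), where the <<<REPLACE_...>>> placeholders are substituted with real values before the tests run. A minimal sketch of that variable block, mirroring the change above with hypothetical placeholder values rather than prescribing anything:

{
  "variables" : [
    { "name": "AZURE_ACCOUNT", "value": "my-storage-account" },
    { "name": "AZURE_KEY", "value": "my-base64-account-key" }
  ]
}

The azure:// and azfs:// schemes used elsewhere in the tests appear to resolve against this default account, while additional accounts go through AzureConnectionDefinition entries such as azit.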
diff --git a/integration-tests/azure/main-0001-write-and-read-parquet-azfs-scheme.hwf b/integration-tests/azure/main-0001-write-and-read-parquet-azfs-scheme.hwf
index 946d9ee0b4..d5c9aad251 100644
--- a/integration-tests/azure/main-0001-write-and-read-parquet-azfs-scheme.hwf
+++ b/integration-tests/azure/main-0001-write-and-read-parquet-azfs-scheme.hwf
@@ -96,8 +96,6 @@ limitations under the License.
<create_parent_folder>N</create_parent_folder>
<exec_per_row>N</exec_per_row>
<filename>${PROJECT_HOME}/0001-read_parquet_from_azure_azfs_scheme.hpl</filename>
- <logext/>
- <logfile/>
<loglevel>Basic</loglevel>
<parameters>
<pass_all_parameters>Y</pass_all_parameters>
@@ -133,6 +131,42 @@ limitations under the License.
<yloc>416</yloc>
<attributes_hac/>
</action>
+ <action>
+ <name>Delete ALL files</name>
+ <description/>
+ <type>DELETE_FILES</type>
+ <attributes/>
+ <arg_from_previous>N</arg_from_previous>
+ <include_subfolders>Y</include_subfolders>
+ <fields>
+ <field>
+ <name>azure:///mycontainer/</name>
+ <filemask>.*</filemask>
+ </field>
+ </fields>
+ <parallel>N</parallel>
+ <xloc>160</xloc>
+ <yloc>96</yloc>
+ <attributes_hac/>
+ </action>
+ <action>
+ <name>Delete folders</name>
+ <description/>
+ <type>DELETE_FOLDERS</type>
+ <attributes/>
+ <arg_from_previous>N</arg_from_previous>
+ <success_condition>success_if_no_errors</success_condition>
+ <limit_folders>10</limit_folders>
+ <fields>
+ <field>
+ <name>azit://mycontainer/lvl1/lvl2</name>
+ </field>
+ </fields>
+ <parallel>N</parallel>
+ <xloc>240</xloc>
+ <yloc>80</yloc>
+ <attributes_hac/>
+ </action>
</actions>
<hops>
<hop>
@@ -172,11 +206,25 @@ limitations under the License.
</hop>
<hop>
<from>Start</from>
- <to>Write parquet file with azfs:// scheme</to>
+ <to>Delete ALL files</to>
<enabled>Y</enabled>
<evaluation>Y</evaluation>
<unconditional>Y</unconditional>
</hop>
+ <hop>
+ <from>Delete ALL files</from>
+ <to>Delete folders</to>
+ <enabled>Y</enabled>
+ <evaluation>Y</evaluation>
+ <unconditional>N</unconditional>
+ </hop>
+ <hop>
+ <from>Delete folders</from>
+ <to>Write parquet file with azfs:// scheme</to>
+ <enabled>Y</enabled>
+ <evaluation>Y</evaluation>
+ <unconditional>N</unconditional>
+ </hop>
</hops>
<notepads>
</notepads>
diff --git a/integration-tests/azure/main-0003-move-files.hwf b/integration-tests/azure/main-0003-move-files.hwf
index 02d72fd4d2..eb50e3f82f 100644
--- a/integration-tests/azure/main-0003-move-files.hwf
+++ b/integration-tests/azure/main-0003-move-files.hwf
@@ -79,13 +79,13 @@ limitations under the License.
<simulate>N</simulate>
<fields>
<field>
- <source_filefolder>azure:///mycontainer/azure-scheme.parquet</source_filefolder>
- <destination_filefolder>azure:///mycontainer/azure-scheme-mod.parquet</destination_filefolder>
+ <source_filefolder>azfs://${AZURE_ACCOUNT}/mycontainer/azfs-scheme.parquet</source_filefolder>
+ <destination_filefolder>azfs://${AZURE_ACCOUNT}/mycontainer/azfs-scheme-mod.parquet</destination_filefolder>
<wildcard/>
</field>
<field>
- <source_filefolder>azfs:///devstoreaccount1/mycontainer/azfs-scheme.parquet</source_filefolder>
- <destination_filefolder>azfs:///devstoreaccount1/mycontainer/azfs-scheme-mod.parquet</destination_filefolder>
+ <source_filefolder>azure:///mycontainer/azure-scheme.parquet</source_filefolder>
+ <destination_filefolder>azure:///mycontainer/azure-scheme-mod.parquet</destination_filefolder>
<wildcard/>
</field>
</fields>
@@ -115,7 +115,7 @@ limitations under the License.
<name>azure:///mycontainer/azure-scheme-mod.parquet</name>
</field>
<field>
- <name>azfs:///devstoreaccount1/mycontainer/azfs-scheme-mod.parquet</name>
+ <name>azfs:///${AZURE_ACCOUNT}/mycontainer/azfs-scheme-mod.parquet</name>
</field>
</fields>
<parallel>N</parallel>
@@ -134,7 +134,7 @@ limitations under the License.
<name>azure:///mycontainer/azure-scheme.parquet</name>
</field>
<field>
- <name>azfs:///devstoreaccount1/mycontainer/azfs-scheme.parquet</name>
+ <name>azfs:///${AZURE_ACCOUNT}/mycontainer/azfs-scheme.parquet</name>
</field>
</fields>
<parallel>N</parallel>
@@ -155,7 +155,7 @@ limitations under the License.
<filemask/>
</field>
<field>
- <name>azfs:///devstoreaccount1/mycontainer/azfs-scheme-mod.parquet</name>
+ <name>azfs:///${AZURE_ACCOUNT}/mycontainer/azfs-scheme-mod.parquet</name>
<filemask/>
</field>
</fields>
diff --git a/integration-tests/azure/main-0001-write-and-read-parquet-azfs-scheme.hwf b/integration-tests/azure/main-0005-write-and-read-parquet-multi-account.hwf
similarity index 83%
copy from integration-tests/azure/main-0001-write-and-read-parquet-azfs-scheme.hwf
copy to integration-tests/azure/main-0005-write-and-read-parquet-multi-account.hwf
index 946d9ee0b4..3044b0f11a 100644
--- a/integration-tests/azure/main-0001-write-and-read-parquet-azfs-scheme.hwf
+++ b/integration-tests/azure/main-0005-write-and-read-parquet-multi-account.hwf
@@ -18,7 +18,7 @@ limitations under the License.
-->
<workflow>
- <name>main-0001-write-and-read-parquet-azfs-scheme</name>
+ <name>main-0005-write-and-read-parquet-multi-account</name>
<name_sync_with_filename>Y</name_sync_with_filename>
<description/>
<extended_description/>
@@ -44,12 +44,12 @@ limitations under the License.
<schedulerType>0</schedulerType>
<weekDay>1</weekDay>
<parallel>N</parallel>
- <xloc>48</xloc>
+ <xloc>112</xloc>
<yloc>112</yloc>
<attributes_hac/>
</action>
<action>
- <name>Write parquet file with azfs:// scheme</name>
+ <name>Write parquet file with Multi account credentials</name>
<description/>
<type>PIPELINE</type>
<attributes/>
@@ -59,7 +59,9 @@ limitations under the License.
<clear_rows>N</clear_rows>
<create_parent_folder>N</create_parent_folder>
<exec_per_row>N</exec_per_row>
- <filename>${PROJECT_HOME}/0001-write_parquet_to_azure_azfs_scheme.hpl</filename>
+ <filename>${PROJECT_HOME}/0005-write_parquet_to_azure_multi_account.hpl</filename>
+ <logext/>
+ <logfile/>
<loglevel>Basic</loglevel>
<parameters>
<pass_all_parameters>Y</pass_all_parameters>
@@ -70,7 +72,7 @@ limitations under the License.
<set_logfile>N</set_logfile>
<wait_until_finished>Y</wait_until_finished>
<parallel>N</parallel>
- <xloc>320</xloc>
+ <xloc>368</xloc>
<yloc>112</yloc>
<attributes_hac/>
</action>
@@ -80,12 +82,12 @@ limitations under the License.
<type>SUCCESS</type>
<attributes/>
<parallel>N</parallel>
- <xloc>944</xloc>
+ <xloc>1056</xloc>
<yloc>112</yloc>
<attributes_hac/>
</action>
<action>
- <name>Read Parquet file with from Azure with azfs:// scheme</name>
+ <name>Read parquet from Azure with multi account credentials</name>
<description/>
<type>PIPELINE</type>
<attributes/>
@@ -95,9 +97,7 @@ limitations under the License.
<clear_rows>N</clear_rows>
<create_parent_folder>N</create_parent_folder>
<exec_per_row>N</exec_per_row>
- <filename>${PROJECT_HOME}/0001-read_parquet_from_azure_azfs_scheme.hpl</filename>
- <logext/>
- <logfile/>
+ <filename>${PROJECT_HOME}/0005-read_parquet_from_azure_multi_account.hpl</filename>
<loglevel>Basic</loglevel>
<parameters>
<pass_all_parameters>Y</pass_all_parameters>
@@ -108,7 +108,7 @@ limitations under the License.
<set_logfile>N</set_logfile>
<wait_until_finished>Y</wait_until_finished>
<parallel>N</parallel>
- <xloc>640</xloc>
+ <xloc>752</xloc>
<yloc>112</yloc>
<attributes_hac/>
</action>
@@ -118,7 +118,7 @@ limitations under the License.
<type>DUMMY</type>
<attributes/>
<parallel>N</parallel>
- <xloc>496</xloc>
+ <xloc>544</xloc>
<yloc>288</yloc>
<attributes_hac/>
</action>
@@ -129,35 +129,35 @@ limitations under the License.
<attributes/>
<always_log_rows>N</always_log_rows>
<parallel>N</parallel>
- <xloc>496</xloc>
+ <xloc>544</xloc>
<yloc>416</yloc>
<attributes_hac/>
</action>
</actions>
<hops>
<hop>
- <from>Write parquet file with azfs:// scheme</from>
- <to>Read Parquet file with from Azure with azfs:// scheme</to>
+ <from>Write parquet file with Multi account credentials</from>
+ <to>Read parquet from Azure with multi account credentials</to>
<enabled>Y</enabled>
<evaluation>Y</evaluation>
<unconditional>N</unconditional>
</hop>
<hop>
- <from>Read Parquet file with from Azure with azfs:// scheme</from>
+ <from>Read parquet from Azure with multi account credentials</from>
<to>Success</to>
<enabled>Y</enabled>
<evaluation>Y</evaluation>
<unconditional>N</unconditional>
</hop>
<hop>
- <from>Write parquet file with azfs:// scheme</from>
+ <from>Write parquet file with Multi account credentials</from>
<to>Dummy</to>
<enabled>Y</enabled>
<evaluation>N</evaluation>
<unconditional>N</unconditional>
</hop>
<hop>
- <from>Read Parquet file with from Azure with azfs:// scheme</from>
+ <from>Read parquet from Azure with multi account credentials</from>
<to>Dummy</to>
<enabled>Y</enabled>
<evaluation>N</evaluation>
@@ -172,7 +172,7 @@ limitations under the License.
</hop>
<hop>
<from>Start</from>
- <to>Write parquet file with azfs:// scheme</to>
+ <to>Write parquet file with Multi account credentials</to>
<enabled>Y</enabled>
<evaluation>Y</evaluation>
<unconditional>Y</unconditional>
diff --git a/integration-tests/azure/main-0006-smoke-test-multi-account.hwf b/integration-tests/azure/main-0006-smoke-test-multi-account.hwf
new file mode 100644
index 0000000000..e3fa1d0ff4
--- /dev/null
+++ b/integration-tests/azure/main-0006-smoke-test-multi-account.hwf
@@ -0,0 +1,286 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements. See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+-->
+<workflow>
+ <name>main-0006-smoke-test-multi-account</name>
+ <name_sync_with_filename>Y</name_sync_with_filename>
+ <description/>
+ <extended_description/>
+ <workflow_version/>
+ <created_user>-</created_user>
+ <created_date>2024/08/27 12:52:44.175</created_date>
+ <modified_user>-</modified_user>
+ <modified_date>2024/08/27 12:52:44.175</modified_date>
+ <parameters>
+ </parameters>
+ <actions>
+ <action>
+ <name>Start</name>
+ <description/>
+ <type>SPECIAL</type>
+ <attributes/>
+ <DayOfMonth>1</DayOfMonth>
+ <hour>12</hour>
+ <intervalMinutes>60</intervalMinutes>
+ <intervalSeconds>0</intervalSeconds>
+ <minutes>0</minutes>
+ <repeat>N</repeat>
+ <schedulerType>0</schedulerType>
+ <weekDay>1</weekDay>
+ <parallel>N</parallel>
+ <xloc>32</xloc>
+ <yloc>240</yloc>
+ <attributes_hac/>
+ </action>
+ <action>
+ <name>0006-create-file-from-csv.hpl</name>
+ <description/>
+ <type>PIPELINE</type>
+ <attributes/>
+ <add_date>N</add_date>
+ <add_time>N</add_time>
+ <clear_files>N</clear_files>
+ <clear_rows>N</clear_rows>
+ <create_parent_folder>N</create_parent_folder>
+ <exec_per_row>N</exec_per_row>
+ <filename>${PROJECT_HOME}/0006-create-file-from-csv.hpl</filename>
+ <loglevel>Basic</loglevel>
+ <parameters>
+ <pass_all_parameters>Y</pass_all_parameters>
+ </parameters>
+ <params_from_previous>N</params_from_previous>
+ <run_configuration>local</run_configuration>
+ <set_append_logfile>N</set_append_logfile>
+ <set_logfile>N</set_logfile>
+ <wait_until_finished>Y</wait_until_finished>
+ <parallel>N</parallel>
+ <xloc>512</xloc>
+ <yloc>240</yloc>
+ <attributes_hac/>
+ </action>
+ <action>
+ <name>File exists</name>
+ <description/>
+ <type>FILE_EXISTS</type>
+ <attributes/>
+ <filename>azit://mycontainer/${output_file}.csv</filename>
+ <parallel>N</parallel>
+ <xloc>768</xloc>
+ <yloc>240</yloc>
+ <attributes_hac/>
+ </action>
+ <action>
+ <name>Set Output file name</name>
+ <description/>
+ <type>SET_VARIABLES</type>
+ <attributes/>
+ <fields>
+ <field>
+ <variable_name>output_file</variable_name>
+ <variable_type>CURRENT_WORKFLOW</variable_type>
+ <variable_value>output-0006</variable_value>
+ </field>
+ </fields>
+ <file_variable_type>CURRENT_WORKFLOW</file_variable_type>
+ <replacevars>Y</replacevars>
+ <parallel>N</parallel>
+ <xloc>176</xloc>
+ <yloc>240</yloc>
+ <attributes_hac/>
+ </action>
+ <action>
+ <name>File exists level1</name>
+ <description/>
+ <type>FILE_EXISTS</type>
+ <attributes/>
+ <filename>azit://mycontainer/lvl1/${output_file}.csv</filename>
+ <parallel>N</parallel>
+ <xloc>912</xloc>
+ <yloc>240</yloc>
+ <attributes_hac/>
+ </action>
+ <action>
+ <name>Success</name>
+ <description/>
+ <type>SUCCESS</type>
+ <attributes/>
+ <parallel>N</parallel>
+ <xloc>1984</xloc>
+ <yloc>240</yloc>
+ <attributes_hac/>
+ </action>
+ <action>
+ <name>File exists level 2</name>
+ <description/>
+ <type>FILE_EXISTS</type>
+ <attributes/>
+ <filename>azit://mycontainer/lvl1/lvl2/${output_file}.csv</filename>
+ <parallel>N</parallel>
+ <xloc>1040</xloc>
+ <yloc>256</yloc>
+ <attributes_hac/>
+ </action>
+ <action>
+ <name>Delete files</name>
+ <description/>
+ <type>DELETE_FILES</type>
+ <attributes/>
+ <arg_from_previous>N</arg_from_previous>
+ <include_subfolders>Y</include_subfolders>
+ <fields>
+ <field>
+ <name>azit://mycontainer/${output_file}.csv</name>
+ <filemask/>
+ </field>
+ </fields>
+ <parallel>N</parallel>
+ <xloc>1248</xloc>
+ <yloc>240</yloc>
+ <attributes_hac/>
+ </action>
+ <action>
+ <name>Delete folders</name>
+ <description/>
+ <type>DELETE_FOLDERS</type>
+ <attributes/>
+ <arg_from_previous>N</arg_from_previous>
+ <success_condition>success_if_no_errors</success_condition>
+ <limit_folders>10</limit_folders>
+ <fields>
+ <field>
+ <name>azit://mycontainer/lvl1</name>
+ </field>
+ </fields>
+ <parallel>N</parallel>
+ <xloc>1600</xloc>
+ <yloc>240</yloc>
+ <attributes_hac/>
+ </action>
+ <action>
+ <name>Aborting</name>
+ <description/>
+ <type>DUMMY</type>
+ <attributes/>
+ <parallel>Y</parallel>
+ <xloc>1040</xloc>
+ <yloc>544</yloc>
+ <attributes_hac/>
+ </action>
+ <action>
+ <name>Abort workflow</name>
+ <description/>
+ <type>ABORT</type>
+ <attributes/>
+ <always_log_rows>N</always_log_rows>
+ <parallel>N</parallel>
+ <xloc>1248</xloc>
+ <yloc>576</yloc>
+ <attributes_hac/>
+ </action>
+ </actions>
+ <hops>
+ <hop>
+ <from>0006-create-file-from-csv.hpl</from>
+ <to>File exists</to>
+ <enabled>Y</enabled>
+ <evaluation>Y</evaluation>
+ <unconditional>N</unconditional>
+ </hop>
+ <hop>
+ <from>Start</from>
+ <to>Set Output file name</to>
+ <enabled>Y</enabled>
+ <evaluation>Y</evaluation>
+ <unconditional>Y</unconditional>
+ </hop>
+ <hop>
+ <from>Set Output file name</from>
+ <to>0006-create-file-from-csv.hpl</to>
+ <enabled>Y</enabled>
+ <evaluation>Y</evaluation>
+ <unconditional>Y</unconditional>
+ </hop>
+ <hop>
+ <from>Delete folders</from>
+ <to>Success</to>
+ <enabled>Y</enabled>
+ <evaluation>Y</evaluation>
+ <unconditional>N</unconditional>
+ </hop>
+ <hop>
+ <from>File exists level 2</from>
+ <to>Aborting</to>
+ <enabled>Y</enabled>
+ <evaluation>N</evaluation>
+ <unconditional>N</unconditional>
+ </hop>
+ <hop>
+ <from>Aborting</from>
+ <to>Abort workflow</to>
+ <enabled>Y</enabled>
+ <evaluation>Y</evaluation>
+ <unconditional>Y</unconditional>
+ </hop>
+ <hop>
+ <from>File exists</from>
+ <to>File exists level1</to>
+ <enabled>Y</enabled>
+ <evaluation>Y</evaluation>
+ <unconditional>N</unconditional>
+ </hop>
+ <hop>
+ <from>File exists level1</from>
+ <to>File exists level 2</to>
+ <enabled>Y</enabled>
+ <evaluation>Y</evaluation>
+ <unconditional>N</unconditional>
+ </hop>
+ <hop>
+ <from>File exists level1</from>
+ <to>Aborting</to>
+ <enabled>Y</enabled>
+ <evaluation>N</evaluation>
+ <unconditional>N</unconditional>
+ </hop>
+ <hop>
+ <from>File exists</from>
+ <to>Aborting</to>
+ <enabled>Y</enabled>
+ <evaluation>N</evaluation>
+ <unconditional>N</unconditional>
+ </hop>
+ <hop>
+ <from>File exists level 2</from>
+ <to>Delete files</to>
+ <enabled>Y</enabled>
+ <evaluation>Y</evaluation>
+ <unconditional>N</unconditional>
+ </hop>
+ <hop>
+ <from>Delete files</from>
+ <to>Delete folders</to>
+ <enabled>Y</enabled>
+ <evaluation>Y</evaluation>
+ <unconditional>N</unconditional>
+ </hop>
+ </hops>
+ <notepads>
+ </notepads>
+ <attributes/>
+</workflow>
diff --git a/integration-tests/azure/main-0001-write-and-read-parquet-azfs-scheme.hwf b/integration-tests/azure/main-0007-files-manipulation.hwf
similarity index 53%
copy from integration-tests/azure/main-0001-write-and-read-parquet-azfs-scheme.hwf
copy to integration-tests/azure/main-0007-files-manipulation.hwf
index 946d9ee0b4..8cd1d171cd 100644
--- a/integration-tests/azure/main-0001-write-and-read-parquet-azfs-scheme.hwf
+++ b/integration-tests/azure/main-0007-files-manipulation.hwf
@@ -18,15 +18,16 @@ limitations under the License.
-->
<workflow>
- <name>main-0001-write-and-read-parquet-azfs-scheme</name>
+ <name>main-0007-files-manipulation</name>
<name_sync_with_filename>Y</name_sync_with_filename>
<description/>
<extended_description/>
<workflow_version/>
+ <workflow_status>0</workflow_status>
<created_user>-</created_user>
- <created_date>2024/03/28 10:33:42.564</created_date>
+ <created_date>2024/08/27 14:56:12.718</created_date>
<modified_user>-</modified_user>
- <modified_date>2024/03/28 10:33:42.564</modified_date>
+ <modified_date>2024/08/27 14:56:12.718</modified_date>
<parameters>
</parameters>
<actions>
@@ -44,48 +45,73 @@ limitations under the License.
<schedulerType>0</schedulerType>
<weekDay>1</weekDay>
<parallel>N</parallel>
- <xloc>48</xloc>
- <yloc>112</yloc>
+ <xloc>112</xloc>
+ <yloc>192</yloc>
<attributes_hac/>
</action>
<action>
- <name>Write parquet file with azfs:// scheme</name>
+ <name>Copy files to azit subfolder</name>
<description/>
- <type>PIPELINE</type>
+ <type>COPY_FILES</type>
<attributes/>
- <add_date>N</add_date>
- <add_time>N</add_time>
- <clear_files>N</clear_files>
- <clear_rows>N</clear_rows>
- <create_parent_folder>N</create_parent_folder>
- <exec_per_row>N</exec_per_row>
- <filename>${PROJECT_HOME}/0001-write_parquet_to_azure_azfs_scheme.hpl</filename>
- <loglevel>Basic</loglevel>
- <parameters>
- <pass_all_parameters>Y</pass_all_parameters>
- </parameters>
- <params_from_previous>N</params_from_previous>
- <run_configuration>local</run_configuration>
- <set_append_logfile>N</set_append_logfile>
- <set_logfile>N</set_logfile>
- <wait_until_finished>Y</wait_until_finished>
+ <copy_empty_folders>Y</copy_empty_folders>
+ <arg_from_previous>N</arg_from_previous>
+ <overwrite_files>N</overwrite_files>
+ <include_subfolders>N</include_subfolders>
+ <remove_source_files>N</remove_source_files>
+ <add_result_filesname>N</add_result_filesname>
+ <destination_is_a_file>N</destination_is_a_file>
+ <create_destination_folder>Y</create_destination_folder>
+ <fields>
+ <field>
+ <source_filefolder>EMPTY_SOURCE_URL-0-${PROJECT_HOME}/datasets/</source_filefolder>
+ <source_configuration_name>LOCAL-SOURCE-FILE-0</source_configuration_name>
+ <destination_filefolder>EMPTY_DEST_URL-0-azit:/mycontainer/copy/staging</destination_filefolder>
+ <destination_configuration_name/>
+ <wildcard>ingest.*\.csv</wildcard>
+ </field>
+ </fields>
<parallel>N</parallel>
- <xloc>320</xloc>
- <yloc>112</yloc>
+ <xloc>528</xloc>
+ <yloc>192</yloc>
<attributes_hac/>
</action>
<action>
- <name>Success</name>
+ <name>Delete files</name>
<description/>
- <type>SUCCESS</type>
+ <type>DELETE_FILES</type>
+ <attributes/>
+ <arg_from_previous>N</arg_from_previous>
+ <include_subfolders>N</include_subfolders>
+ <fields>
+ <field>
+ <name>azit://mycontainer/copy/staging</name>
+ <filemask>ingest.*\.csv</filemask>
+ </field>
+ </fields>
+ <parallel>N</parallel>
+ <xloc>1104</xloc>
+ <yloc>176</yloc>
+ <attributes_hac/>
+ </action>
+ <action>
+ <name>Checks if files exist</name>
+ <description/>
+ <type>FILES_EXIST</type>
<attributes/>
+ <filename/>
+ <fields>
+ <field>
+ <name>azit://mycontainer/copy/staging/ingest-3.csv</name>
+ </field>
+ </fields>
<parallel>N</parallel>
- <xloc>944</xloc>
- <yloc>112</yloc>
+ <xloc>752</xloc>
+ <yloc>176</yloc>
<attributes_hac/>
</action>
<action>
- <name>Read Parquet file with from Azure with azfs:// scheme</name>
+ <name>0007-get-file-names.hpl</name>
<description/>
<type>PIPELINE</type>
<attributes/>
@@ -95,7 +121,7 @@ limitations under the License.
<clear_rows>N</clear_rows>
<create_parent_folder>N</create_parent_folder>
<exec_per_row>N</exec_per_row>
- <filename>${PROJECT_HOME}/0001-read_parquet_from_azure_azfs_scheme.hpl</filename>
+ <filename>${PROJECT_HOME}/0007-get-file-names.hpl</filename>
<logext/>
<logfile/>
<loglevel>Basic</loglevel>
@@ -108,77 +134,83 @@ limitations under the License.
<set_logfile>N</set_logfile>
<wait_until_finished>Y</wait_until_finished>
<parallel>N</parallel>
- <xloc>640</xloc>
- <yloc>112</yloc>
+ <xloc>912</xloc>
+ <yloc>176</yloc>
<attributes_hac/>
</action>
<action>
- <name>Dummy</name>
- <description/>
- <type>DUMMY</type>
- <attributes/>
- <parallel>N</parallel>
- <xloc>496</xloc>
- <yloc>288</yloc>
- <attributes_hac/>
- </action>
- <action>
- <name>Abort workflow</name>
+ <name>Success</name>
<description/>
- <type>ABORT</type>
+ <type>SUCCESS</type>
<attributes/>
- <always_log_rows>N</always_log_rows>
<parallel>N</parallel>
- <xloc>496</xloc>
- <yloc>416</yloc>
+ <xloc>1264</xloc>
+ <yloc>176</yloc>
<attributes_hac/>
</action>
</actions>
<hops>
<hop>
- <from>Write parquet file with azfs:// scheme</from>
- <to>Read Parquet file with from Azure with azfs:// scheme</to>
+ <from>Start</from>
+ <to>Copy files to azit subfolder</to>
<enabled>Y</enabled>
<evaluation>Y</evaluation>
- <unconditional>N</unconditional>
+ <unconditional>Y</unconditional>
</hop>
<hop>
- <from>Read Parquet file with from Azure with azfs:// scheme</from>
- <to>Success</to>
+ <from>Copy files to azit subfolder</from>
+ <to>Checks if files exist</to>
<enabled>Y</enabled>
<evaluation>Y</evaluation>
<unconditional>N</unconditional>
</hop>
<hop>
- <from>Write parquet file with azfs:// scheme</from>
- <to>Dummy</to>
+ <from>Checks if files exist</from>
+ <to>0007-get-file-names.hpl</to>
<enabled>Y</enabled>
- <evaluation>N</evaluation>
- <unconditional>N</unconditional>
- </hop>
- <hop>
- <from>Read Parquet file with from Azure with azfs:// scheme</from>
- <to>Dummy</to>
- <enabled>Y</enabled>
- <evaluation>N</evaluation>
+ <evaluation>Y</evaluation>
<unconditional>N</unconditional>
</hop>
<hop>
- <from>Dummy</from>
- <to>Abort workflow</to>
+ <from>0007-get-file-names.hpl</from>
+ <to>Delete files</to>
<enabled>Y</enabled>
<evaluation>Y</evaluation>
- <unconditional>Y</unconditional>
+ <unconditional>N</unconditional>
</hop>
<hop>
- <from>Start</from>
- <to>Write parquet file with azfs:// scheme</to>
+ <from>Delete files</from>
+ <to>Success</to>
<enabled>Y</enabled>
<evaluation>Y</evaluation>
- <unconditional>Y</unconditional>
+ <unconditional>N</unconditional>
</hop>
</hops>
<notepads>
</notepads>
- <attributes/>
+ <attributes>
+ <group>
+ <name>debug_plugin</name>
+ <attribute>
+ <key>Pipeline : action_log_result_rows</key>
+ <value>N</value>
+ </attribute>
+ <attribute>
+ <key>Pipeline : action_log_result_files</key>
+ <value>N</value>
+ </attribute>
+ <attribute>
+ <key>Pipeline : action_log_level</key>
+ <value>Debug</value>
+ </attribute>
+ <attribute>
+ <key>Pipeline : action_log_result</key>
+ <value>N</value>
+ </attribute>
+ <attribute>
+ <key>Pipeline : action_log_variables</key>
+ <value>N</value>
+ </attribute>
+ </group>
+ </attributes>
</workflow>
diff --git
a/integration-tests/azure/metadata/AzureConnectionDefinition/azit.json
b/integration-tests/azure/metadata/AzureConnectionDefinition/azit.json
new file mode 100644
index 0000000000..197e414f54
--- /dev/null
+++ b/integration-tests/azure/metadata/AzureConnectionDefinition/azit.json
@@ -0,0 +1,7 @@
+{
+ "name": "azit",
+ "description": "Azure Integration Tests",
+ "storageAccountEndpoint": "",
+ "storageAccountName": "${AZURE_ACCOUNT}",
+ "storageAccountKey": "${AZURE_KEY}"
+}
\ No newline at end of file
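The azit connection definition above is what the new integration-test workflows reference through the azit:// scheme. Purely as a hedged illustration (it assumes the definition is loaded into the active Hop metadata and that AZURE_ACCOUNT and AZURE_KEY are set in the environment), resolving a path through that connection with Hop VFS could look roughly like this; the class name and sample path are illustrative only:

    import org.apache.commons.vfs2.FileObject;
    import org.apache.hop.core.HopEnvironment;
    import org.apache.hop.core.vfs.HopVfs;

    public class AzitVfsSketch {
      public static void main(String[] args) throws Exception {
        // Initialise the Hop environment so VFS providers are registered.
        HopEnvironment.init();
        // Resolve a file through the "azit" connection defined in azit.json.
        FileObject file = HopVfs.getFileObject("azit://mycontainer/copy/staging/ingest-3.csv");
        System.out.println(file.getName() + " exists: " + file.exists());
      }
    }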
diff --git a/integration-tests/azure/metadata/dataset/Get File Names.json
b/integration-tests/azure/metadata/dataset/Get File Names.json
new file mode 100644
index 0000000000..8b9e2531cc
--- /dev/null
+++ b/integration-tests/azure/metadata/dataset/Get File Names.json
@@ -0,0 +1,16 @@
+{
+ "base_filename": "golden-get-file-names.csv",
+ "name": "Get File Names",
+ "description": "",
+ "dataset_fields": [
+ {
+ "field_comment": "",
+ "field_length": -1,
+ "field_type": 2,
+ "field_precision": -1,
+ "field_name": "short_filename",
+ "field_format": ""
+ }
+ ],
+ "folder_name": ""
+}
\ No newline at end of file
diff --git a/integration-tests/azure/metadata/unit-test/0007-get-file-names
UNIT.json b/integration-tests/azure/metadata/unit-test/0007-get-file-names
UNIT.json
new file mode 100644
index 0000000000..7e0df4ac23
--- /dev/null
+++ b/integration-tests/azure/metadata/unit-test/0007-get-file-names UNIT.json
@@ -0,0 +1,28 @@
+{
+ "variableValues": [],
+ "database_replacements": [],
+ "autoOpening": true,
+ "basePath": "",
+ "golden_data_sets": [
+ {
+ "field_mappings": [
+ {
+ "transform_field": "short_filename",
+ "data_set_field": "short_filename"
+ }
+ ],
+ "field_order": [
+ "short_filename"
+ ],
+ "data_set_name": "Get File Names",
+ "transform_name": "Dummy (do nothing)"
+ }
+ ],
+ "input_data_sets": [],
+ "name": "0007-get-file-names UNIT",
+ "description": "",
+ "persist_filename": "",
+ "trans_test_tweaks": [],
+ "pipeline_filename": "./0007-get-file-names.hpl",
+ "test_type": "UNIT_TEST"
+}
\ No newline at end of file
diff --git a/plugins/tech/azure/pom.xml b/plugins/tech/azure/pom.xml
index 21f262b259..fcf4e90e08 100755
--- a/plugins/tech/azure/pom.xml
+++ b/plugins/tech/azure/pom.xml
@@ -31,11 +31,18 @@
<properties>
<azure-eventhubs.version>3.3.0</azure-eventhubs.version>
- <azure-storage.version>8.6.6</azure-storage.version>
+ <azure-sdk-bom.version>1.2.26</azure-sdk-bom.version>
</properties>
<dependencyManagement>
<dependencies>
+ <dependency>
+ <groupId>com.azure</groupId>
+ <artifactId>azure-sdk-bom</artifactId>
+ <version>${azure-sdk-bom.version}</version>
+ <type>pom</type>
+ <scope>import</scope>
+ </dependency>
<dependency>
<groupId>org.apache.hop</groupId>
<artifactId>hop-libs</artifactId>
@@ -47,6 +54,28 @@
</dependencyManagement>
<dependencies>
+ <dependency>
+ <groupId>com.azure</groupId>
+ <artifactId>azure-storage-blob</artifactId>
+ <exclusions>
+ <exclusion>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>*</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.fasterxml.jackson.dataformat</groupId>
+ <artifactId>*</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.slf4j</groupId>
+ <artifactId>*</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>com.azure</groupId>
+ <artifactId>azure-storage-file-datalake</artifactId>
+ </dependency>
<dependency>
<groupId>com.microsoft.azure</groupId>
<artifactId>azure-eventhubs</artifactId>
@@ -89,46 +118,22 @@
<exclusions>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
- <artifactId>commons-lang3</artifactId>
- </exclusion>
- <exclusion>
- <groupId>com.google.code.gson</groupId>
<artifactId>*</artifactId>
</exclusion>
<exclusion>
- <groupId>com.google.guava</groupId>
- <artifactId>*</artifactId>
- </exclusion>
- </exclusions>
- </dependency>
- <dependency>
- <groupId>com.microsoft.azure</groupId>
- <artifactId>azure-storage</artifactId>
- <version>${azure-storage.version}</version>
- <exclusions>
- <exclusion>
- <groupId>com.fasterxml.jackson.core</groupId>
+ <groupId>com.google.code.gson</groupId>
<artifactId>*</artifactId>
</exclusion>
<exclusion>
<groupId>com.google.guava</groupId>
<artifactId>*</artifactId>
</exclusion>
- <exclusion>
- <groupId>org.apache.commons</groupId>
- <artifactId>commons-lang3</artifactId>
- </exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>*</artifactId>
</exclusion>
</exclusions>
</dependency>
- <dependency>
- <groupId>org.apache.commons</groupId>
- <artifactId>commons-lang3</artifactId>
- <scope>provided</scope>
- </dependency>
</dependencies>
</project>
diff --git a/plugins/tech/azure/src/assembly/assembly.xml
b/plugins/tech/azure/src/assembly/assembly.xml
index 40ab8dcedf..b182a1a6d3 100644
--- a/plugins/tech/azure/src/assembly/assembly.xml
+++ b/plugins/tech/azure/src/assembly/assembly.xml
@@ -52,12 +52,5 @@
</excludes>
<outputDirectory>plugins/tech/azure/lib</outputDirectory>
</dependencySet>
- <dependencySet>
- <scope>provided</scope>
- <includes>
- <include>org.apache.commons:commons-lang3:jar</include>
- </includes>
- <outputDirectory>lib/core</outputDirectory>
- </dependencySet>
</dependencySets>
</assembly>
diff --git
a/plugins/tech/azure/src/main/java/org/apache/hop/vfs/azure/AzureFileNameParser.java
b/plugins/tech/azure/src/main/java/org/apache/hop/vfs/azure/AzureFileNameParser.java
index 734af4391f..23ded6e4f0 100644
---
a/plugins/tech/azure/src/main/java/org/apache/hop/vfs/azure/AzureFileNameParser.java
+++
b/plugins/tech/azure/src/main/java/org/apache/hop/vfs/azure/AzureFileNameParser.java
@@ -18,7 +18,7 @@
package org.apache.hop.vfs.azure;
-import org.apache.commons.lang.StringUtils;
+import org.apache.commons.io.FilenameUtils;
import org.apache.commons.vfs2.FileName;
import org.apache.commons.vfs2.FileSystemException;
import org.apache.commons.vfs2.FileType;
@@ -73,10 +73,7 @@ public class AzureFileNameParser extends HostFileNameParser {
String path =
normalizedUri.substring(normalizedUri.indexOf('/', 1),
normalizedUri.length());
AzureConfig azureConfig = AzureConfigSingleton.getConfig();
- absPath =
- StringUtils.isNotBlank(azureConfig.getAccount())
- ? path.replace("/" + azureConfig.getAccount(), "")
- : path;
+ absPath = stripFirstFolder(path);
}
}
return new AzureFileName(scheme, absPath, fileType);
@@ -89,4 +86,24 @@ public class AzureFileNameParser extends HostFileNameParser {
return FileType.FILE;
}
}
+
+ public static String stripFirstFolder(String path) {
+ // Normalize the path to ensure correct separators
+ String normalizedPath = FilenameUtils.normalizeNoEndSeparator(path, true);
+
+ // Remove the leading '/'
+ String withoutLeadingSlash =
+ normalizedPath.startsWith("/") ? normalizedPath.substring(1) :
normalizedPath;
+
+ // Find the index of the first '/'
+ int index = withoutLeadingSlash.indexOf("/");
+
+ // If '/' is found, return the substring after the first folder
+ if (index != -1) {
+ return "/" + withoutLeadingSlash.substring(index + 1);
+ } else {
+      // No '/' found: there is no sub-path left after the first folder, so return an empty string
+ return "";
+ }
+ }
}
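A quick illustration of the parser helper above (not part of the patch): stripFirstFolder() now unconditionally drops the first path element, i.e. the account segment, instead of comparing it against the configured account name. The class name and sample paths below are purely illustrative and assume the plugin classes and commons-io are on the classpath.

    import org.apache.hop.vfs.azure.AzureFileNameParser;

    public class StripFirstFolderExample {
      public static void main(String[] args) {
        // "/hopsa/mycontainer/folder1/file.parquet" -> "/mycontainer/folder1/file.parquet"
        System.out.println(
            AzureFileNameParser.stripFirstFolder("/hopsa/mycontainer/folder1/file.parquet"));
        // A path containing only the account segment has nothing left after stripping: ""
        System.out.println(AzureFileNameParser.stripFirstFolder("/hopsa"));
      }
    }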
diff --git
a/plugins/tech/azure/src/main/java/org/apache/hop/vfs/azure/AzureFileObject.java
b/plugins/tech/azure/src/main/java/org/apache/hop/vfs/azure/AzureFileObject.java
index a047abaf15..c3381a9ef1 100644
---
a/plugins/tech/azure/src/main/java/org/apache/hop/vfs/azure/AzureFileObject.java
+++
b/plugins/tech/azure/src/main/java/org/apache/hop/vfs/azure/AzureFileObject.java
@@ -18,39 +18,38 @@
package org.apache.hop.vfs.azure;
-import com.microsoft.azure.storage.StorageException;
-import com.microsoft.azure.storage.blob.CloudBlob;
-import com.microsoft.azure.storage.blob.CloudBlobClient;
-import com.microsoft.azure.storage.blob.CloudBlobContainer;
-import com.microsoft.azure.storage.blob.CloudBlobDirectory;
-import com.microsoft.azure.storage.blob.CloudBlockBlob;
-import com.microsoft.azure.storage.blob.ListBlobItem;
+import com.azure.core.http.rest.PagedIterable;
+import com.azure.core.util.Context;
+import com.azure.storage.file.datalake.DataLakeDirectoryClient;
+import com.azure.storage.file.datalake.DataLakeFileClient;
+import com.azure.storage.file.datalake.DataLakeFileSystemClient;
+import com.azure.storage.file.datalake.DataLakeServiceClient;
+import com.azure.storage.file.datalake.models.ListFileSystemsOptions;
+import com.azure.storage.file.datalake.models.ListPathsOptions;
+import com.azure.storage.file.datalake.models.PathItem;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
-import java.net.URI;
-import java.net.URISyntaxException;
-import java.nio.charset.StandardCharsets;
+import java.time.Duration;
import java.util.ArrayList;
-import java.util.Date;
import java.util.List;
+import org.apache.commons.lang3.ArrayUtils;
+import org.apache.commons.lang3.StringUtils;
import org.apache.commons.vfs2.FileObject;
import org.apache.commons.vfs2.FileSystemException;
import org.apache.commons.vfs2.FileType;
import org.apache.commons.vfs2.provider.AbstractFileName;
import org.apache.commons.vfs2.provider.AbstractFileObject;
-import org.apache.commons.vfs2.provider.UriParser;
+import org.apache.hop.core.exception.HopException;
public class AzureFileObject extends AbstractFileObject<AzureFileSystem> {
public class BlockBlobOutputStream extends OutputStream {
- private final CloudBlockBlob bb;
private final OutputStream outputStream;
long written = 0;
- public BlockBlobOutputStream(CloudBlockBlob bb, OutputStream outputStream)
{
- this.bb = bb;
+ public BlockBlobOutputStream(OutputStream outputStream) {
this.outputStream = outputStream;
}
@@ -88,138 +87,151 @@ public class AzureFileObject extends
AbstractFileObject<AzureFileSystem> {
}
}
- private final CloudBlobClient service;
+ private final DataLakeServiceClient service;
private boolean attached = false;
private long size;
private long lastModified;
private FileType type;
private List<String> children = null;
- private CloudBlobContainer container;
- private String containerPath;
- private CloudBlob cloudBlob;
- private CloudBlobDirectory cloudDir;
+ private DataLakeFileClient dataLakeFileClient;
+ private String currentFilePath;
+ private PathItem pathItem;
+ private PathItem dirPathItem;
private final String markerFileName = ".cvfs.temp";
private OutputStream blobOutputStream;
+ private String containerName;
public AzureFileObject(
- AbstractFileName fileName, AzureFileSystem fileSystem, CloudBlobClient
service)
+ AbstractFileName fileName, AzureFileSystem fileSystem,
DataLakeServiceClient service)
throws FileSystemException {
super(fileName, fileSystem);
this.service = service;
}
@Override
- protected void doAttach() throws URISyntaxException, StorageException {
- if (!attached) {
- if (getName().getPath().equals("/")) {
- children = new ArrayList<>();
- for (CloudBlobContainer container : service.listContainers()) {
- children.add(container.getName());
- }
- size = children.size();
- lastModified = 0;
+ protected void doAttach() throws HopException {
+ if (attached) {
+ return;
+ }
+ containerName = ((AzureFileName) getName()).getContainer();
+ String fullPath = ((AzureFileName) getName()).getPath();
+ DataLakeFileSystemClient fileSystemClient =
service.getFileSystemClient(containerName);
+ ListPathsOptions lpo = new ListPathsOptions();
+ children = new ArrayList<>();
+ if (isFileSystemRoot(fullPath)) {
+ service
+ .listFileSystems()
+ .iterator()
+ .forEachRemaining(
+ item -> {
+ children.add(StringUtils.substringAfterLast(item.getName(),
"/"));
+ });
+
+ size = children.size();
+ lastModified = 0;
+ type = FileType.FOLDER;
+ dataLakeFileClient = null;
+ currentFilePath = "";
+ } else if (isContainer(fullPath)) {
+ if (containerExists()) {
+ type = FileType.FOLDER;
+ fileSystemClient.listPaths().forEach(pi -> children.add(pi.getName()));
+ } else {
+ type = FileType.IMAGINARY;
+ throw new HopException("Container does not exist: " + fullPath);
+ }
+ } else {
+      // This is either the container root or a sub-directory / file inside the container
+ currentFilePath = ((AzureFileName) getName()).getPathAfterContainer();
+ if (StringUtils.isEmpty(currentFilePath)) {
type = FileType.FOLDER;
- container = null;
- containerPath = "";
+ fileSystemClient.listPaths().forEach(pi -> children.add(pi.getName()));
} else {
- String containerName = ((AzureFileName) getName()).getContainer();
- container = service.getContainerReference(containerName);
- containerPath = ((AzureFileName) getName()).getPathAfterContainer();
- String thisPath = "/" + containerName + containerPath;
- if (container.exists()) {
+ lpo.setPath(currentFilePath);
+ DataLakeDirectoryClient directoryClient =
+ fileSystemClient.getDirectoryClient(currentFilePath);
+ final Boolean exists = directoryClient.exists();
+
+ final Boolean isDirectory =
+ exists
+ && fileSystemClient
+ .getDirectoryClient(currentFilePath)
+ .getProperties()
+ .isDirectory();
+ final Boolean isFile = !isDirectory;
+ if (exists && isDirectory) {
children = new ArrayList<>();
- if (containerPath.equals("")) {
- if (container.exists()) {
- for (ListBlobItem item : container.listBlobs()) {
- StringBuilder path = new
StringBuilder(item.getUri().getPath());
- UriParser.extractFirstElement(path);
- children.add(path.substring(1));
- }
- type = FileType.FOLDER;
- } else {
- type = FileType.IMAGINARY;
- }
- lastModified = 0;
- size = children.size();
- } else {
- /*
- * Look in the parent path for this filename AND and
- * direct descendents
- */
- cloudBlob = null;
- cloudDir = null;
- String relpath =
- removeLeadingSlash(
- ((AzureFileName)
(getName().getParent())).getPathAfterContainer());
- for (ListBlobItem item :
- relpath.equals("") ? container.listBlobs() :
container.listBlobs(relpath + "/")) {
- String itemPath = removeTrailingSlash(item.getUri().getPath());
- if (pathsMatch(itemPath, thisPath)) {
- if (item instanceof CloudBlob) {
- cloudBlob = (CloudBlob) item;
- } else {
- cloudDir = (CloudBlobDirectory) item;
- for (ListBlobItem blob : cloudDir.listBlobs()) {
- URI blobUri = blob.getUri();
- String path = blobUri.getPath();
- while (path.endsWith("/")) path = path.substring(0,
path.length() - 1);
- int idx = path.lastIndexOf('/');
- if (idx != -1) path = path.substring(idx + 1);
- children.add(path);
- }
- }
- break;
- }
- }
- if (cloudBlob != null) {
- type = FileType.FILE;
- size = cloudBlob.getProperties().getLength();
- if (cloudBlob.getMetadata().containsKey("ActualLength")) {
- size =
Long.parseLong(cloudBlob.getMetadata().get("ActualLength"));
- }
- String disp = cloudBlob.getProperties().getContentDisposition();
- if (disp != null && disp.startsWith("vfs ; length=\"")) {
- size = Long.parseLong(disp.substring(14, disp.length() - 1));
- }
- Date lastModified2 = cloudBlob.getProperties().getLastModified();
- lastModified = lastModified2 == null ? 0 :
lastModified2.getTime();
- } else if (cloudDir != null) {
- type = FileType.FOLDER;
- size = children.size();
- lastModified = 0;
- } else {
- lastModified = 0;
- type = FileType.IMAGINARY;
- size = 0;
- }
- }
+ PagedIterable<PathItem> pathItems = fileSystemClient.listPaths(lpo,
null);
+ pathItems.forEach(
+ item -> {
+ children.add(
+ StringUtils.removeStart(
+
item.getName().replace(StringUtils.removeStart(currentFilePath, "/"), ""),
+ "/"));
+ });
+ size = children.size();
+ type = FileType.FOLDER;
+ lastModified =
directoryClient.getProperties().getLastModified().toEpochSecond();
+ } else if (exists && isFile) {
+ dataLakeFileClient = fileSystemClient.getFileClient(currentFilePath);
+ size = dataLakeFileClient.getProperties().getFileSize();
+ type = FileType.FILE;
+ lastModified =
dataLakeFileClient.getProperties().getLastModified().toEpochSecond();
} else {
lastModified = 0;
type = FileType.IMAGINARY;
size = 0;
- cloudBlob = null;
- cloudDir = null;
+ pathItem = null;
+ dirPathItem = null;
}
}
}
}
- private boolean pathsMatch(String path, String currentPath) {
- return path.replace("/" + ((AzureFileSystem)
getFileSystem()).getAccount(), "")
- .equals(currentPath);
+ private boolean containerExists() {
+ String containerName = ((AzureFileName) getName()).getContainer();
+ ListFileSystemsOptions fileSystemsOptions = new ListFileSystemsOptions();
+ fileSystemsOptions.setPrefix(containerName);
+
+ final DataLakeFileSystemClient fileSystemClient =
service.getFileSystemClient(containerName);
+
+ try {
+ return fileSystemClient.existsWithResponse(Duration.ofSeconds(5),
Context.NONE).getValue();
+ } catch (IllegalStateException e) {
+ return false;
+ }
+ }
+
+ private boolean isContainer(String fullPath) {
+ final String container = ((AzureFileName) getName()).getContainer();
+    final String fullPathWithoutLeadingSlash = StringUtils.removeStart(fullPath, "/");
+    if (StringUtils.equals(container, fullPathWithoutLeadingSlash)
+        && !StringUtils.isEmpty(fullPathWithoutLeadingSlash)) {
+ return true;
+ }
+ return false;
+ }
+
+ private static boolean isFileSystemRoot(String fullPath) {
+ return "/".equals(fullPath);
+ }
+
+ private String getFilePath(String filename) {
+ String filePath = filename.substring(filename.indexOf('/'),
filename.length());
+ return filePath;
}
@Override
- protected void doDetach() throws Exception {
+ protected void doDetach() {
if (this.attached) {
this.attached = false;
this.children = null;
this.size = 0;
this.type = null;
- this.container = null;
- this.containerPath = null;
- this.cloudBlob = null;
- this.cloudDir = null;
+ this.dataLakeFileClient = null;
+ this.currentFilePath = null;
+ this.pathItem = null;
+ this.dirPathItem = null;
}
}
@@ -233,30 +245,48 @@ public class AzureFileObject extends
AbstractFileObject<AzureFileSystem> {
return getName().getBaseName().equals(markerFileName);
}
+  /**
+   * Check if the file can be renamed to the new file. This is not a feature supported natively by
+   * the Azure SDK, but renaming can be implemented by copying the file to the new location and
+   * deleting the old one, so renaming a file is possible.
+   *
+   * @param newfile the new file
+   * @return always true
+   */
public boolean canRenameTo(FileObject newfile) {
- throw new UnsupportedOperationException();
+ return true;
}
@Override
protected void doDelete() throws Exception {
- if (container == null) {
+ DataLakeFileSystemClient fileSystemClient =
service.getFileSystemClient(containerName);
+ DataLakeFileClient fileClient =
fileSystemClient.getFileClient(currentFilePath.substring(1));
+ if (fileClient == null) {
throw new UnsupportedOperationException();
} else {
FileObject parent = getParent();
boolean lastFile = ((AzureFileObject) parent).doListChildren().length ==
1;
try {
- if (containerPath.equals("")) {
- container.delete();
+ if (currentFilePath.equals("")) {
+ fileClient.delete();
} else {
- if (cloudBlob != null) cloudBlob.delete();
- else if (cloudDir != null) {
- for (ListBlobItem item :
- container.listBlobs(((AzureFileName)
getName()).getPathAfterContainer(), true)) {
- String path = item.getUri().getPath();
- if (item instanceof CloudBlob &&
path.startsWith(getName().getPath())) {
- ((CloudBlob) item).delete();
- }
- }
+ if (StringUtils.isNotEmpty(currentFilePath) && fileClient.exists()) {
+ fileClient.delete();
+ } else if (dirPathItem != null) {
+ ListPathsOptions lpo = new ListPathsOptions();
+ lpo.setPath(((AzureFileName) getName()).getPathAfterContainer());
+
+ fileSystemClient
+ .listPaths(lpo, null)
+ .forEach(
+ pi -> {
+ if (!pi.isDirectory()
+ &&
getFilePath(pi.getName()).startsWith(getName().getPath())) {
+ DataLakeFileClient dataLakeFileClient =
+ fileSystemClient.getFileClient(pathItem.getName());
+ dataLakeFileClient.delete();
+ }
+ });
} else {
throw new UnsupportedOperationException();
}
@@ -283,46 +313,31 @@ public class AzureFileObject extends
AbstractFileObject<AzureFileSystem> {
@Override
protected void doRename(FileObject newfile) throws Exception {
- if (cloudBlob != null) {
- // Get the new blob reference
- CloudBlobContainer newContainer =
- service.getContainerReference(((AzureFileName)
newfile.getName()).getContainer());
- CloudBlob newBlob =
- newContainer.getBlobReferenceFromServer(
- ((AzureFileName)
newfile.getName()).getPathAfterContainer().substring(1));
+ if (!StringUtils.isEmpty(currentFilePath)) {
+ DataLakeFileSystemClient fileSystemClient =
service.getFileSystemClient(containerName);
+ DataLakeFileClient fileClient =
fileSystemClient.getFileClient(currentFilePath.substring(1));
+ // Get the new blob reference
+ // CloudBlobContainer newContainer =
+ // service.getContainerReference(((AzureFileName)
newfile.getName()).getContainer());
+ // CloudBlob newBlob =
+ // newContainer.getBlobReferenceFromServer(
+ // ((AzureFileName)
newfile.getName()).getPathAfterContainer().substring(1));
// Start the copy operation
- newBlob.startCopy(cloudBlob.getUri());
+ fileClient.rename(
+ containerName, ((AzureFileName)
newfile.getName()).getPathAfterContainer().substring(1));
+ // newBlob.startCopy(cloudBlob.getUri());
// Delete the original blob
- doDelete();
+ // doDelete();
} else {
throw new FileSystemException("Renaming of directories not supported on
this file.");
}
}
@Override
- protected void doCreateFolder() throws StorageException, URISyntaxException,
IOException {
- if (container == null) {
- throw new UnsupportedOperationException();
- } else if (containerPath.equals("")) {
- container.create();
- type = FileType.FOLDER;
- children = new ArrayList<>();
- } else {
- /*
- * Azure doesn't actually have folders, so we create a temporary
- * 'file' in the 'folder'
- */
- CloudBlockBlob blob =
- container.getBlockBlobReference(containerPath.substring(1) + "/" +
markerFileName);
- byte[] buf =
- ("This is a temporary blob created by a Commons VFS application to
simulate a folder. It "
- + "may be safely deleted, but this will hide the folder in
the application if it is empty.")
- .getBytes(StandardCharsets.UTF_8);
- blob.uploadFromByteArray(buf, 0, buf.length);
- type = FileType.FOLDER;
- children = new ArrayList<>();
- }
+ protected void doCreateFolder() {
+    // Create the folder; the directory path inside the container is already known
+
service.getFileSystemClient(containerName).createDirectory(currentFilePath.substring(1));
}
@Override
@@ -332,13 +347,17 @@ public class AzureFileObject extends
AbstractFileObject<AzureFileSystem> {
@Override
protected OutputStream doGetOutputStream(boolean bAppend) throws Exception {
- if (container != null && !containerPath.equals("")) {
- if (bAppend) throw new UnsupportedOperationException();
- final CloudBlockBlob cbb =
container.getBlockBlobReference(removeLeadingSlash(containerPath));
+ if (StringUtils.isEmpty(currentFilePath)) {
+ throw new UnsupportedOperationException();
+ }
+ DataLakeFileSystemClient fileSystemClient =
service.getFileSystemClient(containerName);
+ DataLakeFileClient dataLakeFileClient =
fileSystemClient.getFileClient(currentFilePath);
+ if (dataLakeFileClient != null) {
+ if (bAppend) {
+ throw new UnsupportedOperationException();
+ }
type = FileType.FILE;
- blobOutputStream =
-
container.getBlockBlobReference(removeLeadingSlash(containerPath)).openOutputStream();
- return new BlockBlobOutputStream(cbb, blobOutputStream);
+ return new BlockBlobOutputStream(dataLakeFileClient.getOutputStream());
} else {
throw new UnsupportedOperationException();
}
@@ -346,8 +365,13 @@ public class AzureFileObject extends
AbstractFileObject<AzureFileSystem> {
@Override
protected InputStream doGetInputStream() throws Exception {
- if (container != null && !containerPath.equals("") && type ==
FileType.FILE) {
- return new BlobInputStream(cloudBlob.openInputStream(), size);
+ if (!currentFilePath.equals("") && type == FileType.FILE) {
+ DataLakeFileSystemClient fileSystemClient =
service.getFileSystemClient(containerName);
+ DataLakeFileClient fileClient =
fileSystemClient.getFileClient(currentFilePath);
+ if (!fileSystemClient.exists() || !fileClient.exists()) {
+ throw new FileSystemException("File not found: " + currentFilePath);
+ }
+ return new BlobInputStream(fileClient.openInputStream(), size);
} else {
throw new UnsupportedOperationException();
}
@@ -360,7 +384,9 @@ public class AzureFileObject extends
AbstractFileObject<AzureFileSystem> {
@Override
protected String[] doListChildren() throws Exception {
- return children == null ? null : children.toArray(new String[0]);
+ return children == null
+ ? ArrayUtils.toStringArray(getChildren())
+ : children.toArray(new String[0]);
}
@Override
@@ -373,6 +399,20 @@ public class AzureFileObject extends
AbstractFileObject<AzureFileSystem> {
return size;
}
+ @Override
+ public boolean delete() throws FileSystemException {
+ if (dataLakeFileClient.exists()) {
+ try {
+ doDelete();
+ return true;
+      } catch (Exception e) {
+        // TODO log an error
+        return false;
+      }
+ }
+ return false;
+ }
+
private static String removeTrailingSlash(String itemPath) {
while (itemPath.endsWith("/")) itemPath = itemPath.substring(0,
itemPath.length() - 1);
return itemPath;
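For orientation, the rewritten doAttach() above distinguishes the file system root, a bare container and a path inside a container, and uses ListPathsOptions to list only the requested directory. The stand-alone sketch below shows the same gen2 listing pattern in isolation; endpoint, account, key and container names are placeholders, not values used by the integration tests.

    import com.azure.storage.common.StorageSharedKeyCredential;
    import com.azure.storage.file.datalake.DataLakeFileSystemClient;
    import com.azure.storage.file.datalake.DataLakeServiceClient;
    import com.azure.storage.file.datalake.DataLakeServiceClientBuilder;
    import com.azure.storage.file.datalake.models.ListPathsOptions;

    public class ListPathsSketch {
      public static void main(String[] args) {
        DataLakeServiceClient service =
            new DataLakeServiceClientBuilder()
                .endpoint("https://myaccount.dfs.core.windows.net")
                .credential(new StorageSharedKeyCredential("myaccount", "base64-account-key"))
                .buildClient();
        DataLakeFileSystemClient fileSystem = service.getFileSystemClient("mycontainer");
        // Restrict the listing to one directory, as doAttach() does for sub-folders.
        ListPathsOptions options = new ListPathsOptions().setPath("copy/staging");
        fileSystem
            .listPaths(options, null)
            .forEach(pathItem ->
                System.out.println(pathItem.getName() + " directory=" + pathItem.isDirectory()));
      }
    }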
diff --git
a/plugins/tech/azure/src/main/java/org/apache/hop/vfs/azure/AzureFileProvider.java
b/plugins/tech/azure/src/main/java/org/apache/hop/vfs/azure/AzureFileProvider.java
index 433d81499f..1a44ca5736 100644
---
a/plugins/tech/azure/src/main/java/org/apache/hop/vfs/azure/AzureFileProvider.java
+++
b/plugins/tech/azure/src/main/java/org/apache/hop/vfs/azure/AzureFileProvider.java
@@ -18,13 +18,13 @@
package org.apache.hop.vfs.azure;
-import com.microsoft.azure.storage.CloudStorageAccount;
-import com.microsoft.azure.storage.blob.CloudBlobClient;
-import java.net.URISyntaxException;
-import java.security.InvalidKeyException;
+import com.azure.storage.common.StorageSharedKeyCredential;
+import com.azure.storage.file.datalake.DataLakeServiceClient;
+import com.azure.storage.file.datalake.DataLakeServiceClientBuilder;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
+import java.util.Locale;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -38,6 +38,7 @@ import org.apache.commons.vfs2.UserAuthenticationData;
import org.apache.commons.vfs2.provider.AbstractOriginatingFileProvider;
import org.apache.commons.vfs2.util.UserAuthenticatorUtils;
import org.apache.hop.core.encryption.Encr;
+import org.apache.hop.core.util.Utils;
import org.apache.hop.core.variables.IVariables;
import org.apache.hop.core.variables.Variables;
import org.apache.hop.vfs.azure.config.AzureConfig;
@@ -97,18 +98,19 @@ public class AzureFileProvider extends
AbstractOriginatingFileProvider {
FileSystemOptions fsOptions =
fileSystemOptions != null ? fileSystemOptions :
getDefaultFileSystemOptions();
- UserAuthenticationData authData = null;
- CloudBlobClient service;
+ UserAuthenticationData authData = null;
+ AzureFileSystem azureFileSystem;
String account;
String key;
- String url;
+ String endpoint;
try {
authData = UserAuthenticatorUtils.authenticate(fsOptions,
AUTHENTICATOR_TYPES);
logger.info("Initialize Azure client");
+ AzureFileName azureRootName = (AzureFileName) fileName;
if (azureMetadataType != null) {
if (StringUtils.isEmpty(azureMetadataType.getStorageAccountName())) {
@@ -128,8 +130,10 @@ public class AzureFileProvider extends
AbstractOriginatingFileProvider {
key =
Encr.decryptPasswordOptionallyEncrypted(
variables.resolve(azureMetadataType.getStorageAccountKey()));
- url = variables.resolve(azureMetadataType.getStorageAccountEndpoint());
-
+ endpoint =
+ (!Utils.isEmpty(azureMetadataType.getStorageAccountEndpoint()))
+ ?
variables.resolve(azureMetadataType.getStorageAccountEndpoint())
+ : String.format(Locale.ROOT,
"https://%s.dfs.core.windows.net", account);
} else {
AzureConfig config = AzureConfigSingleton.getConfig();
@@ -145,29 +149,34 @@ public class AzureFileProvider extends
AbstractOriginatingFileProvider {
IVariables newVariables = Variables.getADefaultVariableSpace();
account = newVariables.resolve(config.getAccount());
key =
Encr.decryptPasswordOptionallyEncrypted(newVariables.resolve(config.getKey()));
- url = newVariables.resolve(config.getEmulatorUrl());
+ endpoint =
+ (!Utils.isEmpty(config.getEmulatorUrl()))
+ ? newVariables.resolve(config.getEmulatorUrl())
+ : String.format(Locale.ROOT,
"https://%s.dfs.core.windows.net", account);
}
- String storageConnectionString =
- StringUtils.isBlank(url)
- ? String.format(
-
"DefaultEndpointsProtocol=https;AccountName=%s;AccountKey=%s;EndpointSuffix=%s",
- account, key, AZURE_ENDPOINT_SUFFIX)
- : String.format(
-
"AccountName=%s;AccountKey=%s;DefaultEndpointsProtocol=http;BlobEndpoint=%s/%s",
- account, key, url, account);
- CloudStorageAccount storageAccount =
CloudStorageAccount.parse(storageConnectionString);
- service = storageAccount.createCloudBlobClient();
-
- } catch (InvalidKeyException e) {
- throw new FileSystemException(e.getMessage(), e);
- } catch (URISyntaxException e) {
- throw new FileSystemException(e.getMessage(), e);
+ StorageSharedKeyCredential storageCreds = new
StorageSharedKeyCredential(account, key);
+
+ DataLakeServiceClient serviceClient =
+ new DataLakeServiceClientBuilder()
+ .endpoint(endpoint)
+ .credential(storageCreds)
+ // .httpClient((HttpClient) client)
+ .buildClient();
+
+ azureFileSystem =
+ new AzureFileSystem(
+ azureRootName,
+ serviceClient,
+ ((AzureFileName) fileName).getContainer(),
+ fileSystemOptions,
+ account);
+
} finally {
UserAuthenticatorUtils.cleanup(authData);
}
- return new AzureFileSystem((AzureFileName) fileName, service, fsOptions,
account);
+ return azureFileSystem;
}
@Override
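One behavioural detail of the provider change above: when no storage account endpoint (or emulator URL) is configured, the gen2 endpoint is derived from the account name. A minimal sketch of that fallback, with a hypothetical helper name and placeholder values:

    import java.util.Locale;

    public class EndpointFallbackSketch {
      static String resolveEndpoint(String configuredEndpoint, String account) {
        // Use the configured endpoint (e.g. an Azurite emulator URL) when present,
        // otherwise derive the Data Lake gen2 endpoint from the account name.
        return (configuredEndpoint != null && !configuredEndpoint.isEmpty())
            ? configuredEndpoint
            : String.format(Locale.ROOT, "https://%s.dfs.core.windows.net", account);
      }

      public static void main(String[] args) {
        // Prints: https://devstoreaccount1.dfs.core.windows.net
        System.out.println(resolveEndpoint("", "devstoreaccount1"));
      }
    }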
diff --git
a/plugins/tech/azure/src/main/java/org/apache/hop/vfs/azure/AzureFileSystem.java
b/plugins/tech/azure/src/main/java/org/apache/hop/vfs/azure/AzureFileSystem.java
index 8041fdeb55..49d6662f01 100644
---
a/plugins/tech/azure/src/main/java/org/apache/hop/vfs/azure/AzureFileSystem.java
+++
b/plugins/tech/azure/src/main/java/org/apache/hop/vfs/azure/AzureFileSystem.java
@@ -18,7 +18,7 @@
package org.apache.hop.vfs.azure;
-import com.microsoft.azure.storage.blob.CloudBlobClient;
+import com.azure.storage.file.datalake.DataLakeServiceClient;
import java.util.Collection;
import org.apache.commons.vfs2.Capability;
import org.apache.commons.vfs2.FileObject;
@@ -29,18 +29,20 @@ import org.apache.commons.vfs2.provider.AbstractFileSystem;
public class AzureFileSystem extends AbstractFileSystem {
- private final CloudBlobClient client;
-
+ private final DataLakeServiceClient serviceClient;
+ private final String fsName;
private final String account;
public AzureFileSystem(
AzureFileName fileName,
- CloudBlobClient service,
+ DataLakeServiceClient serviceClient,
+ String fsName,
FileSystemOptions fileSystemOptions,
String account)
throws FileSystemException {
super(fileName, null, fileSystemOptions);
- this.client = service;
+ this.serviceClient = serviceClient;
+ this.fsName = fsName;
this.account = account;
}
@@ -51,7 +53,11 @@ public class AzureFileSystem extends AbstractFileSystem {
@Override
protected FileObject createFile(AbstractFileName name) throws Exception {
- return new AzureFileObject(name, this, client);
+ return new AzureFileObject(name, this, serviceClient);
+ }
+
+ public String getFilesystemName() {
+ return fsName;
}
public String getAccount() {
diff --git
a/plugins/tech/azure/src/main/java/org/apache/hop/vfs/azure/BlobInputStream.java
b/plugins/tech/azure/src/main/java/org/apache/hop/vfs/azure/BlobInputStream.java
index 0b3711ef8f..be3f9331ae 100644
---
a/plugins/tech/azure/src/main/java/org/apache/hop/vfs/azure/BlobInputStream.java
+++
b/plugins/tech/azure/src/main/java/org/apache/hop/vfs/azure/BlobInputStream.java
@@ -18,18 +18,18 @@
package org.apache.hop.vfs.azure;
+import
com.azure.storage.file.datalake.models.DataLakeFileOpenInputStreamResult;
import java.io.IOException;
import java.io.InputStream;
public class BlobInputStream extends InputStream {
- private com.microsoft.azure.storage.blob.BlobInputStream inputStream;
+ private InputStream inputStream;
private long fileSize;
private long totalRead = 0;
- public BlobInputStream(
- com.microsoft.azure.storage.blob.BlobInputStream inputStream, long
fileSize) {
- this.inputStream = inputStream;
+ public BlobInputStream(DataLakeFileOpenInputStreamResult inputStream, long
fileSize) {
+ this.inputStream = inputStream.getInputStream();
this.fileSize = fileSize;
}
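BlobInputStream now wraps the stream returned by DataLakeFileClient.openInputStream() rather than the legacy blob stream. A hedged usage sketch follows; the helper method and the assumption that a DataLakeFileClient for an existing file is available are illustrative only.

    import com.azure.storage.file.datalake.DataLakeFileClient;
    import com.azure.storage.file.datalake.models.DataLakeFileOpenInputStreamResult;
    import java.io.IOException;
    import java.io.InputStream;
    import org.apache.hop.vfs.azure.BlobInputStream;

    public class BlobInputStreamSketch {
      // Counts the bytes of a gen2 file through the wrapped stream.
      static long countBytes(DataLakeFileClient fileClient) throws IOException {
        DataLakeFileOpenInputStreamResult result = fileClient.openInputStream();
        long size = fileClient.getProperties().getFileSize();
        long total = 0;
        try (InputStream in = new BlobInputStream(result, size)) {
          byte[] buffer = new byte[8192];
          int read;
          while ((read = in.read(buffer)) != -1) {
            total += read;
          }
        }
        return total;
      }
    }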
diff --git
a/plugins/tech/azure/src/test/java/org/apache/hop/vfs/azure/AzureFileNameParserTest.java
b/plugins/tech/azure/src/test/java/org/apache/hop/vfs/azure/AzureFileNameParserTest.java
index b840ad15be..169b0c2f6f 100644
---
a/plugins/tech/azure/src/test/java/org/apache/hop/vfs/azure/AzureFileNameParserTest.java
+++
b/plugins/tech/azure/src/test/java/org/apache/hop/vfs/azure/AzureFileNameParserTest.java
@@ -19,36 +19,25 @@ package org.apache.hop.vfs.azure;
import static org.junit.Assert.assertEquals;
-import java.net.URI;
-import java.net.URISyntaxException;
-import java.util.Arrays;
-import java.util.Collection;
+import java.util.stream.Stream;
import org.apache.commons.vfs2.FileSystemException;
import org.apache.commons.vfs2.FileType;
import org.apache.commons.vfs2.provider.VfsComponentContext;
import org.apache.hop.vfs.azure.config.AzureConfig;
import org.apache.hop.vfs.azure.config.AzureConfigSingleton;
-import org.junit.Before;
-import org.junit.BeforeClass;
-import org.junit.Test;
-import org.junit.runner.RunWith;
-import org.junit.runners.Parameterized;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
import org.mockito.MockedStatic;
import org.mockito.Mockito;
-@RunWith(Parameterized.class)
public class AzureFileNameParserTest {
private AzureFileNameParser parser;
- private final String inputUri;
- private final String expectedScheme;
- private final String expectedContainer;
- private final String expectedPathAfterContainer;
-
- private final FileType expectedType;
-
- @BeforeClass
+ @BeforeAll
public static void init() {
AzureConfig azureConfig = new AzureConfig();
azureConfig.setAccount("hopsa");
@@ -58,67 +47,79 @@ public class AzureFileNameParserTest {
azureConfigSingleton.when(AzureConfigSingleton::getConfig).thenReturn(azureConfig);
}
- @Before
+ @BeforeEach
public void setup() {
parser = new AzureFileNameParser();
}
- public AzureFileNameParserTest(
- String inputUri,
- String expectedScheme,
- String expectedContainer,
- String expectedPathAfterContainer,
- FileType expectedType) {
- this.inputUri = inputUri;
- this.expectedScheme = expectedScheme;
- this.expectedContainer = expectedContainer;
- this.expectedPathAfterContainer = expectedPathAfterContainer;
- this.expectedType = expectedType;
- }
-
- @Parameterized.Parameters
- public static Collection azureUris() {
- return Arrays.asList(
- new Object[][] {
- {
+ // public AzureFileNameParserTest(
+ // String inputUri,
+ // String expectedScheme,
+ // String expectedContainer,
+ // String expectedPathAfterContainer,
+ // FileType expectedType) {
+ // this.inputUri = inputUri;
+ // this.expectedScheme = expectedScheme;
+ // this.expectedContainer = expectedContainer;
+ // this.expectedPathAfterContainer = expectedPathAfterContainer;
+ // this.expectedType = expectedType;
+ // }
+
+ public static Stream<Arguments> azureUris() {
+ return Stream.of(
+ Arguments.of(
"azfs://hopsa/container/folder1/parquet-test-delo2-azfs-00-0001.parquet",
"azfs",
"container",
"/folder1/parquet-test-delo2-azfs-00-0001.parquet",
- FileType.FILE
- },
- {"azfs:/hopsa/container/folder1/", "azfs", "container", "/folder1",
FileType.FOLDER},
- {"azure://test/folder1/", "azure", "test", "/folder1",
FileType.FOLDER},
- {
+ FileType.FILE),
+ Arguments.of(
+ "azfs:/hopsa/container/folder1/", "azfs", "container", "/folder1",
FileType.FOLDER),
+ Arguments.of("azure://test/folder1/", "azure", "test", "/folder1",
FileType.FOLDER),
+ Arguments.of(
"azure://mycontainer/folder1/parquet-test-delo2-azfs-00-0001.parquet",
"azure",
"mycontainer",
"/folder1/parquet-test-delo2-azfs-00-0001.parquet",
- FileType.FILE
- },
- {
+ FileType.FILE),
+ Arguments.of(
"azfs://hopsa/delo/delo3-azfs-00-0001.parquet",
"azfs",
"delo",
"/delo3-azfs-00-0001.parquet",
- FileType.FILE
- },
- {"azfs://hopsa/container/folder1/", "azfs", "container", "/folder1",
FileType.FOLDER},
- {"azfs://container/", "azfs", "container", "", FileType.FOLDER},
- {"azfs://container/myfile.txt", "azfs", "container", "/myfile.txt",
FileType.FILE},
- {"azfs:///container/myfile.txt", "azfs", "container", "/myfile.txt",
FileType.FILE},
- {
- "azfs:///container/path/to/resource/myfile.txt",
+ FileType.FILE),
+ Arguments.of(
+ "azfs://hopsa/container/folder1/", "azfs", "container",
"/folder1", FileType.FOLDER),
+ Arguments.of("azfs://account/container/", "azfs", "container", "",
FileType.FOLDER),
+ Arguments.of(
+ "azfs://otheraccount/container/myfile.txt",
+ "azfs",
+ "container",
+ "/myfile.txt",
+ FileType.FILE),
+ Arguments.of(
+ "azfs:///account1/container/myfile.txt",
+ "azfs",
+ "container",
+ "/myfile.txt",
+ FileType.FILE),
+ Arguments.of(
+ "azfs:///fake/container/path/to/resource/myfile.txt",
"azfs",
"container",
"/path/to/resource/myfile.txt",
- FileType.FILE
- }
- });
+ FileType.FILE));
}
- @Test
- public void parseUri() throws FileSystemException {
+ @ParameterizedTest
+ @MethodSource("azureUris")
+ void parseUri(
+ String inputUri,
+ String expectedScheme,
+ String expectedContainer,
+ String expectedPathAfterContainer,
+ FileType expectedType)
+ throws FileSystemException {
VfsComponentContext context = Mockito.mock(VfsComponentContext.class);
AzureFileName actual = (AzureFileName) parser.parseUri(context, null,
inputUri);
@@ -134,17 +135,4 @@ public class AzureFileNameParserTest {
assertEquals(expectedPathAfterContainer, actual.getPathAfterContainer());
assertEquals(expectedType, actual.getType());
}
-
- // @Test
- public void print() throws URISyntaxException {
- System.out.println("--------------------------");
- URI uri = new URI(inputUri);
- System.out.println(inputUri);
- System.out.println("Scheme: " + uri.getScheme());
- System.out.println("Host: " + uri.getHost());
- System.out.println("Authority: " + uri.getAuthority());
- System.out.println("Path: " + uri.getPath());
- System.out.println("RawSchemeSpecificPart: " +
uri.getRawSchemeSpecificPart());
- System.out.println("--------------------------");
- }
}