This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 02e37e28c2 GH-48721: [C++] Add test for file creation with UTF-8 filenames (#48722)
02e37e28c2 is described below

commit 02e37e28c231741fd0a8beb23efcb3b9a0792676
Author: Hyukjin Kwon <[email protected]>
AuthorDate: Wed Jan 21 23:33:51 2026 +0900

    GH-48721: [C++] Add test for file creation with UTF-8 filenames (#48722)
    
    ### Rationale for this change
    
    
    Commit https://github.com/apache/arrow/commit/4937d9fd2033a80a8f5ad81d25cde30195dfe620
    (ARROW-5102) added a TODO comment requesting a test with valid UTF-8
    filenames. Later, the UTF-8 to UTF-16 conversion logic on Windows was
    introduced in commit
    https://github.com/apache/arrow/commit/eb23ea952441cdc0e1046467d688445288db9742
    (ARROW-5648), which should fix the issue.
    
    Essentially, we should add a test for `StringToNative()`:
    https://github.com/apache/arrow/blob/727106f7ff65065298e1e79071fed2a408b4b4d6/cpp/src/arrow/util/io_util.cc#L143-L149
    This test complements the existing
    `FileNameWideCharConversionRangeException` test (invalid UTF-8).
    
    ### What changes are included in this PR?
    
    This PR adds the test described above.
    
    ### Are these changes tested?
    
    A unit test was added.
    
    ### Are there any user-facing changes?
    
    No, test-only.
    * GitHub Issue: #48721
    
    Authored-by: Hyukjin Kwon <[email protected]>
    Signed-off-by: Antoine Pitrou <[email protected]>
---
 cpp/src/arrow/io/file_test.cc | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/cpp/src/arrow/io/file_test.cc b/cpp/src/arrow/io/file_test.cc
index 81ae716ef6..8970dfe7cc 100644
--- a/cpp/src/arrow/io/file_test.cc
+++ b/cpp/src/arrow/io/file_test.cc
@@ -114,10 +114,28 @@ TEST_F(TestFileOutputStream, FileNameWideCharConversionRangeException) {
   ASSERT_RAISES(Invalid, FileOutputStream::Open(file_name));
   ASSERT_RAISES(Invalid, ReadableFile::Open(file_name));
 }
-
-// TODO add a test with a valid utf-8 filename
 #endif
 
+TEST_F(TestFileOutputStream, FileNameValidUtf8) {
+  // Test that file operations work with UTF-8 filenames (Korean + emoji).
+  // On Windows, PlatformFilename::FromString() converts UTF-8 strings to wide strings.
+  // On Unix, filenames are treated as opaque byte strings.
+  std::string utf8_file_name = "test_file_한국어_😀.txt";
+  std::string utf8_path = TempFile(utf8_file_name);
+
+  ASSERT_OK_AND_ASSIGN(auto file, FileOutputStream::Open(utf8_path));
+  const char* data = "test content";
+  ASSERT_OK(file->Write(data, strlen(data)));
+  ASSERT_OK(file->Close());
+
+  // Verify we can read it back
+  ASSERT_OK_AND_ASSIGN(auto readable_file, ReadableFile::Open(utf8_path));
+  ASSERT_OK_AND_ASSIGN(auto buffer, readable_file->ReadAt(0, strlen(data)));
+  ASSERT_EQ(std::string(reinterpret_cast<const char*>(buffer->data()), buffer->size()),
+            std::string(data));
+  ASSERT_OK(readable_file->Close());
+}
+
 TEST_F(TestFileOutputStream, DestructorClosesFile) {
   int fd_file;
 

Reply via email to