This is an automated email from the ASF dual-hosted git repository.

gangwu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/main by this push:
     new e89ca3359 ORC-1684: [C++] Find tzdb without TZDIR when in 
conda-environments
e89ca3359 is described below

commit e89ca3359b791f377717ea220df76288077f50f4
Author: H. Vetinari <[email protected]>
AuthorDate: Fri Apr 12 12:58:20 2024 +0800

    ORC-1684: [C++] Find tzdb without TZDIR when in conda-environments
    
    ### What changes were proposed in this pull request?
    
    Find tzdb without having to set `TZDIR` when in a conda-environment (where 
`tzdata` 
[has](https://conda-metadata-app.streamlit.app/?q=conda-forge%2Fnoarch%2Ftzdata-2024a-h0c530f3_0.conda)
 a uniform location of `$CONDA_PREFIX/share/zoneinfo` across all platforms).
    
    ### Why are the changes needed?
    
    This is due to issues in arrow (see 
https://github.com/apache/arrow/issues/36026) that cannot really be fixed 
there, as it assumes that orc >=2.0 knows how to find the tzdb. Having to set 
`TZDIR` in all user environments is an intrusive change that should be avoided, 
and since the cost here is checking a single environment variable, it's 
hopefully not too onerous for consideration.
    
    ### How was this patch tested?
    
    CI here
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No
    
    CC wgtmac
    
    See also: #1587
    
    Closes #1882 from h-vetinari/tzdb.
    
    Authored-by: H. Vetinari <[email protected]>
    Signed-off-by: Gang Wu <[email protected]>
---
 c++/src/Timezone.cc      | 12 ++++++++++--
 c++/test/TestTimezone.cc | 50 ++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 56 insertions(+), 6 deletions(-)

diff --git a/c++/src/Timezone.cc b/c++/src/Timezone.cc
index 32276a850..29db7f37c 100644
--- a/c++/src/Timezone.cc
+++ b/c++/src/Timezone.cc
@@ -655,10 +655,18 @@ namespace orc {
     epoch_ = utcEpoch - getVariant(utcEpoch).gmtOffset;
   }
 
-  const char* getTimezoneDirectory() {
+  std::string getTimezoneDirectory() {
     const char* dir = getenv("TZDIR");
     if (!dir) {
-      dir = DEFAULT_TZDIR;
+      // this is present if we're in an activated conda environment
+      const char* condaPrefix = getenv("CONDA_PREFIX");
+      if (condaPrefix) {
+        std::string condaDir(condaPrefix);
+        condaDir += "/share/zoneinfo";
+        return condaDir;
+      } else {
+        dir = DEFAULT_TZDIR;
+      }
     }
     return dir;
   }
diff --git a/c++/test/TestTimezone.cc b/c++/test/TestTimezone.cc
index 2330fcfb0..727d6a276 100644
--- a/c++/test/TestTimezone.cc
+++ b/c++/test/TestTimezone.cc
@@ -21,6 +21,7 @@
 #include "wrap/gmock.h"
 #include "wrap/gtest-wrapper.h"
 
+#include <algorithm>
 #include <cstdlib>
 #include <iostream>
 #include <vector>
@@ -421,8 +422,12 @@ namespace orc {
   }
 
   TEST(TestTimezone, testMissingTZDB) {
-    const char* tzDirBackup = std::getenv("TZDIR");
-    if (tzDirBackup != nullptr) {
+    const char* tzDir = std::getenv("TZDIR");
+    std::string tzDirBackup;
+    if (tzDir != nullptr) {
+      // std::string creates a deepcopy of buffer, which avoids that
+      // unsetting environment variable wrecks pointer to tzDir
+      tzDirBackup = tzDir;
       ASSERT_TRUE(delEnv("TZDIR"));
     }
     ASSERT_TRUE(setEnv("TZDIR", "/path/to/wrong/tzdb"));
@@ -430,11 +435,48 @@ namespace orc {
                 testing::ThrowsMessage<TimezoneError>(testing::HasSubstr(
                     "Time zone file /path/to/wrong/tzdb/America/Los_Angeles 
does not exist."
                     " Please install IANA time zone database and set TZDIR 
env.")));
-    if (tzDirBackup != nullptr) {
-      ASSERT_TRUE(setEnv("TZDIR", tzDirBackup));
+    if (!tzDirBackup.empty()) {
+      ASSERT_TRUE(setEnv("TZDIR", tzDirBackup.c_str()));
     } else {
       ASSERT_TRUE(delEnv("TZDIR"));
     }
   }
 
+  TEST(TestTimezone, testTzdbFromCondaEnv) {
+    const char* tzDir = std::getenv("TZDIR");
+    // test only makes sense if TZDIR exists
+    if (tzDir != nullptr) {
+      std::string tzDirBackup = tzDir;
+      ASSERT_TRUE(delEnv("TZDIR"));
+
+      // remove "/share/zoneinfo" from TZDIR (as set through TZDATA_DIR in CI) 
to
+      // get the equivalent of CONDA_PREFIX, relative to the location of the 
tzdb
+      std::string condaPrefix(tzDirBackup);
+      condaPrefix += "/../..";
+      ASSERT_TRUE(setEnv("CONDA_PREFIX", condaPrefix.c_str()));
+
+      // small test sample to ensure tzbd loads with CONDA_PREFIX, even 
without TZDIR
+      const Timezone* zrh = &getTimezoneByName("Europe/Zurich");
+      EXPECT_EQ("CET", getVariantFromZone(*zrh, "2024-03-31 00:59:59"));
+      EXPECT_EQ("CEST", getVariantFromZone(*zrh, "2024-03-31 01:00:00"));
+      EXPECT_EQ("CEST", getVariantFromZone(*zrh, "2024-10-27 00:59:59"));
+      EXPECT_EQ("CET", getVariantFromZone(*zrh, "2024-10-27 01:00:00"));
+
+      // CONDA_PREFIX contains backslashes on windows; test that this doesn't 
blow up
+      std::replace(condaPrefix.begin(), condaPrefix.end(), '/', '\\');
+      ASSERT_TRUE(setEnv("CONDA_PREFIX", condaPrefix.c_str()));
+
+      // as above, but different timezone to avoid hitting cache
+      const Timezone* syd = &getTimezoneByName("Australia/Sydney");
+      EXPECT_EQ("AEDT", getVariantFromZone(*syd, "2024-04-06 15:59:59"));
+      EXPECT_EQ("AEST", getVariantFromZone(*syd, "2024-04-06 16:00:00"));
+      EXPECT_EQ("AEST", getVariantFromZone(*syd, "2024-10-05 15:59:59"));
+      EXPECT_EQ("AEDT", getVariantFromZone(*syd, "2024-10-05 16:00:00"));
+
+      // restore state of environment variables
+      ASSERT_TRUE(delEnv("CONDA_PREFIX"));
+      ASSERT_TRUE(setEnv("TZDIR", tzDirBackup.c_str()));
+    }
+  }
+
 }  // namespace orc

Reply via email to