This is an automated email from the ASF dual-hosted git repository.
gangwu pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new f2d2a1197 ORC-1684: [C++] Find tzdb without TZDIR when in
conda-environments
f2d2a1197 is described below
commit f2d2a1197338f50438f67cffad6f6a649c49220a
Author: H. Vetinari <[email protected]>
AuthorDate: Fri Apr 12 12:58:20 2024 +0800
ORC-1684: [C++] Find tzdb without TZDIR when in conda-environments
### What changes were proposed in this pull request?
Find tzdb without having to set `TZDIR` when in a conda-environment (where
`tzdata`
[has](https://conda-metadata-app.streamlit.app/?q=conda-forge%2Fnoarch%2Ftzdata-2024a-h0c530f3_0.conda)
a uniform location of `$CONDA_PREFIX/share/zoneinfo` across all platforms).
### Why are the changes needed?
This is due to issues in arrow (see
https://github.com/apache/arrow/issues/36026) that cannot really be fixed
there, as it assumes that orc >=2.0 knows how to find the tzdb. Having to set
`TZDIR` in all user environments is an intrusive change that should be avoided,
and since the cost here is checking a single environment variable, it's
hopefully not too onerous for consideration.
### How was this patch tested?
CI here
### Was this patch authored or co-authored using generative AI tooling?
No
CC wgtmac
See also: #1587
Closes #1882 from h-vetinari/tzdb.
Authored-by: H. Vetinari <[email protected]>
Signed-off-by: Gang Wu <[email protected]>
(cherry picked from commit e89ca3359b791f377717ea220df76288077f50f4)
Signed-off-by: Gang Wu <[email protected]>
---
c++/src/Timezone.cc | 12 ++++++++++--
c++/test/TestTimezone.cc | 50 ++++++++++++++++++++++++++++++++++++++++++++----
2 files changed, 56 insertions(+), 6 deletions(-)
diff --git a/c++/src/Timezone.cc b/c++/src/Timezone.cc
index 8b8ac69b1..ebaf222fc 100644
--- a/c++/src/Timezone.cc
+++ b/c++/src/Timezone.cc
@@ -655,10 +655,18 @@ namespace orc {
epoch = utcEpoch - getVariant(utcEpoch).gmtOffset;
}
- const char* getTimezoneDirectory() {
+ std::string getTimezoneDirectory() {
const char* dir = getenv("TZDIR");
if (!dir) {
- dir = DEFAULT_TZDIR;
+ // this is present if we're in an activated conda environment
+ const char* condaPrefix = getenv("CONDA_PREFIX");
+ if (condaPrefix) {
+ std::string condaDir(condaPrefix);
+ condaDir += "/share/zoneinfo";
+ return condaDir;
+ } else {
+ dir = DEFAULT_TZDIR;
+ }
}
return dir;
}
diff --git a/c++/test/TestTimezone.cc b/c++/test/TestTimezone.cc
index 2330fcfb0..727d6a276 100644
--- a/c++/test/TestTimezone.cc
+++ b/c++/test/TestTimezone.cc
@@ -21,6 +21,7 @@
#include "wrap/gmock.h"
#include "wrap/gtest-wrapper.h"
+#include <algorithm>
#include <cstdlib>
#include <iostream>
#include <vector>
@@ -421,8 +422,12 @@ namespace orc {
}
TEST(TestTimezone, testMissingTZDB) {
- const char* tzDirBackup = std::getenv("TZDIR");
- if (tzDirBackup != nullptr) {
+ const char* tzDir = std::getenv("TZDIR");
+ std::string tzDirBackup;
+ if (tzDir != nullptr) {
+ // std::string creates a deepcopy of buffer, which avoids that
+ // unsetting environment variable wrecks pointer to tzDir
+ tzDirBackup = tzDir;
ASSERT_TRUE(delEnv("TZDIR"));
}
ASSERT_TRUE(setEnv("TZDIR", "/path/to/wrong/tzdb"));
@@ -430,11 +435,48 @@ namespace orc {
testing::ThrowsMessage<TimezoneError>(testing::HasSubstr(
"Time zone file /path/to/wrong/tzdb/America/Los_Angeles
does not exist."
" Please install IANA time zone database and set TZDIR
env.")));
- if (tzDirBackup != nullptr) {
- ASSERT_TRUE(setEnv("TZDIR", tzDirBackup));
+ if (!tzDirBackup.empty()) {
+ ASSERT_TRUE(setEnv("TZDIR", tzDirBackup.c_str()));
} else {
ASSERT_TRUE(delEnv("TZDIR"));
}
}
+ TEST(TestTimezone, testTzdbFromCondaEnv) {
+ const char* tzDir = std::getenv("TZDIR");
+ // test only makes sense if TZDIR exists
+ if (tzDir != nullptr) {
+ std::string tzDirBackup = tzDir;
+ ASSERT_TRUE(delEnv("TZDIR"));
+
+ // remove "/share/zoneinfo" from TZDIR (as set through TZDATA_DIR in CI)
to
+ // get the equivalent of CONDA_PREFIX, relative to the location of the
tzdb
+ std::string condaPrefix(tzDirBackup);
+ condaPrefix += "/../..";
+ ASSERT_TRUE(setEnv("CONDA_PREFIX", condaPrefix.c_str()));
+
+ // small test sample to ensure tzbd loads with CONDA_PREFIX, even
without TZDIR
+ const Timezone* zrh = &getTimezoneByName("Europe/Zurich");
+ EXPECT_EQ("CET", getVariantFromZone(*zrh, "2024-03-31 00:59:59"));
+ EXPECT_EQ("CEST", getVariantFromZone(*zrh, "2024-03-31 01:00:00"));
+ EXPECT_EQ("CEST", getVariantFromZone(*zrh, "2024-10-27 00:59:59"));
+ EXPECT_EQ("CET", getVariantFromZone(*zrh, "2024-10-27 01:00:00"));
+
+ // CONDA_PREFIX contains backslashes on windows; test that this doesn't
blow up
+ std::replace(condaPrefix.begin(), condaPrefix.end(), '/', '\\');
+ ASSERT_TRUE(setEnv("CONDA_PREFIX", condaPrefix.c_str()));
+
+ // as above, but different timezone to avoid hitting cache
+ const Timezone* syd = &getTimezoneByName("Australia/Sydney");
+ EXPECT_EQ("AEDT", getVariantFromZone(*syd, "2024-04-06 15:59:59"));
+ EXPECT_EQ("AEST", getVariantFromZone(*syd, "2024-04-06 16:00:00"));
+ EXPECT_EQ("AEST", getVariantFromZone(*syd, "2024-10-05 15:59:59"));
+ EXPECT_EQ("AEDT", getVariantFromZone(*syd, "2024-10-05 16:00:00"));
+
+ // restore state of environment variables
+ ASSERT_TRUE(delEnv("CONDA_PREFIX"));
+ ASSERT_TRUE(setEnv("TZDIR", tzDirBackup.c_str()));
+ }
+ }
+
} // namespace orc