rok commented on a change in pull request #10457:
URL: https://github.com/apache/arrow/pull/10457#discussion_r651290261



##########
File path: cpp/src/arrow/compute/kernels/scalar_temporal_test.cc
##########
@@ -143,39 +142,202 @@ TEST(ScalarTemporalTest, 
TestTemporalComponentExtractionWithDifferentUnits) {
     CheckScalarUnary("quarter", unit, times, int64(), quarter);
     CheckScalarUnary("hour", unit, times, int64(), hour);
     CheckScalarUnary("minute", unit, times, int64(), minute);
-    CheckScalarUnary("second", unit, times, float64(), second);
+    CheckScalarUnary("second", unit, times, int64(), second);
     CheckScalarUnary("millisecond", unit, times, int64(), zeros);
     CheckScalarUnary("microsecond", unit, times, int64(), zeros);
     CheckScalarUnary("nanosecond", unit, times, int64(), zeros);
     CheckScalarUnary("subsecond", unit, times, float64(), zeros);
   }
 }
 
-TEST(ScalarTemporalTest, TestZonedTemporalComponentExtraction) {
-  std::string timezone = "Asia/Kolkata";
-  const char* times = R"(["1970-01-01T00:00:59", null])";
+TEST(ScalarTemporalTest, TestZoned1) {
+  const char* times =
+      R"(["1970-01-01T00:00:59.123456789","2000-02-29T23:23:23.999999999",
+          "1899-01-01T00:59:20.001001001","2033-05-18T03:33:20.000000000",
+          "2020-01-01T01:05:05.001", "2019-12-31T02:10:10.002",
+          "2019-12-30T03:15:15.003", "2009-12-31T04:20:20.004132",
+          "2010-01-01T05:25:25.005321", "2010-01-03T06:30:30.006163",
+          "2010-01-04T07:35:35", "2006-01-01T08:40:40", "2005-12-31T09:45:45",
+          "2008-12-28", "2008-12-29", "2012-01-01 01:02:03", null])";
+  auto unit = timestamp(TimeUnit::NANO, "Pacific/Marquesas");
+  auto iso_calendar_type =
+      struct_({field("iso_year", int64()), field("iso_week", int64()),
+               field("iso_day_of_week", int64())});
+  auto year =
+      "[1969, 2000, 1898, 2033, 2019, 2019, 2019, 2009, 2009, 2010, 2010, 
2005, 2005, "
+      "2008, 2008, 2011, null]";
+  auto month = "[12, 2, 12, 5, 12, 12, 12, 12, 12, 1, 1, 12, 12, 12, 12, 12, 
null]";
+  auto day = "[31, 29, 31, 17, 31, 30, 29, 30, 31, 2, 3, 31, 31, 27, 28, 31, 
null]";
+  auto day_of_week = "[2, 1, 5, 1, 1, 0, 6, 2, 3, 5, 6, 5, 5, 5, 6, 5, null]";
+  auto day_of_year =
+      "[365, 60, 365, 137, 365, 364, 363, 364, 365, 2, 3, 365, 365, 362, 363, 
365, null]";
+  auto iso_year =
+      "[1970, 2000, 1898, 2033, 2020, 2020, 2019, 2009, 2009, 2009, 2009, 
2005, 2005, "
+      "2008, 2008, 2011, null]";
+  auto iso_week = "[1, 9, 52, 20, 1, 1, 52, 53, 53, 53, 53, 52, 52, 52, 52, 
52, null]";
+  auto iso_calendar =
+      ArrayFromJSON(iso_calendar_type,
+                    R"([{"iso_year": 1970, "iso_week": 1, "iso_day_of_week": 
3},
+                        {"iso_year": 2000, "iso_week": 9, "iso_day_of_week": 
2},
+                        {"iso_year": 1898, "iso_week": 52, "iso_day_of_week": 
6},
+                        {"iso_year": 2033, "iso_week": 20, "iso_day_of_week": 
2},
+                        {"iso_year": 2020, "iso_week": 1, "iso_day_of_week": 
2},
+                        {"iso_year": 2020, "iso_week": 1, "iso_day_of_week": 
1},
+                        {"iso_year": 2019, "iso_week": 52, "iso_day_of_week": 
7},
+                        {"iso_year": 2009, "iso_week": 53, "iso_day_of_week": 
3},
+                        {"iso_year": 2009, "iso_week": 53, "iso_day_of_week": 
4},
+                        {"iso_year": 2009, "iso_week": 53, "iso_day_of_week": 
6},
+                        {"iso_year": 2009, "iso_week": 53, "iso_day_of_week": 
7},
+                        {"iso_year": 2005, "iso_week": 52, "iso_day_of_week": 
6},
+                        {"iso_year": 2005, "iso_week": 52, "iso_day_of_week": 
6},
+                        {"iso_year": 2008, "iso_week": 52, "iso_day_of_week": 
6},
+                        {"iso_year": 2008, "iso_week": 52, "iso_day_of_week": 
7},
+                        {"iso_year": 2011, "iso_week": 52, "iso_day_of_week": 
6}, null])");
+  auto quarter = "[4, 1, 4, 2, 4, 4, 4, 4, 4, 1, 1, 4, 4, 4, 4, 4, null]";
+  auto hour = "[14, 13, 15, 18, 15, 16, 17, 18, 19, 21, 22, 23, 0, 14, 14, 15, 
null]";
+  auto minute = "[30, 53, 41, 3, 35, 40, 45, 50, 55, 0, 5, 10, 15, 30, 30, 32, 
null]";
+  auto second = "[59, 23, 20, 20, 5, 10, 15, 20, 25, 30, 35, 40, 45, 0, 0, 3, 
null]";
+  auto millisecond = "[123, 999, 1, 0, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0, 0, 
null]";
+  auto microsecond = "[456, 999, 1, 0, 0, 0, 0, 132, 321, 163, 0, 0, 0, 0, 0, 
0, null]";
+  auto nanosecond = "[789, 999, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
null]";
+  auto subsecond =
+      "[0.123456789, 0.999999999, 0.001001001, 0.0, 0.001, 0.002, 0.003, 
0.004132, "
+      "0.005321, 0.006163, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, null]";
 
-  for (auto u : internal::AllTimeUnits()) {
-    auto unit = timestamp(u, timezone);
-    auto timestamps = ArrayFromJSON(unit, times);
-
-    ASSERT_RAISES(Invalid, Year(timestamps));
-    ASSERT_RAISES(Invalid, Month(timestamps));
-    ASSERT_RAISES(Invalid, Day(timestamps));
-    ASSERT_RAISES(Invalid, DayOfWeek(timestamps));
-    ASSERT_RAISES(Invalid, DayOfYear(timestamps));
-    ASSERT_RAISES(Invalid, ISOYear(timestamps));
-    ASSERT_RAISES(Invalid, ISOWeek(timestamps));
-    ASSERT_RAISES(Invalid, ISOCalendar(timestamps));
-    ASSERT_RAISES(Invalid, Quarter(timestamps));
-    ASSERT_RAISES(Invalid, Hour(timestamps));
-    ASSERT_RAISES(Invalid, Minute(timestamps));
-    ASSERT_RAISES(Invalid, Second(timestamps));
-    ASSERT_RAISES(Invalid, Millisecond(timestamps));
-    ASSERT_RAISES(Invalid, Microsecond(timestamps));
-    ASSERT_RAISES(Invalid, Nanosecond(timestamps));
-    ASSERT_RAISES(Invalid, Subsecond(timestamps));
-  }
+  CheckScalarUnary("year", unit, times, int64(), year);
+  CheckScalarUnary("month", unit, times, int64(), month);
+  CheckScalarUnary("day", unit, times, int64(), day);
+  CheckScalarUnary("day_of_week", unit, times, int64(), day_of_week);
+  CheckScalarUnary("day_of_year", unit, times, int64(), day_of_year);
+  CheckScalarUnary("iso_year", unit, times, int64(), iso_year);
+  CheckScalarUnary("iso_week", unit, times, int64(), iso_week);
+  CheckScalarUnary("iso_calendar", ArrayFromJSON(unit, times), iso_calendar);
+  CheckScalarUnary("quarter", unit, times, int64(), quarter);
+  CheckScalarUnary("hour", unit, times, int64(), hour);
+  CheckScalarUnary("minute", unit, times, int64(), minute);
+  CheckScalarUnary("second", unit, times, int64(), second);
+  CheckScalarUnary("millisecond", unit, times, int64(), millisecond);
+  CheckScalarUnary("microsecond", unit, times, int64(), microsecond);
+  CheckScalarUnary("nanosecond", unit, times, int64(), nanosecond);
+  CheckScalarUnary("subsecond", unit, times, float64(), subsecond);
+}
+
+TEST(ScalarTemporalTest, TestZoned2) {
+  const char* times =
+      R"(["1970-01-01T00:00:59.123456789","2000-02-29T23:23:23.999999999",
+          "1899-01-01T00:59:20.001001001","2033-05-18T03:33:20.000000000",
+          "2020-01-01T01:05:05.001", "2019-12-31T02:10:10.002",
+          "2019-12-30T03:15:15.003", "2009-12-31T04:20:20.004132",
+          "2010-01-01T05:25:25.005321", "2010-01-03T06:30:30.006163",
+          "2010-01-04T07:35:35", "2006-01-01T08:40:40", "2005-12-31T09:45:45",
+          "2008-12-28", "2008-12-29", "2012-01-01 01:02:03", null])";
+  auto unit = timestamp(TimeUnit::NANO, "Australia/Broken_Hill");
+  auto iso_calendar_type =
+      struct_({field("iso_year", int64()), field("iso_week", int64()),
+               field("iso_day_of_week", int64())});
+  auto year =
+      "[1970, 2000, 1899, 2033, 2020, 2019, 2019, 2009, 2010, 2010, 2010, 
2006, 2005, "
+      "2008, 2008, 2012, null]";
+  auto month = "[1, 3, 1, 5, 1, 12, 12, 12, 1, 1, 1, 1, 12, 12, 12, 1, null]";
+  auto day = "[1, 1, 1, 18, 1, 31, 30, 31, 1, 3, 4, 1, 31, 28, 29, 1, null]";
+  auto day_of_week = "[3, 2, 6, 2, 2, 1, 0, 3, 4, 6, 0, 6, 5, 6, 0, 6, null]";
+  auto day_of_year =
+      "[1, 61, 1, 138, 1, 365, 364, 365, 1, 3, 4, 1, 365, 363, 364, 1, null]";
+  auto iso_year =
+      "[1970, 2000, 1898, 2033, 2020, 2020, 2020, 2009, 2009, 2009, 2010, 
2005, 2005, "
+      "2008, 2009, 2011, null]";
+  auto iso_week = "[1, 9, 52, 20, 1, 1, 1, 53, 53, 53, 1, 52, 52, 52, 1, 52, 
null]";
+  auto iso_calendar =
+      ArrayFromJSON(iso_calendar_type,
+                    R"([{"iso_year": 1970, "iso_week": 1, "iso_day_of_week": 
4},
+                        {"iso_year": 2000, "iso_week": 9, "iso_day_of_week": 
3},
+                        {"iso_year": 1898, "iso_week": 52, "iso_day_of_week": 
7},
+                        {"iso_year": 2033, "iso_week": 20, "iso_day_of_week": 
3},
+                        {"iso_year": 2020, "iso_week": 1, "iso_day_of_week": 
3},
+                        {"iso_year": 2020, "iso_week": 1, "iso_day_of_week": 
2},
+                        {"iso_year": 2020, "iso_week": 1, "iso_day_of_week": 
1},
+                        {"iso_year": 2009, "iso_week": 53, "iso_day_of_week": 
4},
+                        {"iso_year": 2009, "iso_week": 53, "iso_day_of_week": 
5},
+                        {"iso_year": 2009, "iso_week": 53, "iso_day_of_week": 
7},
+                        {"iso_year": 2010, "iso_week": 1, "iso_day_of_week": 
1},
+                        {"iso_year": 2005, "iso_week": 52, "iso_day_of_week": 
7},
+                        {"iso_year": 2005, "iso_week": 52, "iso_day_of_week": 
6},
+                        {"iso_year": 2008, "iso_week": 52, "iso_day_of_week": 
7},
+                        {"iso_year": 2009, "iso_week": 1, "iso_day_of_week": 
1},
+                        {"iso_year": 2011, "iso_week": 52, "iso_day_of_week": 
7}, null])");
+  auto quarter = "[1, 1, 1, 2, 1, 4, 4, 4, 1, 1, 1, 1, 4, 4, 4, 1, null]";
+  auto hour = "[9, 9, 9, 13, 11, 12, 13, 14, 15, 17, 18, 19, 20, 10, 10, 11, 
null]";
+  auto minute = "[30, 53, 59, 3, 35, 40, 45, 50, 55, 0, 5, 10, 15, 30, 30, 32, 
null]";
+  auto second = "[59, 23, 20, 20, 5, 10, 15, 20, 25, 30, 35, 40, 45, 0, 0, 3, 
null]";
+  auto millisecond = "[123, 999, 1, 0, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0, 0, 
null]";
+  auto microsecond = "[456, 999, 1, 0, 0, 0, 0, 132, 321, 163, 0, 0, 0, 0, 0, 
0, null]";
+  auto nanosecond = "[789, 999, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
null]";
+  auto subsecond =
+      "[0.123456789, 0.999999999, 0.001001001, 0.0, 0.001, 0.002, 0.003, 
0.004132, "
+      "0.005321, 0.006163, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, null]";
+
+  CheckScalarUnary("year", unit, times, int64(), year);
+  CheckScalarUnary("month", unit, times, int64(), month);
+  CheckScalarUnary("day", unit, times, int64(), day);
+  CheckScalarUnary("day_of_week", unit, times, int64(), day_of_week);
+  CheckScalarUnary("day_of_year", unit, times, int64(), day_of_year);
+  CheckScalarUnary("iso_year", unit, times, int64(), iso_year);
+  CheckScalarUnary("iso_week", unit, times, int64(), iso_week);
+  CheckScalarUnary("iso_calendar", ArrayFromJSON(unit, times), iso_calendar);
+  CheckScalarUnary("quarter", unit, times, int64(), quarter);
+  CheckScalarUnary("hour", unit, times, int64(), hour);
+  CheckScalarUnary("minute", unit, times, int64(), minute);
+  CheckScalarUnary("second", unit, times, int64(), second);
+  CheckScalarUnary("millisecond", unit, times, int64(), millisecond);
+  CheckScalarUnary("microsecond", unit, times, int64(), microsecond);
+  CheckScalarUnary("nanosecond", unit, times, int64(), nanosecond);
+  CheckScalarUnary("subsecond", unit, times, float64(), subsecond);
+}
+
+TEST(ScalarTemporalTest, TestOverflow) {
+  const char* times = R"(["1677-09-20T00:00:59.123456789", 
"2262-04-13T23:23:23.999999999"])";
+
+  auto unit = timestamp(TimeUnit::NANO);
+  auto iso_calendar_type =
+      struct_({field("iso_year", int64()), field("iso_week", int64()),
+               field("iso_day_of_week", int64())});
+
+  auto year = "[2262, 1677]";
+  auto month = "[4, 9]";
+  auto day = "[10, 22]";
+  auto day_of_week = "[3, 2]";
+  auto day_of_year = "[100, 265]";
+  auto iso_year = "[2262, 1677]";
+  auto iso_week = "[15, 38]";
+  auto iso_calendar =
+      ArrayFromJSON(iso_calendar_type,
+                    R"([{"iso_year": 2262, "iso_week": 15, "iso_day_of_week": 
4},
+                        {"iso_year": 1677, "iso_week": 38, "iso_day_of_week": 
3}])");
+  auto quarter = "[2, 3]";
+  auto hour = "[23, 23]";
+  auto minute = "[35, 48]";
+  auto second = "[32, 50]";
+  auto millisecond = "[833, 290]";
+  auto microsecond = "[8, 448]";
+  auto nanosecond = "[405, 383]";
+  auto subsecond = "[0.833008405, 0.290448383]";
+
+  CheckScalarUnary("year", unit, times, int64(), year);
+  CheckScalarUnary("month", unit, times, int64(), month);
+  CheckScalarUnary("day", unit, times, int64(), day);
+  CheckScalarUnary("day_of_week", unit, times, int64(), day_of_week);
+  CheckScalarUnary("day_of_year", unit, times, int64(), day_of_year);
+  CheckScalarUnary("iso_year", unit, times, int64(), iso_year);
+  CheckScalarUnary("iso_week", unit, times, int64(), iso_week);
+  CheckScalarUnary("iso_calendar", ArrayFromJSON(unit, times), iso_calendar);
+  CheckScalarUnary("quarter", unit, times, int64(), quarter);
+  CheckScalarUnary("hour", unit, times, int64(), hour);
+  CheckScalarUnary("minute", unit, times, int64(), minute);
+  CheckScalarUnary("second", unit, times, int64(), second);
+  CheckScalarUnary("millisecond", unit, times, int64(), millisecond);
+  CheckScalarUnary("microsecond", unit, times, int64(), microsecond);
+  CheckScalarUnary("nanosecond", unit, times, int64(), nanosecond);
+  CheckScalarUnary("subsecond", unit, times, float64(), subsecond);
 }

Review comment:
       Is this what you had in mind 
[here](https://github.com/apache/arrow/pull/10176#issuecomment-858392898)?

##########
File path: python/pyarrow/tests/test_compute.py
##########
@@ -1255,6 +1255,78 @@ def test_strptime():
     assert got == expected
 
 
+def _check_datetime_components(timestamps, timezone=None):
+    import pandas as pd
+
+    if timezone:
+        ts = pd.to_datetime(timestamps).tz_localize(timezone).to_series()
+    else:
+        ts = pd.to_datetime(timestamps).to_series()
+
+    tsa = pa.array(ts)
+
+    subseconds = ((ts.dt.microsecond * 10**3 +
+                  ts.dt.nanosecond) * 10**-9).round(9)
+    iso_calendar_fields = [
+        pa.field('iso_year', pa.int64()),
+        pa.field('iso_week', pa.int64()),
+        pa.field('iso_day_of_week', pa.int64())
+    ]
+
+    iso_year = ts.dt.isocalendar()["year"].astype(int)
+    iso_week = ts.dt.isocalendar()["week"].astype(int)
+    iso_day = ts.dt.isocalendar()["day"].astype(int)
+    iso_calendar = pa.StructArray.from_arrays(
+        [iso_year, iso_week, iso_day], fields=iso_calendar_fields)
+
+    assert pc.year(tsa).equals(pa.array(ts.dt.year))
+    assert pc.month(tsa).equals(pa.array(ts.dt.month))
+    assert pc.day(tsa).equals(pa.array(ts.dt.day))
+    assert pc.day_of_week(tsa).equals(pa.array(ts.dt.day_of_week))
+    assert pc.day_of_year(tsa).equals(pa.array(ts.dt.day_of_year))
+    assert pc.iso_year(tsa).equals(pa.array(iso_year))
+    assert pc.iso_week(tsa).equals(pa.array(iso_week))
+    assert pc.iso_calendar(tsa).equals(iso_calendar)
+    assert pc.quarter(tsa).equals(pa.array(ts.dt.quarter))
+    assert pc.hour(tsa).equals(pa.array(ts.dt.hour))
+    assert pc.minute(tsa).equals(pa.array(ts.dt.minute))
+    assert pc.second(tsa).equals(pa.array(ts.dt.second.values))
+    assert pc.millisecond(tsa).equals(pa.array(ts.dt.microsecond // 10**3))
+    assert pc.microsecond(tsa).equals(pa.array(ts.dt.microsecond % 10**3))
+    assert pc.nanosecond(tsa).equals(pa.array(ts.dt.nanosecond))
+    assert pc.subsecond(tsa).equals(pa.array(subseconds))
+
+
+@pytest.mark.pandas
+def test_extract_datetime_components():
+    # TODO: see https://github.com/pandas-dev/pandas/issues/41834
+    # "1899-01-01T00:59:20.001001001"

Review comment:
       It's probably best to remove this, since I believe pandas is wrong and 
it's not really our TODO.
   It's kind of hard to confirm who is right, though.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to