This is an automated email from the ASF dual-hosted git repository.
kszucs pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new ee59aaf ARROW-4928: [Python] Fix Hypothesis test failures
ee59aaf is described below
commit ee59aaf011ae9b35d536bda67d4bec865d5fb6cd
Author: Antoine Pitrou <[email protected]>
AuthorDate: Tue Mar 19 08:57:48 2019 +0100
ARROW-4928: [Python] Fix Hypothesis test failures
Author: Antoine Pitrou <[email protected]>
Closes #3966 from pitrou/ARROW-4928-hypothesis-test-failures and squashes
the following commits:
2b90e8b7 <Antoine Pitrou> ARROW-4928: Fix Hypothesis test failures
---
cpp/src/arrow/pretty_print-test.cc | 8 ++++----
cpp/src/arrow/util/string.h | 6 +++---
python/pyarrow/tests/conftest.py | 2 +-
python/pyarrow/tests/strategies.py | 11 ++++++++---
python/pyarrow/tests/test_array.py | 14 ++++++++++++++
5 files changed, 30 insertions(+), 11 deletions(-)
diff --git a/cpp/src/arrow/pretty_print-test.cc b/cpp/src/arrow/pretty_print-test.cc
index 4637737..d914a75 100644
--- a/cpp/src/arrow/pretty_print-test.cc
+++ b/cpp/src/arrow/pretty_print-test.cc
@@ -215,12 +215,12 @@ TEST_F(TestPrettyPrint, StructTypeAdvanced) {
}
TEST_F(TestPrettyPrint, BinaryType) {
- std::vector<bool> is_valid = {true, true, false, true, false};
- std::vector<std::string> values = {"foo", "bar", "", "baz", ""};
- static const char* ex = "[\n 666F6F,\n 626172,\n null,\n 62617A,\n null\n]";
+ std::vector<bool> is_valid = {true, true, false, true, true, true};
+ std::vector<std::string> values = {"foo", "bar", "", "baz", "", "\xff"};
+ static const char* ex = "[\n 666F6F,\n 626172,\n null,\n 62617A,\n ,\n FF\n]";
CheckPrimitive<BinaryType, std::string>({0}, is_valid, values, ex);
static const char* ex_in2 =
- " [\n 666F6F,\n 626172,\n null,\n 62617A,\n null\n ]";
+ " [\n 666F6F,\n 626172,\n null,\n 62617A,\n ,\n FF\n ]";
CheckPrimitive<BinaryType, std::string>({2}, is_valid, values, ex_in2);
}
diff --git a/cpp/src/arrow/util/string.h b/cpp/src/arrow/util/string.h
index e4dbcf7..1d716c5 100644
--- a/cpp/src/arrow/util/string.h
+++ b/cpp/src/arrow/util/string.h
@@ -28,7 +28,7 @@ namespace arrow {
static const char* kAsciiTable = "0123456789ABCDEF";
-static inline std::string HexEncode(const char* data, size_t length) {
+static inline std::string HexEncode(const uint8_t* data, size_t length) {
std::string hex_string;
hex_string.reserve(length * 2);
for (size_t j = 0; j < length; ++j) {
@@ -39,8 +39,8 @@ static inline std::string HexEncode(const char* data, size_t length) {
return hex_string;
}
-static inline std::string HexEncode(const uint8_t* data, int32_t length) {
- return HexEncode(reinterpret_cast<const char*>(data), length);
+static inline std::string HexEncode(const char* data, size_t length) {
+ return HexEncode(reinterpret_cast<const uint8_t*>(data), length);
}
static inline std::string HexEncode(util::string_view str) {
diff --git a/python/pyarrow/tests/conftest.py b/python/pyarrow/tests/conftest.py
index daaba59..b02fd3e 100644
--- a/python/pyarrow/tests/conftest.py
+++ b/python/pyarrow/tests/conftest.py
@@ -34,7 +34,7 @@ h.settings.register_profile('debug', max_examples=10,
# load default hypothesis profile, either set HYPOTHESIS_PROFILE environment
# variable or pass --hypothesis-profile option to pytest, to see the generated
# examples try: pytest pyarrow -sv --only-hypothesis --hypothesis-profile=debug
-h.settings.load_profile(os.environ.get('HYPOTHESIS_PROFILE', 'default'))
+h.settings.load_profile(os.environ.get('HYPOTHESIS_PROFILE', 'dev'))
groups = [
diff --git a/python/pyarrow/tests/strategies.py b/python/pyarrow/tests/strategies.py
index 2d81ddd..514c5ad 100644
--- a/python/pyarrow/tests/strategies.py
+++ b/python/pyarrow/tests/strategies.py
@@ -60,8 +60,8 @@ floating_types = st.sampled_from([
])
decimal_type = st.builds(
pa.decimal128,
- precision=st.integers(min_value=0, max_value=38),
- scale=st.integers(min_value=0, max_value=38)
+ precision=st.integers(min_value=1, max_value=38),
+ scale=st.integers(min_value=1, max_value=38)
)
numeric_types = st.one_of(integer_types, floating_types, decimal_type)
@@ -174,7 +174,12 @@ def arrays(draw, type, size=None):
if (pa.types.is_boolean(type) or pa.types.is_integer(type) or
pa.types.is_floating(type)):
values = npst.arrays(type.to_pandas_dtype(), shape=(size,))
- return pa.array(draw(values), type=type)
+ np_arr = draw(values)
+ if pa.types.is_floating(type):
+ # Workaround ARROW-4952: no easy way to assert array equality
+ # in a NaN-tolerant way.
+ np_arr[np.isnan(np_arr)] = -42.0
+ return pa.array(np_arr, type=type)
if pa.types.is_null(type):
value = st.none()
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index eabc875..cef9822 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -99,6 +99,20 @@ def test_long_array_format():
assert result == expected
+def test_binary_format():
+ arr = pa.array([b'\x00', b'', None, b'\x01foo', b'\x80\xff'])
+ result = arr.format()
+ expected = """\
+[
+ 00,
+ ,
+ null,
+ 01666F6F,
+ 80FF
+]"""
+ assert result == expected
+
+
def test_to_numpy_zero_copy():
arr = pa.array(range(10))
old_refcount = sys.getrefcount(arr)