David Li created ARROW-12668:
--------------------------------
Summary: [C++][Dataset] CountRows occasionally segfaulting
Key: ARROW-12668
URL: https://issues.apache.org/jira/browse/ARROW-12668
Project: Apache Arrow
Issue Type: Improvement
Components: C++
Reporter: David Li
Assignee: David Li
Fix For: 5.0.0
Observed intermittently in CI:
[https://github.com/apache/arrow/pull/9656/checks?check_run_id=2518312525]
{noformat}
Start test: dim() correctly determine numbers of rows and columns on
arrow_dplyr_query object
*** caught segfault ***
address 0x7ff7cf2cf8f8, cause 'invalid permissions'
Traceback:
1: dataset___Scanner__CountRows(self)
2: scanner$CountRows()
3: dim.arrow_dplyr_query(.)
4: dim(.)
5: ds %>% filter(chr == "a") %>% dim()
6: eval_bare(expr, quo_get_env(quo))
7: quasi_label(enquo(object), label, arg = "object")
8: expect_identical(ds %>% filter(chr == "a") %>% dim(), c(2L, 7L))
9: eval(code, test_env)
10: eval(code, test_env)
11: withCallingHandlers({ eval(code, test_env) if (!handled &&
!is.null(test)) { skip_empty() }}, expectation = handle_expectation,
skip = handle_skip, warning = handle_warning, message = handle_message,
error = handle_error)
12: doTryCatch(return(expr), name, parentenv, handler)
13: tryCatchOne(expr, names, parentenv, handlers[[1L]])
14: tryCatchList(expr, names[-nh], parentenv, handlers[-nh])
15: doTryCatch(return(expr), name, parentenv, handler)
16: tryCatchOne(tryCatchList(expr, names[-nh], parentenv, handlers[-nh]),
names[nh], parentenv, handlers[[nh]])
17: tryCatchList(expr, classes, parentenv, handlers)
18: tryCatch(withCallingHandlers({ eval(code, test_env) if (!handled &&
!is.null(test)) { skip_empty() }}, expectation = handle_expectation,
skip = handle_skip, warning = handle_warning, message = handle_message,
error = handle_error), error = handle_fatal, skip = function(e) { })
19: test_code(desc, code, env = parent.frame(), reporter = reporter)
20: testthat::test_that(what, { skip_if(getOption("..skip.tests", TRUE),
"arrow C++ library not available") code})
21: test_that("dim() correctly determine numbers of rows and columns on
arrow_dplyr_query object", { skip_if_not_available("parquet")
ds <- open_dataset(dataset_dir, partitioning = schema(part = uint8()))
expect_identical(ds %>% filter(chr == "a") %>% dim(), c(2L, 7L))
expect_equal(ds %>% select(chr, fct, int) %>% dim(), c(20L,
3L)) expect_identical(ds %>% select(chr, fct, int) %>% filter(chr ==
"a") %>% dim(), c(2L, 3L)) })
22: eval(code, test_env)
23: eval(code, test_env)
24: withCallingHandlers({ eval(code, test_env) if (!handled &&
!is.null(test)) { skip_empty() }}, expectation = handle_expectation,
skip = handle_skip, warning = handle_warning, message = handle_message,
error = handle_error)
25: doTryCatch(return(expr), name, parentenv, handler)
26: tryCatchOne(expr, names, parentenv, handlers[[1L]])
27: tryCatchList(expr, names[-nh], parentenv, handlers[-nh])
28: doTryCatch(return(expr), name, parentenv, handler)
29: tryCatchOne(tryCatchList(expr, names[-nh], parentenv, handlers[-nh]),
names[nh], parentenv, handlers[[nh]])
30: tryCatchList(expr, classes, parentenv, handlers)
31: tryCatch(withCallingHandlers({ eval(code, test_env) if (!handled &&
!is.null(test)) { skip_empty() }}, expectation = handle_expectation,
skip = handle_skip, warning = handle_warning, message = handle_message,
error = handle_error), error = handle_fatal, skip = function(e) { })
32: test_code(NULL, exprs, env)
33: source_file(path, child_env(env), wrap = wrap)
34: FUN(X[[i]], ...)
35: lapply(test_paths, test_one_file, env = env, wrap = wrap)
36: force(code)
37: doWithOneRestart(return(expr), restart)
38: withOneRestart(expr, restarts[[1L]])
39: withRestarts(testthat_abort_reporter = function() NULL, force(code))
40: with_reporter(reporters$multi, lapply(test_paths, test_one_file, env =
env, wrap = wrap))
41: test_files(test_dir = test_dir, test_package = test_package, test_paths
= test_paths, load_helpers = load_helpers, reporter = reporter, env = env,
stop_on_failure = stop_on_failure, stop_on_warning = stop_on_warning, wrap
= wrap, load_package = load_package)
42: test_files(test_dir = path, test_paths = test_paths, test_package =
package, reporter = reporter, load_helpers = load_helpers, env = env,
stop_on_failure = stop_on_failure, stop_on_warning = stop_on_warning, wrap
= wrap, load_package = load_package, parallel = parallel)
43: test_dir("testthat", package = package, reporter = reporter, ...,
load_package = "installed")
44: test_check("arrow", reporter = arrow_reporter)
An irrecoverable exception occurred. R is aborting now ...
{noformat}
The same test also intermittently returns the wrong result instead of crashing: the filtered query reports 1 row where 2 are expected
([https://github.com/apache/arrow/pull/9656/checks?check_run_id=2518312803]):
{noformat}
== Failed tests ================================================================
-- Failure (test-dataset.R:148:3): dim() correctly determine numbers of rows
and columns on arrow_dplyr_query object --
ds %>% filter(chr == "a") %>% dim() not identical to c(2L, 7L).
1/2 mismatches
[1] 1 - 2 == -1
{noformat}
--
This message was sent by Atlassian Jira
(v8.3.4#803005)