David Li created ARROW-12668:
--------------------------------

             Summary: [C++][Dataset] CountRows occasionally segfaulting
                 Key: ARROW-12668
                 URL: https://issues.apache.org/jira/browse/ARROW-12668
             Project: Apache Arrow
          Issue Type: Improvement
          Components: C++
            Reporter: David Li
            Assignee: David Li
             Fix For: 5.0.0


[https://github.com/apache/arrow/pull/9656/checks?check_run_id=2518312525]
{noformat}
Start test: dim() correctly determine numbers of rows and columns on 
arrow_dplyr_query object

 *** caught segfault ***
address 0x7ff7cf2cf8f8, cause 'invalid permissions'

Traceback:
 1: dataset___Scanner__CountRows(self)
 2: scanner$CountRows()
 3: dim.arrow_dplyr_query(.)
 4: dim(.)
 5: ds %>% filter(chr == "a") %>% dim()
 6: eval_bare(expr, quo_get_env(quo))
 7: quasi_label(enquo(object), label, arg = "object")
 8: expect_identical(ds %>% filter(chr == "a") %>% dim(), c(2L, 7L))
 9: eval(code, test_env)
10: eval(code, test_env)
11: withCallingHandlers({    eval(code, test_env)    if (!handled && 
!is.null(test)) {        skip_empty()    }}, expectation = handle_expectation, 
skip = handle_skip, warning = handle_warning,     message = handle_message, 
error = handle_error)
12: doTryCatch(return(expr), name, parentenv, handler)
13: tryCatchOne(expr, names, parentenv, handlers[[1L]])
14: tryCatchList(expr, names[-nh], parentenv, handlers[-nh])
15: doTryCatch(return(expr), name, parentenv, handler)
16: tryCatchOne(tryCatchList(expr, names[-nh], parentenv, handlers[-nh]),     
names[nh], parentenv, handlers[[nh]])
17: tryCatchList(expr, classes, parentenv, handlers)
18: tryCatch(withCallingHandlers({    eval(code, test_env)    if (!handled && 
!is.null(test)) {        skip_empty()    }}, expectation = handle_expectation, 
skip = handle_skip, warning = handle_warning,     message = handle_message, 
error = handle_error), error = handle_fatal,     skip = function(e) {    })
19: test_code(desc, code, env = parent.frame(), reporter = reporter)
20: testthat::test_that(what, {    skip_if(getOption("..skip.tests", TRUE), 
"arrow C++ library not available")    code})
21: test_that("dim() correctly determine numbers of rows and columns on 
arrow_dplyr_query object",     {        skip_if_not_available("parquet")        
ds <- open_dataset(dataset_dir, partitioning = schema(part = uint8()))        
expect_identical(ds %>% filter(chr == "a") %>% dim(),             c(2L, 7L))    
    expect_equal(ds %>% select(chr, fct, int) %>% dim(),             c(20L, 
3L))        expect_identical(ds %>% select(chr, fct, int) %>% filter(chr ==     
        "a") %>% dim(), c(2L, 3L))    })
22: eval(code, test_env)
23: eval(code, test_env)
24: withCallingHandlers({    eval(code, test_env)    if (!handled && 
!is.null(test)) {        skip_empty()    }}, expectation = handle_expectation, 
skip = handle_skip, warning = handle_warning,     message = handle_message, 
error = handle_error)
25: doTryCatch(return(expr), name, parentenv, handler)
26: tryCatchOne(expr, names, parentenv, handlers[[1L]])
27: tryCatchList(expr, names[-nh], parentenv, handlers[-nh])
28: doTryCatch(return(expr), name, parentenv, handler)
29: tryCatchOne(tryCatchList(expr, names[-nh], parentenv, handlers[-nh]),     
names[nh], parentenv, handlers[[nh]])
30: tryCatchList(expr, classes, parentenv, handlers)
31: tryCatch(withCallingHandlers({    eval(code, test_env)    if (!handled && 
!is.null(test)) {        skip_empty()    }}, expectation = handle_expectation, 
skip = handle_skip, warning = handle_warning,     message = handle_message, 
error = handle_error), error = handle_fatal,     skip = function(e) {    })
32: test_code(NULL, exprs, env)
33: source_file(path, child_env(env), wrap = wrap)
34: FUN(X[[i]], ...)
35: lapply(test_paths, test_one_file, env = env, wrap = wrap)
36: force(code)
37: doWithOneRestart(return(expr), restart)
38: withOneRestart(expr, restarts[[1L]])
39: withRestarts(testthat_abort_reporter = function() NULL, force(code))
40: with_reporter(reporters$multi, lapply(test_paths, test_one_file,     env = 
env, wrap = wrap))
41: test_files(test_dir = test_dir, test_package = test_package,     test_paths 
= test_paths, load_helpers = load_helpers, reporter = reporter,     env = env, 
stop_on_failure = stop_on_failure, stop_on_warning = stop_on_warning,     wrap 
= wrap, load_package = load_package)
42: test_files(test_dir = path, test_paths = test_paths, test_package = 
package,     reporter = reporter, load_helpers = load_helpers, env = env,     
stop_on_failure = stop_on_failure, stop_on_warning = stop_on_warning,     wrap 
= wrap, load_package = load_package, parallel = parallel)
43: test_dir("testthat", package = package, reporter = reporter,     ..., 
load_package = "installed")
44: test_check("arrow", reporter = arrow_reporter)
An irrecoverable exception occurred. R is aborting now ...{noformat}
The test also seems to give the wrong results sometimes 
([https://github.com/apache/arrow/pull/9656/checks?check_run_id=2518312803])
{noformat}
== Failed tests ================================================================
-- Failure (test-dataset.R:148:3): dim() correctly determine numbers of rows 
and columns on arrow_dplyr_query object --
ds %>% filter(chr == "a") %>% dim() not identical to c(2L, 7L).
1/2 mismatches
[1] 1 - 2 == -1
 {noformat}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to