[
https://issues.apache.org/jira/browse/FLINK-10616?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Gary Yao updated FLINK-10616:
-----------------------------
Description:
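While tearing down Hadoop, the tests sporadically fail with the exception below.
Note that xargs returns exit status 123 when an invoked command (here, kill -9)
exits with a non-zero status; presumably a matched process had already
terminated by the time kill ran: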
{noformat}
Caused by: java.lang.RuntimeException: sudo -S -u root bash -c "cd /; ps aux |
grep hadoop | grep -v grep | awk \"\{print \\\$2}\" | xargs kill -9" returned
non-zero exit status 123 on 172.31.39.235. STDOUT:
STDERR:
at jepsen.control$throw_on_nonzero_exit.invokeStatic(control.clj:129)
~[jepsen-0.1.10.jar:na]
at jepsen.control$throw_on_nonzero_exit.invoke(control.clj:122)
~[jepsen-0.1.10.jar:na]
at jepsen.control$exec_STAR_.invokeStatic(control.clj:166)
~[jepsen-0.1.10.jar:na]
at jepsen.control$exec_STAR_.doInvoke(control.clj:163)
~[jepsen-0.1.10.jar:na]
at clojure.lang.RestFn.applyTo(RestFn.java:137) [clojure-1.9.0.jar:na]
at clojure.core$apply.invokeStatic(core.clj:657) ~[clojure-1.9.0.jar:na]
at clojure.core$apply.invoke(core.clj:652) ~[clojure-1.9.0.jar:na]
at jepsen.control$exec.invokeStatic(control.clj:182)
~[jepsen-0.1.10.jar:na]
at jepsen.control$exec.doInvoke(control.clj:176) ~[jepsen-0.1.10.jar:na]
at clojure.lang.RestFn.invoke(RestFn.java:2088) [clojure-1.9.0.jar:na]
at jepsen.control.util$grepkill_BANG_.invokeStatic(util.clj:197)
~[classes/:na]
at jepsen.control.util$grepkill_BANG_.invoke(util.clj:191)
~[classes/:na]
at jepsen.control.util$grepkill_BANG_.invokeStatic(util.clj:194)
~[classes/:na]
at jepsen.control.util$grepkill_BANG_.invoke(util.clj:191)
~[classes/:na]
at jepsen.flink.hadoop$db$reify__3102.teardown_BANG_(hadoop.clj:128)
~[classes/:na]
at jepsen.flink.db$combined_db$reify__217$fn__220.invoke(db.clj:119)
~[na:na]
at clojure.core$map$fn__5587.invoke(core.clj:2745)
~[clojure-1.9.0.jar:na]
at clojure.lang.LazySeq.sval(LazySeq.java:40) ~[clojure-1.9.0.jar:na]
at clojure.lang.LazySeq.seq(LazySeq.java:49) ~[clojure-1.9.0.jar:na]
at clojure.lang.RT.seq(RT.java:528) ~[clojure-1.9.0.jar:na]
at clojure.core$seq__5124.invokeStatic(core.clj:137)
~[clojure-1.9.0.jar:na]
at clojure.core$dorun.invokeStatic(core.clj:3125)
~[clojure-1.9.0.jar:na]
at clojure.core$doall.invokeStatic(core.clj:3140)
~[clojure-1.9.0.jar:na]
at clojure.core$doall.invoke(core.clj:3140) ~[clojure-1.9.0.jar:na]
at jepsen.flink.db$combined_db$reify__217.teardown_BANG_(db.clj:119)
~[na:na]
at jepsen.db$fn__2137$G__2133__2141.invoke(db.clj:8)
~[jepsen-0.1.10.jar:na]
at jepsen.db$fn__2137$G__2132__2146.invoke(db.clj:8)
~[jepsen-0.1.10.jar:na]
at clojure.core$partial$fn__5561.invoke(core.clj:2617)
~[clojure-1.9.0.jar:na]
at jepsen.control$on_nodes$fn__2116.invoke(control.clj:372)
~[jepsen-0.1.10.jar:na]
at clojure.lang.AFn.applyToHelper(AFn.java:154) ~[clojure-1.9.0.jar:na]
at clojure.lang.AFn.applyTo(AFn.java:144) ~[clojure-1.9.0.jar:na]
at clojure.core$apply.invokeStatic(core.clj:657) ~[clojure-1.9.0.jar:na]
at clojure.core$with_bindings_STAR_.invokeStatic(core.clj:1965)
~[clojure-1.9.0.jar:na]
at clojure.core$with_bindings_STAR_.doInvoke(core.clj:1965)
~[clojure-1.9.0.jar:na]
at clojure.lang.RestFn.applyTo(RestFn.java:142) [clojure-1.9.0.jar:na]
at clojure.core$apply.invokeStatic(core.clj:661) ~[clojure-1.9.0.jar:na]
at clojure.core$bound_fn_STAR_$fn__5471.doInvoke(core.clj:1995)
~[clojure-1.9.0.jar:na]
at clojure.lang.RestFn.invoke(RestFn.java:408) [clojure-1.9.0.jar:na]
at jepsen.util$real_pmap$launcher__1168$fn__1169.invoke(util.clj:49)
~[jepsen-0.1.10.jar:na]
at clojure.core$binding_conveyor_fn$fn__5476.invoke(core.clj:2022)
~[clojure-1.9.0.jar:na]
at clojure.lang.AFn.call(AFn.java:18) ~[clojure-1.9.0.jar:na]
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
~[na:1.8.0_171]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
~[na:1.8.0_171]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
~[na:1.8.0_171]
at java.lang.Thread.run(Thread.java:748) ~[na:1.8.0_171]
{noformat}
was:
While tearing down Hadoop, the test fails with the exception below:
{noformat}
Caused by: java.lang.RuntimeException: sudo -S -u root bash -c "cd /; ps aux |
grep hadoop | grep -v grep | awk \"\{print \\\$2}\" | xargs kill -9" returned
non-zero exit status 123 on 172.31.39.235. STDOUT:
STDERR:
at jepsen.control$throw_on_nonzero_exit.invokeStatic(control.clj:129)
~[jepsen-0.1.10.jar:na]
at jepsen.control$throw_on_nonzero_exit.invoke(control.clj:122)
~[jepsen-0.1.10.jar:na]
at jepsen.control$exec_STAR_.invokeStatic(control.clj:166)
~[jepsen-0.1.10.jar:na]
at jepsen.control$exec_STAR_.doInvoke(control.clj:163)
~[jepsen-0.1.10.jar:na]
at clojure.lang.RestFn.applyTo(RestFn.java:137) [clojure-1.9.0.jar:na]
at clojure.core$apply.invokeStatic(core.clj:657) ~[clojure-1.9.0.jar:na]
at clojure.core$apply.invoke(core.clj:652) ~[clojure-1.9.0.jar:na]
at jepsen.control$exec.invokeStatic(control.clj:182)
~[jepsen-0.1.10.jar:na]
at jepsen.control$exec.doInvoke(control.clj:176) ~[jepsen-0.1.10.jar:na]
at clojure.lang.RestFn.invoke(RestFn.java:2088) [clojure-1.9.0.jar:na]
at jepsen.control.util$grepkill_BANG_.invokeStatic(util.clj:197)
~[classes/:na]
at jepsen.control.util$grepkill_BANG_.invoke(util.clj:191)
~[classes/:na]
at jepsen.control.util$grepkill_BANG_.invokeStatic(util.clj:194)
~[classes/:na]
at jepsen.control.util$grepkill_BANG_.invoke(util.clj:191)
~[classes/:na]
at jepsen.flink.hadoop$db$reify__3102.teardown_BANG_(hadoop.clj:128)
~[classes/:na]
at jepsen.flink.db$combined_db$reify__217$fn__220.invoke(db.clj:119)
~[na:na]
at clojure.core$map$fn__5587.invoke(core.clj:2745)
~[clojure-1.9.0.jar:na]
at clojure.lang.LazySeq.sval(LazySeq.java:40) ~[clojure-1.9.0.jar:na]
at clojure.lang.LazySeq.seq(LazySeq.java:49) ~[clojure-1.9.0.jar:na]
at clojure.lang.RT.seq(RT.java:528) ~[clojure-1.9.0.jar:na]
at clojure.core$seq__5124.invokeStatic(core.clj:137)
~[clojure-1.9.0.jar:na]
at clojure.core$dorun.invokeStatic(core.clj:3125)
~[clojure-1.9.0.jar:na]
at clojure.core$doall.invokeStatic(core.clj:3140)
~[clojure-1.9.0.jar:na]
at clojure.core$doall.invoke(core.clj:3140) ~[clojure-1.9.0.jar:na]
at jepsen.flink.db$combined_db$reify__217.teardown_BANG_(db.clj:119)
~[na:na]
at jepsen.db$fn__2137$G__2133__2141.invoke(db.clj:8)
~[jepsen-0.1.10.jar:na]
at jepsen.db$fn__2137$G__2132__2146.invoke(db.clj:8)
~[jepsen-0.1.10.jar:na]
at clojure.core$partial$fn__5561.invoke(core.clj:2617)
~[clojure-1.9.0.jar:na]
at jepsen.control$on_nodes$fn__2116.invoke(control.clj:372)
~[jepsen-0.1.10.jar:na]
at clojure.lang.AFn.applyToHelper(AFn.java:154) ~[clojure-1.9.0.jar:na]
at clojure.lang.AFn.applyTo(AFn.java:144) ~[clojure-1.9.0.jar:na]
at clojure.core$apply.invokeStatic(core.clj:657) ~[clojure-1.9.0.jar:na]
at clojure.core$with_bindings_STAR_.invokeStatic(core.clj:1965)
~[clojure-1.9.0.jar:na]
at clojure.core$with_bindings_STAR_.doInvoke(core.clj:1965)
~[clojure-1.9.0.jar:na]
at clojure.lang.RestFn.applyTo(RestFn.java:142) [clojure-1.9.0.jar:na]
at clojure.core$apply.invokeStatic(core.clj:661) ~[clojure-1.9.0.jar:na]
at clojure.core$bound_fn_STAR_$fn__5471.doInvoke(core.clj:1995)
~[clojure-1.9.0.jar:na]
at clojure.lang.RestFn.invoke(RestFn.java:408) [clojure-1.9.0.jar:na]
at jepsen.util$real_pmap$launcher__1168$fn__1169.invoke(util.clj:49)
~[jepsen-0.1.10.jar:na]
at clojure.core$binding_conveyor_fn$fn__5476.invoke(core.clj:2022)
~[clojure-1.9.0.jar:na]
at clojure.lang.AFn.call(AFn.java:18) ~[clojure-1.9.0.jar:na]
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
~[na:1.8.0_171]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
~[na:1.8.0_171]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
~[na:1.8.0_171]
at java.lang.Thread.run(Thread.java:748) ~[na:1.8.0_171]
{noformat}
> Jepsen test fails while tearing down Hadoop
> -------------------------------------------
>
> Key: FLINK-10616
> URL: https://issues.apache.org/jira/browse/FLINK-10616
> Project: Flink
> Issue Type: Bug
> Components: Tests
> Affects Versions: 1.6.1, 1.7.0
> Reporter: Gary Yao
> Priority: Major
>
> While tearing down Hadoop, the tests sporadically fail with the exception
> below:
> {noformat}
> Caused by: java.lang.RuntimeException: sudo -S -u root bash -c "cd /; ps aux
> | grep hadoop | grep -v grep | awk \"\{print \\\$2}\" | xargs kill -9"
> returned non-zero exit status 123 on 172.31.39.235. STDOUT:
> STDERR:
> at jepsen.control$throw_on_nonzero_exit.invokeStatic(control.clj:129)
> ~[jepsen-0.1.10.jar:na]
> at jepsen.control$throw_on_nonzero_exit.invoke(control.clj:122)
> ~[jepsen-0.1.10.jar:na]
> at jepsen.control$exec_STAR_.invokeStatic(control.clj:166)
> ~[jepsen-0.1.10.jar:na]
> at jepsen.control$exec_STAR_.doInvoke(control.clj:163)
> ~[jepsen-0.1.10.jar:na]
> at clojure.lang.RestFn.applyTo(RestFn.java:137) [clojure-1.9.0.jar:na]
> at clojure.core$apply.invokeStatic(core.clj:657)
> ~[clojure-1.9.0.jar:na]
> at clojure.core$apply.invoke(core.clj:652) ~[clojure-1.9.0.jar:na]
> at jepsen.control$exec.invokeStatic(control.clj:182)
> ~[jepsen-0.1.10.jar:na]
> at jepsen.control$exec.doInvoke(control.clj:176)
> ~[jepsen-0.1.10.jar:na]
> at clojure.lang.RestFn.invoke(RestFn.java:2088) [clojure-1.9.0.jar:na]
> at jepsen.control.util$grepkill_BANG_.invokeStatic(util.clj:197)
> ~[classes/:na]
> at jepsen.control.util$grepkill_BANG_.invoke(util.clj:191)
> ~[classes/:na]
> at jepsen.control.util$grepkill_BANG_.invokeStatic(util.clj:194)
> ~[classes/:na]
> at jepsen.control.util$grepkill_BANG_.invoke(util.clj:191)
> ~[classes/:na]
> at jepsen.flink.hadoop$db$reify__3102.teardown_BANG_(hadoop.clj:128)
> ~[classes/:na]
> at jepsen.flink.db$combined_db$reify__217$fn__220.invoke(db.clj:119)
> ~[na:na]
> at clojure.core$map$fn__5587.invoke(core.clj:2745)
> ~[clojure-1.9.0.jar:na]
> at clojure.lang.LazySeq.sval(LazySeq.java:40) ~[clojure-1.9.0.jar:na]
> at clojure.lang.LazySeq.seq(LazySeq.java:49) ~[clojure-1.9.0.jar:na]
> at clojure.lang.RT.seq(RT.java:528) ~[clojure-1.9.0.jar:na]
> at clojure.core$seq__5124.invokeStatic(core.clj:137)
> ~[clojure-1.9.0.jar:na]
> at clojure.core$dorun.invokeStatic(core.clj:3125)
> ~[clojure-1.9.0.jar:na]
> at clojure.core$doall.invokeStatic(core.clj:3140)
> ~[clojure-1.9.0.jar:na]
> at clojure.core$doall.invoke(core.clj:3140) ~[clojure-1.9.0.jar:na]
> at jepsen.flink.db$combined_db$reify__217.teardown_BANG_(db.clj:119)
> ~[na:na]
> at jepsen.db$fn__2137$G__2133__2141.invoke(db.clj:8)
> ~[jepsen-0.1.10.jar:na]
> at jepsen.db$fn__2137$G__2132__2146.invoke(db.clj:8)
> ~[jepsen-0.1.10.jar:na]
> at clojure.core$partial$fn__5561.invoke(core.clj:2617)
> ~[clojure-1.9.0.jar:na]
> at jepsen.control$on_nodes$fn__2116.invoke(control.clj:372)
> ~[jepsen-0.1.10.jar:na]
> at clojure.lang.AFn.applyToHelper(AFn.java:154)
> ~[clojure-1.9.0.jar:na]
> at clojure.lang.AFn.applyTo(AFn.java:144) ~[clojure-1.9.0.jar:na]
> at clojure.core$apply.invokeStatic(core.clj:657)
> ~[clojure-1.9.0.jar:na]
> at clojure.core$with_bindings_STAR_.invokeStatic(core.clj:1965)
> ~[clojure-1.9.0.jar:na]
> at clojure.core$with_bindings_STAR_.doInvoke(core.clj:1965)
> ~[clojure-1.9.0.jar:na]
> at clojure.lang.RestFn.applyTo(RestFn.java:142) [clojure-1.9.0.jar:na]
> at clojure.core$apply.invokeStatic(core.clj:661)
> ~[clojure-1.9.0.jar:na]
> at clojure.core$bound_fn_STAR_$fn__5471.doInvoke(core.clj:1995)
> ~[clojure-1.9.0.jar:na]
> at clojure.lang.RestFn.invoke(RestFn.java:408) [clojure-1.9.0.jar:na]
> at jepsen.util$real_pmap$launcher__1168$fn__1169.invoke(util.clj:49)
> ~[jepsen-0.1.10.jar:na]
> at clojure.core$binding_conveyor_fn$fn__5476.invoke(core.clj:2022)
> ~[clojure-1.9.0.jar:na]
> at clojure.lang.AFn.call(AFn.java:18) ~[clojure-1.9.0.jar:na]
> at java.util.concurrent.FutureTask.run(FutureTask.java:266)
> ~[na:1.8.0_171]
> at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> ~[na:1.8.0_171]
> at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> ~[na:1.8.0_171]
> at java.lang.Thread.run(Thread.java:748) ~[na:1.8.0_171]
> {noformat}
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)