[
https://issues.apache.org/jira/browse/SOLR-16414?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17607364#comment-17607364
]
Kevin Risden commented on SOLR-16414:
-------------------------------------
It looks like this is causing NPE in
org.apache.solr.cloud.ZkControllerTest.testPublishAndWaitForDownStates
https://lists.apache.org/[email protected]:lte=1M:org.apache.solr.cloud.ZkControllerTest.testPublishAndWaitForDownStates
{code:java}
FAILED: org.apache.solr.cloud.ZkControllerTest.testPublishAndWaitForDownStates
Error Message:
java.lang.NullPointerException: Cannot invoke
"org.apache.solr.common.cloud.DocCollection.getName()" because "coll" is null
Stack Trace:
java.lang.NullPointerException: Cannot invoke
"org.apache.solr.common.cloud.DocCollection.getName()" because "coll" is null
at __randomizedtesting.SeedInfo.seed([9041825327B5F309:B74EEEBF8622BC36]:0)
at
org.apache.solr.cloud.ZkController.lambda$publishNodeAsDown$24(ZkController.java:2930)
at
java.base/java.util.stream.ForEachOps$ForEachOp$OfRef.accept(ForEachOps.java:183)
at java.base/java.util.Collections$2.tryAdvance(Collections.java:4820)
at java.base/java.util.Collections$2.forEachRemaining(Collections.java:4828)
at
java.base/java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:484)
at
java.base/java.util.stream.ForEachOps$ForEachTask.compute(ForEachOps.java:290)
at
java.base/java.util.concurrent.CountedCompleter.exec(CountedCompleter.java:746)
at java.base/java.util.concurrent.ForkJoinTask.doExec(ForkJoinTask.java:295)
at java.base/java.util.concurrent.ForkJoinTask.doInvoke(ForkJoinTask.java:413)
at java.base/java.util.concurrent.ForkJoinTask.invoke(ForkJoinTask.java:741)
at
java.base/java.util.stream.ForEachOps$ForEachOp.evaluateParallel(ForEachOps.java:159)
at
java.base/java.util.stream.ForEachOps$ForEachOp$OfRef.evaluateParallel(ForEachOps.java:173)
at
java.base/java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:233)
at
java.base/java.util.stream.ReferencePipeline.forEach(ReferencePipeline.java:596)
at
java.base/java.util.stream.ReferencePipeline$Head.forEach(ReferencePipeline.java:765)
at org.apache.solr.cloud.ZkController.publishNodeAsDown(ZkController.java:2927)
at
org.apache.solr.cloud.ZkController.publishAndWaitForDownStates(ZkController.java:1131)
at
org.apache.solr.cloud.ZkControllerTest.testPublishAndWaitForDownStates(ZkControllerTest.java:330)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native
Method)
at
java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:78)
at
java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.base/java.lang.reflect.Method.invoke(Method.java:567)
at
com.carrotsearch.randomizedtesting.RandomizedRunner.invoke(RandomizedRunner.java:1758)
at
com.carrotsearch.randomizedtesting.RandomizedRunner$8.evaluate(RandomizedRunner.java:946)
at
com.carrotsearch.randomizedtesting.RandomizedRunner$9.evaluate(RandomizedRunner.java:982)
at
com.carrotsearch.randomizedtesting.RandomizedRunner$10.evaluate(RandomizedRunner.java:996)
at
com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule$1.evaluate(SystemPropertiesRestoreRule.java:80)
at org.junit.rules.RunRules.evaluate(RunRules.java:20)
at
org.apache.lucene.tests.util.TestRuleSetupTeardownChained$1.evaluate(TestRuleSetupTeardownChained.java:44)
at
org.apache.lucene.tests.util.AbstractBeforeAfterRule$1.evaluate(AbstractBeforeAfterRule.java:43)
at
org.apache.lucene.tests.util.TestRuleThreadAndTestName$1.evaluate(TestRuleThreadAndTestName.java:45)
at
org.apache.lucene.tests.util.TestRuleIgnoreAfterMaxFailures$1.evaluate(TestRuleIgnoreAfterMaxFailures.java:60)
at
org.apache.lucene.tests.util.TestRuleMarkFailure$1.evaluate(TestRuleMarkFailure.java:44)
at org.junit.rules.RunRules.evaluate(RunRules.java:20)
at
com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36)
at
com.carrotsearch.randomizedtesting.ThreadLeakControl$StatementRunner.run(ThreadLeakControl.java:390)
at
com.carrotsearch.randomizedtesting.ThreadLeakControl.forkTimeoutingTask(ThreadLeakControl.java:843)
at
com.carrotsearch.randomizedtesting.ThreadLeakControl$3.evaluate(ThreadLeakControl.java:490)
at
com.carrotsearch.randomizedtesting.RandomizedRunner.runSingleTest(RandomizedRunner.java:955)
at
com.carrotsearch.randomizedtesting.RandomizedRunner$5.evaluate(RandomizedRunner.java:840)
at
com.carrotsearch.randomizedtesting.RandomizedRunner$6.evaluate(RandomizedRunner.java:891)
at
com.carrotsearch.randomizedtesting.RandomizedRunner$7.evaluate(RandomizedRunner.java:902)
at
com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36)
at
org.apache.lucene.tests.util.AbstractBeforeAfterRule$1.evaluate(AbstractBeforeAfterRule.java:43)
at
org.apache.lucene.tests.util.AbstractBeforeAfterRule$1.evaluate(AbstractBeforeAfterRule.java:43)
at
com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule$1.evaluate(SystemPropertiesRestoreRule.java:80)
at org.junit.rules.RunRules.evaluate(RunRules.java:20)
at
org.apache.lucene.tests.util.AbstractBeforeAfterRule$1.evaluate(AbstractBeforeAfterRule.java:43)
at
com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36)
at
org.apache.lucene.tests.util.TestRuleStoreClassName$1.evaluate(TestRuleStoreClassName.java:38)
at
com.carrotsearch.randomizedtesting.rules.NoShadowingOrOverridesOnMethodsRule$1.evaluate(NoShadowingOrOverridesOnMethodsRule.java:40)
at
com.carrotsearch.randomizedtesting.rules.NoShadowingOrOverridesOnMethodsRule$1.evaluate(NoShadowingOrOverridesOnMethodsRule.java:40)
at
com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36)
at
com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36)
at
org.apache.lucene.tests.util.TestRuleAssertionsRequired$1.evaluate(TestRuleAssertionsRequired.java:53)
at
org.apache.lucene.tests.util.AbstractBeforeAfterRule$1.evaluate(AbstractBeforeAfterRule.java:43)
at
org.apache.lucene.tests.util.TestRuleMarkFailure$1.evaluate(TestRuleMarkFailure.java:44)
at
org.apache.lucene.tests.util.TestRuleIgnoreAfterMaxFailures$1.evaluate(TestRuleIgnoreAfterMaxFailures.java:60)
at
org.apache.lucene.tests.util.TestRuleIgnoreTestSuites$1.evaluate(TestRuleIgnoreTestSuites.java:47)
at org.junit.rules.RunRules.evaluate(RunRules.java:20)
at
com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36)
at
com.carrotsearch.randomizedtesting.ThreadLeakControl$StatementRunner.run(ThreadLeakControl.java:390)
at
com.carrotsearch.randomizedtesting.ThreadLeakControl.lambda$forkTimeoutingTask$0(ThreadLeakControl.java:850)
at java.base/java.lang.Thread.run(Thread.java:831)
{code}
> Race condition in PRS state updates
> -----------------------------------
>
> Key: SOLR-16414
> URL: https://issues.apache.org/jira/browse/SOLR-16414
> Project: Solr
> Issue Type: Bug
> Security Level: Public(Default Security Level. Issues are Public)
> Reporter: Noble Paul
> Priority: Major
> Fix For: 9.1
>
> Time Spent: 0.5h
> Remaining Estimate: 0h
>
> For PRS collections the individual states are potentially updated from
> individual nodes and sometimes from overseer too. it's possible that
>
> # OP1 is sent to overseer at T1
> # OP2 is executed in the node itself at T2
>
> Because we cannot guarantee that the OP1 sent to overseer may execute before
> OP2 tyhe final state will be the result of OP1 which is incorrect and can
> lead to errors .
> The solution is to never do any PRS writes from overseer.
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]