[GitHub] [lucene-solr] vthacker commented on a change in pull request #1620: SOLR-14590 : Add support for Lucene's FeatureField in Solr
vthacker commented on a change in pull request #1620: URL: https://github.com/apache/lucene-solr/pull/1620#discussion_r447419952 ## File path: solr/core/src/test/org/apache/solr/schema/RankFieldTest.java ## @@ -0,0 +1,261 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.schema; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; + +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.util.BytesRef; +import org.apache.solr.SolrTestCaseJ4; +import org.junit.BeforeClass; +import org.junit.Ignore; + +public class RankFieldTest extends SolrTestCaseJ4 { + + private static final String RANK_1 = "rank_1"; + private static final String RANK_2 = "rank_2"; + + @BeforeClass + public static void beforeClass() throws Exception { +initCore("solrconfig-minimal.xml","schema-rank-fields.xml"); + } + + @Override + public void setUp() throws Exception { +clearIndex(); +assertU(commit()); +super.setUp(); + } + + public void testInternalFieldName() { +assertEquals("RankField.INTERNAL_RANK_FIELD_NAME changed in an incompatible way", +"_rank_", RankField.INTERNAL_RANK_FIELD_NAME); + } + + public void testBasic() { +assertNotNull(h.getCore().getLatestSchema().getFieldOrNull(RANK_1)); +assertEquals(RankField.class, h.getCore().getLatestSchema().getField(RANK_1).getType().getClass()); + } + + public void testBadFormat() { +ignoreException("Expecting float"); +assertFailedU(adoc( +"id", "1", +RANK_1, "foo" +)); + +assertFailedU(adoc( +"id", "1", +RANK_1, "1.2.3" +)); + +unIgnoreException("Expecting float"); + +ignoreException("must be finite"); +assertFailedU(adoc( +"id", "1", +RANK_1, Float.toString(Float.POSITIVE_INFINITY) +)); + +assertFailedU(adoc( +"id", "1", +RANK_1, Float.toString(Float.NEGATIVE_INFINITY) +)); + +assertFailedU(adoc( +"id", "1", +RANK_1, Float.toString(Float.NaN) +)); + +unIgnoreException("must be finite"); + +ignoreException("must be a positive"); +assertFailedU(adoc( +"id", "1", +RANK_1, Float.toString(-0.0f) +)); + +assertFailedU(adoc( +"id", "1", +RANK_1, Float.toString(-1f) +)); + +assertFailedU(adoc( +"id", "1", +RANK_1, Float.toString(0.0f) +)); +unIgnoreException("must be a positive"); + } + + public void testAddRandom() { +for (int i = 0 ; i < 
random().nextInt(TEST_NIGHTLY ? 1 : 100); i++) { + assertU(adoc( + "id", String.valueOf(i), + RANK_1, Float.toString(random().nextFloat()) + )); +} +assertU(commit()); + } + + public void testSkipEmpty() { +assertU(adoc( +"id", "1", +RANK_1, "" +)); + } + + public void testBasicAdd() throws IOException { +assertU(adoc( +"id", "testBasicAdd", +RANK_1, "1" +)); +assertU(commit()); +//assert that the document made it in +assertQ(req("q", "id:testBasicAdd"), "//*[@numFound='1']"); +h.getCore().withSearcher((searcher) -> { + LeafReader reader = searcher.getIndexReader().getContext().leaves().get(0).reader(); + // assert that the field made it in + assertNotNull(reader.getFieldInfos().fieldInfo(RankField.INTERNAL_RANK_FIELD_NAME)); + // assert that the feature made it in + assertTrue(reader.terms(RankField.INTERNAL_RANK_FIELD_NAME).iterator().seekExact(new BytesRef(RANK_1.getBytes(StandardCharsets.UTF_8; + return null; +}); + } + + public void testMultipleRankFields() throws IOException { +assertU(adoc( +"id", "testMultiValueAdd", +RANK_1, "1", +RANK_2, "2" +)); +assertU(commit()); +//assert that the document made it in +assertQ(req("q", "id:testMultiValueAdd"), "//*[@numFound='1']"); +h.getCore().withSearcher((searcher) -> { + LeafReader reader = searcher.getIndexReader().getContext().leaves().get(0).reader(); + // assert that the field made it in +
[GitHub] [lucene-solr] vthacker commented on a change in pull request #1620: SOLR-14590 : Add support for Lucene's FeatureField in Solr
vthacker commented on a change in pull request #1620: URL: https://github.com/apache/lucene-solr/pull/1620#discussion_r447419709 ## File path: solr/core/src/test/org/apache/solr/schema/RankFieldTest.java ## @@ -0,0 +1,261 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.schema; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; + +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.util.BytesRef; +import org.apache.solr.SolrTestCaseJ4; +import org.junit.BeforeClass; +import org.junit.Ignore; + +public class RankFieldTest extends SolrTestCaseJ4 { + + private static final String RANK_1 = "rank_1"; + private static final String RANK_2 = "rank_2"; + + @BeforeClass + public static void beforeClass() throws Exception { +initCore("solrconfig-minimal.xml","schema-rank-fields.xml"); + } + + @Override + public void setUp() throws Exception { +clearIndex(); +assertU(commit()); +super.setUp(); + } + + public void testInternalFieldName() { +assertEquals("RankField.INTERNAL_RANK_FIELD_NAME changed in an incompatible way", +"_rank_", RankField.INTERNAL_RANK_FIELD_NAME); + } + + public void testBasic() { +assertNotNull(h.getCore().getLatestSchema().getFieldOrNull(RANK_1)); +assertEquals(RankField.class, h.getCore().getLatestSchema().getField(RANK_1).getType().getClass()); + } + + public void testBadFormat() { +ignoreException("Expecting float"); +assertFailedU(adoc( +"id", "1", +RANK_1, "foo" +)); + +assertFailedU(adoc( +"id", "1", +RANK_1, "1.2.3" +)); + +unIgnoreException("Expecting float"); + +ignoreException("must be finite"); +assertFailedU(adoc( +"id", "1", +RANK_1, Float.toString(Float.POSITIVE_INFINITY) +)); + +assertFailedU(adoc( +"id", "1", +RANK_1, Float.toString(Float.NEGATIVE_INFINITY) +)); + +assertFailedU(adoc( +"id", "1", +RANK_1, Float.toString(Float.NaN) +)); + +unIgnoreException("must be finite"); + +ignoreException("must be a positive"); +assertFailedU(adoc( +"id", "1", +RANK_1, Float.toString(-0.0f) +)); + +assertFailedU(adoc( +"id", "1", +RANK_1, Float.toString(-1f) +)); + +assertFailedU(adoc( +"id", "1", +RANK_1, Float.toString(0.0f) +)); +unIgnoreException("must be a positive"); + } + + public void testAddRandom() { +for (int i = 0 ; i < 
random().nextInt(TEST_NIGHTLY ? 1 : 100); i++) { + assertU(adoc( + "id", String.valueOf(i), + RANK_1, Float.toString(random().nextFloat()) + )); +} +assertU(commit()); + } + + public void testSkipEmpty() { +assertU(adoc( +"id", "1", +RANK_1, "" +)); + } + + public void testBasicAdd() throws IOException { +assertU(adoc( +"id", "testBasicAdd", +RANK_1, "1" +)); +assertU(commit()); +//assert that the document made it in +assertQ(req("q", "id:testBasicAdd"), "//*[@numFound='1']"); +h.getCore().withSearcher((searcher) -> { + LeafReader reader = searcher.getIndexReader().getContext().leaves().get(0).reader(); + // assert that the field made it in + assertNotNull(reader.getFieldInfos().fieldInfo(RankField.INTERNAL_RANK_FIELD_NAME)); + // assert that the feature made it in + assertTrue(reader.terms(RankField.INTERNAL_RANK_FIELD_NAME).iterator().seekExact(new BytesRef(RANK_1.getBytes(StandardCharsets.UTF_8; + return null; +}); + } + + public void testMultipleRankFields() throws IOException { +assertU(adoc( +"id", "testMultiValueAdd", +RANK_1, "1", +RANK_2, "2" +)); +assertU(commit()); +//assert that the document made it in +assertQ(req("q", "id:testMultiValueAdd"), "//*[@numFound='1']"); +h.getCore().withSearcher((searcher) -> { + LeafReader reader = searcher.getIndexReader().getContext().leaves().get(0).reader(); + // assert that the field made it in +
[GitHub] [lucene-solr] vthacker commented on a change in pull request #1620: SOLR-14590 : Add support for Lucene's FeatureField in Solr
vthacker commented on a change in pull request #1620: URL: https://github.com/apache/lucene-solr/pull/1620#discussion_r447418777 ## File path: solr/core/src/test/org/apache/solr/schema/RankFieldTest.java ## @@ -0,0 +1,261 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.schema; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; + +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.util.BytesRef; +import org.apache.solr.SolrTestCaseJ4; +import org.junit.BeforeClass; +import org.junit.Ignore; + +public class RankFieldTest extends SolrTestCaseJ4 { + + private static final String RANK_1 = "rank_1"; + private static final String RANK_2 = "rank_2"; + + @BeforeClass + public static void beforeClass() throws Exception { +initCore("solrconfig-minimal.xml","schema-rank-fields.xml"); + } + + @Override + public void setUp() throws Exception { +clearIndex(); +assertU(commit()); +super.setUp(); + } + + public void testInternalFieldName() { +assertEquals("RankField.INTERNAL_RANK_FIELD_NAME changed in an incompatible way", +"_rank_", RankField.INTERNAL_RANK_FIELD_NAME); + } + + public void testBasic() { +assertNotNull(h.getCore().getLatestSchema().getFieldOrNull(RANK_1)); +assertEquals(RankField.class, h.getCore().getLatestSchema().getField(RANK_1).getType().getClass()); + } + + public void testBadFormat() { +ignoreException("Expecting float"); +assertFailedU(adoc( +"id", "1", +RANK_1, "foo" +)); + +assertFailedU(adoc( +"id", "1", +RANK_1, "1.2.3" +)); + +unIgnoreException("Expecting float"); + +ignoreException("must be finite"); +assertFailedU(adoc( +"id", "1", +RANK_1, Float.toString(Float.POSITIVE_INFINITY) +)); + +assertFailedU(adoc( +"id", "1", +RANK_1, Float.toString(Float.NEGATIVE_INFINITY) +)); + +assertFailedU(adoc( +"id", "1", +RANK_1, Float.toString(Float.NaN) +)); + +unIgnoreException("must be finite"); + +ignoreException("must be a positive"); +assertFailedU(adoc( +"id", "1", +RANK_1, Float.toString(-0.0f) +)); + +assertFailedU(adoc( +"id", "1", +RANK_1, Float.toString(-1f) +)); + +assertFailedU(adoc( +"id", "1", +RANK_1, Float.toString(0.0f) +)); +unIgnoreException("must be a positive"); + } + + public void testAddRandom() { +for (int i = 0 ; i < 
random().nextInt(TEST_NIGHTLY ? 1 : 100); i++) { + assertU(adoc( + "id", String.valueOf(i), + RANK_1, Float.toString(random().nextFloat()) + )); +} +assertU(commit()); + } + + public void testSkipEmpty() { +assertU(adoc( +"id", "1", +RANK_1, "" +)); + } + + public void testBasicAdd() throws IOException { +assertU(adoc( +"id", "testBasicAdd", +RANK_1, "1" +)); +assertU(commit()); +//assert that the document made it in +assertQ(req("q", "id:testBasicAdd"), "//*[@numFound='1']"); +h.getCore().withSearcher((searcher) -> { + LeafReader reader = searcher.getIndexReader().getContext().leaves().get(0).reader(); + // assert that the field made it in + assertNotNull(reader.getFieldInfos().fieldInfo(RankField.INTERNAL_RANK_FIELD_NAME)); + // assert that the feature made it in + assertTrue(reader.terms(RankField.INTERNAL_RANK_FIELD_NAME).iterator().seekExact(new BytesRef(RANK_1.getBytes(StandardCharsets.UTF_8; + return null; +}); + } + + public void testMultipleRankFields() throws IOException { +assertU(adoc( +"id", "testMultiValueAdd", +RANK_1, "1", +RANK_2, "2" +)); +assertU(commit()); +//assert that the document made it in +assertQ(req("q", "id:testMultiValueAdd"), "//*[@numFound='1']"); +h.getCore().withSearcher((searcher) -> { + LeafReader reader = searcher.getIndexReader().getContext().leaves().get(0).reader(); + // assert that the field made it in +
[GitHub] [lucene-solr] noblepaul commented on a change in pull request #1624: use MethodHandles in AnnotatedAPI
noblepaul commented on a change in pull request #1624: URL: https://github.com/apache/lucene-solr/pull/1624#discussion_r447412157 ## File path: solr/core/src/java/org/apache/solr/api/AnnotatedApi.java ## @@ -87,16 +88,18 @@ public EndPoint getEndPoint() { public static List getApis(Object obj) { return getApis(obj.getClass(), obj); } - public static List getApis(Class klas , Object obj) { -if (!Modifier.isPublic(klas.getModifiers())) { - throw new RuntimeException(klas.getName() + " is not public"); + public static List getApis(Class theClass , Object obj) { +Class klas = null; +try { + klas = MethodHandles.publicLookup().accessClass(theClass); +} catch (IllegalAccessException e) { + throw new RuntimeException(klas.getName() + " is not public", e); Review comment: Thanks, everyone. I wanted to get a quick review for the implementation. Basically, I was not too sure about the idioms we normally use for `MethodHandle` based reflection code. Moreover, We are more worried about the security manager complaining and wanted to do a PoC if that problem goes away This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[GitHub] [lucene-solr] vthacker commented on a change in pull request #1620: SOLR-14590 : Add support for Lucene's FeatureField in Solr
vthacker commented on a change in pull request #1620: URL: https://github.com/apache/lucene-solr/pull/1620#discussion_r447412254 ## File path: solr/core/src/java/org/apache/solr/search/RankQParserPlugin.java ## @@ -0,0 +1,158 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.search; + +import java.util.Locale; +import java.util.Objects; + +import org.apache.lucene.document.FeatureField; +import org.apache.lucene.search.Query; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.schema.RankField; +import org.apache.solr.schema.SchemaField; +/** + * {@code RankQParserPlugin} can be used to introduce document-depending scoring factors to ranking. + * While this {@code QParser} delivers a (subset of) functionality already available via {@link FunctionQParser}, + * the benefit is that {@code RankQParserPlugin} can be used in combination with the {@code minExactCount} to + * use BlockMax-WAND algorithm (skip non-competitive documents) to provide faster responses. 
+ * + * @see RankField + * + * @lucene.experimental + * @since 8.6 + */ +public class RankQParserPlugin extends QParserPlugin { + + public static final String NAME = "rank"; + public static final String FIELD = "f"; Review comment: All parsers in https://lucene.apache.org/solr/guide/8_5/other-parsers.html that expect a field use `f` as the key This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[GitHub] [lucene-solr] noblepaul commented on a change in pull request #1624: use MethodHandles in AnnotatedAPI
noblepaul commented on a change in pull request #1624: URL: https://github.com/apache/lucene-solr/pull/1624#discussion_r447412157 ## File path: solr/core/src/java/org/apache/solr/api/AnnotatedApi.java ## @@ -87,16 +88,18 @@ public EndPoint getEndPoint() { public static List getApis(Object obj) { return getApis(obj.getClass(), obj); } - public static List getApis(Class klas , Object obj) { -if (!Modifier.isPublic(klas.getModifiers())) { - throw new RuntimeException(klas.getName() + " is not public"); + public static List getApis(Class theClass , Object obj) { +Class klas = null; +try { + klas = MethodHandles.publicLookup().accessClass(theClass); +} catch (IllegalAccessException e) { + throw new RuntimeException(klas.getName() + " is not public", e); Review comment: Thanks, everyone. I wanted to get a quick review for the implementation. Basically, I was not too sure about the idioms we normally use for `MethodHandle` based reflection code. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[GitHub] [lucene-solr] vthacker commented on a change in pull request #1620: SOLR-14590 : Add support for Lucene's FeatureField in Solr
vthacker commented on a change in pull request #1620: URL: https://github.com/apache/lucene-solr/pull/1620#discussion_r447411333 ## File path: solr/core/src/java/org/apache/solr/schema/RankField.java ## @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.schema; + +import java.io.IOException; +import java.util.Map; + +import org.apache.lucene.document.FeatureField; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.IndexableFieldType; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.TermQuery; +import org.apache.solr.common.SolrException; +import org.apache.solr.response.TextResponseWriter; +import org.apache.solr.search.QParser; +import org.apache.solr.uninverting.UninvertingReader.Type; + +public class RankField extends FieldType { + + public static final String INTERNAL_RANK_FIELD_NAME = "_internal_rank_field"; + + @Override + public Type getUninversionType(SchemaField sf) { +throw null; + } + + @Override + public void write(TextResponseWriter writer, String name, IndexableField f) throws IOException { + } + + @Override + protected void init(IndexSchema schema, Map args) { +super.init(schema, args); +if (schema.getFieldOrNull(INTERNAL_RANK_FIELD_NAME) != null) { + throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "A field named \"" + INTERNAL_RANK_FIELD_NAME + "\" can't be defined in the schema"); +} +for (int prop:new int[] {STORED, DOC_VALUES, OMIT_TF_POSITIONS, SORT_MISSING_FIRST, SORT_MISSING_LAST}) { + if ((trueProperties & prop) != 0) { +throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Property \"" + getPropertyName(prop) + "\" can't be set to true in RankFields"); + } +} +for (int prop:new int[] {UNINVERTIBLE, INDEXED, MULTIVALUED}) { + if ((falseProperties & prop) != 0) { +throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Property \"" + getPropertyName(prop) + "\" can't be set to false in RankFields"); + } +} +properties &= ~(UNINVERTIBLE | STORED | DOC_VALUES); + + } + + @Override + protected IndexableField createField(String name, String val, IndexableFieldType type) { +if (val == null || 
val.isEmpty()) { + return null; +} +float featureValue; +try { + featureValue = Float.parseFloat(val); +} catch (NumberFormatException nfe) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error while creating field '" + name + "' from value '" + val + "'. Expecting float.", nfe); +} +return new FeatureField(INTERNAL_RANK_FIELD_NAME, name, featureValue); + } + + @Override + public Query getFieldQuery(QParser parser, SchemaField field, String externalVal) { Review comment: > Thinking aloud on another option - What if we rewrite ( like how you have it right now ) when the value is * and throw an exception otherwise? Nice! we added it to `getExistenceQuery` and in `getFieldQuery` we throw an exception! This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[GitHub] [lucene-solr] vthacker commented on a change in pull request #1620: SOLR-14590 : Add support for Lucene's FeatureField in Solr
vthacker commented on a change in pull request #1620: URL: https://github.com/apache/lucene-solr/pull/1620#discussion_r447410253 ## File path: solr/core/src/java/org/apache/solr/schema/RankField.java ## @@ -0,0 +1,140 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.schema; + +import java.io.IOException; +import java.util.Map; + +import org.apache.lucene.document.FeatureField; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.IndexableFieldType; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.TermQuery; +import org.apache.solr.common.SolrException; +import org.apache.solr.response.TextResponseWriter; +import org.apache.solr.search.QParser; +import org.apache.solr.search.RankQParserPlugin; +import org.apache.solr.uninverting.UninvertingReader.Type; + +/** + * + * {@code RankField}s can be used to store scoring factors to improve document ranking. They should be used + * in combination with {@link RankQParserPlugin}. 
To use: + * + * + * Define the {@code RankField} {@code fieldType} in your schema: + * + * + * fieldType name="rank" class="solr.RankField" / + * + * + * Add fields to the schema, i.e.: + * + * + * field name="rank_1" type="rank" / Review comment: small nit: something like `document_length_boost` might be a better example to help a user realize how they can leverage this feature? This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[jira] [Updated] (SOLR-14605) use https need encrypt keystorePass
[ https://issues.apache.org/jira/browse/SOLR-14605?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] calm cloudy updated SOLR-14605: --- Description: solr + tomcat with https , we config keystore in two tomcat file : *server.xml :* *catalina.properties :* javax.net.ssl.keyStore=/xxx.keystore javax.net.ssl.keyStorePassword="{color:#ff}password{color}" javax.net.ssl.trustStore=/xxx.keystore javax.net.ssl.trustStorePassword="{color:#ff}password{color}" the problem is the {color:#ff}password {color:#172b4d}above is cleartext , it's not secure , we need{color} encrypt the password{color:#172b4d} ,then store them{color}{color} So, we need solution such as plugin interface function that make sure solr can load the {color:#de350b}encryption password {color} anyone has idea to resolve it ? thanks was: solr + tomcat with https , we config keystore in two tomcat file : *server.xml :* *catalina.properties :* javax.net.ssl.keyStore=/xxx.keystore javax.net.ssl.keyStorePassword="{color:#FF}password{color}" javax.net.ssl.trustStore=/xxx.keystore javax.net.ssl.trustStorePassword="{color:#FF}password{color}" the problem is the {color:#FF}password {color:#172b4d}above is cleartext , it's not secure , we need encrypt the {color:#de350b}password {color:#172b4d},then store them{color}{color}{color}{color} So, we need solution such as plugin interface function that can load the encryption password anyone has ideato resolve it ? thanks > use https need encrypt keystorePass > > > Key: SOLR-14605 > URL: https://issues.apache.org/jira/browse/SOLR-14605 > Project: Solr > Issue Type: Wish > Security Level: Public(Default Security Level. Issues are Public) >Reporter: calm cloudy >Priority: Major > > solr + tomcat with https , we config keystore in two tomcat file : > *server.xml :* > protocol="org.apache.coyote.http11.Http11NioProtocol" > . 
> keystoreFile="/xxx.keystore" > keystorePass="{color:#ff}password{color}" > truststoreFile="/xxx.keystore" > truststorePass="{color:#ff}password{color}" /> > *catalina.properties :* > javax.net.ssl.keyStore=/xxx.keystore > javax.net.ssl.keyStorePassword="{color:#ff}password{color}" > javax.net.ssl.trustStore=/xxx.keystore > javax.net.ssl.trustStorePassword="{color:#ff}password{color}" > > the problem is the {color:#ff}password {color:#172b4d}above is > cleartext , it's not secure , we need{color} encrypt the > password{color:#172b4d} ,then store them{color}{color} > So, we need solution such as plugin interface function that make sure > solr can load the {color:#de350b}encryption password {color} > anyone has idea to resolve it ? thanks > > -- This message was sent by Atlassian Jira (v8.3.4#803005) - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[jira] [Created] (SOLR-14605) use https need encrypt keystorePass
calm cloudy created SOLR-14605: -- Summary: use https need encrypt keystorePass Key: SOLR-14605 URL: https://issues.apache.org/jira/browse/SOLR-14605 Project: Solr Issue Type: Wish Security Level: Public (Default Security Level. Issues are Public) Reporter: calm cloudy solr + tomcat with https , we config keystore in two tomcat file : *server.xml :* *catalina.properties :* javax.net.ssl.keyStore=/xxx.keystore javax.net.ssl.keyStorePassword="{color:#FF}password{color}" javax.net.ssl.trustStore=/xxx.keystore javax.net.ssl.trustStorePassword="{color:#FF}password{color}" the problem is the {color:#FF}password {color:#172b4d}above is cleartext , it's not secure , we need encrypt the {color:#de350b}password {color:#172b4d},then store them{color}{color}{color}{color} So, we need solution such as plugin interface function that can load the encryption password anyone has an idea to resolve it ? thanks -- This message was sent by Atlassian Jira (v8.3.4#803005) - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[jira] [Updated] (SOLR-14595) json.facet subfacet 'sort:"index asc", refine:true' can return diff results using method:enum
[ https://issues.apache.org/jira/browse/SOLR-14595?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Chris M. Hostetter updated SOLR-14595: -- Attachment: SOLR-14595.patch Status: Open (was: Open) Ok, here's what seems to be happening: * there is a "parent" facet using a sort that can cause documents to "move down" after refinement (ex: "count asc" or "sum(field) asc") and a "refine:true" child facet that sorts on "index asc" (which means it _may_ be processed by "method:stream" if requested) * during phase #1, the "parent" facet has some bucket "P_X" returned by both shards ** depending on the shard, the list of buckets returned for the "child" facet under parent bucket "P_X" can be very different, with little/no overlap (ie: imagine all "odd" values<25 on shard1 and all "even" values<25 on shard2) *** if we use "method:stream" then there is *no implicit (or user specified explicit) overrequest considered* so fewer total buckets are returned by each shard _and there is a *higher* chance the shard will return "more:true"_ ie: "limit:10" with odd/even<25 shards will return 1,3,5,7,9,11,13,15,17,19,*more:true* and 2,4,6,8,10,12,14,16,18,20,*more:true* from our odd/even shards *** if we use any other "method" then there is some overrequest done at the shard level (either implicitly or via an explicit param) so we get more buckets returned by each shard _and there is a *lower* chance the shard will return "more:true"_ ie: limit:10 will cause overrequest=5 and our odd/even<25 shards will return 1,3,5,7,9,11,13,15,17,19,*21,23,more:false* and 2,4,6,8,10,12,14,16,18,20,*22,24,more:false* respectively * during phase #2 "P_X" may not be a "competitive" parent bucket, and it may not be considered for refinement ** but after refinement, other buckets may "move down", and "P_X" may "move up" in the sorted list making it a (complete) bucket to return ** when considering some "child" facet bucket "C_Y", to determine if it is "complete", the only consideration is
whether there are any shards that did not return bucket "C_Y" (under "P_X") but indicated that they have "more:true" *** if phase#1 used "method:stream" and got "more:true" then a bucket "C_Y" not returned by all shards will not be considered complete _even though the "index asc" guarantees there's no chance it exists on any shard that didn't return it_ *** *BUT* if phase#1 used some other "method" then it may have gotten "more:false" from more shards than "method:stream" would have because of the overrequest, and more buckets will be considered "complete" ...i've updated the attached patch to include the smallest possible test of this discrepancy I can think of in TestJsonFacetRefinement Depending on how you look at it, this is either: * a FacetFieldProcessByTermEnum bug for not supporting/implementing overrequest (either using the implicit heuristic or checking for an explicit param) so we don't get consistent behavior between various processors during "phase #1" ** "Fixing" this would be fairly straight forward, but would cause "wasted work" (and network traffic) in the per-shard requests for the "common case" usage of FacetFieldProcessByTermEnum * a FacetRequestSortedMerger.isBucketComplete() bug, because it only considers whether "shardHasMoreBuckets" but doesn't consider the sort order and whether or not "having more buckets" has anything to do with if/why a particular bucket might be returned. ** making this method sophisticated enough to consider whether a bucket _might_ be returned by a shard that 'hasMoreBuckets" given what we know about the facet sort would be pretty hard This is such an esoteric edge case that isn't likely to come up in any "real world" usage of "method:stream" ... so I'm not sure how much effort it's really worth to try and "fix" this (particularly since it doesn't cause any "incorrect calculation" type bugs, it only causes fewer buckets to be returned) in a way that won't hurt performance in the "typical" usage. 
I'm going to think on this a little more, but i suspect that for now i'll just modify TestCloudJSONFacetSKGEquiv to work around this (either by never testing "index asc" combined with "refine:true", or perhaps only ever testing "index asc" with "overrequest:0" ... not sure yet) > json.facet subfacet 'sort:"index asc", refine:true' can return diff results > using method:enum > - > > Key: SOLR-14595 > URL: https://issues.apache.org/jira/browse/SOLR-14595 > Project: Solr > Issue Type: Bug > Security Level: Public(Default Security Level. Issues are Public) > Components: Facet Module >Reporter: Chris M. Hostetter >Assignee: Chris M. Hostetter >Priority: Major > Attachments:
[jira] [Updated] (SOLR-14481) Add drill Streaming Expression for efficient and accurate high cardinality aggregation
[ https://issues.apache.org/jira/browse/SOLR-14481?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Joel Bernstein updated SOLR-14481: -- Summary: Add drill Streaming Expression for efficient and accurate high cardinality aggregation (was: Add drill Streaming Expression) > Add drill Streaming Expression for efficient and accurate high cardinality > aggregation > -- > > Key: SOLR-14481 > URL: https://issues.apache.org/jira/browse/SOLR-14481 > Project: Solr > Issue Type: New Feature > Components: streaming expressions >Reporter: Joel Bernstein >Assignee: Joel Bernstein >Priority: Major > Fix For: 8.6 > > Attachments: SOLR-14481.patch, SOLR-14481.patch > > > This ticket will add the *drill* Streaming Expression. The drill Streaming > Expression is a wrapper around the functionality that is described in > SOLR-14470. The idea is for drill to contact the /export handler on one > replica in each shard of a collection and pass four parameters: > * q: query > * fl: field list > * sort: sort spec > * expr: The Streaming Expression sent to the /export handler to be executed. > The export handler will pass the result set through the streaming expression > performing an aggregation on the sorted result set and return the aggregated > tuples. The drill expression will simply maintain the sort order of the > tuples and emit them so that a wrapper expression can perform operations on > the sorted aggregate tuples. 
> Sample syntax: > {code:java} > drill(collection1, q="*:*", fl="a,b,c", sort="a desc, b desc", > rollup(input(), over="a,b", sum(c))) {code} > In order to finish the aggregation other expressions can be used: > {code:java} > rollup( > select( >drill(collection1, > q="*:*", > fl="a,b,c", > sort="a desc, b desc", > rollup(input(), over="a,b", sum(c))), >a, >b, >sum(c) as sums), > over="a, b", > sum(sums)) > > {code} > This provides fast aggregation over fields with infinite cardinality by > pushing down the first level of aggregation into the /export handler. > > -- This message was sent by Atlassian Jira (v8.3.4#803005) - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[jira] [Commented] (SOLR-14481) Add drill Streaming Expression
[ https://issues.apache.org/jira/browse/SOLR-14481?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17148232#comment-17148232 ] ASF subversion and git services commented on SOLR-14481: Commit 9e8555056498c24e4bb4c451b0321833b042a12f in lucene-solr's branch refs/heads/branch_8x from Joel Bernstein [ https://gitbox.apache.org/repos/asf?p=lucene-solr.git;h=9e85550 ] SOLR-14481: Update CHANGES.txt > Add drill Streaming Expression > -- > > Key: SOLR-14481 > URL: https://issues.apache.org/jira/browse/SOLR-14481 > Project: Solr > Issue Type: New Feature > Components: streaming expressions >Reporter: Joel Bernstein >Assignee: Joel Bernstein >Priority: Major > Fix For: 8.6 > > Attachments: SOLR-14481.patch, SOLR-14481.patch > > > This ticket will add the *drill* Streaming Expression. The drill Streaming > Expression is a wrapper around the functionality that is described in > SOLR-14470. The idea is for drill to contact the /export handler on one > replica in each shard of a collection and pass four parameters: > * q: query > * fl: field list > * sort: sort spec > * expr: The Streaming Expression sent to the /export handler to be executed. > The export handler will pass the result set through the streaming expression > performing an aggregation on the sorted result set and return the aggregated > tuples. The drill expression will simply maintain the sort order of the > tuples and emit them so that a wrapper expression can perform operations on > the sorted aggregate tuples. 
> Sample syntax: > {code:java} > drill(collection1, q="*:*", fl="a,b,c", sort="a desc, b desc", > rollup(input(), over="a,b", sum(c))) {code} > In order to finish the aggregation other expressions can be used: > {code:java} > rollup( > select( >drill(collection1, > q="*:*", > fl="a,b,c", > sort="a desc, b desc", > rollup(input(), over="a,b", sum(c))), >a, >b, >sum(c) as sums), > over="a, b", > sum(sums)) > > {code} > This provides fast aggregation over fields with infinite cardinality by > pushing down the first level of aggregation into the /export handler. > > -- This message was sent by Atlassian Jira (v8.3.4#803005) - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[jira] [Commented] (SOLR-14481) Add drill Streaming Expression
[ https://issues.apache.org/jira/browse/SOLR-14481?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17148229#comment-17148229 ] ASF subversion and git services commented on SOLR-14481: Commit 0cc45f6cdf2bc9f4c8d88f5126f76dab08fe02f8 in lucene-solr's branch refs/heads/master from Joel Bernstein [ https://gitbox.apache.org/repos/asf?p=lucene-solr.git;h=0cc45f6 ] SOLR-14481: Update CHANGES.txt > Add drill Streaming Expression > -- > > Key: SOLR-14481 > URL: https://issues.apache.org/jira/browse/SOLR-14481 > Project: Solr > Issue Type: New Feature > Components: streaming expressions >Reporter: Joel Bernstein >Assignee: Joel Bernstein >Priority: Major > Fix For: 8.6 > > Attachments: SOLR-14481.patch, SOLR-14481.patch > > > This ticket will add the *drill* Streaming Expression. The drill Streaming > Expression is a wrapper around the functionality that is described in > SOLR-14470. The idea is for drill to contact the /export handler on one > replica in each shard of a collection and pass four parameters: > * q: query > * fl: field list > * sort: sort spec > * expr: The Streaming Expression sent to the /export handler to be executed. > The export handler will pass the result set through the streaming expression > performing an aggregation on the sorted result set and return the aggregated > tuples. The drill expression will simply maintain the sort order of the > tuples and emit them so that a wrapper expression can perform operations on > the sorted aggregate tuples. 
> Sample syntax: > {code:java} > drill(collection1, q="*:*", fl="a,b,c", sort="a desc, b desc", > rollup(input(), over="a,b", sum(c))) {code} > In order to finish the aggregation other expressions can be used: > {code:java} > rollup( > select( >drill(collection1, > q="*:*", > fl="a,b,c", > sort="a desc, b desc", > rollup(input(), over="a,b", sum(c))), >a, >b, >sum(c) as sums), > over="a, b", > sum(sums)) > > {code} > This provides fast aggregation over fields with infinite cardinality by > pushing down the first level of aggregation into the /export handler. > > -- This message was sent by Atlassian Jira (v8.3.4#803005) - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[jira] [Created] (LUCENE-9422) Detailed logging for MergePolicy$MergeException stack trace
Viral Gandhi created LUCENE-9422: Summary: Detailed logging for MergePolicy$MergeException stack trace Key: LUCENE-9422 URL: https://issues.apache.org/jira/browse/LUCENE-9422 Project: Lucene - Core Issue Type: Improvement Reporter: Viral Gandhi We hit the following exception: {code:java} Uncaught exception: org.apache.lucene.index.MergePolicy$MergeException: java.lang.IllegalStateException: files were not computed yet; segment=_3g5 maxDoc=3095 in thread Thread[Lucene Merge Thread #456,5,main] org.apache.lucene.index.MergePolicy$MergeException: java.lang.IllegalStateException: files were not computed yet; segment=_3g5 maxDoc=3095 at org.apache.lucene.index.ConcurrentMergeScheduler.handleMergeException(ConcurrentMergeScheduler.java:704) at org.apache.lucene.index.ConcurrentMergeScheduler$MergeThread.run(ConcurrentMergeScheduler.java:684) Caused by: java.lang.IllegalStateException: files were not computed yet; segment=_3g5 maxDoc=3095 at org.apache.lucene.index.SegmentInfo.files(SegmentInfo.java:176) at org.apache.lucene.index.SegmentCommitInfo.files(SegmentCommitInfo.java:228) at org.apache.lucene.index.IndexWriter$2.mergeFinished(IndexWriter.java:3181) at org.apache.lucene.index.IndexWriter.closeMergeReaders(IndexWriter.java:) at org.apache.lucene.index.IndexWriter.mergeMiddle(IndexWriter.java:4744) at org.apache.lucene.index.IndexWriter.merge(IndexWriter.java:4170) at org.apache.lucene.index.ConcurrentMergeScheduler.doMerge(ConcurrentMergeScheduler.java:625) at com.amazon.lucene.index.ConcurrentMergeSchedulerWrapper.doMerge(ConcurrentMergeSchedulerWrapper.java:64) at org.apache.lucene.index.ConcurrentMergeScheduler$MergeThread.run(ConcurrentMergeScheduler.java:662) {code} After a merge thread hit an exception, and in trying to throw the exception, Lucene called _SegmentInfo.files()_ which then threw another exception. Maybe this caused in losing root cause exception? Having more details regarding the root cause would have been helpful here. 
-- This message was sent by Atlassian Jira (v8.3.4#803005) - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[jira] [Comment Edited] (SOLR-14561) Validate parameters to CoreAdminAPI
[ https://issues.apache.org/jira/browse/SOLR-14561?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17147694#comment-17147694 ] Jan Høydahl edited comment on SOLR-14561 at 6/29/20, 7:41 PM: -- Committed PR 1629 to master in 49a3f0a11d41f7124b893a08dc9e67594c32e2ee branch_8x: b52bf267c7aa528b282d09c637344650f9034956 was (Author: janhoy): Committed PR 1629 to master in 49a3f0a11d41f7124b893a08dc9e67594c32e2ee > Validate parameters to CoreAdminAPI > --- > > Key: SOLR-14561 > URL: https://issues.apache.org/jira/browse/SOLR-14561 > Project: Solr > Issue Type: Improvement > Security Level: Public(Default Security Level. Issues are Public) >Reporter: Jan Høydahl >Assignee: Jan Høydahl >Priority: Major > Fix For: 8.6 > > Time Spent: 4h > Remaining Estimate: 0h > > CoreAdminAPI does not validate parameter input. We should limit what users > can specify for at least {{instanceDir and dataDir}} params, perhaps restrict > them to be relative to SOLR_HOME or SOLR_DATA_HOME. -- This message was sent by Atlassian Jira (v8.3.4#803005) - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[jira] [Resolved] (SOLR-14561) Validate parameters to CoreAdminAPI
[ https://issues.apache.org/jira/browse/SOLR-14561?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Jan Høydahl resolved SOLR-14561. Resolution: Fixed > Validate parameters to CoreAdminAPI > --- > > Key: SOLR-14561 > URL: https://issues.apache.org/jira/browse/SOLR-14561 > Project: Solr > Issue Type: Improvement > Security Level: Public(Default Security Level. Issues are Public) >Reporter: Jan Høydahl >Assignee: Jan Høydahl >Priority: Major > Fix For: 8.6 > > Time Spent: 4h > Remaining Estimate: 0h > > CoreAdminAPI does not validate parameter input. We should limit what users > can specify for at least {{instanceDir and dataDir}} params, perhaps restrict > them to be relative to SOLR_HOME or SOLR_DATA_HOME. -- This message was sent by Atlassian Jira (v8.3.4#803005) - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[jira] [Commented] (SOLR-14561) Validate parameters to CoreAdminAPI
[ https://issues.apache.org/jira/browse/SOLR-14561?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17148099#comment-17148099 ] ASF subversion and git services commented on SOLR-14561: Commit b52bf267c7aa528b282d09c637344650f9034956 in lucene-solr's branch refs/heads/branch_8x from Jan Høydahl [ https://gitbox.apache.org/repos/asf?p=lucene-solr.git;h=b52bf26 ] SOLR-14561 Followup - validate params for more core operations (#1629) Add template to solr.in scripts Also testes Windows paths Added RefGuide documentation to some params (cherry picked from commit 49a3f0a11d41f7124b893a08dc9e67594c32e2ee and adapted to Java8) > Validate parameters to CoreAdminAPI > --- > > Key: SOLR-14561 > URL: https://issues.apache.org/jira/browse/SOLR-14561 > Project: Solr > Issue Type: Improvement > Security Level: Public(Default Security Level. Issues are Public) >Reporter: Jan Høydahl >Assignee: Jan Høydahl >Priority: Major > Fix For: 8.6 > > Time Spent: 4h > Remaining Estimate: 0h > > CoreAdminAPI does not validate parameter input. We should limit what users > can specify for at least {{instanceDir and dataDir}} params, perhaps restrict > them to be relative to SOLR_HOME or SOLR_DATA_HOME. -- This message was sent by Atlassian Jira (v8.3.4#803005) - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[GitHub] [lucene-solr] atris commented on pull request #1626: SOLR-14588: Implement Circuit Breakers
atris commented on pull request #1626: URL: https://github.com/apache/lucene-solr/pull/1626#issuecomment-651305132 @ErickErickson @sigram Thanks for reviewing. I have updated the PR per your comments. Unfortunately I had to force push due to the magnitude of removing the solrconfig.xml file changes. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[GitHub] [lucene-solr] atris commented on a change in pull request #1626: SOLR-14588: Implement Circuit Breakers
atris commented on a change in pull request #1626: URL: https://github.com/apache/lucene-solr/pull/1626#discussion_r447189990 ## File path: solr/solr-ref-guide/src/circuit-breakers.adoc ## @@ -0,0 +1,81 @@ += Circuit Breakers +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +Solr's circuit breaker infrastructure allows prevention of actions that can cause a node to go beyond its capacity or to go down. The +premise of circuit breakers is to ensure a higher quality of service and only accept request loads that are serviceable in the current +resource configuration. + +== When To Use Circuit Breakers +Circuit breakers should be used when the user wishes to trade request throughput for a higher Solr stability. If circuit breakers +are enabled, requests may be rejected under the condition of high node duress with an appropriate HTTP error code (typically 503). + +It is upto the client to handle the same and potentially build a retrial logic as this should ideally be a transient situation. + +== Types Of Circuit Breakers +Circuit breakers can be of two types: + +=== Admission Control Checks + +Circuit breakers that are checked at admission control (request handlers). 
These circuit breakers are typically attached to a set +of requests that check them before proceeding with the request. Example is JVM heap usage based circuit breaker (described below). + +For these type of circuit breakers, it is a good idea to register them with CircuitBreakerManager +(org.apache.solr.util.circuitbreaker.CircuitBreakerManager) to allow a holistic check at the required admission control point. Review comment: I removed the section completely -- didn't make a lot of sense in user docs This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[GitHub] [lucene-solr] atris commented on a change in pull request #1626: SOLR-14588: Implement Circuit Breakers
atris commented on a change in pull request #1626: URL: https://github.com/apache/lucene-solr/pull/1626#discussion_r447188868 ## File path: solr/solr-ref-guide/src/circuit-breakers.adoc ## @@ -0,0 +1,81 @@ += Circuit Breakers +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +Solr's circuit breaker infrastructure allows prevention of actions that can cause a node to go beyond its capacity or to go down. The +premise of circuit breakers is to ensure a higher quality of service and only accept request loads that are serviceable in the current +resource configuration. + +== When To Use Circuit Breakers +Circuit breakers should be used when the user wishes to trade request throughput for a higher Solr stability. If circuit breakers +are enabled, requests may be rejected under the condition of high node duress with an appropriate HTTP error code (typically 503). + +It is upto the client to handle the same and potentially build a retrial logic as this should ideally be a transient situation. + +== Types Of Circuit Breakers +Circuit breakers can be of two types: Review comment: Removed the same, thanks This is an automated message from the Apache Git Service. 
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[GitHub] [lucene-solr] atris commented on a change in pull request #1626: SOLR-14588: Implement Circuit Breakers
atris commented on a change in pull request #1626: URL: https://github.com/apache/lucene-solr/pull/1626#discussion_r447185389 ## File path: solr/core/src/java/org/apache/solr/util/circuitbreaker/MemoryCircuitBreaker.java ## @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.util.circuitbreaker; + +import java.lang.management.ManagementFactory; +import java.lang.management.MemoryMXBean; + +import org.apache.solr.core.SolrCore; + +/** + * Tracks the current JVM heap usage and triggers if it exceeds the defined percentage of the maximum + * heap size allocated to the JVM. This circuit breaker is a part of the default CircuitBreakerManager + * so is checked for every request -- hence it is realtime. Once the memory usage goes below the threshold, + * it will start allowing queries again. 
+ * + * The memory threshold is defined as a percentage of the maximum memory allocated -- see memoryCircuitBreakerThreshold + * in solrconfig.xml + */ + +public class MemoryCircuitBreaker extends CircuitBreaker { + private static final MemoryMXBean MEMORY_MX_BEAN = ManagementFactory.getMemoryMXBean(); + + private final long currentMaxHeap = MEMORY_MX_BEAN.getHeapMemoryUsage().getMax(); + + // Assumption -- the value of these parameters will be set correctly before invoking printDebugInfo() + private ThreadLocal seenMemory = new ThreadLocal<>(); + private ThreadLocal allowedMemory = new ThreadLocal<>(); + + public MemoryCircuitBreaker(SolrCore solrCore) { +super(solrCore); + +if (currentMaxHeap <= 0) { + throw new IllegalArgumentException("Invalid JVM state for the max heap usage"); +} + } + + // TODO: An optimization can be to trip the circuit breaker for a duration of time + // after the circuit breaker condition is matched. This will optimize for per call + // overhead of calculating the condition parameters but can result in false positives. + @Override + public boolean isCircuitBreakerGauntletTripped() { +if (!isCircuitBreakerEnabled()) { + return false; +} + +allowedMemory.set(getCurrentMemoryThreshold()); + +seenMemory.set(calculateLiveMemoryUsage()); + +return (seenMemory.get() >= allowedMemory.get()); + } + + @Override + public String printDebugInfo() { +return "seenMemory=" + seenMemory.get() + " allowedMemory=" + allowedMemory.get(); + } + + private long getCurrentMemoryThreshold() { +int thresholdValueInPercentage = solrCore.getSolrConfig().memoryCircuitBreakerThreshold; +double thresholdInFraction = thresholdValueInPercentage / (double) 100; +long actualLimit = (long) (currentMaxHeap * thresholdInFraction); + +if (actualLimit <= 0) { + throw new IllegalStateException("Memory limit cannot be less than or equal to zero"); +} + +return actualLimit; + } + + /** + * Calculate the live memory usage for the system. 
This method has package visibility + * to allow using for testing + * @return Memory usage in bytes + */ + protected long calculateLiveMemoryUsage() { +// NOTE: MemoryUsageGaugeSet provides memory usage statistics but we do not use them +// here since MemoryUsageGaugeSet provides combination of heap and non heap usage and Review comment: Better wording, thanks! This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[jira] [Commented] (LUCENE-9413) Add a char filter corresponding to CJKWidthFilter
[ https://issues.apache.org/jira/browse/LUCENE-9413?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17148068#comment-17148068 ] Jim Ferenczi commented on LUCENE-9413: -- +1, I like the idea, currently we ask users to install the icu normalizer but it could be nice to have a simple char filter in core to apply the normalization. In essence, this is similar to https://issues.apache.org/jira/browse/LUCENE-8972 but with a more contained scope. > The mecab-ipadic dictionary has entries which includes FULL width characters, >so this naive approach - FULL / HALF width character normalization before >tokenizing can break tokenization. :/ I think that's an acceptable trade-off, these entries with full width characters don't seem to be high quality anyway ;). > Add a char filter corresponding to CJKWidthFilter > - > > Key: LUCENE-9413 > URL: https://issues.apache.org/jira/browse/LUCENE-9413 > Project: Lucene - Core > Issue Type: New Feature >Reporter: Tomoko Uchida >Priority: Minor > > In association with issues in Elasticsearch > ([https://github.com/elastic/elasticsearch/issues/58384] and > [https://github.com/elastic/elasticsearch/issues/58385]), it might be useful > for Japanese default analyzer. > Although I don't think it's a bug to not normalize FULL and HALF width > characters before tokenization, the behaviour sometimes confuses beginners or > users who have limited knowledge about Japanese analysis (and Unicode). > If we have a FULL and HALF width character normalization filter in > {{analyzers-common}}, we can include it into JapaneseAnalyzer (currently, > JapaneseAnalyzer contains CJKWidthFilter but it is applied after tokenization > so some of FULL width numbers or latin alphabets are separated by the > tokenizer). -- This message was sent by Atlassian Jira (v8.3.4#803005) - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[jira] [Resolved] (SOLR-14462) Autoscaling placement wrong with concurrent collection creations
[ https://issues.apache.org/jira/browse/SOLR-14462?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Ilan Ginzburg resolved SOLR-14462. -- Resolution: Fixed Merged into branch_8x for inclusion in 8.6 release. > Autoscaling placement wrong with concurrent collection creations > > > Key: SOLR-14462 > URL: https://issues.apache.org/jira/browse/SOLR-14462 > Project: Solr > Issue Type: Bug > Components: AutoScaling >Affects Versions: master (9.0), 8.6 >Reporter: Ilan Ginzburg >Assignee: Ilan Ginzburg >Priority: Major > Attachments: PolicyHelperNewLogs.txt, policylogs.txt > > Time Spent: 3h 10m > Remaining Estimate: 0h > > Under concurrent collection creation, wrong Autoscaling placement decisions > can lead to severely unbalanced clusters. > Sequential creation of the same collections is handled correctly and the > cluster is balanced. > *TL;DR;* under high load, the way sessions that cache future changes to > Zookeeper are managed cause placement decisions of multiple concurrent > Collection API calls to ignore each other, be based on identical “initial” > cluster state, possibly leading to identical placement decisions and as a > consequence cluster imbalance. > *Some context first* for those less familiar with how Autoscaling deals with > cluster state change: a PolicyHelper.Session is created with a snapshot of > the Zookeeper cluster state and is used to track already decided but not yet > persisted to Zookeeper cluster state changes so that Collection API commands > can make the right placement decisions. > A Collection API command either uses an existing cached Session (that > includes changes computed by previous command(s)) or creates a new Session > initialized from the Zookeeper cluster state (i.e. with only state changes > already persisted). > When a Collection API command requires a Session - and one is needed for any > cluster state update computation - if one exists but is currently in use, the > command can wait up to 10 seconds. 
If the session becomes available, it is > reused. Otherwise, a new one is created. > The Session lifecycle is as follows: it is created in COMPUTING state by a > Collection API command and is initialized with a snapshot of cluster state > from Zookeeper (does not require a Zookeeper read, this is running on > Overseer that maintains a cache of cluster state). The command has exclusive > access to the Session and can change the state of the Session. When the > command is done changing the Session, the Session is “returned” and its state > changes to EXECUTING while the command continues to run to persist the state > to Zookeeper and interact with the nodes, but no longer interacts with the > Session. Another command can then grab a Session in EXECUTING state, change > its state to COMPUTING to compute new changes taking into account previous > changes. When all commands having used the session have completed their work, > the session is “released” and destroyed (at this stage, Zookeeper contains > all the state changes that were computed using that Session). > The issue arises when multiple Collection API commands are executed at once. > A first Session is created and commands start using it one by one. In a > simple 1 shard 1 replica collection creation test run with 100 parallel > Collection API requests (see debug logs from PolicyHelper in file > policy.logs), this Session update phase (Session in COMPUTING status in > SessionWrapper) takes about 250-300ms (MacBook Pro). > This means that about 40 commands can run by using in turn the same Session > (45 in the sample run). The commands that have been waiting for too long time > out after 10 seconds, more or less all at the same time (at the rate at which > they have been received by the OverseerCollectionMessageHandler, approx one > per 100ms in the sample run) and most/all independently decide to create a > new Session. 
These new Sessions are based on Zookeeper state, they might or > might not include some of the changes from the first 40 commands (depending > on if these commands got their changes written to Zookeeper by the time of > the 10 seconds timeout, a few might have made it, see below). > These new Sessions (54 sessions in addition to the initial one) are based on > more or less the same state, so all remaining commands are making placement > decisions that do not take into account each other (and likely not much of > the first 44 placement decisions either). > The sample run whose relevant logs are attached led for the 100 single shard > single replica collection creations to 82 collections on the Overseer node, > and 5 and 13 collections on the two other nodes of a 3 nodes cluster. Given > that the initial
[jira] [Updated] (SOLR-14462) Autoscaling placement wrong with concurrent collection creations
[ https://issues.apache.org/jira/browse/SOLR-14462?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Ilan Ginzburg updated SOLR-14462: - Fix Version/s: 8.6 > Autoscaling placement wrong with concurrent collection creations > > > Key: SOLR-14462 > URL: https://issues.apache.org/jira/browse/SOLR-14462 > Project: Solr > Issue Type: Bug > Components: AutoScaling >Affects Versions: master (9.0), 8.6 >Reporter: Ilan Ginzburg >Assignee: Ilan Ginzburg >Priority: Major > Fix For: 8.6 > > Attachments: PolicyHelperNewLogs.txt, policylogs.txt > > Time Spent: 3h 10m > Remaining Estimate: 0h > > Under concurrent collection creation, wrong Autoscaling placement decisions > can lead to severely unbalanced clusters. > Sequential creation of the same collections is handled correctly and the > cluster is balanced. > *TL;DR;* under high load, the way sessions that cache future changes to > Zookeeper are managed cause placement decisions of multiple concurrent > Collection API calls to ignore each other, be based on identical “initial” > cluster state, possibly leading to identical placement decisions and as a > consequence cluster imbalance. > *Some context first* for those less familiar with how Autoscaling deals with > cluster state change: a PolicyHelper.Session is created with a snapshot of > the Zookeeper cluster state and is used to track already decided but not yet > persisted to Zookeeper cluster state changes so that Collection API commands > can make the right placement decisions. > A Collection API command either uses an existing cached Session (that > includes changes computed by previous command(s)) or creates a new Session > initialized from the Zookeeper cluster state (i.e. with only state changes > already persisted). > When a Collection API command requires a Session - and one is needed for any > cluster state update computation - if one exists but is currently in use, the > command can wait up to 10 seconds. 
If the session becomes available, it is > reused. Otherwise, a new one is created. > The Session lifecycle is as follows: it is created in COMPUTING state by a > Collection API command and is initialized with a snapshot of cluster state > from Zookeeper (does not require a Zookeeper read, this is running on > Overseer that maintains a cache of cluster state). The command has exclusive > access to the Session and can change the state of the Session. When the > command is done changing the Session, the Session is “returned” and its state > changes to EXECUTING while the command continues to run to persist the state > to Zookeeper and interact with the nodes, but no longer interacts with the > Session. Another command can then grab a Session in EXECUTING state, change > its state to COMPUTING to compute new changes taking into account previous > changes. When all commands having used the session have completed their work, > the session is “released” and destroyed (at this stage, Zookeeper contains > all the state changes that were computed using that Session). > The issue arises when multiple Collection API commands are executed at once. > A first Session is created and commands start using it one by one. In a > simple 1 shard 1 replica collection creation test run with 100 parallel > Collection API requests (see debug logs from PolicyHelper in file > policy.logs), this Session update phase (Session in COMPUTING status in > SessionWrapper) takes about 250-300ms (MacBook Pro). > This means that about 40 commands can run by using in turn the same Session > (45 in the sample run). The commands that have been waiting for too long time > out after 10 seconds, more or less all at the same time (at the rate at which > they have been received by the OverseerCollectionMessageHandler, approx one > per 100ms in the sample run) and most/all independently decide to create a > new Session. 
These new Sessions are based on Zookeeper state, they might or > might not include some of the changes from the first 40 commands (depending > on if these commands got their changes written to Zookeeper by the time of > the 10 seconds timeout, a few might have made it, see below). > These new Sessions (54 sessions in addition to the initial one) are based on > more or less the same state, so all remaining commands are making placement > decisions that do not take into account each other (and likely not much of > the first 44 placement decisions either). > The sample run whose relevant logs are attached led for the 100 single shard > single replica collection creations to 82 collections on the Overseer node, > and 5 and 13 collections on the two other nodes of a 3 nodes cluster. Given > that the initial session was used 45 times
[jira] [Commented] (SOLR-14462) Autoscaling placement wrong with concurrent collection creations
[ https://issues.apache.org/jira/browse/SOLR-14462?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17148059#comment-17148059 ] ASF subversion and git services commented on SOLR-14462: Commit 78152876fda92c61d1c6bcdf5e8953042a592b4f in lucene-solr's branch refs/heads/branch_8x from Ilan Ginzburg [ https://gitbox.apache.org/repos/asf?p=lucene-solr.git;h=7815287 ] SOLR-14462: cache more than one autoscaling session (#1630) Cherry picked from 25428013fb0ed8f8fdbebdef3f1d65dea77129c2 > Autoscaling placement wrong with concurrent collection creations > > > Key: SOLR-14462 > URL: https://issues.apache.org/jira/browse/SOLR-14462 > Project: Solr > Issue Type: Bug > Components: AutoScaling >Affects Versions: master (9.0), 8.6 >Reporter: Ilan Ginzburg >Assignee: Ilan Ginzburg >Priority: Major > Attachments: PolicyHelperNewLogs.txt, policylogs.txt > > Time Spent: 3h 10m > Remaining Estimate: 0h > > Under concurrent collection creation, wrong Autoscaling placement decisions > can lead to severely unbalanced clusters. > Sequential creation of the same collections is handled correctly and the > cluster is balanced. > *TL;DR;* under high load, the way sessions that cache future changes to > Zookeeper are managed cause placement decisions of multiple concurrent > Collection API calls to ignore each other, be based on identical “initial” > cluster state, possibly leading to identical placement decisions and as a > consequence cluster imbalance. > *Some context first* for those less familiar with how Autoscaling deals with > cluster state change: a PolicyHelper.Session is created with a snapshot of > the Zookeeper cluster state and is used to track already decided but not yet > persisted to Zookeeper cluster state changes so that Collection API commands > can make the right placement decisions. 
> A Collection API command either uses an existing cached Session (that > includes changes computed by previous command(s)) or creates a new Session > initialized from the Zookeeper cluster state (i.e. with only state changes > already persisted). > When a Collection API command requires a Session - and one is needed for any > cluster state update computation - if one exists but is currently in use, the > command can wait up to 10 seconds. If the session becomes available, it is > reused. Otherwise, a new one is created. > The Session lifecycle is as follows: it is created in COMPUTING state by a > Collection API command and is initialized with a snapshot of cluster state > from Zookeeper (does not require a Zookeeper read, this is running on > Overseer that maintains a cache of cluster state). The command has exclusive > access to the Session and can change the state of the Session. When the > command is done changing the Session, the Session is “returned” and its state > changes to EXECUTING while the command continues to run to persist the state > to Zookeeper and interact with the nodes, but no longer interacts with the > Session. Another command can then grab a Session in EXECUTING state, change > its state to COMPUTING to compute new changes taking into account previous > changes. When all commands having used the session have completed their work, > the session is “released” and destroyed (at this stage, Zookeeper contains > all the state changes that were computed using that Session). > The issue arises when multiple Collection API commands are executed at once. > A first Session is created and commands start using it one by one. In a > simple 1 shard 1 replica collection creation test run with 100 parallel > Collection API requests (see debug logs from PolicyHelper in file > policy.logs), this Session update phase (Session in COMPUTING status in > SessionWrapper) takes about 250-300ms (MacBook Pro). 
> This means that about 40 commands can run by using in turn the same Session > (45 in the sample run). The commands that have been waiting for too long time > out after 10 seconds, more or less all at the same time (at the rate at which > they have been received by the OverseerCollectionMessageHandler, approx one > per 100ms in the sample run) and most/all independently decide to create a > new Session. These new Sessions are based on Zookeeper state, they might or > might not include some of the changes from the first 40 commands (depending > on if these commands got their changes written to Zookeeper by the time of > the 10 seconds timeout, a few might have made it, see below). > These new Sessions (54 sessions in addition to the initial one) are based on > more or less the same state, so all remaining commands are making placement > decisions that do not take into account each other (and likely not much of
[GitHub] [lucene-solr] murblanc merged pull request #1630: SOLR-14462: cache more than one autoscaling session
murblanc merged pull request #1630: URL: https://github.com/apache/lucene-solr/pull/1630 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[jira] [Updated] (SOLR-14582) Expose IWC.setMaxCommitMergeWaitMillis as an expert feature in Solr's index config
[ https://issues.apache.org/jira/browse/SOLR-14582?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Tomas Eduardo Fernandez Lobbe updated SOLR-14582: - Summary: Expose IWC.setMaxCommitMergeWaitMillis as an expert feature in Solr's index config (was: Expose IWC.setMaxCommitMergeWaitSeconds as an expert feature in Solr's index config) > Expose IWC.setMaxCommitMergeWaitMillis as an expert feature in Solr's index > config > -- > > Key: SOLR-14582 > URL: https://issues.apache.org/jira/browse/SOLR-14582 > Project: Solr > Issue Type: Improvement > Security Level: Public(Default Security Level. Issues are Public) >Reporter: Tomas Eduardo Fernandez Lobbe >Priority: Trivial > > LUCENE-8962 added the ability to merge segments synchronously on commit. This > isn't done by default and the default {{MergePolicy}} won't do it, but custom > merge policies can take advantage of this. Solr allows plugging in custom > merge policies, so if someone wants to make use of this feature they could, > however, they need to set {{IndexWriterConfig.maxCommitMergeWaitSeconds}} to > something greater than 0. > Since this is an expert feature, I plan to document it only in javadoc and > not the ref guide. -- This message was sent by Atlassian Jira (v8.3.4#803005) - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[jira] [Commented] (SOLR-14582) Expose IWC.setMaxCommitMergeWaitSeconds as an expert feature in Solr's index config
[ https://issues.apache.org/jira/browse/SOLR-14582?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17148037#comment-17148037 ] Tomas Eduardo Fernandez Lobbe commented on SOLR-14582: -- You mean, make the code check for the MP? maybe this would bite us if the MP is updated to support this? Do you think a warning in the docs is enough? > Expose IWC.setMaxCommitMergeWaitSeconds as an expert feature in Solr's index > config > --- > > Key: SOLR-14582 > URL: https://issues.apache.org/jira/browse/SOLR-14582 > Project: Solr > Issue Type: Improvement > Security Level: Public(Default Security Level. Issues are Public) >Reporter: Tomas Eduardo Fernandez Lobbe >Priority: Trivial > > LUCENE-8962 added the ability to merge segments synchronously on commit. This > isn't done by default and the default {{MergePolicy}} won't do it, but custom > merge policies can take advantage of this. Solr allows plugging in custom > merge policies, so if someone wants to make use of this feature they could, > however, they need to set {{IndexWriterConfig.maxCommitMergeWaitSeconds}} to > something greater than 0. > Since this is an expert feature, I plan to document it only in javadoc and > not the ref guide. -- This message was sent by Atlassian Jira (v8.3.4#803005) - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[GitHub] [lucene-solr] atris commented on a change in pull request #1626: SOLR-14588: Implement Circuit Breakers
atris commented on a change in pull request #1626: URL: https://github.com/apache/lucene-solr/pull/1626#discussion_r447157028 ## File path: solr/contrib/prometheus-exporter/src/test-files/solr/collection1/conf/solrconfig.xml ## @@ -83,6 +83,10 @@ 200 +false + +100 Review comment: Agreed. Removed the unnecessary config changes and added a new section, thanks. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[GitHub] [lucene-solr] tflobbe commented on pull request #1616: SOLR-14590: Add RankQParserPlugin
tflobbe commented on pull request #1616: URL: https://github.com/apache/lucene-solr/pull/1616#issuecomment-651255112 Superseded by https://github.com/apache/lucene-solr/pull/1620 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[GitHub] [lucene-solr] tflobbe closed pull request #1616: SOLR-14590: Add RankQParserPlugin
tflobbe closed pull request #1616: URL: https://github.com/apache/lucene-solr/pull/1616 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[GitHub] [lucene-solr] atris commented on a change in pull request #1626: SOLR-14588: Implement Circuit Breakers
atris commented on a change in pull request #1626: URL: https://github.com/apache/lucene-solr/pull/1626#discussion_r447123680 ## File path: solr/core/src/java/org/apache/solr/util/circuitbreaker/CircuitBreakerManager.java ## @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.util.circuitbreaker; + +import java.util.HashMap; +import java.util.Map; + +import org.apache.solr.core.SolrCore; + +/** + * Manages all registered circuit breaker instances. Responsible for a holistic view + * of whether a circuit breaker has tripped or not. + * + * There are two typical ways of using this class's instance: + * 1. Check if any circuit breaker has triggered -- and know which circuit breaker has triggered. + * 2. Get an instance of a specific circuit breaker and perform checks. + * Review comment: Agreed. I added the single configuration since there is only one code path. I have a SIP upcoming to add an API for controlling these configs, will add this to the same. Thanks for bringing the same up. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[jira] [Updated] (SOLR-14431) Using "Segments Info" UI screen can cause future stalls in replication/recovery/core-reload (/admin/segments)
[ https://issues.apache.org/jira/browse/SOLR-14431?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Chris M. Hostetter updated SOLR-14431: -- Component/s: SolrCloud replication (java) Description: A bug in the {{SegmentsInfoRequestHandler}} (aka: {{/admin/segments}} - which is used under the covers when viewing the "Segments Info" panel of a core in the Admin UI) causes it to increment the internal "ref-count" of the IndexWriter by default, without ever decrementing that ref-count. This can cause delayed problems in any situation where the IndexWriter needs to be updated/replaced/locked: * Core {{RELOAD}} operations * Master/Slave replication (via IndexFetcher) * {{PULL}} Replica updates (via IndexFetcher) * {{TLOG}} Replica updates (via IndexFetcher) * {{NRT}} Recovery from Leader (via IndexFetcher) ...these manifest as operations that "stall" due to the threads attempting to execute them blocking forever waiting for a {{ReentrantReadWriteLock}} in {{DefaultSolrCoreState}} that will never be released. A config-only workaround exists for this problem, by explicitly declaring the {{/admin/segments}} handler in {{solrconfig.xml}} with an {{invariants}} param that requests additional info, forcing it down a code path where it _uses_ the IndexWriter, *and decrements the ref-count, releasing the lock*. 
{code:java|title=solrconfig.xml workaround} true {code} Example stack traces of what this can look like {noformat:title=IndexFetcher example stalled thread"} "thread",{ "id":65, "name":"indexFetcher-19-thread-1", "state":"TIMED_WAITING", "lock":"java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync@22a18ed", "cpuTime":"1454860.0285ms", "userTime":"622230.ms", "stackTrace":["java.base@11.0.7/jdk.internal.misc.Unsafe.park(Native Method)", "java.base@11.0.7/java.util.concurrent.locks.LockSupport.parkNanos(LockSupport.java:234)", "java.base@11.0.7/java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireNanos(AbstractQueuedSynchronizer.java:980)", "java.base@11.0.7/java.util.concurrent.locks.AbstractQueuedSynchronizer.tryAcquireNanos(AbstractQueuedSynchronizer.java:1288)", "java.base@11.0.7/java.util.concurrent.locks.ReentrantReadWriteLock$WriteLock.tryLock(ReentrantReadWriteLock.java:1131)", "org.apache.solr.update.DefaultSolrCoreState.lock(DefaultSolrCoreState.java:179)", "org.apache.solr.update.DefaultSolrCoreState.closeIndexWriter(DefaultSolrCoreState.java:240)", "org.apache.solr.handler.IndexFetcher.fetchLatestIndex(IndexFetcher.java:569)", "org.apache.solr.handler.IndexFetcher.fetchLatestIndex(IndexFetcher.java:351)", "org.apache.solr.handler.ReplicationHandler.doFetch(ReplicationHandler.java:424)", "org.apache.solr.handler.ReplicationHandler.lambda$setupPolling$13(ReplicationHandler.java:1210)", "org.apache.solr.handler.ReplicationHandler$$Lambda$513/0x0008006bf440.run(Unknown Source)", "java.base@11.0.7/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515)", "java.base@11.0.7/java.util.concurrent.FutureTask.runAndReset(FutureTask.java:305)", "java.base@11.0.7/java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:305)", "java.base@11.0.7/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)", 
"java.base@11.0.7/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)", "java.base@11.0.7/java.lang.Thread.run(Thread.java:834)"]}, {noformat} {noformat:title=Core RELOAD example stalled thread} "thread",{ "id":16, "name":"qtp1558079303-16", "state":"WAITING", "lock":"java.lang.Object@70c81fe1", "cpuTime":"73.4453ms", "userTime":"60.ms", "stackTrace":["java.base@11.0.4/java.lang.Object.wait(Native Method)", "java.base@11.0.4/java.lang.Object.wait(Object.java:328)", "org.apache.solr.core.SolrCores.waitAddPendingCoreOps(SolrCores.java:394)", "org.apache.solr.core.CoreContainer.reload(CoreContainer.java:1545)", "org.apache.solr.handler.admin.CoreAdminOperation.lambda$static$2(CoreAdminOperation.java:132)", "org.apache.solr.handler.admin.CoreAdminOperation$$Lambda$266/0x000100431040.execute(Unknown Source)", "org.apache.solr.handler.admin.CoreAdminOperation.execute(CoreAdminOperation.java:362)", "org.apache.solr.handler.admin.CoreAdminHandler$CallInfo.call(CoreAdminHandler.java:397)", "org.apache.solr.handler.admin.CoreAdminHandler.handleRequestBody(CoreAdminHandler.java:181)",
[GitHub] [lucene-solr] atris commented on a change in pull request #1626: SOLR-14588: Implement Circuit Breakers
atris commented on a change in pull request #1626: URL: https://github.com/apache/lucene-solr/pull/1626#discussion_r447116490 ## File path: solr/core/src/test-files/solr/collection1/conf/solrconfig-implicitproperties.xml ## @@ -42,6 +42,8 @@ true 20 20 +false Review comment: Fair point, removed, thanks. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[GitHub] [lucene-solr] atris commented on a change in pull request #1626: SOLR-14588: Implement Circuit Breakers
atris commented on a change in pull request #1626: URL: https://github.com/apache/lucene-solr/pull/1626#discussion_r447094476 ## File path: solr/core/src/java/org/apache/solr/util/circuitbreaker/CircuitBreakerType.java ## @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.util.circuitbreaker; + +/** + * Types of circuit breakers + */ Review comment: I prefer cleaner abstractions -- helps avoid compiler errors ;) This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[GitHub] [lucene-solr] atris commented on a change in pull request #1626: SOLR-14588: Implement Circuit Breakers
atris commented on a change in pull request #1626: URL: https://github.com/apache/lucene-solr/pull/1626#discussion_r447093442 ## File path: solr/core/src/java/org/apache/solr/core/SolrConfig.java ## @@ -804,6 +813,14 @@ private void initLibs(SolrResourceLoader loader, boolean isConfigsetTrusted) { loader.reloadLuceneSPI(); } + private void validateMemoryBreakerThreshold() { +if (useCircuitBreakers) { + if (memoryCircuitBreakerThreshold > 100 || memoryCircuitBreakerThreshold < 0) { +throw new IllegalArgumentException("memoryCircuitBreakerThreshold is not a valid percentage"); + } Review comment: Same as above. If we wish to enforce a smaller domain of values, we should discuss the range and I will add extra checks. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[jira] [Resolved] (LUCENE-9417) Tessellator might fail when several holes are connected to the same vertex
[ https://issues.apache.org/jira/browse/LUCENE-9417?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Ignacio Vera resolved LUCENE-9417. -- Fix Version/s: 8.6 Assignee: Ignacio Vera Resolution: Fixed > Tessellator might fail when several holes share are connected to the same > vertex > > > Key: LUCENE-9417 > URL: https://issues.apache.org/jira/browse/LUCENE-9417 > Project: Lucene - Core > Issue Type: Bug >Reporter: Ignacio Vera >Assignee: Ignacio Vera >Priority: Major > Fix For: 8.6 > > Attachments: image-2020-06-25-11-03-02-075.png, > image-2020-06-25-11-05-50-406.png > > Time Spent: 20m > Remaining Estimate: 0h > > Related to LUCENE-9400, tessellator might fail when several polygons are > connected to the same polygon vertex, and this vertex is connected from more > than one segment. For example the following polygon: > > !image-2020-06-25-11-03-02-075.png|width=377,height=232! > > Holes get connected the following way: > > !image-2020-06-25-11-05-50-406.png|width=357,height=236! > > Two holes are connected to the same point, and this point is shared across > different segments. We need to make sure we connect in such a way that we are > not crossing each other. -- This message was sent by Atlassian Jira (v8.3.4#803005) - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[jira] [Commented] (LUCENE-9417) Tessellator might fail when several holes are connected to the same vertex
[ https://issues.apache.org/jira/browse/LUCENE-9417?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17147906#comment-17147906 ] ASF subversion and git services commented on LUCENE-9417: - Commit 3bcd86646ffd1ba3f6681440e55da37dcee3ea2e in lucene-solr's branch refs/heads/branch_8x from Ignacio Vera [ https://gitbox.apache.org/repos/asf?p=lucene-solr.git;h=3bcd866 ] LUCENE-9417: Tessellator might fail when several holes share are connected to the same vertex (#1614) > Tessellator might fail when several holes share are connected to the same > vertex > > > Key: LUCENE-9417 > URL: https://issues.apache.org/jira/browse/LUCENE-9417 > Project: Lucene - Core > Issue Type: Bug >Reporter: Ignacio Vera >Priority: Major > Attachments: image-2020-06-25-11-03-02-075.png, > image-2020-06-25-11-05-50-406.png > > Time Spent: 20m > Remaining Estimate: 0h > > Related to LUCENE-9400, tessellator might fail when several polygons are > connected to the same polygon vertex, and this vertex is connected from more > than one segment. For example the following polygon: > > !image-2020-06-25-11-03-02-075.png|width=377,height=232! > > Holes get connected the following way: > > !image-2020-06-25-11-05-50-406.png|width=357,height=236! > > Two holes are connected to the same point, and this point is shared across > different segments. We need to make sure we connect in such a way that we are > not crossing each other. -- This message was sent by Atlassian Jira (v8.3.4#803005) - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[jira] [Commented] (LUCENE-9417) Tessellator might fail when several holes are connected to the same vertex
[ https://issues.apache.org/jira/browse/LUCENE-9417?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17147900#comment-17147900 ] ASF subversion and git services commented on LUCENE-9417: - Commit 0cef29f1381e21337863471346294bb76acb3a6c in lucene-solr's branch refs/heads/master from Ignacio Vera [ https://gitbox.apache.org/repos/asf?p=lucene-solr.git;h=0cef29f ] LUCENE-9417: Tessellator might fail when several holes share are connected to the same vertex (#1614) > Tessellator might fail when several holes share are connected to the same > vertex > > > Key: LUCENE-9417 > URL: https://issues.apache.org/jira/browse/LUCENE-9417 > Project: Lucene - Core > Issue Type: Bug >Reporter: Ignacio Vera >Priority: Major > Attachments: image-2020-06-25-11-03-02-075.png, > image-2020-06-25-11-05-50-406.png > > Time Spent: 10m > Remaining Estimate: 0h > > Related to LUCENE-9400, tessellator might fail when several polygons are > connected to the same polygon vertex, and this vertex is connected from more > than one segment. For example the following polygon: > > !image-2020-06-25-11-03-02-075.png|width=377,height=232! > > Holes get connected the following way: > > !image-2020-06-25-11-05-50-406.png|width=357,height=236! > > Two holes are connected to the same point, and this point is shared across > different segments. We need to make sure we connect in such a way that we are > not crossing each other. -- This message was sent by Atlassian Jira (v8.3.4#803005) - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[GitHub] [lucene-solr] iverase merged pull request #1614: LUCENE-9417: Tessellator might fail when several holes are connected to the same vertex
iverase merged pull request #1614: URL: https://github.com/apache/lucene-solr/pull/1614 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[jira] [Commented] (SOLR-14588) Circuit Breakers Infrastructure and Real JVM Based Circuit Breaker
[ https://issues.apache.org/jira/browse/SOLR-14588?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17147892#comment-17147892 ] Erick Erickson commented on SOLR-14588: --- Next time I do something like this I'll just @AwaitsFix rather than rush in and add confusion ;) > Circuit Breakers Infrastructure and Real JVM Based Circuit Breaker > -- > > Key: SOLR-14588 > URL: https://issues.apache.org/jira/browse/SOLR-14588 > Project: Solr > Issue Type: Improvement > Security Level: Public(Default Security Level. Issues are Public) >Affects Versions: master (9.0) >Reporter: Atri Sharma >Assignee: Atri Sharma >Priority: Major > Time Spent: 9.5h > Remaining Estimate: 0h > > This Jira tracks addition of circuit breakers in the search path and > implements JVM based circuit breaker which rejects incoming search requests > if the JVM heap usage exceeds a defined percentage. -- This message was sent by Atlassian Jira (v8.3.4#803005) - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[jira] [Commented] (SOLR-14578) Confusing Name in the docs and Test of Auto Add Trigger
[ https://issues.apache.org/jira/browse/SOLR-14578?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17147883#comment-17147883 ] Cassandra Targett commented on SOLR-14578: -- Ha, well that would be a really good reason! > Confusing Name in the docs and Test of Auto Add Trigger > --- > > Key: SOLR-14578 > URL: https://issues.apache.org/jira/browse/SOLR-14578 > Project: Solr > Issue Type: Improvement > Security Level: Public(Default Security Level. Issues are Public) > Components: AutoScaling, documentation >Affects Versions: master (9.0) >Reporter: Marcus Eagan >Assignee: Mike Drob >Priority: Trivial > Fix For: master (9.0) > > Time Spent: 20m > Remaining Estimate: 0h > > In the autoscaling docs, the name of the names of two actions are the same > and it is confusing to users. > See: > {code:java} > { > "set-trigger": { > "name": ".auto_add_replicas", > "event": "nodeLost, > "waitFor": "5s", > "enabled": true, > "actions": [ > { > "name": "auto_add_replicas_plan", > "class": "solr.AutoAddReplicasPlanAction" > }, > { >"name": "auto_add_replicas_plan", // works?, but should be execute plan >"class": "solr.ExecutePlanAction" > } > ] > } > } > {code} -- This message was sent by Atlassian Jira (v8.3.4#803005) - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[jira] [Commented] (SOLR-14578) Confusing Name in the docs and Test of Auto Add Trigger
[ https://issues.apache.org/jira/browse/SOLR-14578?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17147881#comment-17147881 ] Mike Drob commented on SOLR-14578: -- The mis-named sections in question do not exist in 8x. > Confusing Name in the docs and Test of Auto Add Trigger > --- > > Key: SOLR-14578 > URL: https://issues.apache.org/jira/browse/SOLR-14578 > Project: Solr > Issue Type: Improvement > Security Level: Public(Default Security Level. Issues are Public) > Components: AutoScaling, documentation >Affects Versions: master (9.0) >Reporter: Marcus Eagan >Assignee: Mike Drob >Priority: Trivial > Fix For: master (9.0) > > Time Spent: 20m > Remaining Estimate: 0h > > In the autoscaling docs, the name of the names of two actions are the same > and it is confusing to users. > See: > {code:java} > { > "set-trigger": { > "name": ".auto_add_replicas", > "event": "nodeLost, > "waitFor": "5s", > "enabled": true, > "actions": [ > { > "name": "auto_add_replicas_plan", > "class": "solr.AutoAddReplicasPlanAction" > }, > { >"name": "auto_add_replicas_plan", // works?, but should be execute plan >"class": "solr.ExecutePlanAction" > } > ] > } > } > {code} -- This message was sent by Atlassian Jira (v8.3.4#803005) - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[jira] [Commented] (SOLR-13749) Implement support for joining across collections with multiple shards ( XCJF )
[ https://issues.apache.org/jira/browse/SOLR-13749?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17147878#comment-17147878 ] Dan Fox commented on SOLR-13749: For joining on numeric fields, I was trying to follow the same approach that JoinQuery uses, which does something similar to get its DocSet ([https://github.com/apache/lucene-solr/blob/54e6528304027ab15bbac2b62746b34eed7b4f40/solr/core/src/java/org/apache/solr/search/JoinQuery.java#L250]). It looks like JoinQuery intentionally doesn't cache the result because the query may be very large. I don't think we had a particular reason for doing it this way - if there's a better way for this to work that sounds good to me. (We originally built XCJF for joining on string fields, but then we thought it should support numeric fields as well.) Our reason for not using a TermInSetQuery for the Terms case is that we were worried about queries with very large numbers of join keys - we didn't want to have to build up the full list of join key terms in memory. > Implement support for joining across collections with multiple shards ( XCJF ) > -- > > Key: SOLR-13749 > URL: https://issues.apache.org/jira/browse/SOLR-13749 > Project: Solr > Issue Type: New Feature >Reporter: Kevin Watters >Assignee: Gus Heck >Priority: Blocker > Fix For: 8.6 > > Attachments: 2020-03 Smiley with ASF hat.jpeg > > Time Spent: 2h 40m > Remaining Estimate: 0h > > This ticket includes 2 query parsers. > The first one is the "Cross collection join filter" (XCJF) parser. This is > the "Cross-collection join filter" query parser. It can do a call out to a > remote collection to get a set of join keys to be used as a filter against > the local collection. > The second one is the Hash Range query parser that you can specify a field > name and a hash range, the result is that only the documents that would have > hashed to that range will be returned. 
> This query parser will do an intersection based on join keys between 2 > collections. > The local collection is the collection that you are searching against. > The remote collection is the collection that contains the join keys that you > want to use as a filter. > Each shard participating in the distributed request will execute a query > against the remote collection. If the local collection is setup with the > compositeId router to be routed on the join key field, a hash range query is > applied to the remote collection query to only match the documents that > contain a potential match for the documents that are in the local shard/core. > > > Here's some vocab to help with the descriptions of the various parameters. > ||Term||Description|| > |Local Collection|This is the main collection that is being queried.| > |Remote Collection|This is the collection that the XCJFQuery will query to > resolve the join keys.| > |XCJFQuery|The lucene query that executes a search to get back a set of join > keys from a remote collection| > |HashRangeQuery|The lucene query that matches only the documents whose hash > code on a field falls within a specified range.| > > > ||Param ||Required ||Description|| > |collection|Required|The name of the external Solr collection to be queried > to retrieve the set of join key values ( required )| > |zkHost|Optional|The connection string to be used to connect to Zookeeper. > zkHost and solrUrl are both optional parameters, and at most one of them > should be specified. > If neither of zkHost or solrUrl are specified, the local Zookeeper cluster > will be used. ( optional )| > |solrUrl|Optional|The URL of the external Solr node to be queried ( optional > )| > |from|Required|The join key field name in the external collection ( required > )| > |to|Required|The join key field name in the local collection| > |v|See Note|The query to be executed against the external Solr collection to > retrieve the set of join key values. 
> Note: The original query can be passed at the end of the string or as the > "v" parameter. > It's recommended to use query parameter substitution with the "v" parameter > to ensure no issues arise with the default query parsers.| > |routed| |true / false. If true, the XCJF query will use each shard's hash > range to determine the set of join keys to retrieve for that shard. > This parameter improves the performance of the cross-collection join, but > it depends on the local collection being routed by the toField. If this > parameter is not specified, > the XCJF query will try to determine the correct value automatically.| > |ttl| |The length of time that an XCJF query in the cache will be considered > valid, in seconds. Defaults to 3600 (one hour). > The XCJF query will not be aware of
[GitHub] [lucene-solr] ctargett commented on a change in pull request #1628: SOLR-14539: Ref Guide changes
ctargett commented on a change in pull request #1628: URL: https://github.com/apache/lucene-solr/pull/1628#discussion_r447049307 ## File path: solr/solr-ref-guide/src/json-query-dsl.adoc ## @@ -303,6 +303,30 @@ include::{example-source-dir}JsonRequestApiTest.java[tag=solrj-ipod-query-bool-c -- +Example of referencing <>, <> and <>: Review comment: The syntax for a link to a section that's on the same page is to simply use the section title, so: - `<>` should become `<>` - `<>` should become `<>` If you don't want the first one to be printed as "Additional Queries" (capitalized), you can add the link text as in the 2nd example This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[GitHub] [lucene-solr] atris commented on a change in pull request #1626: SOLR-14588: Implement Circuit Breakers
atris commented on a change in pull request #1626: URL: https://github.com/apache/lucene-solr/pull/1626#discussion_r447048456 ## File path: solr/core/src/java/org/apache/solr/core/SolrConfig.java ## @@ -804,6 +813,14 @@ private void initLibs(SolrResourceLoader loader, boolean isConfigsetTrusted) { loader.reloadLuceneSPI(); } + private void validateMemoryBreakerThreshold() { +if (useCircuitBreakers) { + if (memoryCircuitBreakerThreshold > 100 || memoryCircuitBreakerThreshold < 0) { Review comment: This is more of a sanity test to ensure that the incoming values lie in the range of percentage. If we wish to restrain the actual domain of values that memoryCircuitBreakerThreshold can take, I can add another check and add the same in the documentation. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[GitHub] [lucene-solr] atris commented on pull request #1626: SOLR-14588: Implement Circuit Breakers
atris commented on pull request #1626: URL: https://github.com/apache/lucene-solr/pull/1626#issuecomment-651182733 > Atri: DebugComponentTest is failing a lot, although not reproducibly. See: https://jenkins.thetaphi.de/job/Lucene-Solr-master-Linux/27171/ I took a brief look and it looks like, under some circumstances, the circuitbreaker info is getting in the debug output throwing the test off as it doesn’t expect the circuitbreaker data. Jan pushed a fix to revert the last bits of tests from the earlier PR -- DebugComponentTest should be fine now (please let me know if otherwise). This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[jira] [Commented] (SOLR-14588) Circuit Breakers Infrastructure and Real JVM Based Circuit Breaker
[ https://issues.apache.org/jira/browse/SOLR-14588?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17147866#comment-17147866 ] Atri Sharma commented on SOLR-14588: [~janhoy] Not at all. I am grateful for the effort that you took to fix it -- in the end, its all community over code! :) > Circuit Breakers Infrastructure and Real JVM Based Circuit Breaker > -- > > Key: SOLR-14588 > URL: https://issues.apache.org/jira/browse/SOLR-14588 > Project: Solr > Issue Type: Improvement > Security Level: Public(Default Security Level. Issues are Public) >Affects Versions: master (9.0) >Reporter: Atri Sharma >Assignee: Atri Sharma >Priority: Major > Time Spent: 9h 10m > Remaining Estimate: 0h > > This Jira tracks addition of circuit breakers in the search path and > implements JVM based circuit breaker which rejects incoming search requests > if the JVM heap usage exceeds a defined percentage. -- This message was sent by Atlassian Jira (v8.3.4#803005) - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[jira] [Commented] (SOLR-14578) Confusing Name in the docs and Test of Auto Add Trigger
[ https://issues.apache.org/jira/browse/SOLR-14578?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17147860#comment-17147860 ] Cassandra Targett commented on SOLR-14578: -- Is there any reason why this couldn't be in branch_8x instead of master only? > Confusing Name in the docs and Test of Auto Add Trigger > --- > > Key: SOLR-14578 > URL: https://issues.apache.org/jira/browse/SOLR-14578 > Project: Solr > Issue Type: Improvement > Security Level: Public(Default Security Level. Issues are Public) > Components: AutoScaling, documentation >Affects Versions: master (9.0) >Reporter: Marcus Eagan >Assignee: Mike Drob >Priority: Trivial > Fix For: master (9.0) > > Time Spent: 20m > Remaining Estimate: 0h > > In the autoscaling docs, the name of the names of two actions are the same > and it is confusing to users. > See: > {code:java} > { > "set-trigger": { > "name": ".auto_add_replicas", > "event": "nodeLost, > "waitFor": "5s", > "enabled": true, > "actions": [ > { > "name": "auto_add_replicas_plan", > "class": "solr.AutoAddReplicasPlanAction" > }, > { >"name": "auto_add_replicas_plan", // works?, but should be execute plan >"class": "solr.ExecutePlanAction" > } > ] > } > } > {code} -- This message was sent by Atlassian Jira (v8.3.4#803005) - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[GitHub] [lucene-solr] mikemccand commented on pull request #1623: LUCENE-8962: Merge segments on getReader
mikemccand commented on pull request #1623: URL: https://github.com/apache/lucene-solr/pull/1623#issuecomment-651167468 A different, reproducing, test failure, likely from the same cause: ``` > java.lang.AssertionError > at __randomizedtesting.SeedInfo.seed([C64EB0BA0CE0061F:FB961E96340E586F]:0) > at org.apache.lucene.index.IndexWriter.maybeCloseOnTragicEvent(IndexWriter.java:5026) > at org.apache.lucene.index.IndexWriter.tragicEvent(IndexWriter.java:5019) > at org.apache.lucene.index.IndexWriter.merge(IndexWriter.java:4214) > at org.apache.lucene.index.IndexWriter$IndexWriterMergeSource.merge(IndexWriter.java:5735) > at org.apache.lucene.index.SerialMergeScheduler.merge(SerialMergeScheduler.java:40) > at org.apache.lucene.index.IndexWriter.getReader(IndexWriter.java:581) > at org.apache.lucene.index.DirectoryReader.open(DirectoryReader.java:103) > at org.apache.lucene.index.TestIndexWriterExceptions2.testBasics(TestIndexWriterExceptions2.java:205) > at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.base/java.lang.reflect.Method.invoke(Method.java:566) > at com.carrotsearch.randomizedtesting.RandomizedRunner.invoke(RandomizedRunner.java:1754) > at com.carrotsearch.randomizedtesting.RandomizedRunner$8.evaluate(RandomizedRunner.java:942) > at com.carrotsearch.randomizedtesting.RandomizedRunner$9.evaluate(RandomizedRunner.java:978) > at com.carrotsearch.randomizedtesting.RandomizedRunner$10.evaluate(RandomizedRunner.java:992) > at org.apache.lucene.util.TestRuleSetupTeardownChained$1.evaluate(TestRuleSetupTeardownChained.java:49) > at org.apache.lucene.util.AbstractBeforeAfterRule$1.evaluate(AbstractBeforeAfterRule.java:45) > at 
org.apache.lucene.util.TestRuleThreadAndTestName$1.evaluate(TestRuleThreadAndTestName.java:48) > at org.apache.lucene.util.TestRuleIgnoreAfterMaxFailures$1.evaluate(TestRuleIgnoreAfterMaxFailures.java:64) > at org.apache.lucene.util.TestRuleMarkFailure$1.evaluate(TestRuleMarkFailure.java:47) > at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) > at com.carrotsearch.randomizedtesting.ThreadLeakControl$StatementRunner.run(ThreadLeakControl.java:370) > at com.carrotsearch.randomizedtesting.ThreadLeakControl.forkTimeoutingTask(ThreadLeakControl.java:819) > at com.carrotsearch.randomizedtesting.ThreadLeakControl$3.evaluate(ThreadLeakControl.java:470) > at com.carrotsearch.randomizedtesting.RandomizedRunner.runSingleTest(RandomizedRunner.java:951) > at com.carrotsearch.randomizedtesting.RandomizedRunner$5.evaluate(RandomizedRunner.java:836) > at com.carrotsearch.randomizedtesting.RandomizedRunner$6.evaluate(RandomizedRunner.java:887) > at com.carrotsearch.randomizedtesting.RandomizedRunner$7.evaluate(RandomizedRunner.java:898) > at org.apache.lucene.util.AbstractBeforeAfterRule$1.evaluate(AbstractBeforeAfterRule.java:45) > at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) > at org.apache.lucene.util.TestRuleStoreClassName$1.evaluate(TestRuleStoreClassName.java:41) > at com.carrotsearch.randomizedtesting.rules.NoShadowingOrOverridesOnMethodsRule$1.evaluate(NoShadowingOrOverridesOnMethodsRule.java:40) > at com.carrotsearch.randomizedtesting.rules.NoShadowingOrOverridesOnMethodsRule$1.evaluate(NoShadowingOrOverridesOnMethodsRule.java:40) > at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) > at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) > at org.apache.lucene.util.TestRuleAssertionsRequired$1.evaluate(TestRuleAssertionsRequired.java:53) > at 
org.apache.lucene.util.TestRuleMarkFailure$1.evaluate(TestRuleMarkFailure.java:47) > at org.apache.lucene.util.TestRuleIgnoreAfterMaxFailures$1.evaluate(TestRuleIgnoreAfterMaxFailures.java:64) > at org.apache.lucene.util.TestRuleIgnoreTestSuites$1.evaluate(TestRuleIgnoreTestSuites.java:54) > at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) > at
[GitHub] [lucene-solr] mikemccand commented on pull request #1623: LUCENE-8962: Merge segments on getReader
mikemccand commented on pull request #1623: URL: https://github.com/apache/lucene-solr/pull/1623#issuecomment-651166475 Here's a fun tragic failure test that repros: ``` [junit4] 2> NOTE: reproduce with: ant test -Dtestcase=TestIndexWriterOnVMError -Dtests.method=testUnknownError -Dtests.seed=BBEAF0EBC40AB8F7 -Dtests.slow=true -Dtests.badapples=true \ -Dtests.locale=brx -Dtests.timezone=Indian/Chagos -Dtests.asserts=true -Dtests.file.encoding=UTF-8 [junit4] FAILURE 0.30s | TestIndexWriterOnVMError.testUnknownError <<< [junit4]> Throwable #1: java.lang.AssertionError [junit4]>at __randomizedtesting.SeedInfo.seed([BBEAF0EBC40AB8F7:50BC2F6BC68D2EC7]:0) [junit4]>at org.apache.lucene.index.IndexWriter.maybeCloseOnTragicEvent(IndexWriter.java:5026) [junit4]>at org.apache.lucene.index.IndexWriter.tragicEvent(IndexWriter.java:5019) [junit4]>at org.apache.lucene.index.IndexWriter.merge(IndexWriter.java:4214) [junit4]>at org.apache.lucene.index.IndexWriter$IndexWriterMergeSource.merge(IndexWriter.java:5735) [junit4]>at org.apache.lucene.index.SerialMergeScheduler.merge(SerialMergeScheduler.java:40) [junit4]>at org.apache.lucene.index.IndexWriter.getReader(IndexWriter.java:581) [junit4]>at org.apache.lucene.index.DirectoryReader.open(DirectoryReader.java:103) [junit4]>at org.apache.lucene.index.TestIndexWriterOnVMError.doTest(TestIndexWriterOnVMError.java:175) [junit4]>at org.apache.lucene.index.TestIndexWriterOnVMError.testUnknownError(TestIndexWriterOnVMError.java:251) [junit4]>at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method) [junit4]>at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) [junit4]>at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) [junit4]>at java.base/java.lang.reflect.Method.invoke(Method.java:566) [junit4]>at java.base/java.lang.Thread.run(Thread.java:834) [junit4] 2> NOTE: test params are: codec=Asserting(Lucene86): 
{text_payloads=PostingsFormat(name=Direct), text_vectors=TestBloomFilteredLucenePostings(BloomFilteringPostingsFormat(Luc\ ene84)), text1=TestBloomFilteredLucenePostings(BloomFilteringPostingsFormat(Lucene84)), id=PostingsFormat(name=Direct)}, docValues:{dv3=DocValuesFormat(name=Asserting), dv2=DocValuesFormat\ (name=Lucene80), dv5=DocValuesFormat(name=Asserting), dv=DocValuesFormat(name=Lucene80), dv4=DocValuesFormat(name=Lucene80)}, maxPointsInLeafNode=1228, maxMBSortInHeap=6.874571632539512, s\ im=Asserting(RandomSimilarity(queryNorm=true): {text_payloads=IB SPL-L1, text_vectors=BM25(k1=1.2,b=0.75), text1=DFI(Saturated)}), locale=brx, timezone=Indian/Chagos [junit4] 2> NOTE: Linux 5.5.6-arch1-1 amd64/Oracle Corporation 11.0.6 (64-bit)/cpus=128,threads=1,free=522930136,total=536870912 [junit4] 2> NOTE: All tests run in this JVM: [TestIndexWriterOnVMError] ``` This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[GitHub] [lucene-solr] mikemccand commented on pull request #1623: LUCENE-8962: Merge segments on getReader
mikemccand commented on pull request #1623: URL: https://github.com/apache/lucene-solr/pull/1623#issuecomment-651156942 > So this would merge small commits on refresh? Small segments, yes. > I wonder if it would be more obvious to users if we call the MergeTrigger REFRESH? +1, refresh is more recognized in the outside world :) I have been beasting this and uncovering small test failures, in tests that are confused that they do not have the expected number of segments. I'll push some fixes for those ... This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[jira] [Commented] (LUCENE-9416) Fix CheckIndex to print norms as unsigned integers
[ https://issues.apache.org/jira/browse/LUCENE-9416?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17147816#comment-17147816 ] Michael McCandless commented on LUCENE-9416: Yeah this is indeed tricky. By default, norms are mapped to a single unsigned byte (0 .. 255), I think. Yet, the API, and a custom {{Codec}}, could indeed choose to use higher precision and use the full {{long}} space (well, a java signed long, so maximum positive value of {{Long.MAX_VALUE}}) allowed by the API. From that thread, it looks like {{-1}} is supposed to be the largest unsigned value. So maybe we should fix {{CheckIndex}}'s debug output to interpret the number as an unsigned Long? Maybe using {{Long.toUnsignedString}}? > Fix CheckIndex to print norms as unsigned integers > -- > > Key: LUCENE-9416 > URL: https://issues.apache.org/jira/browse/LUCENE-9416 > Project: Lucene - Core > Issue Type: Bug > Components: core/index >Reporter: Mohammad Sadiq >Priority: Minor > > In the [discussion on "CheckIndex complaining about -1 for norms value" in > the java-user list|http://markmail.org/message/gcwdhasblsyovwc2], it was > identified that we should "fix CheckIndex to print norms as unsigned > integers". > I'd like to take a stab at this. > I'm trying to understand the problem and from what I gather, while norms are > `byte`s, the API exposes them as `long` values. While printing the error > message, we want it to print a zero instead of -1? -- This message was sent by Atlassian Jira (v8.3.4#803005) - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[GitHub] [lucene-solr] ctargett commented on pull request #1625: Highlighting the actual state observed in LUCENE-9328
ctargett commented on pull request #1625: URL: https://github.com/apache/lucene-solr/pull/1625#issuecomment-651138896 I commented with a specific recommendation on the change, but in general wonder if it's worth it? LUCENE-9328 is marked as an Improvement, which would imply to me that we never should have expected grouping to be faster with docValues? If that's the case, then the docs have been incorrect and this change makes sense. However, if grouping was correctly documented as a specific thing that should be faster but isn't today because of a regression, then the Jira should be a Bug and this change makes less sense because we don't document every single Bug in Solr or Lucene in the Ref Guide - we'd have hundreds of tiny edits as things break and get fixed and we'd miss *a lot* of them. There's obviously differences of degrees here - if SSL totally broke for an entire release, we'd probably want to document that, but grouping being slower for a release or two (if that's the case, I didn't study the Jiras so maybe it's been longer), that's a less pressing edit IMO. A related point is that if this has been wrong all along but is now going to be supported in the upcoming release (not sure the timing of LUCENE-9328), making this change now means you'll have to add the word back in another commit before release and it wouldn't be worth removing it now. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[GitHub] [lucene-solr] ctargett commented on a change in pull request #1625: Highlighting the actual state observed in LUCENE-9328
ctargett commented on a change in pull request #1625: URL: https://github.com/apache/lucene-solr/pull/1625#discussion_r446983370 ## File path: solr/solr-ref-guide/src/docvalues.adoc ## @@ -24,7 +24,7 @@ The standard way that Solr builds the index is with an _inverted index_. This st For other features that we now commonly associate with search, such as sorting, faceting, and highlighting, this approach is not very efficient. The faceting engine, for example, must look up each term that appears in each document that will make up the result set and pull the document IDs in order to build the facet list. In Solr, this is maintained in memory, and can be slow to load (depending on the number of documents, terms, etc.). -In Lucene 4.0, a new approach was introduced. DocValue fields are now column-oriented fields with a document-to-value mapping built at index time. This approach promises to relieve some of the memory requirements of the fieldCache and make lookups for faceting, sorting, and grouping much faster. +In Lucene 4.0, a new approach was introduced. DocValue fields are now column-oriented fields with a document-to-value mapping built at index time. This approach promises to relieve some of the memory requirements of the fieldCache and make lookups for faceting, sorting much faster. Review comment: If you're going to take a word out of the list and leave only 2 items, you should remove the comma and add "and" instead: `...make lookups for faceting and sorting much faster`. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[jira] [Commented] (SOLR-14462) Autoscaling placement wrong with concurrent collection creations
[ https://issues.apache.org/jira/browse/SOLR-14462?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17147788#comment-17147788 ] Ilan Ginzburg commented on SOLR-14462: -- PR for backport to branch_8x for inclusion in 8.6 at [https://github.com/apache/lucene-solr/pull/1630] Don't know if I'll have more luck now for a review than with the original version in master, otherwise I'll merge that later today... > Autoscaling placement wrong with concurrent collection creations > > > Key: SOLR-14462 > URL: https://issues.apache.org/jira/browse/SOLR-14462 > Project: Solr > Issue Type: Bug > Components: AutoScaling >Affects Versions: master (9.0), 8.6 >Reporter: Ilan Ginzburg >Assignee: Ilan Ginzburg >Priority: Major > Attachments: PolicyHelperNewLogs.txt, policylogs.txt > > Time Spent: 3h > Remaining Estimate: 0h > > Under concurrent collection creation, wrong Autoscaling placement decisions > can lead to severely unbalanced clusters. > Sequential creation of the same collections is handled correctly and the > cluster is balanced. > *TL;DR;* under high load, the way sessions that cache future changes to > Zookeeper are managed cause placement decisions of multiple concurrent > Collection API calls to ignore each other, be based on identical “initial” > cluster state, possibly leading to identical placement decisions and as a > consequence cluster imbalance. > *Some context first* for those less familiar with how Autoscaling deals with > cluster state change: a PolicyHelper.Session is created with a snapshot of > the Zookeeper cluster state and is used to track already decided but not yet > persisted to Zookeeper cluster state changes so that Collection API commands > can make the right placement decisions. > A Collection API command either uses an existing cached Session (that > includes changes computed by previous command(s)) or creates a new Session > initialized from the Zookeeper cluster state (i.e. with only state changes > already persisted). 
> When a Collection API command requires a Session - and one is needed for any > cluster state update computation - if one exists but is currently in use, the > command can wait up to 10 seconds. If the session becomes available, it is > reused. Otherwise, a new one is created. > The Session lifecycle is as follows: it is created in COMPUTING state by a > Collection API command and is initialized with a snapshot of cluster state > from Zookeeper (does not require a Zookeeper read, this is running on > Overseer that maintains a cache of cluster state). The command has exclusive > access to the Session and can change the state of the Session. When the > command is done changing the Session, the Session is “returned” and its state > changes to EXECUTING while the command continues to run to persist the state > to Zookeeper and interact with the nodes, but no longer interacts with the > Session. Another command can then grab a Session in EXECUTING state, change > its state to COMPUTING to compute new changes taking into account previous > changes. When all commands having used the session have completed their work, > the session is “released” and destroyed (at this stage, Zookeeper contains > all the state changes that were computed using that Session). > The issue arises when multiple Collection API commands are executed at once. > A first Session is created and commands start using it one by one. In a > simple 1 shard 1 replica collection creation test run with 100 parallel > Collection API requests (see debug logs from PolicyHelper in file > policy.logs), this Session update phase (Session in COMPUTING status in > SessionWrapper) takes about 250-300ms (MacBook Pro). > This means that about 40 commands can run by using in turn the same Session > (45 in the sample run). 
The commands that have been waiting for too long time > out after 10 seconds, more or less all at the same time (at the rate at which > they have been received by the OverseerCollectionMessageHandler, approx one > per 100ms in the sample run) and most/all independently decide to create a > new Session. These new Sessions are based on Zookeeper state, they might or > might not include some of the changes from the first 40 commands (depending > on if these commands got their changes written to Zookeeper by the time of > the 10 seconds timeout, a few might have made it, see below). > These new Sessions (54 sessions in addition to the initial one) are based on > more or less the same state, so all remaining commands are making placement > decisions that do not take into account each other (and likely not much of > the first 44 placement decisions either). > The sample run whose relevant logs are attached led for
[GitHub] [lucene-solr] murblanc opened a new pull request #1630: SOLR-14462: cache more than one autoscaling session
murblanc opened a new pull request #1630: URL: https://github.com/apache/lucene-solr/pull/1630 Cherry picked from 25428013fb0ed8f8fdbebdef3f1d65dea77129c2 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[GitHub] [lucene-solr] msokolov commented on pull request #1623: LUCENE-8962: Merge segments on getReader
msokolov commented on pull request #1623: URL: https://github.com/apache/lucene-solr/pull/1623#issuecomment-651116085 So this would merge small commits on refresh? That's cool. I wonder if it would be more obvious to users if we call the MergeTrigger REFRESH? I see that the trigger method is IndexWriter.getReader, but it seems like ultimately the higher level event that is more familiar is refresh. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[jira] [Commented] (SOLR-14557) eDisMax parser unable to parse subparser followed by parenthesis
[ https://issues.apache.org/jira/browse/SOLR-14557?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17147749#comment-17147749 ] Lucene/Solr QA commented on SOLR-14557: --- | (x) *{color:red}-1 overall{color}* | \\ \\ || Vote || Subsystem || Runtime || Comment || || || || || {color:brown} Prechecks {color} || | {color:green}+1{color} | {color:green} test4tests {color} | {color:green} 0m 0s{color} | {color:green} The patch appears to include 1 new or modified test files. {color} | || || || || {color:brown} master Compile Tests {color} || | {color:green}+1{color} | {color:green} compile {color} | {color:green} 1m 11s{color} | {color:green} master passed {color} | || || || || {color:brown} Patch Compile Tests {color} || | {color:green}+1{color} | {color:green} compile {color} | {color:green} 1m 4s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} javac {color} | {color:green} 1m 4s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} Release audit (RAT) {color} | {color:green} 1m 4s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} Check forbidden APIs {color} | {color:green} 1m 4s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} Validate source patterns {color} | {color:green} 1m 4s{color} | {color:green} the patch passed {color} | || || || || {color:brown} Other Tests {color} || | {color:red}-1{color} | {color:red} unit {color} | {color:red} 45m 10s{color} | {color:red} core in the patch failed. 
{color} | | {color:black}{color} | {color:black} {color} | {color:black} 49m 29s{color} | {color:black} {color} | \\ \\ || Reason || Tests || | Failed junit tests | solr.search.TestExtendedDismaxParser | \\ \\ || Subsystem || Report/Notes || | JIRA Issue | SOLR-14557 | | JIRA Patch URL | https://issues.apache.org/jira/secure/attachment/13006646/SOLR-14557.patch | | Optional Tests | compile javac unit ratsources checkforbiddenapis validatesourcepatterns | | uname | Linux lucene1-us-west 4.15.0-54-generic #58-Ubuntu SMP Mon Jun 24 10:55:24 UTC 2019 x86_64 x86_64 x86_64 GNU/Linux | | Build tool | ant | | Personality | /home/jenkins/jenkins-slave/workspace/PreCommit-SOLR-Build/sourcedir/dev-tools/test-patch/lucene-solr-yetus-personality.sh | | git revision | master / 49a3f0a11d4 | | ant | version: Apache Ant(TM) version 1.10.5 compiled on March 28 2019 | | Default Java | LTS | | unit | https://builds.apache.org/job/PreCommit-SOLR-Build/769/artifact/out/patch-unit-solr_core.txt | | Test Results | https://builds.apache.org/job/PreCommit-SOLR-Build/769/testReport/ | | modules | C: solr/core U: solr/core | | Console output | https://builds.apache.org/job/PreCommit-SOLR-Build/769/console | | Powered by | Apache Yetus 0.7.0 http://yetus.apache.org | This message was automatically generated. > eDisMax parser unable to parse subparser followed by parenthesis > > > Key: SOLR-14557 > URL: https://issues.apache.org/jira/browse/SOLR-14557 > Project: Solr > Issue Type: Bug > Security Level: Public(Default Security Level. Issues are Public) > Components: query parsers >Reporter: Mikhail Khludnev >Priority: Major > Labels: painful > Attachments: SOLR-14557.patch > > > h2. Solr 4.5 > {{/select?defType=edismax=\{!lucene}(foo)=true}} > > goes like > {code} > \{!lucene}(foo) > content:foo > LuceneQParser > {code} > fine > h2. Solr 8.2 > with luceneMatchVersion=4.5 following SOLR-11501 I know it's a grey zone but > it's a question of migrating existing queries. 
> {{/select?defType=edismax=\{!lucene}(foo)=true}} > goes like > {code} > "querystring":"\{!lucene}(foo)", > "parsedquery":"+DisjunctionMaxQuery(((Project.Address:lucene > Project.Address:foo) | (Project.OwnerType:lucene Project.OwnerType:foo) > "QParser":"ExtendedDismaxQParser", > {code} > blah... > but removing braces in 8.2 works perfectly fine > {code} > "querystring":"\{!lucene}foo", > "parsedquery":"+content:foo", > "parsedquery_toString":"+content:foo", > "QParser":"ExtendedDismaxQParser", > {code} -- This message was sent by Atlassian Jira (v8.3.4#803005) - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[GitHub] [lucene-solr] sigram commented on a change in pull request #1626: SOLR-14588: Implement Circuit Breakers
sigram commented on a change in pull request #1626: URL: https://github.com/apache/lucene-solr/pull/1626#discussion_r446935828 ## File path: solr/core/src/java/org/apache/solr/util/circuitbreaker/CircuitBreakerManager.java ## @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.util.circuitbreaker; + +import java.util.HashMap; +import java.util.Map; + +import org.apache.solr.core.SolrCore; + +/** + * Manages all registered circuit breaker instances. Responsible for a holistic view + * of whether a circuit breaker has tripped or not. + * + * There are two typical ways of using this class's instance: + * 1. Check if any circuit breaker has triggered -- and know which circuit breaker has triggered. + * 2. Get an instance of a specific circuit breaker and perform checks. + * + * It is a good practice to register new circuit breakers here if you want them checked for every + * request. + * + * NOTE: The current way of registering new default circuit breakers is minimal and not a long term + * solution. There will be a follow up with a SIP for a schema API design. 
+ */ +public class CircuitBreakerManager { + + private final Map circuitBreakerMap = new HashMap<>(); + + // Allows replacing of existing circuit breaker + public void registerCircuitBreaker(CircuitBreakerType circuitBreakerType, CircuitBreaker circuitBreaker) { +circuitBreakerMap.put(circuitBreakerType, circuitBreaker); + } + + public CircuitBreaker getCircuitBreaker(CircuitBreakerType circuitBreakerType) { +assert circuitBreakerType != null; + +return circuitBreakerMap.get(circuitBreakerType); + } + + /** + * Check if any circuit breaker has triggered. + * @return CircuitBreakers which have triggered, null otherwise + */ + public Map checkAllCircuitBreakersAndReturnTrippedBreakers() { +Map triggeredCircuitBreakers = null; Review comment: If this is called on every query request then it makes sense to avoid allocations until they are needed. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[GitHub] [lucene-solr] sigram commented on a change in pull request #1626: SOLR-14588: Implement Circuit Breakers
sigram commented on a change in pull request #1626: URL: https://github.com/apache/lucene-solr/pull/1626#discussion_r446908349 ## File path: solr/contrib/prometheus-exporter/src/test-files/solr/collection1/conf/solrconfig.xml ## @@ -83,6 +83,10 @@ 200 +false + +100 Review comment: This is probably not needed in configs that don't actually use it (when useCircuitBreakers=false)? Also, to make it more future-proof, we could put these in a section - the expectation is that we will have at least one more (CPU breaker) and potentially other ones too, so instead of adding these new breakers as new elements at this level we could add them as a section (as sub-elements). ## File path: solr/core/src/java/org/apache/solr/util/circuitbreaker/CircuitBreakerManager.java ## @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.util.circuitbreaker; + +import java.util.HashMap; +import java.util.Map; + +import org.apache.solr.core.SolrCore; + +/** + * Manages all registered circuit breaker instances. Responsible for a holistic view + * of whether a circuit breaker has tripped or not. + * + * There are two typical ways of using this class's instance: + * 1. 
Check if any circuit breaker has triggered -- and know which circuit breaker has triggered. + * 2. Get an instance of a specific circuit breaker and perform checks. + * Review comment: The following probably belongs to the SIP ... but the way I think about the common usage of this class for different code-paths is if breaker configs are labeled and correspond to different code-paths, eg.: * "query" -> one config * "index" -> another config * "foobar" -> yet another config, used perhaps in my custom component Current implementation limits us to use the same config for potentially very different code paths. ## File path: solr/core/src/java/org/apache/solr/util/circuitbreaker/CircuitBreakerManager.java ## @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.util.circuitbreaker; + +import java.util.HashMap; +import java.util.Map; + +import org.apache.solr.core.SolrCore; + +/** + * Manages all registered circuit breaker instances. Responsible for a holistic view + * of whether a circuit breaker has tripped or not. + * + * There are two typical ways of using this class's instance: + * 1. Check if any circuit breaker has triggered -- and know which circuit breaker has triggered. 
+ * 2. Get an instance of a specific circuit breaker and perform checks. + * + * It is a good practice to register new circuit breakers here if you want them checked for every + * request. + * + * NOTE: The current way of registering new default circuit breakers is minimal and not a long term + * solution. There will be a follow up with a SIP for a schema API design. + */ +public class CircuitBreakerManager { + + private final Map circuitBreakerMap = new HashMap<>(); + + // Allows replacing of existing circuit breaker + public void registerCircuitBreaker(CircuitBreakerType circuitBreakerType, CircuitBreaker circuitBreaker) { +circuitBreakerMap.put(circuitBreakerType, circuitBreaker); + } + + public CircuitBreaker getCircuitBreaker(CircuitBreakerType circuitBreakerType) { +assert circuitBreakerType != null; + +return circuitBreakerMap.get(circuitBreakerType); + } + +
[jira] [Created] (LUCENE-9421) TestDrillSideways.testRandom reproducing failure
Michael McCandless created LUCENE-9421: -- Summary: TestDrillSideways.testRandom reproducing failure Key: LUCENE-9421 URL: https://issues.apache.org/jira/browse/LUCENE-9421 Project: Lucene - Core Issue Type: Improvement Reporter: Michael McCandless I have not looked yet, but it does repro: {noformat} [junit4] 2> NOTE: reproduce with: ant test -Dtestcase=TestDrillSideways -Dtests.method=testRandom -Dtests.seed=8336CD729D6DA8A8 -Dtests.slow=true -Dtests.badapples=true -Dtests.locale=bs-Latn -Dtests.timezone=Asia/Kuching -Dtests.asserts=true -Dtests.file.encoding=UTF-8 [junit4] FAILURE 0.32s | TestDrillSideways.testRandom <<< [junit4] > Throwable #1: java.lang.AssertionError [junit4] > at __randomizedtesting.SeedInfo.seed([8336CD729D6DA8A8:F17AE87D2C0D1EDB]:0) [junit4] > at org.apache.lucene.facet.TestDrillSideways.verifyEquals(TestDrillSideways.java:1124) [junit4] > at org.apache.lucene.facet.TestDrillSideways.testRandom(TestDrillSideways.java:832) [junit4] > at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method) [junit4] > at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) [junit4] > at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) [junit4] > at java.base/java.lang.reflect.Method.invoke(Method.java:566) [junit4] > at java.base/java.lang.Thread.run(Thread.java:834) [junit4] 2> NOTE: test params are: codec=Asserting(Lucene86): {dim4=PostingsFormat(name=LuceneVarGapDocFreqInterval), $full_path$=PostingsFormat(name=Direct), $facets=PostingsFormat(name=LuceneVarGapDocFreqInterval), dim3=PostingsFormat(name=Direct), dim2=BlockTreeOrds(blocksize=128), $payloads$=PostingsFormat(name=Direct), dim1=PostingsFormat(name=LuceneFixedGap), dim0=PostingsFormat(name=LuceneVarGapDocFreqInterval), id=PostingsFormat(name=Direct), content=PostingsFormat(name=LuceneFixedGap)}, docValues:{$facets=DocValuesFormat(name=Lucene80), id=DocValuesFormat(name=Asserting)}, maxPointsInLeafNode=818, maxMBSortInHeap=6.4081279656609755, sim=Asserting(RandomSimilarity(queryNorm=true): {$facets=DFR I(F)B1, content=DFR I(ne)L2}), locale=bs-Latn, timezone=Asia/Kuching [junit4] 2> NOTE: Linux 5.5.6-arch1-1 amd64/Oracle Corporation 11.0.6 (64-bit)/cpus=128,threads=1,free=458496400,total=536870912 [junit4] 2> NOTE: All tests run in this JVM: [TestDrillSideways] [junit4] Completed [1/1 (1!)] in 0.58s, 1 test, 1 failure <<< FAILURES! {noformat} -- This message was sent by Atlassian Jira (v8.3.4#803005) - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[GitHub] [lucene-solr] ErickErickson commented on a change in pull request #1626: SOLR-14588: Implement Circuit Breakers
ErickErickson commented on a change in pull request #1626: URL: https://github.com/apache/lucene-solr/pull/1626#discussion_r446684636 ## File path: solr/CHANGES.txt ## @@ -11,6 +11,7 @@ Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this r New Features - * SOLR-14440: Introduce new Certificate Authentication Plugin to load Principal from certificate subject. (Mike Drob) +* SOLR-14588: Introduce Circuit Breaker Infrastructure and a JVM heap usage memory tracking circuit breaker implementation (Atri Sharma) Review comment: Minor nit, should be a new line between these. ## File path: solr/contrib/clustering/src/test-files/clustering/solr/collection1/conf/solrconfig.xml ## @@ -123,6 +123,17 @@ The purpose is to enable easy caching of user/application level data. The regenerator argument should be specified as an implementation of solr.search.CacheRegenerator if autowarming is desired. --> + + +false + + +100 Review comment: A little bit of guidance here would be helpful, maybe "70-80% heap usage is typical". Numbers are pulled out of thin air, just looking for a starting point for a newbie. ## File path: solr/core/src/java/org/apache/solr/core/SolrConfig.java ## @@ -804,6 +813,14 @@ private void initLibs(SolrResourceLoader loader, boolean isConfigsetTrusted) { loader.reloadLuceneSPI(); } + private void validateMemoryBreakerThreshold() { +if (useCircuitBreakers) { + if (memoryCircuitBreakerThreshold > 100 || memoryCircuitBreakerThreshold < 0) { Review comment: Does it really make sense to allow either of these values? 0 seems like it'd cause everything to break (haven't seen the rest of the code yet, maybe 0 is a special case). 100% seems too late. Do we have any good information about what reasonable upper and lower bounds are? And should we enforce them? Say 50%/90% as a straw-man proposal for discussion... 
## File path: solr/core/src/java/org/apache/solr/util/circuitbreaker/CircuitBreakerType.java ## @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.util.circuitbreaker; + +/** + * Types of circuit breakers + */ Review comment: This seems like an awfully small file, perhaps put this in the abstract CircuitBreaker class? No big deal either way. ## File path: solr/core/src/java/org/apache/solr/core/SolrConfig.java ## @@ -804,6 +813,14 @@ private void initLibs(SolrResourceLoader loader, boolean isConfigsetTrusted) { loader.reloadLuceneSPI(); } + private void validateMemoryBreakerThreshold() { +if (useCircuitBreakers) { + if (memoryCircuitBreakerThreshold > 100 || memoryCircuitBreakerThreshold < 0) { +throw new IllegalArgumentException("memoryCircuitBreakerThreshold is not a valid percentage"); + } Review comment: Add what valid percentages are here, especially if we decide to enforce as above. This should also echo the number entered and what the valid limits are, something like: memoryCircuitBreakerThreshold was set to" + memoryCircuitBreakerThreshold + ". 
Valid percentages must be between X% and Y%" ## File path: solr/contrib/prometheus-exporter/src/test-files/solr/collection1/conf/solrconfig.xml ## @@ -83,6 +83,10 @@ 200 +false + +100 Review comment: Same as above. ## File path: solr/core/src/java/org/apache/solr/core/SolrConfig.java ## @@ -522,6 +527,10 @@ public SolrRequestParsers getRequestParsers() { public final int queryResultWindowSize; public final int queryResultMaxDocsCached; public final boolean enableLazyFieldLoading; + + // Circuit Breaker Configuration + public final boolean useCircuitBreakers; + public final int memoryCircuitBreakerThreshold; Review comment: I always like to put in default values where the Java defaults wouldn't work, just in case "somehow" this isn't set in future. memoryCircuitBreakerThreshold defaults to 0 in this case, is that OK? And maybe name this
[GitHub] [lucene-solr] ErickErickson commented on pull request #1626: SOLR-14588: Implement Circuit Breakers
ErickErickson commented on pull request #1626: URL: https://github.com/apache/lucene-solr/pull/1626#issuecomment-651073728 Atri: DebugComponentTest is failing a lot, although not reproducibly. See: https://jenkins.thetaphi.de/job/Lucene-Solr-master-Linux/27171/ I took a brief look and it looks like, under some circumstances, the circuitbreaker info is getting in the debug output, throwing the test off as it doesn’t expect the circuitbreaker data. > On Jun 28, 2020, at 2:14 PM, Atri Sharma wrote: > > > @atris requested your review on: #1626 SOLR-14588: Implement Circuit Breakers. > > — > You are receiving this because your review was requested. > Reply to this email directly, view it on GitHub, or unsubscribe. > This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[jira] [Commented] (SOLR-14588) Circuit Breakers Infrastructure and Real JVM Based Circuit Breaker
[ https://issues.apache.org/jira/browse/SOLR-14588?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17147675#comment-17147675 ] Jan Høydahl commented on SOLR-14588: Master is still failing. There are remnants of circuit breaker stuff left in DebugComponentTest, see https://github.com/apache/lucene-solr/blob/master/solr/core/src/test/org/apache/solr/handler/component/DebugComponentTest.java#L64 > Circuit Breakers Infrastructure and Real JVM Based Circuit Breaker > -- > > Key: SOLR-14588 > URL: https://issues.apache.org/jira/browse/SOLR-14588 > Project: Solr > Issue Type: Improvement > Security Level: Public(Default Security Level. Issues are Public) >Affects Versions: master (9.0) >Reporter: Atri Sharma >Assignee: Atri Sharma >Priority: Major > Time Spent: 8.5h > Remaining Estimate: 0h > > This Jira tracks addition of circuit breakers in the search path and > implements JVM based circuit breaker which rejects incoming search requests > if the JVM heap usage exceeds a defined percentage. -- This message was sent by Atlassian Jira (v8.3.4#803005) - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[jira] [Resolved] (SOLR-14462) Autoscaling placement wrong with concurrent collection creations
[ https://issues.apache.org/jira/browse/SOLR-14462?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Ilan Ginzburg resolved SOLR-14462. -- Resolution: Fixed > Autoscaling placement wrong with concurrent collection creations > > > Key: SOLR-14462 > URL: https://issues.apache.org/jira/browse/SOLR-14462 > Project: Solr > Issue Type: Bug > Components: AutoScaling >Affects Versions: master (9.0) >Reporter: Ilan Ginzburg >Assignee: Ilan Ginzburg >Priority: Major > Attachments: PolicyHelperNewLogs.txt, policylogs.txt > > Time Spent: 2h 50m > Remaining Estimate: 0h > > Under concurrent collection creation, wrong Autoscaling placement decisions > can lead to severely unbalanced clusters. > Sequential creation of the same collections is handled correctly and the > cluster is balanced. > *TL;DR;* under high load, the way sessions that cache future changes to > Zookeeper are managed cause placement decisions of multiple concurrent > Collection API calls to ignore each other, be based on identical “initial” > cluster state, possibly leading to identical placement decisions and as a > consequence cluster imbalance. > *Some context first* for those less familiar with how Autoscaling deals with > cluster state change: a PolicyHelper.Session is created with a snapshot of > the Zookeeper cluster state and is used to track already decided but not yet > persisted to Zookeeper cluster state changes so that Collection API commands > can make the right placement decisions. > A Collection API command either uses an existing cached Session (that > includes changes computed by previous command(s)) or creates a new Session > initialized from the Zookeeper cluster state (i.e. with only state changes > already persisted). > When a Collection API command requires a Session - and one is needed for any > cluster state update computation - if one exists but is currently in use, the > command can wait up to 10 seconds. If the session becomes available, it is > reused. 
Otherwise, a new one is created. > The Session lifecycle is as follows: it is created in COMPUTING state by a > Collection API command and is initialized with a snapshot of cluster state > from Zookeeper (does not require a Zookeeper read, this is running on > Overseer that maintains a cache of cluster state). The command has exclusive > access to the Session and can change the state of the Session. When the > command is done changing the Session, the Session is “returned” and its state > changes to EXECUTING while the command continues to run to persist the state > to Zookeeper and interact with the nodes, but no longer interacts with the > Session. Another command can then grab a Session in EXECUTING state, change > its state to COMPUTING to compute new changes taking into account previous > changes. When all commands having used the session have completed their work, > the session is “released” and destroyed (at this stage, Zookeeper contains > all the state changes that were computed using that Session). > The issue arises when multiple Collection API commands are executed at once. > A first Session is created and commands start using it one by one. In a > simple 1 shard 1 replica collection creation test run with 100 parallel > Collection API requests (see debug logs from PolicyHelper in file > policy.logs), this Session update phase (Session in COMPUTING status in > SessionWrapper) takes about 250-300ms (MacBook Pro). > This means that about 40 commands can run by using in turn the same Session > (45 in the sample run). The commands that have been waiting for too long time > out after 10 seconds, more or less all at the same time (at the rate at which > they have been received by the OverseerCollectionMessageHandler, approx one > per 100ms in the sample run) and most/all independently decide to create a > new Session. 
These new Sessions are based on Zookeeper state, they might or > might not include some of the changes from the first 40 commands (depending > on if these commands got their changes written to Zookeeper by the time of > the 10 seconds timeout, a few might have made it, see below). > These new Sessions (54 sessions in addition to the initial one) are based on > more or less the same state, so all remaining commands are making placement > decisions that do not take into account each other (and likely not much of > the first 44 placement decisions either). > The sample run whose relevant logs are attached led for the 100 single shard > single replica collection creations to 82 collections on the Overseer node, > and 5 and 13 collections on the two other nodes of a 3 nodes cluster. Given > that the initial session was used 45 times (once initially then reused 44 >
[jira] [Updated] (SOLR-14462) Autoscaling placement wrong with concurrent collection creations
[ https://issues.apache.org/jira/browse/SOLR-14462?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Ilan Ginzburg updated SOLR-14462: - Affects Version/s: 8.6 > Autoscaling placement wrong with concurrent collection creations > > > Key: SOLR-14462 > URL: https://issues.apache.org/jira/browse/SOLR-14462 > Project: Solr > Issue Type: Bug > Components: AutoScaling >Affects Versions: master (9.0), 8.6 >Reporter: Ilan Ginzburg >Assignee: Ilan Ginzburg >Priority: Major > Attachments: PolicyHelperNewLogs.txt, policylogs.txt > > Time Spent: 2h 50m > Remaining Estimate: 0h > > Under concurrent collection creation, wrong Autoscaling placement decisions > can lead to severely unbalanced clusters. > Sequential creation of the same collections is handled correctly and the > cluster is balanced. > *TL;DR;* under high load, the way sessions that cache future changes to > Zookeeper are managed cause placement decisions of multiple concurrent > Collection API calls to ignore each other, be based on identical “initial” > cluster state, possibly leading to identical placement decisions and as a > consequence cluster imbalance. > *Some context first* for those less familiar with how Autoscaling deals with > cluster state change: a PolicyHelper.Session is created with a snapshot of > the Zookeeper cluster state and is used to track already decided but not yet > persisted to Zookeeper cluster state changes so that Collection API commands > can make the right placement decisions. > A Collection API command either uses an existing cached Session (that > includes changes computed by previous command(s)) or creates a new Session > initialized from the Zookeeper cluster state (i.e. with only state changes > already persisted). > When a Collection API command requires a Session - and one is needed for any > cluster state update computation - if one exists but is currently in use, the > command can wait up to 10 seconds. If the session becomes available, it is > reused. 
Otherwise, a new one is created. > The Session lifecycle is as follows: it is created in COMPUTING state by a > Collection API command and is initialized with a snapshot of cluster state > from Zookeeper (does not require a Zookeeper read, this is running on > Overseer that maintains a cache of cluster state). The command has exclusive > access to the Session and can change the state of the Session. When the > command is done changing the Session, the Session is “returned” and its state > changes to EXECUTING while the command continues to run to persist the state > to Zookeeper and interact with the nodes, but no longer interacts with the > Session. Another command can then grab a Session in EXECUTING state, change > its state to COMPUTING to compute new changes taking into account previous > changes. When all commands having used the session have completed their work, > the session is “released” and destroyed (at this stage, Zookeeper contains > all the state changes that were computed using that Session). > The issue arises when multiple Collection API commands are executed at once. > A first Session is created and commands start using it one by one. In a > simple 1 shard 1 replica collection creation test run with 100 parallel > Collection API requests (see debug logs from PolicyHelper in file > policy.logs), this Session update phase (Session in COMPUTING status in > SessionWrapper) takes about 250-300ms (MacBook Pro). > This means that about 40 commands can run by using in turn the same Session > (45 in the sample run). The commands that have been waiting for too long time > out after 10 seconds, more or less all at the same time (at the rate at which > they have been received by the OverseerCollectionMessageHandler, approx one > per 100ms in the sample run) and most/all independently decide to create a > new Session. 
These new Sessions are based on Zookeeper state, they might or > might not include some of the changes from the first 40 commands (depending > on if these commands got their changes written to Zookeeper by the time of > the 10 seconds timeout, a few might have made it, see below). > These new Sessions (54 sessions in addition to the initial one) are based on > more or less the same state, so all remaining commands are making placement > decisions that do not take into account each other (and likely not much of > the first 44 placement decisions either). > The sample run whose relevant logs are attached led for the 100 single shard > single replica collection creations to 82 collections on the Overseer node, > and 5 and 13 collections on the two other nodes of a 3 nodes cluster. Given > that the initial session was used 45 times (once initially then
[jira] [Reopened] (SOLR-14462) Autoscaling placement wrong with concurrent collection creations
[ https://issues.apache.org/jira/browse/SOLR-14462?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Ilan Ginzburg reopened SOLR-14462: -- Reopening to backport for 8.6 > Autoscaling placement wrong with concurrent collection creations > > > Key: SOLR-14462 > URL: https://issues.apache.org/jira/browse/SOLR-14462 > Project: Solr > Issue Type: Bug > Components: AutoScaling >Affects Versions: master (9.0) >Reporter: Ilan Ginzburg >Assignee: Ilan Ginzburg >Priority: Major > Attachments: PolicyHelperNewLogs.txt, policylogs.txt > > Time Spent: 2h 50m > Remaining Estimate: 0h > > Under concurrent collection creation, wrong Autoscaling placement decisions > can lead to severely unbalanced clusters. > Sequential creation of the same collections is handled correctly and the > cluster is balanced. > *TL;DR;* under high load, the way sessions that cache future changes to > Zookeeper are managed cause placement decisions of multiple concurrent > Collection API calls to ignore each other, be based on identical “initial” > cluster state, possibly leading to identical placement decisions and as a > consequence cluster imbalance. > *Some context first* for those less familiar with how Autoscaling deals with > cluster state change: a PolicyHelper.Session is created with a snapshot of > the Zookeeper cluster state and is used to track already decided but not yet > persisted to Zookeeper cluster state changes so that Collection API commands > can make the right placement decisions. > A Collection API command either uses an existing cached Session (that > includes changes computed by previous command(s)) or creates a new Session > initialized from the Zookeeper cluster state (i.e. with only state changes > already persisted). > When a Collection API command requires a Session - and one is needed for any > cluster state update computation - if one exists but is currently in use, the > command can wait up to 10 seconds. If the session becomes available, it is > reused. 
Otherwise, a new one is created. > The Session lifecycle is as follows: it is created in COMPUTING state by a > Collection API command and is initialized with a snapshot of cluster state > from Zookeeper (does not require a Zookeeper read, this is running on > Overseer that maintains a cache of cluster state). The command has exclusive > access to the Session and can change the state of the Session. When the > command is done changing the Session, the Session is “returned” and its state > changes to EXECUTING while the command continues to run to persist the state > to Zookeeper and interact with the nodes, but no longer interacts with the > Session. Another command can then grab a Session in EXECUTING state, change > its state to COMPUTING to compute new changes taking into account previous > changes. When all commands having used the session have completed their work, > the session is “released” and destroyed (at this stage, Zookeeper contains > all the state changes that were computed using that Session). > The issue arises when multiple Collection API commands are executed at once. > A first Session is created and commands start using it one by one. In a > simple 1 shard 1 replica collection creation test run with 100 parallel > Collection API requests (see debug logs from PolicyHelper in file > policy.logs), this Session update phase (Session in COMPUTING status in > SessionWrapper) takes about 250-300ms (MacBook Pro). > This means that about 40 commands can run by using in turn the same Session > (45 in the sample run). The commands that have been waiting for too long time > out after 10 seconds, more or less all at the same time (at the rate at which > they have been received by the OverseerCollectionMessageHandler, approx one > per 100ms in the sample run) and most/all independently decide to create a > new Session. 
These new Sessions are based on Zookeeper state, they might or > might not include some of the changes from the first 40 commands (depending > on if these commands got their changes written to Zookeeper by the time of > the 10 seconds timeout, a few might have made it, see below). > These new Sessions (54 sessions in addition to the initial one) are based on > more or less the same state, so all remaining commands are making placement > decisions that do not take into account each other (and likely not much of > the first 44 placement decisions either). > The sample run whose relevant logs are attached led for the 100 single shard > single replica collection creations to 82 collections on the Overseer node, > and 5 and 13 collections on the two other nodes of a 3 nodes cluster. Given > that the initial session was used 45 times (once initially then
[jira] [Comment Edited] (SOLR-14462) Autoscaling placement wrong with concurrent collection creations
[ https://issues.apache.org/jira/browse/SOLR-14462?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17147697#comment-17147697 ] Ilan Ginzburg edited comment on SOLR-14462 at 6/29/20, 11:25 AM: - Reopening to backport for 8.6. FYI [~bruno.roustant] was (Author: ilan): Reopening to backport for 8.6 > Autoscaling placement wrong with concurrent collection creations > > > Key: SOLR-14462 > URL: https://issues.apache.org/jira/browse/SOLR-14462 > Project: Solr > Issue Type: Bug > Components: AutoScaling >Affects Versions: master (9.0), 8.6 >Reporter: Ilan Ginzburg >Assignee: Ilan Ginzburg >Priority: Major > Attachments: PolicyHelperNewLogs.txt, policylogs.txt > > Time Spent: 2h 50m > Remaining Estimate: 0h > > Under concurrent collection creation, wrong Autoscaling placement decisions > can lead to severely unbalanced clusters. > Sequential creation of the same collections is handled correctly and the > cluster is balanced. > *TL;DR;* under high load, the way sessions that cache future changes to > Zookeeper are managed cause placement decisions of multiple concurrent > Collection API calls to ignore each other, be based on identical “initial” > cluster state, possibly leading to identical placement decisions and as a > consequence cluster imbalance. > *Some context first* for those less familiar with how Autoscaling deals with > cluster state change: a PolicyHelper.Session is created with a snapshot of > the Zookeeper cluster state and is used to track already decided but not yet > persisted to Zookeeper cluster state changes so that Collection API commands > can make the right placement decisions. > A Collection API command either uses an existing cached Session (that > includes changes computed by previous command(s)) or creates a new Session > initialized from the Zookeeper cluster state (i.e. with only state changes > already persisted). 
> When a Collection API command requires a Session - and one is needed for any > cluster state update computation - if one exists but is currently in use, the > command can wait up to 10 seconds. If the session becomes available, it is > reused. Otherwise, a new one is created. > The Session lifecycle is as follows: it is created in COMPUTING state by a > Collection API command and is initialized with a snapshot of cluster state > from Zookeeper (does not require a Zookeeper read, this is running on > Overseer that maintains a cache of cluster state). The command has exclusive > access to the Session and can change the state of the Session. When the > command is done changing the Session, the Session is “returned” and its state > changes to EXECUTING while the command continues to run to persist the state > to Zookeeper and interact with the nodes, but no longer interacts with the > Session. Another command can then grab a Session in EXECUTING state, change > its state to COMPUTING to compute new changes taking into account previous > changes. When all commands having used the session have completed their work, > the session is “released” and destroyed (at this stage, Zookeeper contains > all the state changes that were computed using that Session). > The issue arises when multiple Collection API commands are executed at once. > A first Session is created and commands start using it one by one. In a > simple 1 shard 1 replica collection creation test run with 100 parallel > Collection API requests (see debug logs from PolicyHelper in file > policy.logs), this Session update phase (Session in COMPUTING status in > SessionWrapper) takes about 250-300ms (MacBook Pro). > This means that about 40 commands can run by using in turn the same Session > (45 in the sample run). 
The commands that have been waiting for too long time > out after 10 seconds, more or less all at the same time (at the rate at which > they have been received by the OverseerCollectionMessageHandler, approx one > per 100ms in the sample run) and most/all independently decide to create a > new Session. These new Sessions are based on Zookeeper state, they might or > might not include some of the changes from the first 40 commands (depending > on if these commands got their changes written to Zookeeper by the time of > the 10 seconds timeout, a few might have made it, see below). > These new Sessions (54 sessions in addition to the initial one) are based on > more or less the same state, so all remaining commands are making placement > decisions that do not take into account each other (and likely not much of > the first 44 placement decisions either). > The sample run whose relevant logs are attached led for the 100 single shard > single replica collection creations to 82 collections
[jira] [Reopened] (SOLR-14561) Validate parameters to CoreAdminAPI
[ https://issues.apache.org/jira/browse/SOLR-14561?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Jan Høydahl reopened SOLR-14561: Re-opening to add some more checks > Validate parameters to CoreAdminAPI > --- > > Key: SOLR-14561 > URL: https://issues.apache.org/jira/browse/SOLR-14561 > Project: Solr > Issue Type: Improvement > Security Level: Public(Default Security Level. Issues are Public) >Reporter: Jan Høydahl >Assignee: Jan Høydahl >Priority: Major > Fix For: 8.6 > > Time Spent: 3h 50m > Remaining Estimate: 0h > > CoreAdminAPI does not validate parameter input. We should limit what users > can specify for at least {{instanceDir and dataDir}} params, perhaps restrict > them to be relative to SOLR_HOME or SOLR_DATA_HOME. -- This message was sent by Atlassian Jira (v8.3.4#803005) - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[jira] [Commented] (SOLR-14561) Validate parameters to CoreAdminAPI
[ https://issues.apache.org/jira/browse/SOLR-14561?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17147694#comment-17147694 ] Jan Høydahl commented on SOLR-14561: Committed PR 1629 to master in 49a3f0a11d41f7124b893a08dc9e67594c32e2ee > Validate parameters to CoreAdminAPI > --- > > Key: SOLR-14561 > URL: https://issues.apache.org/jira/browse/SOLR-14561 > Project: Solr > Issue Type: Improvement > Security Level: Public(Default Security Level. Issues are Public) >Reporter: Jan Høydahl >Assignee: Jan Høydahl >Priority: Major > Fix For: 8.6 > > Time Spent: 4h > Remaining Estimate: 0h > > CoreAdminAPI does not validate parameter input. We should limit what users > can specify for at least {{instanceDir and dataDir}} params, perhaps restrict > them to be relative to SOLR_HOME or SOLR_DATA_HOME. -- This message was sent by Atlassian Jira (v8.3.4#803005) - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[jira] [Commented] (SOLR-14561) Validate parameters to CoreAdminAPI
[ https://issues.apache.org/jira/browse/SOLR-14561?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17147699#comment-17147699 ] ASF subversion and git services commented on SOLR-14561: Commit 49a3f0a11d41f7124b893a08dc9e67594c32e2ee in lucene-solr's branch refs/heads/master from Jan Høydahl [ https://gitbox.apache.org/repos/asf?p=lucene-solr.git;h=49a3f0a ] SOLR-14561 Followup - validate params for more core operations (#1629) Add template to solr.in scripts Also tested Windows paths Added RefGuide documentation to some params > Validate parameters to CoreAdminAPI > --- > > Key: SOLR-14561 > URL: https://issues.apache.org/jira/browse/SOLR-14561 > Project: Solr > Issue Type: Improvement > Security Level: Public(Default Security Level. Issues are Public) >Reporter: Jan Høydahl >Assignee: Jan Høydahl >Priority: Major > Fix For: 8.6 > > Time Spent: 4h > Remaining Estimate: 0h > > CoreAdminAPI does not validate parameter input. We should limit what users > can specify for at least {{instanceDir and dataDir}} params, perhaps restrict > them to be relative to SOLR_HOME or SOLR_DATA_HOME. -- This message was sent by Atlassian Jira (v8.3.4#803005) - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[GitHub] [lucene-solr] janhoy merged pull request #1629: SOLR-14561 Followup - validate params for more core operations
janhoy merged pull request #1629: URL: https://github.com/apache/lucene-solr/pull/1629 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[jira] [Commented] (SOLR-14588) Circuit Breakers Infrastructure and Real JVM Based Circuit Breaker
[ https://issues.apache.org/jira/browse/SOLR-14588?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17147688#comment-17147688 ] Jan Høydahl commented on SOLR-14588: NP - I decided to commit instead of wait, given the small size of the fix. But next time I'll try pinging on Slack first :) > Circuit Breakers Infrastructure and Real JVM Based Circuit Breaker > -- > > Key: SOLR-14588 > URL: https://issues.apache.org/jira/browse/SOLR-14588 > Project: Solr > Issue Type: Improvement > Security Level: Public(Default Security Level. Issues are Public) >Affects Versions: master (9.0) >Reporter: Atri Sharma >Assignee: Atri Sharma >Priority: Major > Time Spent: 8.5h > Remaining Estimate: 0h > > This Jira tracks addition of circuit breakers in the search path and > implements JVM based circuit breaker which rejects incoming search requests > if the JVM heap usage exceeds a defined percentage. -- This message was sent by Atlassian Jira (v8.3.4#803005) - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[GitHub] [lucene-solr] uschindler commented on a change in pull request #1624: use MethodHandles in AnnotatedAPI
uschindler commented on a change in pull request #1624: URL: https://github.com/apache/lucene-solr/pull/1624#discussion_r446890033 ## File path: solr/core/src/java/org/apache/solr/api/AnnotatedApi.java ## @@ -306,7 +313,7 @@ void invoke(SolrQueryRequest req, SolrQueryResponse rsp, CommandOperation cmd) { } catch (InvocationTargetException ite) { Review comment: This catch block can go away! This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[jira] [Commented] (SOLR-14462) Autoscaling placement wrong with concurrent collection creations
[ https://issues.apache.org/jira/browse/SOLR-14462?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17147685#comment-17147685 ] Andrzej Bialecki commented on SOLR-14462: - This should be back-ported to 8.6, it's an important bugfix. > Autoscaling placement wrong with concurrent collection creations > > > Key: SOLR-14462 > URL: https://issues.apache.org/jira/browse/SOLR-14462 > Project: Solr > Issue Type: Bug > Components: AutoScaling >Affects Versions: master (9.0) >Reporter: Ilan Ginzburg >Assignee: Ilan Ginzburg >Priority: Major > Attachments: PolicyHelperNewLogs.txt, policylogs.txt > > Time Spent: 2h 50m > Remaining Estimate: 0h > > Under concurrent collection creation, wrong Autoscaling placement decisions > can lead to severely unbalanced clusters. > Sequential creation of the same collections is handled correctly and the > cluster is balanced. > *TL;DR;* under high load, the way sessions that cache future changes to > Zookeeper are managed cause placement decisions of multiple concurrent > Collection API calls to ignore each other, be based on identical “initial” > cluster state, possibly leading to identical placement decisions and as a > consequence cluster imbalance. > *Some context first* for those less familiar with how Autoscaling deals with > cluster state change: a PolicyHelper.Session is created with a snapshot of > the Zookeeper cluster state and is used to track already decided but not yet > persisted to Zookeeper cluster state changes so that Collection API commands > can make the right placement decisions. > A Collection API command either uses an existing cached Session (that > includes changes computed by previous command(s)) or creates a new Session > initialized from the Zookeeper cluster state (i.e. with only state changes > already persisted). 
> When a Collection API command requires a Session - and one is needed for any > cluster state update computation - if one exists but is currently in use, the > command can wait up to 10 seconds. If the session becomes available, it is > reused. Otherwise, a new one is created. > The Session lifecycle is as follows: it is created in COMPUTING state by a > Collection API command and is initialized with a snapshot of cluster state > from Zookeeper (does not require a Zookeeper read, this is running on > Overseer that maintains a cache of cluster state). The command has exclusive > access to the Session and can change the state of the Session. When the > command is done changing the Session, the Session is “returned” and its state > changes to EXECUTING while the command continues to run to persist the state > to Zookeeper and interact with the nodes, but no longer interacts with the > Session. Another command can then grab a Session in EXECUTING state, change > its state to COMPUTING to compute new changes taking into account previous > changes. When all commands having used the session have completed their work, > the session is “released” and destroyed (at this stage, Zookeeper contains > all the state changes that were computed using that Session). > The issue arises when multiple Collection API commands are executed at once. > A first Session is created and commands start using it one by one. In a > simple 1 shard 1 replica collection creation test run with 100 parallel > Collection API requests (see debug logs from PolicyHelper in file > policy.logs), this Session update phase (Session in COMPUTING status in > SessionWrapper) takes about 250-300ms (MacBook Pro). > This means that about 40 commands can run by using in turn the same Session > (45 in the sample run). 
The commands that have been waiting for too long time > out after 10 seconds, more or less all at the same time (at the rate at which > they have been received by the OverseerCollectionMessageHandler, approx one > per 100ms in the sample run) and most/all independently decide to create a > new Session. These new Sessions are based on Zookeeper state, they might or > might not include some of the changes from the first 40 commands (depending > on if these commands got their changes written to Zookeeper by the time of > the 10 seconds timeout, a few might have made it, see below). > These new Sessions (54 sessions in addition to the initial one) are based on > more or less the same state, so all remaining commands are making placement > decisions that do not take into account each other (and likely not much of > the first 44 placement decisions either). > The sample run whose relevant logs are attached led for the 100 single shard > single replica collection creations to 82 collections on the Overseer node, > and 5 and 13 collections on the two other nodes of a 3 nodes cluster.
[GitHub] [lucene-solr] janhoy opened a new pull request #1629: SOLR-14561 Followup - validate params for more core operations
janhoy opened a new pull request #1629: URL: https://github.com/apache/lucene-solr/pull/1629 Now validates path params for backup, restore, snapshot, mergeindexes Add template to solr.in scripts Also tested on Windows paths Added RefGuide documentation to some params This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[jira] [Commented] (SOLR-14588) Circuit Breakers Infrastructure and Real JVM Based Circuit Breaker
[ https://issues.apache.org/jira/browse/SOLR-14588?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17147681#comment-17147681 ] Atri Sharma commented on SOLR-14588: Thanks [~janhoy], you beat me to it! https://github.com/atris/lucene-solr/tree/fix_vestigal_test > Circuit Breakers Infrastructure and Real JVM Based Circuit Breaker > -- > > Key: SOLR-14588 > URL: https://issues.apache.org/jira/browse/SOLR-14588 > Project: Solr > Issue Type: Improvement > Security Level: Public(Default Security Level. Issues are Public) >Affects Versions: master (9.0) >Reporter: Atri Sharma >Assignee: Atri Sharma >Priority: Major > Time Spent: 8.5h > Remaining Estimate: 0h > > This Jira tracks addition of circuit breakers in the search path and > implements JVM based circuit breaker which rejects incoming search requests > if the JVM heap usage exceeds a defined percentage. -- This message was sent by Atlassian Jira (v8.3.4#803005) - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[jira] [Assigned] (SOLR-14462) Autoscaling placement wrong with concurrent collection creations
[ https://issues.apache.org/jira/browse/SOLR-14462?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Ilan Ginzburg reassigned SOLR-14462: Assignee: Ilan Ginzburg (was: Noble Paul) > Autoscaling placement wrong with concurrent collection creations > > > Key: SOLR-14462 > URL: https://issues.apache.org/jira/browse/SOLR-14462 > Project: Solr > Issue Type: Bug > Components: AutoScaling >Affects Versions: master (9.0) >Reporter: Ilan Ginzburg >Assignee: Ilan Ginzburg >Priority: Major > Attachments: PolicyHelperNewLogs.txt, policylogs.txt > > Time Spent: 2h 50m > Remaining Estimate: 0h > > Under concurrent collection creation, wrong Autoscaling placement decisions > can lead to severely unbalanced clusters. > Sequential creation of the same collections is handled correctly and the > cluster is balanced. > *TL;DR;* under high load, the way sessions that cache future changes to > Zookeeper are managed cause placement decisions of multiple concurrent > Collection API calls to ignore each other, be based on identical “initial” > cluster state, possibly leading to identical placement decisions and as a > consequence cluster imbalance. > *Some context first* for those less familiar with how Autoscaling deals with > cluster state change: a PolicyHelper.Session is created with a snapshot of > the Zookeeper cluster state and is used to track already decided but not yet > persisted to Zookeeper cluster state changes so that Collection API commands > can make the right placement decisions. > A Collection API command either uses an existing cached Session (that > includes changes computed by previous command(s)) or creates a new Session > initialized from the Zookeeper cluster state (i.e. with only state changes > already persisted). > When a Collection API command requires a Session - and one is needed for any > cluster state update computation - if one exists but is currently in use, the > command can wait up to 10 seconds. 
If the session becomes available, it is > reused. Otherwise, a new one is created. > The Session lifecycle is as follows: it is created in COMPUTING state by a > Collection API command and is initialized with a snapshot of cluster state > from Zookeeper (does not require a Zookeeper read, this is running on > Overseer that maintains a cache of cluster state). The command has exclusive > access to the Session and can change the state of the Session. When the > command is done changing the Session, the Session is “returned” and its state > changes to EXECUTING while the command continues to run to persist the state > to Zookeeper and interact with the nodes, but no longer interacts with the > Session. Another command can then grab a Session in EXECUTING state, change > its state to COMPUTING to compute new changes taking into account previous > changes. When all commands having used the session have completed their work, > the session is “released” and destroyed (at this stage, Zookeeper contains > all the state changes that were computed using that Session). > The issue arises when multiple Collection API commands are executed at once. > A first Session is created and commands start using it one by one. In a > simple 1 shard 1 replica collection creation test run with 100 parallel > Collection API requests (see debug logs from PolicyHelper in file > policy.logs), this Session update phase (Session in COMPUTING status in > SessionWrapper) takes about 250-300ms (MacBook Pro). > This means that about 40 commands can run by using in turn the same Session > (45 in the sample run). The commands that have been waiting for too long time > out after 10 seconds, more or less all at the same time (at the rate at which > they have been received by the OverseerCollectionMessageHandler, approx one > per 100ms in the sample run) and most/all independently decide to create a > new Session. 
These new Sessions are based on Zookeeper state, they might or > might not include some of the changes from the first 40 commands (depending > on if these commands got their changes written to Zookeeper by the time of > the 10 seconds timeout, a few might have made it, see below). > These new Sessions (54 sessions in addition to the initial one) are based on > more or less the same state, so all remaining commands are making placement > decisions that do not take into account each other (and likely not much of > the first 44 placement decisions either). > The sample run whose relevant logs are attached led for the 100 single shard > single replica collection creations to 82 collections on the Overseer node, > and 5 and 13 collections on the two other nodes of a 3 nodes cluster. Given > that the initial session was used 45 times
[jira] [Commented] (SOLR-14588) Circuit Breakers Infrastructure and Real JVM Based Circuit Breaker
[ https://issues.apache.org/jira/browse/SOLR-14588?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17147679#comment-17147679 ] Jan Høydahl commented on SOLR-14588: Fixed in https://github.com/apache/lucene-solr/commit/574e399ce58fc5b23e69e52be28a97b6fc587616 > Circuit Breakers Infrastructure and Real JVM Based Circuit Breaker > -- > > Key: SOLR-14588 > URL: https://issues.apache.org/jira/browse/SOLR-14588 > Project: Solr > Issue Type: Improvement > Security Level: Public(Default Security Level. Issues are Public) >Affects Versions: master (9.0) >Reporter: Atri Sharma >Assignee: Atri Sharma >Priority: Major > Time Spent: 8.5h > Remaining Estimate: 0h > > This Jira tracks addition of circuit breakers in the search path and > implements JVM based circuit breaker which rejects incoming search requests > if the JVM heap usage exceeds a defined percentage. -- This message was sent by Atlassian Jira (v8.3.4#803005) - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[jira] [Updated] (SOLR-14588) Circuit Breakers Infrastructure and Real JVM Based Circuit Breaker
[ https://issues.apache.org/jira/browse/SOLR-14588?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Atri Sharma updated SOLR-14588: --- Affects Version/s: master (9.0) > Circuit Breakers Infrastructure and Real JVM Based Circuit Breaker > -- > > Key: SOLR-14588 > URL: https://issues.apache.org/jira/browse/SOLR-14588 > Project: Solr > Issue Type: Improvement > Security Level: Public(Default Security Level. Issues are Public) >Affects Versions: master (9.0) >Reporter: Atri Sharma >Assignee: Atri Sharma >Priority: Major > Time Spent: 8.5h > Remaining Estimate: 0h > > This Jira tracks addition of circuit breakers in the search path and > implements JVM based circuit breaker which rejects incoming search requests > if the JVM heap usage exceeds a defined percentage. -- This message was sent by Atlassian Jira (v8.3.4#803005) - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[jira] [Updated] (SOLR-14539) Query DSL: Introducing {!bool excludeTags=foo,bar}
[ https://issues.apache.org/jira/browse/SOLR-14539?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Mikhail Khludnev updated SOLR-14539: Fix Version/s: 8.6 > Query DSL: Introducing {!bool excludeTags=foo,bar} > --- > > Key: SOLR-14539 > URL: https://issues.apache.org/jira/browse/SOLR-14539 > Project: Solr > Issue Type: Improvement > Security Level: Public(Default Security Level. Issues are Public) > Components: query parsers >Reporter: Mikhail Khludnev >Priority: Major > Labels: newbie > Fix For: 8.6 > > Attachments: SOLR-14539.patch > > Time Spent: 10m > Remaining Estimate: 0h > > It's continuation of Query DSL improvements SOLR-14419, SOLR-9510. > -Let \{!bool .. }... repeat \{!filters ... trick resolve parameter refs to > many values-. *UPD* Already done. > Let's introduce {{excludeTags}} trick in BoolQParser. It will be useful for > facet exclusion in block-join facets. -- This message was sent by Atlassian Jira (v8.3.4#803005) - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[jira] [Commented] (SOLR-14422) Solr 8.5 Admin UI shows Angular placeholders on first load / refresh
[ https://issues.apache.org/jira/browse/SOLR-14422?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17147641#comment-17147641 ] Colvin Cowie commented on SOLR-14422: - Oh, forgot about this. [~epugh] [~krisden] do you think we can get this into 8.6? > Solr 8.5 Admin UI shows Angular placeholders on first load / refresh > > > Key: SOLR-14422 > URL: https://issues.apache.org/jira/browse/SOLR-14422 > Project: Solr > Issue Type: Bug > Components: Admin UI >Affects Versions: 8.5, 8.5.1, 8.5.2 >Reporter: Colvin Cowie >Priority: Minor > Attachments: SOLR-14422.patch, image-2020-04-21-14-51-18-923.png > > > When loading / refreshing the Admin UI in 8.5.1, it briefly but _visibly_ > shows a placeholder for the "SolrCore Initialization Failures" error message, > with a lot of redness. It looks like there is a real problem. Obviously the > message then disappears, and it can be ignored. > However, if I was a first time user, it would not give me confidence that > everything is okay. In a way, an error message that appears briefly then > disappears before I can finish reading it is worse than one which just stays > there. > > Here's a screenshot of what I mean !image-2020-04-21-14-51-18-923.png! > > I suspect that SOLR-14132 will have caused this > > From a (very) brief googling it seems like using the ng-cloak attribute is > the right way to fix this, and it certainly seems to work for me. > https://docs.angularjs.org/api/ng/directive/ngCloak > I will attach a patch with it, but if someone who actually knows Angular etc > has a better approach then please go for it -- This message was sent by Atlassian Jira (v8.3.4#803005) - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[jira] [Updated] (SOLR-14557) eDisMax parser unable to parse subparser followed by parenthesis
[ https://issues.apache.org/jira/browse/SOLR-14557?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Mikhail Khludnev updated SOLR-14557: Status: Patch Available (was: Open) > eDisMax parser unable to parse subparser followed by parenthesis > > > Key: SOLR-14557 > URL: https://issues.apache.org/jira/browse/SOLR-14557 > Project: Solr > Issue Type: Bug > Security Level: Public(Default Security Level. Issues are Public) > Components: query parsers >Reporter: Mikhail Khludnev >Priority: Major > Labels: painful > Attachments: SOLR-14557.patch > > > h2. Solr 4.5 > {{/select?defType=edismax&q=\{!lucene}(foo)&debugQuery=true}} > > goes like > {code} > \{!lucene}(foo) > content:foo > LuceneQParser > {code} > fine > h2. Solr 8.2 > with luceneMatchVersion=4.5 following SOLR-11501 I know it's a grey zone but > it's a question of migrating existing queries. > {{/select?defType=edismax&q=\{!lucene}(foo)&debugQuery=true}} > goes like > {code} > "querystring":"\{!lucene}(foo)", > "parsedquery":"+DisjunctionMaxQuery(((Project.Address:lucene > Project.Address:foo) | (Project.OwnerType:lucene Project.OwnerType:foo) > "QParser":"ExtendedDismaxQParser", > {code} > blah... > but removing braces in 8.2 works perfectly fine > {code} > "querystring":"\{!lucene}foo", > "parsedquery":"+content:foo", > "parsedquery_toString":"+content:foo", > "QParser":"ExtendedDismaxQParser", > {code} -- This message was sent by Atlassian Jira (v8.3.4#803005) - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[jira] [Updated] (SOLR-14557) eDisMax parser unable to parse subparser followed by parenthesis
[ https://issues.apache.org/jira/browse/SOLR-14557?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Mikhail Khludnev updated SOLR-14557: Attachment: SOLR-14557.patch Status: Open (was: Open) attaching reproducer > eDisMax parser unable to parse subparser followed by parenthesis > > > Key: SOLR-14557 > URL: https://issues.apache.org/jira/browse/SOLR-14557 > Project: Solr > Issue Type: Bug > Security Level: Public(Default Security Level. Issues are Public) > Components: query parsers >Reporter: Mikhail Khludnev >Priority: Major > Labels: painful > Attachments: SOLR-14557.patch > > > h2. Solr 4.5 > {{/select?defType=edismax&q=\{!lucene}(foo)&debugQuery=true}} > > goes like > {code} > \{!lucene}(foo) > content:foo > LuceneQParser > {code} > fine > h2. Solr 8.2 > with luceneMatchVersion=4.5 following SOLR-11501 I know it's a grey zone but > it's a question of migrating existing queries. > {{/select?defType=edismax&q=\{!lucene}(foo)&debugQuery=true}} > goes like > {code} > "querystring":"\{!lucene}(foo)", > "parsedquery":"+DisjunctionMaxQuery(((Project.Address:lucene > Project.Address:foo) | (Project.OwnerType:lucene Project.OwnerType:foo) > "QParser":"ExtendedDismaxQParser", > {code} > blah... > but removing braces in 8.2 works perfectly fine > {code} > "querystring":"\{!lucene}foo", > "parsedquery":"+content:foo", > "parsedquery_toString":"+content:foo", > "QParser":"ExtendedDismaxQParser", > {code} -- This message was sent by Atlassian Jira (v8.3.4#803005) - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org
[jira] [Updated] (SOLR-14557) eDisMax parser unable to parse subparser followed by parenthesis
[ https://issues.apache.org/jira/browse/SOLR-14557?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Mikhail Khludnev updated SOLR-14557: Summary: eDisMax parser unable to parse subparser followed by parenthesis (was: eDisMax parser switch + braces regression) > eDisMax parser unable to parse subparser followed by parenthesis > > > Key: SOLR-14557 > URL: https://issues.apache.org/jira/browse/SOLR-14557 > Project: Solr > Issue Type: Bug > Security Level: Public(Default Security Level. Issues are Public) > Components: query parsers >Reporter: Mikhail Khludnev >Priority: Major > Labels: painful > > h2. Solr 4.5 > {{/select?defType=edismax&q=\{!lucene}(foo)&debugQuery=true}} > > goes like > {code} > \{!lucene}(foo) > content:foo > LuceneQParser > {code} > fine > h2. Solr 8.2 > with luceneMatchVersion=4.5 following SOLR-11501 I know it's a grey zone but > it's a question of migrating existing queries. > {{/select?defType=edismax&q=\{!lucene}(foo)&debugQuery=true}} > goes like > {code} > "querystring":"\{!lucene}(foo)", > "parsedquery":"+DisjunctionMaxQuery(((Project.Address:lucene > Project.Address:foo) | (Project.OwnerType:lucene Project.OwnerType:foo) > "QParser":"ExtendedDismaxQParser", > {code} > blah... > but removing braces in 8.2 works perfectly fine > {code} > "querystring":"\{!lucene}foo", > "parsedquery":"+content:foo", > "parsedquery_toString":"+content:foo", > "QParser":"ExtendedDismaxQParser", > {code} -- This message was sent by Atlassian Jira (v8.3.4#803005) - To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org