epugh commented on code in PR #2395: URL: https://github.com/apache/solr/pull/2395#discussion_r1559751361
########## solr/core/src/java/org/apache/solr/update/processor/NumFieldsMonitor.java: ########## @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.update.processor; + +import org.apache.solr.core.AbstractSolrEventListener; +import org.apache.solr.core.SolrCore; +import org.apache.solr.search.SolrIndexSearcher; +import org.apache.solr.util.RefCounted; + +public class NumFieldsMonitor extends AbstractSolrEventListener { + + private int numFields; + + public NumFieldsMonitor(SolrCore core) { + super(core); + this.numFields = -1; + } + + @Override + public void postCommit() { + RefCounted<SolrIndexSearcher> indexSearcher = null; + try { + indexSearcher = getCore().getSearcher(); + // Get the number of fields directly from the IndexReader instead of the Schema object to also + // include the Review Comment: are there any other interesting places in Solr to use a NumFieldsMonitor? ########## solr/core/src/java/org/apache/solr/update/processor/NumFieldLimitingUpdateRequestProcessor.java: ########## @@ -0,0 +1,140 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.update.processor; + +import java.io.IOException; +import java.lang.invoke.MethodHandles; +import java.util.Locale; +import org.apache.solr.cloud.CloudDescriptor; +import org.apache.solr.cloud.ZkController; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.cloud.Replica; +import org.apache.solr.common.cloud.Slice; +import org.apache.solr.common.util.StrUtils; +import org.apache.solr.core.CoreDescriptor; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.update.AddUpdateCommand; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class NumFieldLimitingUpdateRequestProcessor extends UpdateRequestProcessor { + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + + private SolrQueryRequest req; + private int fieldThreshold; + private int currentNumFields; + private boolean warnOnly; + + public NumFieldLimitingUpdateRequestProcessor( + SolrQueryRequest req, + UpdateRequestProcessor next, + int fieldThreshold, + int currentNumFields, + boolean warnOnly) { + super(next); + this.req = req; + this.fieldThreshold = fieldThreshold; + this.currentNumFields = currentNumFields; + this.warnOnly = warnOnly; + } + + public void processAdd(AddUpdateCommand cmd) throws IOException { + if (!isCloudMode() || /* implicit isCloudMode==true */ isLeaderThatOwnsTheDoc(cmd)) { + if (coreExceedsFieldLimit()) { + throwExceptionOrLog(cmd.getPrintableId()); + } else { + if (log.isDebugEnabled()) { + log.debug( + "Allowing document {}, since current core is under the max-field limit (numFields={}, maxThreshold={})", Review Comment: max-field or maxField? ########## solr/core/src/java/org/apache/solr/update/processor/NumFieldLimitingUpdateRequestProcessorFactory.java: ########## @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.update.processor; + +import org.apache.solr.common.util.NamedList; +import org.apache.solr.core.SolrCore; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.response.SolrQueryResponse; +import org.apache.solr.search.SolrIndexSearcher; +import org.apache.solr.util.plugin.SolrCoreAware; + +/** + * This factory generates an UpdateRequestProcessor which fails update requests once a core has + * exceeded a configurable maximum number of fields. Meant as a safeguard to help users notice + * potentially-dangerous schema design before performance and stability problems start to occur. + * + * <p>The URP uses the core's {@link SolrIndexSearcher} to judge the current number of fields. + * Accordingly, it undercounts the number of fields in the core - missing all fields added since the + * previous searcher was opened. As such, the URP's request-blocking is "best effort" - it cannot be + * relied on as a precise limit on the number of fields. + * + * <p>Additionally, the field-counting includes all documents present in the index, including any + * deleted docs that haven't yet been purged via segment merging. Note that this can differ + * significantly from the number of fields defined in managed-schema.xml - especially when dynamic + * fields are enabled. The only way to reduce this field count is to delete documents and wait until + * the deleted documents have been removed by segment merges. Users may of course speed up this + * process by tweaking Solr's segment-merging, triggering an "optimize" operation, etc. + * + * <p>{@link NumFieldLimitingUpdateRequestProcessorFactory} accepts two configuration parameters: + * + * <ul> + * <li><code>maxFields</code> - (required) The maximum number of fields before update requests + * should be aborted. Once this limit has been exceeded, additional update requests will fail + * until fields have been removed or the "maxFields" is increased. + * <li><code>warnOnly</code> - (optional) If <code>true</code> then the URP logs verbose warnings + * about the limit being exceeded but doesn't abort update requests. Defaults to <code>false + * </code> if not specified + * </ul> + * + * @since 9.6.0 + */ +public class NumFieldLimitingUpdateRequestProcessorFactory extends UpdateRequestProcessorFactory + implements SolrCoreAware { + + private static final String MAXIMUM_FIELDS_PARAM = "maxFields"; + private static final String WARN_ONLY_PARAM = "warnOnly"; + + private NumFieldsMonitor numFieldsMonitor; + private int maximumFields; + private boolean warnOnly; + + @Override + public void inform(final SolrCore core) { + // register a commit callback for monitoring the number of fields in the schema + numFieldsMonitor = new NumFieldsMonitor(core); + core.getUpdateHandler().registerCommitCallback(numFieldsMonitor); + core.registerNewSearcherListener(numFieldsMonitor); + } + + public void init(NamedList<?> args) { + warnOnly = args.indexOf(WARN_ONLY_PARAM, 0) > 0 ? args.getBooleanArg(WARN_ONLY_PARAM) : false; + + if (args.indexOf(MAXIMUM_FIELDS_PARAM, 0) < 0) { + throw new IllegalArgumentException( + "The " + + MAXIMUM_FIELDS_PARAM + + " parameter is required for " + + getClass().getName() + + ", but no value was provided."); + } + final Object rawMaxFields = args.get(MAXIMUM_FIELDS_PARAM); Review Comment: somdeay we will have a utility method that does this kind of thing and is used everywhere! ########## solr/core/src/java/org/apache/solr/update/processor/NumFieldsMonitor.java: ########## @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.update.processor; + +import org.apache.solr.core.AbstractSolrEventListener; +import org.apache.solr.core.SolrCore; +import org.apache.solr.search.SolrIndexSearcher; +import org.apache.solr.util.RefCounted; + +public class NumFieldsMonitor extends AbstractSolrEventListener { + + private int numFields; + + public NumFieldsMonitor(SolrCore core) { + super(core); + this.numFields = -1; + } + + @Override + public void postCommit() { + RefCounted<SolrIndexSearcher> indexSearcher = null; + try { + indexSearcher = getCore().getSearcher(); + // Get the number of fields directly from the IndexReader instead of the Schema object to also + // include the Review Comment: formatting? ########## solr/solrj/src/java/org/apache/solr/common/util/StrUtils.java: ########## @@ -388,4 +388,19 @@ public static String stringFromReader(Reader inReader) throws IOException { return stringWriter.toString(); } } + Review Comment: Really? Sigh... we don't have this already!??? ########## solr/core/src/test/org/apache/solr/update/processor/NumFieldLimitingUpdateRequestProcessorIntegrationTest.java: ########## @@ -0,0 +1,115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.update.processor; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.UUID; +import java.util.concurrent.TimeUnit; +import org.apache.solr.client.solrj.request.CollectionAdminRequest; +import org.apache.solr.cloud.SolrCloudTestCase; +import org.apache.solr.common.SolrInputDocument; +import org.hamcrest.Matchers; +import org.junit.BeforeClass; +import org.junit.Test; + +public class NumFieldLimitingUpdateRequestProcessorIntegrationTest extends SolrCloudTestCase { + + private static long ADMIN_OPERATION_TIMEOUT = 15 * 1000; Review Comment: use TimeUnit instead? I'm guessing this is meant to be 15 seconds? ########## solr/server/solr/configsets/_default/conf/solrconfig.xml: ########## @@ -939,7 +939,7 @@ </updateProcessor> <!-- The update.autoCreateFields property can be turned to false to disable schemaless mode --> - <updateRequestProcessorChain name="add-unknown-fields-to-the-schema" default="${update.autoCreateFields:true}" Review Comment: intentional? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
