Copilot commented on code in PR #16650: URL: https://github.com/apache/pinot/pull/16650#discussion_r2318469292
########## pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/text/lucene/parsers/MinimumShouldMatchQueryParserTest.java: ########## @@ -0,0 +1,271 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.segment.local.segment.index.text.lucene.parsers; + +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.queryparser.classic.ParseException; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.Query; +import org.testng.Assert; +import org.testng.annotations.Test; + + +public class MinimumShouldMatchQueryParserTest { + + private static final String FIELD_NAME = "content"; + + /** + * Helper method to parse query with minimum_should_match option and return the result. + * + * @param query the query string to parse + * @param minimumShouldMatch the minimum_should_match value (can be null) + * @return the parsed Query + * @throws ParseException if parsing fails + */ + private Query parseQueryWithMinimumShouldMatch(String query, String minimumShouldMatch) + throws ParseException { + MatchQueryParser parser = new MatchQueryParser(FIELD_NAME, new StandardAnalyzer()); + if (minimumShouldMatch != null) { + parser.setMinimumShouldMatch(minimumShouldMatch); + } + return parser.parse(query); + } + + @Test + public void testPositiveCases() + throws ParseException { + // Test 1: MUST_SHOULD_80_percent - OpenSearch AND (one OR two OR three OR four) with minimumShouldMatch=80% + Query result1 = parseQueryWithMinimumShouldMatch("OpenSearch AND (one OR two OR three OR four)", "80%"); + Assert.assertTrue(result1 instanceof BooleanQuery); + BooleanQuery booleanQuery1 = (BooleanQuery) result1; + // Should have 2 clauses: MUST(OpenSearch) and MUST(nested BooleanQuery) + Assert.assertEquals(booleanQuery1.clauses().size(), 2); + // The nested BooleanQuery should have minimumShouldMatch=3 (80% of 4 = 3.2, rounded down to 3) + BooleanQuery nestedQuery1 = (BooleanQuery) booleanQuery1.clauses().get(1).getQuery(); + Assert.assertEquals(nestedQuery1.getMinimumNumberShouldMatch(), 3); + + // Test 2: MUST_SHOULD_negative_20_percent - OpenSearch AND (one OR two OR three OR four) with + // minimumShouldMatch=-20% + Query result2 = parseQueryWithMinimumShouldMatch("OpenSearch AND (one OR two OR three OR four)", "-20%"); + Assert.assertTrue(result2 instanceof BooleanQuery); + BooleanQuery booleanQuery2 = (BooleanQuery) result2; + Assert.assertEquals(booleanQuery2.clauses().size(), 2); + // The nested BooleanQuery should have minimumShouldMatch=3 (100+(-20)=80% of 4 = 3.2, rounded down to 3) + BooleanQuery nestedQuery2 = (BooleanQuery) booleanQuery2.clauses().get(1).getQuery(); + Assert.assertEquals(nestedQuery2.getMinimumNumberShouldMatch(), 3); + + // Test 3: SHOULD_only_default_one - one OR two OR three OR four without minimumShouldMatch + Query result3 = parseQueryWithMinimumShouldMatch("one OR two OR three OR four", null); + Assert.assertTrue(result3 instanceof BooleanQuery); + BooleanQuery booleanQuery3 = (BooleanQuery) result3; + Assert.assertEquals(booleanQuery3.clauses().size(), 4); + // Default minimumShouldMatch should be 1 for SHOULD-only queries + Assert.assertEquals(booleanQuery3.getMinimumNumberShouldMatch(), 1); + + // Test 4: SHOULD_minimum_2 - one OR two OR three OR four with minimumShouldMatch=2 + Query result4 = parseQueryWithMinimumShouldMatch("one OR two OR three OR four", "2"); + Assert.assertTrue(result4 instanceof BooleanQuery); + BooleanQuery booleanQuery4 = (BooleanQuery) result4; + Assert.assertEquals(booleanQuery4.clauses().size(), 4); + Assert.assertEquals(booleanQuery4.getMinimumNumberShouldMatch(), 2); + + // Test 5: SHOULD_75_percent - one OR two OR three OR four with minimumShouldMatch=75% + Query result5 = parseQueryWithMinimumShouldMatch("one OR two OR three OR four", "75%"); + Assert.assertTrue(result5 instanceof BooleanQuery); + BooleanQuery booleanQuery5 = (BooleanQuery) result5; + Assert.assertEquals(booleanQuery5.clauses().size(), 4); + // 75% of 4 = 3 matches required + Assert.assertEquals(booleanQuery5.getMinimumNumberShouldMatch(), 3); + + // Test 6: SHOULD_100_percent - one OR two OR three OR four with minimumShouldMatch=100% + Query result6 = parseQueryWithMinimumShouldMatch("one OR two OR three OR four", "100%"); + Assert.assertTrue(result6 instanceof BooleanQuery); + BooleanQuery booleanQuery6 = (BooleanQuery) result6; + Assert.assertEquals(booleanQuery6.clauses().size(), 4); + // 100% of 4 = 4 matches required + Assert.assertEquals(booleanQuery6.getMinimumNumberShouldMatch(), 4); + + // Test 7: SHOULD_25_percent - one OR two OR three OR four with minimumShouldMatch=25% + Query result7 = parseQueryWithMinimumShouldMatch("one OR two OR three OR four", "25%"); + Assert.assertTrue(result7 instanceof BooleanQuery); + BooleanQuery booleanQuery7 = (BooleanQuery) result7; + Assert.assertEquals(booleanQuery7.clauses().size(), 4); + // 25% of 4 = 1 match required + Assert.assertEquals(booleanQuery7.getMinimumNumberShouldMatch(), 1); + + // Test 8: single_term_query - OpenSearch with minimumShouldMatch=1 + Query result8 = parseQueryWithMinimumShouldMatch("OpenSearch", "1"); + Assert.assertTrue(result8 instanceof BooleanQuery); + BooleanQuery booleanQuery8 = (BooleanQuery) result8; + Assert.assertEquals(booleanQuery8.clauses().size(), 1); + Assert.assertEquals(booleanQuery8.getMinimumNumberShouldMatch(), 1); + + // Test 9: SHOULD_negative_50_percent - one OR two OR three OR four with minimumShouldMatch=-50% + Query result9 = parseQueryWithMinimumShouldMatch("one OR two OR three OR four", "-50%"); + Assert.assertTrue(result9 instanceof BooleanQuery); + BooleanQuery booleanQuery9 = (BooleanQuery) result9; + Assert.assertEquals(booleanQuery9.clauses().size(), 4); + // -50% means 50% must match, so 50% of 4 = 2 matches required + Assert.assertEquals(booleanQuery9.getMinimumNumberShouldMatch(), 2); + + // Test 10: Deep nested query - OpenSearch AND ((one OR two) AND (three OR four OR five)) with + // minimumShouldMatch=60% + Query result10 = parseQueryWithMinimumShouldMatch( + "OpenSearch AND ((one OR two) AND (three OR four OR five))", "60%"); + Assert.assertTrue(result10 instanceof BooleanQuery); + BooleanQuery booleanQuery10 = (BooleanQuery) result10; + Assert.assertEquals(booleanQuery10.clauses().size(), 2); + + // Get the nested BooleanQuery: ((one OR two) AND (three OR four OR five)) + BooleanQuery nestedQuery10 = (BooleanQuery) booleanQuery10.clauses().get(1).getQuery(); + Assert.assertEquals(nestedQuery10.clauses().size(), 2); + + // Get the first sub-nested BooleanQuery: (one OR two) + BooleanQuery subNested1 = (BooleanQuery) nestedQuery10.clauses().get(0).getQuery(); + Assert.assertEquals(subNested1.clauses().size(), 2); + // 60% of 2 = 1.2, rounded down to 1 + Assert.assertEquals(subNested1.getMinimumNumberShouldMatch(), 1); + + // Get the second sub-nested BooleanQuery: (three OR four OR five) + BooleanQuery subNested2 = (BooleanQuery) nestedQuery10.clauses().get(1).getQuery(); + Assert.assertEquals(subNested2.clauses().size(), 3); + // 60% of 3 = 1.8, rounded down to 1 + Assert.assertEquals(subNested2.getMinimumNumberShouldMatch(), 1); + } + + @Test + public void testNegativeCases() + throws ParseException { + // Case 1: Invalid percentage value (> 100%) + try { + parseQueryWithMinimumShouldMatch("java OR python", "101%"); + Assert.fail("Should throw IllegalArgumentException for invalid percentage"); + } catch (IllegalArgumentException e) { + // Expected + } catch (ParseException e) { + Assert.fail("Should throw IllegalArgumentException, not ParseException"); + } + + // Case 2: Invalid negative percentage value (< -100%) + try { + parseQueryWithMinimumShouldMatch("java OR python", "-101%"); + Assert.fail("Should throw IllegalArgumentException for invalid negative percentage"); + } catch (IllegalArgumentException e) { + // Expected + } catch (ParseException e) { + Assert.fail("Should throw IllegalArgumentException, not ParseException"); + } + + // Case 3: Invalid format (not integer or percentage) + try { + parseQueryWithMinimumShouldMatch("java OR python", "abc"); + Assert.fail("Should throw IllegalArgumentException for invalid format"); + } catch (IllegalArgumentException e) { + // Expected + } catch (ParseException e) { + Assert.fail("Should throw IllegalArgumentException, not ParseException"); + } + + // Case 4: Invalid decimal percentage + try { + parseQueryWithMinimumShouldMatch("java OR python", "50.5%"); + Assert.fail("Should throw IllegalArgumentException for invalid decimal percentage"); + } catch (IllegalArgumentException e) { + // Expected + } catch (ParseException e) { + Assert.fail("Should throw IllegalArgumentException, not ParseException"); + } + + // Case 5: Null query + try { + parseQueryWithMinimumShouldMatch(null, null); + Assert.fail("Should throw ParseException for null query"); + } catch (ParseException e) { + // Expected + } + + // Case 6: Empty query + try { + parseQueryWithMinimumShouldMatch("", null); + Assert.fail("Should throw ParseException for empty query"); + } catch (ParseException e) { + // Expected + } + + // Case 7: Whitespace-only query + try { + parseQueryWithMinimumShouldMatch(" ", null); + Assert.fail("Should throw ParseException for whitespace-only query"); + } catch (ParseException e) { + // Expected + } + + + Review Comment: Remove excessive blank lines between test cases. Use single blank lines to separate logical sections for better readability. ```suggestion ``` ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/text/lucene/parsers/MatchQueryParser.java: ########## @@ -0,0 +1,335 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.segment.local.segment.index.text.lucene.parsers; + +import java.util.HashSet; +import java.util.Set; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.queryparser.charstream.CharStream; +import org.apache.lucene.queryparser.classic.ParseException; +import org.apache.lucene.queryparser.classic.QueryParser; +import org.apache.lucene.queryparser.classic.QueryParserBase; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; + + +/** + * A custom query parser that implements minimum_should_match behavior. + * This parser creates Boolean queries with should clauses and enforces a minimum + * number of matches. + * + * <p>This parser supports the following minimum_should_match formats:</p> + * <ul> + * <li><strong>Positive integer:</strong> "3" - at least 3 should clauses must match</li> + * <li><strong>Negative integer:</strong> "-2" - at most 2 should clauses can be missing</li> + * <li><strong>Positive percentage:</strong> "80%" - at least 80% of should clauses must match</li> + * <li><strong>Negative percentage:</strong> "-20%" - at most 20% of should clauses can be missing</li> + * </ul> + * + * <p><strong>Example usage:</strong></p> + * <ul> + * <li>Input: 'java OR python OR scala' with minimumShouldMatch=2 + * <br>Output: BooleanQuery with 3 should clauses, requiring at least 2 matches</li> + * <li>Input: 'machine learning OR deep learning OR neural networks' with minimumShouldMatch="80%" + * <br>Output: BooleanQuery with 3 should clauses, requiring at least 2 matches (80% of 3 = 2.4, rounded down + * to 2)</li> + * <li>Input: 'error OR warning OR critical' with minimumShouldMatch="-1" + * <br>Output: BooleanQuery with 3 should clauses, allowing at most 1 to be missing (requiring at least 2 + * matches)</li> + * </ul> + * + * <p><strong>Behavior:</strong></p> + * <ul> + * <li>Single term queries: Returns TermQuery (minimum_should_match is ignored)</li> + * <li>Multiple term queries: Returns BooleanQuery with should clauses and minimum match requirement</li> + * <li>Null/empty queries: Throws ParseException</li> + * </ul> + * + * <p>This parser extends Lucene's QueryParserBase and implements the required abstract methods. + * It uses the provided Analyzer for tokenization and creates appropriate Lucene Boolean queries.</p> + */ +public class MatchQueryParser extends QueryParserBase { + /** The field name to search in */ + private final String _field; + + /** The analyzer used for tokenizing the query */ + private final Analyzer _analyzer; + + /** The minimum should match specification (stored as string for dynamic calculation) */ + private String _minimumShouldMatch = "1"; + + /** The default operator for combining terms */ + private BooleanClause.Occur _defaultOperator = BooleanClause.Occur.SHOULD; + + /** Pattern for parsing percentage values */ + private static final Pattern PERCENTAGE_PATTERN = Pattern.compile("^(-?\\d+)%$"); + + /** + * Constructs a new MinimumShouldMatchQueryParser with the specified field and analyzer. + * + * @param field the field name to search in (must not be null) + * @param analyzer the analyzer to use for tokenizing queries (must not be null) + * @throws IllegalArgumentException if field or analyzer is null + */ + public MatchQueryParser(String field, Analyzer analyzer) { + super(); + _field = field; + _analyzer = analyzer; + } + + /** + * Validates the minimum should match specification. + * + * <p>This method validates the format and range of the minimum_should_match value:</p> + * <ul> + * <li><strong>Positive integer:</strong> "3" - at least 3 should clauses must match</li> + * <li><strong>Negative integer:</strong> "-2" - at most 2 should clauses can be missing</li> + * <li><strong>Positive percentage:</strong> "80%" - at least 80% of should clauses must match</li> + * <li><strong>Negative percentage:</strong> "-20%" - at most 20% of should clauses can be missing</li> + * </ul> + * + * @param minimumShouldMatch the minimum should match specification to validate + * @return the validated and trimmed value + * @throws IllegalArgumentException if the format is invalid or value is out of range + */ + private String validateMinimumShouldMatch(String minimumShouldMatch) { + if (minimumShouldMatch == null || minimumShouldMatch.trim().isEmpty()) { + return "1"; + } + + String value = minimumShouldMatch.trim(); + Matcher matcher = PERCENTAGE_PATTERN.matcher(value); + if (matcher.matches()) { + int percentage = Integer.parseInt(matcher.group(1)); + if (percentage < -100 || percentage > 100) { + throw new IllegalArgumentException("Percentage must be between -100 and 100: " + percentage); + } + return value; + } else { + try { + Integer.parseInt(value); + return value; + } catch (NumberFormatException e) { + throw new IllegalArgumentException("Invalid minimum_should_match format: " + value + + ". Expected integer or percentage (e.g., '3', '-2', '80%', '-20%')"); + } + } + } + + /** + * Sets the minimum number of should clauses that must match. + * + * <p>This method supports the same formats as OpenSearch's minimum_should_match:</p> + * <ul> + * <li><strong>Positive integer:</strong> "3" - at least 3 should clauses must match</li> + * <li><strong>Negative integer:</strong> "-2" - at most 2 should clauses can be missing</li> + * <li><strong>Positive percentage:</strong> "80%" - at least 80% of should clauses must match</li> + * <li><strong>Negative percentage:</strong> "-20%" - at most 20% of should clauses can be missing</li> + * </ul> + * + * <p>Examples:</p> + * <ul> + * <li>setMinimumShouldMatch("3") - requires at least 3 matches</li> + * <li>setMinimumShouldMatch("-1") - allows at most 1 to be missing</li> + * <li>setMinimumShouldMatch("80%") - requires at least 80% matches</li> + * <li>setMinimumShouldMatch("-20%") - allows at most 20% to be missing</li> + * </ul> + * + * @param minimumShouldMatch the minimum should match specification (integer or percentage) + * @throws IllegalArgumentException if the format is invalid or value is out of range + */ + public void setMinimumShouldMatch(String minimumShouldMatch) { + _minimumShouldMatch = validateMinimumShouldMatch(minimumShouldMatch); + } + + /** + * Sets the default operator for combining terms. + * + * @param defaultOperator the default operator (MUST for AND, SHOULD for OR) + */ + public void setDefaultOperator(BooleanClause.Occur defaultOperator) { + _defaultOperator = defaultOperator; + } + + /** + * Parses the given query string and returns an appropriate Lucene Query. + * + * <p>This method performs the following steps:</p> + * <ol> + * <li>Validates the input query (null, empty, whitespace-only)</li> + * <li>Parses the query using Lucene's QueryParser</li> + * <li>Applies minimum_should_match behavior to Boolean queries</li> + * </ol> + * + * @param query the query string to parse (must not be null or empty) + * @return a Lucene Query object representing the parsed query + * @throws ParseException if the query is null, empty, or parsing fails + */ + @Override + public Query parse(String query) + throws ParseException { + if (query == null) { + throw new ParseException("Query cannot be null"); + } + + if (query.trim().isEmpty()) { + throw new ParseException("Query cannot be empty"); + } + + // Parse the query using Lucene's QueryParser + QueryParser parser = new QueryParser(_field, _analyzer); + Query parsedQuery = parser.parse(query); + + // If it's a Boolean query, apply minimum_should_match behavior + if (parsedQuery instanceof BooleanQuery) { + return applyMinimumShouldMatch((BooleanQuery) parsedQuery); + } + + // For single term queries, convert to Boolean query with SHOULD clause + // For single terms, minimum_should_match should always be 1 + if (parsedQuery instanceof TermQuery) { + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + builder.add(parsedQuery, BooleanClause.Occur.SHOULD); + builder.setMinimumNumberShouldMatch(1); + return builder.build(); + } + + //All the other queries are returned as is Review Comment: Add a space after '//' to follow standard comment formatting: '// All the other queries are returned as is' ```suggestion // All the other queries are returned as is ``` ########## pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/text/lucene/parsers/MinimumShouldMatchQueryParserTest.java: ########## @@ -0,0 +1,271 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.segment.local.segment.index.text.lucene.parsers; + +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.queryparser.classic.ParseException; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.Query; +import org.testng.Assert; +import org.testng.annotations.Test; + + +public class MinimumShouldMatchQueryParserTest { + + private static final String FIELD_NAME = "content"; + + /** + * Helper method to parse query with minimum_should_match option and return the result. + * + * @param query the query string to parse + * @param minimumShouldMatch the minimum_should_match value (can be null) + * @return the parsed Query + * @throws ParseException if parsing fails + */ + private Query parseQueryWithMinimumShouldMatch(String query, String minimumShouldMatch) + throws ParseException { + MatchQueryParser parser = new MatchQueryParser(FIELD_NAME, new StandardAnalyzer()); + if (minimumShouldMatch != null) { + parser.setMinimumShouldMatch(minimumShouldMatch); + } + return parser.parse(query); + } + + @Test + public void testPositiveCases() + throws ParseException { + // Test 1: MUST_SHOULD_80_percent - OpenSearch AND (one OR two OR three OR four) with minimumShouldMatch=80% + Query result1 = parseQueryWithMinimumShouldMatch("OpenSearch AND (one OR two OR three OR four)", "80%"); + Assert.assertTrue(result1 instanceof BooleanQuery); + BooleanQuery booleanQuery1 = (BooleanQuery) result1; + // Should have 2 clauses: MUST(OpenSearch) and MUST(nested BooleanQuery) + Assert.assertEquals(booleanQuery1.clauses().size(), 2); + // The nested BooleanQuery should have minimumShouldMatch=3 (80% of 4 = 3.2, rounded down to 3) + BooleanQuery nestedQuery1 = (BooleanQuery) booleanQuery1.clauses().get(1).getQuery(); + Assert.assertEquals(nestedQuery1.getMinimumNumberShouldMatch(), 3); + + // Test 2: MUST_SHOULD_negative_20_percent - OpenSearch AND (one OR two OR three OR four) with + // minimumShouldMatch=-20% + Query result2 = parseQueryWithMinimumShouldMatch("OpenSearch AND (one OR two OR three OR four)", "-20%"); + Assert.assertTrue(result2 instanceof BooleanQuery); + BooleanQuery booleanQuery2 = (BooleanQuery) result2; + Assert.assertEquals(booleanQuery2.clauses().size(), 2); + // The nested BooleanQuery should have minimumShouldMatch=3 (100+(-20)=80% of 4 = 3.2, rounded down to 3) + BooleanQuery nestedQuery2 = (BooleanQuery) booleanQuery2.clauses().get(1).getQuery(); + Assert.assertEquals(nestedQuery2.getMinimumNumberShouldMatch(), 3); + + // Test 3: SHOULD_only_default_one - one OR two OR three OR four without minimumShouldMatch + Query result3 = parseQueryWithMinimumShouldMatch("one OR two OR three OR four", null); + Assert.assertTrue(result3 instanceof BooleanQuery); + BooleanQuery booleanQuery3 = (BooleanQuery) result3; + Assert.assertEquals(booleanQuery3.clauses().size(), 4); + // Default minimumShouldMatch should be 1 for SHOULD-only queries + Assert.assertEquals(booleanQuery3.getMinimumNumberShouldMatch(), 1); + + // Test 4: SHOULD_minimum_2 - one OR two OR three OR four with minimumShouldMatch=2 + Query result4 = parseQueryWithMinimumShouldMatch("one OR two OR three OR four", "2"); + Assert.assertTrue(result4 instanceof BooleanQuery); + BooleanQuery booleanQuery4 = (BooleanQuery) result4; + Assert.assertEquals(booleanQuery4.clauses().size(), 4); + Assert.assertEquals(booleanQuery4.getMinimumNumberShouldMatch(), 2); + + // Test 5: SHOULD_75_percent - one OR two OR three OR four with minimumShouldMatch=75% + Query result5 = parseQueryWithMinimumShouldMatch("one OR two OR three OR four", "75%"); + Assert.assertTrue(result5 instanceof BooleanQuery); + BooleanQuery booleanQuery5 = (BooleanQuery) result5; + Assert.assertEquals(booleanQuery5.clauses().size(), 4); + // 75% of 4 = 3 matches required + Assert.assertEquals(booleanQuery5.getMinimumNumberShouldMatch(), 3); + + // Test 6: SHOULD_100_percent - one OR two OR three OR four with minimumShouldMatch=100% + Query result6 = parseQueryWithMinimumShouldMatch("one OR two OR three OR four", "100%"); + Assert.assertTrue(result6 instanceof BooleanQuery); + BooleanQuery booleanQuery6 = (BooleanQuery) result6; + Assert.assertEquals(booleanQuery6.clauses().size(), 4); + // 100% of 4 = 4 matches required + Assert.assertEquals(booleanQuery6.getMinimumNumberShouldMatch(), 4); + + // Test 7: SHOULD_25_percent - one OR two OR three OR four with minimumShouldMatch=25% + Query result7 = parseQueryWithMinimumShouldMatch("one OR two OR three OR four", "25%"); + Assert.assertTrue(result7 instanceof BooleanQuery); + BooleanQuery booleanQuery7 = (BooleanQuery) result7; + Assert.assertEquals(booleanQuery7.clauses().size(), 4); + // 25% of 4 = 1 match required + Assert.assertEquals(booleanQuery7.getMinimumNumberShouldMatch(), 1); + + // Test 8: single_term_query - OpenSearch with minimumShouldMatch=1 + Query result8 = parseQueryWithMinimumShouldMatch("OpenSearch", "1"); + Assert.assertTrue(result8 instanceof BooleanQuery); + BooleanQuery booleanQuery8 = (BooleanQuery) result8; + Assert.assertEquals(booleanQuery8.clauses().size(), 1); + Assert.assertEquals(booleanQuery8.getMinimumNumberShouldMatch(), 1); + + // Test 9: SHOULD_negative_50_percent - one OR two OR three OR four with minimumShouldMatch=-50% + Query result9 = parseQueryWithMinimumShouldMatch("one OR two OR three OR four", "-50%"); + Assert.assertTrue(result9 instanceof BooleanQuery); + BooleanQuery booleanQuery9 = (BooleanQuery) result9; + Assert.assertEquals(booleanQuery9.clauses().size(), 4); + // -50% means 50% must match, so 50% of 4 = 2 matches required + Assert.assertEquals(booleanQuery9.getMinimumNumberShouldMatch(), 2); + + // Test 10: Deep nested query - OpenSearch AND ((one OR two) AND (three OR four OR five)) with + // minimumShouldMatch=60% + Query result10 = parseQueryWithMinimumShouldMatch( + "OpenSearch AND ((one OR two) AND (three OR four OR five))", "60%"); + Assert.assertTrue(result10 instanceof BooleanQuery); + BooleanQuery booleanQuery10 = (BooleanQuery) result10; + Assert.assertEquals(booleanQuery10.clauses().size(), 2); + + // Get the nested BooleanQuery: ((one OR two) AND (three OR four OR five)) + BooleanQuery nestedQuery10 = (BooleanQuery) booleanQuery10.clauses().get(1).getQuery(); + Assert.assertEquals(nestedQuery10.clauses().size(), 2); + + // Get the first sub-nested BooleanQuery: (one OR two) + BooleanQuery subNested1 = (BooleanQuery) nestedQuery10.clauses().get(0).getQuery(); + Assert.assertEquals(subNested1.clauses().size(), 2); + // 60% of 2 = 1.2, rounded down to 1 + Assert.assertEquals(subNested1.getMinimumNumberShouldMatch(), 1); + + // Get the second sub-nested BooleanQuery: (three OR four OR five) + BooleanQuery subNested2 = (BooleanQuery) nestedQuery10.clauses().get(1).getQuery(); + Assert.assertEquals(subNested2.clauses().size(), 3); + // 60% of 3 = 1.8, rounded down to 1 + Assert.assertEquals(subNested2.getMinimumNumberShouldMatch(), 1); + } + + @Test + public void testNegativeCases() + throws ParseException { + // Case 1: Invalid percentage value (> 100%) + try { + parseQueryWithMinimumShouldMatch("java OR python", "101%"); + Assert.fail("Should throw IllegalArgumentException for invalid percentage"); + } catch (IllegalArgumentException e) { + // Expected + } catch (ParseException e) { + Assert.fail("Should throw IllegalArgumentException, not ParseException"); + } + + // Case 2: Invalid negative percentage value (< -100%) + try { + parseQueryWithMinimumShouldMatch("java OR python", "-101%"); + Assert.fail("Should throw IllegalArgumentException for invalid negative percentage"); + } catch (IllegalArgumentException e) { + // Expected + } catch (ParseException e) { + Assert.fail("Should throw IllegalArgumentException, not ParseException"); + } + + // Case 3: Invalid format (not integer or percentage) + try { + parseQueryWithMinimumShouldMatch("java OR python", "abc"); + Assert.fail("Should throw IllegalArgumentException for invalid format"); + } catch (IllegalArgumentException e) { + // Expected + } catch (ParseException e) { + Assert.fail("Should throw IllegalArgumentException, not ParseException"); + } + + // Case 4: Invalid decimal percentage + try { + parseQueryWithMinimumShouldMatch("java OR python", "50.5%"); + Assert.fail("Should throw IllegalArgumentException for invalid decimal percentage"); + } catch (IllegalArgumentException e) { + // Expected + } catch (ParseException e) { + Assert.fail("Should throw IllegalArgumentException, not ParseException"); + } + + // Case 5: Null query + try { + parseQueryWithMinimumShouldMatch(null, null); + Assert.fail("Should throw ParseException for null query"); + } catch (ParseException e) { + // Expected + } + + // Case 6: Empty query + try { + parseQueryWithMinimumShouldMatch("", null); + Assert.fail("Should throw ParseException for empty query"); + } catch (ParseException e) { + // Expected + } + + // Case 7: Whitespace-only query + try { + parseQueryWithMinimumShouldMatch(" ", null); + Assert.fail("Should throw ParseException for whitespace-only query"); + } catch (ParseException e) { + // Expected + } + + + + // Case 9: Non-Boolean query (phrase query) + parseQueryWithMinimumShouldMatch("\"java programming\"", null); + parseQueryWithMinimumShouldMatch("java*", null); + } + + + Review Comment: Remove excessive blank lines between methods. Use single blank lines to separate methods for better readability. ```suggestion ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
