Hello Kai,
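I have only skimmed through your message, but one thing stands out:
queryParser.setSplitOnWhitespace(true); // shouldn't this be false?
With split-on-whitespace enabled, the parser hands each whitespace-separated
word to the analyzer separately, so the synonym filter never sees
"canonical phrase" as a single multi-word input.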
queryParser.setAutoGeneratePhraseQueries(true);
queryParser.setAutoGenerateMultiTermSynonymsPhraseQuery(true);
queryParser.setPhraseSlop(1);
Query q = queryParser.parse("canonical phrase");
assertEquals("foo:canonical foo:phrase", q.toString(),
"I was expecting a phrase query here: foo:\"canonical
phrase\"~1");
On Thu, Oct 30, 2025 at 4:49 PM Kai Grossjohann
<[email protected]> wrote:
> I thought if I have a synonym map that says “synonym alias” is an alias
> for “canonical phrase”, and I noodle “canonical phrase” through the
> query parser, telling it to auto generate multi term queries, I'd get a
> multi term query. But that doesn't seem to be the case.
>
> The only way to generate multi term queries seems to be when the synonym
> says that “shortsyn” is an alias for “another phrase”, and then noodle
> “shortsyn” through the query parser. Then I get foo:"another phrase"~1
> which is what I expected.
>
> My use case is as follows: I have some multi-word strings, and I need to
> create queries from them. And if one of the synonym phrases appears in
> the multi-word string, then I would like to generate a phrase query for
> that part. For example, given the synonyms mentioned above, if the
> multi-word string is, say, “my synonym alias is nice”, then I'd like to
> generate a query that searches for the word “my”, the phrase “canonical
> phrase”, and the words “is” and “nice”. Maybe I would like to
> /also/ search for the words “synonym” and “alias”, or the words
> “canonical” and “phrase”, or all four of them, I'm not sure.
>
> This description left out quite a bit of information, I'll paste some
> code below to clarify.
>
> Kai
>
> /**
> * This tests the behavior of the Lucene query
> * builder with synonyms
> */
> public class SynonymGraphQueryBuilderTest {
>
> private static class MyAnalyzer extends Analyzer {
> private final CharArraySet stopwords;
> private final SynonymMap synonyms;
>
> public MyAnalyzer(Set<String> stopwords, SynonymMap synonyms) {
> this.stopwords = new CharArraySet(stopwords, true);
> this.synonyms = synonyms;
> }
>
> @Override
> protected TokenStreamComponents createComponents(String
> fieldName) {
> final Tokenizer src = new SimplePatternTokenizer("[a-z0-9]+");
> TokenStream tok = new LowerCaseFilter(src);
> tok = new SynonymGraphFilter(tok, synonyms, true);
> tok = new FlattenGraphFilter(tok);
> tok = new StopFilter(tok, stopwords);
> return new TokenStreamComponents(
> src::setReader,
> tok);
> }
> }
>
> @Test
> void testSynonymPhrases() throws Exception {
> Builder builder = new Builder();
>
> // canonical phrase <- synonym alias
> CharsRef canonical = Builder.join(new String[] { "canonical",
> "phrase" }, new CharsRefBuilder());
> CharsRef synonym = Builder.join(new String[] { "synonym",
> "alias" }, new CharsRefBuilder());
> builder.add(synonym, canonical, true);
>
> // another phrase <- shortsyn
> canonical = Builder.join(new String[] { "another", "phrase" },
> new CharsRefBuilder());
> synonym = Builder.join(new String[] { "shortsyn" }, new
> CharsRefBuilder());
> builder.add(synonym, canonical, true);
>
> SynonymMap synonyms = builder.build();
>
> Set<String> stopwords = Set.of("the");
>
> MyAnalyzer analyzer = new MyAnalyzer(stopwords, synonyms);
>
> QueryParser queryParser = new QueryParser("foo", analyzer);
> queryParser.setSplitOnWhitespace(true);
> queryParser.setAutoGeneratePhraseQueries(true);
> queryParser.setAutoGenerateMultiTermSynonymsPhraseQuery(true);
> queryParser.setPhraseSlop(1);
>
> Query q = queryParser.parse("canonical phrase");
> assertEquals("foo:canonical foo:phrase", q.toString(),
> "I was expecting a phrase query here: foo:\"canonical
> phrase\"~1");
>
> q = queryParser.parse("synonym alias");
> assertEquals("foo:synonym foo:alias", q.toString(),
> "I was expecting a phrase query here: foo:\"canonical
> phrase\"~1");
>
> q = queryParser.parse("shortsyn");
> assertEquals("foo:\"another phrase\"~1 foo:shortsyn",
> q.toString(),
> "This is what I expected.");
>
> q = queryParser.parse("another phrase");
> assertEquals("foo:another foo:phrase", q.toString(),
> "I was expecting a phrase query here: foo:\"another
> phrase\"~1");
> }
> }
>
--
Sincerely yours
Mikhail Khludnev