Hi, what platform do you use? CLucene uses TCHAR as its character type, and this should be #defined as wchar_t (at least on Windows and Linux).
If this doesn’t help: CLucene rewrites a wildcard expression into a Boolean OR query over all index terms that match the wildcard condition. You can look at clucene\src\core\CLucene\search\WildcardTermEnum.cpp. There is a method, WildcardTermEnum::termCompare, which decides whether a term matches the wildcard or not. Let me know if you need more help. Borek From: Tamás Dömők [mailto:domokta...@gmail.com] Sent: Wednesday, July 24, 2019 11:41 AM To: clucene-developers@lists.sourceforge.net Subject: [CLucene-dev] Wildcard query on a Russian text is not working for me Hi all, I'm trying to index some Russian content and search in this content using the CLucene library (v2.3.3.4-10). It works most of the time, but on some words the wildcard query is not working for me, and I have no idea why. Can anybody help me with this, please? Here is my source code: main.cc: #include <QCoreApplication> #include <QString> #include <QDebug> #include <QScopedPointer> #include <CLucene.h> const TCHAR FIELD_CONTENT[] = L"content"; const char INDEX_PATH[] = "/tmp/index"; void create_index(const QString &content) { lucene::analysis::standard::StandardAnalyzer analyzer; lucene::index::IndexWriter writer(INDEX_PATH, &analyzer, true); lucene::document::Document doc; std::wstring content_buffer = content.toStdWString(); doc.add(*_CLNEW lucene::document::Field(FIELD_CONTENT, content_buffer.data(), lucene::document::Field::STORE_NO | lucene::document::Field::INDEX_TOKENIZED | lucene::document::Field::TERMVECTOR_NO, true)); writer.addDocument(&doc); writer.flush(); writer.close(true); } void search(const QString &query_string) { lucene::search::IndexSearcher searcher(INDEX_PATH); lucene::analysis::standard::StandardAnalyzer analyzer; lucene::queryParser::QueryParser parser(FIELD_CONTENT, &analyzer); parser.setAllowLeadingWildcard(true); std::wstring query = query_string.toStdWString(); QScopedPointer< lucene::search::Query > lucene_query(parser.parse(query.c_str(), FIELD_CONTENT, &analyzer)); QScopedPointer< 
lucene::search::Hits > hits(searcher.search(lucene_query.data())); TCHAR *query_debug_string(lucene_query->toString()); qDebug() << "found?" << query_string << QString::fromWCharArray(query_debug_string) << (hits->length() > 0); free(query_debug_string); } int main(int argc, char *argv[]) { QCoreApplication a(argc, argv); create_index(QString("Росси́я официально также Росси́йская Федера́ция")); search(QString("noWordLkeThis")); // ok search(QString("Федера́ция")); // ok search(QString("Федер*ция")); // ERROR: it should work, but it doesn't search(QString("Фед*")); // ok search(QString("Федер")); // ok search(QString("\"федера ция\"")); // why is this working? search(QString("официально")); // ok search(QString("офиц*ьно")); // ok search(QString("оф*циально")); // ok search(QString("офици*но")); // ok return 0; } cluceneutf8.pro: QT -= gui CONFIG += c++11 console CONFIG -= app_bundle CONFIG += link_pkgconfig PKGCONFIG += libclucene-core SOURCES += \ main.cc qmake && make && ./cluceneutf8 The output of the program: found? "noWordLkeThis" "content:nowordlkethis" false found? "Федера́ция" "content:\"федера ция\"" true found? "Федер*ция" "content:федер*ция" false found? "Фед*" "content:фед*" true found? "Федер" "content:федер" false found? "\"федера ция\"" "content:\"федера ция\"" true found? "официально" "content:официально" true found? "офиц*ьно" "content:офиц*ьно" true found? "оф*циально" "content:оф*циально" true found? "офици*но" "content:офици*но" true It's built with Qt and qmake, but I also made a non-Qt version; if that would be better, I can share it. So my problem is that I can search for Федера́ция but I can't search for Федер*ция, for example. Other words like официально can be searched anyway. Thanks. -- Dömők Tamás
_______________________________________________ CLucene-developers mailing list CLucene-developers@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/clucene-developers