Sharing the complete C code for search.c.
Search for g_testProximity to find the changes related to proximity
queries.
The default QParser handles the following query strings, passed as
command-line arguments, without issues, so no code changes were needed for them:
(1) "a AND b"
(2) "a or b"
(3) "a NOT b"
(4) "a b"
What fails is the case "a b"~100, even though my indexed documents contain
the necessary terms within a span of 100 words.
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#define CFISH_USE_SHORT_NAMES
#define LUCY_USE_SHORT_NAMES
#include "Clownfish/String.h"
#include "Clownfish/Vector.h"
#include "Lucy/Document/HitDoc.h"
#include "Lucy/Highlight/Highlighter.h"
#include "Lucy/Plan/Schema.h"
#include "Lucy/Search/ANDQuery.h"
#include "Lucy/Search/Hits.h"
#include "Lucy/Search/IndexSearcher.h"
#include "Lucy/Search/TermQuery.h"
#include "Lucy/Search/QueryParser.h"
#include "LucyX/Search/ProximityQuery.h"
#include "Lucy/Analysis/Analyzer.h"
#include "Clownfish/TestHarness/TestUtils.h"
#include "QUtils.h"
#include "version.h"
/* Base path of the index directory. main() appends "-<docCount>-<platform>"
 * to this buffer before opening the index, so the 100-byte capacity bounds
 * the length of the final path. */
char path_to_index[100] = "./lucy_index/lucy_index";
/* When defined, main() builds a Highlighter and prints an excerpt per hit. */
#define ENABLE_HIGHLIGHTER
/* Test configuration: selectable query-construction modes. */
enum {
    g_testDefault   = 0,  /* QParser handles BOOLEAN/TERM queries */
    g_testProximity = 1,  /* build a proximity query instead */
    g_testMax       = 2   /* number of modes; used to size lookup arrays */
};
/* Describes one selectable test mode by its textual name. */
typedef struct TestOpts_ {
    const char *name;  /* mode name as a NUL-terminated C string */
} TestOpts;
/**
 * Print the command-line synopsis and terminate the process with status 1.
 *
 * @param arg0  Program name as invoked (normally argv[0]); substituted into
 *              the usage line.
 *
 * This function does not return.
 */
static void
S_usage_and_exit(const char *arg0) {
    /* The format string is assembled from adjacent literals: a string
     * literal may not span physical source lines. */
    printf("Usage: %s [-p <x86_64/aarch64> platform] "
           "[-a <enable(1)/disable(0)> angel signals] "
           "[-s <Docs count>] "
           "[-c <category (OPTIONAL)>] "
           "[-T <default|proximity>] "
           "<querystring>\n",
           arg0);
    exit(1);
}
int
main(int argc, char *argv[]) {
fprintf( stderr, "Search Version: %d.%d\n", MAJOR_VERSION,
MINOR_VERSION);
bool isEnableAngelSignals = false;
uint32_t docCount = 0;
uint32_t numWanted = 10;
// Initialize the library.
lucy_bootstrap_parcel();
const char *category = NULL;
const char *platform = NULL;
const char *testQuery = NULL;
TestOpts g_testopts[] =
{
{ "default" },
{ "proximity"},
};
bool queryType[g_testMax] = {false};
int i = 1;
uint32_t j;
while (i < argc - 1) {
if (strcmp(argv[i], "-p") == 0) {
if (i + 1 >= argc) {
S_usage_and_exit(argv[0]);
}
i += 1;
platform = argv[i];
}
else if (strcmp(argv[i], "-a") == 0) {
if (i + 1 >= argc) {
S_usage_and_exit(argv[0]);
}
i += 1;
isEnableAngelSignals = argv[i];
}
else if (strcmp(argv[i], "-s") == 0) {
if (i + 1 >= argc) {
S_usage_and_exit(argv[0]);
}
i += 1;
docCount = atol(argv[i]);
}
else if (strcmp(argv[i], "-c") == 0) {
if (i + 1 >= argc) {
//S_usage_and_exit(argv[0]);
}
i += 1;
category = argv[i];
printf("Category given: %s\n\n", category);
}
else if (strcmp(argv[i], "-T") == 0) {
if (i + 1 >= argc) {
S_usage_and_exit(argv[0]);
}
i += 1;
testQuery = argv[i];
char *opt = (char *)&testQuery[0];
bool found = false;
for(j = 0; j < sizeof(g_testopts)/sizeof(TestOpts); j++)
{
if (strcmp(opt, g_testopts[j].name) == 0)
{
queryType[j] = true;
printf( "Testing Query: %s\n", g_testopts[j].name);
found = true;
break;
}
}
if (!found)
{
printf("Invalid option: -T=%s\n", testQuery);
printf("Valid tests: -T=%s", g_testopts[0].name);
for(j = 1; j < sizeof(g_testopts)/sizeof(TestOpts); j++)
{
printf( ",%s", g_testopts[j].name);
}
printf( "\n");
exit(0);
}
}
else {
S_usage_and_exit(argv[0]);
}
i += 1;
}
if (i + 1 != argc) {
S_usage_and_exit(argv[0]);
}
const char *query_c = argv[i];
printf("Searching for: %s with # of hits: %d \n\n", query_c, numWanted);
#ifdef PERF_INSTRUMENT
perf_event_init( (enable_perf_events) (ENABLE_HW_CYCLES_PER |
ENABLE_HW_INSTRS_PER) );
#endif
char buff1[100];
sprintf(buff1, "%s%d%s%s", "-", docCount, "-", platform);
strcat(path_to_index, buff1);
String *folder = Str_newf("%s", path_to_index);
printf("Index file used: %s\n", path_to_index);
#ifdef PERF_INSTRUMENT
perf_event_enable ( (enable_perf_events) (ENABLE_HW_CYCLES_PER |
ENABLE_HW_INSTRS_PER) );
uint64_t beginI = perf_per_instr_event_read();
#endif
double start = (double)clock();
IndexSearcher *searcher = IxSearcher_new((Obj*)folder);
Schema *schema = IxSearcher_Get_Schema(searcher);
String *query_str = Str_newf("%s", query_c);
QueryParser *qparser = QParser_new(schema, NULL, NULL, NULL);
ProximityQuery *pquery = NULL;
Query *query = NULL;
query = QParser_Parse(qparser, query_str);
String *content_str = Str_newf("content");
#ifdef ENABLE_HIGHLIGHTER
Highlighter *highlighter
= Highlighter_new((Searcher*)searcher, (Obj*)query, content_str,
200);
#endif
if (category)
{
String *category_name = Str_newf("category");
String *category_str = Str_newf("%s", category);
TermQuery *category_query
= TermQuery_new(category_name, (Obj*)category_str);
Vector *children = Vec_new(2);
Vec_Push(children, (Obj*)query);
Vec_Push(children, (Obj*)category_query);
query = (Query*)ANDQuery_new(children);
DECREF(children);
DECREF(category_str);
DECREF(category_name);
}
//To handle proximity queries
if( queryType[g_testProximity] )
{
Vector *terms = Vec_new(0);
Vec_Push(terms, (Obj*)query_str);
String *field_name = Str_newf("content");
pquery = (Query*)ProximityQuery_new(field_name, terms, 100);
Vector *children = Vec_new(2);
Vec_Push(children, (Obj*) query);
Vec_Push(children, (Obj*) pquery);
query = (Query*) (children); //???
DECREF(children);
DECREF(field_name);
DECREF(terms);
}
Hits *hits;
if ( queryType[g_testDefault] )
{
hits = IxSearcher_Hits(searcher, (Obj*)query, 0, numWanted, NULL);
}
else
{
hits = IxSearcher_Hits(searcher, (Obj*)query, 0, numWanted, NULL);
}
String *title_str = Str_newf("title");
String *url_str = Str_newf("url");
HitDoc *hit;
i = 1;
// Loop over search results.
while (NULL != (hit = Hits_Next(hits))) {
String *title = (String*)HitDoc_Extract(hit, title_str);
char *title_c = Str_To_Utf8(title);
String *url = (String*)HitDoc_Extract(hit, url_str);
char *url_c = Str_To_Utf8(url);
#ifdef ENABLE_HIGHLIGHTER
String *excerpt = Highlighter_Create_Excerpt(highlighter, hit);
char *excerpt_c = Str_To_Utf8(excerpt);
printf("Result %d: %s (%s)\n%s\n\n", i, title_c, url_c, excerpt_c);
free(excerpt_c);
DECREF(excerpt);
#else
printf("Result %d: %s (%s)\n\n", i, title_c, url_c);
#endif
free(url_c);
free(title_c);
DECREF(url);
DECREF(title);
DECREF(hit);
i++;
}
printf("Search: %8.5f QPS\n", (1 *
((double)CLOCKS_PER_SEC/((double)clock()-start))) );
#ifdef PERF_INSTRUMENT
printf("================================\n");
printf("For Searching: %lld instructions\n", (perf_per_instr_event_read()
- beginI) );
#endif
DECREF(url_str);
DECREF(title_str);
DECREF(hits);
DECREF(query);
DECREF(query_str);
if( queryType[g_testProximity] )
{
DECREF(pquery);
}
#ifdef ENABLE_HIGHLIGHTER
DECREF(highlighter);
#endif
DECREF(content_str);
DECREF(qparser);
DECREF(searcher);
DECREF(folder);
return 0;
}
--
View this message in context:
http://lucene.472066.n3.nabble.com/lucy-user-ProxmityQuery-in-C-tp4320613p4321024.html
Sent from the lucy-user mailing list archive at Nabble.com.