Here's the patch.
Matthew
Index: swmodule.cpp =================================================================== --- swmodule.cpp (revision 2448) +++ swmodule.cpp (working copy) @@ -498,7 +498,7 @@ #ifdef USELUCENE if (searchType == -4) { // lucene //Buffers for the wchar<->utf8 char* conversion - const unsigned short int MAX_CONV_SIZE = 2047; + const unsigned int MAX_CONV_SIZE = 1024 * 1024; wchar_t wcharBuffer[MAX_CONV_SIZE + 1]; char utfBuffer[MAX_CONV_SIZE + 1]; @@ -510,10 +510,11 @@ ir = IndexReader::open(target); is = new IndexSearcher(ir); (*percent)(10, percentUserData); - - standard::StandardAnalyzer analyzer; + + const TCHAR* stop_words[] = { NULL }; + standard::StandardAnalyzer *analyzer = new standard::StandardAnalyzer( (const TCHAR**)stop_words ); lucene_utf8towcs(wcharBuffer, istr, MAX_CONV_SIZE); //TODO Is istr always utf8? - q = QueryParser::parse(wcharBuffer, _T("content"), &analyzer); + q = QueryParser::parse(wcharBuffer, _T("content"), analyzer); (*percent)(20, percentUserData); h = is->search(q); (*percent)(80, percentUserData); @@ -1026,21 +1027,27 @@ IndexWriter *coreWriter = NULL; IndexWriter *fsWriter = NULL; Directory *d = NULL; - - standard::StandardAnalyzer *an = new standard::StandardAnalyzer(); + const unsigned int MAX_CONV_SIZE = 1024 * 1024; + + const TCHAR* stop_words[] = { NULL }; + standard::StandardAnalyzer *an = new standard::StandardAnalyzer( (const TCHAR**)stop_words ); SWBuf target = getConfigEntry("AbsoluteDataPath"); bool includeKeyInSearch = getConfig().has("SearchOption", "IncludeKeyInSearch"); char ch = target.c_str()[strlen(target.c_str())-1]; if ((ch != '/') && (ch != '\\')) target.append('/'); target.append("lucene"); - FileMgr::createParent(target+"/dummy"); + int iswritable = FileMgr::createParent(target+"/dummy"); + if (iswritable == -1) + return -1; ramDir = new RAMDirectory(); coreWriter = new IndexWriter(ramDir, an, true); + coreWriter->setMaxFieldLength(MAX_CONV_SIZE); + char perc = 1; VerseKey *vkcheck = 0; vkcheck = 
SWDYNAMIC_CAST(VerseKey, key); @@ -1066,8 +1073,11 @@ SWBuf proxBuf; SWBuf proxLem; SWBuf strong; + SWBuf morph; + SWBuf footnote; + SWBuf heading; - const short int MAX_CONV_SIZE = 2047; + wchar_t wcharBuffer[MAX_CONV_SIZE + 1]; char err = Error(); @@ -1104,8 +1114,15 @@ AttributeTypeList::iterator words; AttributeList::iterator word; AttributeValue::iterator strongVal; + AttributeValue::iterator morphVal; + AttributeValue::iterator headings; + AttributeTypeList::iterator footnotes; + AttributeList::iterator footList; + AttributeValue::iterator footVal; + strong=""; + morph=""; words = getEntryAttributes().find("Word"); if (words != getEntryAttributes().end()) { for (word = words->second.begin();word != words->second.end(); word++) { @@ -1124,10 +1141,38 @@ strong.append(strongVal->second); strong.append(' '); } + tmp = "Morph"; + morphVal = word->second.find(tmp); + if (morphVal != word->second.end()){ + morph.append(morphVal->second); + morph.append(' '); + } } } } + footnote=""; + footnotes = getEntryAttributes().find("Footnote"); + if (footnotes != getEntryAttributes().end()) { + for (footList = footnotes->second.begin(); footList != footnotes->second.end(); footList++) { + SWBuf tmp = "body"; + footVal = footList->second.find(tmp); + if (footVal != footList->second.end()) { + footnote.append(footVal->second); + footnote.append(' '); + } + } + } + + heading=""; + for (headings = getEntryAttributes()["Heading"]["Preverse"].begin(); + headings != getEntryAttributes()["Heading"]["Preverse"].end(); + headings++) { + heading.append(headings->second); + heading.append(' '); + } + + lucene_utf8towcs(wcharBuffer, keyText, MAX_CONV_SIZE); //keyText must be utf8 // doc->add( *(new Field("key", wcharBuffer, Field::STORE_YES | Field::INDEX_TOKENIZED))); doc->add( *Field::Text(_T("key"), wcharBuffer ) ); @@ -1149,6 +1194,21 @@ //printf("setting fields (%s).\ncontent: %s\nlemma: %s\n", (const char *)*key, content, strong.c_str()); } + if (morph.length() > 0) { + 
lucene_utf8towcs(wcharBuffer, morph, MAX_CONV_SIZE); + doc->add( *Field::UnStored(_T("morph"), wcharBuffer) ); + } + + if (footnote.length() > 0) { + lucene_utf8towcs(wcharBuffer, footnote, MAX_CONV_SIZE); + doc->add( *Field::UnStored(_T("footnote"), wcharBuffer) ); + } + + if (heading.length() > 0) { + lucene_utf8towcs(wcharBuffer, heading, MAX_CONV_SIZE); + doc->add( *Field::UnStored(_T("heading"), wcharBuffer) ); + } + //printf("setting fields (%s).\n", (const char *)*key); //fflush(stdout); }
_______________________________________________
sword-devel mailing list: sword-devel@crosswire.org
http://www.crosswire.org/mailman/listinfo/sword-devel
Instructions to unsubscribe/change your settings at above page