Hello.
Code Sample for the issue:-
I would like to mention that I am able to index documents containing close
to 3.5 lakh (350,000) lines, whereas indexing does NOT complete when the
number of lines is greater than 5 lakh (500,000): I get an out-of-memory
error (OutOfMemoryError) from Java.
I would sincerely appreciate some help and guidance.
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LiveIndexWriterConfig;
import org.apache.lucene.index.LogByteSizeMergePolicy;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.SerialMergeScheduler;
import org.apache.lucene.index.MergePolicy.OneMerge;
import org.apache.lucene.index.MergeScheduler;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.util.Date;
public class D {
/** Index all text files under a directory. */
static String[] filenames;
public static void main(String[] args) {
//String indexPath = args[0];
String indexPath="D:\\Issue";//Place where indexes will be
created
String docsPath="Issue"; //Place where the files are kept.
boolean create=true;
String ch="OverAll";
final File docDir = new File(docsPath);
if (!docDir.exists() || !docDir.canRead()) {
System.out.println("Document directory '"
+docDir.getAbsolutePath()+
"' does not exist or is not readable, please check the path");
System.exit(1);
}
Date start = new Date();
try {
Directory dir = FSDirectory.open(new File(indexPath));
Analyzer analyzer=new
com.rancore.demo.CustomAnalyzerForCaseSensitive(Version.LUCENE_44);
IndexWriterConfig iwc = new
IndexWriterConfig(Version.LUCENE_44,
analyzer);
iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
IndexWriter writer = new IndexWriter(dir, iwc);
if(ch.equalsIgnoreCase("OverAll")){
indexDocs(writer, docDir,true);
}else{
filenames=args[2].split(",");
// indexDocs(writer, docDir);
}
writer.commit();
writer.close();
} catch (IOException e) {
System.out.println(" caught a " + e.getClass() +
"\n with message: " + e.getMessage());
}
catch(Exception e)
{
e.printStackTrace();
}
}
//Over All
static void indexDocs(IndexWriter writer, File file,boolean flag)
throws IOException {
FileInputStream fis = null;
if (file.canRead()) {
if (file.isDirectory()) {
String[] files = file.list();
// an IO error could occur
if (files != null) {
for (int i = 0; i< files.length; i++) {
indexDocs(writer, new File(file, files[i]),flag);
}
}
} else {
try {
fis = new FileInputStream(file);
} catch (FileNotFoundException fnfe) {
fnfe.printStackTrace();
}
try {
Document doc = new Document();
Field pathField = new StringField("path", file.getPath(),
Field.Store.YES);
doc.add(pathField);
doc.add(new LongField("modified", file.lastModified(),
Field.Store.NO));
doc.add(new
StringField("name",file.getName(),Field.Store.YES));
doc.add(new TextField("contents", new BufferedReader(new
InputStreamReader(fis, "UTF-8"))));
LineNumberReader lnr=new LineNumberReader(new
FileReader(file));
String line=null;
while( null != (line = lnr.readLine()) ){
doc.add(new
StringField("SC",line.trim(),Field.Store.YES));
// doc.add(new
Field("contents",line,Field.Store.YES,Field.Index.ANALYZED));
}
if (writer.getConfig().getOpenMode() ==
OpenMode.CREATE_OR_APPEND)
{
writer.addDocument(doc);
writer.commit();
fis.close();
} else {
try
{
writer.updateDocument(new Term("path", file.getPath()),
doc);
fis.close();
}catch(Exception e)
{
writer.close();
fis.close();
e.printStackTrace();
}
}
}catch (Exception e) {
writer.close();
fis.close();
e.printStackTrace();
}finally {
// writer.close();
fis.close();
}
}
}
}
}
On 8/30/2013 10:33 PM, Adrien Grand wrote:
Ankit,
The stack traces you are showing only say there was an out-of-memory
error. In those cases, the stack trace is unfortunately not always
helpful, since the allocation may fail on a small object because
another object is taking all the memory of the JVM. Can you come up
with a small piece of code that reproduces the error you are
encountering? This would help us see if there is something wrong in
the indexing code and, if not, try to debug it.
--
Regards
Ankit Murarka
"What lies behind us and what lies before us are tiny matters compared with what
lies within us"
---------------------------------------------------------------------
To unsubscribe, e-mail: java-user-unsubscr...@lucene.apache.org
For additional commands, e-mail: java-user-h...@lucene.apache.org