Hello.
Code Sample for the issue:-

I would like to mention that I am able to index documents containing close to 3.5 lakh (350,000) lines, whereas indexing does NOT happen when the number of lines is greater than 5 lakh (500,000): I get an out-of-memory exception from Java.

I would sincerely appreciate some help and guidance.

import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.LongField;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.IndexCommit;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.LiveIndexWriterConfig;
 import org.apache.lucene.index.LogByteSizeMergePolicy;
 import org.apache.lucene.index.MergePolicy;
 import org.apache.lucene.index.SerialMergeScheduler;
 import org.apache.lucene.index.MergePolicy.OneMerge;
 import org.apache.lucene.index.MergeScheduler;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.util.Version;


 import java.io.BufferedReader;
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.FileNotFoundException;
 import java.io.FileReader;
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.io.LineNumberReader;
 import java.util.Date;

 public class D {

      /** Index all text files under a directory. */


        static String[] filenames;

      public static void main(String[] args) {

        //String indexPath = args[0];

        String indexPath="D:\\Issue";//Place where indexes will be
 created
        String docsPath="Issue";    //Place where the files are kept.
        boolean create=true;

        String ch="OverAll";


       final File docDir = new File(docsPath);
       if (!docDir.exists() || !docDir.canRead()) {
          System.out.println("Document directory '"
 +docDir.getAbsolutePath()+
 "' does not exist or is not readable, please check the path");
          System.exit(1);
        }

        Date start = new Date();
       try {
         Directory dir = FSDirectory.open(new File(indexPath));
         Analyzer analyzer=new
 com.rancore.demo.CustomAnalyzerForCaseSensitive(Version.LUCENE_44);
         IndexWriterConfig iwc = new
 IndexWriterConfig(Version.LUCENE_44,
 analyzer);
          iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);

          IndexWriter writer = new IndexWriter(dir, iwc);
          if(ch.equalsIgnoreCase("OverAll")){
              indexDocs(writer, docDir,true);
          }else{
              filenames=args[2].split(",");
             // indexDocs(writer, docDir);

       }
          writer.commit();
          writer.close();

        } catch (IOException e) {
          System.out.println(" caught a " + e.getClass() +
           "\n with message: " + e.getMessage());
        }
        catch(Exception e)
        {

            e.printStackTrace();
        }
     }

      //Over All
      static void indexDocs(IndexWriter writer, File file,boolean flag)
      throws IOException {

          FileInputStream fis = null;
     if (file.canRead()) {

        if (file.isDirectory()) {
         String[] files = file.list();
          // an IO error could occur
          if (files != null) {
            for (int i = 0; i<    files.length; i++) {
              indexDocs(writer, new File(file, files[i]),flag);
            }
          }
       } else {
          try {
            fis = new FileInputStream(file);
         } catch (FileNotFoundException fnfe) {

           fnfe.printStackTrace();
         }

          try {

              Document doc = new Document();

              Field pathField = new StringField("path", file.getPath(),
 Field.Store.YES);
              doc.add(pathField);

              doc.add(new LongField("modified", file.lastModified(),
 Field.Store.NO));

              doc.add(new
 StringField("name",file.getName(),Field.Store.YES));

             doc.add(new TextField("contents", new BufferedReader(new
 InputStreamReader(fis, "UTF-8"))));

              LineNumberReader lnr=new LineNumberReader(new
 FileReader(file));


             String line=null;
              while( null != (line = lnr.readLine()) ){
                  doc.add(new
 StringField("SC",line.trim(),Field.Store.YES));
                 // doc.add(new
 Field("contents",line,Field.Store.YES,Field.Index.ANALYZED));
              }

              if (writer.getConfig().getOpenMode() ==
 OpenMode.CREATE_OR_APPEND)
 {

                writer.addDocument(doc);
                writer.commit();
                fis.close();
              } else {
                  try
                  {
                writer.updateDocument(new Term("path", file.getPath()),
 doc);

                fis.close();

                  }catch(Exception e)
                  {
                      writer.close();
                       fis.close();

                      e.printStackTrace();

                  }
              }

          }catch (Exception e) {
               writer.close();
                fis.close();

             e.printStackTrace();
          }finally {
              // writer.close();

            fis.close();
          }
        }
      }
 }
 }



On 8/30/2013 10:33 PM, Adrien Grand wrote:
Ankit,

The stack traces you are showing only say there was an out of memory
error. In those cases, the stack trace is unfortunately not always
helpful since the allocation may fail on a small object because
another object is taking all the memory of the JVM. Can you come up
with a small piece of code that reproduces the error you are
encountering? This would help us see if there is something wrong in
the indexing code and try to debug it otherwise.



--
Regards

Ankit Murarka

"What lies behind us and what lies before us are tiny matters compared with what 
lies within us"


---------------------------------------------------------------------
To unsubscribe, e-mail: java-user-unsubscr...@lucene.apache.org
For additional commands, e-mail: java-user-h...@lucene.apache.org

Reply via email to