Hi All,
           I am a Lucene developer. I saw your benchmark on the Lucene website
http://lucene.apache.org
I have records up to 45 GB. When I compress the records, the size goes to 80 GB.
How can I compress them to 10 GB or lower?
Please help me in this regard.

I have sent you the source code that I use:



*public class MediationIndexer {

public static void main(String[] args) throws Exception{
       String slNo="";
        String fileName="";
        String callType="";
        String callingPartyNumber="";
        String calledPartyNumber="";
        String dateSc="";
        String timeSc="";
        String chargDur="";
        String outgoingRoute="";
        String incomingRoute="";
        String orgCalledNumber="";
        String redirectingNumber="";
        String imsiNumber="";



       File   indexDir = new File("C:/Sample/Mediatio/Index");
           Analyzer analyzer = new StandardAnalyzer();
       IndexWriter indexWriter = new IndexWriter(indexDir,analyzer,true);
          // indexWriter.setUseCompoundFile(true);

       File mediationFiles=new File("C:/mediation files");
       File   fileDir  = new File("C:/mediation files");
           long startTime = new Date().getTime();
       String mediFiles[]=mediationFiles.list();
       for(int j=0;j<mediFiles.length;j++)
       {
       File   file = new File("C:/mediation files"+ "/" +mediFiles[j]);

       //indexDir is the directory that hosts Lucene's index files




       String myFiles[] = file.list();
       System.out.println(myFiles.length);

       for (int i = 0; i <myFiles.length ; i++){

       int recCount = 0;
            try {
                 FileReader fr     = new FileReader(file+"/"+ myFiles[i]);
                 BufferedReader br = new BufferedReader(fr);

       //Add documents to the index

                *

*                 String record = br.readLine();
                     System.out.println("First:"+record);
                 while (record  != null){

                      System.out.println("Current:"+record);

                      System.out.println(record);

                      String[] afterSplit = record.split(",");
                      for(int p=0;p<1;p++) {

                      slNo = afterSplit[0];
                      fileName= afterSplit[1];
                      callType=  afterSplit[2];
                      callingPartyNumber= afterSplit[3];
                      calledPartyNumber=afterSplit[4];
                      dateSc= afterSplit[5];
                      timeSc=afterSplit[6];
                      chargDur= afterSplit[7];
                      outgoingRoute=afterSplit[8];
                      incomingRoute=afterSplit[9];
                      orgCalledNumber=afterSplit[10];
                      redirectingNumber=afterSplit[11];
                      imsiNumber=afterSplit[12];

                      String contents =
                         new String(callType  + callingPartyNumber  +
calledPartyNumber  + dateSc +
                                     timeSc  + chargDur +  outgoingRoute +
                                     incomingRoute  +
                                    imsiNumber);
                                   recCount++;
                      System.out.println(recCount + ": " + record);
                      System.out.println(recCount + ": " + contents);

                      Document document = new Document();
                      *

*                       document.add(new Field("contents",contents,
Field.Store.YES,Field.Index.TOKENIZED));
                      document.add(new Field("callType",callType,
Field.Store.YES ,Field.Index.NO <http://field.index.no/>));
                      document.add(new
Field("callingPartyNumber",callingPartyNumber,Field.Store.YES,Field.Index.NO<http://field.index.no/>));

                      document.add(new
Field("calledPartyNumber",calledPartyNumber,Field.Store.YES,Field.Index.NO<http://field.index.no/>
));
                      document.add(new Field("dateSc",dateSc,
Field.Store.YES,Field.Index.TOKENIZED));
                      document.add(new Field("timeSc",timeSc,
Field.Store.YES,Field.Index.NO <http://field.index.no/>));
                      document.add (new Field("chargDur",chargDur,
Field.Store.YES,Field.Index.NO <http://field.index.no/>));
                      document.add(new Field("outgoingRoute",outgoingRoute,
Field.Store.YES, Field.Index.NO <http://field.index.no/>));
                      document.add(new Field("incomingRoute",incomingRoute,
Field.Store.YES,Field.Index.NO <http://field.index.no/>));
                      }



                      record = br.readLine();
                      if(record.equalsIgnoreCase("")) {
                          record=null;
                      }


                  }


                 }catch (IOException e) {
                              // catch possible io errors from readLine()
                              e.printStackTrace();
                           }
       }
       }
           long endTime = new Date().getTime();
           System.out.println("It took " + (endTime - startTime)
            + " milliseconds to create an index for the files in the
directory "
            + fileDir.getPath());
       }


}
*


--
Regards,
Sebastin Naveen

Reply via email to