Hi All,
I am a Lucene developer. I saw your benchmark on the Lucene website
http://lucene.apache.org
I have up to 45 GB of records. When I compress the records, the size grows to
80 GB. How can I compress them to 10 GB or less?
Please help me in this regard.
I have sent you the source code that I use:
*public class MediationIndexer {
public static void main(String[] args) throws Exception{
String slNo="";
String fileName="";
String callType="";
String callingPartyNumber="";
String calledPartyNumber="";
String dateSc="";
String timeSc="";
String chargDur="";
String outgoingRoute="";
String incomingRoute="";
String orgCalledNumber="";
String redirectingNumber="";
String imsiNumber="";
File indexDir = new File("C:/Sample/Mediatio/Index");
Analyzer analyzer = new StandardAnalyzer();
IndexWriter indexWriter = new IndexWriter(indexDir,analyzer,true);
// indexWriter.setUseCompoundFile(true);
File mediationFiles=new File("C:/mediation files");
File fileDir = new File("C:/mediation files");
long startTime = new Date().getTime();
String mediFiles[]=mediationFiles.list();
for(int j=0;j<mediFiles.length;j++)
{
File file = new File("C:/mediation files"+ "/" +mediFiles[j]);
//indexDir is the directory that hosts Lucene's index files
String myFiles[] = file.list();
System.out.println(myFiles.length);
for (int i = 0; i <myFiles.length ; i++){
int recCount = 0;
try {
FileReader fr = new FileReader(file+"/"+ myFiles[i]);
BufferedReader br = new BufferedReader(fr);
//Add documents to the index
*
* String record = br.readLine();
System.out.println("First:"+record);
while (record != null){
System.out.println("Current:"+record);
System.out.println(record);
String[] afterSplit = record.split(",");
for(int p=0;p<1;p++) {
slNo = afterSplit[0];
fileName= afterSplit[1];
callType= afterSplit[2];
callingPartyNumber= afterSplit[3];
calledPartyNumber=afterSplit[4];
dateSc= afterSplit[5];
timeSc=afterSplit[6];
chargDur= afterSplit[7];
outgoingRoute=afterSplit[8];
incomingRoute=afterSplit[9];
orgCalledNumber=afterSplit[10];
redirectingNumber=afterSplit[11];
imsiNumber=afterSplit[12];
String contents =
new String(callType + callingPartyNumber +
calledPartyNumber + dateSc +
timeSc + chargDur + outgoingRoute +
incomingRoute +
imsiNumber);
recCount++;
System.out.println(recCount + ": " + record);
System.out.println(recCount + ": " + contents);
Document document = new Document();
*
* document.add(new Field("contents",contents,
Field.Store.YES,Field.Index.TOKENIZED));
document.add(new Field("callType",callType,
Field.Store.YES ,Field.Index.NO <http://field.index.no/>));
document.add(new
Field("callingPartyNumber",callingPartyNumber,Field.Store.YES,Field.Index.NO<http://field.index.no/>));
document.add(new
Field("calledPartyNumber",calledPartyNumber,Field.Store.YES,Field.Index.NO<http://field.index.no/>
));
document.add(new Field("dateSc",dateSc,
Field.Store.YES,Field.Index.TOKENIZED));
document.add(new Field("timeSc",timeSc,
Field.Store.YES,Field.Index.NO <http://field.index.no/>));
document.add (new Field("chargDur",chargDur,
Field.Store.YES,Field.Index.NO <http://field.index.no/>));
document.add(new Field("outgoingRoute",outgoingRoute,
Field.Store.YES, Field.Index.NO <http://field.index.no/>));
document.add(new Field("incomingRoute",incomingRoute,
Field.Store.YES,Field.Index.NO <http://field.index.no/>));
}
record = br.readLine();
if(record.equalsIgnoreCase("")) {
record=null;
}
}
}catch (IOException e) {
// catch possible io errors from readLine()
e.printStackTrace();
}
}
}
long endTime = new Date().getTime();
System.out.println("It took " + (endTime - startTime)
+ " milliseconds to create an index for the files in the
directory "
+ fileDir.getPath());
}
}
*
--
Regards,
Sebastin Naveen