Thank you for this, I am confident that others will find it useful. Yours
Mark B f.dcg wrote: > > Hi, > > Attached y'll find a modified version of the XLS2CSV example written by > Nick > Burch. > I required a version that was able to take the different sheet into > consideration instead of dumping all data in 1 file. > > This version can take a sheetname as a input or a null. > In the latter case the whole sheet will be exported on a file per sheet > basis. > > I call the class from a Utility class using the following two lines > ... > XLS2CSV xls2csv=new > XLS2CSV(fileOrDirectoryName.getCanonicalPath(), -1); > xls2csv.process(sheetName); > ... > > I hope it can save someone a few cycles .. ;-) > > Cheers, > > Fermin DCG > > > -- > “The reasonable man adapts himself to the world; the unreasonable one > persists in trying to adapt the world to himself. Therefore all progress > depends on the unreasonable man.” > - George Bernard Shaw (1856 - 1950) > > package file; > > /* ==================================================================== > Licensed to the Apache Software Foundation (ASF) under one or more > contributor license agreements. See the NOTICE file distributed with > this work for additional information regarding copyright ownership. > The ASF licenses this file to You under the Apache License, Version 2.0 > (the "License"); you may not use this file except in compliance with > the License. You may obtain a copy of the License at > > http://www.apache.org/licenses/LICENSE-2.0 > > Unless required by applicable law or agreed to in writing, software > distributed under the License is distributed on an "AS IS" BASIS, > WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. > See the License for the specific language governing permissions and > limitations under the License. > ==================================================================== */ > > import java.io.File; > import java.io.FileInputStream; > import java.io.FileNotFoundException; > import java.io.IOException; > import java.io.PrintStream; > import java.util.ArrayList; > import java.util.HashMap; > import java.util.Map; > > import org.apache.commons.io.FilenameUtils; > import org.apache.poi.hssf.eventusermodel.FormatTrackingHSSFListener; > import org.apache.poi.hssf.eventusermodel.HSSFEventFactory; > import org.apache.poi.hssf.eventusermodel.HSSFListener; > import org.apache.poi.hssf.eventusermodel.HSSFRequest; > import org.apache.poi.hssf.eventusermodel.MissingRecordAwareHSSFListener; > import > org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder.SheetRecordCollectingListener; > import > org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord; > import > org.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord; > import org.apache.poi.hssf.model.HSSFFormulaParser; > import org.apache.poi.hssf.record.BOFRecord; > import org.apache.poi.hssf.record.BlankRecord; > import org.apache.poi.hssf.record.BoolErrRecord; > import org.apache.poi.hssf.record.BoundSheetRecord; > import org.apache.poi.hssf.record.FormulaRecord; > import org.apache.poi.hssf.record.LabelRecord; > import org.apache.poi.hssf.record.LabelSSTRecord; > import org.apache.poi.hssf.record.NoteRecord; > import org.apache.poi.hssf.record.NumberRecord; > import org.apache.poi.hssf.record.RKRecord; > import org.apache.poi.hssf.record.Record; > import org.apache.poi.hssf.record.SSTRecord; > import org.apache.poi.hssf.record.StringRecord; > import org.apache.poi.hssf.usermodel.HSSFWorkbook; > import org.apache.poi.poifs.filesystem.POIFSFileSystem; > > > /** > * A XLS -> CSV processor, that uses the MissingRecordAware EventModel > code to ensure it outputs all columns and rows. > * > * @author Nick Burch > */ > public class XLS2CSV implements HSSFListener { > private int minColumns; > private POIFSFileSystem fs; > private PrintStream output; > > private int lastRowNumber; > private int lastColumnNumber; > > /** Should we output the formula, or the value it has? */ > private boolean outputFormulaValues=true; > > /** For parsing Formulas */ > private SheetRecordCollectingListener workbookBuildingListener; > private HSSFWorkbook stubWorkbook; > > // Records we pick up as we process > private SSTRecord sstRecord; > private FormatTrackingHSSFListener formatListener; > > /** So we known which sheet we're on */ > private int sheetIndex=0; > private String sheetName=null; > private BoundSheetRecord[] orderedBSRs; > private ArrayList boundSheetRecords=new ArrayList(); > > // For handling formulas with string results > private int nextRow; > private int nextColumn; > private boolean outputNextStringRecord; > > /** > * Creates a new XLS -> CSV converter > * > * @param fs > * The POIFSFileSystem to process > * @param output > * The PrintStream to output the CSV to > * @param minColumns > * The minimum number of columns to output, or -1 for no > minimum > */ > public XLS2CSV(POIFSFileSystem fs, PrintStream output, int minColumns) { > this.fs=fs; > this.output=output; > this.minColumns=minColumns; > } > > private String csvFilePrefix=null; > /** > * Creates a new XLS -> CSV converter > * > * @param filename The file to process > * @param minColumns The minimum number of columns to output, or -1 for > no minimum > * @throws IOException > * @throws FileNotFoundException > */ > public XLS2CSV(String filename, int minColumns) throws IOException, > FileNotFoundException { > //this(new POIFSFileSystem(new FileInputStream(filename)), new > PrintStream(new File(filename+".csv")), minColumns); > this(new POIFSFileSystem(new FileInputStream(filename)), null, > minColumns); > File f = new File(filename); > String fname = f.getCanonicalPath(); > > String fileToSort= FilenameUtils.getBaseName(fname); > String pathToFile= "/"+ FilenameUtils.getPath(fname); > csvFilePrefix = pathToFile+ fileToSort+ "-"; > } > > /** > * Initiates the processing of the XLS file to CSV > */ > public void process() throws IOException { > // If a mismatch occurs skip the whole thing > MissingRecordAwareHSSFListener listener=new > MissingRecordAwareHSSFListener(this); > formatListener=new FormatTrackingHSSFListener(listener); > > HSSFEventFactory factory=new HSSFEventFactory(); > HSSFRequest request=new HSSFRequest(); > > if (outputFormulaValues) { > request.addListenerForAllRecords(formatListener); > } else { > workbookBuildingListener=new > SheetRecordCollectingListener(formatListener); > > > request.addListenerForAllRecords(workbookBuildingListener); > } > > factory.processWorkbookEvents(request, fs); > > for (PrintStream output : outputStreams.values()) { > output.flush(); > output.close(); > } > } > > /** > * Initiates the processing of an XLS tab-sheet to a single CSV file > */ > public void process(String sheetName) throws IOException { > this.sheetName=sheetName; > process(); > } > > private Map<Integer, String> sheetIndices= new HashMap<Integer, > String>(3); > private Map<String, PrintStream> outputStreams=new HashMap<String, > PrintStream>(3); > > /** > * Main HSSFListener method, processes events, and outputs the CSV as > the > file is processed. > */ > public void processRecord(Record record) { > int thisRow=-1; > int thisColumn=-1; > String thisStr=null; > > switch (record.getSid()) { > case BOFRecord.sid: > BOFRecord br=(BOFRecord) record; > if (br.getType()==BOFRecord.TYPE_WORKSHEET) { > // Create sub workbook if required > if (workbookBuildingListener!=null&&stubWorkbook==null) { > stubWorkbook=workbookBuildingListener.getStubHSSFWorkbook(); > } > > // Output the worksheet name > // Works by ordering the BSRs by the location of their > BOFRecords, and then knowing that we process BOFRecords in byte offset > order > > sheetIndex++; > System.out.println("Encountered new sheet reference for "+ > sheetIndices.get(sheetIndex)); > // Set the proper output stream for the sheet > if (this.sheetName==null || "".equals(sheetName)) { > this.output = outputStreams.get(sheetIndices.get(sheetIndex)); > } else if (sheetIndices.get(sheetIndex).equals(sheetName)) { // > Test for proper sheetname > this.output = outputStreams.get(sheetName); > } > > if (orderedBSRs==null) { > > orderedBSRs=BoundSheetRecord.orderByBofPosition(boundSheetRecords); > } > } > break; > > case BoundSheetRecord.sid: > BoundSheetRecord bsr=(BoundSheetRecord) record; > Integer sIdx = sheetIndices.size()+ 1; > sheetIndices.put( sIdx, bsr.getSheetname()); > > String exportFilename = csvFilePrefix+ > bsr.getSheetname()+ ".csv"; > // Export to a csv file output-stream > PrintStream output=null; > // Set the proper output stream for the sheet > if (this.sheetName==null || "".equals(sheetName)) { > try { > output=new PrintStream(new File(exportFilename)); > } catch (FileNotFoundException e) { > e.printStackTrace(); > } > outputStreams.put(bsr.getSheetname(), output); > } else if (bsr.getSheetname().equals(sheetName)) { // Test for > proper sheetname > try { > output=new PrintStream(new File(exportFilename)); > } catch (FileNotFoundException e) { > e.printStackTrace(); > } > outputStreams.put(bsr.getSheetname(), output); > } > > System.out.println("Detected sheet named: > "+bsr.getSheetname()); > > if (bsr.getSheetname().equals(this.sheetName)) > { // Only process one > specified sheet > boundSheetRecords.add(record); > } > > break; > > case SSTRecord.sid: > sstRecord=(SSTRecord) record; > break; > > case BlankRecord.sid: > BlankRecord brec=(BlankRecord) record; > > thisRow=brec.getRow(); > thisColumn=brec.getColumn(); > thisStr=""; > break; > case BoolErrRecord.sid: > BoolErrRecord berec=(BoolErrRecord) record; > > thisRow=berec.getRow(); > thisColumn=berec.getColumn(); > thisStr=""; > break; > > case FormulaRecord.sid: > FormulaRecord frec=(FormulaRecord) record; > > thisRow=frec.getRow(); > thisColumn=frec.getColumn(); > > if (outputFormulaValues) { > if (Double.isNaN(frec.getValue())) { > // Formula result is a string > // This is stored in the next record > outputNextStringRecord=true; > nextRow=frec.getRow(); > nextColumn=frec.getColumn(); > } else { > thisStr=formatListener.formatNumberDateCell(frec); > } > } else { > thisStr='"'+HSSFFormulaParser.toFormulaString(stubWorkbook, > frec.getParsedExpression())+'"'; > } > break; > case StringRecord.sid: > if (outputNextStringRecord) { > // String for formula > StringRecord srec=(StringRecord) record; > thisStr=srec.getString(); > thisRow=nextRow; > thisColumn=nextColumn; > outputNextStringRecord=false; > } > break; > > case LabelRecord.sid: > LabelRecord lrec=(LabelRecord) record; > > thisRow=lrec.getRow(); > thisColumn=lrec.getColumn(); > thisStr='"'+lrec.getValue()+'"'; > break; > case LabelSSTRecord.sid: > LabelSSTRecord lsrec=(LabelSSTRecord) record; > > thisRow=lsrec.getRow(); > thisColumn=lsrec.getColumn(); > if (sstRecord==null) { > thisStr='"'+"(No SST Record, can't identify string)"+'"'; > } else { > > thisStr='"'+sstRecord.getString(lsrec.getSSTIndex()).toString()+'"'; > } > break; > case NoteRecord.sid: > NoteRecord nrec=(NoteRecord) record; > > thisRow=nrec.getRow(); > thisColumn=nrec.getColumn(); > // TODO: Find object to match nrec.getShapeId() > thisStr='"'+"(TODO)"+'"'; > break; > case NumberRecord.sid: > NumberRecord numrec=(NumberRecord) record; > > thisRow=numrec.getRow(); > thisColumn=numrec.getColumn(); > > // Format > thisStr=formatListener.formatNumberDateCell(numrec); > break; > case RKRecord.sid: > RKRecord rkrec=(RKRecord) record; > > thisRow=rkrec.getRow(); > thisColumn=rkrec.getColumn(); > thisStr='"'+"(TODO)"+'"'; > break; > default: > break; > } > > > // Handle new row, but only if it belongs to the sheet to be processed > if (thisRow!=-1&&thisRow!=lastRowNumber) { > lastColumnNumber=-1; > } > > if (output!=null) { // Make sure there is an outputStream > // Handle missing column > if (record instanceof MissingCellDummyRecord) { > MissingCellDummyRecord mc=(MissingCellDummyRecord) record; > thisRow=mc.getRow(); > thisColumn=mc.getColumn(); > thisStr=""; > } > // If we got something to print out, do so > if (thisStr!=null) { > if (thisColumn>0) { > output.print(','); > //System.out.print(','); > } > output.print(thisStr); > //System.out.print(thisStr); > } > // Update column and row count > if (thisRow>-1) > lastRowNumber=thisRow; > if (thisColumn>-1) > lastColumnNumber=thisColumn; > // Handle end of row > if (record instanceof LastCellOfRowDummyRecord) { > // Print out any missing commas if needed > if (minColumns>0) { > // Columns are 0 based > if (lastColumnNumber==-1) { > lastColumnNumber=0; > } > for (int i=lastColumnNumber; i< (minColumns); i++) { > output.print(','); > //System.out.print(','); > } > } > > // We're onto a new row > lastColumnNumber=-1; > > // End the row > output.println(); > //System.out.println(); > } > } > > } > > public static void main(String[] args) throws Exception { > if (args.length<1) { > System.err.println("Use:"); > System.err.println(" XLS2CSVmra <xls file> [min > columns]"); > System.exit(1); > } > > int minColumns=-1; > if (args.length>=2) { > minColumns=Integer.parseInt(args[1]); > } > > XLS2CSV xls2csv=new XLS2CSV(args[0], minColumns); > xls2csv.process(); > } > } > > > --------------------------------------------------------------------- > To unsubscribe, e-mail: [email protected] > For additional commands, e-mail: [email protected] > -- View this message in context: http://old.nabble.com/Modified-XSL2CSV-example-tp27264115p27269649.html Sent from the POI - User mailing list archive at Nabble.com. --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
