Thank you for this, I am confident that others will find it useful.

Yours

Mark B


f.dcg wrote:
> 
> Hi,
> 
> Attached y'll find a modified version of the XLS2CSV example written by
> Nick
> Burch.
> I required a version that was able to take the different sheet into
> consideration instead of dumping all data in 1 file.
> 
> This version can take a sheetname as a input or a null.
> In the latter case the whole sheet will be exported on a file per sheet
> basis.
> 
> I call the class from a Utility class using the following two lines
> ...
>             XLS2CSV xls2csv=new
> XLS2CSV(fileOrDirectoryName.getCanonicalPath(), -1);
>             xls2csv.process(sheetName);
> ...
> 
> I hope it can save someone a few cycles .. ;-)
> 
> Cheers,
> 
> Fermin DCG
> 
> 
> -- 
> “The reasonable man adapts himself to the world; the unreasonable one
> persists in trying to adapt the world to himself. Therefore all progress
> depends on the unreasonable man.”
> - George Bernard Shaw (1856 - 1950)
> 
> package file;
> 
> /* ====================================================================
>  Licensed to the Apache Software Foundation (ASF) under one or more
>  contributor license agreements.  See the NOTICE file distributed with
>  this work for additional information regarding copyright ownership.
>  The ASF licenses this file to You under the Apache License, Version 2.0
>  (the "License"); you may not use this file except in compliance with
>  the License.  You may obtain a copy of the License at
> 
>  http://www.apache.org/licenses/LICENSE-2.0
> 
>  Unless required by applicable law or agreed to in writing, software
>  distributed under the License is distributed on an "AS IS" BASIS,
>  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
>  See the License for the specific language governing permissions and
>  limitations under the License.
>  ==================================================================== */
> 
> import java.io.File;
> import java.io.FileInputStream;
> import java.io.FileNotFoundException;
> import java.io.IOException;
> import java.io.PrintStream;
> import java.util.ArrayList;
> import java.util.HashMap;
> import java.util.Map;
> 
> import org.apache.commons.io.FilenameUtils;
> import org.apache.poi.hssf.eventusermodel.FormatTrackingHSSFListener;
> import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
> import org.apache.poi.hssf.eventusermodel.HSSFListener;
> import org.apache.poi.hssf.eventusermodel.HSSFRequest;
> import org.apache.poi.hssf.eventusermodel.MissingRecordAwareHSSFListener;
> import
> org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder.SheetRecordCollectingListener;
> import
> org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord;
> import
> org.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord;
> import org.apache.poi.hssf.model.HSSFFormulaParser;
> import org.apache.poi.hssf.record.BOFRecord;
> import org.apache.poi.hssf.record.BlankRecord;
> import org.apache.poi.hssf.record.BoolErrRecord;
> import org.apache.poi.hssf.record.BoundSheetRecord;
> import org.apache.poi.hssf.record.FormulaRecord;
> import org.apache.poi.hssf.record.LabelRecord;
> import org.apache.poi.hssf.record.LabelSSTRecord;
> import org.apache.poi.hssf.record.NoteRecord;
> import org.apache.poi.hssf.record.NumberRecord;
> import org.apache.poi.hssf.record.RKRecord;
> import org.apache.poi.hssf.record.Record;
> import org.apache.poi.hssf.record.SSTRecord;
> import org.apache.poi.hssf.record.StringRecord;
> import org.apache.poi.hssf.usermodel.HSSFWorkbook;
> import org.apache.poi.poifs.filesystem.POIFSFileSystem;
> 
> 
> /**
>  * A XLS -> CSV processor, that uses the MissingRecordAware EventModel
> code to ensure it outputs all columns and rows.
>  * 
>  * @author Nick Burch
>  */
> public class XLS2CSV implements HSSFListener {
>       private int minColumns;
>       private POIFSFileSystem fs;
>       private PrintStream output;
> 
>       private int lastRowNumber;
>       private int lastColumnNumber;
> 
>       /** Should we output the formula, or the value it has? */
>       private boolean outputFormulaValues=true;
> 
>       /** For parsing Formulas */
>       private SheetRecordCollectingListener workbookBuildingListener;
>       private HSSFWorkbook stubWorkbook;
> 
>       // Records we pick up as we process
>       private SSTRecord sstRecord;
>       private FormatTrackingHSSFListener formatListener;
> 
>       /** So we known which sheet we're on */
>       private int sheetIndex=0;
>       private String sheetName=null;
>       private BoundSheetRecord[] orderedBSRs;
>       private ArrayList boundSheetRecords=new ArrayList();
> 
>       // For handling formulas with string results
>       private int nextRow;
>       private int nextColumn;
>       private boolean outputNextStringRecord;
> 
>       /**
>        * Creates a new XLS -> CSV converter
>        * 
>        * @param fs
>        *          The POIFSFileSystem to process
>        * @param output
>        *          The PrintStream to output the CSV to
>        * @param minColumns
>        *          The minimum number of columns to output, or -1 for no 
> minimum
>        */
>       public XLS2CSV(POIFSFileSystem fs, PrintStream output, int minColumns) {
>               this.fs=fs;
>               this.output=output;
>               this.minColumns=minColumns;
>       }
> 
>       private String csvFilePrefix=null;
>       /**
>        * Creates a new XLS -> CSV converter
>        * 
>        * @param filename The file to process
>        * @param minColumns The minimum number of columns to output, or -1 for
> no minimum
>        * @throws IOException
>        * @throws FileNotFoundException
>        */
>       public XLS2CSV(String filename, int minColumns) throws IOException,
> FileNotFoundException {
>               //this(new POIFSFileSystem(new FileInputStream(filename)), new
> PrintStream(new File(filename+".csv")), minColumns);
>               this(new POIFSFileSystem(new FileInputStream(filename)), null,
> minColumns);
>               File f = new File(filename);
>               String fname = f.getCanonicalPath();
>               
>               String fileToSort= FilenameUtils.getBaseName(fname);
>               String pathToFile= "/"+ FilenameUtils.getPath(fname);
>               csvFilePrefix = pathToFile+ fileToSort+ "-";
>       }
> 
>       /**
>        * Initiates the processing of the XLS file to CSV
>        */
>       public void process() throws IOException {
>               // If a mismatch occurs skip the whole thing
>               MissingRecordAwareHSSFListener listener=new
> MissingRecordAwareHSSFListener(this);
>               formatListener=new FormatTrackingHSSFListener(listener);
> 
>               HSSFEventFactory factory=new HSSFEventFactory();
>               HSSFRequest request=new HSSFRequest();
> 
>               if (outputFormulaValues) {
>                       request.addListenerForAllRecords(formatListener);
>               } else {
>                       workbookBuildingListener=new
> SheetRecordCollectingListener(formatListener);
>                       
>                       
> request.addListenerForAllRecords(workbookBuildingListener);
>               }
> 
>               factory.processWorkbookEvents(request, fs);
> 
>               for (PrintStream output : outputStreams.values()) {
>           output.flush();
>           output.close();
>     }
>       }
> 
>       /**
>        * Initiates the processing of an XLS tab-sheet to a single CSV file
>        */
>       public void process(String sheetName) throws IOException {
>               this.sheetName=sheetName;
>               process();
>       }
> 
>       private Map<Integer, String> sheetIndices= new HashMap<Integer,
> String>(3);
>       private Map<String, PrintStream> outputStreams=new HashMap<String,
> PrintStream>(3);
>       
>       /**
>        * Main HSSFListener method, processes events, and outputs the CSV as 
> the
> file is processed.
>        */
>       public void processRecord(Record record) {
>               int thisRow=-1;
>               int thisColumn=-1;
>               String thisStr=null;
> 
>               switch (record.getSid()) {
>       case BOFRecord.sid:
>         BOFRecord br=(BOFRecord) record;
>         if (br.getType()==BOFRecord.TYPE_WORKSHEET) {
>           // Create sub workbook if required
>           if (workbookBuildingListener!=null&&stubWorkbook==null) {
>                   stubWorkbook=workbookBuildingListener.getStubHSSFWorkbook();
>           }
> 
>           // Output the worksheet name
>           // Works by ordering the BSRs by the location of their
> BOFRecords, and then knowing that we process BOFRecords in byte offset
> order
> 
>           sheetIndex++;
>           System.out.println("Encountered new sheet reference for "+
> sheetIndices.get(sheetIndex));
>           // Set the proper output stream for the sheet
>           if (this.sheetName==null || "".equals(sheetName)) {
>               this.output = outputStreams.get(sheetIndices.get(sheetIndex));
>           } else if (sheetIndices.get(sheetIndex).equals(sheetName)) { //
> Test for proper sheetname 
>               this.output = outputStreams.get(sheetName);
>           }
>           
>           if (orderedBSRs==null) {
>                  
> orderedBSRs=BoundSheetRecord.orderByBofPosition(boundSheetRecords);
>           }
>         }
>         break;
> 
>       case BoundSheetRecord.sid:
>                               BoundSheetRecord bsr=(BoundSheetRecord) record;
>                               Integer sIdx = sheetIndices.size()+ 1;
>                               sheetIndices.put( sIdx, bsr.getSheetname());
>                               
>                               String exportFilename = csvFilePrefix+ 
> bsr.getSheetname()+ ".csv";
>                               // Export to a csv file output-stream
>                               PrintStream output=null;
>           // Set the proper output stream for the sheet
>           if (this.sheetName==null || "".equals(sheetName)) {
>           try {
>               output=new PrintStream(new File(exportFilename));
>           } catch (FileNotFoundException e) {
>               e.printStackTrace();
>           }
>                               outputStreams.put(bsr.getSheetname(), output);
>           } else if (bsr.getSheetname().equals(sheetName)) { // Test for
> proper sheetname 
>           try {
>               output=new PrintStream(new File(exportFilename));
>           } catch (FileNotFoundException e) {
>               e.printStackTrace();
>           }
>                               outputStreams.put(bsr.getSheetname(), output);
>           }
> 
>                               System.out.println("Detected sheet named: 
> "+bsr.getSheetname());
> 
>                               if (bsr.getSheetname().equals(this.sheetName)) 
> { // Only process one
> specified sheet
>                       boundSheetRecords.add(record);
>               }
> 
>               break;
> 
>       case SSTRecord.sid:
>         sstRecord=(SSTRecord) record;
>         break;
> 
>       case BlankRecord.sid:
>         BlankRecord brec=(BlankRecord) record;
> 
>         thisRow=brec.getRow();
>         thisColumn=brec.getColumn();
>         thisStr="";
>         break;
>       case BoolErrRecord.sid:
>         BoolErrRecord berec=(BoolErrRecord) record;
> 
>         thisRow=berec.getRow();
>         thisColumn=berec.getColumn();
>         thisStr="";
>         break;
> 
>       case FormulaRecord.sid:
>         FormulaRecord frec=(FormulaRecord) record;
> 
>         thisRow=frec.getRow();
>         thisColumn=frec.getColumn();
> 
>         if (outputFormulaValues) {
>           if (Double.isNaN(frec.getValue())) {
>                   // Formula result is a string
>                   // This is stored in the next record
>                   outputNextStringRecord=true;
>                   nextRow=frec.getRow();
>                   nextColumn=frec.getColumn();
>           } else {
>                   thisStr=formatListener.formatNumberDateCell(frec);
>           }
>         } else {
>           thisStr='"'+HSSFFormulaParser.toFormulaString(stubWorkbook,
> frec.getParsedExpression())+'"';
>         }
>         break;
>       case StringRecord.sid:
>         if (outputNextStringRecord) {
>           // String for formula
>           StringRecord srec=(StringRecord) record;
>           thisStr=srec.getString();
>           thisRow=nextRow;
>           thisColumn=nextColumn;
>           outputNextStringRecord=false;
>         }
>         break;
> 
>       case LabelRecord.sid:
>         LabelRecord lrec=(LabelRecord) record;
> 
>         thisRow=lrec.getRow();
>         thisColumn=lrec.getColumn();
>         thisStr='"'+lrec.getValue()+'"';
>         break;
>       case LabelSSTRecord.sid:
>         LabelSSTRecord lsrec=(LabelSSTRecord) record;
> 
>         thisRow=lsrec.getRow();
>         thisColumn=lsrec.getColumn();
>         if (sstRecord==null) {
>           thisStr='"'+"(No SST Record, can't identify string)"+'"';
>         } else {
>          
> thisStr='"'+sstRecord.getString(lsrec.getSSTIndex()).toString()+'"';
>         }
>         break;
>       case NoteRecord.sid:
>         NoteRecord nrec=(NoteRecord) record;
> 
>         thisRow=nrec.getRow();
>         thisColumn=nrec.getColumn();
>         // TODO: Find object to match nrec.getShapeId()
>         thisStr='"'+"(TODO)"+'"';
>         break;
>       case NumberRecord.sid:
>         NumberRecord numrec=(NumberRecord) record;
> 
>         thisRow=numrec.getRow();
>         thisColumn=numrec.getColumn();
> 
>         // Format
>         thisStr=formatListener.formatNumberDateCell(numrec);
>         break;
>       case RKRecord.sid:
>         RKRecord rkrec=(RKRecord) record;
> 
>         thisRow=rkrec.getRow();
>         thisColumn=rkrec.getColumn();
>         thisStr='"'+"(TODO)"+'"';
>         break;
>       default:
>         break;
>     }
>               
> 
>     // Handle new row, but only if it belongs to the sheet to be processed
>     if (thisRow!=-1&&thisRow!=lastRowNumber) {
>           lastColumnNumber=-1;
>     }
> 
>     if (output!=null) { // Make sure there is an outputStream
>           // Handle missing column
>           if (record instanceof MissingCellDummyRecord) {
>                   MissingCellDummyRecord mc=(MissingCellDummyRecord) record;
>                   thisRow=mc.getRow();
>                   thisColumn=mc.getColumn();
>                   thisStr="";
>           }
>           // If we got something to print out, do so
>           if (thisStr!=null) {
>                   if (thisColumn>0) {
>                           output.print(',');
>                           //System.out.print(',');
>                   }
>                   output.print(thisStr);
>                   //System.out.print(thisStr);
>           }
>           // Update column and row count
>           if (thisRow>-1)
>                   lastRowNumber=thisRow;
>           if (thisColumn>-1)
>                   lastColumnNumber=thisColumn;
>           // Handle end of row
>           if (record instanceof LastCellOfRowDummyRecord) {
>                   // Print out any missing commas if needed
>                   if (minColumns>0) {
>                           // Columns are 0 based
>                           if (lastColumnNumber==-1) {
>                                   lastColumnNumber=0;
>                           }
>                           for (int i=lastColumnNumber; i< (minColumns); i++) {
>                                   output.print(',');
>                                   //System.out.print(',');
>                           }
>                   }
> 
>                   // We're onto a new row
>                   lastColumnNumber=-1;
> 
>                   // End the row
>                   output.println();
>                   //System.out.println();
>           }
>     }
>     
>       }
> 
>       public static void main(String[] args) throws Exception {
>               if (args.length<1) {
>                       System.err.println("Use:");
>                       System.err.println("  XLS2CSVmra <xls file> [min 
> columns]");
>                       System.exit(1);
>               }
> 
>               int minColumns=-1;
>               if (args.length>=2) {
>                       minColumns=Integer.parseInt(args[1]);
>               }
> 
>               XLS2CSV xls2csv=new XLS2CSV(args[0], minColumns);
>               xls2csv.process();
>       }
> }
> 
> 
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: [email protected]
> For additional commands, e-mail: [email protected]
> 

-- 
View this message in context: 
http://old.nabble.com/Modified-XSL2CSV-example-tp27264115p27269649.html
Sent from the POI - User mailing list archive at Nabble.com.


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to