Hi,

Attached y'll find a modified version of the XLS2CSV example written by Nick
Burch.
I required a version that was able to take the different sheet into
consideration instead of dumping all data in 1 file.

This version can take a sheetname as a input or a null.
In the latter case the whole sheet will be exported on a file per sheet
basis.

I call the class from a Utility class using the following two lines
...
            XLS2CSV xls2csv=new
XLS2CSV(fileOrDirectoryName.getCanonicalPath(), -1);
            xls2csv.process(sheetName);
...

I hope it can save someone a few cycles .. ;-)

Cheers,

Fermin DCG


-- 
“The reasonable man adapts himself to the world; the unreasonable one
persists in trying to adapt the world to himself. Therefore all progress
depends on the unreasonable man.”
- George Bernard Shaw (1856 - 1950)
package file;

/* ====================================================================
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements.  See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License.  You may obtain a copy of the License at

 http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==================================================================== */

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;

import org.apache.commons.io.FilenameUtils;
import org.apache.poi.hssf.eventusermodel.FormatTrackingHSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
import org.apache.poi.hssf.eventusermodel.HSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFRequest;
import org.apache.poi.hssf.eventusermodel.MissingRecordAwareHSSFListener;
import org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder.SheetRecordCollectingListener;
import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord;
import org.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord;
import org.apache.poi.hssf.model.HSSFFormulaParser;
import org.apache.poi.hssf.record.BOFRecord;
import org.apache.poi.hssf.record.BlankRecord;
import org.apache.poi.hssf.record.BoolErrRecord;
import org.apache.poi.hssf.record.BoundSheetRecord;
import org.apache.poi.hssf.record.FormulaRecord;
import org.apache.poi.hssf.record.LabelRecord;
import org.apache.poi.hssf.record.LabelSSTRecord;
import org.apache.poi.hssf.record.NoteRecord;
import org.apache.poi.hssf.record.NumberRecord;
import org.apache.poi.hssf.record.RKRecord;
import org.apache.poi.hssf.record.Record;
import org.apache.poi.hssf.record.SSTRecord;
import org.apache.poi.hssf.record.StringRecord;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;


/**
 * A XLS -> CSV processor, that uses the MissingRecordAware EventModel code to ensure it outputs all columns and rows.
 * 
 * @author Nick Burch
 */
public class XLS2CSV implements HSSFListener {
	private int minColumns;
	private POIFSFileSystem fs;
	private PrintStream output;

	private int lastRowNumber;
	private int lastColumnNumber;

	/** Should we output the formula, or the value it has? */
	private boolean outputFormulaValues=true;

	/** For parsing Formulas */
	private SheetRecordCollectingListener workbookBuildingListener;
	private HSSFWorkbook stubWorkbook;

	// Records we pick up as we process
	private SSTRecord sstRecord;
	private FormatTrackingHSSFListener formatListener;

	/** So we known which sheet we're on */
	private int sheetIndex=0;
	private String sheetName=null;
	private BoundSheetRecord[] orderedBSRs;
	private ArrayList boundSheetRecords=new ArrayList();

	// For handling formulas with string results
	private int nextRow;
	private int nextColumn;
	private boolean outputNextStringRecord;

	/**
	 * Creates a new XLS -> CSV converter
	 * 
	 * @param fs
	 *          The POIFSFileSystem to process
	 * @param output
	 *          The PrintStream to output the CSV to
	 * @param minColumns
	 *          The minimum number of columns to output, or -1 for no minimum
	 */
	public XLS2CSV(POIFSFileSystem fs, PrintStream output, int minColumns) {
		this.fs=fs;
		this.output=output;
		this.minColumns=minColumns;
	}

	private String csvFilePrefix=null;
	/**
	 * Creates a new XLS -> CSV converter
	 * 
	 * @param filename The file to process
	 * @param minColumns The minimum number of columns to output, or -1 for no minimum
	 * @throws IOException
	 * @throws FileNotFoundException
	 */
	public XLS2CSV(String filename, int minColumns) throws IOException, FileNotFoundException {
		//this(new POIFSFileSystem(new FileInputStream(filename)), new PrintStream(new File(filename+".csv")), minColumns);
		this(new POIFSFileSystem(new FileInputStream(filename)), null, minColumns);
		File f = new File(filename);
		String fname = f.getCanonicalPath();
		
		String fileToSort= FilenameUtils.getBaseName(fname);
		String pathToFile= "/"+ FilenameUtils.getPath(fname);
		csvFilePrefix = pathToFile+ fileToSort+ "-";
	}

	/**
	 * Initiates the processing of the XLS file to CSV
	 */
	public void process() throws IOException {
		// If a mismatch occurs skip the whole thing
		MissingRecordAwareHSSFListener listener=new MissingRecordAwareHSSFListener(this);
		formatListener=new FormatTrackingHSSFListener(listener);

		HSSFEventFactory factory=new HSSFEventFactory();
		HSSFRequest request=new HSSFRequest();

		if (outputFormulaValues) {
			request.addListenerForAllRecords(formatListener);
		} else {
			workbookBuildingListener=new SheetRecordCollectingListener(formatListener);
			
			request.addListenerForAllRecords(workbookBuildingListener);
		}

		factory.processWorkbookEvents(request, fs);

		for (PrintStream output : outputStreams.values()) {
	    output.flush();
	    output.close();
    }
	}

	/**
	 * Initiates the processing of an XLS tab-sheet to a single CSV file
	 */
	public void process(String sheetName) throws IOException {
		this.sheetName=sheetName;
		process();
	}

	private Map<Integer, String> sheetIndices= new HashMap<Integer, String>(3);
	private Map<String, PrintStream> outputStreams=new HashMap<String, PrintStream>(3);
	
	/**
	 * Main HSSFListener method, processes events, and outputs the CSV as the file is processed.
	 */
	public void processRecord(Record record) {
		int thisRow=-1;
		int thisColumn=-1;
		String thisStr=null;

		switch (record.getSid()) {
      case BOFRecord.sid:
        BOFRecord br=(BOFRecord) record;
        if (br.getType()==BOFRecord.TYPE_WORKSHEET) {
    	    // Create sub workbook if required
    	    if (workbookBuildingListener!=null&&stubWorkbook==null) {
    		    stubWorkbook=workbookBuildingListener.getStubHSSFWorkbook();
    	    }

    	    // Output the worksheet name
    	    // Works by ordering the BSRs by the location of their BOFRecords, and then knowing that we process BOFRecords in byte offset order

    	    sheetIndex++;
    	    System.out.println("Encountered new sheet reference for "+ sheetIndices.get(sheetIndex));
    	    // Set the proper output stream for the sheet
    	    if (this.sheetName==null || "".equals(sheetName)) {
    	    	this.output = outputStreams.get(sheetIndices.get(sheetIndex));
    	    } else if (sheetIndices.get(sheetIndex).equals(sheetName)) { // Test for proper sheetname 
    	    	this.output = outputStreams.get(sheetName);
    	    }
    	    
    	    if (orderedBSRs==null) {
    		    orderedBSRs=BoundSheetRecord.orderByBofPosition(boundSheetRecords);
    	    }
        }
        break;

      case BoundSheetRecord.sid:
				BoundSheetRecord bsr=(BoundSheetRecord) record;
				Integer sIdx = sheetIndices.size()+ 1;
				sheetIndices.put( sIdx, bsr.getSheetname());
				
				String exportFilename = csvFilePrefix+ bsr.getSheetname()+ ".csv";
				// Export to a csv file output-stream
				PrintStream output=null;
  	    // Set the proper output stream for the sheet
  	    if (this.sheetName==null || "".equals(sheetName)) {
          try {
  	        output=new PrintStream(new File(exportFilename));
          } catch (FileNotFoundException e) {
  	        e.printStackTrace();
          }
  				outputStreams.put(bsr.getSheetname(), output);
  	    } else if (bsr.getSheetname().equals(sheetName)) { // Test for proper sheetname 
          try {
  	        output=new PrintStream(new File(exportFilename));
          } catch (FileNotFoundException e) {
  	        e.printStackTrace();
          }
  				outputStreams.put(bsr.getSheetname(), output);
  	    }

				System.out.println("Detected sheet named: "+bsr.getSheetname());

				if (bsr.getSheetname().equals(this.sheetName)) { // Only process one specified sheet
      		boundSheetRecords.add(record);
      	}

      	break;

      case SSTRecord.sid:
        sstRecord=(SSTRecord) record;
        break;

      case BlankRecord.sid:
        BlankRecord brec=(BlankRecord) record;

        thisRow=brec.getRow();
        thisColumn=brec.getColumn();
        thisStr="";
        break;
      case BoolErrRecord.sid:
        BoolErrRecord berec=(BoolErrRecord) record;

        thisRow=berec.getRow();
        thisColumn=berec.getColumn();
        thisStr="";
        break;

      case FormulaRecord.sid:
        FormulaRecord frec=(FormulaRecord) record;

        thisRow=frec.getRow();
        thisColumn=frec.getColumn();

        if (outputFormulaValues) {
    	    if (Double.isNaN(frec.getValue())) {
    		    // Formula result is a string
    		    // This is stored in the next record
    		    outputNextStringRecord=true;
    		    nextRow=frec.getRow();
    		    nextColumn=frec.getColumn();
    	    } else {
    		    thisStr=formatListener.formatNumberDateCell(frec);
    	    }
        } else {
    	    thisStr='"'+HSSFFormulaParser.toFormulaString(stubWorkbook, frec.getParsedExpression())+'"';
        }
        break;
      case StringRecord.sid:
        if (outputNextStringRecord) {
    	    // String for formula
    	    StringRecord srec=(StringRecord) record;
    	    thisStr=srec.getString();
    	    thisRow=nextRow;
    	    thisColumn=nextColumn;
    	    outputNextStringRecord=false;
        }
        break;

      case LabelRecord.sid:
        LabelRecord lrec=(LabelRecord) record;

        thisRow=lrec.getRow();
        thisColumn=lrec.getColumn();
        thisStr='"'+lrec.getValue()+'"';
        break;
      case LabelSSTRecord.sid:
        LabelSSTRecord lsrec=(LabelSSTRecord) record;

        thisRow=lsrec.getRow();
        thisColumn=lsrec.getColumn();
        if (sstRecord==null) {
    	    thisStr='"'+"(No SST Record, can't identify string)"+'"';
        } else {
    	    thisStr='"'+sstRecord.getString(lsrec.getSSTIndex()).toString()+'"';
        }
        break;
      case NoteRecord.sid:
        NoteRecord nrec=(NoteRecord) record;

        thisRow=nrec.getRow();
        thisColumn=nrec.getColumn();
        // TODO: Find object to match nrec.getShapeId()
        thisStr='"'+"(TODO)"+'"';
        break;
      case NumberRecord.sid:
        NumberRecord numrec=(NumberRecord) record;

        thisRow=numrec.getRow();
        thisColumn=numrec.getColumn();

        // Format
        thisStr=formatListener.formatNumberDateCell(numrec);
        break;
      case RKRecord.sid:
        RKRecord rkrec=(RKRecord) record;

        thisRow=rkrec.getRow();
        thisColumn=rkrec.getColumn();
        thisStr='"'+"(TODO)"+'"';
        break;
      default:
        break;
    }
		

    // Handle new row, but only if it belongs to the sheet to be processed
    if (thisRow!=-1&&thisRow!=lastRowNumber) {
	    lastColumnNumber=-1;
    }

    if (output!=null) { // Make sure there is an outputStream
	    // Handle missing column
	    if (record instanceof MissingCellDummyRecord) {
		    MissingCellDummyRecord mc=(MissingCellDummyRecord) record;
		    thisRow=mc.getRow();
		    thisColumn=mc.getColumn();
		    thisStr="";
	    }
	    // If we got something to print out, do so
	    if (thisStr!=null) {
		    if (thisColumn>0) {
			    output.print(',');
			    //System.out.print(',');
		    }
		    output.print(thisStr);
		    //System.out.print(thisStr);
	    }
	    // Update column and row count
	    if (thisRow>-1)
		    lastRowNumber=thisRow;
	    if (thisColumn>-1)
		    lastColumnNumber=thisColumn;
	    // Handle end of row
	    if (record instanceof LastCellOfRowDummyRecord) {
		    // Print out any missing commas if needed
		    if (minColumns>0) {
			    // Columns are 0 based
			    if (lastColumnNumber==-1) {
				    lastColumnNumber=0;
			    }
			    for (int i=lastColumnNumber; i< (minColumns); i++) {
				    output.print(',');
				    //System.out.print(',');
			    }
		    }

		    // We're onto a new row
		    lastColumnNumber=-1;

		    // End the row
		    output.println();
		    //System.out.println();
	    }
    }
    
	}

	public static void main(String[] args) throws Exception {
		if (args.length<1) {
			System.err.println("Use:");
			System.err.println("  XLS2CSVmra <xls file> [min columns]");
			System.exit(1);
		}

		int minColumns=-1;
		if (args.length>=2) {
			minColumns=Integer.parseInt(args[1]);
		}

		XLS2CSV xls2csv=new XLS2CSV(args[0], minColumns);
		xls2csv.process();
	}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to