This is an automated email from the ASF dual-hosted git repository.
fanningpj pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/poi.git
The following commit(s) were added to refs/heads/trunk by this push:
new a023a295ae reformat
a023a295ae is described below
commit a023a295aeb88940b11717bfb4be5a66497fac8e
Author: PJ Fanning <[email protected]>
AuthorDate: Thu Mar 5 09:59:35 2026 +0100
reformat
---
.../hssf/extractor/EventBasedExcelExtractor.java | 399 +++++++++++----------
1 file changed, 201 insertions(+), 198 deletions(-)
diff --git a/poi/src/main/java/org/apache/poi/hssf/extractor/EventBasedExcelExtractor.java b/poi/src/main/java/org/apache/poi/hssf/extractor/EventBasedExcelExtractor.java
index aa58173db6..38e8302b6b 100644
--- a/poi/src/main/java/org/apache/poi/hssf/extractor/EventBasedExcelExtractor.java
+++ b/poi/src/main/java/org/apache/poi/hssf/extractor/EventBasedExcelExtractor.java
@@ -46,16 +46,16 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
/**
* A text extractor for Excel files, that is based
- * on the HSSF EventUserModel API.
+ * on the HSSF EventUserModel API.
* It will typically use less memory than
- * {@link ExcelExtractor}, but may not provide
- * the same richness of formatting.
+ * {@link ExcelExtractor}, but may not provide
+ * the same richness of formatting.
* Returns the textual content of the file, suitable for
- * indexing by something like Lucene, but not really
- * intended for display to the user.
+ * indexing by something like Lucene, but not really
+ * intended for display to the user.
* <p>
* To turn an excel file into a CSV or similar, then see
- * the XLS2CSVmra example
+ * the XLS2CSVmra example
* </p>
*
* @see <a href="https://github.com/apache/poi/blob/trunk/poi-examples/src/main/java/org/apache/poi/examples/hssf/eventusermodel/XLS2CSVmra.java">XLS2CSVmra</a>
@@ -72,200 +72,203 @@ public class EventBasedExcelExtractor implements POIOLE2TextExtractor, org.apach
_dir = dir;
}
- public EventBasedExcelExtractor(POIFSFileSystem fs) {
+ public EventBasedExcelExtractor(POIFSFileSystem fs) {
poifs = fs;
_dir = fs.getRoot();
- }
-
- /**
- * Would return the document information metadata for the document,
- * if we supported it
- */
- @Override
- public DocumentSummaryInformation getDocSummaryInformation() {
- throw new IllegalStateException("Metadata extraction not supported in streaming mode, please use ExcelExtractor");
- }
- /**
- * Would return the summary information metadata for the document,
- * if we supported it
- */
- @Override
- public SummaryInformation getSummaryInformation() {
- throw new IllegalStateException("Metadata extraction not supported in streaming mode, please use ExcelExtractor");
- }
-
-
- /**
- * Would control the inclusion of cell comments from the document,
- * if we supported it
- */
- public void setIncludeCellComments(boolean includeComments) {
- throw new IllegalStateException("Comment extraction not supported in streaming mode, please use ExcelExtractor");
- }
-
- /**
- * Would control the inclusion of headers and footers from the document,
- * if we supported it
- */
- public void setIncludeHeadersFooters(boolean includeHeadersFooters) {
- throw new IllegalStateException("Header/Footer extraction not supported in streaming mode, please use ExcelExtractor");
- }
-
-
- /**
- * Should sheet names be included? Default is true
- */
- public void setIncludeSheetNames(boolean includeSheetNames) {
- _includeSheetNames = includeSheetNames;
- }
- /**
- * Should we return the formula itself, and not
- * the result it produces? Default is false
- */
- public void setFormulasNotResults(boolean formulasNotResults) {
- _formulasNotResults = formulasNotResults;
- }
-
-
- /**
- * Retreives the text contents of the file
- */
- public String getText() {
- String text;
- try {
- TextListener tl = triggerExtraction();
-
- text = tl._text.toString();
- if(! text.endsWith("\n")) {
- text = text + "\n";
- }
- } catch(IOException e) {
- throw new IllegalStateException(e);
- }
-
- return text;
- }
-
- private TextListener triggerExtraction() throws IOException {
- TextListener tl = new TextListener();
- FormatTrackingHSSFListener ft = new FormatTrackingHSSFListener(tl);
- tl._ft = ft;
-
- // Register and process
- HSSFEventFactory factory = new HSSFEventFactory();
- HSSFRequest request = new HSSFRequest();
- request.addListenerForAllRecords(ft);
-
- factory.processWorkbookEvents(request, _dir);
-
- return tl;
- }
-
- private class TextListener implements HSSFListener {
- FormatTrackingHSSFListener _ft;
- private SSTRecord sstRecord;
-
- private final List<String> sheetNames;
- final StringBuilder _text = new StringBuilder();
- private int sheetNum = -1;
- private int rowNum;
-
- private boolean outputNextStringValue;
- private int nextRow = -1;
-
- public TextListener() {
- sheetNames = new ArrayList<>();
- }
- public void processRecord(org.apache.poi.hssf.record.Record record) {
- String thisText = null;
- int thisRow = -1;
-
- switch(record.getSid()) {
- case BoundSheetRecord.sid:
- BoundSheetRecord sr = (BoundSheetRecord)record;
- sheetNames.add(sr.getSheetname());
- break;
- case BOFRecord.sid:
- BOFRecord bof = (BOFRecord)record;
- if(bof.getType() == BOFRecord.TYPE_WORKSHEET) {
- sheetNum++;
- rowNum = -1;
-
- if(_includeSheetNames) {
- if(_text.length() > 0) _text.append("\n");
- _text.append(sheetNames.get(sheetNum));
- }
- }
- break;
- case SSTRecord.sid:
- sstRecord = (SSTRecord)record;
- break;
-
- case FormulaRecord.sid:
- FormulaRecord frec = (FormulaRecord) record;
- thisRow = frec.getRow();
-
- if(_formulasNotResults) {
- thisText = HSSFFormulaParser.toFormulaString(null, frec.getParsedExpression());
- } else {
- if(frec.hasCachedResultString()) {
- // Formula result is a string
- // This is stored in the next record
- outputNextStringValue = true;
- nextRow = frec.getRow();
- } else {
- thisText = _ft.formatNumberDateCell(frec);
- }
- }
- break;
- case StringRecord.sid:
- if(outputNextStringValue) {
- // String for formula
- StringRecord srec = (StringRecord)record;
- thisText = srec.getString();
- thisRow = nextRow;
- outputNextStringValue = false;
- }
- break;
- case LabelRecord.sid:
- LabelRecord lrec = (LabelRecord) record;
- thisRow = lrec.getRow();
- thisText = lrec.getValue();
- break;
- case LabelSSTRecord.sid:
- LabelSSTRecord lsrec = (LabelSSTRecord) record;
- thisRow = lsrec.getRow();
- if(sstRecord == null) {
- throw new IllegalStateException("No SST record found");
- }
- thisText = sstRecord.getString(lsrec.getSSTIndex()).toString();
- break;
- case NoteRecord.sid:
- NoteRecord nrec = (NoteRecord) record;
- thisRow = nrec.getRow();
- // TODO: Find object to match nrec.getShapeId()
- break;
- case NumberRecord.sid:
- NumberRecord numrec = (NumberRecord) record;
- thisRow = numrec.getRow();
- thisText = _ft.formatNumberDateCell(numrec);
- break;
- default:
- break;
- }
-
- if(thisText != null) {
- if(thisRow != rowNum) {
- rowNum = thisRow;
- if(_text.length() > 0)
- _text.append("\n");
- } else {
- _text.append("\t");
- }
- _text.append(thisText);
- }
- }
- }
+ }
+
+ /**
+ * Would return the document information metadata for the document,
+ * if we supported it
+ */
+ @Override
+ public DocumentSummaryInformation getDocSummaryInformation() {
+ throw new IllegalStateException("Metadata extraction not supported in streaming mode, please use ExcelExtractor");
+ }
+
+ /**
+ * Would return the summary information metadata for the document,
+ * if we supported it
+ */
+ @Override
+ public SummaryInformation getSummaryInformation() {
+ throw new IllegalStateException("Metadata extraction not supported in streaming mode, please use ExcelExtractor");
+ }
+
+
+ /**
+ * Would control the inclusion of cell comments from the document,
+ * if we supported it
+ */
+ public void setIncludeCellComments(boolean includeComments) {
+ throw new IllegalStateException("Comment extraction not supported in streaming mode, please use ExcelExtractor");
+ }
+
+ /**
+ * Would control the inclusion of headers and footers from the document,
+ * if we supported it
+ */
+ public void setIncludeHeadersFooters(boolean includeHeadersFooters) {
+ throw new IllegalStateException("Header/Footer extraction not supported in streaming mode, please use ExcelExtractor");
+ }
+
+
+ /**
+ * Should sheet names be included? Default is true
+ */
+ public void setIncludeSheetNames(boolean includeSheetNames) {
+ _includeSheetNames = includeSheetNames;
+ }
+
+ /**
+ * Should we return the formula itself, and not
+ * the result it produces? Default is false
+ */
+ public void setFormulasNotResults(boolean formulasNotResults) {
+ _formulasNotResults = formulasNotResults;
+ }
+
+
+ /**
+ * Retreives the text contents of the file
+ */
+ public String getText() {
+ String text;
+ try {
+ TextListener tl = triggerExtraction();
+
+ text = tl._text.toString();
+ if (!text.endsWith("\n")) {
+ text = text + "\n";
+ }
+ } catch (IOException e) {
+ throw new IllegalStateException(e);
+ }
+
+ return text;
+ }
+
+ private TextListener triggerExtraction() throws IOException {
+ TextListener tl = new TextListener();
+ FormatTrackingHSSFListener ft = new FormatTrackingHSSFListener(tl);
+ tl._ft = ft;
+
+ // Register and process
+ HSSFEventFactory factory = new HSSFEventFactory();
+ HSSFRequest request = new HSSFRequest();
+ request.addListenerForAllRecords(ft);
+
+ factory.processWorkbookEvents(request, _dir);
+
+ return tl;
+ }
+
+ private class TextListener implements HSSFListener {
+ FormatTrackingHSSFListener _ft;
+ private SSTRecord sstRecord;
+
+ private final List<String> sheetNames;
+ final StringBuilder _text = new StringBuilder();
+ private int sheetNum = -1;
+ private int rowNum;
+
+ private boolean outputNextStringValue;
+ private int nextRow = -1;
+
+ public TextListener() {
+ sheetNames = new ArrayList<>();
+ }
+
+ public void processRecord(org.apache.poi.hssf.record.Record record) {
+ String thisText = null;
+ int thisRow = -1;
+
+ switch (record.getSid()) {
+ case BoundSheetRecord.sid:
+ BoundSheetRecord sr = (BoundSheetRecord) record;
+ sheetNames.add(sr.getSheetname());
+ break;
+ case BOFRecord.sid:
+ BOFRecord bof = (BOFRecord) record;
+ if (bof.getType() == BOFRecord.TYPE_WORKSHEET) {
+ sheetNum++;
+ rowNum = -1;
+
+ if (_includeSheetNames) {
+ if (_text.length() > 0) _text.append("\n");
+ _text.append(sheetNames.get(sheetNum));
+ }
+ }
+ break;
+ case SSTRecord.sid:
+ sstRecord = (SSTRecord) record;
+ break;
+
+ case FormulaRecord.sid:
+ FormulaRecord frec = (FormulaRecord) record;
+ thisRow = frec.getRow();
+
+ if (_formulasNotResults) {
+ thisText = HSSFFormulaParser.toFormulaString(null, frec.getParsedExpression());
+ } else {
+ if (frec.hasCachedResultString()) {
+ // Formula result is a string
+ // This is stored in the next record
+ outputNextStringValue = true;
+ nextRow = frec.getRow();
+ } else {
+ thisText = _ft.formatNumberDateCell(frec);
+ }
+ }
+ break;
+ case StringRecord.sid:
+ if (outputNextStringValue) {
+ // String for formula
+ StringRecord srec = (StringRecord) record;
+ thisText = srec.getString();
+ thisRow = nextRow;
+ outputNextStringValue = false;
+ }
+ break;
+ case LabelRecord.sid:
+ LabelRecord lrec = (LabelRecord) record;
+ thisRow = lrec.getRow();
+ thisText = lrec.getValue();
+ break;
+ case LabelSSTRecord.sid:
+ LabelSSTRecord lsrec = (LabelSSTRecord) record;
+ thisRow = lsrec.getRow();
+ if (sstRecord == null) {
+ throw new IllegalStateException("No SST record found");
+ }
+ thisText = sstRecord.getString(lsrec.getSSTIndex()).toString();
+ break;
+ case NoteRecord.sid:
+ NoteRecord nrec = (NoteRecord) record;
+ thisRow = nrec.getRow();
+ // TODO: Find object to match nrec.getShapeId()
+ break;
+ case NumberRecord.sid:
+ NumberRecord numrec = (NumberRecord) record;
+ thisRow = numrec.getRow();
+ thisText = _ft.formatNumberDateCell(numrec);
+ break;
+ default:
+ break;
+ }
+
+ if (thisText != null) {
+ if (thisRow != rowNum) {
+ rowNum = thisRow;
+ if (_text.length() > 0)
+ _text.append("\n");
+ } else {
+ _text.append("\t");
+ }
+ _text.append(thisText);
+ }
+ }
+ }
@Override
public void setCloseFilesystem(boolean doCloseFilesystem) {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]