This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new 9261fe2 fix for TIKA-3389 contributed by kamaci (#436)
9261fe2 is described below
commit 9261fe2bae49a67c37b8485bc581e19b338f603f
Author: Furkan KAMACI <[email protected]>
AuthorDate: Mon May 10 18:20:09 2021 +0300
fix for TIKA-3389 contributed by kamaci (#436)
Thank you @kamaci !
---
.../apache/tika/eval/app/XMLErrorLogUpdater.java | 28 ++---
.../org/apache/tika/eval/app/reports/Report.java | 96 ++++++++--------
.../tika/eval/app/reports/ResultsReporter.java | 23 ++--
.../tika/eval/app/tools/TopCommonTokenCounter.java | 34 +++---
.../java/org/apache/tika/example/Language.java | 18 +--
.../java/org/apache/tika/example/ZipListFiles.java | 7 +-
.../tika/langdetect/tika/LanguageProfile.java | 22 +++-
.../parser/microsoft/AbstractPOIFSExtractor.java | 8 +-
.../tika/parser/microsoft/HSLFExtractor.java | 121 ++++++++++-----------
.../tika/parser/microsoft/WordExtractor.java | 8 +-
.../tika/parser/microsoft/chm/ChmCommons.java | 30 ++---
.../org/apache/tika/parser/gdal/GDALParser.java | 61 +++++------
.../org/apache/tika/parser/grib/GribParser.java | 64 +++++------
.../tika/parser/sqlite3/SQLite3DBParser.java | 8 +-
.../metadata/serialization/JsonFetchEmitTuple.java | 11 +-
.../serialization/JsonFetchEmitTupleList.java | 20 ++--
.../apache/tika/transcribe/AmazonTranscribe.java | 10 +-
.../tika/language/translate/MosesTranslator.java | 16 +--
18 files changed, 299 insertions(+), 286 deletions(-)
diff --git
a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/XMLErrorLogUpdater.java
b/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/XMLErrorLogUpdater.java
index c5d0bac..c4637f9 100644
---
a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/XMLErrorLogUpdater.java
+++
b/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/XMLErrorLogUpdater.java
@@ -133,13 +133,14 @@ public class XMLErrorLogUpdater {
int containerId = getContainerId(filePath);
String sql = "SELECT count(1) from " + errorTableName + " where "
+ Cols.CONTAINER_ID +
" = " + containerId + " or " + Cols.FILE_PATH + "='" +
filePath + "'";
- ResultSet rs = statement.executeQuery(sql);
-
- //now try to figure out if that file already exists
- //in parse errors
- int hitCount = 0;
- while (rs.next()) {
- hitCount = rs.getInt(1);
+ int hitCount;
+ try (ResultSet rs = statement.executeQuery(sql)) {
+ //now try to figure out if that file already exists
+ //in parse errors
+ hitCount = 0;
+ while (rs.next()) {
+ hitCount = rs.getInt(1);
+ }
}
//if it does, update all records matching that path or container id
@@ -179,13 +180,14 @@ public class XMLErrorLogUpdater {
String sql = "SELECT " + Cols.CONTAINER_ID.name() + " from " +
ExtractProfiler.CONTAINER_TABLE.getName() + " where " +
Cols.FILE_PATH + " ='" +
resourceId + "'";
- ResultSet rs = statement.executeQuery(sql);
- int resultCount = 0;
- while (rs.next()) {
- containerId = rs.getInt(1);
- resultCount++;
+ int resultCount;
+ try (ResultSet rs = statement.executeQuery(sql)) {
+ resultCount = 0;
+ while (rs.next()) {
+ containerId = rs.getInt(1);
+ resultCount++;
+ }
}
- rs.close();
if (resultCount == 0) {
LOG.warn("Should have found a container for: {}", resourceId);
diff --git
a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/reports/Report.java
b/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/reports/Report.java
index 0f37efc..15dd794 100644
---
a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/reports/Report.java
+++
b/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/reports/Report.java
@@ -63,62 +63,64 @@ public class Report {
}
private void dumpXLSX(Connection c, Path reportsRoot) throws IOException,
SQLException {
- Statement st = c.createStatement();
- Path out = reportsRoot.resolve(reportFilename);
- Files.createDirectories(out.getParent());
-
- SXSSFWorkbook wb = new SXSSFWorkbook(new XSSFWorkbook(), 100, true,
true);
- wb.setCompressTempFiles(true);
- defaultIntegerFormatter.reset(wb.getXSSFWorkbook());
- defaultDoubleFormatter.reset(wb.getXSSFWorkbook());
- sqlCellStyle = wb.createCellStyle();
- sqlCellStyle.setVerticalAlignment(VerticalAlignment.TOP);
- sqlCellStyle.setWrapText(true);
-
-
- try {
- dumpReportToWorkbook(st, wb);
- } finally {
- try (OutputStream os = Files.newOutputStream(out)) {
- wb.write(os);
+ try (Statement st = c.createStatement()) {
+ Path out = reportsRoot.resolve(reportFilename);
+ Files.createDirectories(out.getParent());
+
+ SXSSFWorkbook wb = new SXSSFWorkbook(new XSSFWorkbook(), 100,
true, true);
+ wb.setCompressTempFiles(true);
+ defaultIntegerFormatter.reset(wb.getXSSFWorkbook());
+ defaultDoubleFormatter.reset(wb.getXSSFWorkbook());
+ sqlCellStyle = wb.createCellStyle();
+ sqlCellStyle.setVerticalAlignment(VerticalAlignment.TOP);
+ sqlCellStyle.setWrapText(true);
+
+ try {
+ dumpReportToWorkbook(st, wb);
} finally {
- wb.dispose();
+ try (OutputStream os = Files.newOutputStream(out)) {
+ wb.write(os);
+ } finally {
+ wb.dispose();
+ }
}
}
}
private void dumpReportToWorkbook(Statement st, SXSSFWorkbook wb)
- throws IOException, SQLException {
- ResultSet rs = st.executeQuery(sql);
-
- SXSSFSheet sheet = wb.createSheet("tika-eval Report");
- sheet.trackColumnForAutoSizing(0);
-
- int rowCount = 0;
- ResultSetMetaData meta = rs.getMetaData();
- Set<String> colNames = new HashSet<>();
-
- Row xssfRow = sheet.createRow(rowCount++);
- //write headers and cache them to check against styles
- for (int i = 1; i <= meta.getColumnCount(); i++) {
- Cell cell = xssfRow.createCell(i - 1);
- cell.setCellValue(meta.getColumnLabel(i));
- colNames.add(meta.getColumnLabel(i));
- }
+ throws SQLException {
+ SXSSFSheet sheet;
+ try (ResultSet rs = st.executeQuery(sql)) {
+
+ sheet = wb.createSheet("tika-eval Report");
+ sheet.trackColumnForAutoSizing(0);
+
+ int rowCount = 0;
+ ResultSetMetaData meta = rs.getMetaData();
+ Set<String> colNames = new HashSet<>();
- ResultSetMetaData resultSetMetaData = rs.getMetaData();
- while (rs.next()) {
- xssfRow = sheet.createRow(rowCount++);
+ Row xssfRow = sheet.createRow(rowCount++);
+ //write headers and cache them to check against styles
for (int i = 1; i <= meta.getColumnCount(); i++) {
Cell cell = xssfRow.createCell(i - 1);
- XSLXCellFormatter formatter =
cellFormatters.get(meta.getColumnLabel(i));
- if (formatter == null) {
- formatter =
getDefaultFormatter(resultSetMetaData.getColumnType(i));
- }
- if (formatter != null) {
- formatter.applyStyleAndValue(i, rs, cell);
- } else {
- writeCell(meta, i, rs, cell);
+ cell.setCellValue(meta.getColumnLabel(i));
+ colNames.add(meta.getColumnLabel(i));
+ }
+
+ ResultSetMetaData resultSetMetaData = rs.getMetaData();
+ while (rs.next()) {
+ xssfRow = sheet.createRow(rowCount++);
+ for (int i = 1; i <= meta.getColumnCount(); i++) {
+ Cell cell = xssfRow.createCell(i - 1);
+ XSLXCellFormatter formatter =
cellFormatters.get(meta.getColumnLabel(i));
+ if (formatter == null) {
+ formatter =
getDefaultFormatter(resultSetMetaData.getColumnType(i));
+ }
+ if (formatter != null) {
+ formatter.applyStyleAndValue(i, rs, cell);
+ } else {
+ writeCell(meta, i, rs, cell);
+ }
}
}
}
diff --git
a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/reports/ResultsReporter.java
b/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/reports/ResultsReporter.java
index 3d420e3..b37b6fc 100644
---
a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/reports/ResultsReporter.java
+++
b/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/reports/ResultsReporter.java
@@ -303,17 +303,18 @@ public class ResultsReporter {
}
public void execute(Connection c, Path reportsDirectory) throws
IOException, SQLException {
- Statement st = c.createStatement();
- for (String sql : before) {
- LOG.info("processing before: {}", sql);
- st.execute(sql);
- }
- for (Report r : reports) {
- r.writeReport(c, reportsDirectory);
- }
- for (String sql : after) {
- LOG.info("processing after: {}", sql);
- st.execute(sql);
+ try (Statement st = c.createStatement()) {
+ for (String sql : before) {
+ LOG.info("processing before: {}", sql);
+ st.execute(sql);
+ }
+ for (Report r : reports) {
+ r.writeReport(c, reportsDirectory);
+ }
+ for (String sql : after) {
+ LOG.info("processing after: {}", sql);
+ st.execute(sql);
+ }
}
}
}
diff --git
a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/tools/TopCommonTokenCounter.java
b/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/tools/TopCommonTokenCounter.java
index 90e30c1..f7475e5 100644
---
a/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/tools/TopCommonTokenCounter.java
+++
b/tika-eval/tika-eval-app/src/main/java/org/apache/tika/eval/app/tools/TopCommonTokenCounter.java
@@ -118,25 +118,25 @@ public class TopCommonTokenCounter {
return;
}
Files.createDirectories(path.getParent());
- BufferedWriter writer = Files.newBufferedWriter(path,
StandardCharsets.UTF_8);
- StringBuilder sb = new StringBuilder();
- writer.write(LICENSE);
- writer.write("#DOC_COUNT\t" + totalDocs + "\n");
- writer.write("#SUM_DOC_FREQS\t" + sumDocFreqs + "\n");
- writer.write("#SUM_TERM_FREQS\t" + sumTotalTermFreqs + "\n");
- writer.write("#UNIQUE_TERMS\t" + uniqueTerms + "\n");
- writer.write("#TOKEN\tDOCFREQ\tTERMFREQ\n");
- //add these tokens no matter what
- for (String t : INCLUDE_LIST) {
- writer.write(t);
- writer.newLine();
- }
- for (TokenDFTF tp : queue.getArray()) {
- writer.write(getRow(sb, tp) + "\n");
+ try (BufferedWriter writer = Files.newBufferedWriter(path,
StandardCharsets.UTF_8)) {
+ StringBuilder sb = new StringBuilder();
+ writer.write(LICENSE);
+ writer.write("#DOC_COUNT\t" + totalDocs + "\n");
+ writer.write("#SUM_DOC_FREQS\t" + sumDocFreqs + "\n");
+ writer.write("#SUM_TERM_FREQS\t" + sumTotalTermFreqs + "\n");
+ writer.write("#UNIQUE_TERMS\t" + uniqueTerms + "\n");
+ writer.write("#TOKEN\tDOCFREQ\tTERMFREQ\n");
+ //add these tokens no matter what
+ for (String t : INCLUDE_LIST) {
+ writer.write(t);
+ writer.newLine();
+ }
+ for (TokenDFTF tp : queue.getArray()) {
+ writer.write(getRow(sb, tp) + "\n");
+ }
+ writer.flush();
}
- writer.flush();
- writer.close();
}
private static String getRow(StringBuilder sb, TokenDFTF tp) {
diff --git a/tika-example/src/main/java/org/apache/tika/example/Language.java
b/tika-example/src/main/java/org/apache/tika/example/Language.java
index 757dcca..f8081a8 100755
--- a/tika-example/src/main/java/org/apache/tika/example/Language.java
+++ b/tika-example/src/main/java/org/apache/tika/example/Language.java
@@ -40,15 +40,15 @@ public class Language {
public static void languageDetectionWithWriter() throws IOException {
// TODO support version of LanguageWriter that doesn't need a detector.
LanguageDetector detector = new OptimaizeLangDetector().loadModels();
- LanguageWriter writer = new LanguageWriter(detector);
- writer.append("Minden emberi lény");
- writer.append(" szabadon születik és");
- writer.append(" egyenlő méltósága és");
- writer.append(" joga van.");
-
- LanguageResult result = writer.getLanguage();
- System.out.println(result.getLanguage());
- writer.close();
+ try (LanguageWriter writer = new LanguageWriter(detector)) {
+ writer.append("Minden emberi lény");
+ writer.append(" szabadon születik és");
+ writer.append(" egyenlő méltósága és");
+ writer.append(" joga van.");
+
+ LanguageResult result = writer.getLanguage();
+ System.out.println(result.getLanguage());
+ }
}
public static void languageDetectionWithHandler() throws Exception {
diff --git
a/tika-example/src/main/java/org/apache/tika/example/ZipListFiles.java
b/tika-example/src/main/java/org/apache/tika/example/ZipListFiles.java
index b19bac8..460fef0 100755
--- a/tika-example/src/main/java/org/apache/tika/example/ZipListFiles.java
+++ b/tika-example/src/main/java/org/apache/tika/example/ZipListFiles.java
@@ -37,9 +37,10 @@ public class ZipListFiles {
}
public static void listZipEntries(String path) throws IOException {
- ZipFile zip = new ZipFile(path);
- for (ZipEntry entry : Collections.list(zip.entries())) {
- System.out.println(entry.getName());
+ try (ZipFile zip = new ZipFile(path)) {
+ for (ZipEntry entry : Collections.list(zip.entries())) {
+ System.out.println(entry.getName());
+ }
}
}
}
diff --git
a/tika-langdetect/tika-langdetect-tika/src/main/java/org/apache/tika/langdetect/tika/LanguageProfile.java
b/tika-langdetect/tika-langdetect-tika/src/main/java/org/apache/tika/langdetect/tika/LanguageProfile.java
index e1da686..1bcec05 100644
---
a/tika-langdetect/tika-langdetect-tika/src/main/java/org/apache/tika/langdetect/tika/LanguageProfile.java
+++
b/tika-langdetect/tika-langdetect-tika/src/main/java/org/apache/tika/langdetect/tika/LanguageProfile.java
@@ -17,6 +17,8 @@
package org.apache.tika.langdetect.tika;
+import java.io.IOException;
+import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
@@ -24,6 +26,9 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
/**
* Language profile based on ngram counts.
*
@@ -48,6 +53,8 @@ public class LanguageProfile {
*/
private long count = 0;
+ private static final Logger LOG =
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
public LanguageProfile(int length) {
this.length = length;
}
@@ -59,9 +66,12 @@ public class LanguageProfile {
public LanguageProfile(String content, int length) {
this(length);
- ProfilingWriter writer = new ProfilingWriter(this);
- char[] ch = content.toCharArray();
- writer.write(ch, 0, ch.length);
+ try (ProfilingWriter writer = new ProfilingWriter(this)) {
+ char[] ch = content.toCharArray();
+ writer.write(ch, 0, ch.length);
+ } catch (IOException ioe) {
+ LOG.error("Unable to close stream", ioe);
+ }
}
public LanguageProfile(String content) {
@@ -100,7 +110,7 @@ public class LanguageProfile {
if (length != ngram.length()) {
throw new IllegalArgumentException(
"Unable to add an ngram of incorrect length: " +
ngram.length() + " != " +
- length);
+ length);
}
Counter counter = ngrams.get(ngram);
@@ -126,7 +136,7 @@ public class LanguageProfile {
private double distanceStandard(LanguageProfile that) {
if (length != that.length) {
throw new IllegalArgumentException("Unable to calculage distance
of language profiles" +
- " with different ngram lengths: " + that.length + " != " +
length);
+ " with different ngram lengths:
" + that.length + " != " + length);
}
double sumOfSquares = 0.0;
@@ -154,7 +164,7 @@ public class LanguageProfile {
private double distanceInterleaved(LanguageProfile that) {
if (length != that.length) {
throw new IllegalArgumentException("Unable to calculage distance
of language profiles" +
- " with different ngram lengths: " + that.length + " != " +
length);
+ " with different ngram lengths:
" + that.length + " != " + length);
}
double sumOfSquares = 0.0;
diff --git
a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/AbstractPOIFSExtractor.java
b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/AbstractPOIFSExtractor.java
index eb64906..19cc622 100644
---
a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/AbstractPOIFSExtractor.java
+++
b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/AbstractPOIFSExtractor.java
@@ -228,9 +228,11 @@ abstract class AbstractPOIFSExtractor {
} catch (FileNotFoundException ioe) {
contentsEntry = (DocumentEntry)
dir.getEntry("Contents");
}
- DocumentInputStream inp = new
DocumentInputStream(contentsEntry);
- byte[] contents = new byte[contentsEntry.getSize()];
- inp.readFully(contents);
+ byte[] contents;
+ try (DocumentInputStream inp = new
DocumentInputStream(contentsEntry)) {
+ contents = new byte[contentsEntry.getSize()];
+ inp.readFully(contents);
+ }
embedded = TikaInputStream.get(contents);
// Try to work out what it is
diff --git
a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
index f250a9e..aa89ee0 100644
---
a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
+++
b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
@@ -80,84 +80,81 @@ public class HSLFExtractor extends AbstractPOIFSExtractor {
protected void parse(DirectoryNode root, XHTMLContentHandler xhtml)
throws IOException, SAXException, TikaException {
- HSLFSlideShow ss;
List<HSLFSlide> _slides;
- try {
- ss = new HSLFSlideShow(root);
- } catch (EncryptedPowerPointFileException e) {
- throw new EncryptedDocumentException(e);
- }
-
- _slides = ss.getSlides();
+ try (HSLFSlideShow ss = new HSLFSlideShow(root)) {
+ _slides = ss.getSlides();
- xhtml.startElement("div", "class", "slideShow");
+ xhtml.startElement("div", "class", "slideShow");
- /* Iterate over slides and extract text */
- for (HSLFSlide slide : _slides) {
- xhtml.startElement("div", "class", "slide");
- HeadersFooters slideHeaderFooters =
- (officeParserConfig.isIncludeHeadersAndFooters()) ?
slide.getHeadersFooters() :
- null;
+ /* Iterate over slides and extract text */
+ for (HSLFSlide slide : _slides) {
+ xhtml.startElement("div", "class", "slide");
+ HeadersFooters slideHeaderFooters =
+ (officeParserConfig.isIncludeHeadersAndFooters()) ?
slide.getHeadersFooters() :
+ null;
- HeadersFooters notesHeadersFooters =
(officeParserConfig.isIncludeHeadersAndFooters()) ?
- ss.getNotesHeadersFooters() : null;
+ HeadersFooters notesHeadersFooters =
(officeParserConfig.isIncludeHeadersAndFooters()) ?
+ ss.getNotesHeadersFooters() : null;
- if (officeParserConfig.isIncludeHeadersAndFooters()) {
- // Slide header, if present
- if (slideHeaderFooters != null &&
slideHeaderFooters.isHeaderVisible() &&
- slideHeaderFooters.getHeaderText() != null) {
- xhtml.startElement("p", "class", "slide-header");
+ if (officeParserConfig.isIncludeHeadersAndFooters()) {
+ // Slide header, if present
+ if (slideHeaderFooters != null &&
slideHeaderFooters.isHeaderVisible() &&
+ slideHeaderFooters.getHeaderText() != null) {
+ xhtml.startElement("p", "class", "slide-header");
- xhtml.characters(slideHeaderFooters.getHeaderText());
+ xhtml.characters(slideHeaderFooters.getHeaderText());
- xhtml.endElement("p");
+ xhtml.endElement("p");
+ }
}
- }
- // Slide master, if present
- if (officeParserConfig.isIncludeSlideMasterContent()) {
- extractMaster(xhtml, slide.getMasterSheet());
- }
- // Slide text
- xhtml.startElement("div", "class", "slide-content");
- textRunsToText(xhtml, slide.getTextParagraphs());
-
- // Table text
- List<HSLFShape> shapes = getShapes(slide);
- if (shapes != null) {
- for (HSLFShape shape : shapes) {
- if (shape instanceof HSLFTable) {
- extractTableText(xhtml, (HSLFTable) shape);
+ // Slide master, if present
+ if (officeParserConfig.isIncludeSlideMasterContent()) {
+ extractMaster(xhtml, slide.getMasterSheet());
+ }
+ // Slide text
+ xhtml.startElement("div", "class", "slide-content");
+ textRunsToText(xhtml, slide.getTextParagraphs());
+
+ // Table text
+ List<HSLFShape> shapes = getShapes(slide);
+ if (shapes != null) {
+ for (HSLFShape shape : shapes) {
+ if (shape instanceof HSLFTable) {
+ extractTableText(xhtml, (HSLFTable) shape);
+ }
}
}
- }
- extractGroupText(xhtml, slide, 0);
- //end slide content
- xhtml.endElement("div");
-
- if (officeParserConfig.isIncludeHeadersAndFooters()) {
- // Slide footer, if present
- if (slideHeaderFooters != null &&
slideHeaderFooters.isFooterVisible() &&
- slideHeaderFooters.getFooterText() != null) {
- xhtml.startElement("p", "class", "slide-footer");
- xhtml.characters(slideHeaderFooters.getFooterText());
- xhtml.endElement("p");
+ extractGroupText(xhtml, slide, 0);
+ //end slide content
+ xhtml.endElement("div");
+
+ if (officeParserConfig.isIncludeHeadersAndFooters()) {
+ // Slide footer, if present
+ if (slideHeaderFooters != null &&
slideHeaderFooters.isFooterVisible() &&
+ slideHeaderFooters.getFooterText() != null) {
+ xhtml.startElement("p", "class", "slide-footer");
+ xhtml.characters(slideHeaderFooters.getFooterText());
+ xhtml.endElement("p");
+ }
}
- }
- handleComments(slide, xhtml);
- handleNotes(slide, notesHeadersFooters, xhtml);
+ handleComments(slide, xhtml);
+ handleNotes(slide, notesHeadersFooters, xhtml);
- // Now any embedded resources
- handleSlideEmbeddedResources(slide, xhtml);
+ // Now any embedded resources
+ handleSlideEmbeddedResources(slide, xhtml);
- // Slide complete
- xhtml.endElement("div");
- }
+ // Slide complete
+ xhtml.endElement("div");
+ }
- handleSlideEmbeddedPictures(ss, xhtml);
- if (officeParserConfig.isExtractMacros()) {
- extractMacros(ss, xhtml);
+ handleSlideEmbeddedPictures(ss, xhtml);
+ if (officeParserConfig.isExtractMacros()) {
+ extractMacros(ss, xhtml);
+ }
+ } catch (EncryptedPowerPointFileException e) {
+ throw new EncryptedDocumentException(e);
}
// All slides done
xhtml.endElement("div");
diff --git
a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java
b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java
index 5ec1ea2..95ab303 100644
---
a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java
+++
b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java
@@ -595,9 +595,11 @@ public class WordExtractor extends AbstractPOIFSExtractor {
}
protected void parseWord6(DirectoryNode root, XHTMLContentHandler xhtml)
- throws IOException, SAXException, TikaException {
- HWPFOldDocument doc = new HWPFOldDocument(root);
- Word6Extractor extractor = new Word6Extractor(doc);
+ throws IOException, SAXException {
+ Word6Extractor extractor;
+ try (HWPFOldDocument doc = new HWPFOldDocument(root)) {
+ extractor = new Word6Extractor(doc);
+ }
for (String p : extractor.getParagraphText()) {
xhtml.element("p", p);
diff --git
a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/chm/ChmCommons.java
b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/chm/ChmCommons.java
index 9594750..62773d7 100644
---
a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/chm/ChmCommons.java
+++
b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/chm/ChmCommons.java
@@ -177,27 +177,17 @@ public class ChmCommons {
* @throws TikaException
*/
public static void writeFile(byte[][] buffer, String fileToBeSaved) throws
TikaException {
- FileOutputStream output = null;
- if (buffer != null && fileToBeSaved != null &&
!ChmCommons.isEmpty(fileToBeSaved)) {
- try {
- output = new FileOutputStream(fileToBeSaved);
- for (byte[] bufferEntry : buffer) {
- output.write(bufferEntry);
- }
- } catch (FileNotFoundException e) {
- throw new TikaException(e.getMessage());
- } catch (IOException e) {
- LOG.warn("problem writing tmp file", e);
- } finally {
- if (output != null) {
- try {
- output.flush();
- output.close();
- } catch (IOException e) {
- LOG.warn("problem writing tmp file", e);
- }
- }
+ if (buffer == null || fileToBeSaved == null ||
ChmCommons.isEmpty(fileToBeSaved)) {
+ return;
+ }
+ try (FileOutputStream output = new FileOutputStream(fileToBeSaved)) {
+ for (byte[] bufferEntry : buffer) {
+ output.write(bufferEntry);
}
+ } catch (FileNotFoundException e) {
+ throw new TikaException(e.getMessage());
+ } catch (IOException e) {
+ LOG.warn("problem writing tmp file", e);
}
}
diff --git
a/tika-parsers/tika-parsers-extended/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/gdal/GDALParser.java
b/tika-parsers/tika-parsers-extended/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/gdal/GDALParser.java
index 379465c..936d9af 100644
---
a/tika-parsers/tika-parsers-extended/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/gdal/GDALParser.java
+++
b/tika-parsers/tika-parsers-extended/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/gdal/GDALParser.java
@@ -232,30 +232,29 @@ public class GDALParser extends AbstractParser {
}
private void extractMetFromOutput(String output, Metadata met) {
- Scanner scanner = new Scanner(output);
- String currentKey = null;
- String[] headings = {"Subdatasets", "Corner Coordinates"};
- StringBuilder metVal = new StringBuilder();
- while (scanner.hasNextLine()) {
- String line = scanner.nextLine();
- if (line.contains("=") || hasHeadings(line, headings)) {
- if (currentKey != null) {
- // time to flush this key and met val
- met.add(currentKey, metVal.toString());
- }
- metVal.setLength(0);
+ try (Scanner scanner = new Scanner(output)) {
+ String currentKey = null;
+ String[] headings = {"Subdatasets", "Corner Coordinates"};
+ StringBuilder metVal = new StringBuilder();
+ while (scanner.hasNextLine()) {
+ String line = scanner.nextLine();
+ if (line.contains("=") || hasHeadings(line, headings)) {
+ if (currentKey != null) {
+ // time to flush this key and met val
+ met.add(currentKey, metVal.toString());
+ }
+ metVal.setLength(0);
- String[] lineToks = line.split("=");
- currentKey = lineToks[0].trim();
- if (lineToks.length == 2) {
- metVal.append(lineToks[1]);
+ String[] lineToks = line.split("=");
+ currentKey = lineToks[0].trim();
+ if (lineToks.length == 2) {
+ metVal.append(lineToks[1]);
+ }
} else {
- metVal.append("");
+ metVal.append(line);
}
- } else {
- metVal.append(line);
- }
+ }
}
}
@@ -274,21 +273,21 @@ public class GDALParser extends AbstractParser {
private void applyPatternsToOutput(String output, Metadata metadata,
Map<Pattern, String> metadataPatterns) {
- Scanner scanner = new Scanner(output);
- while (scanner.hasNextLine()) {
- String line = scanner.nextLine();
- for (Pattern p : metadataPatterns.keySet()) {
- Matcher m = p.matcher(line);
- if (m.find()) {
- if (metadataPatterns.get(p) != null &&
!metadataPatterns.get(p).equals("")) {
- metadata.add(metadataPatterns.get(p), m.group(1));
- } else {
- metadata.add(m.group(1), m.group(2));
+ try (Scanner scanner = new Scanner(output)) {
+ while (scanner.hasNextLine()) {
+ String line = scanner.nextLine();
+ for (Pattern p : metadataPatterns.keySet()) {
+ Matcher m = p.matcher(line);
+ if (m.find()) {
+ if (metadataPatterns.get(p) != null &&
!metadataPatterns.get(p).equals("")) {
+ metadata.add(metadataPatterns.get(p), m.group(1));
+ } else {
+ metadata.add(m.group(1), m.group(2));
+ }
}
}
}
}
-
}
private String execCommand(String[] cmd) throws IOException {
diff --git
a/tika-parsers/tika-parsers-extended/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/grib/GribParser.java
b/tika-parsers/tika-parsers-extended/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/grib/GribParser.java
index e7691a9..c0069f5 100644
---
a/tika-parsers/tika-parsers-extended/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/grib/GribParser.java
+++
b/tika-parsers/tika-parsers-extended/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/grib/GribParser.java
@@ -63,45 +63,47 @@ public class GribParser extends AbstractParser {
File gribFile = tis.getFile();
try {
- NetcdfFile ncFile =
NetcdfDataset.openFile(gribFile.getAbsolutePath(), null);
-
- // first parse out the set of global attributes
- for (Attribute attr : ncFile.getGlobalAttributes()) {
- Property property = resolveMetadataKey(attr.getFullName());
- if (attr.getDataType().isString()) {
- metadata.add(property, attr.getStringValue());
- } else if (attr.getDataType().isNumeric()) {
- int value = attr.getNumericValue().intValue();
- metadata.add(property, String.valueOf(value));
+ XHTMLContentHandler xhtml;
+ try (NetcdfFile ncFile =
NetcdfDataset.openFile(gribFile.getAbsolutePath(), null)) {
+
+ // first parse out the set of global attributes
+ for (Attribute attr : ncFile.getGlobalAttributes()) {
+ Property property = resolveMetadataKey(attr.getFullName());
+ if (attr.getDataType().isString()) {
+ metadata.add(property, attr.getStringValue());
+ } else if (attr.getDataType().isNumeric()) {
+ int value = attr.getNumericValue().intValue();
+ metadata.add(property, String.valueOf(value));
+ }
}
- }
-
- XHTMLContentHandler xhtml = new XHTMLContentHandler(handler,
metadata);
- xhtml.startDocument();
+ xhtml = new XHTMLContentHandler(handler, metadata);
- xhtml.newline();
- xhtml.startElement("ul");
- xhtml.characters("dimensions:");
- xhtml.newline();
+ xhtml.startDocument();
- for (Dimension dim : ncFile.getDimensions()) {
- xhtml.element("li",
- dim.getFullName() + "=" +
String.valueOf(dim.getLength()) + ";");
xhtml.newline();
- }
-
- xhtml.startElement("ul");
- xhtml.characters("variables:");
- xhtml.newline();
+ xhtml.startElement("ul");
+ xhtml.characters("dimensions:");
+ xhtml.newline();
- for (Variable var : ncFile.getVariables()) {
- xhtml.element("p",
- String.valueOf(var.getDataType()) +
var.getNameAndDimensions() + ";");
- for (Attribute element : var.getAttributes()) {
- xhtml.element("li", " :" + element + ";");
+ for (Dimension dim : ncFile.getDimensions()) {
+ xhtml.element("li",
+ dim.getFullName() + "=" +
String.valueOf(dim.getLength()) + ";");
xhtml.newline();
}
+
+ xhtml.startElement("ul");
+ xhtml.characters("variables:");
+ xhtml.newline();
+
+ for (Variable var : ncFile.getVariables()) {
+ xhtml.element("p",
+ String.valueOf(var.getDataType()) +
var.getNameAndDimensions() + ";");
+ for (Attribute element : var.getAttributes()) {
+ xhtml.element("li", " :" + element + ";");
+ xhtml.newline();
+ }
+ }
}
xhtml.endElement("ul");
xhtml.endElement("ul");
diff --git
a/tika-parsers/tika-parsers-extended/tika-parser-sqlite3-module/src/main/java/org/apache/tika/parser/sqlite3/SQLite3DBParser.java
b/tika-parsers/tika-parsers-extended/tika-parser-sqlite3-module/src/main/java/org/apache/tika/parser/sqlite3/SQLite3DBParser.java
index 97fb82e..36e1d05 100644
---
a/tika-parsers/tika-parsers-extended/tika-parser-sqlite3-module/src/main/java/org/apache/tika/parser/sqlite3/SQLite3DBParser.java
+++
b/tika-parsers/tika-parsers-extended/tika-parser-sqlite3-module/src/main/java/org/apache/tika/parser/sqlite3/SQLite3DBParser.java
@@ -124,10 +124,10 @@ class SQLite3DBParser extends AbstractDBParser {
try (Statement st = connection.createStatement()) {
String sql = "SELECT name FROM sqlite_master WHERE type='table'";
- ResultSet rs = st.executeQuery(sql);
-
- while (rs.next()) {
- tableNames.add(rs.getString(1));
+ try (ResultSet rs = st.executeQuery(sql)) {
+ while (rs.next()) {
+ tableNames.add(rs.getString(1));
+ }
}
}
return tableNames;
diff --git
a/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonFetchEmitTuple.java
b/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonFetchEmitTuple.java
index 9543243..fcc7b97 100644
---
a/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonFetchEmitTuple.java
+++
b/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonFetchEmitTuple.java
@@ -51,12 +51,13 @@ public class JsonFetchEmitTuple {
public static FetchEmitTuple fromJson(Reader reader) throws IOException {
- JsonParser jParser = new JsonFactory().createParser(reader);
- JsonToken token = jParser.nextToken();
- if (token != JsonToken.START_OBJECT) {
- throw new IOException("require start object, but see: " +
token.name());
+ try (JsonParser jParser = new JsonFactory().createParser(reader)) {
+ JsonToken token = jParser.nextToken();
+ if (token != JsonToken.START_OBJECT) {
+ throw new IOException("require start object, but see: " +
token.name());
+ }
+ return parseFetchEmitTuple(jParser);
}
- return parseFetchEmitTuple(jParser);
}
diff --git
a/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonFetchEmitTupleList.java
b/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonFetchEmitTupleList.java
index cd44928..e74bd53 100644
---
a/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonFetchEmitTupleList.java
+++
b/tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonFetchEmitTupleList.java
@@ -33,15 +33,17 @@ import org.apache.tika.pipes.FetchEmitTuple;
public class JsonFetchEmitTupleList {
public static List<FetchEmitTuple> fromJson(Reader reader) throws
IOException {
- JsonParser jParser = new JsonFactory().createParser(reader);
- JsonToken token = jParser.nextToken();
- if (token != JsonToken.START_ARRAY) {
- throw new IOException("require start array, but see: " +
token.name());
- }
- List<FetchEmitTuple> list = new ArrayList<>();
- while (token != JsonToken.END_ARRAY) {
- list.add(JsonFetchEmitTuple.parseFetchEmitTuple(jParser));
- token = jParser.nextToken();
+ List<FetchEmitTuple> list;
+ try (JsonParser jParser = new JsonFactory().createParser(reader)) {
+ JsonToken token = jParser.nextToken();
+ if (token != JsonToken.START_ARRAY) {
+ throw new IOException("require start array, but see: " +
token.name());
+ }
+ list = new ArrayList<>();
+ while (token != JsonToken.END_ARRAY) {
+ list.add(JsonFetchEmitTuple.parseFetchEmitTuple(jParser));
+ token = jParser.nextToken();
+ }
}
return list;
}
diff --git
a/tika-transcribe/src/main/java/org/apache/tika/transcribe/AmazonTranscribe.java
b/tika-transcribe/src/main/java/org/apache/tika/transcribe/AmazonTranscribe.java
index ff91ef3..c972fb1 100644
---
a/tika-transcribe/src/main/java/org/apache/tika/transcribe/AmazonTranscribe.java
+++
b/tika-transcribe/src/main/java/org/apache/tika/transcribe/AmazonTranscribe.java
@@ -130,8 +130,9 @@ public class AmazonTranscribe implements Transcriber {
byte[] buffer = new byte[inputStream.available()];
inputStream.read(buffer);
File targetFile = new File("src/main/resources/targetFile.tmp");
- OutputStream outStream = new FileOutputStream(targetFile);
- outStream.write(buffer);
+ try (OutputStream outStream = new FileOutputStream(targetFile)) {
+ outStream.write(buffer);
+ }
targetFile.deleteOnExit();
uploadFileToBucket(targetFile, jobName);
StartTranscriptionJobRequest startTranscriptionJobRequest = new
StartTranscriptionJobRequest();
@@ -161,8 +162,9 @@ public class AmazonTranscribe implements Transcriber {
byte[] buffer = new byte[inputStream.available()];
inputStream.read(buffer);
File targetFile = new File("src/main/resources/targetFile.tmp");
- OutputStream outStream = new FileOutputStream(targetFile);
- outStream.write(buffer);
+ try (OutputStream outStream = new FileOutputStream(targetFile)) {
+ outStream.write(buffer);
+ }
targetFile.deleteOnExit();
uploadFileToBucket(targetFile, jobName);
StartTranscriptionJobRequest startTranscriptionJobRequest = new
StartTranscriptionJobRequest();
diff --git
a/tika-translate/src/main/java/org/apache/tika/language/translate/MosesTranslator.java
b/tika-translate/src/main/java/org/apache/tika/language/translate/MosesTranslator.java
index 7b87d0d..3521dcf 100644
---
a/tika-translate/src/main/java/org/apache/tika/language/translate/MosesTranslator.java
+++
b/tika-translate/src/main/java/org/apache/tika/language/translate/MosesTranslator.java
@@ -76,22 +76,22 @@ public class MosesTranslator extends ExternalTranslator {
public String translate(String text, String sourceLanguage, String
targetLanguage) throws TikaException, IOException {
if (!isAvailable() || !checkCommand(buildCheckCommand(smtPath), 1))
return text;
File tmpFile = new File(TMP_FILE_NAME);
- @SuppressWarnings("resource")
- OutputStreamWriter out = new OutputStreamWriter(new
FileOutputStream(tmpFile), Charset.defaultCharset());
- out.append(text).append('\n').close();
+ try (OutputStreamWriter out = new OutputStreamWriter(new
FileOutputStream(tmpFile), Charset.defaultCharset())) {
+ out.append(text).append('\n').close();
+ }
Runtime.getRuntime().exec(buildCommand(smtPath, scriptPath), new
String[]{}, buildWorkingDirectory(scriptPath));
File tmpTranslatedFile = new File(TMP_FILE_NAME + ".translated");
StringBuilder stringBuilder = new StringBuilder();
- @SuppressWarnings("resource")
- BufferedReader reader = new BufferedReader(new InputStreamReader(
+ try (BufferedReader reader = new BufferedReader(new InputStreamReader(
new FileInputStream(tmpTranslatedFile),
Charset.defaultCharset()
- ));
- String line;
- while ((line = reader.readLine()) != null) stringBuilder.append(line);
+ ))) {
+ String line;
+ while ((line = reader.readLine()) != null)
stringBuilder.append(line);
+ }
if (!tmpFile.delete() || !tmpTranslatedFile.delete()){
throw new IOException("Failed to delete temporary files.");