This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch TIKA-2224 in repository https://gitbox.apache.org/repos/asf/tika.git
commit 5d615497314a4bcb2eb23bbce5949d5f5ebdd48f Author: tallison <[email protected]> AuthorDate: Tue Dec 10 14:50:40 2019 -0500 TIKA-2224 -- clean up imports, missing locales and missing headers --- .../parser/microsoft/onenote/ExtendedGUID.java | 3 +- .../apache/tika/parser/microsoft/onenote/GUID.java | 3 +- .../microsoft/onenote/GlobalIdTableEntry3FNDX.java | 16 +++++ .../microsoft/onenote/GlobalIdTableEntryFNDX.java | 16 +++++ .../parser/microsoft/onenote/OneNoteParser.java | 7 ++- .../microsoft/onenote/OneNoteTreeWalker.java | 2 - .../microsoft/onenote/OneNoteParserTest.java | 24 ++++++-- .../tika/parser/onenote/OneNoteParserTest.java | 68 ---------------------- 8 files changed, 58 insertions(+), 81 deletions(-) diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/ExtendedGUID.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/ExtendedGUID.java index 56f5b90..2b46de2 100644 --- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/ExtendedGUID.java +++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/ExtendedGUID.java @@ -16,6 +16,7 @@ */ package org.apache.tika.parser.microsoft.onenote; +import java.util.Locale; import java.util.Objects; class ExtendedGUID implements Comparable<ExtendedGUID> { @@ -59,7 +60,7 @@ class ExtendedGUID implements Comparable<ExtendedGUID> { @Override public String toString() { - return String.format("%s [%d]", guid, n); + return String.format(Locale.US, "%s [%d]", guid, n); } public GUID getGuid() { diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/GUID.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/GUID.java index b25c890..371e328 100644 --- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/GUID.java +++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/GUID.java @@ -20,6 +20,7 @@ import org.apache.commons.lang3.StringUtils; import java.nio.charset.StandardCharsets; import java.util.Arrays; +import java.util.Locale; class GUID implements Comparable<GUID> { int[] guid; @@ -107,7 +108,7 @@ class GUID implements Comparable<GUID> { sb.append(StringUtils.leftPad(Integer.toHexString(guid[i]), 2, '0')); } sb.append("}"); - return sb.toString().toUpperCase(); + return sb.toString().toUpperCase(Locale.US); } public static GUID nil() { diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/GlobalIdTableEntry3FNDX.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/GlobalIdTableEntry3FNDX.java index bc66423..0cc3050 100644 --- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/GlobalIdTableEntry3FNDX.java +++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/GlobalIdTableEntry3FNDX.java @@ -1,3 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package org.apache.tika.parser.microsoft.onenote; public class GlobalIdTableEntry3FNDX { diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/GlobalIdTableEntryFNDX.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/GlobalIdTableEntryFNDX.java index 3fe7d65..16d0016 100644 --- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/GlobalIdTableEntryFNDX.java +++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/GlobalIdTableEntryFNDX.java @@ -1,3 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package org.apache.tika.parser.microsoft.onenote; public class GlobalIdTableEntryFNDX { diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteParser.java index 5112721..32b8b23 100644 --- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteParser.java +++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteParser.java @@ -25,8 +25,6 @@ import org.apache.tika.mime.MediaType; import org.apache.tika.parser.AbstractParser; import org.apache.tika.parser.ParseContext; import org.apache.tika.sax.XHTMLContentHandler; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.xml.sax.ContentHandler; import org.xml.sax.SAXException; @@ -46,8 +44,11 @@ import java.util.Set; */ public class OneNoteParser extends AbstractParser { - private static final Logger LOG = LoggerFactory.getLogger(OneNoteParser.class); private static final Map<MediaType, List<String>> typesMap = new HashMap<>(); + /** + * Serial version UID + */ + private static final long serialVersionUID = -5504243905998074168L; static { // All types should be 4 bytes long, space padded as needed diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteTreeWalker.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteTreeWalker.java index 45eda57..ff2d192 100644 --- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteTreeWalker.java +++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteTreeWalker.java @@ -25,8 +25,6 @@ import org.apache.tika.extractor.EmbeddedDocumentUtil; import org.apache.tika.io.TikaInputStream; import org.apache.tika.metadata.Metadata; import org.apache.tika.parser.ParseContext; -import org.apache.tika.parser.Parser; -import org.apache.tika.parser.rtf.RTFParser; import org.apache.tika.sax.EmbeddedContentHandler; import org.apache.tika.sax.XHTMLContentHandler; import org.xml.sax.SAXException; diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/onenote/OneNoteParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/onenote/OneNoteParserTest.java index c7ad06a..e117fa4 100644 --- a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/onenote/OneNoteParserTest.java +++ b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/onenote/OneNoteParserTest.java @@ -1,13 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package org.apache.tika.parser.microsoft.onenote; -import org.apache.commons.lang3.StringUtils; import org.apache.tika.TikaTest; import org.apache.tika.metadata.Metadata; -import org.junit.Assert; import org.junit.Test; -import java.util.List; - public class OneNoteParserTest extends TikaTest { //TODO: rename test files testOneNote... @@ -18,8 +30,8 @@ public class OneNoteParserTest extends TikaTest { */ @Test public void testOneNote2013Doc1() throws Exception { - List<Metadata> metadataList = getRecursiveMetadata("Sample1.one"); - debug(metadataList); +// List<Metadata> metadataList = getRecursiveMetadata("Sample1.one"); + // debug(metadataList); Metadata metadata = new Metadata(); String txt = getText("Sample1.one", metadata); assertNoJunk(txt); diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/onenote/OneNoteParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/onenote/OneNoteParserTest.java deleted file mode 100644 index 86900ba..0000000 --- a/tika-parsers/src/test/java/org/apache/tika/parser/onenote/OneNoteParserTest.java +++ /dev/null @@ -1,68 +0,0 @@ -package org.apache.tika.parser.onenote; - -import org.apache.commons.lang3.StringUtils; -import org.apache.tika.TikaTest; -import org.apache.tika.metadata.Metadata; -import org.junit.Assert; -import org.junit.Test; - -public class OneNoteParserTest extends TikaTest { - - /** - * This is the sample document that is automatically created from onenote 2013. - */ - @Test - public void testOneNote2013Doc1() throws Exception { - Metadata metadata = new Metadata(); - String txtOut = getText("Sample1.one", metadata); - - Assert.assertFalse("Should not include font names in the text", StringUtils.contains(txtOut, "Calibri")); - Assert.assertFalse("Should not include UTF-16 property values that are garbage", StringUtils.contains(txtOut, "夂菲䈿Ǡ�")); - - // No need to assert the routine garbage that shows up on all onenote files. - } - - @Test - public void testOneNote2013Doc2() throws Exception { - Metadata metadata = new Metadata(); - String txtOut = getText("Section1SheetTitle.one", metadata); - - Assert.assertTrue(txtOut.contains("wow this is neat")); - Assert.assertTrue(txtOut.contains("neat info about totally killin it bro")); - Assert.assertTrue(txtOut.contains("Section1TextArea1")); - Assert.assertTrue(txtOut.contains("Section1HeaderTitle")); - Assert.assertTrue(txtOut.contains("Section1TextArea2")); - - Assert.assertFalse("Should not include font names in the text", txtOut.contains("Calibri")); - Assert.assertFalse("Should not include UTF-16 property values that are garbage", txtOut.contains("夂菲䈿Ǡ�")); - } - - @Test - public void testOneNote2013Doc3() throws Exception { - Metadata metadata = new Metadata(); - String txtOut = getText("Section2SheetTitle.one", metadata); - - Assert.assertTrue(txtOut.contains("awesome information about sports or some crap like that.")); - Assert.assertTrue(txtOut.contains("Quit doing horrible things to me. Dang you. ")); - Assert.assertTrue(txtOut.contains("Section2TextArea1")); - Assert.assertTrue(txtOut.contains("Section2HeaderTitle")); - Assert.assertTrue(txtOut.contains("Section2TextArea2")); - - Assert.assertFalse("Should not include font names in the text", txtOut.contains("Calibri")); - Assert.assertFalse("Should not include UTF-16 property values that are garbage", txtOut.contains("夂菲䈿Ǡ�")); - } - - @Test - public void testOneNote2013Doc4() throws Exception { - Metadata metadata = new Metadata(); - String txtOut = getText("Section3SheetTitle.one", metadata); - - Assert.assertTrue(txtOut.contains("way too much information about poptarts to handle.")); - Assert.assertTrue(txtOut.contains("Section3TextArea1")); - Assert.assertTrue(txtOut.contains("Section3HeaderTitle")); - Assert.assertTrue(txtOut.contains("Section3TextArea2")); - - Assert.assertFalse("Should not include font names in the text", txtOut.contains("Calibri")); - Assert.assertFalse("Should not include UTF-16 property values that are garbage", txtOut.contains("夂菲䈿Ǡ�")); - } -}
