This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch TIKA-2224 in repository https://gitbox.apache.org/repos/asf/tika.git
commit 4bfda9311423e4448c35f57d62850ebb17a25f73 Author: tallison <[email protected]> AuthorDate: Fri Dec 6 09:49:55 2019 -0500 TIKA-2224 - create initial working branch for development of the one note parser --- .../parser/microsoft/onenote/OneNoteParser.java | 50 ++++++++++++++++++++++ .../services/org.apache.tika.parser.Parser | 1 + .../microsoft/onenote/OneNoteParserTest.java | 36 ++++++++++++++++ 3 files changed, 87 insertions(+) diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteParser.java new file mode 100644 index 0000000..be25d49 --- /dev/null +++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/onenote/OneNoteParser.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.parser.microsoft.onenote; + +import org.apache.tika.exception.TikaException; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.mime.MediaType; +import org.apache.tika.parser.AbstractParser; +import org.apache.tika.parser.ParseContext; +import org.xml.sax.ContentHandler; +import org.xml.sax.SAXException; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Collections; +import java.util.Set; + +public class OneNoteParser extends AbstractParser { + private static final MediaType MEDIA_TYPE = MediaType.application("onenote"); + /** + * Serial version UID + */ + private static final long serialVersionUID = -752276948656079347L; + private static final Set<MediaType> SUPPORTED_TYPES = + Collections.singleton(MEDIA_TYPE); + + @Override + public Set<MediaType> getSupportedTypes(ParseContext context) { + return SUPPORTED_TYPES; + } + + @Override + public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { + + } +} diff --git a/tika-parsers/src/main/resources/META-INF/services/org.apache.tika.parser.Parser b/tika-parsers/src/main/resources/META-INF/services/org.apache.tika.parser.Parser index 120c28b..79d5f5d 100644 --- a/tika-parsers/src/main/resources/META-INF/services/org.apache.tika.parser.Parser +++ b/tika-parsers/src/main/resources/META-INF/services/org.apache.tika.parser.Parser @@ -47,6 +47,7 @@ org.apache.tika.parser.microsoft.MSOwnerFileParser org.apache.tika.parser.microsoft.OfficeParser org.apache.tika.parser.microsoft.OldExcelParser org.apache.tika.parser.microsoft.TNEFParser +org.apache.tika.parser.microsoft.onenote.OneNoteParser org.apache.tika.parser.microsoft.ooxml.OOXMLParser org.apache.tika.parser.microsoft.ooxml.xwpf.ml2006.Word2006MLParser org.apache.tika.parser.microsoft.xml.WordMLParser diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/onenote/OneNoteParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/onenote/OneNoteParserTest.java new file mode 100644 index 0000000..541b5ef --- /dev/null +++ b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/onenote/OneNoteParserTest.java @@ -0,0 +1,36 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * <p/> + * http://www.apache.org/licenses/LICENSE-2.0 + * <p/> + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.parser.microsoft.onenote; + +import org.apache.tika.TikaTest; +import org.apache.tika.metadata.Metadata; +import org.junit.Test; + +import java.util.List; + +import static org.junit.Assert.assertEquals; + +public class OneNoteParserTest extends TikaTest { + + @Test + public void testBasic() throws Exception { + List<Metadata> metadataList = getRecursiveMetadata("testOneNote.one"); + String[] values = metadataList.get(0).getValues("X-Parsed-By"); + assertEquals("org.apache.tika.parser.microsoft.onenote.OneNoteParser", values[1]); + //debug(metadataList); + } +}
