[
https://issues.apache.org/jira/browse/MARMOTTA-593?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14604959#comment-14604959
]
ASF GitHub Bot commented on MARMOTTA-593:
-----------------------------------------
Github user ansell commented on a diff in the pull request:
https://github.com/apache/marmotta/pull/12#discussion_r33428047
--- Diff:
commons/marmotta-sesame-tools/marmotta-rio-rdfhdt/src/main/java/org/apache/marmotta/commons/sesame/rio/rdfhdt/RDFHDTParser.java
---
@@ -0,0 +1,183 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.marmotta.commons.sesame.rio.rdfhdt;
+
+import static
org.apache.marmotta.commons.sesame.rio.rdfhdt.RDFHDTConstants.MAGIC;
+import info.aduna.io.IOUtil;
+
+import java.io.BufferedInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.io.EOFException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Reader;
+import java.util.Arrays;
+import java.util.Properties;
+
+import org.openrdf.model.ValueFactory;
+import org.openrdf.rio.RDFFormat;
+import org.openrdf.rio.RDFHandlerException;
+import org.openrdf.rio.RDFParseException;
+import org.openrdf.rio.helpers.RDFParserBase;
+import org.rdfhdt.hdt.exceptions.IllegalFormatException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * A parser for parsing RDF HDT.
+ * <p/>
+ * Author: Junyue Wang
+ */
+public class RDFHDTParser extends RDFParserBase {
+
+ private static Logger log = LoggerFactory.getLogger(RDFHDTParser.class);
+
+ /**
+ * Creates a new RDFParserBase that will use a
+ * {@link org.openrdf.model.impl.ValueFactoryImpl} to create RDF model
+ * objects.
+ */
+ public RDFHDTParser() {
+ }
+
+ /**
+ * Creates a new RDFParserBase that will use the supplied ValueFactory
to
+ * create RDF model objects.
+ *
+ * @param valueFactory
+ * A ValueFactory.
+ */
+ public RDFHDTParser(ValueFactory valueFactory) {
+ super(valueFactory);
+ }
+
+ /**
+ * Gets the RDF format that this parser can parse.
+ */
+ @Override
+ public RDFFormat getRDFFormat() {
+ return RDFHDTFormat.FORMAT;
+ }
+
+ /**
+ * Parses the data from the supplied InputStream, using the supplied
baseURI
+ * to resolve any relative URI references.
+ *
+ * @param in
+ * The InputStream from which to read the data.
+ * @param baseURI
+ * The URI associated with the data in the InputStream.
+ * @throws java.io.IOException
+ * If an I/O error occurred while data was read from the
+ * InputStream.
+ * @throws org.openrdf.rio.RDFParseException
+ * If the parser has found an unrecoverable parse error.
+ * @throws org.openrdf.rio.RDFHandlerException
+ * If the configured statement handler has encountered an
+ * unrecoverable error.
+ */
+ @Override
+ public void parse(InputStream in, String baseURI) throws IOException,
+ RDFParseException, RDFHandlerException {
+ Preconditions.checkNotNull(baseURI);
+
+ setBaseURI(baseURI);
+
+ rdfHandler.startRDF();
+ DataInputStream input = new DataInputStream(new
BufferedInputStream(in));
+ parseGlobalInfo(input);
+
+ rdfHandler.endRDF();
+ }
+
+ private void parseGlobalInfo(DataInputStream in) throws
RDFParseException,
+ IOException {
+ parseMagic(in);
+ byte type = parseType(in);
+ byte globalInfoType = 1;
+ if (type != globalInfoType) {
+ reportFatalError("The global Information setion type
should be: "
+ + globalInfoType);
+ }
+ String format = parseFormat(in);
+ if(!RDFHDTConstants.HDT_CONTAINER.equals(format)){
+ throw new IllegalFormatException("This software cannot
open this version of HDT File");
+ }
+
+ parseProperties(in);
+ }
+
+ private void parseMagic(DataInputStream input) throws IOException,
+ RDFParseException {
+
+ // Check magic number
+ byte[] magicNumber = IOUtil.readBytes(input, MAGIC.length);
+ if (!Arrays.equals(magicNumber, MAGIC)) {
+ reportFatalError("File does not contain a binary RDF
document");
+ }
+ }
+
+ private byte parseType(DataInputStream input) throws IOException {
+ return input.readByte();
+ }
+
+ private String parseFormat(DataInputStream input) throws IOException{
+ return parseString(input);
+
+ }
+
+ private Properties parseProperties(DataInputStream input) throws
IOException{
+ Properties properties = new Properties();
+ String propertiesStr = this.parseString(input);
+ for(String item : propertiesStr.split(";")) {
+ int pos = item.indexOf('=');
+ if(pos!=-1) {
+ String property = item.substring(0, pos);
+ String value = item.substring(pos+1);
+ properties.put(property, value);
--- End diff --
You can access the RDFHandler using the following check (it may be null):
if(getRDFHandler() != null) {
getRDFHandler().handleNamespace(prefix, uriString);
}
> RDF HDT implementation for Sesame RIO
> -------------------------------------
>
> Key: MARMOTTA-593
> URL: https://issues.apache.org/jira/browse/MARMOTTA-593
> Project: Marmotta
> Issue Type: Task
> Components: KiWi Triple Store
> Reporter: Sergio Fernández
> Labels: gsoc, gsoc2015, hdt, java, linkeddata, rdf, sesame
> Original Estimate: 480h
> Remaining Estimate: 480h
>
> [RDF HDT|http://www.rdfhdt.org] is a compact data structure and binary
> serialization format for RDF that keeps big datasets compressed to save space
> while maintaining search and browse operations without prior decompression.
> This makes it an ideal format for storing and sharing RDF datasets on the Web.
> Currently the [Java
> Implementation|http://www.rdfhdt.org/manual-of-the-java-hdt-library/] only
> provides bindings for Jena RIOT, with a license that does not allow it to be
> integrated into the main Sesame codebase, or any Apache codebase.
> The idea consists of implementing an Apache-licensed implementation of RDF HDT
> from scratch and supporting the [Sesame
> RIO|http://rdf4j.org/sesame/2.8/apidocs/org/openrdf/rio/Rio.html]
> infrastructure (RDFParser/RDFWriter/RDFHandler).
> The implementation requires good knowledge of Java programming,
> plus a basic understanding of parser concepts and the RDF and HDT data
> models.
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)