[ 
https://issues.apache.org/jira/browse/MARMOTTA-593?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14604957#comment-14604957
 ] 

ASF GitHub Bot commented on MARMOTTA-593:
-----------------------------------------

Github user ansell commented on a diff in the pull request:

    https://github.com/apache/marmotta/pull/12#discussion_r33428030
  
    --- Diff: 
commons/marmotta-sesame-tools/marmotta-rio-rdfhdt/src/main/java/org/apache/marmotta/commons/sesame/rio/rdfhdt/RDFHDTParser.java
 ---
    @@ -0,0 +1,183 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements. See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership. The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.marmotta.commons.sesame.rio.rdfhdt;
    +
    +import static 
org.apache.marmotta.commons.sesame.rio.rdfhdt.RDFHDTConstants.MAGIC;
    +import info.aduna.io.IOUtil;
    +
    +import java.io.BufferedInputStream;
    +import java.io.ByteArrayOutputStream;
    +import java.io.DataInputStream;
    +import java.io.EOFException;
    +import java.io.IOException;
    +import java.io.InputStream;
    +import java.io.Reader;
    +import java.util.Arrays;
    +import java.util.Properties;
    +
    +import org.openrdf.model.ValueFactory;
    +import org.openrdf.rio.RDFFormat;
    +import org.openrdf.rio.RDFHandlerException;
    +import org.openrdf.rio.RDFParseException;
    +import org.openrdf.rio.helpers.RDFParserBase;
    +import org.rdfhdt.hdt.exceptions.IllegalFormatException;
    +import org.slf4j.Logger;
    +import org.slf4j.LoggerFactory;
    +
    +import com.google.common.base.Preconditions;
    +
    +/**
    + * A parser for parsing RDF HDT.
    + * <p/>
    + * Author: Junyue Wang
    + */
    +public class RDFHDTParser extends RDFParserBase {
    +
    +   private static Logger log = LoggerFactory.getLogger(RDFHDTParser.class);
    +
    +   /**
    +    * Creates a new RDFParserBase that will use a
    +    * {@link org.openrdf.model.impl.ValueFactoryImpl} to create RDF model
    +    * objects.
    +    */
    +   public RDFHDTParser() {
    +   }
    +
    +   /**
    +    * Creates a new RDFParserBase that will use the supplied ValueFactory 
to
    +    * create RDF model objects.
    +    *
    +    * @param valueFactory
    +    *            A ValueFactory.
    +    */
    +   public RDFHDTParser(ValueFactory valueFactory) {
    +           super(valueFactory);
    +   }
    +
    +   /**
    +    * Gets the RDF format that this parser can parse.
    +    */
    +   @Override
    +   public RDFFormat getRDFFormat() {
    +           return RDFHDTFormat.FORMAT;
    +   }
    +
    +   /**
    +    * Parses the data from the supplied InputStream, using the supplied 
baseURI
    +    * to resolve any relative URI references.
    +    *
    +    * @param in
    +    *            The InputStream from which to read the data.
    +    * @param baseURI
    +    *            The URI associated with the data in the InputStream.
    +    * @throws java.io.IOException
    +    *             If an I/O error occurred while data was read from the
    +    *             InputStream.
    +    * @throws org.openrdf.rio.RDFParseException
    +    *             If the parser has found an unrecoverable parse error.
    +    * @throws org.openrdf.rio.RDFHandlerException
    +    *             If the configured statement handler has encountered an
    +    *             unrecoverable error.
    +    */
    +   @Override
    +   public void parse(InputStream in, String baseURI) throws IOException,
    +                   RDFParseException, RDFHandlerException {
    +           Preconditions.checkNotNull(baseURI);
    +
    +           setBaseURI(baseURI);
    +
    +           rdfHandler.startRDF();
    +           DataInputStream input = new DataInputStream(new 
BufferedInputStream(in));
    +           parseGlobalInfo(input);
    +
    +           rdfHandler.endRDF();
    +   }
    +
    +   private void parseGlobalInfo(DataInputStream in) throws 
RDFParseException,
    +                   IOException {
    +           parseMagic(in);
    +           byte type = parseType(in);
    +           byte globalInfoType = 1;
    +           if (type != globalInfoType) {
    +                   reportFatalError("The global Information setion type 
should be: "
    +                                   + globalInfoType);
    +           }
    +           String format = parseFormat(in);
    +           if(!RDFHDTConstants.HDT_CONTAINER.equals(format)){
    +                   throw new IllegalFormatException("This software cannot 
open this version of HDT File");
    +           }
    +           
    +           parseProperties(in);
    +   }
    +
    +   private void parseMagic(DataInputStream input) throws IOException,
    +                   RDFParseException {
    +
    +           // Check magic number
    +           byte[] magicNumber = IOUtil.readBytes(input, MAGIC.length);
    +           if (!Arrays.equals(magicNumber, MAGIC)) {
    +                   reportFatalError("File does not contain a binary RDF 
document");
    +           }
    +   }
    +
    +   private byte parseType(DataInputStream input) throws IOException {
    +           return input.readByte();
    +   }
    +
    +   private String parseFormat(DataInputStream input) throws IOException{
    +           return parseString(input);
    +
    +   }
    +   
    +   private Properties parseProperties(DataInputStream input) throws 
IOException{
    +           Properties properties = new Properties();
    +        String propertiesStr = this.parseString(input);   
    +        for(String item : propertiesStr.split(";")) {
    +           int pos = item.indexOf('=');
    +           if(pos!=-1) {
    +                   String property = item.substring(0, pos);
    +                   String value = item.substring(pos+1);
    +                   properties.put(property, value);
    --- End diff --
    
    When these properties are namespace definitions, they should be sent to the 
RDFHandler.handleNamespace method.


> RDF HDT implementation for Sesame RIO
> -------------------------------------
>
>                 Key: MARMOTTA-593
>                 URL: https://issues.apache.org/jira/browse/MARMOTTA-593
>             Project: Marmotta
>          Issue Type: Task
>          Components: KiWi Triple Store
>            Reporter: Sergio Fernández
>              Labels: gsoc, gsoc2015, hdt, java, linkeddata, rdf, sesame
>   Original Estimate: 480h
>  Remaining Estimate: 480h
>
> [RDF HDT|http://www.rdfhdt.org] is a compact data structure and binary 
> serialization format for RDF that keeps big datasets compressed to save space 
> while maintaining search and browse operations without prior decompression. 
> This makes it an ideal format for storing and sharing RDF datasets on the Web.
> Currently, the [Java 
> Implementation|http://www.rdfhdt.org/manual-of-the-java-hdt-library/] only 
> provides bindings for Jena RIOT, with a license that does not allow it to be 
> integrated into the main Sesame codebase, or any Apache codebase.
> The idea consists of implementing an Apache-licensed implementation of RDF HDT 
> from scratch and supporting the [Sesame 
> RIO|http://rdf4j.org/sesame/2.8/apidocs/org/openrdf/rio/Rio.html] 
> infrastructure (RDFParser/RDFWriter/RDFHandler). 
> The implementation would require good knowledge of Java programming, 
> plus some basic understanding of parser concepts and the RDF and HDT data 
> models.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to