RYA-492 Added language support for Literals; Closes #294
Project: http://git-wip-us.apache.org/repos/asf/incubator-rya/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-rya/commit/2396ebb8 Tree: http://git-wip-us.apache.org/repos/asf/incubator-rya/tree/2396ebb8 Diff: http://git-wip-us.apache.org/repos/asf/incubator-rya/diff/2396ebb8 Branch: refs/heads/master Commit: 2396ebb878d4d49cb64121f0dcf7bbc7f877ebd7 Parents: 2c1efd2 Author: eric.white <[email protected]> Authored: Fri Apr 27 15:01:34 2018 -0400 Committer: Valiyil <[email protected]> Committed: Wed May 16 11:48:24 2018 -0400 ---------------------------------------------------------------------- common/rya.api/pom.xml | 13 + .../java/org/apache/rya/api/domain/RyaType.java | 114 +++-- .../rya/api/resolver/RdfToRyaConversions.java | 114 +++-- .../rya/api/resolver/RyaToRdfConversions.java | 99 ++-- .../resolver/impl/CustomDatatypeResolver.java | 28 +- .../api/resolver/impl/RyaTypeResolverImpl.java | 25 +- .../rya/api/utils/LiteralLanguageUtils.java | 75 +++ .../org/apache/rya/api/domain/RyaTypeTest.java | 33 +- .../api/resolver/LanguageCodesTestHelper.java | 123 +++++ .../api/resolver/RdfToRyaConversionsTest.java | 133 +++++ .../apache/rya/api/resolver/RyaContextTest.java | 54 +- .../api/resolver/RyaToRdfConversionsTest.java | 127 +++++ .../test/resources/ISO-3166-1_Country_Codes.txt | 236 +++++++++ .../test/resources/ISO-639-1_Language_Codes.txt | 191 +++++++ .../test/resources/ISO-639-2_Language_Codes.txt | 504 +++++++++++++++++++ .../AggregationPipelineQueryNode.java | 225 +++++---- .../dao/SimpleMongoDBStorageStrategy.java | 21 +- .../SimpleMongoDBStorageStrategyTest.java | 55 +- extras/indexing/pom.xml | 1 + .../rya/indexing/StatementSerializer.java | 93 ++-- .../java/org/apache/rya/helper/TestFile.java | 53 ++ .../org/apache/rya/helper/TestFileUtils.java | 70 +++ .../AccumuloRyaSailFactoryLoadFilesIT.java | 190 +++++++ .../MongoDbRyaSailFactoryLoadFilesIT.java | 171 +++++++ .../resources/rdf_format_files/binary_data.brf | Bin 0 -> 757 bytes 
.../rdf_format_files/jsonld_data.jsonld | 17 + .../test/resources/rdf_format_files/n3_data.n3 | 14 + .../resources/rdf_format_files/nquads_data.nq | 2 + .../resources/rdf_format_files/ntriples_data.nt | 3 + .../resources/rdf_format_files/rdfjson_data.rj | 7 + .../resources/rdf_format_files/rdfxml_data.owl | 12 + .../resources/rdf_format_files/trig_data.trig | 10 + .../resources/rdf_format_files/trix_data.trix | 19 + .../resources/rdf_format_files/turtle_data.ttl | 11 + .../GeoTemporalMongoDBStorageStrategyTest.java | 19 +- .../java/org/apache/rya/reasoning/Fact.java | 87 ++-- .../apache/rya/accumulo/mr/RyaTypeWritable.java | 34 +- .../rya/accumulo/mr/GraphXInputFormatTest.java | 50 +- .../pig/StatementPatternStorageTest.java | 76 +-- .../RyaSailRepositoryConnection.java | 51 +- 40 files changed, 2704 insertions(+), 456 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/2396ebb8/common/rya.api/pom.xml ---------------------------------------------------------------------- diff --git a/common/rya.api/pom.xml b/common/rya.api/pom.xml index ea4fb19..5611132 100644 --- a/common/rya.api/pom.xml +++ b/common/rya.api/pom.xml @@ -115,6 +115,19 @@ under the License. 
</dependencies> <build> + <pluginManagement> + <plugins> + <plugin> + <groupId>org.apache.rat</groupId> + <artifactId>apache-rat-plugin</artifactId> + <configuration> + <excludes> + <exclude>**/src/test/resources/**</exclude> + </excludes> + </configuration> + </plugin> + </plugins> + </pluginManagement> <plugins> <plugin> <artifactId>maven-jar-plugin</artifactId> http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/2396ebb8/common/rya.api/src/main/java/org/apache/rya/api/domain/RyaType.java ---------------------------------------------------------------------- diff --git a/common/rya.api/src/main/java/org/apache/rya/api/domain/RyaType.java b/common/rya.api/src/main/java/org/apache/rya/api/domain/RyaType.java index 8de4667..e40c8d6 100644 --- a/common/rya.api/src/main/java/org/apache/rya/api/domain/RyaType.java +++ b/common/rya.api/src/main/java/org/apache/rya/api/domain/RyaType.java @@ -18,6 +18,9 @@ */ package org.apache.rya.api.domain; +import java.util.Objects; + +import org.apache.commons.lang.builder.CompareToBuilder; import org.apache.commons.lang.builder.EqualsBuilder; import org.eclipse.rdf4j.model.IRI; import org.eclipse.rdf4j.model.vocabulary.XMLSchema; @@ -27,23 +30,47 @@ import org.eclipse.rdf4j.model.vocabulary.XMLSchema; * Date: 7/16/12 * Time: 11:45 AM */ -public class RyaType implements Comparable { +public class RyaType implements Comparable<RyaType> { private IRI dataType; private String data; + private String language; + /** + * Creates a new instance of {@link RyaType}. + */ public RyaType() { - setDataType(XMLSchema.STRING); + this(null); } + /** + * Creates a new instance of {@link RyaType} of type + * {@link XMLSchema#STRING} and with no language. + * @param data the data string. + */ public RyaType(final String data) { this(XMLSchema.STRING, data); } - + /** + * Creates a new instance of {@link RyaType} with no language. + * @param dataType the {@link IRI} data type. + * @param data the data string. 
+ */ public RyaType(final IRI dataType, final String data) { - setDataType(dataType); - setData(data); + this(dataType, data, null); + } + + /** + * Creates a new instance of {@link RyaType}. + * @param dataType the {@link IRI} data type. + * @param data the data string. + * @param language the language code. + */ + public RyaType(final IRI dataType, final String data, final String language) { + this.dataType = dataType; + this.data = data; + this.language = language; } /** @@ -67,20 +94,40 @@ public class RyaType implements Comparable { this.data = data; } + /** + * @return the language code. + */ + public String getLanguage() { + return language; + } + + /** + * Sets the language code. + * @param language the language code. + */ + public void setLanguage(final String language) { + this.language = language; + } + @Override public String toString() { final StringBuilder sb = new StringBuilder(); sb.append("RyaType"); sb.append("{dataType=").append(dataType); sb.append(", data='").append(data).append('\''); + if (language != null) { + sb.append(", language='").append(language).append('\''); + } sb.append('}'); return sb.toString(); } /** - * Determine equality based on string representations of data and datatype. + * Determine equality based on string representations of data, datatype, and + * language. * @param o The object to compare with - * @return true if the other object is also a RyaType and both data and datatype match. + * @return {@code true} if the other object is also a RyaType and the data, + * datatype, and language all match. 
*/ @Override public boolean equals(final Object o) { @@ -93,53 +140,40 @@ public class RyaType implements Comparable { final RyaType other = (RyaType) o; final EqualsBuilder builder = new EqualsBuilder() .append(getData(), other.getData()) - .append(getDataType(), other.getDataType()); + .append(getDataType(), other.getDataType()) + .append(getLanguage(), other.getLanguage()); return builder.isEquals(); } /** - * Generate a hash based on the string representations of both data and datatype. + * Generate a hash based on the string representations of data, datatype, + * and language. * @return A hash consistent with equals. */ @Override public int hashCode() { - int result = dataType != null ? dataType.hashCode() : 0; - result = 31 * result + (data != null ? data.hashCode() : 0); - return result; + return Objects.hash(dataType, data, language); } /** - * Define a natural ordering based on data and datatype. + * Define a natural ordering based on data, datatype, and language. * @param o The object to compare with - * @return 0 if both the data string and the datatype string representation match between the objects, - * where matching is defined by string comparison or both being null; - * Otherwise, an integer whose sign yields a consistent ordering. + * @return 0 if the data string, the datatype string, and the language + * string representation match between the objects, where matching is + * defined by string comparison or all being null; + * Otherwise, an integer whose sign yields a consistent ordering. 
*/ @Override - public int compareTo(final Object o) { - int result = -1; - if (o != null && o instanceof RyaType) { - result = 0; - final RyaType other = (RyaType) o; - if (this.data != other.data) { - if (this.data == null) { - return 1; - } - if (other.data == null) { - return -1; - } - result = this.data.compareTo(other.data); - } - if (result == 0 && this.dataType != other.dataType) { - if (this.dataType == null) { - return 1; - } - if (other.dataType == null) { - return -1; - } - result = this.dataType.toString().compareTo(other.dataType.toString()); - } + public int compareTo(final RyaType o) { + if (o == null) { + return 1; } - return result; + final String dataTypeStr = getDataType() != null ? getDataType().stringValue() : null; + final String otherDataTypeStr = o.getDataType() != null ? o.getDataType().stringValue() : null; + final CompareToBuilder builder = new CompareToBuilder() + .append(getData(), o.getData()) + .append(dataTypeStr, otherDataTypeStr) + .append(getLanguage(), o.getLanguage()); + return builder.toComparison(); } } http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/2396ebb8/common/rya.api/src/main/java/org/apache/rya/api/resolver/RdfToRyaConversions.java ---------------------------------------------------------------------- diff --git a/common/rya.api/src/main/java/org/apache/rya/api/resolver/RdfToRyaConversions.java b/common/rya.api/src/main/java/org/apache/rya/api/resolver/RdfToRyaConversions.java index 0618c82..8554e77 100644 --- a/common/rya.api/src/main/java/org/apache/rya/api/resolver/RdfToRyaConversions.java +++ b/common/rya.api/src/main/java/org/apache/rya/api/resolver/RdfToRyaConversions.java @@ -1,5 +1,3 @@ -package org.apache.rya.api.resolver; - /* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file @@ -8,9 +6,9 @@ package org.apache.rya.api.resolver; * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @@ -18,57 +16,97 @@ package org.apache.rya.api.resolver; * specific language governing permissions and limitations * under the License. */ +package org.apache.rya.api.resolver; + +import static org.apache.rya.api.utils.LiteralLanguageUtils.UNDETERMINED_LANGUAGE; +import org.apache.log4j.Logger; import org.apache.rya.api.domain.RangeIRI; import org.apache.rya.api.domain.RangeValue; +import org.apache.rya.api.domain.RyaIRI; +import org.apache.rya.api.domain.RyaIRIRange; import org.apache.rya.api.domain.RyaSchema; import org.apache.rya.api.domain.RyaStatement; import org.apache.rya.api.domain.RyaType; import org.apache.rya.api.domain.RyaTypeRange; -import org.apache.rya.api.domain.RyaIRI; -import org.apache.rya.api.domain.RyaIRIRange; +import org.apache.rya.api.log.LogUtils; import org.eclipse.rdf4j.model.BNode; import org.eclipse.rdf4j.model.IRI; import org.eclipse.rdf4j.model.Literal; import org.eclipse.rdf4j.model.Resource; import org.eclipse.rdf4j.model.Statement; import org.eclipse.rdf4j.model.Value; +import org.eclipse.rdf4j.model.util.Literals; /** - * Date: 7/17/12 - * Time: 8:34 AM + * Methods for converting values from their RDF4J object representations into + * their Rya object equivalents. */ public class RdfToRyaConversions { + private static final Logger log = Logger.getLogger(RdfToRyaConversions.class); - public static RyaIRI convertIRI(IRI iri) { - if (iri == null) return null; + /** + * Converts a {@link IRI} into a {@link RyaIRI} representation of the + * {@code iri}. 
+ * @param iri the {@link IRI} to convert. + * @return the {@link RyaIRI} representation of the {@code iri}. + */ + public static RyaIRI convertIRI(final IRI iri) { + if (iri == null) { + return null; + } if (iri instanceof RangeIRI) { - RangeIRI riri = (RangeIRI) iri; + final RangeIRI riri = (RangeIRI) iri; return new RyaIRIRange(convertIRI(riri.getStart()), convertIRI(riri.getEnd())); } return new RyaIRI(iri.stringValue()); } - public static RyaType convertLiteral(Literal literal) { - if (literal == null) return null; + /** + * Converts a {@link Literal} into a {@link RyaType} representation of the + * {@code literal}. + * @param literal the {@link Literal} to convert. + * @return the {@link RyaType} representation of the {@code literal}. + */ + public static RyaType convertLiteral(final Literal literal) { + if (literal == null) { + return null; + } if (literal.getDatatype() != null) { + if (Literals.isLanguageLiteral(literal)) { + final String language = literal.getLanguage().get(); + if (Literals.isValidLanguageTag(language)) { + return new RyaType(literal.getDatatype(), literal.stringValue(), language); + } else { + log.warn("Invalid language (" + LogUtils.clean(language) + ") found in Literal. Defaulting to: " + UNDETERMINED_LANGUAGE); + // Replace invalid language with "und" + return new RyaType(literal.getDatatype(), literal.stringValue(), UNDETERMINED_LANGUAGE); + } + } return new RyaType(literal.getDatatype(), literal.stringValue()); } - //no language literal conversion yet return new RyaType(literal.stringValue()); } - public static RyaType convertValue(Value value) { - if (value == null) return null; + /** + * Converts a {@link Value} into a {@link RyaType} representation of the + * {@code value}. + * @param value the {@link Value} to convert. + * @return the {@link RyaType} representation of the {@code value}. 
+ */ + public static RyaType convertValue(final Value value) { + if (value == null) { + return null; + } //assuming either IRI or Literal here - if(value instanceof Resource) { + if (value instanceof Resource) { return convertResource((Resource) value); } if (value instanceof Literal) { return convertLiteral((Literal) value); } if (value instanceof RangeValue) { - RangeValue<?> rv = (RangeValue<?>) value; + final RangeValue<?> rv = (RangeValue<?>) value; if (rv.getStart() instanceof IRI) { return new RyaIRIRange(convertIRI((IRI) rv.getStart()), convertIRI((IRI) rv.getEnd())); } else { @@ -79,25 +117,41 @@ public class RdfToRyaConversions { return null; } - public static RyaIRI convertResource(Resource subject) { - if(subject == null) return null; - if (subject instanceof BNode) { - return new RyaIRI(RyaSchema.BNODE_NAMESPACE + ((BNode) subject).getID()); + /** + * Converts a {@link Resource} into a {@link RyaIRI} representation of the + * {@code resource}. + * @param resource the {@link Resource} to convert. Generally this will be + * the subject. + * @return the {@link RyaIRI} representation of the {@code resource}. + */ + public static RyaIRI convertResource(final Resource resource) { + if (resource == null) { + return null; } - return convertIRI((IRI) subject); + if (resource instanceof BNode) { + return new RyaIRI(RyaSchema.BNODE_NAMESPACE + ((BNode) resource).getID()); + } + return convertIRI((IRI) resource); } - public static RyaStatement convertStatement(Statement statement) { - if (statement == null) return null; - Resource subject = statement.getSubject(); - IRI predicate = statement.getPredicate(); - Value object = statement.getObject(); - Resource context = statement.getContext(); + /** + * Converts a {@link Statement} into a {@link RyaStatement} representation + * of the {@code statement}. + * @param statement the {@link Statement} to convert. + * @return the {@link RyaStatement} representation of the {@code statement}. 
+ */ + public static RyaStatement convertStatement(final Statement statement) { + if (statement == null) { + return null; + } + final Resource subject = statement.getSubject(); + final IRI predicate = statement.getPredicate(); + final Value object = statement.getObject(); + final Resource context = statement.getContext(); return new RyaStatement( convertResource(subject), convertIRI(predicate), convertValue(object), convertResource(context)); } - } http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/2396ebb8/common/rya.api/src/main/java/org/apache/rya/api/resolver/RyaToRdfConversions.java ---------------------------------------------------------------------- diff --git a/common/rya.api/src/main/java/org/apache/rya/api/resolver/RyaToRdfConversions.java b/common/rya.api/src/main/java/org/apache/rya/api/resolver/RyaToRdfConversions.java index 88b79bf..27d329a 100644 --- a/common/rya.api/src/main/java/org/apache/rya/api/resolver/RyaToRdfConversions.java +++ b/common/rya.api/src/main/java/org/apache/rya/api/resolver/RyaToRdfConversions.java @@ -1,5 +1,3 @@ -package org.apache.rya.api.resolver; - /* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -8,9 +6,9 @@ package org.apache.rya.api.resolver; * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @@ -18,59 +16,98 @@ package org.apache.rya.api.resolver; * specific language governing permissions and limitations * under the License. 
*/ +package org.apache.rya.api.resolver; +import org.apache.rya.api.domain.RyaIRI; import org.apache.rya.api.domain.RyaStatement; import org.apache.rya.api.domain.RyaType; -import org.apache.rya.api.domain.RyaIRI; +import org.apache.rya.api.utils.LiteralLanguageUtils; import org.eclipse.rdf4j.model.IRI; import org.eclipse.rdf4j.model.Literal; import org.eclipse.rdf4j.model.Statement; import org.eclipse.rdf4j.model.Value; import org.eclipse.rdf4j.model.ValueFactory; import org.eclipse.rdf4j.model.impl.SimpleValueFactory; +import org.eclipse.rdf4j.model.util.Literals; +import org.eclipse.rdf4j.model.vocabulary.RDF; import org.eclipse.rdf4j.model.vocabulary.XMLSchema; /** - * Date: 7/17/12 - * Time: 8:34 AM + * Methods for converting values from their Rya object representations into + * their RDF4J object equivalents. */ public class RyaToRdfConversions { private static final ValueFactory VF = SimpleValueFactory.getInstance(); - public static IRI convertIRI(RyaIRI iri) { - return VF.createIRI(iri.getData()); + /** + * Converts a {@link RyaIRI} into a {@link IRI} representation of the + * {@code ryaIri}. + * @param ryaIri the {@link RyaIRI} to convert. + * @return the {@link IRI} representation of the {@code ryaIri}. + */ + public static IRI convertIRI(final RyaIRI ryaIri) { + return VF.createIRI(ryaIri.getData()); } - - private static IRI convertIRI(RyaType value) { - return VF.createIRI(value.getData()); + + /** + * Converts a {@link RyaType} into a {@link IRI} representation of the + * {@code ryaType}. + * @param ryaType the {@link RyaType} to convert. + * @return the {@link IRI} representation of the {@code ryaType}. 
+ */ + private static IRI convertIRI(final RyaType ryaType) { + return VF.createIRI(ryaType.getData()); } - public static Literal convertLiteral(RyaType literal) { - if (XMLSchema.STRING.equals(literal.getDataType())) { - return VF.createLiteral(literal.getData()); - } else { - return VF.createLiteral(literal.getData(), literal.getDataType()); + /** + * Converts a {@link RyaType} into a {@link Literal} representation of the + * {@code ryaType}. + * @param ryaType the {@link RyaType} to convert. + * @return the {@link Literal} representation of the {@code ryaType}. + */ + public static Literal convertLiteral(final RyaType ryaType) { + if (XMLSchema.STRING.equals(ryaType.getDataType())) { + return VF.createLiteral(ryaType.getData()); + } else if (RDF.LANGSTRING.equals(ryaType.getDataType())) { + final String data = ryaType.getData(); + final String language = ryaType.getLanguage(); + if (language != null && Literals.isValidLanguageTag(language)) { + return VF.createLiteral(data, language); + } else { + return VF.createLiteral(data, LiteralLanguageUtils.UNDETERMINED_LANGUAGE); + } } - //TODO: No Language support yet + return VF.createLiteral(ryaType.getData(), ryaType.getDataType()); } - public static Value convertValue(RyaType value) { + /** + * Converts a {@link RyaType} into a {@link Value} representation of the + * {@code ryaType}. + * @param ryaType the {@link RyaType} to convert. + * @return the {@link Value} representation of the {@code ryaType}. + */ + public static Value convertValue(final RyaType ryaType) { //assuming either IRI or Literal here - return (value instanceof RyaIRI || value.getDataType().equals(XMLSchema.ANYURI)) ? convertIRI(value) : convertLiteral(value); + return (ryaType instanceof RyaIRI || ryaType.getDataType().equals(XMLSchema.ANYURI)) ? 
convertIRI(ryaType) : convertLiteral(ryaType); } - public static Statement convertStatement(RyaStatement statement) { - assert statement != null; - if (statement.getContext() != null) { - return VF.createStatement(convertIRI(statement.getSubject()), - convertIRI(statement.getPredicate()), - convertValue(statement.getObject()), - convertIRI(statement.getContext())); + /** + * Converts a {@link RyaStatement} into a {@link Statement} representation + * of the {@code ryaStatement}. + * @param ryaStatement the {@link RyaStatement} to convert. + * @return the {@link Statement} representation of the {@code ryaStatement}. + */ + public static Statement convertStatement(final RyaStatement ryaStatement) { + assert ryaStatement != null; + if (ryaStatement.getContext() != null) { + return VF.createStatement(convertIRI(ryaStatement.getSubject()), + convertIRI(ryaStatement.getPredicate()), + convertValue(ryaStatement.getObject()), + convertIRI(ryaStatement.getContext())); } else { - return VF.createStatement(convertIRI(statement.getSubject()), - convertIRI(statement.getPredicate()), - convertValue(statement.getObject())); + return VF.createStatement(convertIRI(ryaStatement.getSubject()), + convertIRI(ryaStatement.getPredicate()), + convertValue(ryaStatement.getObject())); } } - } http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/2396ebb8/common/rya.api/src/main/java/org/apache/rya/api/resolver/impl/CustomDatatypeResolver.java ---------------------------------------------------------------------- diff --git a/common/rya.api/src/main/java/org/apache/rya/api/resolver/impl/CustomDatatypeResolver.java b/common/rya.api/src/main/java/org/apache/rya/api/resolver/impl/CustomDatatypeResolver.java index 075b3f8..da38a8c 100644 --- a/common/rya.api/src/main/java/org/apache/rya/api/resolver/impl/CustomDatatypeResolver.java +++ b/common/rya.api/src/main/java/org/apache/rya/api/resolver/impl/CustomDatatypeResolver.java @@ -25,7 +25,10 @@ import java.nio.charset.StandardCharsets; import 
org.apache.rya.api.domain.RyaType; import org.apache.rya.api.resolver.RyaTypeResolverException; +import org.apache.rya.api.utils.LiteralLanguageUtils; import org.eclipse.rdf4j.model.impl.SimpleValueFactory; +import org.eclipse.rdf4j.model.util.Literals; +import org.eclipse.rdf4j.model.vocabulary.RDF; import com.google.common.primitives.Bytes; @@ -42,7 +45,15 @@ public class CustomDatatypeResolver extends RyaTypeResolverImpl { @Override public byte[][] serializeType(final RyaType ryaType) throws RyaTypeResolverException { - final byte[] bytes = serializeData(ryaType.getData()).getBytes(StandardCharsets.UTF_8); + final StringBuilder dataBuilder = new StringBuilder(); + dataBuilder.append(ryaType.getData()); + final String validatedLanguage = LiteralLanguageUtils.validateLanguage(ryaType.getLanguage(), ryaType.getDataType()); + if (validatedLanguage != null) { + dataBuilder.append(LiteralLanguageUtils.LANGUAGE_DELIMITER); + dataBuilder.append(validatedLanguage); + } + // Combine data and language + final byte[] bytes = serializeData(dataBuilder.toString()).getBytes(StandardCharsets.UTF_8); return new byte[][]{bytes, Bytes.concat(TYPE_DELIM_BYTES, ryaType.getDataType().stringValue().getBytes(StandardCharsets.UTF_8), TYPE_DELIM_BYTES, markerBytes)}; } @@ -63,9 +74,20 @@ public class CustomDatatypeResolver extends RyaTypeResolverImpl { if (indexOfType < 1) { throw new RyaTypeResolverException("Not a datatype literal"); } - final String label = deserializeData(new String(bytes, 0, indexOfType, StandardCharsets.UTF_8)); + String data = deserializeData(new String(bytes, 0, indexOfType, StandardCharsets.UTF_8)); rt.setDataType(SimpleValueFactory.getInstance().createIRI(new String(bytes, indexOfType + 1, (length - indexOfType) - 3, StandardCharsets.UTF_8))); - rt.setData(label); + if (RDF.LANGSTRING.equals(rt.getDataType())) { + final int langDelimiterPos = data.lastIndexOf(LiteralLanguageUtils.LANGUAGE_DELIMITER); + final String parsedData = data.substring(0, 
langDelimiterPos); + final String language = data.substring(langDelimiterPos + 1, data.length()); + if (language != null && Literals.isValidLanguageTag(language)) { + rt.setLanguage(language); + } else { + rt.setLanguage(LiteralLanguageUtils.UNDETERMINED_LANGUAGE); + } + data = parsedData; + } + rt.setData(data); return rt; } } http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/2396ebb8/common/rya.api/src/main/java/org/apache/rya/api/resolver/impl/RyaTypeResolverImpl.java ---------------------------------------------------------------------- diff --git a/common/rya.api/src/main/java/org/apache/rya/api/resolver/impl/RyaTypeResolverImpl.java b/common/rya.api/src/main/java/org/apache/rya/api/resolver/impl/RyaTypeResolverImpl.java index fba7a29..8b45221 100644 --- a/common/rya.api/src/main/java/org/apache/rya/api/resolver/impl/RyaTypeResolverImpl.java +++ b/common/rya.api/src/main/java/org/apache/rya/api/resolver/impl/RyaTypeResolverImpl.java @@ -27,9 +27,12 @@ import org.apache.rya.api.domain.RyaRange; import org.apache.rya.api.domain.RyaType; import org.apache.rya.api.resolver.RyaTypeResolver; import org.apache.rya.api.resolver.RyaTypeResolverException; +import org.apache.rya.api.utils.LiteralLanguageUtils; import org.calrissian.mango.types.LexiTypeEncoders; import org.calrissian.mango.types.TypeEncoder; import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.util.Literals; +import org.eclipse.rdf4j.model.vocabulary.RDF; import org.eclipse.rdf4j.model.vocabulary.XMLSchema; import com.google.common.primitives.Bytes; @@ -79,7 +82,14 @@ public class RyaTypeResolverImpl implements RyaTypeResolver { @Override public byte[][] serializeType(final RyaType ryaType) throws RyaTypeResolverException { - final byte[] bytes = serializeData(ryaType.getData()).getBytes(StandardCharsets.UTF_8); + final StringBuilder dataBuilder = new StringBuilder(); + dataBuilder.append(ryaType.getData()); + final String validatedLanguage = 
LiteralLanguageUtils.validateLanguage(ryaType.getLanguage(), dataType); + if (validatedLanguage != null) { + dataBuilder.append(LiteralLanguageUtils.LANGUAGE_DELIMITER); + dataBuilder.append(validatedLanguage); + } + final byte[] bytes = serializeData(dataBuilder.toString()).getBytes(StandardCharsets.UTF_8); return new byte[][]{bytes, Bytes.concat(TYPE_DELIM_BYTES, markerBytes)}; } @@ -113,7 +123,18 @@ public class RyaTypeResolverImpl implements RyaTypeResolver { } final RyaType rt = newInstance(); rt.setDataType(getRyaDataType()); - final String data = new String(bytes, 0, bytes.length - 2, StandardCharsets.UTF_8); + String data = new String(bytes, 0, bytes.length - 2, StandardCharsets.UTF_8); + if (RDF.LANGSTRING.equals(rt.getDataType())) { + final int langDelimiterPos = data.lastIndexOf(LiteralLanguageUtils.LANGUAGE_DELIMITER); + final String parsedData = data.substring(0, langDelimiterPos); + final String language = data.substring(langDelimiterPos + 1, data.length()); + if (language != null && Literals.isValidLanguageTag(language)) { + rt.setLanguage(language); + } else { + rt.setLanguage(LiteralLanguageUtils.UNDETERMINED_LANGUAGE); + } + data = parsedData; + } rt.setData(deserializeData(data)); return rt; } http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/2396ebb8/common/rya.api/src/main/java/org/apache/rya/api/utils/LiteralLanguageUtils.java ---------------------------------------------------------------------- diff --git a/common/rya.api/src/main/java/org/apache/rya/api/utils/LiteralLanguageUtils.java b/common/rya.api/src/main/java/org/apache/rya/api/utils/LiteralLanguageUtils.java new file mode 100644 index 0000000..d1c81ee --- /dev/null +++ b/common/rya.api/src/main/java/org/apache/rya/api/utils/LiteralLanguageUtils.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.rya.api.utils; + +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.Literal; +import org.eclipse.rdf4j.model.util.Literals; +import org.eclipse.rdf4j.model.vocabulary.RDF; + +/** + * Utility methods and constants for {@link Literal} languages. + */ +public final class LiteralLanguageUtils { + /** + * Special identifier used if there is language content, but the specific + * language cannot be determined. Should be avoided if possible. + * See <a href=https://www.loc.gov/standards/iso639-2/faq.html#25>here</a> + * for more info. + */ + public static final String UNDETERMINED_LANGUAGE = "und"; + + /** + * Delimiter between the data and the language tag. + */ + public static final String LANGUAGE_DELIMITER = "@"; + + /** + * Private constructor to prevent instantiation. + */ + private LiteralLanguageUtils() { + } + + /** + * Validates the language based on the data type. 
+ * <p> + * This will do one of the following: + * <ul> + * <li>Return the original {@code language} if the {@code dataType} is + * {@link RDF#LANGSTRING} and it's of a VALID format.</li> + * <li>Returns {@link UNDETERMINED_LANGUAGE} if the {@code dataType} is + * {@link RDF#LANGSTRING} and it's of an INVALID format.</li> + * <li>Return {@code null} if the dataType is NOT {@link RDF#LANGSTRING}.</li> + * </ul> + * @param language the language to validate. + * @param dataType the {@link IRI} data type to validate against. + * @return the validated language. + */ + public static String validateLanguage(final String language, final IRI dataType) { + String result = null; + if (RDF.LANGSTRING.equals(dataType)) { + if (language != null && Literals.isValidLanguageTag(language)) { + result = language; + } else { + result = UNDETERMINED_LANGUAGE; + } + } + return result; + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/2396ebb8/common/rya.api/src/test/java/org/apache/rya/api/domain/RyaTypeTest.java ---------------------------------------------------------------------- diff --git a/common/rya.api/src/test/java/org/apache/rya/api/domain/RyaTypeTest.java b/common/rya.api/src/test/java/org/apache/rya/api/domain/RyaTypeTest.java index 7cfc77b..53d02cd 100644 --- a/common/rya.api/src/test/java/org/apache/rya/api/domain/RyaTypeTest.java +++ b/common/rya.api/src/test/java/org/apache/rya/api/domain/RyaTypeTest.java @@ -1,5 +1,3 @@ -package org.apache.rya.api.domain; - /* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -18,7 +16,9 @@ package org.apache.rya.api.domain; * specific language governing permissions and limitations * under the License. 
*/ +package org.apache.rya.api.domain; +import org.eclipse.rdf4j.model.vocabulary.RDF; import org.eclipse.rdf4j.model.vocabulary.XMLSchema; import org.junit.Assert; import org.junit.Test; @@ -29,8 +29,12 @@ public class RyaTypeTest { static RyaType c = new RyaType(XMLSchema.STRING, "http://www.example.com/Carol"); static RyaType aIri = new RyaType(XMLSchema.ANYURI, "http://www.example.com/Alice"); static RyaType bIri = new RyaType(XMLSchema.ANYURI, "http://www.example.com/Bob"); + static RyaType aLang = new RyaType(RDF.LANGSTRING, "http://www.example.com/Alice", "en"); + static RyaType aDiffLang = new RyaType(RDF.LANGSTRING, "http://www.example.com/Alice", "fr"); + static RyaType bLang = new RyaType(RDF.LANGSTRING, "http://www.example.com/Bob", "en"); RyaType nullData = new RyaType(XMLSchema.STRING, null); RyaType nullType = new RyaType(null, "http://www.example.com/Alice"); + RyaType nullLang = new RyaType(RDF.LANGSTRING, "http://www.example.com/Alice", null); RyaType nullBoth = new RyaType(null, null); RyaType same = new RyaType(XMLSchema.STRING, "http://www.example.com/Alice"); @@ -42,17 +46,21 @@ public class RyaTypeTest { Assert.assertFalse("compareTo should return nonzero for same datatype and different data.", bIri.compareTo(aIri) == 0); Assert.assertEquals("compareTo should return zero for different objects with matching data and datatype.", 0, a.compareTo(same)); + Assert.assertEquals("compareTo(self) return zero.", 0, aLang.compareTo(aLang)); + Assert.assertFalse("compareTo should return nonzero for different languages.", aLang.compareTo(aDiffLang) == 0); + Assert.assertFalse("compareTo should return nonzero for same datatype and language and different data.", aLang.compareTo(bLang) == 0); } @Test public void testCompareToNullFields() throws Exception { - Assert.assertEquals("[has no nulls].compareTo([has null data]) should return -1", -1, a.compareTo(nullData)); - Assert.assertEquals("[has no nulls].compareTo([has null type]) should return -1 if data is 
equal", - -1, a.compareTo(nullType)); - Assert.assertEquals("[has null data].compareTo([has no nulls]) should return 1", 1, nullData.compareTo(a)); - Assert.assertEquals("[has null type].compareTo([has no nulls]) should return 1 if data is equal", - 1, nullType.compareTo(a)); - Assert.assertEquals("[has null type].compareTo([has null data]) should return -1", -1, nullType.compareTo(nullData)); + Assert.assertEquals("[has no nulls].compareTo([has null data]) should return 1", 1, a.compareTo(nullData)); + Assert.assertEquals("[has no nulls].compareTo([has null type]) should return 1 if data is equal", + 1, a.compareTo(nullType)); + Assert.assertEquals("[has null data].compareTo([has no nulls]) should return -1", -1, nullData.compareTo(a)); + Assert.assertEquals("[has null type].compareTo([has no nulls]) should return -1 if data is equal", + -1, nullType.compareTo(a)); + Assert.assertEquals("[has null type].compareTo([has null data]) should return 1", 1, nullType.compareTo(nullData)); + Assert.assertEquals("[has no nulls].compareTo([has null lang]) should return 1", 1, aLang.compareTo(nullLang)); } @Test @@ -71,7 +79,7 @@ public class RyaTypeTest { @Test public void testCompareToTransitive() throws Exception { - int sign = Integer.signum(a.compareTo(b)); + final int sign = Integer.signum(a.compareTo(b)); Assert.assertEquals("compareTo(a,b) and compareTo(b,c) should have the same sign.", sign, Integer.signum(b.compareTo(c))); Assert.assertEquals("if a > b > c, compareTo(a,c) should be consistent.", sign, Integer.signum(a.compareTo(c))); @@ -89,7 +97,8 @@ public class RyaTypeTest { Assert.assertFalse("equals(null) should return false.", a.equals(null)); Assert.assertFalse("Same data, one null datatype should be unequal.", a.equals(nullType)); Assert.assertFalse("Same datatype, one null data should be unequal.", a.equals(nullData)); - RyaType sameNull = new RyaType(null, null); + Assert.assertFalse("Same datatype, data, one null lang should be unequal.", 
aLang.equals(nullLang)); + final RyaType sameNull = new RyaType(null, null); Assert.assertTrue("Matching null fields should be equal.", sameNull.equals(nullBoth)); } @@ -103,6 +112,8 @@ public class RyaTypeTest { a.equals(aIri), a.compareTo(aIri) == 0); Assert.assertEquals("equals and compareTo inconsistent for different values and different types.", a.equals(bIri), a.compareTo(bIri) == 0); + Assert.assertEquals("equals and compareTo inconsistent for different lang and same types/data.", + aLang.equals(bLang), aLang.compareTo(bLang) == 0); } @Test http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/2396ebb8/common/rya.api/src/test/java/org/apache/rya/api/resolver/LanguageCodesTestHelper.java ---------------------------------------------------------------------- diff --git a/common/rya.api/src/test/java/org/apache/rya/api/resolver/LanguageCodesTestHelper.java b/common/rya.api/src/test/java/org/apache/rya/api/resolver/LanguageCodesTestHelper.java new file mode 100644 index 0000000..e95951c --- /dev/null +++ b/common/rya.api/src/test/java/org/apache/rya/api/resolver/LanguageCodesTestHelper.java @@ -0,0 +1,123 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.rya.api.resolver; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; +import java.util.Set; +import java.util.stream.Stream; + +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Lists; + +/** + * Helper class for reading language code files and generating a list of + * languages to test. + */ +public final class LanguageCodesTestHelper { + private static final String ALPHA_2_LANGUAGE_CODE_FILE = "src/test/resources/ISO-639-1_Language_Codes.txt"; + private static final String ALPHA_3_LANGUAGE_CODE_FILE = "src/test/resources/ISO-639-2_Language_Codes.txt"; + private static final String COUNTRY_CODE_FILE = "src/test/resources/ISO-3166-1_Country_Codes.txt"; + + private Set<String> languageCodes; + + /** + * Private constructor to enforce singleton pattern. + * @throws Exception + */ + private LanguageCodesTestHelper() throws Exception { + setupLanguageCodes(); + } + + private static class InstanceHolder { + private static final LanguageCodesTestHelper INSTANCE; + static { + try { + INSTANCE = new LanguageCodesTestHelper(); + } catch (final Exception e) { + throw new ExceptionInInitializerError(e); + } + } + } + + /** + * @return the singleton instance of {@link LanguageCodesTestHelper}. + */ + public static LanguageCodesTestHelper getInstance(){ + return InstanceHolder.INSTANCE; + } + + /** + * Generates a list of language codes with country codes to test. This tries + * to reproduce BCP-47 compliant language tags as specified by + * <a href="https://tools.ietf.org/html/bcp47">BCP47</a>. 
+ * <p> + * Adds all of the following combinations for testing: + * <ul> + * <li>Alpha-2 language code only</li> + * <li>Alpha-3 language code only</li> + * <li>Alpha-2 language code + Country code</li> + * <li>Alpha-3 language code + Country code</li> + * </ul> + * This should produce most common combinations and a lot of unlikely ones + * too. + * @throws Exception + */ + private void setupLanguageCodes() throws Exception { + final List<String> alpha2LangCodes = readCodeFile(ALPHA_2_LANGUAGE_CODE_FILE); + final List<String> alpha3LangCodes = readCodeFile(ALPHA_3_LANGUAGE_CODE_FILE); + final List<String> countryCodes = readCodeFile(COUNTRY_CODE_FILE); + + // Generate all combinations of language codes and region codes. + final List<String> langCodes = new ArrayList<>(); + langCodes.addAll(alpha2LangCodes); + langCodes.addAll(alpha3LangCodes); + for (final String languageCode : alpha2LangCodes) { + for (final String countryCode : countryCodes) { + langCodes.add(languageCode + "-" + countryCode); + } + } + for (final String languageCode : alpha3LangCodes) { + for (final String countryCode : countryCodes) { + langCodes.add(languageCode + "-" + countryCode); + } + } + languageCodes = ImmutableSet.copyOf(langCodes); + } + + private static List<String> readCodeFile(final String fileName) throws IOException { + final List<String> codes = new ArrayList<>(); + // Read each line + try (final Stream<String> stream = Files.lines(Paths.get(fileName))) { + // Each line might be comma-separated so add multiple codes per line + stream.forEach(line -> codes.addAll(Lists.newArrayList(line.split(",")))); + } + return codes; + } + + /** + * @return the {@link Set} of language codes. 
+ */ + public Set<String> getLanguageCodes() { + return languageCodes; + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/2396ebb8/common/rya.api/src/test/java/org/apache/rya/api/resolver/RdfToRyaConversionsTest.java ---------------------------------------------------------------------- diff --git a/common/rya.api/src/test/java/org/apache/rya/api/resolver/RdfToRyaConversionsTest.java b/common/rya.api/src/test/java/org/apache/rya/api/resolver/RdfToRyaConversionsTest.java new file mode 100644 index 0000000..5c83537 --- /dev/null +++ b/common/rya.api/src/test/java/org/apache/rya/api/resolver/RdfToRyaConversionsTest.java @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.rya.api.resolver; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import java.util.List; +import java.util.Set; + +import org.apache.rya.api.domain.RyaType; +import org.apache.rya.api.utils.LiteralLanguageUtils; +import org.eclipse.rdf4j.model.Literal; +import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.model.impl.SimpleLiteral; +import org.eclipse.rdf4j.model.impl.SimpleValueFactory; +import org.eclipse.rdf4j.model.util.Literals; +import org.eclipse.rdf4j.model.vocabulary.RDF; +import org.eclipse.rdf4j.model.vocabulary.XMLSchema; +import org.junit.Test; + +import com.google.common.collect.Lists; + +/** + * Tests the methods of {@link RdfToRyaConversions}. + */ +public class RdfToRyaConversionsTest { + private static final ValueFactory VF = SimpleValueFactory.getInstance(); + + private static final Set<String> LANGUAGE_CODES = LanguageCodesTestHelper.getInstance().getLanguageCodes(); + + @Test + public void testConvertLiteral_null() { + final RyaType ryaType = RdfToRyaConversions.convertLiteral(null); + assertNull(ryaType); + } + + @Test + public void testConvertLiteral_nullDataType() { + final Literal literal = mock(SimpleLiteral.class); + final String expectedData = "Ice Cream"; + when(literal.getLabel()).thenReturn(expectedData); + when(literal.stringValue()).thenReturn(expectedData); + // Don't think this is possible but test anyways. Need to mock to force this null value. 
+ when(literal.getDatatype()).thenReturn(null); + final RyaType ryaType = RdfToRyaConversions.convertLiteral(literal); + final RyaType expected = new RyaType(XMLSchema.STRING, expectedData); + assertEquals(expected, ryaType); + assertNull(ryaType.getLanguage()); + } + + @Test + public void testConvertLiteral_validLanguage() { + final String expectedData = "Hello"; + for (final String language : LANGUAGE_CODES) { + // This only checks the validity of the format. Not that the language tag actually exists. + assertTrue(Literals.isValidLanguageTag(language)); + final Literal literal = VF.createLiteral(expectedData, language); + final RyaType ryaType = RdfToRyaConversions.convertLiteral(literal); + assertEquals(RDF.LANGSTRING, ryaType.getDataType()); + assertEquals(expectedData, ryaType.getData()); + assertEquals(language, ryaType.getLanguage()); + final RyaType expectedRyaType = new RyaType(RDF.LANGSTRING, expectedData, language); + assertEquals(expectedRyaType, ryaType); + } + } + + @Test + public void testConvertLiteral_undeterminedLanguage() { + final String expectedData = "Hello"; + final String language = LiteralLanguageUtils.UNDETERMINED_LANGUAGE; + assertTrue(Literals.isValidLanguageTag(language)); + final Literal literal = VF.createLiteral(expectedData, language); + final RyaType ryaType = RdfToRyaConversions.convertLiteral(literal); + assertEquals(RDF.LANGSTRING, ryaType.getDataType()); + assertEquals(expectedData, ryaType.getData()); + final RyaType expectedRyaType = new RyaType(RDF.LANGSTRING, expectedData, language); + assertEquals(expectedRyaType, ryaType); + assertEquals(LiteralLanguageUtils.UNDETERMINED_LANGUAGE, ryaType.getLanguage()); + } + + @Test + public void testConvertLiteral_invalidLanguage() { + final String expectedData = "Hello"; + final List<String> badLanguages = Lists.newArrayList( + "bad language", + "en-", + "en-US-" + ); + for (final String badLanguage : badLanguages) { + // This only checks the validity of the format. 
Not that the language tag actually exists. + assertFalse(Literals.isValidLanguageTag(badLanguage)); + final Literal literal = VF.createLiteral(expectedData, badLanguage); + final RyaType ryaType = RdfToRyaConversions.convertLiteral(literal); + assertEquals(RDF.LANGSTRING, ryaType.getDataType()); + assertEquals(expectedData, ryaType.getData()); + // Check that the invalid language is replaced with "und" + assertEquals(LiteralLanguageUtils.UNDETERMINED_LANGUAGE, ryaType.getLanguage()); + } + } + + @Test + public void testConvertLiteral_normalString() { + final String expectedData = "Hello"; + final Literal literal = VF.createLiteral(expectedData); + final RyaType ryaType = RdfToRyaConversions.convertLiteral(literal); + assertEquals(XMLSchema.STRING, ryaType.getDataType()); + assertEquals(expectedData, ryaType.getData()); + final RyaType expectedRyaType = new RyaType(XMLSchema.STRING, expectedData); + assertEquals(expectedRyaType, ryaType); + assertNull(ryaType.getLanguage()); + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/2396ebb8/common/rya.api/src/test/java/org/apache/rya/api/resolver/RyaContextTest.java ---------------------------------------------------------------------- diff --git a/common/rya.api/src/test/java/org/apache/rya/api/resolver/RyaContextTest.java b/common/rya.api/src/test/java/org/apache/rya/api/resolver/RyaContextTest.java index d4b5f5c..954fa85 100644 --- a/common/rya.api/src/test/java/org/apache/rya/api/resolver/RyaContextTest.java +++ b/common/rya.api/src/test/java/org/apache/rya/api/resolver/RyaContextTest.java @@ -8,9 +8,9 @@ package org.apache.rya.api.resolver; * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. 
You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @@ -22,61 +22,67 @@ package org.apache.rya.api.resolver; import java.util.Map; import org.apache.rya.api.RdfCloudTripleStoreConstants.TABLE_LAYOUT; +import org.apache.rya.api.domain.RyaIRI; import org.apache.rya.api.domain.RyaStatement; import org.apache.rya.api.domain.RyaType; -import org.apache.rya.api.domain.RyaIRI; import org.apache.rya.api.query.strategy.wholerow.MockRdfConfiguration; import org.apache.rya.api.resolver.triple.TripleRow; import org.eclipse.rdf4j.model.impl.SimpleValueFactory; +import org.eclipse.rdf4j.model.vocabulary.RDF; import junit.framework.TestCase; /** */ public class RyaContextTest extends TestCase { - + public void testDefaultSerialization() throws Exception { - RyaContext instance = RyaContext.getInstance(); + final RyaContext instance = RyaContext.getInstance(); //plain string RyaType ryaType = new RyaType("mydata"); byte[] serialize = instance.serialize(ryaType); assertEquals(ryaType, instance.deserialize(serialize)); //iri - RyaIRI ryaIRI = new RyaIRI("urn:test#1234"); + final RyaIRI ryaIRI = new RyaIRI("urn:test#1234"); serialize = instance.serialize(ryaIRI); - RyaType deserialize = instance.deserialize(serialize); + final RyaType deserialize = instance.deserialize(serialize); assertEquals(ryaIRI, deserialize); //custom type ryaType = new RyaType(SimpleValueFactory.getInstance().createIRI("urn:test#customDataType"), "mydata"); serialize = instance.serialize(ryaType); assertEquals(ryaType, instance.deserialize(serialize)); + + //language type + ryaType = new RyaType(RDF.LANGSTRING, "Hello", "en"); + serialize = instance.serialize(ryaType); + assertEquals(ryaType, instance.deserialize(serialize)); } public void testTripleRowSerialization() throws Exception 
{ - RyaIRI subj = new RyaIRI("urn:test#subj"); - RyaIRI pred = new RyaIRI("urn:test#pred"); - RyaType obj = new RyaType("mydata"); - RyaStatement statement = new RyaStatement(subj, pred, obj); - RyaTripleContext instance = RyaTripleContext.getInstance(new MockRdfConfiguration()); + final RyaIRI subj = new RyaIRI("urn:test#subj"); + final RyaIRI pred = new RyaIRI("urn:test#pred"); + final RyaType obj = new RyaType("mydata"); + final RyaStatement statement = new RyaStatement(subj, pred, obj); + final RyaTripleContext instance = RyaTripleContext.getInstance(new MockRdfConfiguration()); - Map<TABLE_LAYOUT, TripleRow> map = instance.serializeTriple(statement); - TripleRow tripleRow = map.get(TABLE_LAYOUT.SPO); + final Map<TABLE_LAYOUT, TripleRow> map = instance.serializeTriple(statement); + final TripleRow tripleRow = map.get(TABLE_LAYOUT.SPO); assertEquals(statement, instance.deserializeTriple(TABLE_LAYOUT.SPO, tripleRow)); } - + public void testHashedTripleRowSerialization() throws Exception { - RyaIRI subj = new RyaIRI("urn:test#subj"); - RyaIRI pred = new RyaIRI("urn:test#pred"); - RyaType obj = new RyaType("mydata"); - RyaStatement statement = new RyaStatement(subj, pred, obj); - MockRdfConfiguration config = new MockRdfConfiguration(); - config.set(MockRdfConfiguration.CONF_PREFIX_ROW_WITH_HASH, Boolean.TRUE.toString()); - RyaTripleContext instance = RyaTripleContext.getInstance(config); + final RyaIRI subj = new RyaIRI("urn:test#subj"); + final RyaIRI pred = new RyaIRI("urn:test#pred"); + final RyaType obj = new RyaType("mydata"); + final RyaStatement statement = new RyaStatement(subj, pred, obj); + final MockRdfConfiguration config = new MockRdfConfiguration(); + config.set(MockRdfConfiguration.CONF_PREFIX_ROW_WITH_HASH, Boolean.TRUE.toString()); + final RyaTripleContext instance = RyaTripleContext.getInstance(config); - Map<TABLE_LAYOUT, TripleRow> map = instance.serializeTriple(statement); - TripleRow tripleRow = map.get(TABLE_LAYOUT.SPO); + final 
Map<TABLE_LAYOUT, TripleRow> map = instance.serializeTriple(statement); + final TripleRow tripleRow = map.get(TABLE_LAYOUT.SPO); assertEquals(statement, instance.deserializeTriple(TABLE_LAYOUT.SPO, tripleRow)); } http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/2396ebb8/common/rya.api/src/test/java/org/apache/rya/api/resolver/RyaToRdfConversionsTest.java ---------------------------------------------------------------------- diff --git a/common/rya.api/src/test/java/org/apache/rya/api/resolver/RyaToRdfConversionsTest.java b/common/rya.api/src/test/java/org/apache/rya/api/resolver/RyaToRdfConversionsTest.java new file mode 100644 index 0000000..242221f --- /dev/null +++ b/common/rya.api/src/test/java/org/apache/rya/api/resolver/RyaToRdfConversionsTest.java @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.rya.api.resolver; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.util.List; +import java.util.Set; + +import org.apache.rya.api.domain.RyaType; +import org.apache.rya.api.utils.LiteralLanguageUtils; +import org.eclipse.rdf4j.model.Literal; +import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.model.impl.SimpleValueFactory; +import org.eclipse.rdf4j.model.util.Literals; +import org.eclipse.rdf4j.model.vocabulary.RDF; +import org.eclipse.rdf4j.model.vocabulary.XMLSchema; +import org.junit.Test; + +import com.google.common.collect.Lists; + +/** + * Tests the methods of {@link RyaToRdfConversions}. + */ +public class RyaToRdfConversionsTest { + private static final ValueFactory VF = SimpleValueFactory.getInstance(); + + private static final Set<String> LANGUAGE_CODES = LanguageCodesTestHelper.getInstance().getLanguageCodes(); + + @Test (expected=NullPointerException.class) + public void testConvertLiteral_null() { + RyaToRdfConversions.convertLiteral(null); + } + + @Test + public void testConvertLiteral_nullDataType() { + final String expectedData = "Ice Cream"; + final RyaType ryaType = new RyaType(null, expectedData); + final Literal literal = RyaToRdfConversions.convertLiteral(ryaType); + final Literal expected = VF.createLiteral(expectedData, XMLSchema.STRING); + assertEquals(expected, literal); + assertFalse(literal.getLanguage().isPresent()); + } + + @Test + public void testConvertLiteral_validLanguage() { + final String expectedData = "Hello"; + for (final String language : LANGUAGE_CODES) { + // This only checks the validity of the format. Not that the language tag actually exists. 
+ assertTrue(Literals.isValidLanguageTag(language)); + final RyaType ryaType = new RyaType(RDF.LANGSTRING, expectedData, language); + final Literal literal = RyaToRdfConversions.convertLiteral(ryaType); + assertEquals(RDF.LANGSTRING, literal.getDatatype()); + assertEquals(expectedData, literal.getLabel()); + assertTrue(literal.getLanguage().isPresent()); + assertEquals(language, literal.getLanguage().get()); + final Literal expectedLiteral = VF.createLiteral(expectedData, language); + assertEquals(expectedLiteral, literal); + } + } + + @Test + public void testConvertLiteral_undeterminedLanguage() { + final String expectedData = "Hello"; + final String language = LiteralLanguageUtils.UNDETERMINED_LANGUAGE; + assertTrue(Literals.isValidLanguageTag(language)); + final RyaType ryaType = new RyaType(RDF.LANGSTRING, expectedData, language); + final Literal literal = RyaToRdfConversions.convertLiteral(ryaType); + assertEquals(RDF.LANGSTRING, literal.getDatatype()); + assertEquals(expectedData, literal.getLabel()); + assertTrue(literal.getLanguage().isPresent()); + assertEquals(LiteralLanguageUtils.UNDETERMINED_LANGUAGE, literal.getLanguage().get()); + final Literal expectedLiteral = VF.createLiteral(expectedData, language); + assertEquals(expectedLiteral, literal); + } + + @Test + public void testConvertLiteral_invalidLanguage() { + final String expectedData = "Hello"; + final List<String> badLanguages = Lists.newArrayList( + "bad language", + "en-", + "en-US-" + ); + for (final String badLanguage : badLanguages) { + // This only checks the validity of the format. Not that the language tag actually exists. 
+ assertFalse(Literals.isValidLanguageTag(badLanguage)); + final RyaType ryaType = new RyaType(RDF.LANGSTRING, expectedData, badLanguage); + final Literal literal = RyaToRdfConversions.convertLiteral(ryaType); + assertEquals(RDF.LANGSTRING, literal.getDatatype()); + assertEquals(expectedData, literal.getLabel()); + assertTrue(literal.getLanguage().isPresent()); + // Check that the invalid language is replaced with "und" + assertEquals(LiteralLanguageUtils.UNDETERMINED_LANGUAGE, literal.getLanguage().get()); + } + } + + @Test + public void testConvertLiteral_normalString() { + final String expectedData = "Hello"; + final RyaType ryaType = new RyaType(XMLSchema.STRING, expectedData); + final Literal literal = RyaToRdfConversions.convertLiteral(ryaType); + assertEquals(XMLSchema.STRING, literal.getDatatype()); + assertEquals(expectedData, literal.getLabel()); + assertFalse(literal.getLanguage().isPresent()); + final Literal expectedLiteral = VF.createLiteral(expectedData); + assertEquals(expectedLiteral, literal); + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/2396ebb8/common/rya.api/src/test/resources/ISO-3166-1_Country_Codes.txt ---------------------------------------------------------------------- diff --git a/common/rya.api/src/test/resources/ISO-3166-1_Country_Codes.txt b/common/rya.api/src/test/resources/ISO-3166-1_Country_Codes.txt new file mode 100644 index 0000000..8c1c997 --- /dev/null +++ b/common/rya.api/src/test/resources/ISO-3166-1_Country_Codes.txt @@ -0,0 +1,236 @@ +AF +AL +DZ +AS +AD +AO +AQ +AG +AR +AM +AW +AU +AT +AZ +BS +BH +BD +BB +BY +BE +BZ +BJ +BM +BT +BO +BA +BW +BV +BR +IO +BN +BG +BF +BI +KH +CM +CA +CV +KY +CF +TD +CL +CN +CX +CC +CO +KM +CG +CD +CK +CR +CI +HR +CU +CY +CZ +DK +DJ +DM +DO +EC +EG +SV +GQ +ER +EE +ET +FK +FO +FJ +FI +FR +GF +PF +TF +GA +GM +GE +DE +GH +GI +GR +GL +GD +GP +GU +GT +GN +GW +GY +HT +HM +HN +HK +HU +IS +IN +ID +IR +IQ +IE +IL +IT +JM +JP +JO +KZ +KE +KI +KP +KR +KW 
+KG +LA +LV +LB +LS +LR +LY +LI +LT +LU +MO +MK +MG +MW +MY +MV +ML +MT +MH +MQ +MR +MU +YT +MX +FM +MD +MC +MN +MS +MA +MZ +MM +NA +NR +NP +NL +AN +NC +NZ +NI +NE +NG +NU +NF +MP +NO +OM +PK +PW +PS +PA +PG +PY +PE +PH +PN +PL +PR +QA +RE +RO +RU +RW +SH +KN +LC +PM +VC +WS +SM +ST +SA +SN +CS +SC +SL +SG +SK +SI +SB +SO +ZA +GS +ES +LK +SD +SR +SJ +SZ +SE +CH +SY +TW +TJ +TZ +TH +TL +TG +TK +TO +TT +TN +TR +TM +TC +TV +UG +UA +AE +GB +US +UM +UY +UZ +VU +VE +VN +VG +VI +WF +EH +YE +ZM +ZW \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/2396ebb8/common/rya.api/src/test/resources/ISO-639-1_Language_Codes.txt ---------------------------------------------------------------------- diff --git a/common/rya.api/src/test/resources/ISO-639-1_Language_Codes.txt b/common/rya.api/src/test/resources/ISO-639-1_Language_Codes.txt new file mode 100644 index 0000000..1fc94da --- /dev/null +++ b/common/rya.api/src/test/resources/ISO-639-1_Language_Codes.txt @@ -0,0 +1,191 @@ +ab +aa +af +ak +sq +am +ar +an +hy +as +av +ae +ay +az +bm +ba +eu +be +bn +bh +bi +bs +br +bg +my +ca +ch +ce +ny +zh +zh-Hans +zh-Hant +cv +kw +co +cr +hr +cs +da +dv +nl +dz +en +eo +et +ee +fo +fj +fi +fr +ff +gl +gd +gv +ka +de +el +kl +gn +gu +ht +ha +he +hz +hi +ho +hu +is +io +ig +id,in +ia +ie +iu +ik +ga +it +ja +jv +kl +kn +kr +ks +kk +km +ki +rw +rn +ky +kv +kg +ko +ku +kj +lo +la +lv +li +ln +lt +lu +lg +lb +gv +mk +mg +ms +ml +mt +mi +mr +mh +mo +mn +na +nv +ng +nd +ne +no +nb +nn +ii +oc +oj +cu +or +om +os +pi +ps +fa +pl +pt +pa +qu +rm +ro +ru +se +sm +sg +sa +sr +sh +st +tn +sn +ii +sd +si +ss +sk +sl +so +nr +es +su +sw +ss +sv +tl +ty +tg +ta +tt +te +th +bo +ti +to +ts +tr +tk +tw +ug +uk +ur +uz +ve +vi +vo +wa +cy +wo +fy +xh +yi,ji +yo +za +zu \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/2396ebb8/common/rya.api/src/test/resources/ISO-639-2_Language_Codes.txt 
---------------------------------------------------------------------- diff --git a/common/rya.api/src/test/resources/ISO-639-2_Language_Codes.txt b/common/rya.api/src/test/resources/ISO-639-2_Language_Codes.txt new file mode 100644 index 0000000..21f3834 --- /dev/null +++ b/common/rya.api/src/test/resources/ISO-639-2_Language_Codes.txt @@ -0,0 +1,504 @@ +aar +abk +ace +ach +ada +ady +afa +afh +afr +ain +aka +akk +alb,sqi +ale +alg +alt +amh +ang +anp +apa +ara +arc +arg +arm,hye +arn +arp +art +arw +asm +ast +ath +aus +ava +ave +awa +aym +aze +bad +bai +bak +bal +bam +ban +baq,eus +bas +bat +bej +bel +bem +ben +ber +bho +bih +bik +bin +bis +bla +bnt +tib,bod +bos +bra +bre +btk +bua +bug +bul +bur,mya +byn +cad +cai +car +cat +cau +ceb +cel +cze,ces +cha +chb +che +chg +chi,zho +chk +chm +chn +cho +chp +chr +chu +chv +chy +cmc +cnr +cop +cor +cos +cpe +cpf +cpp +cre +crh +crp +csb +cus +wel,cym +cze,ces +dak +dan +dar +day +del +den +ger,deu +dgr +din +div +doi +dra +dsb +dua +dum +dut,nld +dyu +dzo +efi +egy +eka +gre,ell +elx +eng +enm +epo +est +baq,eus +ewe +ewo +fan +fao +per,fas +fat +fij +fil +fin +fiu +fon +fre,fra +frm +fro +frr +frs +fry +ful +fur +gaa +gay +gba +gem +geo,kat +ger,deu +gez +gil +gla +gle +glg +glv +gmh +goh +gon +gor +got +grb +grc +gre,ell +grn +gsw +guj +gwi +hai +hat +hau +haw +heb +her +hil +him +hin +hit +hmn +hmo +hrv +hsb +hun +hup +arm,hye +iba +ibo +ice,isl +ido +iii +ijo +iku +ile +ilo +ina +inc +ind +ine +inh +ipk +ira +iro +ice,isl +ita +jav +jbo +jpn +jpr +jrb +kaa +kab +kac +kal +kam +kan +kar +kas +geo,kat +kau +kaw +kaz +kbd +kha +khi +khm +kho +kik +kin +kir +kmb +kok +kom +kon +kor +kos +kpe +krc +krl +kro +kru +kua +kum +kur +kut +lad +lah +lam +lao +lat +lav +lez +lim +lin +lit +lol +loz +ltz +lua +lub +lug +lui +lun +luo +lus +mac,mkd +mad +mag +mah +mai +mak +mal +man +mao,mri +map +mar +mas +may,msa +mdf +mdr +men +mga +mic +min +mis +mac,mkd +mkh +mlg +mlt +mnc +mni +mno +moh +mon +mos +mao,mri +may,msa +mul +mun 
+mus +mwl +mwr +bur,mya +myn +myv +nah +nai +nap +nau +nav +nbl +nde +ndo +nds +nep +new +nia +nic +niu +dut,nld +nno +nob +nog +non +nor +nqo +nso +nub +nwc +nya +nym +nyn +nyo +nzi +oci +oji +ori +orm +osa +oss +ota +oto +paa +pag +pal +pam +pan +pap +pau +peo +per,fas +phi +phn +pli +pol +pon +por +pra +pro +pus +qaa-qtz +que +raj +rap +rar +roa +roh +rom +rum,ron +run +rup +rus +sad +sag +sah +sai +sal +sam +san +sas +sat +scn +sco +sel +sem +sga +sgn +shn +sid +sin +sio +sit +sla +slo,slk +slv +sma +sme +smi +smj +smn +smo +sms +sna +snd +snk +sog +som +son +sot +spa +alb,sqi +srd +srn +srp +srr +ssa +ssw +suk +sun +sus +sux +swa +swe +syc +syr +tah +tai +tam +tat +tel +tem +ter +tet +tgk +tgl +tha +tib,bod +tig +tir +tiv +tkl +tlh +tli +tmh +tog +ton +tpi +tsi +tsn +tso +tuk +tum +tup +tur +tut +tvl +twi +tyv +udm +uga +uig +ukr +umb +und +urd +uzb +vai +ven +vie +vol +vot +wak +wal +war +was +wel,cym +wen +wln +wol +xal +xho +yao +yap +yid +yor +ypk +zap +zbl +zen +zgh +zha +chi,zho +znd +zul +zun +zxx +zza
