This is an automated email from the ASF dual-hosted git repository. andy pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/jena.git
commit 596bfa06f92a9f4a0b7218a101ced51f992cc64a Author: Andy Seaborne <[email protected]> AuthorDate: Sun Nov 2 14:25:08 2025 +0000 GH-3558: LiteralLabel lazy value --- .../org/apache/jena/sparql/expr/NodeValue.java | 29 +--- .../org/apache/jena/graph/impl/LiteralLabel.java | 178 ++++++++++++--------- 2 files changed, 110 insertions(+), 97 deletions(-) diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/expr/NodeValue.java b/jena-arq/src/main/java/org/apache/jena/sparql/expr/NodeValue.java index b6776c518e..877007806a 100644 --- a/jena-arq/src/main/java/org/apache/jena/sparql/expr/NodeValue.java +++ b/jena-arq/src/main/java/org/apache/jena/sparql/expr/NodeValue.java @@ -608,28 +608,10 @@ public abstract class NodeValue extends ExprNode return new NodeValueLang(node); } - // Typed literal - LiteralLabel lit = node.getLiteral(); - - // This includes type testing - // if ( ! lit.getDatatype().isValidLiteral(lit) ) - - // Use this - already calculated when the node is formed. - if ( !lit.isWellFormed() ) { - if ( NodeValue.VerboseWarnings ) { - String tmp = FmtUtils.stringForNode(node); - Log.warn(NodeValue.class, "Datatype format exception: " + tmp); - } - // Invalid lexical form. - return new NodeValueNode(node); - } - NodeValue nv = _setByValue(node); if ( nv != null ) return nv; - return new NodeValueNode(node); - //raise(new ExprException("NodeValue.nodeToNodeValue: Unknown Node type: "+n)); } // Jena code does not have these types (yet) @@ -677,10 +659,11 @@ public abstract class NodeValue extends ExprNode // so must test for validity on the untrimmed lexical form. String lexTrimmed = lex.trim(); - if ( ! datatype.equals(XSDdecimal) ) { // ! decimal is short for integers and all derived types. + if ( ! datatype.equals(XSDdecimal) ) { // decimal covers integer, and all derived types, lexical forms // XSD integer and derived types - if ( XSDinteger.isValidLiteral(lit) ) - { + if ( XSDinteger.isValidLiteral(lit) ) { + if ( ! lit.isWellFormed() ) + return null; // BigInteger does not accept such whitespace. String s = lexTrimmed; if ( s.startsWith("+") ) @@ -715,7 +698,9 @@ public abstract class NodeValue extends ExprNode return new NodeValueBoolean(b, node); } - if ( (datatype.equals(XSDdateTime) || datatype.equals(XSDdateTimeStamp)) && XSDdateTime.isValid(lex) ) { + if ( datatype.equals(XSDdateTime) || datatype.equals(XSDdateTimeStamp) ) { + if ( ! XSDdateTime.isValid(lex) ) + return null; return NodeValueDateTime.create(lexTrimmed, node); } diff --git a/jena-core/src/main/java/org/apache/jena/graph/impl/LiteralLabel.java b/jena-core/src/main/java/org/apache/jena/graph/impl/LiteralLabel.java index 66965dd45e..1f8310ad86 100644 --- a/jena-core/src/main/java/org/apache/jena/graph/impl/LiteralLabel.java +++ b/jena-core/src/main/java/org/apache/jena/graph/impl/LiteralLabel.java @@ -24,6 +24,7 @@ import java.util.Arrays; import java.util.Objects; import org.apache.jena.atlas.lib.EscapeStr; +import org.apache.jena.atlas.lib.InternalErrorException; import org.apache.jena.datatypes.DatatypeFormatException; import org.apache.jena.datatypes.RDFDatatype; import org.apache.jena.datatypes.xsd.XSDDatatype; @@ -68,25 +69,34 @@ final public class LiteralLabel { private RDFDatatype dtype; /** - * The value form of the literal. It will be null if the value - * has not been parsed or if it is an illegal value. + * The value form of the literal. It will be null if the value has not been + * parsed or if it is an illegal value. + * Used in EAGER mode, otherwise it's null, + * and also if the LiteralLabel is created from a values, not from the lexical form + * and datatype. + * @implNote + * Access using {@link #getValueInternal()} or {@link #getIndexingValue()}. */ - private Object value; + private Object value = null; - private enum ValueMode { EAGER , LAZY } - // LAZY does not completely pass the test suite - the point where bad literals - // cause exceptions changes + // Whether to calculate values early (when the LiteralLabel is created) + // or when the value is asked for. For some uses (parsering-printing) + // GraphMem is the only storage to use value-based indexing. + // All other graphs are term-based and do not need the value. + + // For LAZY mode, the value is in "value1" if the LiteralLabel + // is created from lexical form and datatype. // - // Whether this is the fact the tests are over sensitive or there is going to be - // unexpected behaviour needs investigation. - private static ValueMode valueMode = ValueMode.EAGER; + // Setting -- up to Jena5: EAGER -- from Jena6 : LAZY. + private enum ValueMode { EAGER , LAZY } + private static ValueMode valueMode = ValueMode.LAZY; /** * Indicates whether this is literal has a valid lexical form for the datatype. + * @implNote + * The inital value is know to {@link #isWellFormedRaw()} */ - private boolean wellformed = true; - - private Exception exception = null; + private boolean wellformed = false; private final int hash; @@ -112,7 +122,7 @@ final public class LiteralLabel { * * @param lex the lexical form of the literal * @param lang the optional language tag, only relevant for rdf:langString and rdf:dirLangString - * @param dirLang only relevant for rdf:langString and rdf:dirLangString + * @param dirLang the language base direction, only relevant for rdf:langString and rdf:dirLangString * @param datatype the type of the literal */ /*package*/ LiteralLabel(String lex, String lang, TextDirection textDir, RDFDatatype datatype) { @@ -121,17 +131,14 @@ final public class LiteralLabel { this.lang = lang; this.textDir = textDir; this.hash = calcHashCode(); - if ( valueMode == ValueMode.EAGER ) { - this.wellformed = setValue(lex, this.dtype); - this.dtype = normalize(value, this.dtype); - } else - // Lazy value calculation. - this.value = null; - } - - /** Calculate the indexing form for a language tag */ - private static String indexingLang(String lang) { - return lang; + switch(valueMode) { + case EAGER -> { + this.wellformed = setValue(lex, this.dtype); + this.dtype = normalize(value, this.dtype); + } + case LAZY -> + this.value = null; + } } /** @@ -176,7 +183,7 @@ final public class LiteralLabel { throw new DatatypeFormatException(value.toString(), datatype, "in literal creation"); this.lexicalForm = (datatype == null ? value.toString() : datatype.unparse(value)); - hash = calcHashCode(); + this.hash = calcHashCode(); } /** @@ -199,7 +206,7 @@ final public class LiteralLabel { this.lang = ""; this.textDir = null; this.wellformed = true; - hash = calcHashCode(); + this.hash = calcHashCode(); } /** @@ -214,42 +221,88 @@ final public class LiteralLabel { return true; } catch (DatatypeFormatException e) { // Normally this parameter is false. - if (JenaParameters.enableEagerLiteralValidation) { - e.fillInStackTrace(); + if (JenaParameters.enableEagerLiteralValidation) throw e; - } - exception = e; return false; } } // -- Thread safe delayed initialization at the cost of "volatile" incurred in getValueLazy() - // Used by set-by-term. - // set-by-value is always eager. + + /** Calculate the indexing form for a language tag */ + private static String indexingLang(String lang) { + return lang; + } + + /** + * Answer a suitable instance of a Java class representing this literal's value. + * Throws an exception if the literal is ill-formed. + */ + public Object getValue() throws DatatypeFormatException { + Object val = getValueInternal(); + if (! wellformed ) + throw new DatatypeFormatException(lexicalForm, dtype, (Throwable)null); + if ( val != null ) + // Value is good. + return val; + if ( ! JenaParameters.enableEagerLiteralValidation ) + throw new DatatypeFormatException(); + return null; + } + + /** + * Get the possibly lazy-evaluated value. + * This method returns "null" for no value. + */ + private Object getValueInternal() { + switch(valueMode) { + case EAGER: return value; + case LAZY: { + Object v = getValueLazy(); + return (v == invalidValue ) ? null : v; + } + case null: throw new InternalErrorException(); + } + } private volatile Object value1 = null; - private static Object invalidValue = new Object(); + private static final Object invalidValue = new Object(); - /** Does not return null - returns "invalidValue" */ + /** + * Calculate the value if not already set. + * Does not return null - returns distinguished object "invalidValue". + */ private Object getValueLazy() { - // Eager value processing. + switch(valueMode) { + case EAGER: return value; + case LAZY: return getSetValueLazily(); + } + throw new InternalErrorException(); + } + + private Object getSetValueLazily() { + // Value can be set even in LAZY mode. This happens when a literal label is created from a value. if ( value != null ) return value; + // Lazy value processing. if ( value1 != null ) { // value1 only goes from null to Object, and not back to null. return value1; } synchronized(this) { if ( value1 == null ) - value1 = calcValue(lexicalForm); + value1 = calcValueFromLex(lexicalForm); + // Leave this.value as null. + return value1; } - // Object assignment is atomic. - // Synchronized ensured the value object is properly constructed. - value = (value1 != invalidValue ) ? value1 : null; - return value1; } - private Object calcValue(String lex) { + /** Calculate the value. + * This returns "invalidValue" if the value is not conformant with the datatype. + * This sets {@link #wellformed}. + * This may adjust the datatype. + */ + private Object calcValueFromLex(String lex) { try { Object v = dtype.parse(lex); wellformed = true; @@ -279,16 +332,18 @@ final public class LiteralLabel { /** * Answer true iff this is a well-formed literal (the lexical form conforms to the datatype). - * String literals (xsd:string, rdf:LangString,m rdf:dirLangString) are always well-formed. + * String literals (xsd:string, rdf:LangString, rdf:dirLangString) are always well-formed. */ public boolean isWellFormed() { return dtype != null && isWellFormedRaw(); } private boolean isWellFormedRaw() { - if ( ! wellformed ) - return false; - // Force initialization. + // If wellformed is initialized to false so if it is true then this object passed checking. + // (invert if initialized to true). + if ( wellformed ) + return true; + // Ensure value initialization. getValueInternal(); return wellformed; } @@ -299,7 +354,7 @@ final public class LiteralLabel { public String toString(PrefixMapping pmap, boolean quoting) { StringBuilder b = new StringBuilder(); - if ( ! quoting && simpleLiteral() ) + if ( ! quoting && dtype.equals(XSDDatatype.XSDstring) ) return getLexicalForm(); quoting = true; @@ -324,10 +379,6 @@ final public class LiteralLabel { return b.toString(); } - private boolean simpleLiteral() { - return dtype.equals(XSDDatatype.XSDstring); - } - @Override public String toString() { return toString(true); @@ -351,8 +402,8 @@ final public class LiteralLabel { if ( !lang.equals("") ) // Assumed formatted/case-insensitive language tags. return getLexicalForm() + "@" + indexingLang(lang); + Object value = getValueInternal(); if ( wellformed ) { - Object value = getValue(); // JENA-1936 // byte[] does not provide hashCode/equals based on the contents of the array. if ( value instanceof byte[] ) @@ -424,27 +475,6 @@ final public class LiteralLabel { return textDir; } - /** - * Answer a suitable instance of a Java class representing this literal's value. - * May throw an exception if the literal is ill-formed. - */ - public Object getValue() throws DatatypeFormatException { - Object val = getValueInternal(); - if (! wellformed ) - throw new DatatypeFormatException(lexicalForm, dtype, (Throwable)null); - if ( val != null ) - // Value is good. - return val; - if ( ! JenaParameters.enableEagerLiteralValidation ) - throw new DatatypeFormatException(); - return null; - } - - private Object getValueInternal() { - Object v = getValueLazy(); - return (v == invalidValue ) ? null : v; - } - /** * Answer the datatype of this literal, null if it is untyped. */ @@ -626,9 +656,7 @@ final public class LiteralLabel { public int getValueHashCode() { if ( indexingValueIsSelf() ) return hashCode(); - Object v = getValueInternal(); - if ( ! wellformed ) - return hashCode(); + Object v = getIndexingValue(); if ( ! wellformed ) return hashCode(); return v.hashCode();
