This is an automated email from the ASF dual-hosted git repository. desruisseaux pushed a commit to branch geoapi-4.0 in repository https://gitbox.apache.org/repos/asf/sis.git
The following commit(s) were added to refs/heads/geoapi-4.0 by this push: new 1733c7777a Tune the resolution of `xlink:href` attributes having a fragment in their URI values. 1733c7777a is described below commit 1733c7777abb1b5ba781bf78f38ee7650a514f4d Author: Martin Desruisseaux <martin.desruisse...@geomatys.com> AuthorDate: Wed Dec 20 12:52:33 2023 +0100 Tune the resolution of `xlink:href` attributes having a fragment in their URI values. --- .../main/org/apache/sis/xml/MarshalContext.java | 4 +- .../main/org/apache/sis/xml/Pooled.java | 5 +- .../main/org/apache/sis/xml/ReferenceResolver.java | 76 +++++++++++++++------- .../main/org/apache/sis/xml/bind/Context.java | 51 ++++++++++----- .../apache/sis/xml/util/ExternalLinkHandler.java | 12 ---- .../main/org/apache/sis/xml/util/URISource.java | 6 +- .../main/org/apache/sis/xml/util/XmlUtilities.java | 41 ++++++++++++ .../org/apache/sis/xml/ReferenceResolverTest.java | 14 +++- 8 files changed, 147 insertions(+), 62 deletions(-) diff --git a/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/MarshalContext.java b/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/MarshalContext.java index 57b4e22f16..c2454965ff 100644 --- a/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/MarshalContext.java +++ b/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/MarshalContext.java @@ -19,6 +19,7 @@ package org.apache.sis.xml; import java.util.Locale; import java.util.TimeZone; import org.opengis.util.InternationalString; +import org.apache.sis.util.Localized; import org.apache.sis.util.Version; @@ -29,7 +30,7 @@ import org.apache.sis.util.Version; * @version 1.5 * @since 0.3 */ -public abstract class MarshalContext { +public abstract class MarshalContext implements Localized { /** * Creates a new (un)marshalling context. */ @@ -69,6 +70,7 @@ public abstract class MarshalContext { * * @see org.apache.sis.util.DefaultInternationalString#toString(Locale) */ + @Override public abstract Locale getLocale(); /** diff --git a/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/Pooled.java b/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/Pooled.java index 593d71f0bd..78b5b6dd0b 100644 --- a/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/Pooled.java +++ b/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/Pooled.java @@ -560,12 +560,9 @@ abstract class Pooled { */ final Context begin(final ExternalLinkHandler linkHandler) { if (includedDocumentSystemId != null) { - if (linkHandler != null) { - linkHandler.includedDocumentSystemId = includedDocumentSystemId; - } final Context current = Context.current(); if (current != null) { - return current.createChild(linkHandler); + return current.createChild(includedDocumentSystemId, linkHandler); } } return new Context(bitMasks | specificBitMasks(), pool, locale, timezone, diff --git a/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/ReferenceResolver.java b/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/ReferenceResolver.java index c6cab8471e..4e86d6def6 100644 --- a/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/ReferenceResolver.java +++ b/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/ReferenceResolver.java @@ -17,12 +17,10 @@ package org.apache.sis.xml; import java.net.URI; -import java.io.IOException; import java.util.UUID; import java.lang.reflect.Proxy; import javax.xml.transform.Source; import jakarta.xml.bind.Unmarshaller; -import jakarta.xml.bind.JAXBException; import org.opengis.metadata.Identifier; import org.apache.sis.util.ArgumentChecks; import org.apache.sis.util.Emptiable; @@ -33,6 +31,7 @@ import org.apache.sis.xml.bind.Context; import org.apache.sis.xml.bind.gcx.Anchor; import org.apache.sis.xml.util.ExternalLinkHandler; import org.apache.sis.xml.util.URISource; +import org.apache.sis.xml.util.XmlUtilities; /** @@ -117,12 +116,19 @@ public class ReferenceResolver { * * <ul> * <li>If {@code xlink:href} is null or {@linkplain URI#isOpaque() opaque}, returns {@code null}.</li> - * <li>Otherwise, if {@code xlink:href} {@linkplain URI#isAbsolute() is absolute} or has a non-empty - * {@linkplain URI#getPath() path}, delegate to {@link #resolveExternal(MarshalContext, Source)}.</li> - * <li>Otherwise, if {@code xlink:href} is a {@linkplain URI#getFragment() fragment} of the form {@code "#foo"} - * and if an object of class {@code type} with the {@code gml:id="foo"} attribute has previously been seen - * in the same XML document, then return that object.</li> - * <li>Otherwise, returns {@code null}.</li> + * <li>Otherwise, if {@code xlink:href} is a {@linkplain URI#getFragment() fragment} with no path such + * as {@code "#foo"}, then: + * <ul> + * <li>If an object of class {@code type} with an identifier attribute such as {@code gml:id="foo"} + * has previously been seen in the same XML document (i.e., "foo" is a backward reference), + * returns that object.</li> + * <li>Otherwise, emits a warning and returns {@code null}. + * Note that it may happen if the {@code xlink:href} is a forward reference.</li> + * </ul> + * </li> + * <li>Otherwise, (URI {@linkplain URI#isAbsolute() is absolute} or has a {@linkplain URI#getPath() path}), + * resolve the URI relatively to current document being unmarshalled and + * delegate to {@link #resolveExternal(MarshalContext, Source)}.</li> * </ul> * * If an object is found but is not of the class declared in {@code type}, @@ -196,42 +202,55 @@ public class ReferenceResolver { * @param context context (GML version, locale, <i>etc.</i>) of the (un)marshalling process. * @param source source of the document specified by the {@code xlink:href} attribute value. * @return an object for the given source, or {@code null} if none. - * @throws IOException if an error occurred while opening the document. - * @throws JAXBException if an error occurred while parsing the document. + * @throws Exception if an error occurred while opening or parsing the document. * * @since 1.5 */ - protected Object resolveExternal(final MarshalContext context, final Source source) throws IOException, JAXBException { - final Object systemId; + protected Object resolveExternal(final MarshalContext context, final Source source) throws Exception { + final Object document; final String fragment; final URI uri; if (source instanceof URISource) { final var s = (URISource) source; uri = s.getReadableURI(); - systemId = s.document; + document = s.document; fragment = s.fragment; } else { - systemId = source.getSystemId(); - fragment = null; - uri = null; + uri = null; + final int s; + final String systemId = source.getSystemId(); + if (systemId != null && (s = systemId.lastIndexOf('#')) >= 0) { + document = Strings.trimOrNull(systemId.substring(0,s)); + fragment = Strings.trimOrNull(systemId.substring(s+1)); + } else { + document = systemId; + fragment = null; + } } /* * At this point, we got the system identifier (usually as a resolved URI, but not necessarily) * and the URI fragment to use as a GML identifier. Check if the document is in the cache. + * Note that if the fragment is null, then by convention we lookup for the whole document. */ final Context c = Context.current(); - Object object = Context.getObjectForID(c, systemId, fragment); - if (object != null) { - return object; + if (c != null) { + final Object object = c.getExternalObjectForID(document, fragment); + if (object != null) { + XmlUtilities.close(source); + return object; + } } /* - * Object not found in the cache. Parse it. + * Object not found in the cache. Parse it. As a side-effect of unmarshalling the document, + * a map of fragments found in the document will be populated. We use that map at the end + * for extracting the requested object. */ final MarshallerPool pool = context.getPool(); final Unmarshaller m = pool.acquireUnmarshaller(); if (m instanceof Pooled) { - ((Pooled) m).forIncludedDocument(systemId); + ((Pooled) m).forIncludedDocument(document); } + final Object object; if (uri != null) { object = m.unmarshal(uri.toURL()); } else { @@ -239,10 +258,19 @@ public class ReferenceResolver { } pool.recycle(m); /* - * Object should be in the cache now. + * All fragments in the referenced document should be in the cache now. + * Cache the whole document, then request the fragment from the cache. */ - if (fragment != null) { - object = Context.getObjectForID(c, systemId, fragment); + if (c != null) { + c.cacheDocument(document, object); + if (fragment != null) { + Object part = c.getExternalObjectForID(document, fragment); + if (part != null || uri != null) return part; + /* + * Fragment not found. The source was not built by ourselves (`uri == null`), + * so maybe the user provided a source which was returning directly the fragment. + */ + } } return object; } diff --git a/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/bind/Context.java b/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/bind/Context.java index 85d6ec4e21..b4723f081c 100644 --- a/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/bind/Context.java +++ b/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/bind/Context.java @@ -22,6 +22,7 @@ import java.util.HashMap; import java.util.IdentityHashMap; import java.util.Locale; import java.util.TimeZone; +import java.util.Collections; import java.util.logging.Level; import java.util.logging.Logger; import java.util.logging.LogRecord; @@ -410,11 +411,20 @@ public final class Context extends MarshalContext { * will use a different {@link ExternalLinkHandler}. The returned context shall be * used in the same way as a context created from the public constructor. * + * <p>The {@code systemId} argument is the key to use for caching the result of (un)marshalling a document + * in the new context. This is usually the same value than the {@link String}, {@link File} or {@link URL} + * specified at {@code linkHandler} construction time, but as an {@link URI} or {@link String} instance.</p> + * + * @param systemId {@link URI} or {@link String} identifier of the document to (un)marshal. * @param linkHandler the document-dependent resolver of relative URIs, or {@code null}. * @return the context as a child of current context, never {@code null}. + * + * @see Context#getExternalObjectForID(Object, String) */ - public final Context createChild(final ExternalLinkHandler linkHandler) { + public final Context createChild(final Object systemId, final ExternalLinkHandler linkHandler) { + // Use Map.put(…) instead of putIfAbsent(…) because maybe the previous map is unmodifiable. final Context context = new Context(this, locale, linkHandler, false); + identifiersPerDocuments.put(systemId, context.identifiers); CURRENT.set(context); return context; } @@ -646,19 +656,30 @@ public final class Context extends MarshalContext { * However, the original URI instances should be used instead when they are available. * By convention, a null {@code id} returns the whole document.</p> * - * @param context the current context, or {@code null} if none. * @param systemId document identifier (without the fragment part) as an {@link URI} or a {@link String}. * @param id the fragment part of the URI identifying the object to get. * @return the object associated to the given identifier, or {@code null} if none. */ - public static Object getObjectForID(final Context context, final Object systemId, final String id) { - if (context != null) { - final Map<String,Object> identifiers = context.identifiersPerDocuments.get(systemId); - if (identifiers != null) { - return identifiers.get(id); - } + public final Object getExternalObjectForID(final Object systemId, final String id) { + final Map<String,Object> cache = identifiersPerDocuments.get(systemId); + return (cache != null) ? cache.get(id) : null; + } + + /** + * Adds in the cache the result of unmarshalling a document referenced by {@code xlink:href}. + * This method cache the whole document, ignoring the fragment in the {@code xlink:href}. + * The fragment part is obtained by {@link #getExternalObjectForID(Object, String)}. + * + * @param systemId document identifier (without the fragment part) as an {@link URI} or a {@link String}. + * @param document the document to cache. + */ + public final void cacheDocument(final Object systemId, final Object document) { + final Map<String, Object> cache = identifiersPerDocuments.get(systemId); + if (cache == null || cache.isEmpty()) { + identifiersPerDocuments.put(systemId, Collections.singletonMap(null, document)); + } else { + cache.put(null, document); } - return null; } /** @@ -820,16 +841,10 @@ public final class Context extends MarshalContext { if ((bitMasks & CLEAR_SEMAPHORE) != 0) { Semaphores.clear(Semaphores.NULL_COLLECTION); } - if (previous == null) { - CURRENT.remove(); - } else { + if (previous != null) { CURRENT.set(previous); - if (linkHandler != null) { - final Object systemId = linkHandler.includedDocumentSystemId; - if (systemId != null) { - previous.identifiersPerDocuments.putIfAbsent(systemId, identifiers); - } - } + } else { + CURRENT.remove(); } } diff --git a/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/util/ExternalLinkHandler.java b/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/util/ExternalLinkHandler.java index 89f8db0884..65f31fb8c2 100644 --- a/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/util/ExternalLinkHandler.java +++ b/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/util/ExternalLinkHandler.java @@ -64,18 +64,6 @@ public class ExternalLinkHandler { */ private Object base; - /** - * Key to use for caching the result of (un)marshalling the document resolved by this link handler. - * This is usually the same value than the {@link String}, {@link File} or {@link URL} specified at - * construction time, but as an {@link URI} or {@link String} instance. - * - * <p>This field is not used by {@code ExternalLinkHandler}. It is defined only as a way - * to transfer information between {@code PooledUnmarshaller} and {@link Context}.</p> - * - * @see Context#getObjectForID(Context, Object, String) - */ - public Object includedDocumentSystemId; - /** * Creates a new resolver for documents relative to the document in the specified URL. * The given URL can be what StAX, SAX and DOM call {@code systemId}. diff --git a/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/util/URISource.java b/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/util/URISource.java index 7ea961e943..cf8747830c 100644 --- a/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/util/URISource.java +++ b/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/util/URISource.java @@ -45,16 +45,18 @@ public final class URISource extends StreamSource { public final String fragment; /** - * Creates a source from an URI. + * Creates a source from an URI. This constructor separates the fragment from the path. + * The URI stored by this constructor in {@link #document} excludes the fragment part. * * @param source URI to the XML document. * @throws URISyntaxException if the URI is not valid. */ URISource(URI source) throws URISyntaxException { source = source.normalize(); + // Build a new URI unconditionally because it also decodes escaped characters. final URI c = new URI(source.getScheme(), source.getSchemeSpecificPart(), null); document = source.equals(c) ? source : c; // Share the existing instance if applicable. - fragment = source.getFragment(); + fragment = Strings.trimOrNull(source.getFragment()); } /** diff --git a/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/util/XmlUtilities.java b/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/util/XmlUtilities.java index 18e2777c28..e34546ba15 100644 --- a/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/util/XmlUtilities.java +++ b/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/util/XmlUtilities.java @@ -16,6 +16,8 @@ */ package org.apache.sis.xml.util; +import java.io.Closeable; +import java.io.IOException; import java.math.BigDecimal; import java.time.Instant; import java.time.LocalDate; @@ -35,6 +37,10 @@ import java.util.Locale; import java.util.TimeZone; import java.util.GregorianCalendar; import java.util.function.ObjIntConsumer; +import javax.xml.transform.Source; +import javax.xml.transform.sax.SAXSource; +import javax.xml.transform.stax.StAXSource; +import javax.xml.transform.stream.StreamSource; import javax.xml.datatype.XMLGregorianCalendar; import javax.xml.datatype.DatatypeFactory; import javax.xml.datatype.DatatypeConfigurationException; @@ -330,4 +336,39 @@ public final class XmlUtilities extends SystemListener { } return null; } + + /** + * Closes the reader, input stream or XML stream used by the given source. + * + * @param source the source to close. + * @throws Exception if an error occurred while closing the source. + */ + public static void close(final Source source) throws Exception { + if (source instanceof StreamSource) { + var s = (StreamSource) source; + close(s.getInputStream(), s.getReader()); + } else if (source instanceof SAXSource) { + var s = ((SAXSource) source).getInputSource(); + if (s != null) { + close(s.getByteStream(), s.getCharacterStream()); + } + } else if (source instanceof StAXSource) { + var s = (StAXSource) source; + var c = s.getXMLEventReader(); if (c != null) c.close(); + var b = s.getXMLStreamReader(); if (b != null) b.close(); + /* + * Note: above calls to `close()` do not close the underlying streams, + * so we are not really done yet. But we cannot do better in this method. + */ + } + } + + /** + * Closes the given byte stream and character stream if non-null. + * This is an helper method for {@link #close(Source)} only. + */ + private static void close(final Closeable b, final Closeable c) throws IOException { + if (c != null) c.close(); + if (b != null) b.close(); + } } diff --git a/endorsed/src/org.apache.sis.metadata/test/org/apache/sis/xml/ReferenceResolverTest.java b/endorsed/src/org.apache.sis.metadata/test/org/apache/sis/xml/ReferenceResolverTest.java index caca2472e4..141eef9d9a 100644 --- a/endorsed/src/org.apache.sis.metadata/test/org/apache/sis/xml/ReferenceResolverTest.java +++ b/endorsed/src/org.apache.sis.metadata/test/org/apache/sis/xml/ReferenceResolverTest.java @@ -18,6 +18,7 @@ package org.apache.sis.xml; import java.io.IOException; import jakarta.xml.bind.JAXBException; +import org.opengis.metadata.citation.Citation; import org.opengis.metadata.identification.DataIdentification; // Test dependencies @@ -25,6 +26,7 @@ import org.junit.Test; import static org.junit.jupiter.api.Assertions.*; import org.apache.sis.metadata.xml.TestUsingFile; import org.apache.sis.metadata.iso.citation.DefaultCitationTest; +import static org.apache.sis.test.TestUtilities.getSingleton; /** @@ -49,6 +51,16 @@ public final class ReferenceResolverTest extends TestUsingFile { public void testUsingExternalXLink() throws IOException, JAXBException { final var data = (DataIdentification) XML.unmarshal(Format.XML2016.getURL("UsingExternalXLink.xml")); assertEquals("Test the use of XLink to an external document.", data.getAbstract().toString()); - DefaultCitationTest.verifyUnmarshalledCitation(data.getCitation()); + final Citation citation = data.getCitation(); + DefaultCitationTest.verifyUnmarshalledCitation(citation); + /* + * The fragment should reference the exact same object than the one in the citation. + */ + final var parent = getSingleton(citation.getCitedResponsibleParties().iterator().next().getParties()); + final var reusing = getSingleton(getSingleton(data.getPointOfContacts()).getParties()); + assertEquals("Little John", reusing.getName().toString()); + assertSame(getSingleton(parent .getContactInfo()), + getSingleton(reusing.getContactInfo())); + } }