This is an automated email from the ASF dual-hosted git repository.

desruisseaux pushed a commit to branch geoapi-4.0
in repository https://gitbox.apache.org/repos/asf/sis.git


The following commit(s) were added to refs/heads/geoapi-4.0 by this push:
     new 1733c7777a Tune the resolution of `xlink:href` attributes having a 
fragment in their URI values.
1733c7777a is described below

commit 1733c7777abb1b5ba781bf78f38ee7650a514f4d
Author: Martin Desruisseaux <martin.desruisse...@geomatys.com>
AuthorDate: Wed Dec 20 12:52:33 2023 +0100

    Tune the resolution of `xlink:href` attributes having a fragment in their 
URI values.
---
 .../main/org/apache/sis/xml/MarshalContext.java    |  4 +-
 .../main/org/apache/sis/xml/Pooled.java            |  5 +-
 .../main/org/apache/sis/xml/ReferenceResolver.java | 76 +++++++++++++++-------
 .../main/org/apache/sis/xml/bind/Context.java      | 51 ++++++++++-----
 .../apache/sis/xml/util/ExternalLinkHandler.java   | 12 ----
 .../main/org/apache/sis/xml/util/URISource.java    |  6 +-
 .../main/org/apache/sis/xml/util/XmlUtilities.java | 41 ++++++++++++
 .../org/apache/sis/xml/ReferenceResolverTest.java  | 14 +++-
 8 files changed, 147 insertions(+), 62 deletions(-)

diff --git 
a/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/MarshalContext.java
 
b/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/MarshalContext.java
index 57b4e22f16..c2454965ff 100644
--- 
a/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/MarshalContext.java
+++ 
b/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/MarshalContext.java
@@ -19,6 +19,7 @@ package org.apache.sis.xml;
 import java.util.Locale;
 import java.util.TimeZone;
 import org.opengis.util.InternationalString;
+import org.apache.sis.util.Localized;
 import org.apache.sis.util.Version;
 
 
@@ -29,7 +30,7 @@ import org.apache.sis.util.Version;
  * @version 1.5
  * @since   0.3
  */
-public abstract class MarshalContext {
+public abstract class MarshalContext implements Localized {
     /**
      * Creates a new (un)marshalling context.
      */
@@ -69,6 +70,7 @@ public abstract class MarshalContext {
      *
      * @see org.apache.sis.util.DefaultInternationalString#toString(Locale)
      */
+    @Override
     public abstract Locale getLocale();
 
     /**
diff --git 
a/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/Pooled.java 
b/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/Pooled.java
index 593d71f0bd..78b5b6dd0b 100644
--- a/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/Pooled.java
+++ b/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/Pooled.java
@@ -560,12 +560,9 @@ abstract class Pooled {
      */
     final Context begin(final ExternalLinkHandler linkHandler) {
         if (includedDocumentSystemId != null) {
-            if (linkHandler != null) {
-                linkHandler.includedDocumentSystemId = 
includedDocumentSystemId;
-            }
             final Context current = Context.current();
             if (current != null) {
-                return current.createChild(linkHandler);
+                return current.createChild(includedDocumentSystemId, 
linkHandler);
             }
         }
         return new Context(bitMasks | specificBitMasks(), pool, locale, 
timezone,
diff --git 
a/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/ReferenceResolver.java
 
b/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/ReferenceResolver.java
index c6cab8471e..4e86d6def6 100644
--- 
a/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/ReferenceResolver.java
+++ 
b/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/ReferenceResolver.java
@@ -17,12 +17,10 @@
 package org.apache.sis.xml;
 
 import java.net.URI;
-import java.io.IOException;
 import java.util.UUID;
 import java.lang.reflect.Proxy;
 import javax.xml.transform.Source;
 import jakarta.xml.bind.Unmarshaller;
-import jakarta.xml.bind.JAXBException;
 import org.opengis.metadata.Identifier;
 import org.apache.sis.util.ArgumentChecks;
 import org.apache.sis.util.Emptiable;
@@ -33,6 +31,7 @@ import org.apache.sis.xml.bind.Context;
 import org.apache.sis.xml.bind.gcx.Anchor;
 import org.apache.sis.xml.util.ExternalLinkHandler;
 import org.apache.sis.xml.util.URISource;
+import org.apache.sis.xml.util.XmlUtilities;
 
 
 /**
@@ -117,12 +116,19 @@ public class ReferenceResolver {
      *
      * <ul>
      *   <li>If {@code xlink:href} is null or {@linkplain URI#isOpaque() 
opaque}, returns {@code null}.</li>
-     *   <li>Otherwise, if {@code xlink:href} {@linkplain URI#isAbsolute() is 
absolute} or has a non-empty
-     *       {@linkplain URI#getPath() path}, delegate to {@link 
#resolveExternal(MarshalContext, Source)}.</li>
-     *   <li>Otherwise, if {@code xlink:href} is a {@linkplain 
URI#getFragment() fragment} of the form {@code "#foo"}
-     *       and if an object of class {@code type} with the {@code 
gml:id="foo"} attribute has previously been seen
-     *       in the same XML document, then return that object.</li>
-     *   <li>Otherwise, returns {@code null}.</li>
+     *   <li>Otherwise, if {@code xlink:href} is a {@linkplain 
URI#getFragment() fragment} with no path such
+     *       as {@code "#foo"}, then:
+     *     <ul>
+     *       <li>If an object of class {@code type} with an identifier 
attribute such as {@code gml:id="foo"}
+     *           has previously been seen in the same XML document (i.e., 
"foo" is a backward reference),
+     *           returns that object.</li>
+     *       <li>Otherwise, emits a warning and returns {@code null}.
+     *           Note that it may happen if the {@code xlink:href} is a 
forward reference.</li>
+     *     </ul>
+     *   </li>
+     *   <li>Otherwise, (URI {@linkplain URI#isAbsolute() is absolute} or has 
a {@linkplain URI#getPath() path}),
+     *       resolve the URI relatively to current document being unmarshalled 
and
+     *       delegate to {@link #resolveExternal(MarshalContext, Source)}.</li>
      * </ul>
      *
      * If an object is found but is not of the class declared in {@code type},
@@ -196,42 +202,55 @@ public class ReferenceResolver {
      * @param  context  context (GML version, locale, <i>etc.</i>) of the 
(un)marshalling process.
      * @param  source   source of the document specified by the {@code 
xlink:href} attribute value.
      * @return an object for the given source, or {@code null} if none.
-     * @throws IOException if an error occurred while opening the document.
-     * @throws JAXBException if an error occurred while parsing the document.
+     * @throws Exception if an error occurred while opening or parsing the 
document.
      *
      * @since 1.5
      */
-    protected Object resolveExternal(final MarshalContext context, final 
Source source) throws IOException, JAXBException {
-        final Object systemId;
+    protected Object resolveExternal(final MarshalContext context, final 
Source source) throws Exception {
+        final Object document;
         final String fragment;
         final URI uri;
         if (source instanceof URISource) {
             final var s = (URISource) source;
             uri = s.getReadableURI();
-            systemId = s.document;
+            document = s.document;
             fragment = s.fragment;
         } else {
-            systemId = source.getSystemId();
-            fragment = null;
-            uri      = null;
+            uri = null;
+            final int s;
+            final String systemId = source.getSystemId();
+            if (systemId != null && (s = systemId.lastIndexOf('#')) >= 0) {
+                document = Strings.trimOrNull(systemId.substring(0,s));
+                fragment = Strings.trimOrNull(systemId.substring(s+1));
+            } else {
+                document = systemId;
+                fragment = null;
+            }
         }
         /*
          * At this point, we got the system identifier (usually as a resolved 
URI, but not necessarily)
          * and the URI fragment to use as a GML identifier. Check if the 
document is in the cache.
+         * Note that if the fragment is null, then by convention we lookup for 
the whole document.
          */
         final Context c = Context.current();
-        Object object = Context.getObjectForID(c, systemId, fragment);
-        if (object != null) {
-            return object;
+        if (c != null) {
+            final Object object = c.getExternalObjectForID(document, fragment);
+            if (object != null) {
+                XmlUtilities.close(source);
+                return object;
+            }
         }
         /*
-         * Object not found in the cache. Parse it.
+         * Object not found in the cache. Parse it. As a side-effect of 
unmarshalling the document,
+         * a map of fragments found in the document will be populated. We use 
that map at the end
+         * for extracting the requested object.
          */
         final MarshallerPool pool = context.getPool();
         final Unmarshaller m = pool.acquireUnmarshaller();
         if (m instanceof Pooled) {
-            ((Pooled) m).forIncludedDocument(systemId);
+            ((Pooled) m).forIncludedDocument(document);
         }
+        final Object object;
         if (uri != null) {
             object = m.unmarshal(uri.toURL());
         } else {
@@ -239,10 +258,19 @@ public class ReferenceResolver {
         }
         pool.recycle(m);
         /*
-         * Object should be in the cache now.
+         * All fragments in the referenced document should be in the cache now.
+         * Cache the whole document, then request the fragment from the cache.
          */
-        if (fragment != null) {
-            object = Context.getObjectForID(c, systemId, fragment);
+        if (c != null) {
+            c.cacheDocument(document, object);
+            if (fragment != null) {
+                Object part = c.getExternalObjectForID(document, fragment);
+                if (part != null || uri != null) return part;
+                /*
+                 * Fragment not found. The source was not built by ourselves 
(`uri == null`),
+                 * so maybe the user provided a source which was returning 
directly the fragment.
+                 */
+            }
         }
         return object;
     }
diff --git 
a/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/bind/Context.java
 
b/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/bind/Context.java
index 85d6ec4e21..b4723f081c 100644
--- 
a/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/bind/Context.java
+++ 
b/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/bind/Context.java
@@ -22,6 +22,7 @@ import java.util.HashMap;
 import java.util.IdentityHashMap;
 import java.util.Locale;
 import java.util.TimeZone;
+import java.util.Collections;
 import java.util.logging.Level;
 import java.util.logging.Logger;
 import java.util.logging.LogRecord;
@@ -410,11 +411,20 @@ public final class Context extends MarshalContext {
      * will use a different {@link ExternalLinkHandler}. The returned context 
shall be
      * used in the same way as a context created from the public constructor.
      *
+     * <p>The {@code systemId} argument is the key to use for caching the 
result of (un)marshalling a document
+     * in the new context. This is usually the same value than the {@link 
String}, {@link File} or {@link URL}
+     * specified at {@code linkHandler} construction time, but as an {@link 
URI} or {@link String} instance.</p>
+     *
+     * @param  systemId     {@link URI} or {@link String} identifier of the 
document to (un)marshal.
      * @param  linkHandler  the document-dependent resolver of relative URIs, 
or {@code null}.
      * @return the context as a child of current context, never {@code null}.
+     *
+     * @see Context#getExternalObjectForID(Object, String)
      */
-    public final Context createChild(final ExternalLinkHandler linkHandler) {
+    public final Context createChild(final Object systemId, final 
ExternalLinkHandler linkHandler) {
+        // Use Map.put(…) instead of putIfAbsent(…) because maybe the previous 
map is unmodifiable.
         final Context context = new Context(this, locale, linkHandler, false);
+        identifiersPerDocuments.put(systemId, context.identifiers);
         CURRENT.set(context);
         return context;
     }
@@ -646,19 +656,30 @@ public final class Context extends MarshalContext {
      * However, the original URI instances should be used instead when they 
are available.
      * By convention, a null {@code id} returns the whole document.</p>
      *
-     * @param  context   the current context, or {@code null} if none.
      * @param  systemId  document identifier (without the fragment part) as an 
{@link URI} or a {@link String}.
      * @param  id        the fragment part of the URI identifying the object 
to get.
      * @return the object associated to the given identifier, or {@code null} 
if none.
      */
-    public static Object getObjectForID(final Context context, final Object 
systemId, final String id) {
-        if (context != null) {
-            final Map<String,Object> identifiers = 
context.identifiersPerDocuments.get(systemId);
-            if (identifiers != null) {
-                return identifiers.get(id);
-            }
+    public final Object getExternalObjectForID(final Object systemId, final 
String id) {
+        final Map<String,Object> cache = identifiersPerDocuments.get(systemId);
+        return (cache != null) ? cache.get(id) : null;
+    }
+
+    /**
+     * Adds in the cache the result of unmarshalling a document referenced by 
{@code xlink:href}.
+     * This method cache the whole document, ignoring the fragment in the 
{@code xlink:href}.
+     * The fragment part is obtained by {@link #getExternalObjectForID(Object, 
String)}.
+     *
+     * @param  systemId  document identifier (without the fragment part) as an 
{@link URI} or a {@link String}.
+     * @param  document  the document to cache.
+     */
+    public final void cacheDocument(final Object systemId, final Object 
document) {
+        final Map<String, Object> cache = 
identifiersPerDocuments.get(systemId);
+        if (cache == null || cache.isEmpty()) {
+            identifiersPerDocuments.put(systemId, 
Collections.singletonMap(null, document));
+        } else {
+            cache.put(null, document);
         }
-        return null;
     }
 
     /**
@@ -820,16 +841,10 @@ public final class Context extends MarshalContext {
         if ((bitMasks & CLEAR_SEMAPHORE) != 0) {
             Semaphores.clear(Semaphores.NULL_COLLECTION);
         }
-        if (previous == null) {
-            CURRENT.remove();
-        } else {
+        if (previous != null) {
             CURRENT.set(previous);
-            if (linkHandler != null) {
-                final Object systemId = linkHandler.includedDocumentSystemId;
-                if (systemId != null) {
-                    previous.identifiersPerDocuments.putIfAbsent(systemId, 
identifiers);
-                }
-            }
+        } else {
+            CURRENT.remove();
         }
     }
 
diff --git 
a/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/util/ExternalLinkHandler.java
 
b/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/util/ExternalLinkHandler.java
index 89f8db0884..65f31fb8c2 100644
--- 
a/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/util/ExternalLinkHandler.java
+++ 
b/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/util/ExternalLinkHandler.java
@@ -64,18 +64,6 @@ public class ExternalLinkHandler {
      */
     private Object base;
 
-    /**
-     * Key to use for caching the result of (un)marshalling the document 
resolved by this link handler.
-     * This is usually the same value than the {@link String}, {@link File} or 
{@link URL} specified at
-     * construction time, but as an {@link URI} or {@link String} instance.
-     *
-     * <p>This field is not used by {@code ExternalLinkHandler}. It is defined 
only as a way
-     * to transfer information between {@code PooledUnmarshaller} and {@link 
Context}.</p>
-     *
-     * @see Context#getObjectForID(Context, Object, String)
-     */
-    public Object includedDocumentSystemId;
-
     /**
      * Creates a new resolver for documents relative to the document in the 
specified URL.
      * The given URL can be what StAX, SAX and DOM call {@code systemId}.
diff --git 
a/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/util/URISource.java
 
b/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/util/URISource.java
index 7ea961e943..cf8747830c 100644
--- 
a/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/util/URISource.java
+++ 
b/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/util/URISource.java
@@ -45,16 +45,18 @@ public final class URISource extends StreamSource {
     public final String fragment;
 
     /**
-     * Creates a source from an URI.
+     * Creates a source from an URI. This constructor separates the fragment 
from the path.
+     * The URI stored by this constructor in {@link #document} excludes the 
fragment part.
      *
      * @param source  URI to the XML document.
      * @throws URISyntaxException if the URI is not valid.
      */
     URISource(URI source) throws URISyntaxException {
         source = source.normalize();
+        // Build a new URI unconditionally because it also decodes escaped 
characters.
         final URI c = new URI(source.getScheme(), 
source.getSchemeSpecificPart(), null);
         document = source.equals(c) ? source : c;       // Share the existing 
instance if applicable.
-        fragment = source.getFragment();
+        fragment = Strings.trimOrNull(source.getFragment());
     }
 
     /**
diff --git 
a/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/util/XmlUtilities.java
 
b/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/util/XmlUtilities.java
index 18e2777c28..e34546ba15 100644
--- 
a/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/util/XmlUtilities.java
+++ 
b/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/util/XmlUtilities.java
@@ -16,6 +16,8 @@
  */
 package org.apache.sis.xml.util;
 
+import java.io.Closeable;
+import java.io.IOException;
 import java.math.BigDecimal;
 import java.time.Instant;
 import java.time.LocalDate;
@@ -35,6 +37,10 @@ import java.util.Locale;
 import java.util.TimeZone;
 import java.util.GregorianCalendar;
 import java.util.function.ObjIntConsumer;
+import javax.xml.transform.Source;
+import javax.xml.transform.sax.SAXSource;
+import javax.xml.transform.stax.StAXSource;
+import javax.xml.transform.stream.StreamSource;
 import javax.xml.datatype.XMLGregorianCalendar;
 import javax.xml.datatype.DatatypeFactory;
 import javax.xml.datatype.DatatypeConfigurationException;
@@ -330,4 +336,39 @@ public final class XmlUtilities extends SystemListener {
         }
         return null;
     }
+
+    /**
+     * Closes the reader, input stream or XML stream used by the given source.
+     *
+     * @param  source  the source to close.
+     * @throws Exception if an error occurred while closing the source.
+     */
+    public static void close(final Source source) throws Exception {
+        if (source instanceof StreamSource) {
+            var s = (StreamSource) source;
+            close(s.getInputStream(), s.getReader());
+        } else if (source instanceof SAXSource) {
+            var s = ((SAXSource) source).getInputSource();
+            if (s != null) {
+                close(s.getByteStream(), s.getCharacterStream());
+            }
+        } else if (source instanceof StAXSource) {
+            var s = (StAXSource) source;
+            var c = s.getXMLEventReader();  if (c != null) c.close();
+            var b = s.getXMLStreamReader(); if (b != null) b.close();
+            /*
+             * Note: above calls to `close()` do not close the underlying 
streams,
+             * so we are not really done yet. But we cannot do better in this 
method.
+             */
+        }
+    }
+
+    /**
+     * Closes the given byte stream and character stream if non-null.
+     * This is an helper method for {@link #close(Source)} only.
+     */
+    private static void close(final Closeable b, final Closeable c) throws 
IOException {
+        if (c != null) c.close();
+        if (b != null) b.close();
+    }
 }
diff --git 
a/endorsed/src/org.apache.sis.metadata/test/org/apache/sis/xml/ReferenceResolverTest.java
 
b/endorsed/src/org.apache.sis.metadata/test/org/apache/sis/xml/ReferenceResolverTest.java
index caca2472e4..141eef9d9a 100644
--- 
a/endorsed/src/org.apache.sis.metadata/test/org/apache/sis/xml/ReferenceResolverTest.java
+++ 
b/endorsed/src/org.apache.sis.metadata/test/org/apache/sis/xml/ReferenceResolverTest.java
@@ -18,6 +18,7 @@ package org.apache.sis.xml;
 
 import java.io.IOException;
 import jakarta.xml.bind.JAXBException;
+import org.opengis.metadata.citation.Citation;
 import org.opengis.metadata.identification.DataIdentification;
 
 // Test dependencies
@@ -25,6 +26,7 @@ import org.junit.Test;
 import static org.junit.jupiter.api.Assertions.*;
 import org.apache.sis.metadata.xml.TestUsingFile;
 import org.apache.sis.metadata.iso.citation.DefaultCitationTest;
+import static org.apache.sis.test.TestUtilities.getSingleton;
 
 
 /**
@@ -49,6 +51,16 @@ public final class ReferenceResolverTest extends 
TestUsingFile {
     public void testUsingExternalXLink() throws IOException, JAXBException {
         final var data = (DataIdentification) 
XML.unmarshal(Format.XML2016.getURL("UsingExternalXLink.xml"));
         assertEquals("Test the use of XLink to an external document.", 
data.getAbstract().toString());
-        DefaultCitationTest.verifyUnmarshalledCitation(data.getCitation());
+        final Citation citation = data.getCitation();
+        DefaultCitationTest.verifyUnmarshalledCitation(citation);
+        /*
+         * The fragment should reference the exact same object than the one in 
the citation.
+         */
+        final var parent  = 
getSingleton(citation.getCitedResponsibleParties().iterator().next().getParties());
+        final var reusing = 
getSingleton(getSingleton(data.getPointOfContacts()).getParties());
+        assertEquals("Little John", reusing.getName().toString());
+        assertSame(getSingleton(parent .getContactInfo()),
+                   getSingleton(reusing.getContactInfo()));
+
     }
 }

Reply via email to