This is an automated email from the ASF dual-hosted git repository.
desruisseaux pushed a commit to branch geoapi-4.0
in repository https://gitbox.apache.org/repos/asf/sis.git
The following commit(s) were added to refs/heads/geoapi-4.0 by this push:
new 1733c7777a Tune the resolution of `xlink:href` attributes having a
fragment in their URI values.
1733c7777a is described below
commit 1733c7777abb1b5ba781bf78f38ee7650a514f4d
Author: Martin Desruisseaux <[email protected]>
AuthorDate: Wed Dec 20 12:52:33 2023 +0100
Tune the resolution of `xlink:href` attributes having a fragment in their
URI values.
---
.../main/org/apache/sis/xml/MarshalContext.java | 4 +-
.../main/org/apache/sis/xml/Pooled.java | 5 +-
.../main/org/apache/sis/xml/ReferenceResolver.java | 76 +++++++++++++++-------
.../main/org/apache/sis/xml/bind/Context.java | 51 ++++++++++-----
.../apache/sis/xml/util/ExternalLinkHandler.java | 12 ----
.../main/org/apache/sis/xml/util/URISource.java | 6 +-
.../main/org/apache/sis/xml/util/XmlUtilities.java | 41 ++++++++++++
.../org/apache/sis/xml/ReferenceResolverTest.java | 14 +++-
8 files changed, 147 insertions(+), 62 deletions(-)
diff --git
a/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/MarshalContext.java
b/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/MarshalContext.java
index 57b4e22f16..c2454965ff 100644
---
a/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/MarshalContext.java
+++
b/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/MarshalContext.java
@@ -19,6 +19,7 @@ package org.apache.sis.xml;
import java.util.Locale;
import java.util.TimeZone;
import org.opengis.util.InternationalString;
+import org.apache.sis.util.Localized;
import org.apache.sis.util.Version;
@@ -29,7 +30,7 @@ import org.apache.sis.util.Version;
* @version 1.5
* @since 0.3
*/
-public abstract class MarshalContext {
+public abstract class MarshalContext implements Localized {
/**
* Creates a new (un)marshalling context.
*/
@@ -69,6 +70,7 @@ public abstract class MarshalContext {
*
* @see org.apache.sis.util.DefaultInternationalString#toString(Locale)
*/
+ @Override
public abstract Locale getLocale();
/**
diff --git
a/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/Pooled.java
b/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/Pooled.java
index 593d71f0bd..78b5b6dd0b 100644
--- a/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/Pooled.java
+++ b/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/Pooled.java
@@ -560,12 +560,9 @@ abstract class Pooled {
*/
final Context begin(final ExternalLinkHandler linkHandler) {
if (includedDocumentSystemId != null) {
- if (linkHandler != null) {
- linkHandler.includedDocumentSystemId =
includedDocumentSystemId;
- }
final Context current = Context.current();
if (current != null) {
- return current.createChild(linkHandler);
+ return current.createChild(includedDocumentSystemId,
linkHandler);
}
}
return new Context(bitMasks | specificBitMasks(), pool, locale,
timezone,
diff --git
a/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/ReferenceResolver.java
b/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/ReferenceResolver.java
index c6cab8471e..4e86d6def6 100644
---
a/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/ReferenceResolver.java
+++
b/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/ReferenceResolver.java
@@ -17,12 +17,10 @@
package org.apache.sis.xml;
import java.net.URI;
-import java.io.IOException;
import java.util.UUID;
import java.lang.reflect.Proxy;
import javax.xml.transform.Source;
import jakarta.xml.bind.Unmarshaller;
-import jakarta.xml.bind.JAXBException;
import org.opengis.metadata.Identifier;
import org.apache.sis.util.ArgumentChecks;
import org.apache.sis.util.Emptiable;
@@ -33,6 +31,7 @@ import org.apache.sis.xml.bind.Context;
import org.apache.sis.xml.bind.gcx.Anchor;
import org.apache.sis.xml.util.ExternalLinkHandler;
import org.apache.sis.xml.util.URISource;
+import org.apache.sis.xml.util.XmlUtilities;
/**
@@ -117,12 +116,19 @@ public class ReferenceResolver {
*
* <ul>
* <li>If {@code xlink:href} is null or {@linkplain URI#isOpaque()
opaque}, returns {@code null}.</li>
- * <li>Otherwise, if {@code xlink:href} {@linkplain URI#isAbsolute() is
absolute} or has a non-empty
- * {@linkplain URI#getPath() path}, delegate to {@link
#resolveExternal(MarshalContext, Source)}.</li>
- * <li>Otherwise, if {@code xlink:href} is a {@linkplain
URI#getFragment() fragment} of the form {@code "#foo"}
- * and if an object of class {@code type} with the {@code
gml:id="foo"} attribute has previously been seen
- * in the same XML document, then return that object.</li>
- * <li>Otherwise, returns {@code null}.</li>
+ * <li>Otherwise, if {@code xlink:href} is a {@linkplain
URI#getFragment() fragment} with no path such
+ * as {@code "#foo"}, then:
+ * <ul>
+ * <li>If an object of class {@code type} with an identifier
attribute such as {@code gml:id="foo"}
+ * has previously been seen in the same XML document (i.e.,
"foo" is a backward reference),
+ * returns that object.</li>
+ * <li>Otherwise, emits a warning and returns {@code null}.
+ * Note that it may happen if the {@code xlink:href} is a
forward reference.</li>
+ * </ul>
+ * </li>
+ * <li>Otherwise, (URI {@linkplain URI#isAbsolute() is absolute} or has
a {@linkplain URI#getPath() path}),
+ * resolve the URI relatively to current document being unmarshalled
and
+ * delegate to {@link #resolveExternal(MarshalContext, Source)}.</li>
* </ul>
*
* If an object is found but is not of the class declared in {@code type},
@@ -196,42 +202,55 @@ public class ReferenceResolver {
* @param context context (GML version, locale, <i>etc.</i>) of the
(un)marshalling process.
* @param source source of the document specified by the {@code
xlink:href} attribute value.
* @return an object for the given source, or {@code null} if none.
- * @throws IOException if an error occurred while opening the document.
- * @throws JAXBException if an error occurred while parsing the document.
+ * @throws Exception if an error occurred while opening or parsing the
document.
*
* @since 1.5
*/
- protected Object resolveExternal(final MarshalContext context, final
Source source) throws IOException, JAXBException {
- final Object systemId;
+ protected Object resolveExternal(final MarshalContext context, final
Source source) throws Exception {
+ final Object document;
final String fragment;
final URI uri;
if (source instanceof URISource) {
final var s = (URISource) source;
uri = s.getReadableURI();
- systemId = s.document;
+ document = s.document;
fragment = s.fragment;
} else {
- systemId = source.getSystemId();
- fragment = null;
- uri = null;
+ uri = null;
+ final int s;
+ final String systemId = source.getSystemId();
+ if (systemId != null && (s = systemId.lastIndexOf('#')) >= 0) {
+ document = Strings.trimOrNull(systemId.substring(0,s));
+ fragment = Strings.trimOrNull(systemId.substring(s+1));
+ } else {
+ document = systemId;
+ fragment = null;
+ }
}
/*
* At this point, we got the system identifier (usually as a resolved
URI, but not necessarily)
* and the URI fragment to use as a GML identifier. Check if the
document is in the cache.
+ * Note that if the fragment is null, then by convention we lookup for
the whole document.
*/
final Context c = Context.current();
- Object object = Context.getObjectForID(c, systemId, fragment);
- if (object != null) {
- return object;
+ if (c != null) {
+ final Object object = c.getExternalObjectForID(document, fragment);
+ if (object != null) {
+ XmlUtilities.close(source);
+ return object;
+ }
}
/*
- * Object not found in the cache. Parse it.
+ * Object not found in the cache. Parse it. As a side-effect of
unmarshalling the document,
+ * a map of fragments found in the document will be populated. We use
that map at the end
+ * for extracting the requested object.
*/
final MarshallerPool pool = context.getPool();
final Unmarshaller m = pool.acquireUnmarshaller();
if (m instanceof Pooled) {
- ((Pooled) m).forIncludedDocument(systemId);
+ ((Pooled) m).forIncludedDocument(document);
}
+ final Object object;
if (uri != null) {
object = m.unmarshal(uri.toURL());
} else {
@@ -239,10 +258,19 @@ public class ReferenceResolver {
}
pool.recycle(m);
/*
- * Object should be in the cache now.
+ * All fragments in the referenced document should be in the cache now.
+ * Cache the whole document, then request the fragment from the cache.
*/
- if (fragment != null) {
- object = Context.getObjectForID(c, systemId, fragment);
+ if (c != null) {
+ c.cacheDocument(document, object);
+ if (fragment != null) {
+ Object part = c.getExternalObjectForID(document, fragment);
+ if (part != null || uri != null) return part;
+ /*
+ * Fragment not found. The source was not built by ourselves
(`uri == null`),
+ * so maybe the user provided a source which was returning
directly the fragment.
+ */
+ }
}
return object;
}
diff --git
a/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/bind/Context.java
b/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/bind/Context.java
index 85d6ec4e21..b4723f081c 100644
---
a/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/bind/Context.java
+++
b/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/bind/Context.java
@@ -22,6 +22,7 @@ import java.util.HashMap;
import java.util.IdentityHashMap;
import java.util.Locale;
import java.util.TimeZone;
+import java.util.Collections;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.logging.LogRecord;
@@ -410,11 +411,20 @@ public final class Context extends MarshalContext {
* will use a different {@link ExternalLinkHandler}. The returned context
shall be
* used in the same way as a context created from the public constructor.
*
+ * <p>The {@code systemId} argument is the key to use for caching the
result of (un)marshalling a document
+ * in the new context. This is usually the same value than the {@link
String}, {@link File} or {@link URL}
+ * specified at {@code linkHandler} construction time, but as an {@link
URI} or {@link String} instance.</p>
+ *
+ * @param systemId {@link URI} or {@link String} identifier of the
document to (un)marshal.
* @param linkHandler the document-dependent resolver of relative URIs,
or {@code null}.
* @return the context as a child of current context, never {@code null}.
+ *
+ * @see Context#getExternalObjectForID(Object, String)
*/
- public final Context createChild(final ExternalLinkHandler linkHandler) {
+ public final Context createChild(final Object systemId, final
ExternalLinkHandler linkHandler) {
+ // Use Map.put(…) instead of putIfAbsent(…) because maybe the previous
map is unmodifiable.
final Context context = new Context(this, locale, linkHandler, false);
+ identifiersPerDocuments.put(systemId, context.identifiers);
CURRENT.set(context);
return context;
}
@@ -646,19 +656,30 @@ public final class Context extends MarshalContext {
* However, the original URI instances should be used instead when they
are available.
* By convention, a null {@code id} returns the whole document.</p>
*
- * @param context the current context, or {@code null} if none.
* @param systemId document identifier (without the fragment part) as an
{@link URI} or a {@link String}.
* @param id the fragment part of the URI identifying the object
to get.
* @return the object associated to the given identifier, or {@code null}
if none.
*/
- public static Object getObjectForID(final Context context, final Object
systemId, final String id) {
- if (context != null) {
- final Map<String,Object> identifiers =
context.identifiersPerDocuments.get(systemId);
- if (identifiers != null) {
- return identifiers.get(id);
- }
+ public final Object getExternalObjectForID(final Object systemId, final
String id) {
+ final Map<String,Object> cache = identifiersPerDocuments.get(systemId);
+ return (cache != null) ? cache.get(id) : null;
+ }
+
+ /**
+ * Adds in the cache the result of unmarshalling a document referenced by
{@code xlink:href}.
+ * This method cache the whole document, ignoring the fragment in the
{@code xlink:href}.
+ * The fragment part is obtained by {@link #getExternalObjectForID(Object,
String)}.
+ *
+ * @param systemId document identifier (without the fragment part) as an
{@link URI} or a {@link String}.
+ * @param document the document to cache.
+ */
+ public final void cacheDocument(final Object systemId, final Object
document) {
+ final Map<String, Object> cache =
identifiersPerDocuments.get(systemId);
+ if (cache == null || cache.isEmpty()) {
+ identifiersPerDocuments.put(systemId,
Collections.singletonMap(null, document));
+ } else {
+ cache.put(null, document);
}
- return null;
}
/**
@@ -820,16 +841,10 @@ public final class Context extends MarshalContext {
if ((bitMasks & CLEAR_SEMAPHORE) != 0) {
Semaphores.clear(Semaphores.NULL_COLLECTION);
}
- if (previous == null) {
- CURRENT.remove();
- } else {
+ if (previous != null) {
CURRENT.set(previous);
- if (linkHandler != null) {
- final Object systemId = linkHandler.includedDocumentSystemId;
- if (systemId != null) {
- previous.identifiersPerDocuments.putIfAbsent(systemId,
identifiers);
- }
- }
+ } else {
+ CURRENT.remove();
}
}
diff --git
a/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/util/ExternalLinkHandler.java
b/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/util/ExternalLinkHandler.java
index 89f8db0884..65f31fb8c2 100644
---
a/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/util/ExternalLinkHandler.java
+++
b/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/util/ExternalLinkHandler.java
@@ -64,18 +64,6 @@ public class ExternalLinkHandler {
*/
private Object base;
- /**
- * Key to use for caching the result of (un)marshalling the document
resolved by this link handler.
- * This is usually the same value than the {@link String}, {@link File} or
{@link URL} specified at
- * construction time, but as an {@link URI} or {@link String} instance.
- *
- * <p>This field is not used by {@code ExternalLinkHandler}. It is defined
only as a way
- * to transfer information between {@code PooledUnmarshaller} and {@link
Context}.</p>
- *
- * @see Context#getObjectForID(Context, Object, String)
- */
- public Object includedDocumentSystemId;
-
/**
* Creates a new resolver for documents relative to the document in the
specified URL.
* The given URL can be what StAX, SAX and DOM call {@code systemId}.
diff --git
a/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/util/URISource.java
b/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/util/URISource.java
index 7ea961e943..cf8747830c 100644
---
a/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/util/URISource.java
+++
b/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/util/URISource.java
@@ -45,16 +45,18 @@ public final class URISource extends StreamSource {
public final String fragment;
/**
- * Creates a source from an URI.
+ * Creates a source from an URI. This constructor separates the fragment
from the path.
+ * The URI stored by this constructor in {@link #document} excludes the
fragment part.
*
* @param source URI to the XML document.
* @throws URISyntaxException if the URI is not valid.
*/
URISource(URI source) throws URISyntaxException {
source = source.normalize();
+ // Build a new URI unconditionally because it also decodes escaped
characters.
final URI c = new URI(source.getScheme(),
source.getSchemeSpecificPart(), null);
document = source.equals(c) ? source : c; // Share the existing
instance if applicable.
- fragment = source.getFragment();
+ fragment = Strings.trimOrNull(source.getFragment());
}
/**
diff --git
a/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/util/XmlUtilities.java
b/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/util/XmlUtilities.java
index 18e2777c28..e34546ba15 100644
---
a/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/util/XmlUtilities.java
+++
b/endorsed/src/org.apache.sis.metadata/main/org/apache/sis/xml/util/XmlUtilities.java
@@ -16,6 +16,8 @@
*/
package org.apache.sis.xml.util;
+import java.io.Closeable;
+import java.io.IOException;
import java.math.BigDecimal;
import java.time.Instant;
import java.time.LocalDate;
@@ -35,6 +37,10 @@ import java.util.Locale;
import java.util.TimeZone;
import java.util.GregorianCalendar;
import java.util.function.ObjIntConsumer;
+import javax.xml.transform.Source;
+import javax.xml.transform.sax.SAXSource;
+import javax.xml.transform.stax.StAXSource;
+import javax.xml.transform.stream.StreamSource;
import javax.xml.datatype.XMLGregorianCalendar;
import javax.xml.datatype.DatatypeFactory;
import javax.xml.datatype.DatatypeConfigurationException;
@@ -330,4 +336,39 @@ public final class XmlUtilities extends SystemListener {
}
return null;
}
+
+ /**
+ * Closes the reader, input stream or XML stream used by the given source.
+ *
+ * @param source the source to close.
+ * @throws Exception if an error occurred while closing the source.
+ */
+ public static void close(final Source source) throws Exception {
+ if (source instanceof StreamSource) {
+ var s = (StreamSource) source;
+ close(s.getInputStream(), s.getReader());
+ } else if (source instanceof SAXSource) {
+ var s = ((SAXSource) source).getInputSource();
+ if (s != null) {
+ close(s.getByteStream(), s.getCharacterStream());
+ }
+ } else if (source instanceof StAXSource) {
+ var s = (StAXSource) source;
+ var c = s.getXMLEventReader(); if (c != null) c.close();
+ var b = s.getXMLStreamReader(); if (b != null) b.close();
+ /*
+ * Note: above calls to `close()` do not close the underlying
streams,
+ * so we are not really done yet. But we cannot do better in this
method.
+ */
+ }
+ }
+
+ /**
+ * Closes the given byte stream and character stream if non-null.
+ * This is an helper method for {@link #close(Source)} only.
+ */
+ private static void close(final Closeable b, final Closeable c) throws
IOException {
+ if (c != null) c.close();
+ if (b != null) b.close();
+ }
}
diff --git
a/endorsed/src/org.apache.sis.metadata/test/org/apache/sis/xml/ReferenceResolverTest.java
b/endorsed/src/org.apache.sis.metadata/test/org/apache/sis/xml/ReferenceResolverTest.java
index caca2472e4..141eef9d9a 100644
---
a/endorsed/src/org.apache.sis.metadata/test/org/apache/sis/xml/ReferenceResolverTest.java
+++
b/endorsed/src/org.apache.sis.metadata/test/org/apache/sis/xml/ReferenceResolverTest.java
@@ -18,6 +18,7 @@ package org.apache.sis.xml;
import java.io.IOException;
import jakarta.xml.bind.JAXBException;
+import org.opengis.metadata.citation.Citation;
import org.opengis.metadata.identification.DataIdentification;
// Test dependencies
@@ -25,6 +26,7 @@ import org.junit.Test;
import static org.junit.jupiter.api.Assertions.*;
import org.apache.sis.metadata.xml.TestUsingFile;
import org.apache.sis.metadata.iso.citation.DefaultCitationTest;
+import static org.apache.sis.test.TestUtilities.getSingleton;
/**
@@ -49,6 +51,16 @@ public final class ReferenceResolverTest extends
TestUsingFile {
public void testUsingExternalXLink() throws IOException, JAXBException {
final var data = (DataIdentification)
XML.unmarshal(Format.XML2016.getURL("UsingExternalXLink.xml"));
assertEquals("Test the use of XLink to an external document.",
data.getAbstract().toString());
- DefaultCitationTest.verifyUnmarshalledCitation(data.getCitation());
+ final Citation citation = data.getCitation();
+ DefaultCitationTest.verifyUnmarshalledCitation(citation);
+ /*
+ * The fragment should reference the exact same object than the one in
the citation.
+ */
+ final var parent =
getSingleton(citation.getCitedResponsibleParties().iterator().next().getParties());
+ final var reusing =
getSingleton(getSingleton(data.getPointOfContacts()).getParties());
+ assertEquals("Little John", reusing.getName().toString());
+ assertSame(getSingleton(parent .getContactInfo()),
+ getSingleton(reusing.getContactInfo()));
+
}
}