This is an automated email from the ASF dual-hosted git repository.
slawrence pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/daffodil.git
The following commit(s) were added to refs/heads/main by this push:
new 6232fd176 Update scala-xml to 2.2.0
6232fd176 is described below
commit 6232fd176b2c31b09a1ae9ea75b2c910babf3590
Author: Scala Steward <[email protected]>
AuthorDate: Mon Jul 24 15:14:26 2023 -0400
Update scala-xml to 2.2.0
Update code to work with changes to the scala-xml library. This also
includes removing a workaround for an infinite loop bug in
reportSyntaxError.
---
.../lib/xml/DaffodilConstructingLoader.scala | 10 ---
.../daffodil/lib/xml/DaffodilXMLLoader.scala | 72 +++++++++++++----
.../lib/util/TestXMLCatalogAndValidate.scala | 91 +++++++++++++---------
project/Dependencies.scala | 2 +-
4 files changed, 111 insertions(+), 64 deletions(-)
diff --git
a/daffodil-lib/src/main/scala/org/apache/daffodil/lib/xml/DaffodilConstructingLoader.scala
b/daffodil-lib/src/main/scala/org/apache/daffodil/lib/xml/DaffodilConstructingLoader.scala
index 03e0bb895..66ee620e2 100644
---
a/daffodil-lib/src/main/scala/org/apache/daffodil/lib/xml/DaffodilConstructingLoader.scala
+++
b/daffodil-lib/src/main/scala/org/apache/daffodil/lib/xml/DaffodilConstructingLoader.scala
@@ -172,16 +172,6 @@ class DaffodilConstructingLoader private[xml] (
override def reportSyntaxError(pos: Int, msg: String): Unit = {
val exc = makeSAXParseException(pos, msg)
errorHandler.fatalError(exc)
- if (msg == "'<' not allowed in attrib value") {
- // DAFFODIL-2586
- // There is a bug in scala-xml which causes an infinite loop when
- // this error condition is reached. The loop expects the scanner
- // to advance but in this case it does not. When this error is
- // seen, an exception needs to be thrown to stop processing.
- //
- // See:
https://github.com/scala/scala-xml/blob/v2.1.0/shared/src/main/scala/scala/xml/parsing/MarkupParserCommon.scala#L67-L72
- throw exc
- }
}
/*
diff --git
a/daffodil-lib/src/main/scala/org/apache/daffodil/lib/xml/DaffodilXMLLoader.scala
b/daffodil-lib/src/main/scala/org/apache/daffodil/lib/xml/DaffodilXMLLoader.scala
index f2d7b9126..942d7bce2 100644
---
a/daffodil-lib/src/main/scala/org/apache/daffodil/lib/xml/DaffodilXMLLoader.scala
+++
b/daffodil-lib/src/main/scala/org/apache/daffodil/lib/xml/DaffodilXMLLoader.scala
@@ -36,7 +36,6 @@ import javax.xml.transform.stream.StreamSource
import javax.xml.validation.Schema
import javax.xml.validation.SchemaFactory
import scala.collection.JavaConverters.asScalaBufferConverter
-import scala.xml.InputSource
import scala.xml.SAXParseException
import scala.xml.SAXParser
import scala.xml.parsing.NoBindingFactoryAdapter
@@ -52,6 +51,13 @@ import org.apache.xerces.xni.parser.XMLInputSource
import org.apache.xml.resolver.Catalog
import org.apache.xml.resolver.CatalogManager
import org.w3c.dom.ls.LSInput
+import org.xml.sax.ContentHandler
+import org.xml.sax.DTDHandler
+import org.xml.sax.EntityResolver
+import org.xml.sax.ErrorHandler
+import org.xml.sax.InputSource
+import org.xml.sax.XMLReader
+import org.xml.sax.helpers.XMLFilterImpl
/**
* Resolves URI/URL/URNs to loadable files/streams.
@@ -561,10 +567,18 @@ class DaffodilXMLLoader(val errorHandler:
org.xml.sax.ErrorHandler)
}
- // $COVERAGE-OFF$
+ // $COVERAGE-OFF$ These three functions should only be used if someone calls
one of the
+ // Scala-XML load* functions. Only our custom load() functions should be
used, which ensures
+ // that the correct parsers/readers are used
override def parser = {
Assert.usageError("not to be called.")
}
+ override def reader = {
+ Assert.usageError("not to be called.")
+ }
+ override def adapter = {
+ Assert.usageError("not to be called.")
+ }
// $COVERAGE-ON$
/**
@@ -609,21 +623,10 @@ class DaffodilXMLLoader(val errorHandler:
org.xml.sax.ErrorHandler)
private def parserFromFactory(f: SAXParserFactory) = {
val p = f.newSAXParser()
- //
// Not allowed on a SAXParser
// p.setProperty(XMLUtils.SAX_NAvMESPACES_FEATURE, true)
// Not allowed on a SAXParser
// p.setProperty(XMLUtils.SAX_NAMESPACE_PREFIXES_FEATURE, true)
- val xrdr = p.getXMLReader()
- xrdr.setErrorHandler(errorHandler)
- // not recognized by XMLReader
- // xrdr.setFeature("http://xml.org/sax/features/validation/dynamic", true)
- xrdr.setContentHandler(this)
- //
- // This is required to get the parse to really use our resolver.
- // The setEntityResolver(resolver) does not work.
- //
-
xrdr.setProperty("http://apache.org/xml/properties/internal/entity-resolver",
resolver)
p
}
@@ -685,10 +688,49 @@ class DaffodilXMLLoader(val errorHandler:
org.xml.sax.ErrorHandler)
// this writing, we have no tests that use that.
//
val parser = parserFromURI(optSchemaURI)
- val xrdr = parser.getXMLReader()
+ val xrdr: XMLReader = {
+ val r = parser.getXMLReader()
+
+ // We must use XMLReader setProperty() function to set the entity
resolver--calling
+ // setEntityResolver with the Xerces XML reader causes validation to
fail for some
+ // reason. We call the right function below, but unfortunately,
scala-xml calls
+ // setEntityResolver in loadDocument(), which cannot be disabled and
scala-xml does not
+ // want to change. To avoid this, we wrap the Xerces XMLReader in an
XMLFilterImpl and
+ // override setEntityResolver to a no-op. However, XMLFilterImpl
parse() calls
+ // setEntityResolver() on the XMLReader, which for the same reason as
before causes
+ // issues. To fix this, we can override parse() to just pass through
to the parent, but
+ // that means we must override the various set/get handler functions
to also pass
+ // through to the parent.
+ val w = new XMLFilterImpl(r) {
+ override def setEntityResolver(resolver: EntityResolver): Unit = {}
// no-op
+ override def parse(input: InputSource): Unit = getParent.parse(input)
+
+ override def setContentHandler(handler: ContentHandler): Unit =
+ getParent.setContentHandler(handler)
+ override def setDTDHandler(handler: DTDHandler): Unit =
+ getParent.setDTDHandler(handler)
+ override def setErrorHandler(handler: ErrorHandler): Unit =
+ getParent.setErrorHandler(handler)
+ override def getContentHandler(): ContentHandler =
+ getParent.getContentHandler()
+ override def getDTDHandler(): DTDHandler =
+ getParent.getDTDHandler()
+ override def getErrorHandler(): ErrorHandler =
+ getParent.getErrorHandler()
+ }
+ w.setErrorHandler(errorHandler)
+
w.setProperty("http://apache.org/xml/properties/internal/entity-resolver",
resolver)
+ w
+ }
+
val saxSource = scala.xml.Source.fromSysId(source.uriForLoading.toString)
try {
- xrdr.parse(saxSource)
+ // it is important that we call loadDocument to parse/validate the XML
instead of
+ // directly calling xrdr.parse. Although loadDocument does eventually
call xrdr.parse,
+ // it first modifies the reader in a number of ways to prepare it for
use with this
+ // FactoryAdapter, as well as initialize private state that is used by
ContentHandler
+ // functions.
+ loadDocument(saxSource, xrdr)
} catch {
// can be thrown by the resolver if a schemaLocation of
// an import/include cannot be resolved.
diff --git
a/daffodil-lib/src/test/scala/org/apache/daffodil/lib/util/TestXMLCatalogAndValidate.scala
b/daffodil-lib/src/test/scala/org/apache/daffodil/lib/util/TestXMLCatalogAndValidate.scala
index 59e87764f..3e1538cb8 100644
---
a/daffodil-lib/src/test/scala/org/apache/daffodil/lib/util/TestXMLCatalogAndValidate.scala
+++
b/daffodil-lib/src/test/scala/org/apache/daffodil/lib/util/TestXMLCatalogAndValidate.scala
@@ -24,7 +24,6 @@ import scala.collection.mutable
import scala.language.reflectiveCalls
import scala.xml.Attribute
import scala.xml.Elem
-import scala.xml.InputSource
import scala.xml.MetaData
import scala.xml.NamespaceBinding
import scala.xml.Node
@@ -45,6 +44,13 @@ import org.apache.xml.resolver.CatalogManager
import org.junit.Assert.assertTrue
import org.junit.Assert.fail
import org.junit.Test
+import org.xml.sax.ContentHandler
+import org.xml.sax.DTDHandler
+import org.xml.sax.EntityResolver
+import org.xml.sax.ErrorHandler
+import org.xml.sax.InputSource
+import org.xml.sax.XMLReader
+import org.xml.sax.helpers.XMLFilterImpl
object Implicits {
@@ -446,48 +452,56 @@ class SchemaAwareFactoryAdapter() extends
NoBindingFactoryAdapter {
f.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true)
f.setValidating(true)
val p = f.newSAXParser()
- val xr = p.getXMLReader()
+ val r = p.getXMLReader()
+
+ // We must use XMLReader setProperty() function to set the entity
resolver--calling
+ // setEntityResolver with the Xerces XML reader causes validation to fail
for some
+ // reason. We call the right function below, but unfortunately, scala-xml
calls
+ // setEntityResolver in loadDocument(), which cannot be disabled and
scala-xml does not
+ // want to change. To avoid this, we wrap the Xerces XMLReader in an
XMLFilterImpl and
+ // override setEntityResolver to a no-op. However, XMLFilterImpl parse()
calls
+ // setEntityResolver() on the XMLReader, which for the same reason as before
causes
+ // issues. To fix this, we can override parse() to just pass through to the
parent, but
+ // that means we must override the various set/get handler functions to also
pass
+ // through to the parent.
+ val xr = new XMLFilterImpl(r) {
+ override def setEntityResolver(resolver: EntityResolver): Unit = {} //
no-op
+ override def parse(input: InputSource): Unit = getParent.parse(input)
+
+ override def setContentHandler(handler: ContentHandler): Unit =
+ getParent.setContentHandler(handler)
+ override def setDTDHandler(handler: DTDHandler): Unit =
+ getParent.setDTDHandler(handler)
+ override def setErrorHandler(handler: ErrorHandler): Unit =
+ getParent.setErrorHandler(handler)
+ override def getContentHandler(): ContentHandler =
+ getParent.getContentHandler()
+ override def getDTDHandler(): DTDHandler =
+ getParent.getDTDHandler()
+ override def getErrorHandler(): ErrorHandler =
+ getParent.getErrorHandler()
+ }
+
xr.setContentHandler(this)
- xr.setEntityResolver(res) // older API??
+ // older API, must not be called for validation to work, must use setProperty
below
+ // xr.setEntityResolver(res)
xr.setProperty("http://apache.org/xml/properties/internal/entity-resolver",
res)
- //
-
- override lazy val parser: SAXParser = p
var exceptionList: List[Exception] = Nil
+ xr.setErrorHandler(new org.xml.sax.ErrorHandler() {
+ def warning(exception: SAXParseException) = { exceptionList :+= exception }
+ def error(exception: SAXParseException) = { exceptionList :+= exception }
+ def fatalError(exception: SAXParseException) = { exceptionList :+=
exception }
+ })
+
+ override lazy val parser: SAXParser = p
+ override lazy val reader: XMLReader = xr
/**
- * Called by all the load(...) methods to actually do the loading.
- *
- * @param source
- * @param ignored
- * @return the scala.xml.Node loaded that is the document element of the
loaded source.
+ * Scala-XML creates its own adapter and calls the loadDocument function on
that. We want it
+ * to use this custom adapter, so we must override it to point to this
*/
- override def loadXML(source: InputSource, ignored: SAXParser): Node = {
- val xr = parser.getXMLReader()
- xr.setErrorHandler(new org.xml.sax.ErrorHandler() {
-
- def warning(exception: SAXParseException) = {
- // System.err.println(exception.getMessage())
- exceptionList :+= exception
- }
-
- def error(exception: SAXParseException) = {
- // System.err.println("Error: " + exception.getMessage())
- exceptionList :+= exception
- }
- def fatalError(exception: SAXParseException) = {
- // System.err.println(exception.getMessage())
- exceptionList :+= exception
- }
- })
-
- // validation occurs during the loading process because
- // we set the feature requiring it above where the parser is constructed.
-
- xr.parse(source)
- return rootElem.asInstanceOf[Elem]
- }
+ override def adapter = this
}
/**
@@ -501,16 +515,17 @@ class MyResolver()
with org.w3c.dom.ls.LSResourceResolver
with org.xml.sax.EntityResolver
with org.xml.sax.ext.EntityResolver2 {
+
val cm = new CatalogManager()
val catFiles = cm.getCatalogFiles().toArray.toList.asInstanceOf[List[String]]
// println("catalog files: " + catFiles)
- cm.setIgnoreMissingProperties(false)
+ cm.setIgnoreMissingProperties(true)
cm.setRelativeCatalogs(true)
// cm.setVerbosity(4)
// cm.debug.setDebug(100)
val delegate = // new org.apache.xerces.util.XMLCatalogResolver() // cl)
new Catalog(cm) {
- // catalogManager.debug.setDebug(100)
+ // cm.debug.setDebug(100)
}
delegate.setupReaders()
delegate.loadSystemCatalogs()
diff --git a/project/Dependencies.scala b/project/Dependencies.scala
index ca60197cb..70b54b829 100644
--- a/project/Dependencies.scala
+++ b/project/Dependencies.scala
@@ -23,7 +23,7 @@ object Dependencies {
lazy val core = Seq(
"com.lihaoyi" %% "os-lib" % "0.9.1", // for writing/compiling C source
files
- "org.scala-lang.modules" %% "scala-xml" % "2.1.0",
+ "org.scala-lang.modules" %% "scala-xml" % "2.2.0",
"org.scala-lang.modules" %% "scala-parser-combinators" % "2.3.0",
"com.ibm.icu" % "icu4j" % "73.2",
"xerces" % "xercesImpl" % "2.12.2",