This is an automated email from the ASF dual-hosted git repository.

slawrence pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/daffodil.git


The following commit(s) were added to refs/heads/main by this push:
     new 6232fd176 Update scala-xml to 2.2.0
6232fd176 is described below

commit 6232fd176b2c31b09a1ae9ea75b2c910babf3590
Author: Scala Steward <[email protected]>
AuthorDate: Mon Jul 24 15:14:26 2023 -0400

    Update scala-xml to 2.2.0
    
    Update code to work with changes to the scala-xml library. This also
    includes removing a workaround for an infinite loop bug in reportSyntaxError.
---
 .../lib/xml/DaffodilConstructingLoader.scala       | 10 ---
 .../daffodil/lib/xml/DaffodilXMLLoader.scala       | 72 +++++++++++++----
 .../lib/util/TestXMLCatalogAndValidate.scala       | 91 +++++++++++++---------
 project/Dependencies.scala                         |  2 +-
 4 files changed, 111 insertions(+), 64 deletions(-)

diff --git 
a/daffodil-lib/src/main/scala/org/apache/daffodil/lib/xml/DaffodilConstructingLoader.scala
 
b/daffodil-lib/src/main/scala/org/apache/daffodil/lib/xml/DaffodilConstructingLoader.scala
index 03e0bb895..66ee620e2 100644
--- 
a/daffodil-lib/src/main/scala/org/apache/daffodil/lib/xml/DaffodilConstructingLoader.scala
+++ 
b/daffodil-lib/src/main/scala/org/apache/daffodil/lib/xml/DaffodilConstructingLoader.scala
@@ -172,16 +172,6 @@ class DaffodilConstructingLoader private[xml] (
   override def reportSyntaxError(pos: Int, msg: String): Unit = {
     val exc = makeSAXParseException(pos, msg)
     errorHandler.fatalError(exc)
-    if (msg == "'<' not allowed in attrib value") {
-      // DAFFODIL-2586
-      // There is a bug in scala-xml which causes an infinite loop when
-      // this error condition is reached. The loop expects the scanner
-      // to advance but in this case it does not. When this error is
-      // seen, an exception needs to be thrown to stop processing.
-      //
-      // See: 
https://github.com/scala/scala-xml/blob/v2.1.0/shared/src/main/scala/scala/xml/parsing/MarkupParserCommon.scala#L67-L72
-      throw exc
-    }
   }
 
   /*
diff --git 
a/daffodil-lib/src/main/scala/org/apache/daffodil/lib/xml/DaffodilXMLLoader.scala
 
b/daffodil-lib/src/main/scala/org/apache/daffodil/lib/xml/DaffodilXMLLoader.scala
index f2d7b9126..942d7bce2 100644
--- 
a/daffodil-lib/src/main/scala/org/apache/daffodil/lib/xml/DaffodilXMLLoader.scala
+++ 
b/daffodil-lib/src/main/scala/org/apache/daffodil/lib/xml/DaffodilXMLLoader.scala
@@ -36,7 +36,6 @@ import javax.xml.transform.stream.StreamSource
 import javax.xml.validation.Schema
 import javax.xml.validation.SchemaFactory
 import scala.collection.JavaConverters.asScalaBufferConverter
-import scala.xml.InputSource
 import scala.xml.SAXParseException
 import scala.xml.SAXParser
 import scala.xml.parsing.NoBindingFactoryAdapter
@@ -52,6 +51,13 @@ import org.apache.xerces.xni.parser.XMLInputSource
 import org.apache.xml.resolver.Catalog
 import org.apache.xml.resolver.CatalogManager
 import org.w3c.dom.ls.LSInput
+import org.xml.sax.ContentHandler
+import org.xml.sax.DTDHandler
+import org.xml.sax.EntityResolver
+import org.xml.sax.ErrorHandler
+import org.xml.sax.InputSource
+import org.xml.sax.XMLReader
+import org.xml.sax.helpers.XMLFilterImpl
 
 /**
  * Resolves URI/URL/URNs to loadable files/streams.
@@ -561,10 +567,18 @@ class DaffodilXMLLoader(val errorHandler: 
org.xml.sax.ErrorHandler)
 
   }
 
-  // $COVERAGE-OFF$
+  // $COVERAGE-OFF$ These three functions should only be used if someone calls 
one of the
+  // Scala-XML load* functions. Only our custom load() functions should be 
used, which ensures
+  // that the correct parsers/readers are used
   override def parser = {
     Assert.usageError("not to be called.")
   }
+  override def reader = {
+    Assert.usageError("not to be called.")
+  }
+  override def adapter = {
+    Assert.usageError("not to be called.")
+  }
   // $COVERAGE-ON$
 
   /**
@@ -609,21 +623,10 @@ class DaffodilXMLLoader(val errorHandler: 
org.xml.sax.ErrorHandler)
 
   private def parserFromFactory(f: SAXParserFactory) = {
     val p = f.newSAXParser()
-    //
     // Not allowed on a SAXParser
     // p.setProperty(XMLUtils.SAX_NAvMESPACES_FEATURE, true)
     // Not allowed on a SAXParser
     // p.setProperty(XMLUtils.SAX_NAMESPACE_PREFIXES_FEATURE, true)
-    val xrdr = p.getXMLReader()
-    xrdr.setErrorHandler(errorHandler)
-    // not recognized by XMLReader
-    // xrdr.setFeature("http://xml.org/sax/features/validation/dynamic", true)
-    xrdr.setContentHandler(this)
-    //
-    // This is required to get the parse to really use our resolver.
-    // The setEntityResolver(resolver) does not work.
-    //
-    xrdr.setProperty("http://apache.org/xml/properties/internal/entity-resolver", resolver)
     p
   }
 
@@ -685,10 +688,49 @@ class DaffodilXMLLoader(val errorHandler: 
org.xml.sax.ErrorHandler)
       // this writing, we have no tests that use that.
       //
       val parser = parserFromURI(optSchemaURI)
-      val xrdr = parser.getXMLReader()
+      val xrdr: XMLReader = {
+        val r = parser.getXMLReader()
+
+        // We must use XMLReader setProperty() function to set the entity 
resolver--calling
+        // setEntityResolver with the Xerces XML reader causes validation to 
fail for some
+        // reason. We call the right function below, but unfortunately, 
scala-xml calls
+        // setEntityResolver in loadDocument(), which cannot be disabled and 
scala-xml does not
+        // want to change. To avoid this, we wrap the Xerces XMLReader in an 
XMLFilterImpl and
+        // override setEntityResolver to a no-op. However, XMLFilterImpl 
parse() calls
+        // setEntityResolver() on the XMLReader, which for the same reason as 
before causes
+        // issues. To fix this, we can override parse() to just pass through 
to the parent, but
+        // that means we must override the various set/get handler functions 
to also pass
+        // through to the parent.
+        val w = new XMLFilterImpl(r) {
+          override def setEntityResolver(resolver: EntityResolver): Unit = {} 
// no-op
+          override def parse(input: InputSource): Unit = getParent.parse(input)
+
+          override def setContentHandler(handler: ContentHandler): Unit =
+            getParent.setContentHandler(handler)
+          override def setDTDHandler(handler: DTDHandler): Unit =
+            getParent.setDTDHandler(handler)
+          override def setErrorHandler(handler: ErrorHandler): Unit =
+            getParent.setErrorHandler(handler)
+          override def getContentHandler(): ContentHandler =
+            getParent.getContentHandler()
+          override def getDTDHandler(): DTDHandler =
+            getParent.getDTDHandler()
+          override def getErrorHandler(): ErrorHandler =
+            getParent.getErrorHandler()
+        }
+        w.setErrorHandler(errorHandler)
+        w.setProperty("http://apache.org/xml/properties/internal/entity-resolver", resolver)
+        w
+      }
+
       val saxSource = scala.xml.Source.fromSysId(source.uriForLoading.toString)
       try {
-        xrdr.parse(saxSource)
+        // it is important that we call loadDocument to parse/validate the XML 
instead of
+        // directly calling xrdr.parse. Although loadDocument does eventually 
call xrdr.parse,
+        // it first modifies the reader in a number of ways to prepare it for 
use with this
+        // FactoryAdapter, as well as initialize private state that is used by 
ContentHandler
+        // functions.
+        loadDocument(saxSource, xrdr)
       } catch {
         // can be thrown by the resolver if a schemaLocation of
         // an import/include cannot be resolved.
diff --git 
a/daffodil-lib/src/test/scala/org/apache/daffodil/lib/util/TestXMLCatalogAndValidate.scala
 
b/daffodil-lib/src/test/scala/org/apache/daffodil/lib/util/TestXMLCatalogAndValidate.scala
index 59e87764f..3e1538cb8 100644
--- 
a/daffodil-lib/src/test/scala/org/apache/daffodil/lib/util/TestXMLCatalogAndValidate.scala
+++ 
b/daffodil-lib/src/test/scala/org/apache/daffodil/lib/util/TestXMLCatalogAndValidate.scala
@@ -24,7 +24,6 @@ import scala.collection.mutable
 import scala.language.reflectiveCalls
 import scala.xml.Attribute
 import scala.xml.Elem
-import scala.xml.InputSource
 import scala.xml.MetaData
 import scala.xml.NamespaceBinding
 import scala.xml.Node
@@ -45,6 +44,13 @@ import org.apache.xml.resolver.CatalogManager
 import org.junit.Assert.assertTrue
 import org.junit.Assert.fail
 import org.junit.Test
+import org.xml.sax.ContentHandler
+import org.xml.sax.DTDHandler
+import org.xml.sax.EntityResolver
+import org.xml.sax.ErrorHandler
+import org.xml.sax.InputSource
+import org.xml.sax.XMLReader
+import org.xml.sax.helpers.XMLFilterImpl
 
 object Implicits {
 
@@ -446,48 +452,56 @@ class SchemaAwareFactoryAdapter() extends 
NoBindingFactoryAdapter {
   f.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true)
   f.setValidating(true)
   val p = f.newSAXParser()
-  val xr = p.getXMLReader()
+  val r = p.getXMLReader()
+
+  // We must use XMLReader setProperty() function to set the entity 
resolver--calling
+  // setEntityResolver with the Xerces XML reader causes validation to fail 
for some
+  // reason. We call the right function below, but unfortunately, scala-xml 
calls
+  // setEntityResolver in loadDocument(), which cannot be disabled and 
scala-xml does not
+  // want to change. To avoid this, we wrap the Xerces XMLReader in an 
XMLFilterImpl and
+  // override setEntityResolver to a no-op. However, XMLFilterImpl parse() 
calls
+  // setEntityResolver() on the XMLReader, which for the same reason as before 
causes
+  // issues. To fix this, we can override parse() to just pass through to the 
parent, but
+  // that means we must override the various set/get handler functions to also 
pass
+  // through to the parent.
+  val xr = new XMLFilterImpl(r) {
+    override def setEntityResolver(resolver: EntityResolver): Unit = {} // 
no-op
+    override def parse(input: InputSource): Unit = getParent.parse(input)
+
+    override def setContentHandler(handler: ContentHandler): Unit =
+      getParent.setContentHandler(handler)
+    override def setDTDHandler(handler: DTDHandler): Unit =
+      getParent.setDTDHandler(handler)
+    override def setErrorHandler(handler: ErrorHandler): Unit =
+      getParent.setErrorHandler(handler)
+    override def getContentHandler(): ContentHandler =
+      getParent.getContentHandler()
+    override def getDTDHandler(): DTDHandler =
+      getParent.getDTDHandler()
+    override def getErrorHandler(): ErrorHandler =
+      getParent.getErrorHandler()
+  }
+
   xr.setContentHandler(this)
-  xr.setEntityResolver(res) // older API??
+  // older API, must not be called for validation to work, must use setProperty below
+  // xr.setEntityResolver(res)
   xr.setProperty("http://apache.org/xml/properties/internal/entity-resolver", res)
-  //
-
-  override lazy val parser: SAXParser = p
 
   var exceptionList: List[Exception] = Nil
+  xr.setErrorHandler(new org.xml.sax.ErrorHandler() {
+    def warning(exception: SAXParseException) = { exceptionList :+= exception }
+    def error(exception: SAXParseException) = { exceptionList :+= exception }
+    def fatalError(exception: SAXParseException) = { exceptionList :+= 
exception }
+  })
+
+  override lazy val parser: SAXParser = p
+  override lazy val reader: XMLReader = xr
 
   /**
-   * Called by all the load(...) methods to actually do the loading.
-   *
-   * @param source
-   * @param ignored
-   * @return the scala.xml.Node loaded that is the document element of the 
loaded source.
+   * Scala-XML creates its own adapter and calls the loadDocument function on 
that. We want it
+   * to use this custom adapter, so we must override it to point to this
    */
-  override def loadXML(source: InputSource, ignored: SAXParser): Node = {
-    val xr = parser.getXMLReader()
-    xr.setErrorHandler(new org.xml.sax.ErrorHandler() {
-
-      def warning(exception: SAXParseException) = {
-        // System.err.println(exception.getMessage())
-        exceptionList :+= exception
-      }
-
-      def error(exception: SAXParseException) = {
-        // System.err.println("Error: " + exception.getMessage())
-        exceptionList :+= exception
-      }
-      def fatalError(exception: SAXParseException) = {
-        // System.err.println(exception.getMessage())
-        exceptionList :+= exception
-      }
-    })
-
-    // validation occurs during the loading process because
-    // we set the feature requiring it above where the parser is constructed.
-
-    xr.parse(source)
-    return rootElem.asInstanceOf[Elem]
-  }
+  override def adapter = this
 }
 
 /**
@@ -501,16 +515,17 @@ class MyResolver()
   with org.w3c.dom.ls.LSResourceResolver
   with org.xml.sax.EntityResolver
   with org.xml.sax.ext.EntityResolver2 {
+
   val cm = new CatalogManager()
   val catFiles = cm.getCatalogFiles().toArray.toList.asInstanceOf[List[String]]
   // println("catalog files: " + catFiles)
-  cm.setIgnoreMissingProperties(false)
+  cm.setIgnoreMissingProperties(true)
   cm.setRelativeCatalogs(true)
   // cm.setVerbosity(4)
   // cm.debug.setDebug(100)
   val delegate = // new org.apache.xerces.util.XMLCatalogResolver() // cl)
     new Catalog(cm) {
-      // catalogManager.debug.setDebug(100)
+      // cm.debug.setDebug(100)
     }
   delegate.setupReaders()
   delegate.loadSystemCatalogs()
diff --git a/project/Dependencies.scala b/project/Dependencies.scala
index ca60197cb..70b54b829 100644
--- a/project/Dependencies.scala
+++ b/project/Dependencies.scala
@@ -23,7 +23,7 @@ object Dependencies {
 
   lazy val core = Seq(
     "com.lihaoyi" %% "os-lib" % "0.9.1", // for writing/compiling C source 
files
-    "org.scala-lang.modules" %% "scala-xml" % "2.1.0",
+    "org.scala-lang.modules" %% "scala-xml" % "2.2.0",
     "org.scala-lang.modules" %% "scala-parser-combinators" % "2.3.0",
     "com.ibm.icu" % "icu4j" % "73.2",
     "xerces" % "xercesImpl" % "2.12.2",

Reply via email to