mbeckerle commented on a change in pull request #560:
URL: https://github.com/apache/daffodil/pull/560#discussion_r633909836



##########
File path: 
daffodil-lib/src/main/scala/org/apache/daffodil/xml/DaffodilXMLLoader.scala
##########
@@ -538,47 +401,226 @@ trait SchemaAwareLoaderMixin {
  * do any validation either), however, once Daffodil starts processing the
  * DFDL schema nodes, it resolves references using the same one true XML 
catalog resolver.
  */
-class DaffodilXMLLoader(val errorHandler: org.xml.sax.ErrorHandler) {
+class DaffodilXMLLoader(val errorHandler: org.xml.sax.ErrorHandler)
+  extends NoBindingFactoryAdapter {
 
   def this() = this(RethrowSchemaErrorHandler)
 
-  def xercesAdapter = new DFDLXercesAdapter(errorHandler)
+  private def resolver = DFDLCatalogResolver.get
 
-  //
-  // Controls whether we setup Xerces for validation or not.
-  //
-  final var doValidation: Boolean = true
+  /**
+   * UPA errors are detected by xerces if the schema-full-checking feature is
+   * turned on, AND if you inform xerces that it is reading an XML Schema
+   * (i.e., xsd).
+   *
+   * Detecting these requires that we do THREE passes
+   * 1) load the DFDL schema as an XML document. This validates it against the 
XML Schema
+   * for DFDL schemas.
+   * 2) load the DFDL schema as an XSD - xerces then does lots of more 
intensive checking
+   * of the schema
+   * 3) load the schema for our own consumption by Daffodil code. This uses the
+   * constructing parser so as to preserve CDATA regions (xerces just does the 
wrong
+   * thing with those,...fatally so). Then our own semantic checking is 
performed
+   * as part of compiling the DFDL schema.
+   *
+   * Checks like UPA are in step (2) above. They are coded algorithmically
+   * right into Xerces. This is accomplished by
+   * using the below SchemaFactory and SchemaFactory.newSchema calls.  The
+   * newSchema call is what forces schema validation to take place.
+   */
+  private lazy val schemaFactory = {
+    val sf = new org.apache.xerces.jaxp.validation.XMLSchemaFactory()
+    sf.setResourceResolver(resolver)
+    sf.setErrorHandler(errorHandler)
+    sf.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true)
+    sf.setFeature(XMLUtils.XML_DISALLOW_DOCTYPE_FEATURE, true)
+    // These are not recognized by a schemaFactory
+    // sf.setFeature("http://xml.org/sax/features/validation";, true)
+    // sf.setFeature("http://apache.org/xml/features/validation/schema";, true)
+    // 
sf.setFeature("http://apache.org/xml/features/validation/schema-full-checking";, 
true)
+    // sf.setFeature(XMLUtils.SAX_NAMESPACES_FEATURE, true)
+    // sf.setFeature(XMLUtils.SAX_NAMESPACE_PREFIXES_FEATURE, true)
+    sf
+  }
+
+  /**
+   * This loads the DFDL schema as an XML Schema. This will
+   * check many more things (ex: UPA) about the DFDL schema other than
+   * just whether it validates against the XML Schema for DFDL schemas.
+   *
+   * Unfortunately, we don't have control over how Xerces loads up these 
schemas
+   * (other than the resolver anyway), so we can't really redirect the way
+   * it issues error messages so that it properly lays blame at say, the 
schema fragments
+   * inside an embedded schema of a TDML file.
+   *
+   * So if we want good file/line/column info from this, we have to give
+   * it a plain old file or resource, and not try to play games to get it to
+   * pick up the file/line/col information from attributes of the elements.
+   *
+   * Due to limitations in the xerces newSchema() method
+   * this method should be called only after loading
+   * the schema as a regular XML file, which itself insists
+   * on the XMLUtils.setSecureDefaults, so we don't need to
+   * further check that here.
+   */
+  def validateAsXMLSchema(source: DaffodilSchemaSource): Unit = {
+    // first we load it, with validation explicitly against the
+    // schema for DFDL Schemas.
+    load(source, Some(XMLUtils.schemaForDFDLSchemas), addPositionAttributes = 
true)
+    //
+    // Then we validate explicitly so Xerces can check things
+    // such as for UPA violations
+    //
+    val inputSource = source.newInputSource()
+    val saxSource = new SAXSource(inputSource)
+    //
+    // We would like this saxSource to be created from an XMLReader
+    // so that we can call XMLUtils.setSecureDefaults on it.
+    // but we get strange errors if I do that, where every symbol
+    // in the schema has an unrecognized namespace prefix.
+    //
+    schemaFactory.newSchema(saxSource)
+    inputSource.getByteStream().close()
+  }
 
-  def setValidation(flag: Boolean): Unit = {
-    doValidation = flag
+  // $COVERAGE-OFF$
+  override def parser = {
+    Assert.usageError("not to be called.")
   }
+  // $COVERAGE-ON$
 
   /**
+   * Obtain and initialize parser which validates the schema is defined.
+   */
+  private def parserFromURI(optSchemaURI: Option[URI]): SAXParser = {
+    if (optSchemaURI.isEmpty) noSchemaParser
+    else {
+      val f = parserFactory()
+      val schema = schemaFromURI(optSchemaURI)
+      f.setSchema(schema)
+      parserFromFactory(f)
+    }
+  }
+
+  private def schemaFromURI(optSchemaURI: Option[URI]): Schema = {
+    val sf = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI)
+    sf.setErrorHandler(errorHandler)
+    sf.setResourceResolver(resolver)
+    val schema = sf.newSchema(new StreamSource(optSchemaURI.get.toString))
+    schema
+  }
+
+  private def parserFactory() = {
+    val f = new SAXParserFactoryImpl
+    f.setNamespaceAware(true)
+    f.setFeature(XMLUtils.SAX_NAMESPACE_PREFIXES_FEATURE, true)
+    f.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true)
+    f.setValidating(false)// according to javadoc, just controls DTD validation
+    f.setFeature("http://xml.org/sax/features/validation";, true)
+    // not recognized by SAXParserFactory
+    // f.setFeature("http://xml.org/sax/features/validation/dynamic";, true)
+    f.setFeature("http://apache.org/xml/features/honour-all-schemaLocations";, 
true)
+    f.setFeature("http://apache.org/xml/features/validation/schema";, true)
+    
f.setFeature("http://apache.org/xml/features/validation/schema-full-checking";, 
true)
+    f
+  }
+
+  private lazy val noSchemaParser: SAXParser = {
+    parserFromFactory(parserFactory())
+  }
+
+  private def parserFromFactory(f: SAXParserFactory) = {
+    val p = f.newSAXParser()
+    //
+    // Not allowed on a SAXParser
+    // p.setProperty(XMLUtils.SAX_NAvMESPACES_FEATURE, true)
+    // Not allowed on a SAXParser
+    // p.setProperty(XMLUtils.SAX_NAMESPACE_PREFIXES_FEATURE, true)
+    val xrdr = p.getXMLReader()
+    XMLUtils.setSecureDefaults(xrdr)
+    xrdr.setErrorHandler(errorHandler)
+    // not recognized by XMLReader
+    // xrdr.setFeature("http://xml.org/sax/features/validation/dynamic";, true)
+    xrdr.setContentHandler(this)
+    //
+    // This is required to get the parse to really use our resolver.
+    // The setEntityResolver(resolver) does not work.
+    //
+    
xrdr.setProperty("http://apache.org/xml/properties/internal/entity-resolver";, 
resolver)
+    p
+  }
+
+  /**
+   * This is the common routine called by all the load calls to actually
+   * carry out the loading of the schema.
+   *
+   * There are no calls to this in Daffodil code base as of this writing, but
+   * the base class scala.xml.factory.XMLLoader calls it.
+   *
    * Does (optional) validation,
+   *
+   * @param source The URI for the XML document which may be a XML or DFDL 
schema, or just XML data.
+   * @param optSchemaURI Optional URI for XML schema for the XML source 
document.
+   * @param addPositionAttributes True to add dafint:file dafint:line 
attributes to all elements.
+   *                              Defaults to false.
+   * @param normalizeCRLFtoLF True to normalize CRLF and isolated CR to LF. 
This should usually be true,
+   *                          but some special case situations may require 
preservation of CRLF/CR.
+   * @return an scala.xml.Node (Element actually) which is the document 
element of the source.
    */
-  def load(source: DaffodilSchemaSource): scala.xml.Node = {
-    if (doValidation) {
-      val inputSource = source.newInputSource()
-      val xercesNode = xercesAdapter.load(inputSource) // validates
-      inputSource.getByteStream().close()
-
-      if (xercesNode == null) return null
-      // Note: we don't call xercesAdapter.validateSchema(source)
-      // here, because this is an XML loader, not necessarily
-      // just a DFDL schema loader. So for example the doValidation flag
-      // above could be telling us to validate a TDML file or not.
+  def load(source: DaffodilSchemaSource,
+    optSchemaURI: Option[URI],
+    addPositionAttributes: Boolean = false,
+    normalizeCRLFtoLF: Boolean = true): scala.xml.Node = {
+    //
+    // First we invoke the validator to explicitly validate the XML against
+    // the XML Schema (not necessarily a DFDL schema), via the
+    // javax.xml.validation.Validator's validate method.
+    //
+    optSchemaURI.foreach { schemaURI =>
+
+      val validator = XercesValidator.fromURIs(Seq(schemaURI))
+      val inputStream = source.uriForLoading.toURL.openStream()
+      validator.validateXML(inputStream, errorHandler)
+      inputStream.close()
+      //
+      // Next we have to invoke a regular xerces loader, setup for validation
+      // because that will actually interpret things like xsi:schemaLocation 
attributes
+      // of the root element.
+      //
+      // scopeStack.push(TopScope) // not in scala xml v2.0.0
+      val parser = parserFromURI(optSchemaURI)
+      val xrdr = parser.getXMLReader()
+      val saxSource = scala.xml.Source.fromSysId(source.uriForLoading.toString)
+      // parser.parse(source.uriForLoading.toURL.openStream(), this)
+      xrdr.parse(saxSource)
+      // scopeStack.pop() // not in scala xml v2.0.0

Review comment:
       It would be good to document exactly what additional errors this second 
validation catches that the first validation does not. Without this second 
validation, numerous negative tests failed. 
   
   At a minimum, comment out this check and list the tests that then fail. 
   

##########
File path: 
daffodil-lib/src/main/scala/org/apache/daffodil/xml/DaffodilXMLLoader.scala
##########
@@ -538,47 +401,226 @@ trait SchemaAwareLoaderMixin {
  * do any validation either), however, once Daffodil starts processing the
  * DFDL schema nodes, it resolves references using the same one true XML 
catalog resolver.
  */
-class DaffodilXMLLoader(val errorHandler: org.xml.sax.ErrorHandler) {
+class DaffodilXMLLoader(val errorHandler: org.xml.sax.ErrorHandler)
+  extends NoBindingFactoryAdapter {
 
   def this() = this(RethrowSchemaErrorHandler)
 
-  def xercesAdapter = new DFDLXercesAdapter(errorHandler)
+  private def resolver = DFDLCatalogResolver.get
 
-  //
-  // Controls whether we setup Xerces for validation or not.
-  //
-  final var doValidation: Boolean = true
+  /**
+   * UPA errors are detected by xerces if the schema-full-checking feature is
+   * turned on, AND if you inform xerces that it is reading an XML Schema
+   * (i.e., xsd).
+   *
+   * Detecting these requires that we do THREE passes
+   * 1) load the DFDL schema as an XML document. This validates it against the 
XML Schema
+   * for DFDL schemas.
+   * 2) load the DFDL schema as an XSD - xerces then does lots of more 
intensive checking
+   * of the schema
+   * 3) load the schema for our own consumption by Daffodil code. This uses the
+   * constructing parser so as to preserve CDATA regions (xerces just does the 
wrong
+   * thing with those,...fatally so). Then our own semantic checking is 
performed
+   * as part of compiling the DFDL schema.
+   *
+   * Checks like UPA are in step (2) above. They are coded algorithmically
+   * right into Xerces. This is accomplished by
+   * using the below SchemaFactory and SchemaFactory.newSchema calls.  The
+   * newSchema call is what forces schema validation to take place.
+   */
+  private lazy val schemaFactory = {
+    val sf = new org.apache.xerces.jaxp.validation.XMLSchemaFactory()
+    sf.setResourceResolver(resolver)
+    sf.setErrorHandler(errorHandler)
+    sf.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true)
+    sf.setFeature(XMLUtils.XML_DISALLOW_DOCTYPE_FEATURE, true)
+    // These are not recognized by a schemaFactory
+    // sf.setFeature("http://xml.org/sax/features/validation";, true)
+    // sf.setFeature("http://apache.org/xml/features/validation/schema";, true)
+    // 
sf.setFeature("http://apache.org/xml/features/validation/schema-full-checking";, 
true)
+    // sf.setFeature(XMLUtils.SAX_NAMESPACES_FEATURE, true)
+    // sf.setFeature(XMLUtils.SAX_NAMESPACE_PREFIXES_FEATURE, true)
+    sf
+  }
+
+  /**
+   * This loads the DFDL schema as an XML Schema. This will
+   * check many more things (ex: UPA) about the DFDL schema other than
+   * just whether it validates against the XML Schema for DFDL schemas.
+   *
+   * Unfortunately, we don't have control over how Xerces loads up these 
schemas
+   * (other than the resolver anyway), so we can't really redirect the way
+   * it issues error messages so that it properly lays blame at say, the 
schema fragments
+   * inside an embedded schema of a TDML file.
+   *
+   * So if we want good file/line/column info from this, we have to give
+   * it a plain old file or resource, and not try to play games to get it to
+   * pick up the file/line/col information from attributes of the elements.
+   *
+   * Due to limitations in the xerces newSchema() method
+   * this method should be called only after loading
+   * the schema as a regular XML file, which itself insists
+   * on the XMLUtils.setSecureDefaults, so we don't need to
+   * further check that here.
+   */
+  def validateAsXMLSchema(source: DaffodilSchemaSource): Unit = {
+    // first we load it, with validation explicitly against the
+    // schema for DFDL Schemas.
+    load(source, Some(XMLUtils.schemaForDFDLSchemas), addPositionAttributes = 
true)
+    //
+    // Then we validate explicitly so Xerces can check things
+    // such as for UPA violations
+    //
+    val inputSource = source.newInputSource()
+    val saxSource = new SAXSource(inputSource)
+    //
+    // We would like this saxSource to be created from an XMLReader
+    // so that we can call XMLUtils.setSecureDefaults on it.
+    // but we get strange errors if I do that, where every symbol
+    // in the schema has an unrecognized namespace prefix.
+    //
+    schemaFactory.newSchema(saxSource)
+    inputSource.getByteStream().close()
+  }
 
-  def setValidation(flag: Boolean): Unit = {
-    doValidation = flag
+  // $COVERAGE-OFF$
+  override def parser = {
+    Assert.usageError("not to be called.")
   }
+  // $COVERAGE-ON$
 
   /**
+   * Obtain and initialize parser which validates the schema is defined.
+   */
+  private def parserFromURI(optSchemaURI: Option[URI]): SAXParser = {
+    if (optSchemaURI.isEmpty) noSchemaParser
+    else {
+      val f = parserFactory()
+      val schema = schemaFromURI(optSchemaURI)
+      f.setSchema(schema)
+      parserFromFactory(f)
+    }
+  }
+
+  private def schemaFromURI(optSchemaURI: Option[URI]): Schema = {
+    val sf = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI)
+    sf.setErrorHandler(errorHandler)
+    sf.setResourceResolver(resolver)
+    val schema = sf.newSchema(new StreamSource(optSchemaURI.get.toString))
+    schema
+  }
+
+  private def parserFactory() = {
+    val f = new SAXParserFactoryImpl
+    f.setNamespaceAware(true)
+    f.setFeature(XMLUtils.SAX_NAMESPACE_PREFIXES_FEATURE, true)
+    f.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true)
+    f.setValidating(false)// according to javadoc, just controls DTD validation
+    f.setFeature("http://xml.org/sax/features/validation";, true)
+    // not recognized by SAXParserFactory
+    // f.setFeature("http://xml.org/sax/features/validation/dynamic";, true)
+    f.setFeature("http://apache.org/xml/features/honour-all-schemaLocations";, 
true)
+    f.setFeature("http://apache.org/xml/features/validation/schema";, true)
+    
f.setFeature("http://apache.org/xml/features/validation/schema-full-checking";, 
true)
+    f
+  }
+
+  private lazy val noSchemaParser: SAXParser = {
+    parserFromFactory(parserFactory())
+  }
+
+  private def parserFromFactory(f: SAXParserFactory) = {
+    val p = f.newSAXParser()
+    //
+    // Not allowed on a SAXParser
+    // p.setProperty(XMLUtils.SAX_NAvMESPACES_FEATURE, true)
+    // Not allowed on a SAXParser
+    // p.setProperty(XMLUtils.SAX_NAMESPACE_PREFIXES_FEATURE, true)
+    val xrdr = p.getXMLReader()
+    XMLUtils.setSecureDefaults(xrdr)
+    xrdr.setErrorHandler(errorHandler)
+    // not recognized by XMLReader
+    // xrdr.setFeature("http://xml.org/sax/features/validation/dynamic";, true)
+    xrdr.setContentHandler(this)
+    //
+    // This is required to get the parse to really use our resolver.
+    // The setEntityResolver(resolver) does not work.
+    //
+    
xrdr.setProperty("http://apache.org/xml/properties/internal/entity-resolver";, 
resolver)
+    p
+  }
+
+  /**
+   * This is the common routine called by all the load calls to actually
+   * carry out the loading of the schema.
+   *
+   * There are no calls to this in Daffodil code base as of this writing, but
+   * the base class scala.xml.factory.XMLLoader calls it.
+   *
    * Does (optional) validation,
+   *
+   * @param source The URI for the XML document which may be a XML or DFDL 
schema, or just XML data.
+   * @param optSchemaURI Optional URI for XML schema for the XML source 
document.
+   * @param addPositionAttributes True to add dafint:file dafint:line 
attributes to all elements.
+   *                              Defaults to false.
+   * @param normalizeCRLFtoLF True to normalize CRLF and isolated CR to LF. 
This should usually be true,
+   *                          but some special case situations may require 
preservation of CRLF/CR.
+   * @return an scala.xml.Node (Element actually) which is the document 
element of the source.
    */
-  def load(source: DaffodilSchemaSource): scala.xml.Node = {
-    if (doValidation) {
-      val inputSource = source.newInputSource()
-      val xercesNode = xercesAdapter.load(inputSource) // validates
-      inputSource.getByteStream().close()
-
-      if (xercesNode == null) return null
-      // Note: we don't call xercesAdapter.validateSchema(source)
-      // here, because this is an XML loader, not necessarily
-      // just a DFDL schema loader. So for example the doValidation flag
-      // above could be telling us to validate a TDML file or not.
+  def load(source: DaffodilSchemaSource,
+    optSchemaURI: Option[URI],
+    addPositionAttributes: Boolean = false,
+    normalizeCRLFtoLF: Boolean = true): scala.xml.Node = {
+    //
+    // First we invoke the validator to explicitly validate the XML against
+    // the XML Schema (not necessarily a DFDL schema), via the
+    // javax.xml.validation.Validator's validate method.
+    //
+    optSchemaURI.foreach { schemaURI =>
+
+      val validator = XercesValidator.fromURIs(Seq(schemaURI))
+      val inputStream = source.uriForLoading.toURL.openStream()
+      validator.validateXML(inputStream, errorHandler)
+      inputStream.close()
+      //
+      // Next we have to invoke a regular xerces loader, setup for validation
+      // because that will actually interpret things like xsi:schemaLocation 
attributes
+      // of the root element.
+      //
+      // scopeStack.push(TopScope) // not in scala xml v2.0.0
+      val parser = parserFromURI(optSchemaURI)
+      val xrdr = parser.getXMLReader()
+      val saxSource = scala.xml.Source.fromSysId(source.uriForLoading.toString)
+      // parser.parse(source.uriForLoading.toURL.openStream(), this)
+      xrdr.parse(saxSource)
+      // scopeStack.pop() // not in scala xml v2.0.0
+      // no result, as the errors are reported separately
     }
     //
     // To get reliable xml nodes including conversion of CDATA syntax into
     // PCData nodes, we have to use a different loader.
     //
-    val constructingLoader = new 
DaffodilConstructingLoader(source.uriForLoading, errorHandler)
+    val constructingLoader =
+      new DaffodilConstructingLoader(source.uriForLoading,
+        errorHandler, addPositionAttributes, normalizeCRLFtoLF)
     val res = constructingLoader.load() // construct the XML objects for us.
     constructingLoader.input.close()
     res
   }
 
-  def validateSchema(source: DaffodilSchemaSource) = 
xercesAdapter.validateSchema(source)
+  // We disallow any of these to force us of our own
+  // load(source, optSchemaURI) entry point.
+  // $COVERAGE-OFF$
+  private def noWay = Assert.usageError("Operation is not supported. Use 
load(uri) or loadFile(file)")

Review comment:
       These are all inherited methods. It is not clear that we need the base 
class/trait they come from. 

##########
File path: 
daffodil-lib/src/main/scala/org/apache/daffodil/xml/DaffodilXMLLoader.scala
##########
@@ -538,47 +401,226 @@ trait SchemaAwareLoaderMixin {
  * do any validation either), however, once Daffodil starts processing the
  * DFDL schema nodes, it resolves references using the same one true XML 
catalog resolver.
  */
-class DaffodilXMLLoader(val errorHandler: org.xml.sax.ErrorHandler) {
+class DaffodilXMLLoader(val errorHandler: org.xml.sax.ErrorHandler)
+  extends NoBindingFactoryAdapter {

Review comment:
       If we can avoid mixing this in, we should do so, because it brings in 
many methods that are unusable on this class. 

##########
File path: 
daffodil-lib/src/main/scala/org/apache/daffodil/xml/DaffodilXMLLoader.scala
##########
@@ -538,47 +401,226 @@ trait SchemaAwareLoaderMixin {
  * do any validation either), however, once Daffodil starts processing the
  * DFDL schema nodes, it resolves references using the same one true XML 
catalog resolver.
  */
-class DaffodilXMLLoader(val errorHandler: org.xml.sax.ErrorHandler) {
+class DaffodilXMLLoader(val errorHandler: org.xml.sax.ErrorHandler)
+  extends NoBindingFactoryAdapter {
 
   def this() = this(RethrowSchemaErrorHandler)
 
-  def xercesAdapter = new DFDLXercesAdapter(errorHandler)
+  private def resolver = DFDLCatalogResolver.get
 
-  //
-  // Controls whether we setup Xerces for validation or not.
-  //
-  final var doValidation: Boolean = true
+  /**
+   * UPA errors are detected by xerces if the schema-full-checking feature is
+   * turned on, AND if you inform xerces that it is reading an XML Schema
+   * (i.e., xsd).
+   *
+   * Detecting these requires that we do THREE passes
+   * 1) load the DFDL schema as an XML document. This validates it against the 
XML Schema
+   * for DFDL schemas.
+   * 2) load the DFDL schema as an XSD - xerces then does lots of more 
intensive checking
+   * of the schema
+   * 3) load the schema for our own consumption by Daffodil code. This uses the
+   * constructing parser so as to preserve CDATA regions (xerces just does the 
wrong
+   * thing with those,...fatally so). Then our own semantic checking is 
performed
+   * as part of compiling the DFDL schema.
+   *
+   * Checks like UPA are in step (2) above. They are coded algorithmically
+   * right into Xerces. This is accomplished by
+   * using the below SchemaFactory and SchemaFactory.newSchema calls.  The
+   * newSchema call is what forces schema validation to take place.
+   */
+  private lazy val schemaFactory = {
+    val sf = new org.apache.xerces.jaxp.validation.XMLSchemaFactory()
+    sf.setResourceResolver(resolver)
+    sf.setErrorHandler(errorHandler)
+    sf.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true)
+    sf.setFeature(XMLUtils.XML_DISALLOW_DOCTYPE_FEATURE, true)
+    // These are not recognized by a schemaFactory
+    // sf.setFeature("http://xml.org/sax/features/validation";, true)
+    // sf.setFeature("http://apache.org/xml/features/validation/schema";, true)
+    // 
sf.setFeature("http://apache.org/xml/features/validation/schema-full-checking";, 
true)
+    // sf.setFeature(XMLUtils.SAX_NAMESPACES_FEATURE, true)
+    // sf.setFeature(XMLUtils.SAX_NAMESPACE_PREFIXES_FEATURE, true)
+    sf
+  }
+
+  /**
+   * This loads the DFDL schema as an XML Schema. This will
+   * check many more things (ex: UPA) about the DFDL schema other than
+   * just whether it validates against the XML Schema for DFDL schemas.
+   *
+   * Unfortunately, we don't have control over how Xerces loads up these 
schemas
+   * (other than the resolver anyway), so we can't really redirect the way
+   * it issues error messages so that it properly lays blame at say, the 
schema fragments
+   * inside an embedded schema of a TDML file.
+   *
+   * So if we want good file/line/column info from this, we have to give
+   * it a plain old file or resource, and not try to play games to get it to
+   * pick up the file/line/col information from attributes of the elements.
+   *
+   * Due to limitations in the xerces newSchema() method
+   * this method should be called only after loading
+   * the schema as a regular XML file, which itself insists
+   * on the XMLUtils.setSecureDefaults, so we don't need to
+   * further check that here.
+   */
+  def validateAsXMLSchema(source: DaffodilSchemaSource): Unit = {
+    // first we load it, with validation explicitly against the
+    // schema for DFDL Schemas.
+    load(source, Some(XMLUtils.schemaForDFDLSchemas), addPositionAttributes = 
true)
+    //
+    // Then we validate explicitly so Xerces can check things
+    // such as for UPA violations
+    //
+    val inputSource = source.newInputSource()
+    val saxSource = new SAXSource(inputSource)
+    //
+    // We would like this saxSource to be created from an XMLReader
+    // so that we can call XMLUtils.setSecureDefaults on it.
+    // but we get strange errors if I do that, where every symbol
+    // in the schema has an unrecognized namespace prefix.
+    //
+    schemaFactory.newSchema(saxSource)
+    inputSource.getByteStream().close()
+  }
 
-  def setValidation(flag: Boolean): Unit = {
-    doValidation = flag
+  // $COVERAGE-OFF$
+  override def parser = {
+    Assert.usageError("not to be called.")
   }
+  // $COVERAGE-ON$
 
   /**
+   * Obtain and initialize parser which validates the schema is defined.
+   */
+  private def parserFromURI(optSchemaURI: Option[URI]): SAXParser = {
+    if (optSchemaURI.isEmpty) noSchemaParser
+    else {
+      val f = parserFactory()
+      val schema = schemaFromURI(optSchemaURI)
+      f.setSchema(schema)
+      parserFromFactory(f)
+    }
+  }
+
+  private def schemaFromURI(optSchemaURI: Option[URI]): Schema = {
+    val sf = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI)
+    sf.setErrorHandler(errorHandler)
+    sf.setResourceResolver(resolver)
+    val schema = sf.newSchema(new StreamSource(optSchemaURI.get.toString))
+    schema
+  }
+
+  private def parserFactory() = {
+    val f = new SAXParserFactoryImpl
+    f.setNamespaceAware(true)
+    f.setFeature(XMLUtils.SAX_NAMESPACE_PREFIXES_FEATURE, true)
+    f.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true)
+    f.setValidating(false)// according to javadoc, just controls DTD validation
+    f.setFeature("http://xml.org/sax/features/validation";, true)
+    // not recognized by SAXParserFactory
+    // f.setFeature("http://xml.org/sax/features/validation/dynamic";, true)
+    f.setFeature("http://apache.org/xml/features/honour-all-schemaLocations";, 
true)
+    f.setFeature("http://apache.org/xml/features/validation/schema";, true)
+    
f.setFeature("http://apache.org/xml/features/validation/schema-full-checking";, 
true)
+    f
+  }
+
+  private lazy val noSchemaParser: SAXParser = {
+    parserFromFactory(parserFactory())
+  }
+
+  private def parserFromFactory(f: SAXParserFactory) = {
+    val p = f.newSAXParser()
+    //
+    // Not allowed on a SAXParser
+    // p.setProperty(XMLUtils.SAX_NAvMESPACES_FEATURE, true)
+    // Not allowed on a SAXParser
+    // p.setProperty(XMLUtils.SAX_NAMESPACE_PREFIXES_FEATURE, true)
+    val xrdr = p.getXMLReader()
+    XMLUtils.setSecureDefaults(xrdr)
+    xrdr.setErrorHandler(errorHandler)
+    // not recognized by XMLReader
+    // xrdr.setFeature("http://xml.org/sax/features/validation/dynamic";, true)
+    xrdr.setContentHandler(this)
+    //
+    // This is required to get the parse to really use our resolver.
+    // The setEntityResolver(resolver) does not work.
+    //
+    
xrdr.setProperty("http://apache.org/xml/properties/internal/entity-resolver";, 
resolver)
+    p
+  }
+
+  /**
+   * This is the common routine called by all the load calls to actually
+   * carry out the loading of the schema.
+   *
+   * There are no calls to this in Daffodil code base as of this writing, but
+   * the base class scala.xml.factory.XMLLoader calls it.
+   *
    * Does (optional) validation,
+   *
+   * @param source The URI for the XML document which may be a XML or DFDL 
schema, or just XML data.
+   * @param optSchemaURI Optional URI for XML schema for the XML source 
document.
+   * @param addPositionAttributes True to add dafint:file dafint:line 
attributes to all elements.
+   *                              Defaults to false.
+   * @param normalizeCRLFtoLF True to normalize CRLF and isolated CR to LF. 
This should usually be true,
+   *                          but some special case situations may require 
preservation of CRLF/CR.
+   * @return an scala.xml.Node (Element actually) which is the document 
element of the source.
    */
-  def load(source: DaffodilSchemaSource): scala.xml.Node = {
-    if (doValidation) {
-      val inputSource = source.newInputSource()
-      val xercesNode = xercesAdapter.load(inputSource) // validates
-      inputSource.getByteStream().close()
-
-      if (xercesNode == null) return null
-      // Note: we don't call xercesAdapter.validateSchema(source)
-      // here, because this is an XML loader, not necessarily
-      // just a DFDL schema loader. So for example the doValidation flag
-      // above could be telling us to validate a TDML file or not.
+  def load(source: DaffodilSchemaSource,
+    optSchemaURI: Option[URI],
+    addPositionAttributes: Boolean = false,
+    normalizeCRLFtoLF: Boolean = true): scala.xml.Node = {
+    //

Review comment:
       Note, @jw3: we're using the built-in XercesValidator here, so that 
we now have only one instance of Xerces validation. 




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to