mbeckerle commented on a change in pull request #560:
URL: https://github.com/apache/daffodil/pull/560#discussion_r638807302



##########
File path: 
daffodil-tdml-processor/src/main/scala/org/apache/daffodil/tdml/processor/DaffodilTDMLDFDLProcessor.scala
##########
@@ -159,14 +163,18 @@ final class TDMLDFDLProcessorFactory private (
   override def getProcessor(
     schemaSource: DaffodilSchemaSource,
     useSerializedProcessor: Boolean,
-    optRootName: Option[String] = None,
-    optRootNamespace: Option[String] = None): TDML.CompileResult = {
+    optRootName: Option[String],
+    optRootNamespace: Option[String],
+    tunables: Map[String, String]): TDML.CompileResult = {

Review comment:
       Ticket created: https://issues.apache.org/jira/browse/DAFFODIL-2520

##########
File path: 
daffodil-japi/src/test/java/org/apache/daffodil/example/TestJavaAPI.java
##########
@@ -48,9 +49,31 @@
 import org.apache.daffodil.japi.logger.ConsoleLogWriter;
 import org.apache.daffodil.japi.logger.LogLevel;
 import org.apache.daffodil.japi.io.InputSourceDataInputStream;
+import org.xml.sax.SAXNotRecognizedException;
+import org.xml.sax.SAXNotSupportedException;
+import org.xml.sax.XMLReader;
+
+import javax.xml.XMLConstants;

Review comment:
       XMLUtils is not for use in JAPI/SAPI, so we define a separate means to 
set secure defaults in these tests. 

##########
File path: 
daffodil-japi/src/test/java/org/apache/daffodil/example/TestJavaAPI.java
##########
@@ -40,6 +40,7 @@
 import org.apache.commons.io.IOUtils;
 import org.apache.daffodil.japi.*;
 import org.apache.daffodil.japi.infoset.XMLTextInfosetOutputter;
+import org.apache.daffodil.xml.XMLUtils;

Review comment:
       removed

##########
File path: 
daffodil-lib/src/main/scala/org/apache/daffodil/api/DaffodilSchemaSource.scala
##########
@@ -112,7 +118,7 @@ class URISchemaSource protected (val fileOrResource: URI) 
extends DaffodilSchema
 /**
  * For stdin, or other anonymous pipe-like source of schema.
  */
-case class InputStreamSchemaSource(is: java.io.InputStream, tmpDir: 
Option[File], blameName: String, extension: String) extends 
DaffodilSchemaSource {
+class InputStreamSchemaSource(is: java.io.InputStream, tmpDir: Option[File], 
blameName: String, extension: String) extends DaffodilSchemaSource {

Review comment:
       Needed to derive StringSchemaSource from it. 

##########
File path: 
daffodil-sapi/src/test/scala/org/apache/daffodil/example/TestScalaAPI.scala
##########
@@ -47,11 +47,33 @@ import 
org.apache.daffodil.sapi.DaffodilUnhandledSAXException
 import org.apache.daffodil.sapi.DaffodilUnparseErrorSAXException
 import org.apache.daffodil.sapi.SAXErrorHandlerForSAPITest
 import org.apache.daffodil.sapi.infoset.XMLTextInfosetOutputter
+import org.xml.sax.XMLReader
 
 import java.nio.ByteBuffer
+import javax.xml.XMLConstants
+
+object TestScalaAPI {
+  /**
+   * Best practices for XML loading are to turn off anything that could lead to
+   * insecurity.
+   *
+   * This is probably unnecessary in the case of these tests, but as these 
tests
+   * are also used to illustrate API usage, this exemplifies best practice.
+   */
+  def setSecureDefaults(xmlReader: XMLReader): Unit = {
+    xmlReader.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true)
+    // since we're not really sure what they mean by secure processing
+    // we make doubly sure by setting these ourselves also.
+    
xmlReader.setFeature("http://apache.org/xml/features/disallow-doctype-decl";, 
true)
+    
xmlReader.setFeature("http://xml.org/sax/features/external-parameter-entities";, 
false)
+    
xmlReader.setFeature("http://xml.org/sax/features/external-general-entities";, 
false)
+  }
+}

Review comment:
       We're not factoring this out as part of our API. Hence this redundant 
code here, becauuse we're not providing this in an API-consumable method. 

##########
File path: 
daffodil-japi/src/test/java/org/apache/daffodil/example/TestJavaAPI.java
##########
@@ -48,9 +49,31 @@
 import org.apache.daffodil.japi.logger.ConsoleLogWriter;
 import org.apache.daffodil.japi.logger.LogLevel;
 import org.apache.daffodil.japi.io.InputSourceDataInputStream;
+import org.xml.sax.SAXNotRecognizedException;
+import org.xml.sax.SAXNotSupportedException;
+import org.xml.sax.XMLReader;
+
+import javax.xml.XMLConstants;
 
 public class TestJavaAPI {
 
+    /**
+     * Best practices for XML loading are to turn off anything that could lead 
to
+     * insecurity.
+     *
+     * This is probably unnecessary in the case of these tests, but as these 
tests
+     * are also used to illustrate API usage, this exemplifies best practice.
+     */
+    public static void setSecureDefaults(XMLReader xmlReader)
+            throws SAXNotSupportedException, SAXNotRecognizedException {
+        xmlReader.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
+        // since we're not really sure what they mean by secure processing
+        // we make doubly sure by setting these ourselves also.
+        
xmlReader.setFeature("http://apache.org/xml/features/disallow-doctype-decl";, 
true);
+        
xmlReader.setFeature("http://xml.org/sax/features/external-parameter-entities";, 
false);
+        
xmlReader.setFeature("http://xml.org/sax/features/external-general-entities";, 
false);
+    }
+

Review comment:
       I would want to decompose daffodil-lib into smaller libraries - e.g., 
just the xml utils library, just the schema-compiler libraries (like the OOLAG 
stuff), etc. before making any of its features be part of a published API. 

##########
File path: 
daffodil-lib/src/main/scala/org/apache/daffodil/cookers/EntityReplacer.scala
##########
@@ -493,24 +493,10 @@ sealed abstract class StringLiteralBase(propNameArg: 
String,
   extends AutoPropNameBase(propNameArg)
     with StringLiteralCookerMixin {
 
-  private val xmlEntityPattern = new Regex("""&(quot|amp|apos|lt|gt);""", 
"entity")
-
   /**
    * Thawed means XML entities have been replaced. So & is &, etc.
    */
-  private def thaw(raw: String) = {
-    val thawed: String = {
-      val res = xmlEntityPattern.replaceAllIn(raw, m => {
-        val sb = scala.xml.Utility.unescape(m.group("entity"), new 
StringBuilder())
-        // There really is no possibility for null to come back as we've made
-        // sure to only include valid xml entities in the xmlEntityPattern.
-        Assert.invariant(sb ne null)
-        sb.toString()
-      })
-      res
-    }
-    thawed
-  }
+  private def thaw(raw: String) = XMLUtils.unescape(raw)

Review comment:
       Just hoisting reusable XML utility into location where it can be reused. 
I needed to call this logic also from ScalaXMLInfosetInputter. 

##########
File path: 
daffodil-core/src/test/scala/org/apache/daffodil/processor/TestSAXParseUnparseAPI.scala
##########
@@ -48,8 +47,7 @@ class TestSAXParseUnparseAPI {
     val pr = 
parseXMLReader.getProperty(XMLUtils.DAFFODIL_SAX_URN_PARSERESULT).asInstanceOf[ParseResult]
     assertTrue(!pr.isError)
     assertEquals(expectedInfoset, scala.xml.XML.loadString(baosParse.toString))
-
-    val unparseXMLReader = 
SAXParserFactory.newInstance().newSAXParser().getXMLReader
+    val unparseXMLReader = 
DaffodilSAXParserFactory().newSAXParser().getXMLReader

Review comment:
       So SAPI and JAPI get to create their own SAXParserFactory instances, SAX 
parsers, etc. We don't provide constructors for those to construct our 
secure-enforcing variants, but we include code in the SAPI/JAPI tests showing 
how to set things up to be secure.  This code is not shared with our XMLUtils, 
because XMLUtils is not part of our external API. 
   
   I have gone into Schematron, and updated it to use DaffodilSAXParserFactory, 
which removes another redundant set of "security parameterization". 

##########
File path: 
daffodil-japi/src/main/java/org/apache/daffodil/japi/package-info.java
##########
@@ -257,7 +257,7 @@
  *  WritableByteChannel wbc = java.nio.channels.Channels.newChannel(os);
  *  DaffodilUnparseContentHandler unparseContentHandler = 
dp.newContentHandlerInstance(wbc);
  *  try {
- *   XMLReader xmlReader = 
SAXParserFactory.newInstance().newSAXParser().getXMLReader();
+ *   XMLReader xmlReader = 
SAXParserFactory.newInstance().newSAXParser().getXMLReader;

Review comment:
       Will fix. I have never been a fan of  having code snippets in 
javadoc/scaladoc. It is almost guaranteed to not work. I prefer to put it into 
examples or tests, and put a URL in the javadoc/scaladoc referring people to 
that code. 

##########
File path: 
daffodil-schematron/src/main/scala/org/apache/daffodil/validation/schematron/Schematron.scala
##########
@@ -59,7 +59,9 @@ object Schematron {
           throw ValidatorInitializationException(s"Error setting feature on 
parser: ${ex.getMessage}")
       }
       f.setValidating(false)
-      f.newSAXParser.getXMLReader
+      val xr = f.newSAXParser.getXMLReader
+      XMLUtils.setSecureDefaults(xr)
+      xr

Review comment:
       Yes. Change made. 

##########
File path: 
daffodil-core/src/main/scala/org/apache/daffodil/dsom/DFDLSchemaFile.scala
##########
@@ -134,20 +134,17 @@ final class DFDLSchemaFile(
 
   lazy val isDFDLSchemaFile = iiXMLSchemaDocument.isDFDLSchema
 
+  private lazy val loader = new DaffodilXMLLoader(this)
+
   lazy val iiXMLSchemaDocument = LV('iiXMLSchemaDocument) {
     val res = loadXMLSchemaDocument(seenBefore, Some(this))
     if (res.isDFDLSchema && sset.validateDFDLSchemas) {
       //
       // We validate DFDL schemas, only if validation is requested.
       // Some things, tests generally, want to turn this validation off.
       //
-
-      val ldr = new DaffodilXMLLoader(this)
-      ldr.setValidation(true)
-      try {
-        ldr.load(schemaSource) // validate as XML file with XML Schema for 
DFDL Schemas
-        ldr.validateSchema(schemaSource) // validate as XSD (catches UPA 
errors for example)
-      } catch {
+      try loader.validateAsXMLSchema(schemaSource) // validate as XSD (catches 
UPA errors for example)
+      catch {

Review comment:
       There was different conditional logic, and at least two more distinct 
kinds of loaders (External Variables had its own, Config files had their own), 
which definitely did not do all the passes. Now we're doing them all uniformly 
if there is a schema provided and everything uses the same loader logic. For an 
XML file such as a TDML file, we now do (1) validateXML to validate it against 
its XSD (2) schema-aware load - which catches somewhat different things (at 
least one thing anyway) (3) the actual loading that creates the XML node.  For 
DFDL Schemas, there is an additional check which is constructing a Xerces 
newSchema() object, which checks things like the Unique Particle Attribution 
and other highly schema-specific things. 

##########
File path: project/Dependencies.scala
##########
@@ -48,7 +48,7 @@ object Dependencies {
   lazy val test = Seq(
     "junit" % "junit" % "4.13.2" % "it,test",
     "com.novocode" % "junit-interface" % "0.11" % "it,test",
-    "org.scalacheck" %% "scalacheck" % "1.15.4" % "it,test"
+    "org.scalacheck" %% "scalacheck" % "1.15.4" % "it,test",
   )

Review comment:
       IDEA argues with me over these trailing commas, so I'll remove. 

##########
File path: 
daffodil-lib/src/main/scala/org/apache/daffodil/xml/DaffodilConstructingLoader.scala
##########
@@ -86,6 +105,34 @@ class DaffodilConstructingLoader(uri: URI, errorHandler: 
org.xml.sax.ErrorHandle
     Source.fromInputStream(is, enc)
   }, true) {
 
+  /**
+   * Public constructor insists on normalizingCRLFtoLF behavior.
+   */
+  def this (uri: URI,
+    errorHandler: org.xml.sax.ErrorHandler,
+    addPositionAttributes: Boolean = false) =
+    this(uri, errorHandler, addPositionAttributes, normalizeCRLFtoLF = true)
+

Review comment:
       This is definitely called. I think this codecov warning is spurious. 




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to