steveloughran commented on code in PR #4940:
URL: https://github.com/apache/hadoop/pull/4940#discussion_r984671783


##########
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/cli/CLITestHelper.java:
##########
@@ -24,6 +24,7 @@
 import org.apache.hadoop.fs.CommonConfigurationKeys;
 import org.apache.hadoop.util.Shell;
 import org.apache.hadoop.util.StringUtils;
+import org.apache.hadoop.util.XMLUtils;

Review Comment:
   it, add a newline



##########
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java:
##########
@@ -101,6 +101,7 @@
 import org.apache.hadoop.util.ReflectionUtils;
 import org.apache.hadoop.util.StringInterner;
 import org.apache.hadoop.util.StringUtils;
+import org.apache.hadoop.util.XMLUtils;

Review Comment:
   can you add a newline



##########
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/XMLUtils.java:
##########
@@ -61,4 +67,45 @@ public static void transform(
     // and send the output to a Result object.
     transformer.transform(new StreamSource(xml), new StreamResult(out));
   }
+
+  public static DocumentBuilderFactory newSecureDocumentBuilderFactory()
+          throws ParserConfigurationException {
+    DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
+    dbf.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
+    dbf.setFeature("http://apache.org/xml/features/disallow-doctype-decl";, 
true);

Review Comment:
   why not make these constant and share across all methods



##########
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestXMLUtils.java:
##########
@@ -0,0 +1,73 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.util;
+
+import org.junit.Test;
+import org.w3c.dom.Document;
+import org.xml.sax.InputSource;
+import org.xml.sax.helpers.DefaultHandler;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.SAXParser;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.stream.StreamResult;
+
+import java.io.StringReader;
+import java.io.StringWriter;
+
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+public class TestXMLUtils {

Review Comment:
   1. prefer assertJ assertThat() asserts, which are much more informative. 
they save writing the assert failure messages which i would otherwise be asking 
for...
   
   2.  what about a test for security: that you cannot do entity expansion or 
references? as that is what we want from the production code



##########
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfServlet.java:
##########
@@ -31,6 +31,7 @@
 import javax.xml.parsers.DocumentBuilder;
 import javax.xml.parsers.DocumentBuilderFactory;
 
+import org.apache.hadoop.util.XMLUtils;

Review Comment:
   put below L44



##########
hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/ParsedConfigFile.java:
##########
@@ -17,36 +17,34 @@
  */
 package org.apache.hadoop.tools.rumen;
 
+import java.io.StringReader;
 import java.util.Properties;
 import java.util.regex.Pattern;
 import java.util.regex.Matcher;
 
-import java.io.InputStream;
-import java.io.ByteArrayInputStream;
 import java.io.IOException;
 
-import java.nio.charset.Charset;
-
 import javax.xml.parsers.DocumentBuilderFactory;
 import javax.xml.parsers.DocumentBuilder;
 import javax.xml.parsers.ParserConfigurationException;
 
 import org.apache.hadoop.mapreduce.MRConfig;
 import org.apache.hadoop.mapreduce.MRJobConfig;
+import org.apache.hadoop.util.XMLUtils;

Review Comment:
   add a newline



##########
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestXMLUtils.java:
##########
@@ -0,0 +1,73 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.util;
+
+import org.junit.Test;
+import org.w3c.dom.Document;
+import org.xml.sax.InputSource;
+import org.xml.sax.helpers.DefaultHandler;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.SAXParser;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.stream.StreamResult;
+
+import java.io.StringReader;
+import java.io.StringWriter;
+
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+public class TestXMLUtils {
+
+  @Test
+  public void testSecureDocumentBuilderFactory() throws Exception {
+    DocumentBuilder db = 
XMLUtils.newSecureDocumentBuilderFactory().newDocumentBuilder();
+    Document doc = db.parse(new InputSource(new StringReader("<root/>")));
+    assertNotNull(doc);
+  }
+
+  @Test
+  public void testSecureSAXParserFactory() throws Exception {
+    SAXParser parser = XMLUtils.newSecureSAXParserFactory().newSAXParser();
+    parser.parse(new InputSource(new StringReader("<root/>")), new 
DefaultHandler());
+  }
+
+  @Test
+  public void testSecureTransformerFactory() throws Exception {
+    Transformer transformer = 
XMLUtils.newSecureTransformerFactory().newTransformer();
+    DocumentBuilder db = 
XMLUtils.newSecureDocumentBuilderFactory().newDocumentBuilder();
+    Document doc = db.parse(new InputSource(new StringReader("<root/>")));
+    try (StringWriter stringWriter = new StringWriter()) {
+      transformer.transform(new DOMSource(doc), new 
StreamResult(stringWriter));
+      assertTrue(stringWriter.toString().contains("<root"));

Review Comment:
   use assertJ string asserts



##########
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestXMLUtils.java:
##########
@@ -0,0 +1,73 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.util;
+
+import org.junit.Test;

Review Comment:
   afraid your imports are out of sync with most of the hadoop stuff, which is 
   
   java.*
   javax
   
   non-asf
   
   asf
   
   static



##########
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/XMLUtils.java:
##########
@@ -61,4 +67,45 @@ public static void transform(
     // and send the output to a Result object.
     transformer.transform(new StreamSource(xml), new StreamResult(out));
   }
+
+  public static DocumentBuilderFactory newSecureDocumentBuilderFactory()

Review Comment:
   javadocs, including SHOULD BE way to create new xml parsers



##########
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/XMLUtils.java:
##########
@@ -18,11 +18,17 @@
 
 package org.apache.hadoop.util;
 
+import javax.xml.XMLConstants;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParserFactory;
 import javax.xml.transform.*;
+import javax.xml.transform.sax.SAXTransformerFactory;
 import javax.xml.transform.stream.*;
 
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
+import org.xml.sax.SAXException;

Review Comment:
   nit, separate group from apache imports



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to