Author: kiwiwings Date: Wed Jan 6 12:39:02 2021 New Revision: 1885197 URL: http://svn.apache.org/viewvc?rev=1885197&view=rev Log: #65061 - Handle VmlDrawings containing spreadsheet-ml default namespace
Modified: poi/site/src/documentation/content/xdocs/changes.xml poi/trunk/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFVMLDrawing.java poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFVMLDrawing.java Modified: poi/site/src/documentation/content/xdocs/changes.xml URL: http://svn.apache.org/viewvc/poi/site/src/documentation/content/xdocs/changes.xml?rev=1885197&r1=1885196&r2=1885197&view=diff ============================================================================== --- poi/site/src/documentation/content/xdocs/changes.xml (original) +++ poi/site/src/documentation/content/xdocs/changes.xml Wed Jan 6 12:39:02 2021 @@ -127,6 +127,7 @@ <action type="fix" fixes-bug="64876" context="XSLF">Unable to convert pptx to pdf</action> <action type="fix" fixes-bug="65026" context="POI_Overall">Migrate tests to Junit 5</action> <action type="add" fixes-bug="github-207" context="POI_Overall">Use SLF4J instead of commons-logging - use jcl-over-slf4j</action> + <action type="fix" fixes-bug="65061" context="XSSF">Handle VmlDrawings containing spreadsheet-ml default namespace</action> </actions> </release> Modified: poi/trunk/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFVMLDrawing.java URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFVMLDrawing.java?rev=1885197&r1=1885196&r2=1885197&view=diff ============================================================================== --- poi/trunk/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFVMLDrawing.java (original) +++ poi/trunk/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFVMLDrawing.java Wed Jan 6 12:39:02 2021 @@ -18,6 +18,7 @@ package org.apache.poi.xssf.usermodel; import static org.apache.poi.ooxml.POIXMLTypeLoader.DEFAULT_XML_OPTIONS; +import static org.apache.poi.xssf.usermodel.XSSFRelation.NS_SPREADSHEETML; import java.io.IOException; import java.io.InputStream; @@ -46,7 +47,6 @@ import com.microsoft.schemas.vml.CTShape import com.microsoft.schemas.vml.STExt; import com.microsoft.schemas.vml.STStrokeJoinStyle; import org.apache.poi.ooxml.POIXMLDocumentPart; -import org.apache.poi.ooxml.util.DocumentHelper; import org.apache.poi.openxml4j.opc.PackagePart; import org.apache.poi.schemas.vmldrawing.XmlDocument; import org.apache.poi.util.ReplacingInputStream; @@ -55,8 +55,6 @@ import org.apache.xmlbeans.XmlException; import org.apache.xmlbeans.XmlObject; import org.apache.xmlbeans.XmlOptions; import org.openxmlformats.schemas.officeDocument.x2006.sharedTypes.STTrueFalse; -import org.w3c.dom.Document; -import org.xml.sax.SAXException; /** * Represents a SpreadsheetML VML drawing. @@ -129,23 +127,26 @@ public final class XSSFVMLDrawing extend protected void read(InputStream is) throws IOException, XmlException { - Document doc; - try { - /* - * This is a seriously sick fix for the fact that some .xlsx files contain raw bits - * of HTML, without being escaped or properly turned into XML. - * The result is that they contain things like >br<, which breaks the XML parsing. - * This very sick InputStream wrapper attempts to spot these go past, and fix them. - */ - doc = DocumentHelper.readDocument(new ReplacingInputStream(is, "<br>", "<br/>")); - } catch (SAXException e) { - throw new XmlException(e.getMessage(), e); - } - XmlOptions xopt = new XmlOptions(DEFAULT_XML_OPTIONS); xopt.setLoadSubstituteNamespaces(Collections.singletonMap("", QNAME_VMLDRAWING.getNamespaceURI())); + xopt.setDocumentType(XmlDocument.type); + + /* + * This is a seriously sick fix for the fact that some .xlsx files contain raw bits + * of HTML, without being escaped or properly turned into XML. + * The result is that they contain things like >br<, which breaks the XML parsing. + * This very sick InputStream wrapper attempts to spot these go past, and fix them. + * + * Furthermore some documents contain a default namespace of + * http://schemas.openxmlformats.org/spreadsheetml/2006/main for the namespace-less "xml" document type. + * this definition is wrong and removed. + */ + root = XmlDocument.Factory.parse( + new ReplacingInputStream( + new ReplacingInputStream(is, "<br>", "<br/>"), + " xmlns=\""+NS_SPREADSHEETML+"\"", "") + , xopt); - root = XmlDocument.Factory.parse(doc, xopt); XmlCursor cur = root.getXml().newCursor(); try { Modified: poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFVMLDrawing.java URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFVMLDrawing.java?rev=1885197&r1=1885196&r2=1885197&view=diff ============================================================================== --- poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFVMLDrawing.java (original) +++ poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFVMLDrawing.java Wed Jan 6 12:39:02 2021 @@ -29,6 +29,7 @@ import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.math.BigInteger; +import java.nio.charset.StandardCharsets; import java.util.Collections; import java.util.List; import java.util.regex.Pattern; @@ -42,6 +43,8 @@ import com.microsoft.schemas.vml.CTShado import com.microsoft.schemas.vml.CTShape; import com.microsoft.schemas.vml.CTShapetype; import com.microsoft.schemas.vml.STExt; +import com.microsoft.schemas.vml.STStrokeJoinStyle; +import com.microsoft.schemas.vml.impl.CTShapetypeImpl; import org.apache.poi.POIDataSamples; import org.apache.xmlbeans.XmlException; import org.apache.xmlbeans.XmlObject; @@ -181,4 +184,30 @@ public class TestXSSFVMLDrawing { } assertEquals(16, count); } + + @Test + public void bug65061_InvalidXmlns() throws IOException, XmlException { + // input hasn't no <?xml... declaration - as in the sample file + String input = + "<xml xmlns=\"http://schemas.openxmlformats.org/spreadsheetml/2006/main\" xmlns:o=\"urn:schemas-microsoft-com:office:office\" xmlns:v=\"urn:schemas-microsoft-com:vml\" xmlns:x=\"urn:schemas-microsoft-com:office:excel\">\n" + + "<v:shapetype id=\"_x0000_t202\" coordsize=\"21600,21600\" path=\"m,l,21600r21600,l21600,xe\" o:spt=\"202\">\n" + + "<v:stroke joinstyle=\"miter\"/>\n" + + "<v:path o:connecttype=\"rect\" gradientshapeok=\"t\"/>\n" + + "</v:shapetype>\n" + + "</xml>"; + + XSSFVMLDrawing vml = new XSSFVMLDrawing(); + vml.read(new ByteArrayInputStream(input.getBytes(StandardCharsets.UTF_8))); + + // check that the xml beans parsed correctly + assertNotNull(vml.getDocument().getXml()); + + // check the parsed child + List<XmlObject> objs = vml.getItems(); + assertEquals(1, objs.size()); + XmlObject xst = objs.get(0); + assertTrue(xst instanceof CTShapetypeImpl); + CTShapetype st = (CTShapetype)xst; + assertEquals(STStrokeJoinStyle.MITER, st.getStrokeArray(0).getJoinstyle()); + } } \ No newline at end of file --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@poi.apache.org For additional commands, e-mail: commits-h...@poi.apache.org