Author: michiel
Date: 2010-04-01 14:34:12 +0200 (Thu, 01 Apr 2010)
New Revision: 41722
Added:
mmbase/trunk/utils/src/test/files/test.html
mmbase/trunk/utils/src/test/files/test.xhtml
Modified:
mmbase/trunk/utils/src/main/config/magic.xml
mmbase/trunk/utils/src/main/java/org/mmbase/util/magicfile/AbstractDetector.java
mmbase/trunk/utils/src/main/java/org/mmbase/util/magicfile/BasicDetector.java
mmbase/trunk/utils/src/main/java/org/mmbase/util/magicfile/XmlDetector.java
mmbase/trunk/utils/src/test/java/org/mmbase/util/magicfile/MagicFileTest.java
Log:
Implemented matching on public ID; added test cases for HTML and XHTML.
Modified: mmbase/trunk/utils/src/main/config/magic.xml
===================================================================
--- mmbase/trunk/utils/src/main/config/magic.xml 2010-04-01 12:17:18 UTC
(rev 41721)
+++ mmbase/trunk/utils/src/main/config/magic.xml 2010-04-01 12:34:12 UTC
(rev 41722)
@@ -2660,6 +2660,12 @@
</detector>
</childlist>
</detector>
+ <detector class="org.mmbase.util.magicfile.XmlDetector">
+ <mimetype>application/xml+xhtml</mimetype>
+ <extension>xhtml</extension>
+ <designation>XHTML</designation>
+ <param name="xmlns">http://www.w3.org/1999/xhtml</param>
+ </detector>
<detector>
<mimetype>text/html</mimetype>
<extension>html</extension>
@@ -2714,6 +2720,12 @@
<designation>Word XML file</designation>
<param
name="xmlns">http://schemas.microsoft.com/office/word/2003/wordml</param>
</detector>
+ <detector class="org.mmbase.util.magicfile.XmlDetector">
+ <mimetype>text/html</mimetype>
+ <extension>html</extension>
+ <designation>HTML</designation>
+ <param name="publicId">(?i)-//W3C//DTD HTML .*</param>
+ </detector>
<detector>
<mimetype>text/xml</mimetype>
<extension>xml</extension>
Modified:
mmbase/trunk/utils/src/main/java/org/mmbase/util/magicfile/AbstractDetector.java
===================================================================
---
mmbase/trunk/utils/src/main/java/org/mmbase/util/magicfile/AbstractDetector.java
2010-04-01 12:17:18 UTC (rev 41721)
+++
mmbase/trunk/utils/src/main/java/org/mmbase/util/magicfile/AbstractDetector.java
2010-04-01 12:34:12 UTC (rev 41722)
@@ -17,7 +17,7 @@
/**
- * @version $Id: Detector.java 41036 2010-02-15 22:30:54Z michiel $
+ * @version $Id$
*/
public abstract class AbstractDetector implements Detector {
Property changes on:
mmbase/trunk/utils/src/main/java/org/mmbase/util/magicfile/AbstractDetector.java
___________________________________________________________________
Name: svn:keywords
+ Id
Modified:
mmbase/trunk/utils/src/main/java/org/mmbase/util/magicfile/BasicDetector.java
===================================================================
---
mmbase/trunk/utils/src/main/java/org/mmbase/util/magicfile/BasicDetector.java
2010-04-01 12:17:18 UTC (rev 41721)
+++
mmbase/trunk/utils/src/main/java/org/mmbase/util/magicfile/BasicDetector.java
2010-04-01 12:34:12 UTC (rev 41722)
@@ -48,7 +48,7 @@
*<br />
* Not supported by magic file:<br />
* - StarOffice<br />
- * @version $Id: Detector.java 41036 2010-02-15 22:30:54Z michiel $
+ * @version $Id$
*/
public class BasicDetector extends AbstractDetector {
Property changes on:
mmbase/trunk/utils/src/main/java/org/mmbase/util/magicfile/BasicDetector.java
___________________________________________________________________
Name: svn:keywords
+ Id
Modified:
mmbase/trunk/utils/src/main/java/org/mmbase/util/magicfile/XmlDetector.java
===================================================================
--- mmbase/trunk/utils/src/main/java/org/mmbase/util/magicfile/XmlDetector.java
2010-04-01 12:17:18 UTC (rev 41721)
+++ mmbase/trunk/utils/src/main/java/org/mmbase/util/magicfile/XmlDetector.java
2010-04-01 12:34:12 UTC (rev 41722)
@@ -14,29 +14,31 @@
import org.w3c.dom.Element;
import org.xml.sax.*;
import org.xml.sax.helpers.*;
+import java.util.regex.*;
import org.mmbase.util.xml.ErrorHandler;
import org.mmbase.util.xml.*;
import org.mmbase.util.logging.*;
/**
-
- * @version $Id: Detector.java 41036 2010-02-15 22:30:54Z michiel $
+ * A detector which can match on XML namespaces, doctypes.
+ *
+ * @version $Id$
+ * @author Michiel Meeuwissen
*/
public class XmlDetector extends AbstractDetector {
private static final Logger log =
Logging.getLoggerInstance(XmlDetector.class);
-
protected String namespace = null;
- protected String doctype = null;
+ protected Pattern publicId = null;
public void setXmlns(String xmlns) {
namespace = xmlns;
}
- public void setDocType(String dt) {
- doctype = dt;
+ public void setPublicId(String dt) {
+ publicId = Pattern.compile(dt);
}
/**
@@ -47,11 +49,14 @@
XMLReader parser = XMLReaderFactory.createXMLReader();
Handler handler = new Handler();
parser.setContentHandler(handler);
+ parser.setDTDHandler(handler);
+ parser.setEntityResolver(handler);
parser.setErrorHandler(new ErrorHandler(false,
ErrorHandler.FATAL_ERROR));
InputSource source = new InputSource(new
ByteArrayInputStream(lithmus));
parser.parse(source);
return false;
} catch (Matched m) {
+ log.debug("Matched " + m.getMessage());
return true;
} catch (SAXException e) {
return false;
@@ -64,26 +69,34 @@
@Override
public void configure(Element el) {
super.configure(el);
- if (namespace == null && doctype == null) {
- throw new IllegalStateException("Not configured with either
namespace or doctype");
+ if (namespace == null && publicId == null) {
+ throw new IllegalStateException("Not configured with either
namespace or publicId");
}
- if (doctype != null) {
- throw new UnsupportedOperationException("Needs implementing");
- }
}
protected class Matched extends RuntimeException {
+ public Matched(String mes) {
+ super(mes);
+ }
}
+
protected class Handler extends DefaultHandler {
-
@Override
public void startPrefixMapping(String prefix, String uri) {
if (uri.equals(XmlDetector.this.namespace)) {
- throw new Matched();
+ throw new Matched("Namespace " + uri);
}
}
+ @Override
+ public InputSource resolveEntity(String publicId, String systemId) {
+ if (XmlDetector.this.publicId != null &&
XmlDetector.this.publicId.matcher(publicId).matches()) {
+ throw new Matched("publicId " + publicId);
+ }
+ return new InputSource(new ByteArrayInputStream(new byte[0]));
+ }
+
}
}
Property changes on:
mmbase/trunk/utils/src/main/java/org/mmbase/util/magicfile/XmlDetector.java
___________________________________________________________________
Name: svn:keywords
+ Id
Added: mmbase/trunk/utils/src/test/files/test.html
===================================================================
--- mmbase/trunk/utils/src/test/files/test.html (rev 0)
+++ mmbase/trunk/utils/src/test/files/test.html 2010-04-01 12:34:12 UTC (rev
41722)
@@ -0,0 +1,10 @@
+<?xml version="1.0" ?>
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"
"http://www.w3.org/TR/html4/loose.dtd">
+<html>
+ <head>
+ <title>Test</title>
+ </head>
+ <body>
+ <p>foobar</p>
+ </body>
+</html>
Added: mmbase/trunk/utils/src/test/files/test.xhtml
===================================================================
--- mmbase/trunk/utils/src/test/files/test.xhtml
(rev 0)
+++ mmbase/trunk/utils/src/test/files/test.xhtml 2010-04-01 12:34:12 UTC
(rev 41722)
@@ -0,0 +1,8 @@
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <title>Test</title>
+ </head>
+ <body>
+ <p>foobar</p>
+ </body>
+</html>
Modified:
mmbase/trunk/utils/src/test/java/org/mmbase/util/magicfile/MagicFileTest.java
===================================================================
---
mmbase/trunk/utils/src/test/java/org/mmbase/util/magicfile/MagicFileTest.java
2010-04-01 12:17:18 UTC (rev 41721)
+++
mmbase/trunk/utils/src/test/java/org/mmbase/util/magicfile/MagicFileTest.java
2010-04-01 12:34:12 UTC (rev 41722)
@@ -33,6 +33,8 @@
new Object[] {"cx.png", "image/png"},
new Object[] {"wordxml.doc", "application/msword"},
+ new Object[] {"test.xhtml", "application/xml+xhtml"},
+ new Object[] {"test.html", "text/html"},
new Object[] {"test.xml", "text/xml"},
new Object[] {"dot.", MagicFile.FAILED
}
_______________________________________________
Cvs mailing list
[email protected]
http://lists.mmbase.org/mailman/listinfo/cvs