Author: michiel
Date: 2010-04-01 10:58:34 +0200 (Thu, 01 Apr 2010)
New Revision: 41710
Added:
mmbase/trunk/utils/src/main/java/org/mmbase/util/magicfile/AbstractDetector.java
mmbase/trunk/utils/src/main/java/org/mmbase/util/magicfile/BasicDetector.java
Modified:
mmbase/trunk/utils/src/main/java/org/mmbase/util/magicfile/Detector.java
mmbase/trunk/utils/src/main/java/org/mmbase/util/magicfile/MagicParser.java
mmbase/trunk/utils/src/main/java/org/mmbase/util/magicfile/MagicXMLReader.java
mmbase/trunk/utils/src/main/java/org/mmbase/util/magicfile/resources/magic.dtd
mmbase/trunk/utils/src/main/java/org/mmbase/util/magicfile/resources/magic_1_0.dtd
Log:
moved some code around, made Detector an interface. Targeting MMB-1947
Added:
mmbase/trunk/utils/src/main/java/org/mmbase/util/magicfile/AbstractDetector.java
===================================================================
---
mmbase/trunk/utils/src/main/java/org/mmbase/util/magicfile/AbstractDetector.java
(rev 0)
+++
mmbase/trunk/utils/src/main/java/org/mmbase/util/magicfile/AbstractDetector.java
2010-04-01 08:58:34 UTC (rev 41710)
@@ -0,0 +1,90 @@
+/*
+
+This software is OSI Certified Open Source Software.
+OSI Certified is a certification mark of the Open Source Initiative.
+
+The license (Mozilla version 1.0) can be read at the MMBase site.
+See http://www.MMBase.org/license
+
+*/
+
+package org.mmbase.util.magicfile;
+import java.util.*;
+import java.io.*;
+import org.mmbase.util.logging.*;
+
+/**
+ * Base class for {@link Detector} implementations. It keeps the state that does not depend on
+ * how a match is tested: the designation, the file extensions, the mime type and the child
+ * detectors. The matching itself ({@code test}), {@code getDesignation}, {@code setInvalid} and
+ * {@code valid} are not implemented here.
+ *
+ * @version $Id$
+ */
+
+public abstract class AbstractDetector implements Detector {
+    // Log under this class's own category (was BasicDetector.class, a copy/paste slip).
+    private static final Logger log = Logging.getLoggerInstance(AbstractDetector.class);
+
+    /**
+     * Designation for this type in 'magic' file
+     */
+    protected String message = "Unknown";
+    /**
+     * Possible file extensions for this type; index 0 is the default one.
+     */
+    private final List<String> extensions = new ArrayList<String>();
+
+    private String mimetype = "application/octet-stream";
+
+    protected final List<Detector> childList = new ArrayList<Detector>();
+
+    /**
+     * Initially true; meant to be set to false if parsing of the magic file fails.
+     */
+    protected boolean valid = true;
+
+    /**
+     * Add an embedded detector object that searches for more details after an initial match.
+     *
+     * @param detector the detector to add
+     * @param level    1 adds it as a direct child; a higher value hands it to the most recently
+     *                 added child with {@code level - 1}; values below 1 are ignored
+     */
+    public void addChild(Detector detector, int level) {
+        if (level == 1) {
+            childList.add(detector);
+        } else if (level > 1) {
+            if (childList.isEmpty()) {
+                log.debug("Hm. level = " + level + ", but childList is empty");
+            } else {
+                childList.get(childList.size() - 1).addChild(detector, level - 1);
+            }
+        }
+    }
+
+    /**
+     * Adds a possible extension. The last added one is the default (returned by 'getExtension').
+     */
+    public void setExtension(String extension) {
+        extensions.add(0, extension);
+    }
+
+    /**
+     * @return the most recently added extension, or the empty string if none was added
+     */
+    public String getExtension() {
+        return extensions.isEmpty() ? "" : extensions.get(0);
+    }
+
+    /**
+     * @return the backing list of extensions (not a copy), most recently added first
+     */
+    public List<String> getExtensions() {
+        return extensions;
+    }
+
+    public void setMimeType(String mimetype) {
+        this.mimetype = mimetype;
+    }
+
+    /**
+     * @return the configured mime type; the placeholder "???" is reported as
+     *         "application/octet-stream"
+     */
+    public String getMimeType() {
+        return mimetype.equals("???") ? "application/octet-stream" : mimetype;
+    }
+
+    public void setDesignation(String designation) {
+        this.message = designation;
+    }
+
+    /**
+     * Default implementation: there is nothing to configure.
+     */
+    public void configure(org.w3c.dom.Element el) {
+        // nothing to do.
+    }
+}
Added:
mmbase/trunk/utils/src/main/java/org/mmbase/util/magicfile/BasicDetector.java
===================================================================
---
mmbase/trunk/utils/src/main/java/org/mmbase/util/magicfile/BasicDetector.java
(rev 0)
+++
mmbase/trunk/utils/src/main/java/org/mmbase/util/magicfile/BasicDetector.java
2010-04-01 08:58:34 UTC (rev 41710)
@@ -0,0 +1,612 @@
+/*
+
+This software is OSI Certified Open Source Software.
+OSI Certified is a certification mark of the Open Source Initiative.
+
+The license (Mozilla version 1.0) can be read at the MMBase site.
+See http://www.MMBase.org/license
+
+*/
+
+package org.mmbase.util.magicfile;
+import java.util.*;
+import java.io.*;
+import org.w3c.dom.Element;
+import org.mmbase.util.xml.DocumentReader;
+import org.mmbase.util.logging.*;
+
+/**
+ * A Detector stores one entry from the magic.xml file, and contains
+ * the functionality to determine whether a certain byte[] satisfies it.
+ *
+ * Implementation made on the basis of actual magic file and its manual.<br />
+ *
+ * TODO:<br />
+ * - link the info with mimetypes<br />
+ * - add test modifiers<br />
+ * - add commandline switches for warning, error and debugging messages<br />
+ *<br />
+ * Ignored features of magic:<br />
+ * - date types<br />
+ * - indirect offsets (prefix of '&' in sublevel match or (address+bytes)
where offset = value of address plus bytes<br />
+ * - AND'ing of type<br />
+ *<br />
+ * BUGS:<br />
+ * - test string isn't read when end of line is reached in absence of a
message string<br />
+ * <br />
+ *
+ * Tested:<br />
+ * - .doc<br />
+ * - .rtf<br />
+ * - .pdf<br />
+ * - .sh<br />
+ * - .gz<br />
+ * - .bz2<br />
+ * - .html<br />
+ * - .rpm<br />
+ * - .wav<br />
+ *<br />
+ * Not supported by magic file:<br />
+ * - StarOffice<br />
+ * @version $Id: Detector.java 41036 2010-02-15 22:30:54Z michiel $
+ */
+
+public class BasicDetector extends AbstractDetector {
+ private static final Logger log =
Logging.getLoggerInstance(BasicDetector.class);
+
+ // No configuration below
+ private static final int BIG_ENDIAN = 0;
+ private static final int LITTLE_ENDIAN = 1;
+ private static final String[] label = new String[] { "big endian", "little
endian" };
+
+ private String rawinput; // Original input line
+ private int offset = -1;
+ private String type;
+ // types: byte, short, long, string, date, beshort, belong, bedate,
leshort, lelong, ledate
+ private String typeAND;
+ // Some types are defined as e.g. "belong&0x0000ff70", then
typeAND=0x0000ff70 (NOT IMPLEMENTED!)
+ private String test; // Test value
+ private char testComparator; // What the test is like,
+
+ // What are these?
+ private String xString;
+ private int xInt;
+ private char xChar;
+
+
+ private boolean hasX; // Is set when an 'x' value is matched
+
+
+ /**
+ * Sets the offset of the test from its decimal string form.
+ * @param offset parsed with {@link Integer#parseInt(String)}
+ * @throws NumberFormatException if the string is not a parsable integer
+ */
+ public void setOffset(String offset) {
+ this.offset = Integer.parseInt(offset);
+ }
+ /**
+ * @return the offset of the test; -1 until {@link #setOffset} is called
+ */
+ public int getOffset() {
+ return offset;
+ }
+ /**
+ * @param type name of the test type; {@link #test} recognises "string", "beshort", "belong",
+ * "leshort", "lelong" and "byte"
+ */
+ public void setType(String type) {
+ this.type = type;
+ }
+ /**
+ * @return name of the test type
+ */
+ public String getType() {
+ return type;
+ }
+ /**
+ * @param test the value to compare the file content with
+ */
+ public void setTest(String test) {
+ this.test = test;
+ }
+ /**
+ * @return the value the file content is compared with
+ */
+ public String getTest() {
+ return test;
+ }
+ /**
+ * @param comparator how the content is compared with the test value; the test methods of this
+ * class handle '=', '>' and '<' ('=' and '&' for bytes)
+ */
+ public void setComparator(char comparator) {
+ this.testComparator = comparator;
+ }
+ /**
+ * @return the comparator character
+ */
+ public char getComparator() {
+ return testComparator;
+ }
+
+ /**
+ * Runs the test selected by the type of this detector against the given bytes and, on a hit,
+ * runs all child detectors as well.
+ *
+ * NOTE(review): every child that matches has its designation appended to the field
+ * <code>message</code> of this detector, so calling this method repeatedly on the same instance
+ * keeps lengthening the designation.
+ *
+ * @param lithmus the first bytes of the file to test
+ * @return Whether detector matches the prefix/lithmus of the file
+ */
+ public boolean test(byte[] lithmus) {
+ // offset == -1 means that no offset was ever set
+ if (lithmus == null || lithmus.length == 0 || offset == -1) {
+ return false;
+ }
+ boolean hit;
+ //log.debug("TESTING "+rawinput);
+ if (type.equals("string")) {
+ hit = testString(lithmus);
+ } else if (type.equals("beshort")) {
+ hit = testShort(lithmus, BIG_ENDIAN);
+ } else if (type.equals("belong")) {
+ hit = testLong(lithmus, BIG_ENDIAN);
+ } else if (type.equals("leshort")) {
+ hit = testShort(lithmus, LITTLE_ENDIAN);
+ } else if (type.equals("lelong")) {
+ hit = testLong(lithmus, LITTLE_ENDIAN);
+ } else if (type.equals("byte")) {
+ hit = testByte(lithmus);
+ } else {
+ // Date types are not supported
+ hit = false;
+ }
+ if (hit) {
+ log.debug("Detector " + this + " hit");
+ for (Detector child : childList) {
+ if (child.test(lithmus)) {
+ String s = child.getDesignation();
+ // a leading backslash-b is stripped from the child designation before it is appended
+ if (s.startsWith("\\b")) {
+ s = s.substring(2);
+ }
+ this.message = this.message + " " + s;
+ }
+ }
+ }
+ return hit;
+ }
+
+ /**
+ * todo: I noticed there is also a %5.5s variation in magic...
+ */
+ public String getDesignation() {
+ if (hasX) {
+ int n = message.indexOf("%d");
+ if (n >= 0) {
+ return message.substring(0, n) + xInt + message.substring(n +
2);
+ }
+
+ n = message.indexOf("%s");
+ if (n >= 0) {
+ return message.substring(0, n) + xString + message.substring(n
+ 2);
+ }
+
+ n = message.indexOf("%c");
+ if (n >= 0) {
+ return message.substring(0, n) + xChar + message.substring(n +
2);
+ }
+ }
+ return message;
+ }
+
+ /**
+ * Marks this detector as invalid; {@link #valid} returns false afterwards.
+ */
+ public void setInvalid() {
+ valid = false;
+ }
+
+ /**
+ * @return Whether parsing of magic line for this detector succeeded
+ */
+ public boolean valid() {
+ return valid;
+ }
+
+ /**
+  * Interprets the given bytes as an unsigned number, most significant byte first.
+  *
+  * The previous implementation concatenated <code>Integer.toHexString</code> of every byte, which
+  * does not zero-pad: {0x01, 0x02} became "12" (0x12) instead of 0x0102.
+  *
+  * @param ar the bytes, most significant first; used with 2 bytes
+  * @return Conversion of 2 byte array to integer
+  */
+ private int byteArrayToInt(byte[] ar) {
+     int value = 0;
+     for (byte element : ar) {
+         // mask to undo sign extension, then shift the byte into place
+         value = (value << 8) | (element & 0xff);
+     }
+     return value;
+ }
+
+ /**
+  * Interprets the given bytes as an unsigned number, most significant byte first.
+  *
+  * The previous implementation concatenated <code>Integer.toHexString</code> of every byte, which
+  * does not zero-pad: {0x00, 0x00, 0x01, 0x02} became "0012" (0x12) instead of 0x0102.
+  *
+  * @param ar the bytes, most significant first; used with 4 bytes
+  * @return Conversion of 4 byte array to long
+  */
+ private long byteArrayToLong(byte[] ar) {
+     long value = 0;
+     for (byte element : ar) {
+         // long mask: keeps the result non-negative for a 4 byte input
+         value = (value << 8) | (element & 0xffL);
+     }
+     return value;
+ }
+
+ /**
+ * Test whether a string matches
+ */
+ protected boolean testString(byte[] lithmus) {
+
+ if (test.length() == 0) {
+ log.warn("TEST STRING LENGTH ZERO FOR [" + rawinput + "]");
+ return false;
+ }
+
+ int maxNeeded = offset + test.length();
+
+ if (maxNeeded > lithmus.length) {
+ return false;
+ }
+
+ try {
+ xString = new String(lithmus, offset, test.length(), "US-ASCII");
+ // US-ASCII: fixate the charset, do not depend on platform default:
+ // US-ASCCII: one byte = one char, so length can be
predicted
+ } catch (java.io.UnsupportedEncodingException usee) { // could not
happen: US-ASCII is supported
+ }
+
+ log.debug("test string = '" + test + "' (" + message + ") comparing
with '" + xString + "'");
+ int n = xString.compareTo(test);
+ switch (testComparator) {
+ case '=' :
+ return n == 0;
+ case '>' :
+ hasX = true;
+ return n > 0;
+ case '<' :
+ hasX = true;
+ return n < 0;
+ default:
+ return false;
+ }
+ }
+
+ /**
+  * Test whether a short matches: reads the two bytes at the offset in the given byte order and
+  * compares them with the test value. The value found is remembered in xInt.
+  *
+  * The length check used to be <code>lithmus.length &lt; offset + 1</code>, which lets an input
+  * holding only one byte at the offset through, after which reading the second byte threw an
+  * ArrayIndexOutOfBoundsException.
+  *
+  * @param lithmus the first bytes of the file
+  * @param endian  BIG_ENDIAN or LITTLE_ENDIAN
+  * @return true for test value "x", otherwise the outcome for comparators '=', '>' and '<';
+  *         false if the two bytes are not available, the test value is empty or the comparator
+  *         is unknown
+  * @throws NumberFormatException if the test value is not a number
+  */
+ protected boolean testShort(byte[] lithmus, int endian) {
+     // two bytes are needed, at offset and offset + 1
+     if (offset < 0 || offset > lithmus.length - 2) {
+         return false;
+     }
+     log.debug("testing " + label[endian] + " short for " + rawinput);
+     int found = 0;
+     if (endian == BIG_ENDIAN) {
+         found = byteArrayToInt(new byte[] { lithmus[offset], lithmus[offset + 1] });
+     } else if (endian == LITTLE_ENDIAN) {
+         found = byteArrayToInt(new byte[] { lithmus[offset + 1], lithmus[offset] });
+     }
+     xInt = found;
+
+     if (test.equals("x")) {
+         hasX = true;
+         return true;
+     } else if (test.equals("")) {
+         return false;
+     } else {
+         int v = Integer.decode(test).intValue();
+         log.debug(
+             "dumb string conversion: 0x"
+                 + Integer.toHexString(lithmus[offset] & 0x000000ff)
+                 + Integer.toHexString(lithmus[offset + 1] & 0x000000ff));
+
+         switch (testComparator) {
+         case '=' :
+             log.debug(Integer.toHexString(v) + " = " + Integer.toHexString(found));
+             return v == found;
+         case '>' :
+             hasX = true;
+             return found > v;
+         case '<' :
+             hasX = true;
+             return found < v;
+         default:
+             return false;
+         }
+     }
+ }
+
+ /**
+  * Test whether a long matches: reads the four bytes at the offset in the given byte order and
+  * compares them with the test value. The value found, truncated to int, is remembered in xInt.
+  *
+  * The length check used to ignore the offset (<code>lithmus.length &lt; 4</code>), so any input
+  * shorter than offset + 4 ended up in a catch block for ArrayIndexOutOfBoundsException and was
+  * logged as an error. An input that is too short simply does not match, as in
+  * <code>testString</code>, so it is now rejected up front and the catch block is gone.
+  *
+  * @param lithmus the first bytes of the file
+  * @param endian  BIG_ENDIAN or LITTLE_ENDIAN
+  * @return true for test value "x", otherwise the outcome for comparators '=', '>' and '<';
+  *         false if the four bytes are not available, the test value is empty or the comparator
+  *         is unknown
+  * @throws NumberFormatException if the test value is not a number
+  */
+ protected boolean testLong(byte[] lithmus, int endian) {
+     // four bytes are needed, at offset .. offset + 3
+     if (offset < 0 || offset > lithmus.length - 4) {
+         return false;
+     }
+     log.debug("testing " + label[endian] + " long for " + rawinput);
+     long found = 0;
+     if (endian == BIG_ENDIAN) {
+         found = byteArrayToLong(
+             new byte[] {
+                 lithmus[offset],
+                 lithmus[offset + 1],
+                 lithmus[offset + 2],
+                 lithmus[offset + 3] });
+     } else if (endian == LITTLE_ENDIAN) {
+         found = byteArrayToLong(
+             new byte[] {
+                 lithmus[offset + 3],
+                 lithmus[offset + 2],
+                 lithmus[offset + 1],
+                 lithmus[offset] });
+     }
+     xInt = (int) found;
+     // If it really is a long, we wouldn't want to know about it
+
+     if (test.equals("x")) {
+         hasX = true;
+         return true;
+     } else if (test.equals("")) {
+         return false;
+     } else {
+         long v = Long.decode(test).longValue();
+
+         switch (testComparator) {
+         case '=' :
+             log.debug("checking " + label[endian] + " long: " + Long.toHexString(v)
+                       + " = " + Long.toHexString(found));
+             return v == found;
+         case '>' :
+             hasX = true;
+             return found > v;
+         case '<' :
+             hasX = true;
+             return found < v;
+         default:
+             return false;
+         }
+     }
+ }
+
+ /**
+  * Test whether a byte matches: compares the byte at the offset with the test value.
+  *
+  * The byte at the offset used to be read without checking that the input is that long, so an
+  * offset past the end threw an ArrayIndexOutOfBoundsException; such input now does not match.
+  *
+  * @param lithmus the first bytes of the file
+  * @return true for test value "x" (the byte is then remembered in xInt, xChar and xString);
+  *         for '=' whether the byte equals the test value; for '&amp;' whether all bits of the
+  *         test value are set in the byte; false otherwise
+  * @throws NumberFormatException if the test value is not a number
+  */
+ protected boolean testByte(byte[] lithmus) {
+     if (offset < 0 || offset >= lithmus.length) {
+         return false;
+     }
+     log.debug("testing byte for " + rawinput);
+     if (test.equals("x")) {
+         hasX = true;
+         xInt = lithmus[offset];
+         xChar = (char) lithmus[offset];
+         xString = "" + xChar;
+         return true;
+     } else if (test.equals("")) {
+         return false;
+     } else {
+         byte b = (byte) Integer.decode(test).intValue();
+         switch (testComparator) {
+         case '=' :
+             return b == lithmus[offset];
+         case '&' :
+             // All bits in the test byte should be set in the found byte
+             byte filter = (byte) (lithmus[offset] & b);
+             return filter == b;
+         default :
+             return false;
+         }
+     }
+ }
+
+ /**
+ * NOTE(review): nothing in this class assigns the field <code>rawinput</code>, so as the class
+ * stands this returns null.
+ *
+ * @return Original unprocessed input line
+ * @since MMBase-1.7
+ */
+ public String getRawInput() {
+ return rawinput;
+ }
+
+ /**
+  * Makes a string safe for use as XML text: the markup characters &gt;, &lt; and &amp; are
+  * replaced by their predefined entities, and every character other than tab, line feed,
+  * carriage return and 0x20 - 0x7F is written as a backslash followed by its octal value,
+  * zero-padded to at least three digits (the notation <code>convertOctals</code> reads back).
+  *
+  * The three markup characters used to be appended unchanged, which made the escaping a no-op
+  * and produced malformed XML for any value containing &lt; or &amp;.
+  *
+  * @param s the raw string
+  * @return the escaped string
+  */
+ protected String xmlEntities(String s) {
+     StringBuilder res = new StringBuilder();
+     for (int i = 0; i < s.length(); i++) {
+         char c = s.charAt(i);
+         switch (c) {
+         case '>' :
+             res.append("&gt;");
+             break;
+         case '<' :
+             res.append("&lt;");
+             break;
+         case '&' :
+             res.append("&amp;");
+             break;
+         default :
+             // Deliberately stricter than the XML character set: only tab, LF, CR and
+             // 0x20 - 0x7F pass unchanged.
+             int n = c;
+             if (n == 0x9
+                 || n == 0xA
+                 || n == 0xD
+                 || (n >= 0x20 && n < 128)) {
+                 res.append(c);
+             } else {
+                 // octal representation of number; pad with zeros
+                 String oct = Integer.toOctalString(n);
+                 res.append("\\");
+                 for (int j = 3; j > oct.length(); j--) {
+                     res.append("0");
+                 }
+                 res.append(oct);
+             }
+         }
+     }
+     return res.toString();
+ }
+
+ /**
+ * Writes this detector as XML at nesting level 0.
+ *
+ * XML notation:
+ * <pre>
+ * &lt;detector&gt;
+ * &lt;mimetype&gt;foo/bar&lt;/mimetype&gt;
+ * &lt;extension&gt;bar&lt;/extension&gt;
+ * &lt;designation&gt;blablabla&lt;/designation&gt;
+ * &lt;test offset="bla" type="bla" comparator="="&gt;test string&lt;/test&gt;
+ * &lt;childlist&gt;
+ * &lt;detector&gt;etc&lt;/detector&gt;
+ * &lt;/childlist&gt;
+ * &lt;/detector&gt;
+ * </pre>
+ *
+ * @param f the writer to append the XML to
+ * @throws IOException if writing fails
+ */
+ public void toXML(FileWriter f) throws IOException {
+ toXML(f, 0);
+ }
+
+ /**
+ * @param level Indicates depth of (child) element
+ */
+ public void toXML(FileWriter f, int level) throws IOException {
+ StringBuilder s = new StringBuilder();
+ String comparatorEntity;
+
+ char[] pad;
+ if (level > 0) {
+ pad = new char[level * 4];
+ for (int i = 0; i < level * 4; i++) {
+ pad[i] = ' ';
+ }
+ } else {
+ pad = new char[] { };
+ }
+ String padStr = new String(pad);
+
+ if (testComparator == '>') {
+ comparatorEntity = ">";
+ } else
+ if (testComparator == '<') {
+ comparatorEntity = "<";
+ } else if (testComparator == '&') {
+ comparatorEntity = "&";
+ } else {
+ comparatorEntity = "" + testComparator;
+ }
+ s.append(
+ padStr
+ + "<detector>\n"
+ + padStr
+ + " <mimetype>" + getMimeType() + "</mimetype>\n"
+ + padStr
+ + " <extension>" + getExtension() + "</extension>\n"
+ + padStr
+ + " <designation>"
+ + xmlEntities(message)
+ + "</designation>\n"
+ + padStr
+ + " <test offset=\""
+ + offset
+ + "\" type=\""
+ + type
+ + "\" comparator=\""
+ + comparatorEntity
+ + "\">"
+ + xmlEntities(test)
+ + "</test>\n");
+ f.write(s.toString());
+ if (childList.size() > 0) {
+ f.write(padStr + " <childlist>\n");
+ for (Detector detector : childList) {
+ if (detector instanceof BasicDetector) {
+ ((BasicDetector) detector).toXML(f, level + 1);
+ } else {
+ log.warn("" + detector);
+ }
+ }
+ f.write(padStr + " </childlist>\n");
+ }
+ f.write(padStr + "</detector>\n");
+
+ }
+
+ /**
+ * Replaces octal representations of bytes, written as \ddd, by the actual byte values.
+ *
+ * A backslash that is not followed by three octal digits is copied unchanged. A backslash with
+ * fewer than three characters after it ends the conversion: it and everything after it is
+ * dropped. The bytes collected are decoded as US-ASCII for the result.
+ *
+ * @param s the string, possibly containing \ddd sequences
+ * @return the string with the octal sequences replaced
+ */
+ private String convertOctals(String s) {
+ // p is the read position, stoppedAt the start of the text not yet copied to buf
+ int p = 0;
+ int stoppedAt = 0;
+ ByteArrayOutputStream buf = new ByteArrayOutputStream();
+ char c;
+ try {
+ while (p < s.length()) {
+ c = s.charAt(p);
+ if (c == '\\') {
+ if (p > s.length() - 4) {
+ // Can't be a full octal representation here, let's cut it off
+ break;
+ } else {
+ char c0;
+ boolean failed = false;
+ // the three characters after the backslash must all be octal digits
+ for (int p0 = p + 1; p0 < p + 4; p0++) {
+ c0 = s.charAt(p0);
+ if (!(c0 >= '0' && c0 <= '7')) {
+ failed = true;
+ }
+ }
+ if (!failed) {
+ // flush the literal text before the backslash, then the decoded byte
+ byte[] bytes = s.substring(stoppedAt,
p).getBytes("US-ASCII");
+ buf.write(bytes, 0, bytes.length);
+ // write(int) stores the low-order eight bits only
+ buf.write(Integer.parseInt(s.substring(p + 1, p +
4), 8));
+ stoppedAt = p + 4;
+ p = p + 4;
+ } else {
+ p++;
+ }
+ }
+ } else {
+ p++;
+ }
+ }
+ // flush the literal text after the last octal sequence
+ byte[] bytes = s.substring(stoppedAt, p).getBytes("US-ASCII");
+ buf.write(bytes, 0, bytes.length);
+ return buf.toString("US-ASCII");
+ } catch (java.io.UnsupportedEncodingException use) { // could not
happen US-ASCII is supported
+ return "";
+ }
+ }
+
+
+ /**
+  * Configures this detector from a detector element: mime type, extension and designation are
+  * always read; test value, offset, type and comparator only if there is a test element.
+  *
+  * The comparator may be given as a single character or as one of the entity spellings
+  * <code>&amp;gt;</code>, <code>&amp;lt;</code>, <code>&amp;amp;</code>; anything else means '='.
+  * The separate branches for "&gt;", "&lt;" and "&amp;" used to duplicate the single character
+  * branch, while the entity spellings fell through to '='.
+  *
+  * @param e the element to read the configuration from
+  * @throws NumberFormatException if the offset attribute is not a parsable integer
+  */
+ @Override
+ public void configure(Element e) {
+     Element e1;
+
+     e1 = DocumentReader.getElementByPath(e, "detector.mimetype");
+     setMimeType(DocumentReader.getElementValue(e1));
+
+     e1 = DocumentReader.getElementByPath(e, "detector.extension");
+     setExtension(DocumentReader.getElementValue(e1));
+
+     e1 = DocumentReader.getElementByPath(e, "detector.designation");
+     setDesignation(DocumentReader.getElementValue(e1));
+
+     e1 = DocumentReader.getElementByPath(e, "detector.test");
+     if (e1 != null) {
+         setTest(convertOctals(DocumentReader.getElementValue(e1)));
+         setOffset(e1.getAttribute("offset"));
+         setType(e1.getAttribute("type"));
+         String comparator = e1.getAttribute("comparator");
+         if (comparator.equals("&gt;")) {
+             setComparator('>');
+         } else if (comparator.equals("&lt;")) {
+             setComparator('<');
+         } else if (comparator.equals("&amp;")) {
+             setComparator('&');
+         } else if (comparator.length() == 1) {
+             setComparator(comparator.charAt(0));
+         } else {
+             setComparator('=');
+         }
+     }
+ }
+
+ /**
+  * Describes this detector as offset, type, comparator, test value and designation, followed by
+  * the descriptions of its children, each introduced by "&gt; ".
+  *
+  * @return String representation of Detector object, or "parse error" if it is not valid.
+  */
+ @Override
+ public String toString() {
+     if (!valid) {
+         return "parse error";
+     }
+
+     StringBuilder res = new StringBuilder();
+     res.append("[").append(offset).append("] {").append(type);
+     if (typeAND != null) {
+         res.append("[").append(typeAND).append("]");
+     }
+     res.append("} ").append(testComparator).append("(").append(test).append(") ").append(message);
+
+     if (!childList.isEmpty()) {
+         res.append("\n");
+         for (Detector child : childList) {
+             res.append("> ").append(child.toString());
+         }
+     }
+     return res.toString();
+ }
+}
Modified:
mmbase/trunk/utils/src/main/java/org/mmbase/util/magicfile/Detector.java
===================================================================
--- mmbase/trunk/utils/src/main/java/org/mmbase/util/magicfile/Detector.java
2010-04-01 07:41:34 UTC (rev 41709)
+++ mmbase/trunk/utils/src/main/java/org/mmbase/util/magicfile/Detector.java
2010-04-01 08:58:34 UTC (rev 41710)
@@ -11,574 +11,31 @@
package org.mmbase.util.magicfile;
import java.util.*;
import java.io.*;
+import org.w3c.dom.Element;
import org.mmbase.util.logging.*;
/**
* A Detector stores one entry from the magic.xml file, and contains
* the functionality to determines if a certain byte[] satisfies it.
*
- * Implementation made on the basis of actual magic file and its manual.<br />
- *
- * TODO:<br />
- * - link the info with mimetypes<br />
- * - add test modifiers<br />
- * - add commandline switches for warning, error and debugging messages<br />
- *<br />
- * Ignored features of magic:<br />
- * - date types<br />
- * - indirect offsets (prefix of '&' in sublevel match or (address+bytes)
where offset = value of address plus bytes<br />
- * - AND'ing of type<br />
- *<br />
- * BUGS:<br />
- * - test string isn't read when end of line is reached in absence of a
message string<br />
- * <br />
- *
- * Tested:<br />
- * - .doc<br />
- * - .rtf<br />
- * - .pdf<br />
- * - .sh<br />
- * - .gz<br />
- * - .bz2<br />
- * - .html<br />
- * - .rpm<br />
- * - .wav<br />
- *<br />
- * Not supported by magic file:<br />
- * - StarOffice<br />
* @version $Id$
*/
-public class Detector {
- private static final Logger log =
Logging.getLoggerInstance(Detector.class);
+public interface Detector {
+ void setExtension(String extension);
+ String getExtension();
+ List<String> getExtensions();
+ void setMimeType(String mimetype);
+ String getMimeType();
+ String getDesignation();
+ void addChild(Detector detector, int level);
+ boolean test(byte[] lithmus);
- // No configuration below
- private static final int BIG_ENDIAN = 0;
- private static final int LITTLE_ENDIAN = 1;
- private static final String[] label = new String[] { "big endian", "little
endian" };
+ void setInvalid();
+ boolean valid();
- private String rawinput; // Original input line
- private int offset = -1;
- private String type;
- // types: byte, short, long, string, date, beshort, belong, bedate,
leshort, lelong, ledate
- private String typeAND;
- // Some types are defined as e.g. "belong&0x0000ff70", then
typeAND=0x0000ff70 (NOT IMPLEMENTED!)
- private String test; // Test value
- private char testComparator; // What the test is like,
- private String message; // Designation for this type in 'magic' file
- private List<String> extensions; // Possible file extensions for this type
- private String mimetype; // MimeType for this type
- // What are these?
- private String xString;
- private int xInt;
- private char xChar;
+ void configure(Element el);
- private List<Detector> childList;
- private boolean valid; // Set this if parsing of magic file fails
- private boolean hasX; // Is set when an 'x' value is matched
-
- /**
- * Add an embedded detector object that searches for more details after an
initial match.
- */
- public void addChild(Detector detector, int level) {
- if (level == 1) {
- childList.add(detector);
- } else if (level > 1) {
- if (childList.size() == 0) {
- log.debug("Hm. level = " + level + ", but childList is empty");
- } else {
- (childList.get(childList.size() - 1)).addChild(detector, level
- 1);
- }
- }
- }
- /**
- * Detectors are instanciated by MagicXMLReader, and by Parser.
- */
- Detector() {
- childList = new ArrayList<Detector>();
- extensions = new ArrayList<String>();
- mimetype = "application/octet-stream";
- message = "Unknown";
- valid = true;
- }
-
- /**
- * Adds a possible extension. The last added one is the default (returned
by 'getExtension').
- */
- public void setExtension(String extension) {
- extensions.add(0, extension);
- }
- public String getExtension() {
- if (extensions.size() == 0) {
- return "";
- }
- return extensions.get(0);
- }
- public List<String> getExtensions() {
- return extensions;
- }
-
- public void setMimeType(String mimetype) {
- this.mimetype = mimetype;
- }
- public String getMimeType() {
- if (mimetype.equals("???")) {
- return "application/octet-stream";
- } else {
- return mimetype;
- }
- }
- public void setDesignation(String designation) {
- this.message = designation;
- }
- public void setOffset(String offset) {
- this.offset = Integer.parseInt(offset);
- }
- public int getOffset() {
- return offset;
- }
- public void setType(String type) {
- this.type = type;
- }
- public String getType() {
- return type;
- }
- public void setTest(String test) {
- this.test = test;
- }
- public String getTest() {
- return test;
- }
- public void setComparator(char comparator) {
- this.testComparator = comparator;
- }
- public char getComparator() {
- return testComparator;
- }
-
- /**
- * @return Whether detector matches the prefix/lithmus of the file
- */
- public boolean test(byte[] lithmus) {
- if (lithmus == null || lithmus.length == 0 || offset == -1) {
- return false;
- }
- boolean hit;
- //log.debug("TESTING "+rawinput);
- if (type.equals("string")) {
- hit = testString(lithmus);
- } else if (type.equals("beshort")) {
- hit = testShort(lithmus, BIG_ENDIAN);
- } else if (type.equals("belong")) {
- hit = testLong(lithmus, BIG_ENDIAN);
- } else if (type.equals("leshort")) {
- hit = testShort(lithmus, LITTLE_ENDIAN);
- } else if (type.equals("lelong")) {
- hit = testLong(lithmus, LITTLE_ENDIAN);
- } else if (type.equals("byte")) {
- hit = testByte(lithmus);
- } else {
- // Date types are not supported
- hit = false;
- }
- if (hit) {
- log.debug("Detector " + this + " hit");
- for (int i = 0; i < childList.size(); i++) {
- Detector child = childList.get(i);
- if (child.test(lithmus)) {
- String s = child.getDesignation();
- if (s.startsWith("\\b")) {
- s = s.substring(2);
- }
- this.message = this.message + " " + s;
- }
- }
- }
- return hit;
- }
-
- /**
- * todo: I noticed there is also a %5.5s variation in magic...
- */
- public String getDesignation() {
- if (hasX) {
- int n = message.indexOf("%d");
- if (n >= 0) {
- return message.substring(0, n) + xInt + message.substring(n +
2);
- }
-
- n = message.indexOf("%s");
- if (n >= 0) {
- return message.substring(0, n) + xString + message.substring(n
+ 2);
- }
-
- n = message.indexOf("%c");
- if (n >= 0) {
- return message.substring(0, n) + xChar + message.substring(n +
2);
- }
- }
- return message;
- }
-
- public void setInvalid() {
- valid = false;
- }
-
- /**
- * @return Whether parsing of magic line for this detector succeeded
- */
- public boolean valid() {
- return valid;
- }
-
- /**
- * @return Conversion of 2 byte array to integer
- */
- private int byteArrayToInt(byte[] ar) {
- StringBuilder buf = new StringBuilder();
- for (byte element : ar) {
- buf.append(Integer.toHexString(element & 0x000000ff));
- }
- return Integer.decode("0x" + buf.toString()).intValue();
- }
-
- /**
- * @return Conversion of 4 byte array to long
- */
- private long byteArrayToLong(byte[] ar) {
- StringBuilder buf = new StringBuilder();
- for (byte element : ar) {
- buf.append(Integer.toHexString(element & 0x000000ff));
- }
- return Long.decode("0x" + buf.toString()).longValue();
- }
-
- /**
- * Test whether a string matches
- */
- protected boolean testString(byte[] lithmus) {
-
- if (test.length() == 0) {
- log.warn("TEST STRING LENGTH ZERO FOR [" + rawinput + "]");
- return false;
- }
-
- int maxNeeded = offset + test.length();
-
- if (maxNeeded > lithmus.length) {
- return false;
- }
-
- try {
- xString = new String(lithmus, offset, test.length(), "US-ASCII");
- // US-ASCII: fixate the charset, do not depend on platform default:
- // US-ASCCII: one byte = one char, so length can be
predicted
- } catch (java.io.UnsupportedEncodingException usee) { // could not
happen: US-ASCII is supported
- }
-
- log.debug("test string = '" + test + "' (" + message + ") comparing
with '" + xString + "'");
- int n = xString.compareTo(test);
- switch (testComparator) {
- case '=' :
- return n == 0;
- case '>' :
- hasX = true;
- return n > 0;
- case '<' :
- hasX = true;
- return n < 0;
- default:
- return false;
- }
- }
-
- /**
- * Test whether a short matches
- */
- protected boolean testShort(byte[] lithmus, int endian) {
- if (lithmus.length < offset + 1) return false;
- log.debug("testing " + label[endian] + " short for " + rawinput);
- int found = 0;
- if (endian == BIG_ENDIAN) {
- found = byteArrayToInt(new byte[] { lithmus[offset],
lithmus[offset + 1] });
- } else if (endian == LITTLE_ENDIAN) {
- found = byteArrayToInt(new byte[] { lithmus[offset + 1],
lithmus[offset] });
- }
- xInt = found;
-
- if (test.equals("x")) {
- hasX = true;
- return true;
- } else if (test.equals("")) {
- return false;
- } else {
- int v = Integer.decode(test).intValue();
- // Hm. How did that binary arithmatic go?
- log.debug(
- "dumb string conversion: 0x"
- + Integer.toHexString(lithmus[offset] & 0x000000ff)
- + Integer.toHexString(lithmus[offset + 1] & 0x000000ff));
-
- switch (testComparator) {
- case '=' :
- log.debug(
- Integer.toHexString(v)
- + " = "
- + Integer.toHexString(found));
- return v == found;
- case '>' :
- hasX = true;
- return found > v;
- case '<' :
- hasX = true;
- return found < v;
- default:
- return false;
- }
- }
- }
-
- /**
- * Test whether a long matches
- */
- protected boolean testLong(byte[] lithmus, int endian) {
- if (lithmus.length < 4) return false;
- log.debug("testing " + label[endian] + " long for " + rawinput);
- long found = 0;
- try {
- if (endian == BIG_ENDIAN) {
- found = byteArrayToLong(
- new byte[] {
- lithmus[offset],
- lithmus[offset + 1],
- lithmus[offset + 2],
- lithmus[offset + 3] });
- } else if (endian == LITTLE_ENDIAN) {
- found =
- byteArrayToLong(
- new byte[] {
- lithmus[offset + 3],
- lithmus[offset + 2],
- lithmus[offset + 1],
- lithmus[offset] });
- }
- } catch (ArrayIndexOutOfBoundsException e) {
- if (!message.equals("")) {
- log.error("Failed to test " + label[endian] + " long for " +
message);
- } else {
- log.error("Failed to test " + label[endian] + " long:");
- }
- log.error("Offset out of bounds: " + offset + " while max is "
/*+BUFSIZE*/ );
- return false;
- }
- xInt = (int) found;
- // If it really is a long, we wouldn't want to know about it
-
- if (test.equals("x")) {
- hasX = true;
- return true;
- } else if (test.equals("")) {
- return false;
- } else {
- long v = Long.decode(test).longValue();
-
- // Hm. How did that binary arithmatic go?
-
- switch (testComparator) {
- case '=' :
- log.debug("checking " + label[endian] + " long: " +
Long.toHexString(v)
- + " = " + Long.toHexString(found));
- return v == found;
- case '>' :
- hasX = true;
- return found > v;
- case '<' :
- hasX = true;
- return found < v;
- default:
- return false;
- }
- }
- }
-
- /**
- * Test whether a byte matches
- */
- protected boolean testByte(byte[] lithmus) {
- log.debug("testing byte for " + rawinput);
- if (test.equals("x")) {
- hasX = true;
- xInt = lithmus[offset];
- xChar = (char) lithmus[offset];
- xString = "" + xChar;
- return true;
- } else if (test.equals("")) {
- return false;
- } else {
- byte b = (byte) Integer.decode(test).intValue();
- switch (testComparator) {
- // DOES THIS MAKE ANY SENSE AT ALL!!
- case '=' :
- return b == lithmus[offset];
- case '&' :
- // All bits in the test byte should be set in the found byte
- //log.debug("byte test as string = '"+test+"'");
- byte filter = (byte) (lithmus[offset] & b);
- //log.debug("lithmus = "+lithmus[offset]+"; test = "+b+";
filter = "+filter);
- return filter == b;
- default :
- return false;
- }
- }
- }
-
- /**
- * @return Original unprocessed input line
- * @since MMBase-1.7
- */
- public String getRawInput() {
- return rawinput;
- }
-
- protected String xmlEntities(String s) {
- StringBuilder res = new StringBuilder();
- for (int i = 0; i < s.length(); i++) {
- char c = s.charAt(i);
- switch (c) {
- case '>' :
- res.append(">");
- break;
- case '<' :
- res.append("<");
- break;
- case '&' :
- res.append("&");
- break;
- default :
- // Convert all characters not in the allowed XML character set
- int n = c;
- /* -- below is actual xml standard definition of allowed
characters
- if (n == 0x9 || n == 0xA || n == 0xD || (n >= 0x20 && n <=
0xD7FF) || (n >= 0xE000 && n <= 0xFFFD) ||
- (n >= 0x10000 && n <= 0x10FFFF)) {
- */
- if (n == 0x9
- || n == 0xA
- || n == 0xD
- || (n >= 0x20 && n < 128)) {
- res.append(c);
- } else {
- // octal representation of number; pad with zeros
- String oct = Integer.toOctalString(n);
- res.append("\\");
- for (int j = 3; j > oct.length(); j--) {
- res.append("0");
- }
- res.append(oct);
- }
- }
- }
- return res.toString();
- }
-
- /**
- * XML notatie:
- * <detector>
- * <mimetype>foo/bar</mimetype>
- * <extension>bar</extension>
- * <designation>blablabla</designation>
- * <test offset="bla" type="bla" comparator="=">test string</test>
- * <childlist>
- * <detector>etc</detector>
- * </childlist>
- * </detector>
- *
- */
- public void toXML(FileWriter f) throws IOException {
- toXML(f, 0);
- }
-
- /**
- * @param level Indicates depth of (child) element
- */
- public void toXML(FileWriter f, int level) throws IOException {
- StringBuilder s = new StringBuilder();
- String comparatorEntity;
-
- char[] pad;
- if (level > 0) {
- pad = new char[level * 4];
- for (int i = 0; i < level * 4; i++) {
- pad[i] = ' ';
- }
- } else {
- pad = new char[] { };
- }
- String padStr = new String(pad);
-
- if (testComparator == '>') {
- comparatorEntity = ">";
- } else
- if (testComparator == '<') {
- comparatorEntity = "<";
- } else if (testComparator == '&') {
- comparatorEntity = "&";
- } else {
- comparatorEntity = "" + testComparator;
- }
- s.append(
- padStr
- + "<detector>\n"
- + padStr
- + " <mimetype>" + getMimeType() + "</mimetype>\n"
- + padStr
- + " <extension>" + getExtension() + "</extension>\n"
- + padStr
- + " <designation>"
- + xmlEntities(message)
- + "</designation>\n"
- + padStr
- + " <test offset=\""
- + offset
- + "\" type=\""
- + type
- + "\" comparator=\""
- + comparatorEntity
- + "\">"
- + xmlEntities(test)
- + "</test>\n");
- f.write(s.toString());
- if (childList.size() > 0) {
- f.write(padStr + " <childlist>\n");
- for (Detector detector : childList) {
- detector.toXML(f, level + 1);
- }
- f.write(padStr + " </childlist>\n");
- }
- f.write(padStr + "</detector>\n");
-
- }
-
- /**
- * @return String representation of Detector object.
- */
- public String toString() {
- if (!valid) {
- return "parse error";
- } else {
- StringBuilder res = new StringBuilder("[" + offset + "] {" + type);
- if (typeAND != null) {
- res.append("[" + typeAND + "]");
- }
- res.append("} " + testComparator + "(" + test + ") " + message);
- if (childList.size() > 0) {
- res.append("\n");
- for (int i = 0; i < childList.size(); i++) {
- res.append("> ").append(childList.get(i).toString());
- }
- }
- return res.toString();
- }
- }
}
Modified:
mmbase/trunk/utils/src/main/java/org/mmbase/util/magicfile/MagicParser.java
===================================================================
--- mmbase/trunk/utils/src/main/java/org/mmbase/util/magicfile/MagicParser.java
2010-04-01 07:41:34 UTC (rev 41709)
+++ mmbase/trunk/utils/src/main/java/org/mmbase/util/magicfile/MagicParser.java
2010-04-01 08:58:34 UTC (rev 41710)
@@ -19,6 +19,9 @@
/**
* This Parser translates the configuration file of UNIX's file to a
* list of Detectors (and to a magic.xml)
+ *
+ * TODO: this is unused, and probably cannot be used while magic.xml
+ * is maintained manually.
*
* @version $Id$
*/
@@ -434,7 +437,7 @@
}
private Detector createDetector(String line) {
- Detector detector = new Detector();
+ BasicDetector detector = new BasicDetector();
// rawinput = line;
// hasX = false;
@@ -444,27 +447,20 @@
// parse line
log.debug("parse: " + line);
- int n;
- String level = "start";
try {
- level = "parseOffsetString";
- n = parseOffsetString(line, 0);
- level = "parseTypeString";
+ int n = parseOffsetString(line, 0);
n = parseTypeString(line, n);
- level = "parseTestString";
n = parseTestString(line, n);
// If there are multiple test level, an upper one doesn't have to
have a message string
if (n > 0) {
- level = "parseMessageString";
parseMessageString(line, n);
} else {
message = "";
}
- level = "end";
} catch (UnsupportedOperationException e) {
log.warn(e.getMessage());
} catch (Exception e) {
- log.error("parse failure at " + level + ": " + e.getMessage() + "
for [" + line + "]");
+ log.error("parse failure: " + e.getMessage() + " for [" + line +
"]", e);
}
detector.setType(type);
detector.setOffset("" + offset);
@@ -487,12 +483,18 @@
* @throws IOException Throws an exception when parsing failed
*/
public boolean toXML(File f) throws IOException {
+
+
+ // TODO should use something like StAX or so.
+
FileWriter writer = new FileWriter(f);
writer.write(
"<!DOCTYPE magic PUBLIC \"-//MMBase//DTD magic config 1.0//EN\"
\"http://www.mmbase.org/dtd/magic_1_0.dtd\">\n<magic>\n<info>\n<version>0.1</version>\n<author>[email protected]</author>\n<description>Conversion
of the UNIX 'magic' file with added mime types and
extensions.</description>\n</info>\n<detectorlist>\n");
for (Detector detector : getDetectors()) {
- detector.toXML(writer);
+ if (detector instanceof BasicDetector) {
+ ((BasicDetector) detector).toXML(writer); // see TODO
+ }
}
writer.write("</detectorlist>\n</magic>\n");
writer.close();
Modified:
mmbase/trunk/utils/src/main/java/org/mmbase/util/magicfile/MagicXMLReader.java
===================================================================
---
mmbase/trunk/utils/src/main/java/org/mmbase/util/magicfile/MagicXMLReader.java
2010-04-01 07:41:34 UTC (rev 41709)
+++
mmbase/trunk/utils/src/main/java/org/mmbase/util/magicfile/MagicXMLReader.java
2010-04-01 08:58:34 UTC (rev 41710)
@@ -6,6 +6,7 @@
import org.mmbase.util.*;
import org.mmbase.util.logging.*;
import org.mmbase.util.xml.DocumentReader;
+import org.mmbase.util.xml.Instantiator;
import org.w3c.dom.Element;
import org.xml.sax.InputSource;
import java.util.concurrent.CopyOnWriteArrayList;
@@ -80,7 +81,7 @@
/**
* Returns all 'Detectors'.
*/
- public List<Detector> getDetectors() {
+ public List<Detector> getDetectors() {
if (detectors == null) {
detectors = new CopyOnWriteArrayList<Detector>();
Element e = getElementByPath("magic.detectorlist");
@@ -90,93 +91,28 @@
return detectors;
}
for (Element element : getChildElements(e)) {
- Detector d = getOneDetector(element);
- detectors.add(d);
+ try {
+ Detector d = getOneDetector(element);
+ detectors.add(d);
+ } catch (Exception ex) {
+ log.error(ex.getMessage() + ": " + element, ex);
+ }
}
}
+
return detectors;
}
- /**
- * Replaces octal representations of bytes, written as \ddd to actual byte
values.
- */
- private String convertOctals(String s) {
- int p = 0;
- int stoppedAt = 0;
- ByteArrayOutputStream buf = new ByteArrayOutputStream();
- char c;
- try {
- while (p < s.length()) {
- c = s.charAt(p);
- if (c == '\\') {
- if (p > s.length() - 4) {
- // Can't be a full octal representation here, let's
cut it off
- break;
- } else {
- char c0;
- boolean failed = false;
- for (int p0 = p + 1; p0 < p + 4; p0++) {
- c0 = s.charAt(p0);
- if (!(c0 >= '0' && c0 <= '7')) {
- failed = true;
- }
- }
- if (!failed) {
- byte[] bytes = s.substring(stoppedAt,
p).getBytes("US-ASCII");
- buf.write(bytes, 0, bytes.length);
- buf.write(Integer.parseInt(s.substring(p + 1, p +
4), 8));
- stoppedAt = p + 4;
- p = p + 4;
- } else {
- p++;
- }
- }
- } else {
- p++;
- }
- }
- byte[] bytes = s.substring(stoppedAt, p).getBytes("US-ASCII");
- buf.write(bytes, 0, bytes.length);
- return buf.toString("US-ASCII");
- } catch (java.io.UnsupportedEncodingException use) { // could not
happen US-ASCII is supported
- return "";
- }
- }
+ private Detector getOneDetector(Element e) throws
+ org.xml.sax.SAXException, ClassNotFoundException,
NoSuchMethodException, InstantiationException, IllegalAccessException,
+ java.lang.reflect.InvocationTargetException {
+ Detector d = (Detector) Instantiator.getInstance(e);
- private Detector getOneDetector(Element e) {
- Detector d = new Detector();
- Element e1;
- e1 = getElementByPath(e, "detector.mimetype");
- d.setMimeType(getElementValue(e1));
+ d.configure(e);
- e1 = getElementByPath(e, "detector.extension");
- d.setExtension(getElementValue(e1));
-
- e1 = getElementByPath(e, "detector.designation");
- d.setDesignation(getElementValue(e1));
-
- e1 = getElementByPath(e, "detector.test");
+ Element e1 = getElementByPath(e, "detector.childlist");
if (e1 != null) {
- d.setTest(convertOctals(getElementValue(e1)));
- d.setOffset(getElementAttributeValue(e1, "offset"));
- d.setType(getElementAttributeValue(e1, "type"));
- String comparator = getElementAttributeValue(e1, "comparator");
- if (comparator.equals(">")) {
- d.setComparator('>');
- } else if (comparator.equals("<")) {
- d.setComparator('<');
- } else if (comparator.equals("&")) {
- d.setComparator('&');
- } else if (comparator.length() == 1) {
- d.setComparator(comparator.charAt(0));
- } else {
- d.setComparator('=');
- }
- }
-
- e1 = getElementByPath(e, "detector.childlist");
- if (e1 != null) {
for (Element element: getChildElements(e1)) {
Detector child = getOneDetector(element);
d.addChild(child, 1); // Not sure if this is the right thing
Modified:
mmbase/trunk/utils/src/main/java/org/mmbase/util/magicfile/resources/magic.dtd
===================================================================
---
mmbase/trunk/utils/src/main/java/org/mmbase/util/magicfile/resources/magic.dtd
2010-04-01 07:41:34 UTC (rev 41709)
+++
mmbase/trunk/utils/src/main/java/org/mmbase/util/magicfile/resources/magic.dtd
2010-04-01 08:58:34 UTC (rev 41710)
@@ -4,7 +4,8 @@
<!ELEMENT author (#PCDATA)>
<!ELEMENT description (#PCDATA)>
<!ELEMENT detectorlist (detector*)>
-<!ELEMENT detector (mimetype,extension,designation,test,childlist?)>
+<!ELEMENT detector (mimetype,extension,designation,test,param?,childlist?)>
+<!ATTLIST detector class CDATA "org.mmbase.util.magicfile.BasicDetector">
<!ELEMENT extension (#PCDATA)>
<!ELEMENT mimetype (#PCDATA)>
<!ELEMENT designation (#PCDATA)>
@@ -13,5 +14,8 @@
<!ATTLIST test type CDATA "string">
<!ATTLIST test comparator CDATA "=">
<!ELEMENT childlist (detector+)>
-
-
+
+<!ELEMENT param (#PCDATA)>
+<!ATTLIST param name CDATA "=">
+
+
Modified:
mmbase/trunk/utils/src/main/java/org/mmbase/util/magicfile/resources/magic_1_0.dtd
===================================================================
---
mmbase/trunk/utils/src/main/java/org/mmbase/util/magicfile/resources/magic_1_0.dtd
2010-04-01 07:41:34 UTC (rev 41709)
+++
mmbase/trunk/utils/src/main/java/org/mmbase/util/magicfile/resources/magic_1_0.dtd
2010-04-01 08:58:34 UTC (rev 41710)
@@ -4,7 +4,8 @@
<!ELEMENT author (#PCDATA)>
<!ELEMENT description (#PCDATA)>
<!ELEMENT detectorlist (detector*)>
-<!ELEMENT detector (mimetype,extension,designation,test?,childlist?)>
+<!ELEMENT detector (mimetype,extension,designation,test,param?,childlist?)>
+<!ATTLIST detector class CDATA "org.mmbase.util.magicfile.BasicDetector">
<!ELEMENT extension (#PCDATA)>
<!ELEMENT mimetype (#PCDATA)>
<!ELEMENT designation (#PCDATA)>
@@ -13,5 +14,8 @@
<!ATTLIST test type CDATA "string">
<!ATTLIST test comparator CDATA "=">
<!ELEMENT childlist (detector+)>
-
-
+
+<!ELEMENT param (#PCDATA)>
+<!ATTLIST param name CDATA "=">
+
+
_______________________________________________
Cvs mailing list
[email protected]
http://lists.mmbase.org/mailman/listinfo/cvs