Repository: marmotta
Updated Branches:
  refs/heads/develop b91cfb8c2 -> f3bf97b34
more hashing improvements

Project: http://git-wip-us.apache.org/repos/asf/marmotta/repo
Commit: http://git-wip-us.apache.org/repos/asf/marmotta/commit/f3bf97b3
Tree: http://git-wip-us.apache.org/repos/asf/marmotta/tree/f3bf97b3
Diff: http://git-wip-us.apache.org/repos/asf/marmotta/diff/f3bf97b3

Branch: refs/heads/develop
Commit: f3bf97b34c167ad4479c0b83535cbd75e8e4263b
Parents: b91cfb8
Author: Sebastian Schaffert <[email protected]>
Authored: Wed Oct 22 11:58:16 2014 +0200
Committer: Sebastian Schaffert <[email protected]>
Committed: Wed Oct 22 11:58:16 2014 +0200

----------------------------------------------------------------------
 .../commons/sesame/model/LiteralCommons.java | 83 +++++++-------------
 1 file changed, 29 insertions(+), 54 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/marmotta/blob/f3bf97b3/commons/marmotta-commons/src/main/java/org/apache/marmotta/commons/sesame/model/LiteralCommons.java
----------------------------------------------------------------------
diff --git a/commons/marmotta-commons/src/main/java/org/apache/marmotta/commons/sesame/model/LiteralCommons.java b/commons/marmotta-commons/src/main/java/org/apache/marmotta/commons/sesame/model/LiteralCommons.java
index 3eee735..a847c6b 100644
--- a/commons/marmotta-commons/src/main/java/org/apache/marmotta/commons/sesame/model/LiteralCommons.java
+++ b/commons/marmotta-commons/src/main/java/org/apache/marmotta/commons/sesame/model/LiteralCommons.java
@@ -37,6 +37,9 @@ import java.util.TimeZone;
  * Author: Sebastian Schaffert
  */
 public class LiteralCommons {
+    private static int HASH_BITS=128;
+
+
     private static DatatypeFactory dtf;
     static {
         try {
@@ -54,8 +57,8 @@ public class LiteralCommons {
      * @param type datatype URI of the literal (optional)
      * @return a 64bit hash key for the literal
      */
-    public static String createCacheKey(String content, Locale language, URI type) {
-        return createCacheKey(content, language, type != null ? type.stringValue() : null);
+    public static final String createCacheKey(String content, Locale language, URI type) {
+        return createCacheKey(content, language != null ? language.getLanguage() : null, type != null ? type.stringValue() : null);
     }
 
     /**
@@ -66,18 +69,12 @@ public class LiteralCommons {
      * @param type datatype URI of the literal (optional)
      * @return a 64bit hash key for the literal
      */
-    public static String createCacheKey(String content, Locale language, String type) {
-        Hasher hasher = Hashing.goodFastHash(64).newHasher();
-        hasher.putString(content, Charset.defaultCharset());
-        if(type != null) {
-            hasher.putString(type, Charset.defaultCharset());
-        }
-        if(language != null) {
-            hasher.putString(language.getLanguage().toLowerCase(), Charset.defaultCharset());
-        }
-        return hasher.hash().toString();
+    public static final String createCacheKey(String content, Locale language, String type) {
+        return createCacheKey(content, language != null ? language.getLanguage() : null, type);
     }
 
+
+
     /**
      * Create a cache key for the date literal with the given date. Converts the date
      * to a XMLGregorianCalendar with UTC timezone and then calls the method above.
@@ -86,14 +83,14 @@ public class LiteralCommons {
      * @param type datatype URI of the literal
      * @return a 64bit hash key for the literal
      */
-    public static String createCacheKey(Date date, String type) {
+    public static final String createCacheKey(Date date, String type) {
         GregorianCalendar cal = new GregorianCalendar(TimeZone.getTimeZone("UTC"));
         cal.setTime(date);
 
         XMLGregorianCalendar xml_cal = dtf.newXMLGregorianCalendar(cal).normalize();
         xml_cal.setTimezone(0);
 
-        return createCacheKey(xml_cal.toXMLFormat(), null, type);
+        return createCacheKey(xml_cal.toXMLFormat(), (String)null, type);
     }
 
     /**
@@ -103,51 +100,29 @@ public class LiteralCommons {
      * @param l the literal to create the hash for
      * @return a 64bit hash key for the literal
      */
-    public static String createCacheKey(Literal l) {
-        Hasher hasher = Hashing.goodFastHash(128).newHasher();
-        hasher.putString(l.getLabel(), Charset.defaultCharset());
-        if(l.getDatatype() != null) {
-            hasher.putString(l.getDatatype().stringValue(), Charset.defaultCharset());
-        }
-        if(l.getLanguage() != null) {
-            hasher.putString(l.getLanguage().toLowerCase(), Charset.defaultCharset());
-        }
-        return hasher.hash().toString();
+    public static final String createCacheKey(Literal l) {
+        return createCacheKey(l.getLabel(), l.getLanguage(), l.getDatatype() != null ? l.getDatatype().stringValue() : null);
     }
 
+
     /**
-     * Get an appropriate RDF type for the mime type passed as argument.
-     * @param mime_type
-     * @return
+     * Create a cache key for a literal with the given content, locale and type
+     *
+     * @param content string content representing the literal (can be an MD5 sum for binary types)
+     * @param language language of the literal (optional)
+     * @param type datatype URI of the literal (optional)
+     * @return a 64bit hash key for the literal
      */
-    public static String getRDFType(String mime_type) {
-        String iw_type = "MultimediaObject";
-        if (mime_type.startsWith("image")) {
-            iw_type = "Image";
-        } else if (mime_type.startsWith("video/flash")) {
-            iw_type = "FlashVideo";
-        } else if (mime_type.startsWith("video")) {
-            iw_type = "Video";
-        } else if (mime_type.startsWith("application/pdf")) {
-            iw_type = "PDFDocument";
-        } else if (mime_type.startsWith("application/msword")) {
-            iw_type = "MSWordDocument";
-        } else if (mime_type
-                .startsWith("application/vnd.oasis.opendocument")
-                || mime_type.startsWith("application/postscript")
-                || mime_type.startsWith("application/vnd.ms-")) {
-            iw_type = "Document";
-        } else if (mime_type.startsWith("audio/mpeg")
-                || mime_type.startsWith("audio/mp3")) {
-            iw_type = "MP3Audio";
-        } else if (mime_type.startsWith("audio")) {
-            iw_type = "Audio";
-        } else if (mime_type.startsWith("text/html")) {
-            iw_type = "HTML";
-        } else if (mime_type.startsWith("text")) {
-            iw_type = "TEXT";
+    public static final String createCacheKey(String content, String language, String type) {
+        Hasher hasher = Hashing.goodFastHash(HASH_BITS).newHasher();
+        hasher.putString(content, Charset.defaultCharset());
+        if(type != null) {
+            hasher.putString(type, Charset.defaultCharset());
         }
-        return Namespaces.NS_KIWI_CORE + iw_type;
+        if(language != null) {
+            hasher.putString(language.toLowerCase(), Charset.defaultCharset());
+        }
+        return hasher.hash().toString();
     }
 
     /**
