Author: jukka
Date: Fri Feb 17 15:09:21 2012
New Revision: 1245600
URL: http://svn.apache.org/viewvc?rev=1245600&view=rev
Log:
TIKA-864: Metadata.formatDate causes blocking in concurrent use
Implement date calculation with GregorianCalendar instead of the thread-unsafe
SimpleDateFormat in formatDate().
Improved documentation for the time zone constants we're using.
Modified:
tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Metadata.java
Modified:
tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Metadata.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Metadata.java?rev=1245600&r1=1245599&r2=1245600&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Metadata.java
(original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/Metadata.java
Fri Feb 17 15:09:21 2012
@@ -21,8 +21,10 @@ import java.text.DateFormat;
import java.text.DateFormatSymbols;
import java.text.ParseException;
import java.text.SimpleDateFormat;
+import java.util.Calendar;
import java.util.Date;
import java.util.Enumeration;
+import java.util.GregorianCalendar;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
@@ -45,11 +47,22 @@ public class Metadata implements Creativ
private Map<String, String[]> metadata = null;
/**
- * The ISO-8601 format string we use for Dates.
- * All dates are represented as UTC
+ * The UTC time zone. Not sure if {@link TimeZone#getTimeZone(String)}
+ * understands "UTC" in all environments, but it'll fall back to GMT
+ * in such cases, which is in practice equivalent to UTC.
+ */
+ private static final TimeZone UTC = TimeZone.getTimeZone("UTC");
+
+ /**
+ * Custom time zone used to interpret date values without a time
+ * component in a way that most likely falls within the same day
+ * regardless of in which time zone it is later interpreted. For
+ * example, the "2012-02-17" date would map to "2012-02-17T12:00:00Z"
+ * (instead of the default "2012-02-17T00:00:00Z"), which would still
+ * map to "2012-02-17" if interpreted in say Pacific time (while the
+ * default mapping would result in "2012-02-16" for UTC-8).
*/
- private static final DateFormat iso8601Format =
- createDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", "UTC");
+ private static final TimeZone MIDDAY = TimeZone.getTimeZone("GMT-12:00");
/**
* Some parsers will have the date as a ISO-8601 string
@@ -61,23 +74,23 @@ public class Metadata implements Creativ
*/
private static final DateFormat[] iso8601InputFormats = new DateFormat[] {
// yyyy-mm-ddThh...
- iso8601Format, // UTC/Zulu
+ createDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", UTC), // UTC/Zulu
createDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", null), // With timezone
createDateFormat("yyyy-MM-dd'T'HH:mm:ss", null), // Without
timezone
// yyyy-mm-dd hh...
- createDateFormat("yyyy-MM-dd' 'HH:mm:ss'Z'", "UTC"), // UTC/Zulu
+ createDateFormat("yyyy-MM-dd' 'HH:mm:ss'Z'", UTC), // UTC/Zulu
createDateFormat("yyyy-MM-dd' 'HH:mm:ssZ", null), // With timezone
createDateFormat("yyyy-MM-dd' 'HH:mm:ss", null), // Without
timezone
// Date without time, set to Midday UTC
- createDateFormat("yyyy-MM-dd", "GMT-12:00"), // Normal date format
- createDateFormat("yyyy:MM:dd", "GMT-12:00"), // Image (IPTC/EXIF)
format
+ createDateFormat("yyyy-MM-dd", MIDDAY), // Normal date
format
+ createDateFormat("yyyy:MM:dd", MIDDAY), // Image
(IPTC/EXIF) format
};
- private static DateFormat createDateFormat(String format, String timezone)
{
+ private static DateFormat createDateFormat(String format, TimeZone
timezone) {
SimpleDateFormat sdf =
new SimpleDateFormat(format, new DateFormatSymbols(Locale.US));
if (timezone != null) {
- sdf.setTimeZone(TimeZone.getTimeZone(timezone));
+ sdf.setTimeZone(timezone);
}
return sdf;
}
@@ -118,8 +131,17 @@ public class Metadata implements Creativ
* @param date given date
* @return ISO 8601 date string
*/
- private static synchronized String formatDate(Date date) {
- return iso8601Format.format(date);
+ private static String formatDate(Date date) {
+ Calendar calendar = GregorianCalendar.getInstance(UTC, Locale.US);
+ calendar.setTime(date);
+ return String.format(
+ "%04d-%02d-%02dT%02d:%02d:%02dZ",
+ calendar.get(Calendar.YEAR),
+ calendar.get(Calendar.MONTH) + 1,
+ calendar.get(Calendar.DAY_OF_MONTH),
+ calendar.get(Calendar.HOUR_OF_DAY),
+ calendar.get(Calendar.MINUTE),
+ calendar.get(Calendar.SECOND));
}
/**