Author: nick
Date: Wed Aug 13 16:59:08 2014
New Revision: 1617765
URL: http://svn.apache.org/r1617765
Log:
For places that format numbers in fixed formats, or compare ASCII strings
case-insensitively, use Locale.ROOT rather than Locale.getDefault() to ensure
predictable behaviour and avoid issues in locales like Turkish. TIKA-1387
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/iwork/AutoPageNumberUtils.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/embedder/ExternalEmbedderTest.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/image/ImageMetadataExtractorTest.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaResource.java
tika/trunk/tika-server/src/main/java/org/apache/tika/server/UnpackerResource.java
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/iwork/AutoPageNumberUtils.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/iwork/AutoPageNumberUtils.java?rev=1617765&r1=1617764&r2=1617765&view=diff
==============================================================================
---
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/iwork/AutoPageNumberUtils.java
(original)
+++
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/iwork/AutoPageNumberUtils.java
Wed Aug 13 16:59:08 2014
@@ -46,7 +46,7 @@ import java.util.Locale;
}
public static String asAlphaNumericLower(int i) {
- return asAlphaNumeric(i).toLowerCase(Locale.getDefault());
+ return asAlphaNumeric(i).toLowerCase(Locale.ROOT);
}
/*
@@ -75,7 +75,7 @@ import java.util.Locale;
}
public static String asRomanNumeralsLower(int i) {
- return asRomanNumerals(i).toLowerCase(Locale.getDefault());
+ return asRomanNumerals(i).toLowerCase(Locale.ROOT);
}
private static int i2r(StringBuffer sbuff, int i,
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java?rev=1617765&r1=1617764&r2=1617765&view=diff
==============================================================================
---
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
(original)
+++
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
Wed Aug 13 16:59:08 2014
@@ -84,7 +84,7 @@ class PDF2XHTML extends PDFTextStripper
/**
* format used for signature dates
*/
- private final SimpleDateFormat dateFormat = new
SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.getDefault());
+ private final SimpleDateFormat dateFormat = new
SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.ROOT);
/**
* Maximum recursive depth during AcroForm processing.
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java?rev=1617765&r1=1617764&r2=1617765&view=diff
==============================================================================
---
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
(original)
+++
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
Wed Aug 13 16:59:08 2014
@@ -242,7 +242,7 @@ public class PDFParser extends AbstractP
metadata.set("pdfaid:part",
Integer.toString(pdfaxmp.getPart()));
if (pdfaxmp.getConformance() != null) {
metadata.set("pdfaid:conformance",
pdfaxmp.getConformance());
- String version =
"A-"+pdfaxmp.getPart()+pdfaxmp.getConformance().toLowerCase(Locale.getDefault());
+ String version =
"A-"+pdfaxmp.getPart()+pdfaxmp.getConformance().toLowerCase(Locale.ROOT);
metadata.set("pdfa:PDFVersion", version );
metadata.add(TikaCoreProperties.FORMAT.getName(),
MEDIA_TYPE.toString()+"; version=\""+version+"\""
);
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java?rev=1617765&r1=1617764&r2=1617765&view=diff
==============================================================================
---
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
(original)
+++
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
Wed Aug 13 16:59:08 2014
@@ -338,9 +338,9 @@ public class PDFParserConfig implements
if (p == null){
return defaultMissing;
}
- if (p.toLowerCase(Locale.getDefault()).equals("true")) {
+ if (p.toLowerCase(Locale.ROOT).equals("true")) {
return true;
- } else if (p.toLowerCase(Locale.getDefault()).equals("false")) {
+ } else if (p.toLowerCase(Locale.ROOT).equals("false")) {
return false;
} else {
return defaultMissing;
Modified:
tika/trunk/tika-parsers/src/test/java/org/apache/tika/embedder/ExternalEmbedderTest.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/embedder/ExternalEmbedderTest.java?rev=1617765&r1=1617764&r2=1617765&view=diff
==============================================================================
---
tika/trunk/tika-parsers/src/test/java/org/apache/tika/embedder/ExternalEmbedderTest.java
(original)
+++
tika/trunk/tika-parsers/src/test/java/org/apache/tika/embedder/ExternalEmbedderTest.java
Wed Aug 13 16:59:08 2014
@@ -59,7 +59,7 @@ import org.xml.sax.SAXException;
public class ExternalEmbedderTest {
protected static final DateFormat EXPECTED_METADATA_DATE_FORMATTER =
- new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.getDefault());
+ new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.ROOT);
protected static final String DEFAULT_CHARSET = "UTF-8";
private static final String COMMAND_METADATA_ARGUMENT_DESCRIPTION =
"dc:description";
private static final String TEST_TXT_PATH = "/test-documents/testTXT.txt";
Modified:
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/image/ImageMetadataExtractorTest.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/image/ImageMetadataExtractorTest.java?rev=1617765&r1=1617764&r2=1617765&view=diff
==============================================================================
---
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/image/ImageMetadataExtractorTest.java
(original)
+++
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/image/ImageMetadataExtractorTest.java
Wed Aug 13 16:59:08 2014
@@ -75,7 +75,7 @@ public class ImageMetadataExtractorTest
public void testExifHandlerParseDate() throws MetadataException {
ExifSubIFDDirectory exif = mock(ExifSubIFDDirectory.class);
when(exif.containsTag(ExifSubIFDDirectory.TAG_DATETIME_ORIGINAL)).thenReturn(true);
- GregorianCalendar calendar = new
GregorianCalendar(TimeZone.getDefault(), Locale.getDefault());
+ GregorianCalendar calendar = new
GregorianCalendar(TimeZone.getDefault(), Locale.ROOT);
calendar.setTimeInMillis(0);
calendar.set(2000, 0, 1, 0, 0, 0);
when(exif.getDate(ExifSubIFDDirectory.TAG_DATETIME_ORIGINAL)).thenReturn(
@@ -91,7 +91,7 @@ public class ImageMetadataExtractorTest
public void testExifHandlerParseDateFallback() throws MetadataException {
ExifIFD0Directory exif = mock(ExifIFD0Directory.class);
when(exif.containsTag(ExifIFD0Directory.TAG_DATETIME)).thenReturn(true);
- GregorianCalendar calendar = new
GregorianCalendar(TimeZone.getDefault(), Locale.getDefault());
+ GregorianCalendar calendar = new
GregorianCalendar(TimeZone.getDefault(), Locale.ROOT);
calendar.setTimeInMillis(0);
calendar.set(1999, 0, 1, 0, 0, 0);
when(exif.getDate(ExifIFD0Directory.TAG_DATETIME)).thenReturn(
Modified:
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java?rev=1617765&r1=1617764&r2=1617765&view=diff
==============================================================================
---
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
(original)
+++
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
Wed Aug 13 16:59:08 2014
@@ -567,7 +567,7 @@ public class PDFParserTest extends TikaT
Set<String> knownContentDiffs = new HashSet<String>();
for (File f : testDocs.listFiles()) {
- if (!
f.getName().toLowerCase(Locale.getDefault()).endsWith(".pdf")) {
+ if (! f.getName().toLowerCase(Locale.ROOT).endsWith(".pdf")) {
continue;
}
Modified:
tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaResource.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaResource.java?rev=1617765&r1=1617764&r2=1617765&view=diff
==============================================================================
---
tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaResource.java
(original)
+++
tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaResource.java
Wed Aug 13 16:59:08 2014
@@ -205,7 +205,7 @@ public static void fillMetadata(AutoDete
throw new WebApplicationException(e);
} catch (EncryptedDocumentException e) {
logger.warn(String.format(
- Locale.getDefault(),
+ Locale.ROOT,
"%s: Encrypted document",
info.getPath()
), e);
@@ -213,7 +213,7 @@ public static void fillMetadata(AutoDete
throw new WebApplicationException(e, Response.status(422).build());
} catch (TikaException e) {
logger.warn(String.format(
- Locale.getDefault(),
+ Locale.ROOT,
"%s: Text extraction failed",
info.getPath()
), e);
@@ -306,7 +306,7 @@ public static void fillMetadata(AutoDete
}
catch (EncryptedDocumentException e) {
logger.warn(String.format(
- Locale.getDefault(),
+ Locale.ROOT,
"%s: Encrypted document",
info.getPath()
), e);
@@ -314,7 +314,7 @@ public static void fillMetadata(AutoDete
}
catch (TikaException e) {
logger.warn(String.format(
- Locale.getDefault(),
+ Locale.ROOT,
"%s: Text extraction failed",
info.getPath()
), e);
@@ -340,13 +340,13 @@ public static void fillMetadata(AutoDete
public static void logRequest(Log logger, UriInfo info, Metadata metadata) {
if (metadata.get(org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE)==null)
{
logger.info(String.format(
- Locale.getDefault(),
+ Locale.ROOT,
"%s (autodetecting type)",
info.getPath()
));
} else {
logger.info(String.format(
- Locale.getDefault(),
+ Locale.ROOT,
"%s (%s)",
info.getPath(),
metadata.get(org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE)
Modified:
tika/trunk/tika-server/src/main/java/org/apache/tika/server/UnpackerResource.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/UnpackerResource.java?rev=1617765&r1=1617764&r2=1617765&view=diff
==============================================================================
---
tika/trunk/tika-server/src/main/java/org/apache/tika/server/UnpackerResource.java
(original)
+++
tika/trunk/tika-server/src/main/java/org/apache/tika/server/UnpackerResource.java
Wed Aug 13 16:59:08 2014
@@ -127,7 +127,7 @@ public class UnpackerResource {
parser.parse(is, ch, metadata, pc);
} catch (TikaException ex) {
logger.warn(String.format(
- Locale.getDefault(),
+ Locale.ROOT,
"%s: Unpacker failed",
info.getPath()
), ex);