Author: jukka
Date: Tue Nov 17 13:43:48 2009
New Revision: 881285
URL: http://svn.apache.org/viewvc?rev=881285&view=rev
Log:
TIKA-321: Optimize type detection speed
Add a simple benchmark class for testing type detection speed.
Added:
lucene/tika/trunk/tika-core/src/test/java/org/apache/tika/TypeDetectionBenchmark.java
(with props)
Added:
lucene/tika/trunk/tika-core/src/test/java/org/apache/tika/TypeDetectionBenchmark.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/test/java/org/apache/tika/TypeDetectionBenchmark.java?rev=881285&view=auto
==============================================================================
---
lucene/tika/trunk/tika-core/src/test/java/org/apache/tika/TypeDetectionBenchmark.java
(added)
+++
lucene/tika/trunk/tika-core/src/test/java/org/apache/tika/TypeDetectionBenchmark.java
Tue Nov 17 13:43:48 2009
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika;
+
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.InputStream;
+
+import org.apache.tika.io.IOUtils;
+
+/**
+ * Command-line benchmark that measures how long {@code Tika.detect()} takes
+ * on each file found under the given paths and prints one line per file.
+ */
+public class TypeDetectionBenchmark {
+
+    /** Number of timed detect() calls made for each file. */
+    private static final int ROUNDS = 1000;
+
+    private static final Tika tika = new Tika();
+
+    /**
+     * Benchmarks every path given on the command line, or the default
+     * test document directory when no arguments are given, and then
+     * prints the total wall-clock time in milliseconds.
+     *
+     * @param args files or directories to benchmark; may be empty
+     * @throws Exception if a file cannot be read or detection fails
+     */
+    public static void main(String[] args) throws Exception {
+        long start = System.currentTimeMillis();
+        if (args.length > 0) {
+            for (String arg : args) {
+                benchmark(new File(arg));
+            }
+        } else {
+            benchmark(new File(
+                    "../tika-parsers/src/test/resources/test-documents"));
+        }
+        System.out.println("Total benchmark time: "
+                + (System.currentTimeMillis() - start) + "ms");
+    }
+
+    /**
+     * Times type detection for a single file, or recurses into the
+     * children of a directory. Hidden files and directories are skipped.
+     *
+     * @param file the file or directory to process
+     * @throws Exception if the file cannot be read or detection fails
+     */
+    private static void benchmark(File file) throws Exception {
+        if (file.isHidden()) {
+            return;
+        }
+        if (file.isFile()) {
+            byte[] content;
+            InputStream input = new FileInputStream(file);
+            try {
+                content = IOUtils.toByteArray(input);
+            } finally {
+                // The content is in memory now; release the file handle
+                // before the timed loop starts.
+                input.close();
+            }
+            // Untimed first call supplies the detected type for the report.
+            String type = tika.detect(new ByteArrayInputStream(content));
+            long start = System.nanoTime();
+            for (int i = 0; i < ROUNDS; i++) {
+                tika.detect(new ByteArrayInputStream(content));
+            }
+            // Elapsed nanoseconds divided by the number of calls gives the
+            // per-call "ns" figure that the output label states.
+            System.out.printf("%6dns per Tika.detect(%s) = %s%n",
+                    (System.nanoTime() - start) / ROUNDS, file, type);
+            return;
+        }
+        // listFiles() returns null if the path is not a directory or an
+        // I/O error occurs, so guard before iterating.
+        File[] children = file.listFiles();
+        if (children != null) {
+            for (File child : children) {
+                benchmark(child);
+            }
+        }
+    }
+
+}
Propchange:
lucene/tika/trunk/tika-core/src/test/java/org/apache/tika/TypeDetectionBenchmark.java
------------------------------------------------------------------------------
svn:eol-style = native