Feedback incorporated - longer key name + confidence output to metadata
Project: http://git-wip-us.apache.org/repos/asf/tika/repo Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/f3e9d828 Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/f3e9d828 Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/f3e9d828 Branch: refs/heads/master Commit: f3e9d82865f141ef990d3f2ddc93af9be52c829f Parents: 0305cfb Author: Thamme Gowda <[email protected]> Authored: Sun Jun 12 13:11:23 2016 -0700 Committer: Thamme Gowda <[email protected]> Committed: Sun Jun 12 13:11:23 2016 -0700 ---------------------------------------------------------------------- .../tika/parser/recognition/ObjectRecognitionParser.java | 8 ++++++-- .../parser/recognition/ObjectRecognitionParserTest.java | 9 +++++---- 2 files changed, 11 insertions(+), 6 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tika/blob/f3e9d828/tika-parsers/src/main/java/org/apache/tika/parser/recognition/ObjectRecognitionParser.java ---------------------------------------------------------------------- diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/recognition/ObjectRecognitionParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/recognition/ObjectRecognitionParser.java index 648693e..4d5c14b 100644 --- a/tika-parsers/src/main/java/org/apache/tika/parser/recognition/ObjectRecognitionParser.java +++ b/tika-parsers/src/main/java/org/apache/tika/parser/recognition/ObjectRecognitionParser.java @@ -67,6 +67,8 @@ public class ObjectRecognitionParser extends AbstractParser { public static final Logger LOG = LoggerFactory.getLogger(ObjectRecognitionParser.class); public static final String MD_KEY = "OBJECT"; + public static final String MD_REC_IMPL_KEY = + ObjectRecognitionParser.class.getPackage().getName() + ".object.rec.impl"; private static final Comparator<RecognisedObject> DESC_CONFIDENCE_SORTER = new Comparator<RecognisedObject>() { @Override @@ -108,7 +110,7 @@ public class 
ObjectRecognitionParser extends AbstractParser { LOG.warn("{} is not available for service", recogniser.getClass()); return; } - metadata.set("object.rec.impl", recogniser.getClass().getSimpleName()); + metadata.set(MD_REC_IMPL_KEY, recogniser.getClass().getName()); long start = System.currentTimeMillis(); List<RecognisedObject> objects = recogniser.recognise(stream, handler, metadata, context); LOG.debug("Found {} objects", objects != null ? objects.size() : 0); @@ -123,7 +125,9 @@ public class ObjectRecognitionParser extends AbstractParser { if (object.getConfidence() >= minConfidence) { LOG.debug("Add {}", object); count++; - metadata.add(MD_KEY, object.getLabel()); + String mdValue = String.format(Locale.ENGLISH, "%s (%.5f)", + object.getLabel(), object.getConfidence()); + metadata.add(MD_KEY, mdValue); //writing to handler xhtml.startElement("li", "id", object.getId()); String text = String.format(Locale.ENGLISH, " %s [%s](confidence = %f )", http://git-wip-us.apache.org/repos/asf/tika/blob/f3e9d828/tika-parsers/src/test/java/org/apache/tika/parser/recognition/ObjectRecognitionParserTest.java ---------------------------------------------------------------------- diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/recognition/ObjectRecognitionParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/recognition/ObjectRecognitionParserTest.java index fc96b1d..d94eea6 100644 --- a/tika-parsers/src/test/java/org/apache/tika/parser/recognition/ObjectRecognitionParserTest.java +++ b/tika-parsers/src/test/java/org/apache/tika/parser/recognition/ObjectRecognitionParserTest.java @@ -43,7 +43,8 @@ public class ObjectRecognitionParserTest { private static final String CAT_IMAGE = "test-documents/testJPEG.jpg"; private static final ClassLoader loader = ObjectRecognitionParserTest.class.getClassLoader(); - @Ignore("If tensorflow not available Ignore") @Test + @Ignore("If tensorflow not available Ignore") + @Test public void jpegTesorflowTest() throws 
IOException, TikaException, SAXException { try(InputStream stream = loader.getResourceAsStream(CONFIG_FILE)){ @@ -55,13 +56,13 @@ public class ObjectRecognitionParserTest { List<String> lines = IOUtils.readLines(reader); String text = StringUtils.join(lines, " "); String[] expectedObjects = {"Egyptian cat", "Border collie"}; - HashSet<String> objects = new HashSet<>(); - objects.addAll(Arrays.asList(metadata.getValues(ObjectRecognitionParser.MD_KEY))); + String metaValues = StringUtils.join(metadata.getValues(ObjectRecognitionParser.MD_KEY), " "); for (String expectedObject : expectedObjects) { String message = "'" + expectedObject + "' must have been detected"; Assert.assertTrue(message, text.contains(expectedObject)); - Assert.assertTrue(message, objects.contains(expectedObject)); + Assert.assertTrue(message, metaValues.contains(expectedObject)); } + System.out.println(metadata); } } }
