This is an automated email from the ASF dual-hosted git repository.
snagel pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git
The following commit(s) were added to refs/heads/master by this push:
new 34d2d20 NUTCH-2525 Metadata indexer cannot handle uppercase parse
metadata - apply patch contributed by Jurian Broertjes
new 5de2679 Merge pull request #493 from sebastian-nagel/NUTCH-2525
34d2d20 is described below
commit 34d2d20afc3ff06626723ab868548362271b0fee
Author: Sebastian Nagel <[email protected]>
AuthorDate: Mon Sep 30 17:55:14 2019 +0200
NUTCH-2525 Metadata indexer cannot handle uppercase parse metadata
- apply patch contributed by Jurian Broertjes
---
.../org/apache/nutch/indexer/metadata/MetadataIndexer.java | 14 ++++++--------
1 file changed, 6 insertions(+), 8 deletions(-)
diff --git a/src/plugin/index-metadata/src/java/org/apache/nutch/indexer/metadata/MetadataIndexer.java b/src/plugin/index-metadata/src/java/org/apache/nutch/indexer/metadata/MetadataIndexer.java
index be56377..3d4f9c5 100644
--- a/src/plugin/index-metadata/src/java/org/apache/nutch/indexer/metadata/MetadataIndexer.java
+++ b/src/plugin/index-metadata/src/java/org/apache/nutch/indexer/metadata/MetadataIndexer.java
@@ -42,7 +42,7 @@ import org.apache.nutch.parse.Parse;
public class MetadataIndexer implements IndexingFilter {
private Configuration conf;
private String[] dbFieldnames;
- private Map<String, String> parseFieldnames;
+ private String[] parseFieldnames;
private String[] contentFieldnames;
private String separator;
private Set<String> mvFields;
@@ -70,10 +70,10 @@ public class MetadataIndexer implements IndexingFilter {
// add the fields from parsemd
if (parseFieldnames != null) {
- for (String metatag : parseFieldnames.keySet()) {
+ for (String metatag : parseFieldnames) {
for (String value : parse.getData().getParseMeta().getValues(metatag)) {
if (value != null)
- add(doc, parseFieldnames.get(metatag), value);
+ add(doc, metatag, value);
}
}
}
@@ -111,14 +111,12 @@ public class MetadataIndexer implements IndexingFilter {
public void setConf(Configuration conf) {
this.conf = conf;
dbFieldnames = conf.getStrings(db_CONF_PROPERTY);
- parseFieldnames = new HashMap<String, String>();
- for (String metatag : conf.getStrings(parse_CONF_PROPERTY)) {
- parseFieldnames.put(metatag.toLowerCase(Locale.ROOT), metatag);
- }
+ parseFieldnames = conf.getStrings(parse_CONF_PROPERTY);
contentFieldnames = conf.getStrings(content_CONF_PROPERTY);
separator = conf.get(separator_CONF_PROPERTY, null);
- mvFields = new HashSet(Arrays.asList(conf.getStrings(mvfields_CONF_PROPERTY, new String[0])));
+ mvFields = new HashSet<>(
+ Arrays.asList(conf.getStrings(mvfields_CONF_PROPERTY, new String[0])));
// TODO check conflict between field names e.g. could have same label
// from different sources