This is an automated email from the ASF dual-hosted git repository. snagel pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/nutch.git
The following commit(s) were added to refs/heads/master by this push: new 7fb201c NUTCH-2902 Jexl parsing error on statements (contributed by Max Ockner) - use JexlScript instead of JexlExpression in Generator, CrawlDb/HostDb reader, Jexl exchange and indexing filter new e837324 Merge branch 'NUTCH-2902' (contributed by Max Ockner) 7fb201c is described below commit 7fb201c03318a3137df0c353a022d8a13acc6143 Author: Sebastian Nagel <sna...@apache.org> AuthorDate: Thu Nov 18 10:55:44 2021 +0100 NUTCH-2902 Jexl parsing error on statements (contributed by Max Ockner) - use JexlScript instead of JexlExpression in Generator, CrawlDb/HostDb reader, Jexl exchange and indexing filter --- src/java/org/apache/nutch/crawl/CrawlDatum.java | 6 +++--- src/java/org/apache/nutch/crawl/CrawlDbReader.java | 8 ++++---- src/java/org/apache/nutch/crawl/Generator.java | 14 +++++++------- src/java/org/apache/nutch/hostdb/ReadHostDb.java | 8 ++++---- src/java/org/apache/nutch/util/JexlUtil.java | 6 +++--- .../java/org/apache/nutch/exchange/jexl/JexlExchange.java | 6 +++--- .../org/apache/nutch/indexer/jexl/JexlIndexingFilter.java | 6 +++--- 7 files changed, 27 insertions(+), 27 deletions(-) diff --git a/src/java/org/apache/nutch/crawl/CrawlDatum.java b/src/java/org/apache/nutch/crawl/CrawlDatum.java index bf51eb0..f32cec1 100644 --- a/src/java/org/apache/nutch/crawl/CrawlDatum.java +++ b/src/java/org/apache/nutch/crawl/CrawlDatum.java @@ -26,7 +26,7 @@ import java.util.Map; import java.util.Map.Entry; import org.apache.commons.jexl3.JexlContext; -import org.apache.commons.jexl3.JexlExpression; +import org.apache.commons.jexl3.JexlScript; import org.apache.commons.jexl3.MapContext; import org.apache.hadoop.io.FloatWritable; import org.apache.hadoop.io.IntWritable; @@ -558,7 +558,7 @@ public class CrawlDatum implements WritableComparable<CrawlDatum>, Cloneable { } } - public boolean evaluate(JexlExpression expr, String url) { + public boolean execute(JexlScript expr, String url) { if (expr != null && url != null) { // Create a context and add data JexlContext jcontext = new MapContext(); @@ -601,7 +601,7 @@ public class CrawlDatum implements WritableComparable<CrawlDatum>, Cloneable { } try { - if (Boolean.TRUE.equals(expr.evaluate(jcontext))) { + if (Boolean.TRUE.equals(expr.execute(jcontext))) { return true; } } catch (Exception e) { diff --git a/src/java/org/apache/nutch/crawl/CrawlDbReader.java b/src/java/org/apache/nutch/crawl/CrawlDbReader.java index 3af63d3..2a20a56 100644 --- a/src/java/org/apache/nutch/crawl/CrawlDbReader.java +++ b/src/java/org/apache/nutch/crawl/CrawlDbReader.java @@ -74,7 +74,7 @@ import org.apache.nutch.util.NutchJob; import org.apache.nutch.util.SegmentReaderUtil; import org.apache.nutch.util.StringUtil; import org.apache.nutch.util.TimingUtil; -import org.apache.commons.jexl3.JexlExpression; +import org.apache.commons.jexl3.JexlScript; import com.fasterxml.jackson.core.JsonGenerationException; import com.fasterxml.jackson.core.JsonGenerator; @@ -864,7 +864,7 @@ public class CrawlDbReader extends AbstractChecker implements Closeable { Matcher matcher = null; String status = null; Integer retry = null; - JexlExpression expr = null; + JexlScript expr = null; float sample; @Override @@ -913,7 +913,7 @@ public class CrawlDbReader extends AbstractChecker implements Closeable { // check expr if (expr != null) { - if (!value.evaluate(expr, key.toString())) { + if (!value.execute(expr, key.toString())) { return; } } @@ -1033,7 +1033,7 @@ public class CrawlDbReader extends AbstractChecker implements Closeable { System.err.println( "\t\t[-status <status>]\tfilter records by CrawlDatum status"); System.err.println( - "\t\t[-expr <expr>]\tJexl expression to evaluate for this record"); + "\t\t[-expr <expr>]\tJexl expression to execute for this record"); System.err.println( "\t\t[-sample <fraction>]\tOnly process a random sample with this ratio"); System.err diff --git a/src/java/org/apache/nutch/crawl/Generator.java b/src/java/org/apache/nutch/crawl/Generator.java index 00eb18f..9fec0ec 100644 --- a/src/java/org/apache/nutch/crawl/Generator.java +++ b/src/java/org/apache/nutch/crawl/Generator.java @@ -34,7 +34,7 @@ import java.util.Random; import org.apache.hadoop.conf.Configurable; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.apache.commons.jexl3.JexlExpression; +import org.apache.commons.jexl3.JexlScript; import org.apache.commons.jexl3.JexlContext; import org.apache.commons.jexl3.MapContext; import org.apache.hadoop.mapreduce.Counter; @@ -184,7 +184,7 @@ public class Generator extends NutchTool implements Tool { private float scoreThreshold = 0f; private int intervalThreshold = -1; private byte restrictStatus = -1; - private JexlExpression expr = null; + private JexlScript expr = null; @Override public void setup( @@ -255,7 +255,7 @@ public class Generator extends NutchTool implements Tool { // check expr if (expr != null) { - if (!crawlDatum.evaluate(expr, key.toString())) { + if (!crawlDatum.execute(expr, key.toString())) { context.getCounter("Generator", "EXPR_REJECTED").increment(1); return; } @@ -308,8 +308,8 @@ public class Generator extends NutchTool implements Tool { private URLNormalizers normalizers; private static boolean normalise; private SequenceFile.Reader[] hostdbReaders = null; - private JexlExpression maxCountExpr = null; - private JexlExpression fetchDelayExpr = null; + private JexlScript maxCountExpr = null; + private JexlScript fetchDelayExpr = null; public void open() { if (conf.get(GENERATOR_HOSTDB) != null) { @@ -437,7 +437,7 @@ public class Generator extends NutchTool implements Tool { } else { if (maxCountExpr != null) { long variableMaxCount = Math - .round((double) maxCountExpr.evaluate(createContext(host))); + .round((double) maxCountExpr.execute(createContext(host))); LOG.info("Generator: variable maxCount: {} for {}", variableMaxCount, hostname); maxCount = (int) variableMaxCount; @@ -445,7 +445,7 @@ public class Generator extends NutchTool implements Tool { if (fetchDelayExpr != null) { long variableFetchDelay = Math - .round((double) fetchDelayExpr.evaluate(createContext(host))); + .round((double) fetchDelayExpr.execute(createContext(host))); LOG.info("Generator: variable fetchDelay: {} ms for {}", variableFetchDelay, hostname); variableFetchDelayWritable = new LongWritable(variableFetchDelay); diff --git a/src/java/org/apache/nutch/hostdb/ReadHostDb.java b/src/java/org/apache/nutch/hostdb/ReadHostDb.java index be9f459..f4bd742 100644 --- a/src/java/org/apache/nutch/hostdb/ReadHostDb.java +++ b/src/java/org/apache/nutch/hostdb/ReadHostDb.java @@ -46,7 +46,7 @@ import org.apache.nutch.util.SegmentReaderUtil; import org.apache.commons.jexl3.JexlBuilder; import org.apache.commons.jexl3.JexlContext; -import org.apache.commons.jexl3.JexlExpression; +import org.apache.commons.jexl3.JexlScript; import org.apache.commons.jexl3.JexlEngine; import org.apache.commons.jexl3.MapContext; @@ -68,7 +68,7 @@ public class ReadHostDb extends Configured implements Tool { protected boolean dumpHomepages = false; protected boolean fieldHeader = true; protected Text emptyText = new Text(); - protected JexlExpression expr = null; + protected JexlScript expr = null; @Override public void setup(Context context) { @@ -81,7 +81,7 @@ public class ReadHostDb extends Configured implements Tool { JexlEngine jexl = new JexlBuilder().silent(true).strict(true).create(); // Create an expression object - this.expr = jexl.createExpression(expr); + this.expr = jexl.createScript(expr); } } @@ -128,7 +128,7 @@ public class ReadHostDb extends Configured implements Tool { // Filter this record if evaluation did not pass try { - if (!Boolean.TRUE.equals(expr.evaluate(jcontext))) { + if (!Boolean.TRUE.equals(expr.execute(jcontext))) { return; } } catch (Exception e) { diff --git a/src/java/org/apache/nutch/util/JexlUtil.java b/src/java/org/apache/nutch/util/JexlUtil.java index 24f9fe6..370ba7a 100644 --- a/src/java/org/apache/nutch/util/JexlUtil.java +++ b/src/java/org/apache/nutch/util/JexlUtil.java @@ -23,7 +23,7 @@ import java.util.regex.Pattern; import org.apache.commons.jexl3.JexlBuilder; import org.apache.commons.jexl3.JexlEngine; -import org.apache.commons.jexl3.JexlExpression; +import org.apache.commons.jexl3.JexlScript; import org.apache.commons.lang.time.DateUtils; import org.slf4j.Logger; @@ -47,7 +47,7 @@ public class JexlUtil { * @param expr string JEXL expression * @return parsed JEXL expression or null in case of parse error */ - public static JexlExpression parseExpression(String expr) { + public static JexlScript parseExpression(String expr) { if (expr == null) return null; try { @@ -68,7 +68,7 @@ public class JexlUtil { JexlEngine jexl = new JexlBuilder().silent(true).strict(true).create(); - return jexl.createExpression(expr); + return jexl.createScript(expr); } catch (Exception e) { LOG.error(e.getMessage()); } diff --git a/src/plugin/exchange-jexl/src/java/org/apache/nutch/exchange/jexl/JexlExchange.java b/src/plugin/exchange-jexl/src/java/org/apache/nutch/exchange/jexl/JexlExchange.java index e889040..a555575 100644 --- a/src/plugin/exchange-jexl/src/java/org/apache/nutch/exchange/jexl/JexlExchange.java +++ b/src/plugin/exchange-jexl/src/java/org/apache/nutch/exchange/jexl/JexlExchange.java @@ -16,7 +16,7 @@ */ package org.apache.nutch.exchange.jexl; -import org.apache.commons.jexl3.JexlExpression; +import org.apache.commons.jexl3.JexlScript; import org.apache.commons.jexl3.JexlContext; import org.apache.commons.jexl3.MapContext; import org.apache.hadoop.conf.Configuration; @@ -32,7 +32,7 @@ public class JexlExchange implements Exchange { private Configuration conf; - private JexlExpression expression; + private JexlScript expression; /** * Initializes the internal variables. @@ -57,7 +57,7 @@ public class JexlExchange implements Exchange { jexlContext.set("doc", doc); try { - if (Boolean.TRUE.equals(expression.evaluate(jexlContext))) { + if (Boolean.TRUE.equals(expression.execute(jexlContext))) { return true; } } catch (Exception ignored) { diff --git a/src/plugin/index-jexl-filter/src/java/org/apache/nutch/indexer/jexl/JexlIndexingFilter.java b/src/plugin/index-jexl-filter/src/java/org/apache/nutch/indexer/jexl/JexlIndexingFilter.java index ac387c0..e1fa792 100644 --- a/src/plugin/index-jexl-filter/src/java/org/apache/nutch/indexer/jexl/JexlIndexingFilter.java +++ b/src/plugin/index-jexl-filter/src/java/org/apache/nutch/indexer/jexl/JexlIndexingFilter.java @@ -20,7 +20,7 @@ import java.lang.invoke.MethodHandles; import java.util.List; import java.util.Map.Entry; -import org.apache.commons.jexl3.JexlExpression; +import org.apache.commons.jexl3.JexlScript; import org.apache.commons.jexl3.JexlContext; import org.apache.commons.jexl3.MapContext; import org.apache.hadoop.conf.Configuration; @@ -48,7 +48,7 @@ public class JexlIndexingFilter implements IndexingFilter { .getLogger(MethodHandles.lookup().lookupClass()); private Configuration conf; - private JexlExpression expr; + private JexlScript expr; @Override public NutchDocument filter(NutchDocument doc, Parse parse, Text url, @@ -92,7 +92,7 @@ public class JexlIndexingFilter implements IndexingFilter { jcontext.set("doc", context); try { - if (Boolean.TRUE.equals(expr.evaluate(jcontext))) { + if (Boolean.TRUE.equals(expr.execute(jcontext))) { return doc; } } catch (Exception e) {