This is an automated email from the ASF dual-hosted git repository.
snagel pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git
The following commit(s) were added to refs/heads/master by this push:
new 7fb201c NUTCH-2902 Jexl parsing error on statements (contributed by
Max Ockner) - use JexlScript instead of JexlExpression in Generator,
CrawlDb/HostDb reader, Jexl exchange and indexing filter
new e837324 Merge branch 'NUTCH-2902' (contributed by Max Ockner)
7fb201c is described below
commit 7fb201c03318a3137df0c353a022d8a13acc6143
Author: Sebastian Nagel <[email protected]>
AuthorDate: Thu Nov 18 10:55:44 2021 +0100
NUTCH-2902 Jexl parsing error on statements
(contributed by Max Ockner)
- use JexlScript instead of JexlExpression
in Generator, CrawlDb/HostDb reader,
Jexl exchange and indexing filter
---
src/java/org/apache/nutch/crawl/CrawlDatum.java | 6 +++---
src/java/org/apache/nutch/crawl/CrawlDbReader.java | 8 ++++----
src/java/org/apache/nutch/crawl/Generator.java | 14 +++++++-------
src/java/org/apache/nutch/hostdb/ReadHostDb.java | 8 ++++----
src/java/org/apache/nutch/util/JexlUtil.java | 6 +++---
.../java/org/apache/nutch/exchange/jexl/JexlExchange.java | 6 +++---
.../org/apache/nutch/indexer/jexl/JexlIndexingFilter.java | 6 +++---
7 files changed, 27 insertions(+), 27 deletions(-)
diff --git a/src/java/org/apache/nutch/crawl/CrawlDatum.java b/src/java/org/apache/nutch/crawl/CrawlDatum.java
index bf51eb0..f32cec1 100644
--- a/src/java/org/apache/nutch/crawl/CrawlDatum.java
+++ b/src/java/org/apache/nutch/crawl/CrawlDatum.java
@@ -26,7 +26,7 @@ import java.util.Map;
import java.util.Map.Entry;
import org.apache.commons.jexl3.JexlContext;
-import org.apache.commons.jexl3.JexlExpression;
+import org.apache.commons.jexl3.JexlScript;
import org.apache.commons.jexl3.MapContext;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
@@ -558,7 +558,7 @@ public class CrawlDatum implements WritableComparable<CrawlDatum>, Cloneable {
}
}
- public boolean evaluate(JexlExpression expr, String url) {
+ public boolean execute(JexlScript expr, String url) {
if (expr != null && url != null) {
// Create a context and add data
JexlContext jcontext = new MapContext();
@@ -601,7 +601,7 @@ public class CrawlDatum implements WritableComparable<CrawlDatum>, Cloneable {
}
try {
- if (Boolean.TRUE.equals(expr.evaluate(jcontext))) {
+ if (Boolean.TRUE.equals(expr.execute(jcontext))) {
return true;
}
} catch (Exception e) {
diff --git a/src/java/org/apache/nutch/crawl/CrawlDbReader.java b/src/java/org/apache/nutch/crawl/CrawlDbReader.java
index 3af63d3..2a20a56 100644
--- a/src/java/org/apache/nutch/crawl/CrawlDbReader.java
+++ b/src/java/org/apache/nutch/crawl/CrawlDbReader.java
@@ -74,7 +74,7 @@ import org.apache.nutch.util.NutchJob;
import org.apache.nutch.util.SegmentReaderUtil;
import org.apache.nutch.util.StringUtil;
import org.apache.nutch.util.TimingUtil;
-import org.apache.commons.jexl3.JexlExpression;
+import org.apache.commons.jexl3.JexlScript;
import com.fasterxml.jackson.core.JsonGenerationException;
import com.fasterxml.jackson.core.JsonGenerator;
@@ -864,7 +864,7 @@ public class CrawlDbReader extends AbstractChecker implements Closeable {
Matcher matcher = null;
String status = null;
Integer retry = null;
- JexlExpression expr = null;
+ JexlScript expr = null;
float sample;
@Override
@@ -913,7 +913,7 @@ public class CrawlDbReader extends AbstractChecker implements Closeable {
// check expr
if (expr != null) {
- if (!value.evaluate(expr, key.toString())) {
+ if (!value.execute(expr, key.toString())) {
return;
}
}
@@ -1033,7 +1033,7 @@ public class CrawlDbReader extends AbstractChecker implements Closeable {
System.err.println(
"\t\t[-status <status>]\tfilter records by CrawlDatum status");
System.err.println(
- "\t\t[-expr <expr>]\tJexl expression to evaluate for this record");
+ "\t\t[-expr <expr>]\tJexl expression to execute for this record");
System.err.println(
"\t\t[-sample <fraction>]\tOnly process a random sample with this ratio");
System.err
diff --git a/src/java/org/apache/nutch/crawl/Generator.java b/src/java/org/apache/nutch/crawl/Generator.java
index 00eb18f..9fec0ec 100644
--- a/src/java/org/apache/nutch/crawl/Generator.java
+++ b/src/java/org/apache/nutch/crawl/Generator.java
@@ -34,7 +34,7 @@ import java.util.Random;
import org.apache.hadoop.conf.Configurable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import org.apache.commons.jexl3.JexlExpression;
+import org.apache.commons.jexl3.JexlScript;
import org.apache.commons.jexl3.JexlContext;
import org.apache.commons.jexl3.MapContext;
import org.apache.hadoop.mapreduce.Counter;
@@ -184,7 +184,7 @@ public class Generator extends NutchTool implements Tool {
private float scoreThreshold = 0f;
private int intervalThreshold = -1;
private byte restrictStatus = -1;
- private JexlExpression expr = null;
+ private JexlScript expr = null;
@Override
public void setup(
@@ -255,7 +255,7 @@ public class Generator extends NutchTool implements Tool {
// check expr
if (expr != null) {
- if (!crawlDatum.evaluate(expr, key.toString())) {
+ if (!crawlDatum.execute(expr, key.toString())) {
context.getCounter("Generator", "EXPR_REJECTED").increment(1);
return;
}
@@ -308,8 +308,8 @@ public class Generator extends NutchTool implements Tool {
private URLNormalizers normalizers;
private static boolean normalise;
private SequenceFile.Reader[] hostdbReaders = null;
- private JexlExpression maxCountExpr = null;
- private JexlExpression fetchDelayExpr = null;
+ private JexlScript maxCountExpr = null;
+ private JexlScript fetchDelayExpr = null;
public void open() {
if (conf.get(GENERATOR_HOSTDB) != null) {
@@ -437,7 +437,7 @@ public class Generator extends NutchTool implements Tool {
} else {
if (maxCountExpr != null) {
long variableMaxCount = Math
- .round((double) maxCountExpr.evaluate(createContext(host)));
+ .round((double) maxCountExpr.execute(createContext(host)));
LOG.info("Generator: variable maxCount: {} for {}",
variableMaxCount, hostname);
maxCount = (int) variableMaxCount;
@@ -445,7 +445,7 @@ public class Generator extends NutchTool implements Tool {
if (fetchDelayExpr != null) {
long variableFetchDelay = Math
- .round((double) fetchDelayExpr.evaluate(createContext(host)));
+ .round((double) fetchDelayExpr.execute(createContext(host)));
LOG.info("Generator: variable fetchDelay: {} ms for {}",
variableFetchDelay, hostname);
variableFetchDelayWritable = new LongWritable(variableFetchDelay);
diff --git a/src/java/org/apache/nutch/hostdb/ReadHostDb.java b/src/java/org/apache/nutch/hostdb/ReadHostDb.java
index be9f459..f4bd742 100644
--- a/src/java/org/apache/nutch/hostdb/ReadHostDb.java
+++ b/src/java/org/apache/nutch/hostdb/ReadHostDb.java
@@ -46,7 +46,7 @@ import org.apache.nutch.util.SegmentReaderUtil;
import org.apache.commons.jexl3.JexlBuilder;
import org.apache.commons.jexl3.JexlContext;
-import org.apache.commons.jexl3.JexlExpression;
+import org.apache.commons.jexl3.JexlScript;
import org.apache.commons.jexl3.JexlEngine;
import org.apache.commons.jexl3.MapContext;
@@ -68,7 +68,7 @@ public class ReadHostDb extends Configured implements Tool {
protected boolean dumpHomepages = false;
protected boolean fieldHeader = true;
protected Text emptyText = new Text();
- protected JexlExpression expr = null;
+ protected JexlScript expr = null;
@Override
public void setup(Context context) {
@@ -81,7 +81,7 @@ public class ReadHostDb extends Configured implements Tool {
JexlEngine jexl = new JexlBuilder().silent(true).strict(true).create();
// Create an expression object
- this.expr = jexl.createExpression(expr);
+ this.expr = jexl.createScript(expr);
}
}
@@ -128,7 +128,7 @@ public class ReadHostDb extends Configured implements Tool {
// Filter this record if evaluation did not pass
try {
- if (!Boolean.TRUE.equals(expr.evaluate(jcontext))) {
+ if (!Boolean.TRUE.equals(expr.execute(jcontext))) {
return;
}
} catch (Exception e) {
diff --git a/src/java/org/apache/nutch/util/JexlUtil.java b/src/java/org/apache/nutch/util/JexlUtil.java
index 24f9fe6..370ba7a 100644
--- a/src/java/org/apache/nutch/util/JexlUtil.java
+++ b/src/java/org/apache/nutch/util/JexlUtil.java
@@ -23,7 +23,7 @@ import java.util.regex.Pattern;
import org.apache.commons.jexl3.JexlBuilder;
import org.apache.commons.jexl3.JexlEngine;
-import org.apache.commons.jexl3.JexlExpression;
+import org.apache.commons.jexl3.JexlScript;
import org.apache.commons.lang.time.DateUtils;
import org.slf4j.Logger;
@@ -47,7 +47,7 @@ public class JexlUtil {
* @param expr string JEXL expression
* @return parsed JEXL expression or null in case of parse error
*/
- public static JexlExpression parseExpression(String expr) {
+ public static JexlScript parseExpression(String expr) {
if (expr == null) return null;
try {
@@ -68,7 +68,7 @@ public class JexlUtil {
JexlEngine jexl = new JexlBuilder().silent(true).strict(true).create();
- return jexl.createExpression(expr);
+ return jexl.createScript(expr);
} catch (Exception e) {
LOG.error(e.getMessage());
}
diff --git a/src/plugin/exchange-jexl/src/java/org/apache/nutch/exchange/jexl/JexlExchange.java b/src/plugin/exchange-jexl/src/java/org/apache/nutch/exchange/jexl/JexlExchange.java
index e889040..a555575 100644
--- a/src/plugin/exchange-jexl/src/java/org/apache/nutch/exchange/jexl/JexlExchange.java
+++ b/src/plugin/exchange-jexl/src/java/org/apache/nutch/exchange/jexl/JexlExchange.java
@@ -16,7 +16,7 @@
*/
package org.apache.nutch.exchange.jexl;
-import org.apache.commons.jexl3.JexlExpression;
+import org.apache.commons.jexl3.JexlScript;
import org.apache.commons.jexl3.JexlContext;
import org.apache.commons.jexl3.MapContext;
import org.apache.hadoop.conf.Configuration;
@@ -32,7 +32,7 @@ public class JexlExchange implements Exchange {
private Configuration conf;
- private JexlExpression expression;
+ private JexlScript expression;
/**
* Initializes the internal variables.
@@ -57,7 +57,7 @@ public class JexlExchange implements Exchange {
jexlContext.set("doc", doc);
try {
- if (Boolean.TRUE.equals(expression.evaluate(jexlContext))) {
+ if (Boolean.TRUE.equals(expression.execute(jexlContext))) {
return true;
}
} catch (Exception ignored) {
diff --git a/src/plugin/index-jexl-filter/src/java/org/apache/nutch/indexer/jexl/JexlIndexingFilter.java b/src/plugin/index-jexl-filter/src/java/org/apache/nutch/indexer/jexl/JexlIndexingFilter.java
index ac387c0..e1fa792 100644
--- a/src/plugin/index-jexl-filter/src/java/org/apache/nutch/indexer/jexl/JexlIndexingFilter.java
+++ b/src/plugin/index-jexl-filter/src/java/org/apache/nutch/indexer/jexl/JexlIndexingFilter.java
@@ -20,7 +20,7 @@ import java.lang.invoke.MethodHandles;
import java.util.List;
import java.util.Map.Entry;
-import org.apache.commons.jexl3.JexlExpression;
+import org.apache.commons.jexl3.JexlScript;
import org.apache.commons.jexl3.JexlContext;
import org.apache.commons.jexl3.MapContext;
import org.apache.hadoop.conf.Configuration;
@@ -48,7 +48,7 @@ public class JexlIndexingFilter implements IndexingFilter {
.getLogger(MethodHandles.lookup().lookupClass());
private Configuration conf;
- private JexlExpression expr;
+ private JexlScript expr;
@Override
public NutchDocument filter(NutchDocument doc, Parse parse, Text url,
@@ -92,7 +92,7 @@ public class JexlIndexingFilter implements IndexingFilter {
jcontext.set("doc", context);
try {
- if (Boolean.TRUE.equals(expr.evaluate(jcontext))) {
+ if (Boolean.TRUE.equals(expr.execute(jcontext))) {
return doc;
}
} catch (Exception e) {