This is an automated email from the ASF dual-hosted git repository.

snagel pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git


The following commit(s) were added to refs/heads/master by this push:
     new f4b6b37  NUTCH-2706 NUTCH-2650 -addBinaryContent -base64 flag can 
cause "String length must be a multiple of four" error in IndexingJob - use 
conversion to base64 encoding which works for various versions of the 
commons-codec library (1.4 and 1.11) and never returns a chunked string
f4b6b37 is described below

commit f4b6b37ca79099468d9ba2817b018a2a576bbef6
Author: Sebastian Nagel <sna...@apache.org>
AuthorDate: Fri May 3 23:25:14 2019 +0200

    NUTCH-2706 NUTCH-2650 -addBinaryContent -base64 flag can cause "String length must be a multiple of four" error in IndexingJob
    - use conversion to base64 encoding which works for various versions of
      the commons-codec library (1.4 and 1.11) and never returns a
      chunked string
---
 src/java/org/apache/nutch/indexer/IndexerMapReduce.java | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/java/org/apache/nutch/indexer/IndexerMapReduce.java b/src/java/org/apache/nutch/indexer/IndexerMapReduce.java
index 6fa2032..fedfeb7 100644
--- a/src/java/org/apache/nutch/indexer/IndexerMapReduce.java
+++ b/src/java/org/apache/nutch/indexer/IndexerMapReduce.java
@@ -22,6 +22,7 @@ import java.util.Collection;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.commons.codec.binary.Base64;
+import org.apache.commons.codec.binary.StringUtils;
 import org.apache.hadoop.conf.Configured;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
@@ -394,7 +395,10 @@ public class IndexerMapReduce extends Configured {
         String binary;
         if (base64) {
           // optionally encode as base64
-          binary = Base64.encodeBase64String(content.getContent());
+          // Note: we need a form which works with many versions of commons-codec (1.4, 1.11 and upwards),
+          // cf. NUTCH-2706.  The following returns a chunked string for commons-codec 1.4:
+          //   binary = Base64.encodeBase64String(content.getContent());
+          binary = StringUtils.newStringUtf8(Base64.encodeBase64(content.getContent(), false, false));
         } else {
           binary = new String(content.getContent());
         }

Reply via email to