Author: jmssiera
Date: Fri Jun 25 13:40:38 2021
New Revision: 1891042
URL: http://svn.apache.org/viewvc?rev=1891042&view=rev
Log:
CONNECTORS-1667: Add specific interval param when tika is down
Modified:
manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/TikaConfig.java
manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/TikaExtractor.java
manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_en_US.properties
manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_es_ES.properties
manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_fr_FR.properties
manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_ja_JP.properties
manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_zh_CN.properties
manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/editConfiguration_Server.html
manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/viewConfiguration.html
Modified:
manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/TikaConfig.java
URL:
http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/TikaConfig.java?rev=1891042&r1=1891041&r2=1891042&view=diff
==============================================================================
---
manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/TikaConfig.java
(original)
+++
manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/TikaConfig.java
Fri Jun 25 13:40:38 2021
@@ -30,12 +30,14 @@ public class TikaConfig {
public static final String PARAM_CONNECTIONTIMEOUT = "connectionTimeout";
public static final String PARAM_SOCKETTIMEOUT = "socketTimeout";
public static final String PARAM_RETRYINTERVAL = "retryInterval";
+ public static final String PARAM_RETRYINTERVALTIKADOWN =
"retryIntervalTikaDown";
public static final String PARAM_RETRYNUMBER = "retryNumber";
public static final String TIKAHOSTNAME_DEFAULT = "localhost";
public static final String TIKAPORT_DEFAULT = "9998";
public static final String CONNECTIONTIMEOUT_DEFAULT = "60000";
public static final String SOCKETTIMEOUT_DEFAULT = "60000";
public static final String RETRYINTERVAL_DEFAULT = "20000";
+ public static final String RETRYINTERVALTIKADOWN_DEFAULT = "120000";
public static final String RETRYNUMBER_DEFAULT = "1";
// Specification nodes and values
Modified:
manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/TikaExtractor.java
URL:
http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/TikaExtractor.java?rev=1891042&r1=1891041&r2=1891042&view=diff
==============================================================================
---
manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/TikaExtractor.java
(original)
+++
manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/TikaExtractor.java
Fri Jun 25 13:40:38 2021
@@ -126,6 +126,9 @@ public class TikaExtractor extends org.a
/** Retry interval */
private String retryIntervalString = null;
+ /** Retry interval when Tika seems down */
+ private String retryIntervalTikaDownString = null;
+
/** Retry number */
private String retryNumberString = null;
@@ -146,6 +149,9 @@ public class TikaExtractor extends org.a
/** Retry interval */
private long retryInterval = -1L;
+ /** Retry interval when Tika seems down */
+ private long retryIntervalTikaDown = -1L;
+
/** Retry number */
private int retryNumber = -1;
@@ -221,6 +227,7 @@ public class TikaExtractor extends org.a
connectionTimeoutString =
configParameters.getParameter(TikaConfig.PARAM_CONNECTIONTIMEOUT);
socketTimeoutString =
configParameters.getParameter(TikaConfig.PARAM_SOCKETTIMEOUT);
retryIntervalString =
configParameters.getParameter(TikaConfig.PARAM_RETRYINTERVAL);
+ retryIntervalTikaDownString =
configParameters.getParameter(TikaConfig.PARAM_RETRYINTERVALTIKADOWN);
retryNumberString =
configParameters.getParameter(TikaConfig.PARAM_RETRYNUMBER);
}
@@ -235,6 +242,7 @@ public class TikaExtractor extends org.a
connectionTimeoutString = null;
socketTimeoutString = null;
retryIntervalString = null;
+ retryIntervalTikaDownString = null;
retryNumberString = null;
super.disconnect();
@@ -293,6 +301,11 @@ public class TikaExtractor extends org.a
throw new ManifoldCFException("Bad retry interval number: " +
retryIntervalString);
}
try {
+ this.retryIntervalTikaDown =
Long.parseLong(retryIntervalTikaDownString);
+ } catch (final NumberFormatException e) {
+ throw new ManifoldCFException("Bad retry interval when tika is down
number: " + retryIntervalTikaDownString);
+ }
+ try {
this.retryNumber = Integer.parseInt(retryNumberString);
} catch (final NumberFormatException e) {
throw new ManifoldCFException("Bad retry number: " +
retryNumberString);
@@ -448,6 +461,11 @@ public class TikaExtractor extends org.a
parameters.setParameter(TikaConfig.PARAM_RETRYINTERVAL, retryInterval);
}
+ final String retryIntervalTikaDown =
variableContext.getParameter(TikaConfig.PARAM_RETRYINTERVALTIKADOWN);
+ if (retryIntervalTikaDown != null) {
+ parameters.setParameter(TikaConfig.PARAM_RETRYINTERVALTIKADOWN,
retryIntervalTikaDown);
+ }
+
final String retryNumber =
variableContext.getParameter(TikaConfig.PARAM_RETRYNUMBER);
if (retryNumber != null) {
parameters.setParameter(TikaConfig.PARAM_RETRYNUMBER, retryNumber);
@@ -497,6 +515,11 @@ public class TikaExtractor extends org.a
retryInterval = TikaConfig.RETRYINTERVAL_DEFAULT;
}
+ String retryIntervalTikaDown =
parameters.getParameter(TikaConfig.PARAM_RETRYINTERVALTIKADOWN);
+ if (retryIntervalTikaDown == null) {
+ retryIntervalTikaDown = TikaConfig.RETRYINTERVALTIKADOWN_DEFAULT;
+ }
+
String retryNumber = parameters.getParameter(TikaConfig.PARAM_RETRYNUMBER);
if (retryNumber == null) {
retryNumber = TikaConfig.RETRYNUMBER_DEFAULT;
@@ -508,6 +531,7 @@ public class TikaExtractor extends org.a
velocityContext.put("CONNECTIONTIMEOUT", connectionTimeout);
velocityContext.put("SOCKETTIMEOUT", socketTimeout);
velocityContext.put("RETRYINTERVAL", retryInterval);
+ velocityContext.put("RETRYINTERVALTIKADOWN", retryIntervalTikaDown);
velocityContext.put("RETRYNUMBER", retryNumber);
}
@@ -584,7 +608,7 @@ public class TikaExtractor extends org.a
// work
Logging.ingest.warn("Tika Server unreachable while trying to process " +
documentURI + ", retrying...", e);
final long currentTime = System.currentTimeMillis();
- throw new ServiceInterruption("Tika Server connection down: " +
e.getMessage(), e, currentTime + retryInterval, -1L, -1, false);
+ throw new ServiceInterruption("Tika Server connection down: " +
e.getMessage(), e, currentTime + retryIntervalTikaDown, -1L, retryNumber,
false);
}
private void retryWithoutAbort(final Exception e) throws ServiceInterruption
{
@@ -723,6 +747,9 @@ public class TikaExtractor extends org.a
} else { // The tika server seems to be down : retry
{retryNumber} times and abort the
// job if it fails on
// each retry
+ resultCode = "TIKASERVEREXCEPTION";
+ description = "Tika seemed to be down when requested to
process document " + documentURI + " : " + e.getMessage();
+ tikaServerResultCode = handleTikaServerError(description);
triggerServiceInterruption(documentURI, e);
}
} catch (final NoHttpResponseException e) {
@@ -733,6 +760,9 @@ public class TikaExtractor extends org.a
} catch (final IOException e) { // Unknown problem with the Tika
Server. Retry {retryNumber} times and abort
// the job if it fails on
// each retry
+ resultCode = "TIKASERVEREXCEPTION";
+ description = "Unknown Tika problem when processing document " +
documentURI + " : " + e.getMessage();
+ tikaServerResultCode = handleTikaServerError(description);
triggerServiceInterruption(documentURI, e);
}
if (response != null) {
@@ -1174,7 +1204,7 @@ public class TikaExtractor extends org.a
final List<Map<String, String>> fieldMappings = new ArrayList<>();
String keepAllMetadataValue = "true";
String lowernamesValue = "true";
- String writeLimitValue = "1000000";
+ String writeLimitValue = "1000000"; // 1Mo by default
String extractArchives = "false";
String maxEmbeddedResources = "";
for (int i = 0; i < os.getChildCount(); i++) {
Modified:
manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_en_US.properties
URL:
http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_en_US.properties?rev=1891042&r1=1891041&r2=1891042&view=diff
==============================================================================
---
manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_en_US.properties
(original)
+++
manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_en_US.properties
Fri Jun 25 13:40:38 2021
@@ -14,6 +14,7 @@
# limitations under the License.
TikaExtractor.RetryInterval=Retry interval (in ms):
+TikaExtractor.RetryIntervalTikaDown=Retry interval when Tika is down (in ms):
TikaExtractor.RetryNumber=Number of retries:
TikaExtractor.ExtractArchives=Extract archives content:
TikaExtractor.ConnectionTimeout=Connection timeout:
Modified:
manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_es_ES.properties
URL:
http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_es_ES.properties?rev=1891042&r1=1891041&r2=1891042&view=diff
==============================================================================
---
manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_es_ES.properties
(original)
+++
manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_es_ES.properties
Fri Jun 25 13:40:38 2021
@@ -14,6 +14,7 @@
# limitations under the License.
TikaExtractor.RetryInterval=Retry interval (in ms):
+TikaExtractor.RetryIntervalTikaDown=Retry interval when Tika is down (in ms):
TikaExtractor.RetryNumber=Number of retries:
TikaExtractor.ExtractArchives=Extract archives content:
TikaExtractor.ConnectionTimeout=Connection timeout:
Modified:
manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_fr_FR.properties
URL:
http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_fr_FR.properties?rev=1891042&r1=1891041&r2=1891042&view=diff
==============================================================================
---
manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_fr_FR.properties
(original)
+++
manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_fr_FR.properties
Fri Jun 25 13:40:38 2021
@@ -14,6 +14,7 @@
# limitations under the License.
TikaExtractor.RetryInterval=Intervalle entre les tentatives (en ms):
+TikaExtractor.RetryIntervalTikaDown=Intervalle entre les tentatives quand Tika
est injoignable (en ms):
TikaExtractor.RetryNumber=Nombre de tentatives:
TikaExtractor.ExtractArchives=Extraire le contenu des archives:
TikaExtractor.ConnectionTimeout=Connexion timeout:
Modified:
manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_ja_JP.properties
URL:
http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_ja_JP.properties?rev=1891042&r1=1891041&r2=1891042&view=diff
==============================================================================
---
manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_ja_JP.properties
(original)
+++
manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_ja_JP.properties
Fri Jun 25 13:40:38 2021
@@ -14,6 +14,7 @@
# limitations under the License.
TikaExtractor.RetryInterval=Retry interval (in ms):
+TikaExtractor.RetryIntervalTikaDown=Retry interval when Tika is down (in ms):
TikaExtractor.RetryNumber=Number of retries:
TikaExtractor.ExtractArchives=Extract archives content:
TikaExtractor.ConnectionTimeout=Connection timeout:
Modified:
manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_zh_CN.properties
URL:
http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_zh_CN.properties?rev=1891042&r1=1891041&r2=1891042&view=diff
==============================================================================
---
manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_zh_CN.properties
(original)
+++
manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_zh_CN.properties
Fri Jun 25 13:40:38 2021
@@ -14,6 +14,7 @@
# limitations under the License.
TikaExtractor.RetryInterval=Retry interval (in ms):
+TikaExtractor.RetryIntervalTikaDown=Retry interval when Tika is down (in ms):
TikaExtractor.RetryNumber=Number of retries:
TikaExtractor.ExtractArchives=Extract archives content:
TikaExtractor.ConnectionTimeout=Connection timeout:
Modified:
manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/editConfiguration_Server.html
URL:
http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/editConfiguration_Server.html?rev=1891042&r1=1891041&r2=1891042&view=diff
==============================================================================
---
manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/editConfiguration_Server.html
(original)
+++
manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/editConfiguration_Server.html
Fri Jun 25 13:40:38 2021
@@ -50,6 +50,12 @@
</td>
</tr>
<tr>
+ <td
class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('TikaExtractor.RetryIntervalTikaDown'))</nobr></td>
+ <td class="value"><input name="retryIntervalTikaDown" type="text"
+ value="$Encoder.attributeEscape($RETRYINTERVALTIKADOWN)" size="20" />
+ </td>
+ </tr>
+ <tr>
<td
class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('TikaExtractor.RetryNumber'))</nobr></td>
<td class="value"><input name="retryNumber" type="text"
value="$Encoder.attributeEscape($RETRYNUMBER)" size="5" />
@@ -63,6 +69,7 @@
<input type="hidden" name="connectionTimeout"
value="$Encoder.attributeEscape($CONNECTIONTIMEOUT)"/>
<input type="hidden" name="socketTimeout"
value="$Encoder.attributeEscape($SOCKETTIMEOUT)"/>
<input type="hidden" name="retryInterval"
value="$Encoder.attributeEscape($RETRYINTERVAL)"/>
+<input type="hidden" name="retryIntervalTikaDown"
value="$Encoder.attributeEscape($RETRYINTERVALTIKADOWN)"/>
<input type="hidden" name="retryNumber"
value="$Encoder.attributeEscape($RETRYNUMBER)"/>
#end
\ No newline at end of file
Modified:
manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/viewConfiguration.html
URL:
http://svn.apache.org/viewvc/manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/viewConfiguration.html?rev=1891042&r1=1891041&r2=1891042&view=diff
==============================================================================
---
manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/viewConfiguration.html
(original)
+++
manifoldcf/branches/CONNECTORS-1667/connectors/tikaservice-rmeta/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/viewConfiguration.html
Fri Jun 25 13:40:38 2021
@@ -37,6 +37,10 @@
<td class="value"><nobr>$Encoder.bodyEscape($RETRYINTERVAL)</nobr></td>
</tr>
<tr>
+ <td
class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('TikaExtractor.RetryIntervalTikaDown'))</nobr></td>
+ <td
class="value"><nobr>$Encoder.bodyEscape($RETRYINTERVALTIKADOWN)</nobr></td>
+ </tr>
+ <tr>
<td
class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('TikaExtractor.RetryNumber'))</nobr></td>
<td class="value"><nobr>$Encoder.bodyEscape($RETRYNUMBER)</nobr></td>
</tr>