Author: kwright
Date: Wed Sep 5 11:38:11 2018
New Revision: 1840122
URL: http://svn.apache.org/viewvc?rev=1840122&view=rev
Log:
CONNECTORS-1528: Add a lowercase canonicalization mode, and get the
Canonicalization tab partly functioning again; a second Add and a Delete
still do not work.
Modified:
manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConfig.java
manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java
manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_en_US.properties
manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_es_ES.properties
manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_fr_FR.properties
manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_ja_JP.properties
manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_zh_CN.properties
manifoldcf/trunk/connectors/webcrawler/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/webcrawler/editSpecification_Canonicalization.html.vm
manifoldcf/trunk/connectors/webcrawler/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/webcrawler/viewSpecification.html.vm
Modified:
manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConfig.java
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConfig.java?rev=1840122&r1=1840121&r2=1840122&view=diff
==============================================================================
---
manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConfig.java
(original)
+++
manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConfig.java
Wed Sep 5 11:38:11 2018
@@ -170,6 +170,8 @@ public class WebcrawlerConfig
public static final String ATTR_PHPSESSIONREMOVAL = "phpsessionremoval";
/** bvsessionremoval attribute */
public static final String ATTR_BVSESSIONREMOVAL = "bvsessionremoval";
+ /** map to lower case */
+ public static final String ATTR_LOWERCASE = "lowercase";
/** name attribute */
public static final String ATTR_NAME = "name";
/** token attribute */
Modified:
manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java?rev=1840122&r1=1840121&r2=1840122&view=diff
==============================================================================
---
manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java
(original)
+++
manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java
Wed Sep 5 11:38:11 2018
@@ -2385,7 +2385,7 @@ public class WebcrawlerConnector extends
allowReorderOutput = Messages.getBodyString(locale,
"WebcrawlerConnector.no");
}
else
- allowReorderOutput = allowReorder;
+ allowReorderOutput =
allowReorder.equals(WebcrawlerConfig.ATTRVALUE_NO)?Messages.getBodyString(locale,
"WebcrawlerConnector.no"):Messages.getBodyString(locale,
"WebcrawlerConnector.yes");
String allowJavaSessionRemoval =
specNode.getAttributeValue(WebcrawlerConfig.ATTR_JAVASESSIONREMOVAL);
String allowJavaSessionRemovalOutput;
if (allowJavaSessionRemoval == null ||
allowJavaSessionRemoval.length() == 0)
@@ -2394,7 +2394,7 @@ public class WebcrawlerConnector extends
allowJavaSessionRemovalOutput = Messages.getBodyString(locale,
"WebcrawlerConnector.no");
}
else
- allowJavaSessionRemovalOutput = allowJavaSessionRemoval;
+ allowJavaSessionRemovalOutput =
allowJavaSessionRemoval.equals(WebcrawlerConfig.ATTRVALUE_NO)?Messages.getBodyString(locale,
"WebcrawlerConnector.no"):Messages.getBodyString(locale,
"WebcrawlerConnector.yes");;
String allowASPSessionRemoval =
specNode.getAttributeValue(WebcrawlerConfig.ATTR_ASPSESSIONREMOVAL);
String allowASPSessionRemovalOutput;
if (allowASPSessionRemoval == null || allowASPSessionRemoval.length()
== 0)
@@ -2403,7 +2403,7 @@ public class WebcrawlerConnector extends
allowASPSessionRemovalOutput = Messages.getBodyString(locale,
"WebcrawlerConnector.no");
}
else
- allowASPSessionRemovalOutput = allowASPSessionRemoval;
+ allowASPSessionRemovalOutput =
allowASPSessionRemoval.equals(WebcrawlerConfig.ATTRVALUE_NO)?Messages.getBodyString(locale,
"WebcrawlerConnector.no"):Messages.getBodyString(locale,
"WebcrawlerConnector.yes");;
String allowPHPSessionRemoval =
specNode.getAttributeValue(WebcrawlerConfig.ATTR_PHPSESSIONREMOVAL);
String allowPHPSessionRemovalOutput;
if (allowPHPSessionRemoval == null || allowPHPSessionRemoval.length()
== 0)
@@ -2412,7 +2412,7 @@ public class WebcrawlerConnector extends
allowPHPSessionRemovalOutput = Messages.getBodyString(locale,
"WebcrawlerConnector.no");
}
else
- allowPHPSessionRemovalOutput = allowPHPSessionRemoval;
+ allowPHPSessionRemovalOutput =
allowPHPSessionRemoval.equals(WebcrawlerConfig.ATTRVALUE_NO)?Messages.getBodyString(locale,
"WebcrawlerConnector.no"):Messages.getBodyString(locale,
"WebcrawlerConnector.yes");;
String allowBVSessionRemoval =
specNode.getAttributeValue(WebcrawlerConfig.ATTR_BVSESSIONREMOVAL);
String allowBVSessionRemovalOutput;
if (allowBVSessionRemoval == null || allowBVSessionRemoval.length() ==
0)
@@ -2421,7 +2421,16 @@ public class WebcrawlerConnector extends
allowBVSessionRemovalOutput = Messages.getBodyString(locale,
"WebcrawlerConnector.no");
}
else
- allowBVSessionRemovalOutput = allowBVSessionRemoval;
+ allowBVSessionRemovalOutput =
allowBVSessionRemoval.equals(WebcrawlerConfig.ATTRVALUE_NO)?Messages.getBodyString(locale,
"WebcrawlerConnector.no"):Messages.getBodyString(locale,
"WebcrawlerConnector.yes");;
+ String allowLowercasing =
specNode.getAttributeValue(WebcrawlerConfig.ATTR_LOWERCASE);
+ String allowLowercasingOutput;
+ if (allowLowercasing == null || allowLowercasing.length() == 0)
+ {
+ allowLowercasing = WebcrawlerConfig.ATTRVALUE_NO;
+ allowLowercasingOutput = Messages.getBodyString(locale,
"WebcrawlerConnector.no");
+ }
+ else
+ allowLowercasingOutput =
allowLowercasing.equals(WebcrawlerConfig.ATTRVALUE_NO)?Messages.getBodyString(locale,
"WebcrawlerConnector.no"):Messages.getBodyString(locale,
"WebcrawlerConnector.yes");;
canonicalizationMap.put("regexpString",regexpString);
canonicalizationMap.put("description",description);
@@ -2435,7 +2444,9 @@ public class WebcrawlerConnector extends
canonicalizationMap.put("allowPHPSessionRemovalOutput",allowPHPSessionRemovalOutput);
canonicalizationMap.put("allowBVSessionRemoval",allowBVSessionRemoval);
canonicalizationMap.put("allowBVSessionRemovalOutput",allowBVSessionRemovalOutput);
-
+ canonicalizationMap.put("allowLowercasing",allowLowercasing);
+
canonicalizationMap.put("allowLowercasingOutput",allowLowercasingOutput);
+
canonicalizationMapList.add(canonicalizationMap);
}
}
@@ -2937,6 +2948,7 @@ public class WebcrawlerConnector extends
String aspSession =
variableContext.getParameter(seqPrefix+"urlregexpasp_"+Integer.toString(j));
String phpSession =
variableContext.getParameter(seqPrefix+"urlregexpphp_"+Integer.toString(j));
String bvSession =
variableContext.getParameter(seqPrefix+"urlregexpbv_"+Integer.toString(j));
+ String lowercasing =
variableContext.getParameter(seqPrefix+"urlregexplowercasing_"+Integer.toString(j));
SpecificationNode newSn = new
SpecificationNode(WebcrawlerConfig.NODE_URLSPEC);
newSn.setAttribute(WebcrawlerConfig.ATTR_REGEXP,regexp);
if (regexpDescription != null && regexpDescription.length() > 0)
@@ -2951,6 +2963,8 @@ public class WebcrawlerConnector extends
newSn.setAttribute(WebcrawlerConfig.ATTR_PHPSESSIONREMOVAL,phpSession);
if (bvSession != null && bvSession.length() > 0)
newSn.setAttribute(WebcrawlerConfig.ATTR_BVSESSIONREMOVAL,bvSession);
+ if (lowercasing != null && lowercasing.length() > 0)
+ newSn.setAttribute(WebcrawlerConfig.ATTR_LOWERCASE,lowercasing);
ds.addChild(ds.getChildCount(),newSn);
}
j++;
@@ -2964,6 +2978,7 @@ public class WebcrawlerConnector extends
String aspSession =
variableContext.getParameter(seqPrefix+"urlregexpasp");
String phpSession =
variableContext.getParameter(seqPrefix+"urlregexpphp");
String bvSession =
variableContext.getParameter(seqPrefix+"urlregexpbv");
+ String lowercasing =
variableContext.getParameter(seqPrefix+"urlregexplowercasing");
// Add a new node at the end
SpecificationNode newSn = new
SpecificationNode(WebcrawlerConfig.NODE_URLSPEC);
@@ -2980,6 +2995,8 @@ public class WebcrawlerConnector extends
newSn.setAttribute(WebcrawlerConfig.ATTR_PHPSESSIONREMOVAL,phpSession);
if (bvSession != null && bvSession.length() > 0)
newSn.setAttribute(WebcrawlerConfig.ATTR_BVSESSIONREMOVAL,bvSession);
+ if (lowercasing != null && lowercasing.length() > 0)
+ newSn.setAttribute(WebcrawlerConfig.ATTR_LOWERCASE,lowercasing);
ds.addChild(ds.getChildCount(),newSn);
}
}
@@ -3656,6 +3673,11 @@ public class WebcrawlerConnector extends
// Put it back into the URL without the ref, and with the modified query
and path parts.
url = new
WebURL(url.getScheme(),url.getHost(),url.getPort(),pathString,queryString);
String rval = url.toASCIIString();
+ // Here is where we decide to bash to lowercase, if so indicated
+ if (p != null && p.canLowercase())
+ {
+ rval = rval.toLowerCase(Locale.ROOT);
+ }
return rval;
}
@@ -5383,9 +5405,10 @@ public class WebcrawlerConnector extends
protected final boolean removeAspSession;
protected final boolean removePhpSession;
protected final boolean removeBVSession;
+ protected final boolean lowercasing;
public CanonicalizationPolicy(Pattern matchPattern, boolean reorder,
boolean removeJavaSession, boolean removeAspSession,
- boolean removePhpSession, boolean removeBVSession)
+ boolean removePhpSession, boolean removeBVSession, boolean lowercasing)
{
this.matchPattern = matchPattern;
this.reorder = reorder;
@@ -5393,6 +5416,7 @@ public class WebcrawlerConnector extends
this.removeAspSession = removeAspSession;
this.removePhpSession = removePhpSession;
this.removeBVSession = removeBVSession;
+ this.lowercasing = lowercasing;
}
public boolean checkMatch(String url)
@@ -5426,6 +5450,11 @@ public class WebcrawlerConnector extends
return removeBVSession;
}
+ public boolean canLowercase()
+ {
+ return lowercasing;
+ }
+
}
/** Class representing a list of canonicalization rules */
@@ -5738,10 +5767,20 @@ public class WebcrawlerConnector extends
{
bvSessionValue = bvSession.equals(WebcrawlerConfig.ATTRVALUE_YES);
}
+
+ String lowercasing =
sn.getAttributeValue(WebcrawlerConfig.ATTR_LOWERCASE);
+ boolean lowercasingValue;
+ if (lowercasing == null)
+ lowercasingValue = false;
+ else
+ {
+ lowercasingValue =
lowercasing.equals(WebcrawlerConfig.ATTRVALUE_YES);
+ }
+
try
{
canonicalizationPolicies.addRule(new
CanonicalizationPolicy(Pattern.compile(urlRegexp),reorderValue,javaSessionValue,aspSessionValue,
- phpSessionValue, bvSessionValue));
+ phpSessionValue, bvSessionValue, lowercasingValue));
}
catch (java.util.regex.PatternSyntaxException e)
{
Modified:
manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_en_US.properties
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_en_US.properties?rev=1840122&r1=1840121&r2=1840122&view=diff
==============================================================================
---
manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_en_US.properties
(original)
+++
manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_en_US.properties
Wed Sep 5 11:38:11 2018
@@ -64,6 +64,7 @@ WebcrawlerConnector.TrustEverything=Trus
WebcrawlerConnector.NoTrustCertificates=No trust certificates
WebcrawlerConnector.Description=Description
WebcrawlerConnector.Reorder=Reorder?
+WebcrawlerConnector.MapToLowercase=Map to lowercase?
WebcrawlerConnector.RemoveJSPSessions=Remove JSP sessions?
WebcrawlerConnector.RemoveASPSessions=Remove ASP sessions?
WebcrawlerConnector.RemovePHPSessions=Remove PHP sessions?
Modified:
manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_es_ES.properties
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_es_ES.properties?rev=1840122&r1=1840121&r2=1840122&view=diff
==============================================================================
---
manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_es_ES.properties
(original)
+++
manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_es_ES.properties
Wed Sep 5 11:38:11 2018
@@ -64,6 +64,7 @@ WebcrawlerConnector.TrustEverything=La c
WebcrawlerConnector.NoTrustCertificates=No hay certificados de confianza
WebcrawlerConnector.Description=Descripción
WebcrawlerConnector.Reorder=reordenar?
+WebcrawlerConnector.MapToLowercase=Map to lowercase?
WebcrawlerConnector.RemoveJSPSessions=Retire sesiones JSP?
WebcrawlerConnector.RemoveASPSessions=Retire sesiones ASP?
WebcrawlerConnector.RemovePHPSessions=Retire las sesiones de PHP?
Modified:
manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_fr_FR.properties
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_fr_FR.properties?rev=1840122&r1=1840121&r2=1840122&view=diff
==============================================================================
---
manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_fr_FR.properties
(original)
+++
manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_fr_FR.properties
Wed Sep 5 11:38:11 2018
@@ -64,6 +64,7 @@ WebcrawlerConnector.TrustEverything=Fair
WebcrawlerConnector.NoTrustCertificates=Aucun certificat de confiance
WebcrawlerConnector.Description=Description
WebcrawlerConnector.Reorder=Reordonner (reorder)?
+WebcrawlerConnector.MapToLowercase=Map to lowercase?
WebcrawlerConnector.RemoveJSPSessions=Retirer les sessions JSP?
WebcrawlerConnector.RemoveASPSessions=Retirer les sessions ASP?
WebcrawlerConnector.RemovePHPSessions=Retirer les sessions PHP?
Modified:
manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_ja_JP.properties
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_ja_JP.properties?rev=1840122&r1=1840121&r2=1840122&view=diff
==============================================================================
---
manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_ja_JP.properties
(original)
+++
manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_ja_JP.properties
Wed Sep 5 11:38:11 2018
@@ -64,6 +64,7 @@ WebcrawlerConnector.TrustEverything=ã
WebcrawlerConnector.NoTrustCertificates=トラストサーティフィケートがありません
WebcrawlerConnector.Description=説明
WebcrawlerConnector.Reorder=ソート
+WebcrawlerConnector.MapToLowercase=Map to lowercase?
WebcrawlerConnector.RemoveJSPSessions=JSPセッションを削除
WebcrawlerConnector.RemoveASPSessions=ASPセッションを削除
WebcrawlerConnector.RemovePHPSessions=PHPセッションを削除
Modified:
manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_zh_CN.properties
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_zh_CN.properties?rev=1840122&r1=1840121&r2=1840122&view=diff
==============================================================================
---
manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_zh_CN.properties
(original)
+++
manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_zh_CN.properties
Wed Sep 5 11:38:11 2018
@@ -64,6 +64,7 @@ WebcrawlerConnector.TrustEverything=ä¿
WebcrawlerConnector.NoTrustCertificates=无信任证书
WebcrawlerConnector.Description=説明
WebcrawlerConnector.Reorder=重排
+WebcrawlerConnector.MapToLowercase=Map to lowercase?
WebcrawlerConnector.RemoveJSPSessions=删除JSP会话
WebcrawlerConnector.RemoveASPSessions=删除ASP会话
WebcrawlerConnector.RemovePHPSessions=删除PHP会话
Modified:
manifoldcf/trunk/connectors/webcrawler/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/webcrawler/editSpecification_Canonicalization.html.vm
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/webcrawler/editSpecification_Canonicalization.html.vm?rev=1840122&r1=1840121&r2=1840122&view=diff
==============================================================================
---
manifoldcf/trunk/connectors/webcrawler/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/webcrawler/editSpecification_Canonicalization.html.vm
(original)
+++
manifoldcf/trunk/connectors/webcrawler/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/webcrawler/editSpecification_Canonicalization.html.vm
Wed Sep 5 11:38:11 2018
@@ -30,6 +30,7 @@
<th>$Encoder.bodyEscape($ResourceBundle.getString('WebcrawlerConnector.URLRegularExpression'))</th>
<th>$Encoder.bodyEscape($ResourceBundle.getString('WebcrawlerConnector.Description'))</th>
<th>$Encoder.bodyEscape($ResourceBundle.getString('WebcrawlerConnector.Reorder'))</th>
+
<th>$Encoder.bodyEscape($ResourceBundle.getString('WebcrawlerConnector.MapToLowercase'))</th>
<th>$Encoder.bodyEscape($ResourceBundle.getString('WebcrawlerConnector.RemoveJSPSessions'))</th>
<th>$Encoder.bodyEscape($ResourceBundle.getString('WebcrawlerConnector.RemoveASPSessions'))</th>
<th>$Encoder.bodyEscape($ResourceBundle.getString('WebcrawlerConnector.RemovePHPSessions'))</th>
@@ -41,21 +42,23 @@
<button class="btn btn-danger btn-xs" type="button"
title="$Encoder.attributeEscape($ResourceBundle.getString('WebcrawlerConnector.DeleteUrlRegexp'))$Encoder.attributeEscape($canonicalizationMap["regexpString"])"
onclick='javascript:${SEQPREFIX}URLRegexpDelete($foreach.index,"${SEQPREFIX}urlregexp_${foreach.index}");'><i
class="fa fa-minus-circle fa-fw"
aria-hidden="true"></i>$Encoder.attributeEscape($ResourceBundle.getString('WebcrawlerConnector.Delete'))</button>
</td>
<td>
- <input type="hidden" name="${SEQPREFIX}urlregexp_$foreach.index"
value="$Encoder.attributeEscape($canonicalizationMap["regexpString"])"/>
- <input type="hidden" name="${SEQPREFIX}urlregexpdesc_$foreach.index"
value="$Encoder.attributeEscape($canonicalizationMap["description"])"/>
- <input type="hidden"
name="${SEQPREFIX}urlregexpreorder_$foreach.index"
value="$canonicalizationMap["allowReorder"]"/>
- <input type="hidden" name="${SEQPREFIX}urlregexpjava_$foreach.index"
value="$canonicalizationMap["allowJavaSessionRemoval"]"/>
- <input type="hidden" name="${SEQPREFIX}urlregexpasp_$foreach.index"
value="$canonicalizationMap["allowASPSessionRemoval"]"/>
- <input type="hidden" name="${SEQPREFIX}urlregexpphp_$foreach.index"
value="$canonicalizationMap["allowPHPSessionRemoval"]"/>
- <input type="hidden" name="${SEQPREFIX}urlregexpbv_$foreach.index"
value="$canonicalizationMap["allowBVSessionRemoval"]"/>
+ <input type="hidden" name="${SEQPREFIX}urlregexp_$foreach.index"
value="$Encoder.attributeEscape($canonicalizationMap['regexpString'])"/>
+ <input type="hidden" name="${SEQPREFIX}urlregexpdesc_$foreach.index"
value="$Encoder.attributeEscape($canonicalizationMap['description'])"/>
+ <input type="hidden"
name="${SEQPREFIX}urlregexpreorder_$foreach.index"
value="$canonicalizationMap['allowReorder']"/>
+ <input type="hidden"
name="${SEQPREFIX}urlregexplowercasing_$foreach.index"
value="$canonicalizationMap['allowLowercasing']"/>
+ <input type="hidden" name="${SEQPREFIX}urlregexpjava_$foreach.index"
value="$canonicalizationMap['allowJavaSessionRemoval']"/>
+ <input type="hidden" name="${SEQPREFIX}urlregexpasp_$foreach.index"
value="$canonicalizationMap['allowASPSessionRemoval']"/>
+ <input type="hidden" name="${SEQPREFIX}urlregexpphp_$foreach.index"
value="$canonicalizationMap['allowPHPSessionRemoval']"/>
+ <input type="hidden" name="${SEQPREFIX}urlregexpbv_$foreach.index"
value="$canonicalizationMap['allowBVSessionRemoval']"/>
$Encoder.bodyEscape($canonicalizationMap["regexpString"])
</td>
- <td>$Encoder.bodyEscape($canonicalizationMap["description"])</td>
- <td>$canonicalizationMap["allowReorderOutput"]</td>
- <td>$canonicalizationMap["allowJavaSessionRemovalOutput"]</td>
- <td>$canonicalizationMap["allowASPSessionRemovalOutput"]</td>
- <td>$canonicalizationMap["allowPHPSessionRemovalOutput"]</td>
- <td>$canonicalizationMap["allowBVSessionRemovalOutput"]</td>
+ <td>$Encoder.bodyEscape($canonicalizationMap['description'])</td>
+ <td>$canonicalizationMap['allowReorderOutput']</td>
+ <td>$canonicalizationMap['allowLowercasingOutput']</td>
+ <td>$canonicalizationMap['allowJavaSessionRemovalOutput']</td>
+ <td>$canonicalizationMap['allowASPSessionRemovalOutput']</td>
+ <td>$canonicalizationMap['allowPHPSessionRemovalOutput']</td>
+ <td>$canonicalizationMap['allowBVSessionRemovalOutput']</td>
</tr>
#end
</table>
@@ -65,6 +68,7 @@
<hr/>
<div class="row">
<div class="col-md-4">
+ <input type="hidden" name="${SEQPREFIX}urlregexpop" value="CONTINUE"/>
<input type="hidden" name="${SEQPREFIX}urlregexpcount" value="$COUNTER"/>
<div class="form-group">
<label
for="${SEQPREFIX}urlregexp">$Encoder.bodyEscape($ResourceBundle.getString('WebcrawlerConnector.URLRegularExpression'))</label>
@@ -82,6 +86,11 @@
</div>
<div class="checkbox">
<label>
+ <input type="checkbox" name="${SEQPREFIX}urlregexplowercasing"
value="yes"/>
$Encoder.bodyEscape($ResourceBundle.getString('WebcrawlerConnector.MapToLowercase'))
+ </label>
+ </div>
+ <div class="checkbox">
+ <label>
<input type="checkbox" name="${SEQPREFIX}urlregexpjava" value="yes"
checked="true"/>
$Encoder.bodyEscape($ResourceBundle.getString('WebcrawlerConnector.RemoveJSPSessions'))
</label>
</div>
@@ -108,13 +117,14 @@
</div>
#else
#foreach($canonicalizationMap in $CANONICALIZATIONMAPLIST)
-<input type="hidden" name="${SEQPREFIX}urlregexp_$foreach.index"
value="$Encoder.attributeEscape($canonicalizationMap["regexpString"])"/>
-<input type="hidden" name="${SEQPREFIX}urlregexpdesc_$foreach.index"
value="$Encoder.attributeEscape($canonicalizationMap["description"])"/>
-<input type="hidden" name="${SEQPREFIX}urlregexpreorder_$foreach.index"
value="$canonicalizationMap["allowReorder"]"/>
-<input type="hidden" name="${SEQPREFIX}urlregexpjava_$foreach.index"
value="$canonicalizationMap["allowJavaSessionRemoval"]"/>
-<input type="hidden" name="${SEQPREFIX}urlregexpasp_$foreach.index"
value="$canonicalizationMap["allowASPSessionRemoval"]"/>
-<input type="hidden" name="${SEQPREFIX}urlregexpphp_$foreach.index"
value="$canonicalizationMap["allowPHPSessionRemoval"]"/>
-<input type="hidden" name="${SEQPREFIX}urlregexpbv_$foreach.index"
value="$canonicalizationMap["allowBVSessionRemoval"]"/>
+<input type="hidden" name="${SEQPREFIX}urlregexp_$foreach.index"
value="$Encoder.attributeEscape($canonicalizationMap['regexpString'])"/>
+<input type="hidden" name="${SEQPREFIX}urlregexpdesc_$foreach.index"
value="$Encoder.attributeEscape($canonicalizationMap['description'])"/>
+<input type="hidden" name="${SEQPREFIX}urlregexpreorder_$foreach.index"
value="$canonicalizationMap['allowReorder']"/>
+<input type="hidden" name="${SEQPREFIX}urlregexplowercasing_$foreach.index"
value="$canonicalizationMap['allowLowercasing']"/>
+<input type="hidden" name="${SEQPREFIX}urlregexpjava_$foreach.index"
value="$canonicalizationMap['allowJavaSessionRemoval']"/>
+<input type="hidden" name="${SEQPREFIX}urlregexpasp_$foreach.index"
value="$canonicalizationMap['allowASPSessionRemoval']"/>
+<input type="hidden" name="${SEQPREFIX}urlregexpphp_$foreach.index"
value="$canonicalizationMap['allowPHPSessionRemoval']"/>
+<input type="hidden" name="${SEQPREFIX}urlregexpbv_$foreach.index"
value="$canonicalizationMap['allowBVSessionRemoval']"/>
#end
#set( $COUNTER = $CANONICALIZATIONMAPLIST.size())
<input type="hidden" name="${SEQPREFIX}urlregexpcount" value="$COUNTER"/>
Modified:
manifoldcf/trunk/connectors/webcrawler/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/webcrawler/viewSpecification.html.vm
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/webcrawler/viewSpecification.html.vm?rev=1840122&r1=1840121&r2=1840122&view=diff
==============================================================================
---
manifoldcf/trunk/connectors/webcrawler/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/webcrawler/viewSpecification.html.vm
(original)
+++
manifoldcf/trunk/connectors/webcrawler/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/webcrawler/viewSpecification.html.vm
Wed Sep 5 11:38:11 2018
@@ -28,6 +28,7 @@
<th>$Encoder.bodyEscape($ResourceBundle.getString("WebcrawlerConnector.URLRegexp"))</th>
<th>$Encoder.bodyEscape($ResourceBundle.getString("WebcrawlerConnector.Description"))</th>
<th>$Encoder.bodyEscape($ResourceBundle.getString("WebcrawlerConnector.Reorder"))</th>
+
<th>$Encoder.bodyEscape($ResourceBundle.getString("WebcrawlerConnector.MapToLowercase"))</th>
<th>$Encoder.bodyEscape($ResourceBundle.getString("WebcrawlerConnector.RemoveJSPSessions"))</th>
<th>$Encoder.bodyEscape($ResourceBundle.getString("WebcrawlerConnector.RemoveASPSessions"))</th>
<th>$Encoder.bodyEscape($ResourceBundle.getString("WebcrawlerConnector.RemovePHPSessions"))</th>
@@ -38,6 +39,7 @@
<td>$Encoder.bodyEscape($canonicalizationMap["regexpString"])</td>
<td>$Encoder.bodyEscape($canonicalizationMap["description"])</td>
<td>$canonicalizationMap["allowReorderOutput"]</td>
+ <td>$canonicalizationMap["allowLowercasingOutput"]</td>
<td>$canonicalizationMap["allowJavaSessionRemovalOutput"]</td>
<td>$canonicalizationMap["allowASPSessionRemovalOutput"]</td>
<td>$canonicalizationMap["allowPHPSessionRemovalOutput"]</td>