Author: kwright
Date: Thu Sep 28 04:20:30 2023
New Revision: 1912568
URL: http://svn.apache.org/viewvc?rev=1912568&view=rev
Log:
Add pr-149.
Modified:
manifoldcf/trunk/CHANGES.txt
manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConfig.java
manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java
manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_en_US.properties
manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_es_ES.properties
manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_fr_FR.properties
manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_ja_JP.properties
manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_zh_CN.properties
manifoldcf/trunk/connectors/webcrawler/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/webcrawler/editSpecification_Inclusions.html.vm
manifoldcf/trunk/connectors/webcrawler/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/webcrawler/viewSpecification.html.vm
Modified: manifoldcf/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/CHANGES.txt?rev=1912568&r1=1912567&r2=1912568&view=diff
==============================================================================
--- manifoldcf/trunk/CHANGES.txt (original)
+++ manifoldcf/trunk/CHANGES.txt Thu Sep 28 04:20:30 2023
@@ -3,6 +3,16 @@ $Id$
======================= 2.26-dev =====================
+pr-149: Add Webconnector feature:
+The "Force the inclusion of redirection" option allows you to include hosts
redirected from original seeds. You might want to use this option if the site
you are crawling is subject to redirections. Note that it is not required if
the previous option is not checked. Here are the possible behaviors:
+
+ - If the user checks the "Include only hosts", but not the "Force the
inclusion" option, then the redirected files will be filtered if their new
URL doesn't match the seed.
+ - If the user checks the Include only hosts, and checks the Force the
inclusion option, then when the job finds a url that is not in the same domain,
it is dropped EXCEPT if the url is originated by a 301 or 302 redirection in
the document queue.
+ - If the user does NOT check the include only hosts, but checks the Force
the inclusion option, then the job will crawl any url found, even if it is
originated by a 301 or 302 redirection.
+ - If the user does not check anything, then the behavior is the same as the
previous case.
+
+ If the admin checks the second option AND if the first option is checked,
then the job will check any host added in the Set. If a host is subject to
redirection, then we add the destination URL in the Set.
+(Emeric Bernet-Rollande)
======================= Release 2.25 =====================
Modified:
manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConfig.java
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConfig.java?rev=1912568&r1=1912567&r2=1912568&view=diff
==============================================================================
---
manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConfig.java
(original)
+++
manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConfig.java
Thu Sep 28 04:20:30 2023
@@ -147,6 +147,10 @@ public class WebcrawlerConfig
/** Limit to seeds. When value attribute is true, only seed domains will be
permitted. */
public static final String NODE_LIMITTOSEEDS = "limittoseeds";
+
+ /** Force the inclusion of redirections. When value attribute is true,
redirected URL will be included. */
+ public static final String NODE_FORCEINCLUSION = "forceinclusionofredirects";
+
/** Canonicalization rule. Attributes are regexp, description, reorder,
*javasessionremoval, aspsessionremoval, phpsessionremoval, bvsessionremoval
*/
public static final String NODE_URLSPEC = "urlspec";
Modified:
manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java?rev=1912568&r1=1912567&r2=1912568&view=diff
==============================================================================
---
manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java
(original)
+++
manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java
Thu Sep 28 04:20:30 2023
@@ -2506,36 +2506,37 @@ public class WebcrawlerConnector extends
String inclusions = ".*\n";
String inclusionsIndex = ".*\n";
boolean includeMatching = true;
+ boolean forceInclusion = true;
+
i = 0;
- while (i < ds.getChildCount())
- {
+ while (i < ds.getChildCount()) {
SpecificationNode sn = ds.getChild(i++);
- if (sn.getType().equals(WebcrawlerConfig.NODE_INCLUDES))
- {
+ if (sn.getType().equals(WebcrawlerConfig.NODE_INCLUDES)) {
inclusions = sn.getValue();
if (inclusions == null)
inclusions = "";
- }
- else if (sn.getType().equals(WebcrawlerConfig.NODE_INCLUDESINDEX))
- {
+ } else if (sn.getType().equals(WebcrawlerConfig.NODE_INCLUDESINDEX)) {
inclusionsIndex = sn.getValue();
if (inclusionsIndex == null)
inclusionsIndex = "";
- }
- else if (sn.getType().equals(WebcrawlerConfig.NODE_LIMITTOSEEDS))
- {
+ } else if (sn.getType().equals(WebcrawlerConfig.NODE_LIMITTOSEEDS)) {
String value = sn.getAttributeValue(WebcrawlerConfig.ATTR_VALUE);
if (value == null || value.equals("false"))
includeMatching = false;
else
includeMatching = true;
+ } else if (sn.getType().equals(WebcrawlerConfig.NODE_FORCEINCLUSION)) {
+ final String value = sn.getAttributeValue(WebcrawlerConfig.ATTR_VALUE);
+ forceInclusion = value != null && !"false".equals(value);
}
}
velocityContext.put("INCLUSIONS",inclusions);
velocityContext.put("INCLUSIONSINDEX",inclusionsIndex);
velocityContext.put("INCLUDEMATCHING",includeMatching);
+ velocityContext.put("FORCEINCLUSION", forceInclusion);
+
}
@@ -2862,7 +2863,26 @@ public class WebcrawlerConnector extends
cn.setAttribute(WebcrawlerConfig.ATTR_VALUE,(matchingHosts==null||matchingHosts.equals("false"))?"false":"true");
ds.addChild(ds.getChildCount(),cn);
}
-
+
+ // Handle the force-inclusion switch
+ final String forceInclusionPresent =
variableContext.getParameter(seqPrefix + "forceinclusion_present");
+ if (forceInclusionPresent != null) {
+ // Delete existing switch record first
+ int i = 0;
+ while (i < ds.getChildCount()) {
+ final SpecificationNode sn = ds.getChild(i);
+ if (sn.getType().equals(WebcrawlerConfig.NODE_FORCEINCLUSION))
+ ds.removeChild(i);
+ else
+ i++;
+ }
+
+ final String forceInclusion = variableContext.getParameter(seqPrefix +
"forceInclusion");
+ final SpecificationNode cn = new
SpecificationNode(WebcrawlerConfig.NODE_FORCEINCLUSION);
+ cn.setAttribute(WebcrawlerConfig.ATTR_VALUE, (forceInclusion == null ||
forceInclusion.equals("false")) ? "false" : "true");
+ ds.addChild(ds.getChildCount(), cn);
+ }
+
// Get the exclusions
String exclusions = variableContext.getParameter(seqPrefix+"exclusions");
if (exclusions != null)
@@ -5701,6 +5721,7 @@ public class WebcrawlerConnector extends
List<String> packList = new ArrayList<String>();
String[] packStuff = new String[2];
boolean limitToSeeds = false;
+ boolean forceInclusion = false;
int i = 0;
while (i < spec.getChildCount())
{
@@ -5768,6 +5789,9 @@ public class WebcrawlerConnector extends
limitToSeeds = false;
else
limitToSeeds = true;
+ } else if (sn.getType().equals(WebcrawlerConfig.NODE_FORCEINCLUSION)) {
+ final String value =
sn.getAttributeValue(WebcrawlerConfig.ATTR_VALUE);
+ forceInclusion = value != null &&
!value.equals(WebcrawlerConfig.ATTRVALUE_FALSE);
}
else if (sn.getType().equals(WebcrawlerConfig.NODE_URLSPEC))
{
@@ -5883,8 +5907,18 @@ public class WebcrawlerConnector extends
String host = url.getHost();
- if (host != null)
+ if (host != null) {
seedHosts.add(host);
+ if (forceInclusion) {
+ // In case of redirection, if "Force the inclusion of
redirects" is set to true, we add the redirected url to seedHosts
+ try {
+ seedHosts.add(getFinalURL(urlCandidate));
+ } catch (IOException e) {
+ // Skip the entry
+ }
+ }
+ }
+
}
catch (java.net.URISyntaxException e)
{
@@ -6083,6 +6117,28 @@ public class WebcrawlerConnector extends
public Map<String,List<String>> headerData = null;
}
+
+ /**
+ * If the initial url is permanently or temporarily redirected (code 301 or
302), the method follows the redirection and returns the host of the destination url
+ * @param url The initial url
+ * @return the host of the url after redirection
+ */
+ public static String getFinalURL(String url) throws IOException,
URISyntaxException {
+ URL formattedUrl = new URL(url);
+ URLConnection urlConnection = formattedUrl.openConnection();
+ HttpURLConnection con = (HttpURLConnection) urlConnection;
+ con.setInstanceFollowRedirects(false);
+ con.connect();
+ con.getInputStream();
+
+ if (con.getResponseCode() == HttpURLConnection.HTTP_MOVED_PERM ||
con.getResponseCode() == HttpURLConnection.HTTP_MOVED_TEMP) {
+ String redirectUrl = con.getHeaderField("Location");
+ return getFinalURL(redirectUrl);
+ }
+
+ final java.net.URI uri = new URI(url);
+ return uri.getHost();
+ }
}
Modified:
manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_en_US.properties
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_en_US.properties?rev=1912568&r1=1912567&r2=1912568&view=diff
==============================================================================
---
manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_en_US.properties
(original)
+++
manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_en_US.properties
Thu Sep 28 04:20:30 2023
@@ -75,6 +75,7 @@ WebcrawlerConnector.AddUrlRegexp=Add url
WebcrawlerConnector.IncludeInCrawl=Include in crawl:
WebcrawlerConnector.IncludeInIndex=Include in index:
WebcrawlerConnector.IncludeOnlyHostsMatchingSeeds=Include only hosts matching
seeds?
+WebcrawlerConnector.ForceInclusionOfRedirects=Force the inclusion of
redirections (overwrites the Include only hosts option above)
WebcrawlerConnector.ExcludeFromCrawl=Exclude from crawl:
WebcrawlerConnector.ExcludeFromIndex=Exclude from index:
WebcrawlerConnector.ExcludeContentFromIndex=Exclude content from index:
Modified:
manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_es_ES.properties
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_es_ES.properties?rev=1912568&r1=1912567&r2=1912568&view=diff
==============================================================================
---
manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_es_ES.properties
(original)
+++
manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_es_ES.properties
Thu Sep 28 04:20:30 2023
@@ -72,12 +72,13 @@ WebcrawlerConnector.RemoveBVSessions=Ret
WebcrawlerConnector.DeleteUrlRegexp2=Eliminar regexp url
WebcrawlerConnector.NoCanonicalizationSpecified=No canonicalización
especificada - todas las direcciones URL se reordenan y se han eliminado todas
las sesiones
WebcrawlerConnector.AddUrlRegexp=Añadir regexp url
-WebcrawlerConnector.IncludeInCrawl=Incluir en rastreo:
-WebcrawlerConnector.IncludeInIndex=Incluir en el índice:
-WebcrawlerConnector.IncludeOnlyHostsMatchingSeeds=Incluya sólo los hosts que
emparejan semillas?
-WebcrawlerConnector.ExcludeFromCrawl=Excluir de rastreo:
-WebcrawlerConnector.ExcludeFromIndex=Excluir del índice:
-WebcrawlerConnector.ExcludeContentFromIndex=Excluir contenido del índice:
+WebcrawlerConnector.IncludeInCrawl=Incluir en rastreo:
+WebcrawlerConnector.IncludeInIndex=Incluir en el índice:
+WebcrawlerConnector.IncludeOnlyHostsMatchingSeeds=Incluya sólo los hosts que
emparejan semillas?
+WebcrawlerConnector.ForceInclusionOfRedirects=Force the inclusion of
redirections (overwrites the Include only hosts option above)
+WebcrawlerConnector.ExcludeFromCrawl=Excluir de rastreo:
+WebcrawlerConnector.ExcludeFromIndex=Excluir del índice:
+WebcrawlerConnector.ExcludeContentFromIndex=Excluir contenido del índice:
WebcrawlerConnector.DeleteToken=eliminar símbolo #
WebcrawlerConnector.NoAccessTokensPresent=No hay tokens de acceso actuales
WebcrawlerConnector.AddAccessToken=Añadir token de acceso
Modified:
manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_fr_FR.properties
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_fr_FR.properties?rev=1912568&r1=1912567&r2=1912568&view=diff
==============================================================================
---
manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_fr_FR.properties
(original)
+++
manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_fr_FR.properties
Thu Sep 28 04:20:30 2023
@@ -75,6 +75,7 @@ WebcrawlerConnector.AddUrlRegexp=Ajouter
WebcrawlerConnector.IncludeInCrawl=Inclure dans le crawl:
WebcrawlerConnector.IncludeInIndex=Inclure dans l'index:
WebcrawlerConnector.IncludeOnlyHostsMatchingSeeds=Inclure uniquement les seeds
de matching d'hôtes (hosts matching seeds)?
+WebcrawlerConnector.ForceInclusionOfRedirects=Forcer l'inclusion des
redirections (écrase l'option ci-dessus)
WebcrawlerConnector.ExcludeFromCrawl=Exclure du crawl:
WebcrawlerConnector.ExcludeFromIndex=Exclure de l'index:
WebcrawlerConnector.DeleteToken=Supprimer le jeton #
Modified:
manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_ja_JP.properties
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_ja_JP.properties?rev=1912568&r1=1912567&r2=1912568&view=diff
==============================================================================
---
manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_ja_JP.properties
(original)
+++
manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_ja_JP.properties
Thu Sep 28 04:20:30 2023
@@ -75,6 +75,7 @@ WebcrawlerConnector.AddUrlRegexp=URLæ
WebcrawlerConnector.IncludeInCrawl=ã¯ãã¼ã«ã«å«ããï¼
WebcrawlerConnector.IncludeInIndex=ç´¢å¼ã«å«ããï¼
WebcrawlerConnector.IncludeOnlyHostsMatchingSeeds=ã·ã¼ãã¨ä¸è´ãããã¹ãã®ã¿å¯¾è±¡ã«ãã
+WebcrawlerConnector.ForceInclusionOfRedirects=Force the inclusion of
redirections (overwrites the Include only hosts option above)
WebcrawlerConnector.ExcludeFromCrawl=ã¯ãã¼ã«ããé¤å¤ï¼
WebcrawlerConnector.ExcludeFromIndex=ç´¢å¼ãé¤å¤ï¼
WebcrawlerConnector.DeleteToken=ãã¼ã¯ã³ãåé¤ #
Modified:
manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_zh_CN.properties
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_zh_CN.properties?rev=1912568&r1=1912567&r2=1912568&view=diff
==============================================================================
---
manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_zh_CN.properties
(original)
+++
manifoldcf/trunk/connectors/webcrawler/connector/src/main/native2ascii/org/apache/manifoldcf/crawler/connectors/webcrawler/common_zh_CN.properties
Thu Sep 28 04:20:30 2023
@@ -72,12 +72,13 @@ WebcrawlerConnector.RemoveBVSessions=å�
WebcrawlerConnector.DeleteUrlRegexp2=å é¤URLæ£å表达å¼
WebcrawlerConnector.NoCanonicalizationSpecified=è§èåæªæå® -
ææURLå°è¢«éæï¼ä¼è¯è¢«æé¤
WebcrawlerConnector.AddUrlRegexp=æ·»å URLæ£å表达å¼
-WebcrawlerConnector.IncludeInCrawl=å
å«äºç¬è«å
:
-WebcrawlerConnector.IncludeInIndex=å
å«äºç´¢å¼å
:
-WebcrawlerConnector.IncludeOnlyHostsMatchingSeeds=åªå
å«åç§åå¹é
ç主æº
-WebcrawlerConnector.ExcludeFromCrawl=æé¤äºç¬è«å¤:
-WebcrawlerConnector.ExcludeFromIndex=æé¤äºç´¢å¼å¤:
-WebcrawlerConnector.DeleteToken=å é¤ä»¤ç #
+WebcrawlerConnector.IncludeInCrawl=å
å«äºç¬è«å
:
+WebcrawlerConnector.IncludeInIndex=å
å«äºç´¢å¼å
:
+WebcrawlerConnector.IncludeOnlyHostsMatchingSeeds=åªå
å«åç§åå¹é
ç主æº
+WebcrawlerConnector.ForceInclusionOfRedirects=Force the inclusion of
redirections (overwrites the Include only hosts option above)
+WebcrawlerConnector.ExcludeFromCrawl=æé¤äºç¬è«å¤:
+WebcrawlerConnector.ExcludeFromIndex=æé¤äºç´¢å¼å¤:
+WebcrawlerConnector.DeleteToken=å é¤ä»¤ç #
WebcrawlerConnector.ExcludeContentFromIndex=Exclude content from index:
WebcrawlerConnector.NoAccessTokensPresent=访é®ä»¤çä¸åå¨
WebcrawlerConnector.AddAccessToken=æ·»å 访é®ä»¤ç
Modified:
manifoldcf/trunk/connectors/webcrawler/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/webcrawler/editSpecification_Inclusions.html.vm
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/webcrawler/editSpecification_Inclusions.html.vm?rev=1912568&r1=1912567&r2=1912568&view=diff
==============================================================================
---
manifoldcf/trunk/connectors/webcrawler/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/webcrawler/editSpecification_Inclusions.html.vm
(original)
+++
manifoldcf/trunk/connectors/webcrawler/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/webcrawler/editSpecification_Inclusions.html.vm
Thu Sep 28 04:20:30 2023
@@ -33,11 +33,18 @@
<input type="hidden" name="${SEQPREFIX}matchinghosts_present"
value="true"/>
</div>
</div>
+ <div class="form-group">
+ <div class="checkbox">
+ <label><input type="checkbox" name="${SEQPREFIX}forceInclusion"
value="true" #if($FORCEINCLUSION) checked="yes" #end
/>$Encoder.bodyEscape($ResourceBundle.getString('WebcrawlerConnector.ForceInclusionOfRedirects'))</label>
+ <input type="hidden" name="${SEQPREFIX}forceinclusion_present"
value="true"/>
+ </div>
+ </div>
</div>
</div>
#else
<input type="hidden" name="${SEQPREFIX}inclusions"
value="$Encoder.attributeEscape($INCLUSIONS)"/>
<input type="hidden" name="${SEQPREFIX}inclusionsindex"
value="$Encoder.attributeEscape($INCLUSIONSINDEX)"/>
<input type="hidden" name="${SEQPREFIX}matchinghosts"
value="#if($INCLUDEMATCHING)true#{else}false#end" />
+<input type="hidden" name="${SEQPREFIX}forceInclusion"
value="#if($FORCEINCLUSION)true#{else}false#end" />
<input type="hidden" name="${SEQPREFIX}matchinghosts_present" value="true"/>
#end
Modified:
manifoldcf/trunk/connectors/webcrawler/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/webcrawler/viewSpecification.html.vm
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/webcrawler/viewSpecification.html.vm?rev=1912568&r1=1912567&r2=1912568&view=diff
==============================================================================
---
manifoldcf/trunk/connectors/webcrawler/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/webcrawler/viewSpecification.html.vm
(original)
+++
manifoldcf/trunk/connectors/webcrawler/connector/src/main/resources/org/apache/manifoldcf/crawler/connectors/webcrawler/viewSpecification.html.vm
Thu Sep 28 04:20:30 2023
@@ -78,10 +78,13 @@
<td>#if($INCLUDEMATCHING)$Encoder.bodyEscape($ResourceBundle.getString("WebcrawlerConnector.yes"))#{else}$Encoder.bodyEscape($ResourceBundle.getString("WebcrawlerConnector.no"))#{end}</td>
</tr>
<tr>
-
<th>$Encoder.bodyEscape($ResourceBundle.getString("WebcrawlerConnector.IncludeInCrawl"))</th>
- <td><pre>$Encoder.bodyEscape($INCLUSIONS)</pre></td>
+
<th>$Encoder.bodyEscape($ResourceBundle.getString("WebcrawlerConnector.ForceInclusionOfRedirects"))</th>
+
<td>#if($FORCEINCLUSION)$Encoder.bodyEscape($ResourceBundle.getString("WebcrawlerConnector.yes"))#{else}$Encoder.bodyEscape($ResourceBundle.getString("WebcrawlerConnector.no"))#{end}</td>
</tr>
<tr>
+
<th>$Encoder.bodyEscape($ResourceBundle.getString("WebcrawlerConnector.IncludeInCrawl"))</th>
+ <td><pre>$Encoder.bodyEscape($INCLUSIONS)</pre></td> </tr>
+ <tr>
<th>$Encoder.bodyEscape($ResourceBundle.getString("WebcrawlerConnector.IncludeInIndex"))</th>
<td><pre>$Encoder.bodyEscape($INCLUSIONSINDEX)</pre></td>
</tr>