Author: rfrovarp
Date: Wed Nov 23 22:22:51 2011
New Revision: 1205637
URL: http://svn.apache.org/viewvc?rev=1205637&view=rev
Log:
Uses the Tika metadata to check whether a meta robots tag is present that
indicates a page shouldn't be followed or indexed.
Modified:
incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/api/TikaParse.java
incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/parse/TikaParseImpl.java
Modified:
incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/api/TikaParse.java
URL:
http://svn.apache.org/viewvc/incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/api/TikaParse.java?rev=1205637&r1=1205636&r2=1205637&view=diff
==============================================================================
---
incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/api/TikaParse.java
(original)
+++
incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/api/TikaParse.java
Wed Nov 23 22:22:51 2011
@@ -12,4 +12,8 @@ public interface TikaParse extends Parse
public String getXml();
public String getPlainText();
+
+ /**
+  * Tells whether the parsed page may be followed.
+  *
+  * @return {@code true} if the page may be followed, {@code false} if the
+  *         page indicates (e.g. through a meta robots tag) that it should
+  *         not be
+  */
+ public boolean isFollowed();
+
+ /**
+  * Tells whether the parsed page may be indexed.
+  *
+  * @return {@code true} if the page may be indexed, {@code false} if the
+  *         page indicates (e.g. through a meta robots tag) that it should
+  *         not be
+  */
+ public boolean isIndexed();
}
Modified:
incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/parse/TikaParseImpl.java
URL:
http://svn.apache.org/viewvc/incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/parse/TikaParseImpl.java?rev=1205637&r1=1205636&r2=1205637&view=diff
==============================================================================
---
incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/parse/TikaParseImpl.java
(original)
+++
incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/parse/TikaParseImpl.java
Wed Nov 23 22:22:51 2011
@@ -50,4 +50,20 @@ public class TikaParseImpl extends Parse
return plainText;
}
+ /**
+  * Reports whether the "robots" metadata entry allows this page to be
+  * followed.
+  *
+  * @return {@code false} if the "robots" metadata value contains
+  *         "nofollow" (compared case-insensitively); {@code true}
+  *         otherwise, including when no "robots" value is present
+  */
+ @Override
+ public boolean isFollowed() {
+   // Look the value up once rather than once per condition.
+   String robots = metadata.get("robots");
+   // Lower-case with a fixed locale so the comparison does not depend on
+   // the JVM's default locale.
+   return robots == null
+       || !robots.toLowerCase(java.util.Locale.ENGLISH).contains("nofollow");
+ }
+
+ /**
+  * Reports whether the "robots" metadata entry allows this page to be
+  * indexed.
+  *
+  * @return {@code false} if the "robots" metadata value contains
+  *         "noindex" (compared case-insensitively); {@code true}
+  *         otherwise, including when no "robots" value is present
+  */
+ @Override
+ public boolean isIndexed() {
+   // Look the value up once rather than once per condition.
+   String robots = metadata.get("robots");
+   // Lower-case with a fixed locale: with a Turkish or Azeri default locale,
+   // toLowerCase() maps 'I' to dotless 'ı', so "NOINDEX" would become
+   // "noındex" and never match "noindex".
+   return robots == null
+       || !robots.toLowerCase(java.util.Locale.ENGLISH).contains("noindex");
+ }
+
}