Author: ivol37 at gmail.com
Date: Wed Jan 26 10:59:13 2011
New Revision: 708

Log:


Added:
   sandbox/ivol/amdatu-searchandindex/nutch/
   sandbox/ivol/amdatu-searchandindex/nutch/install_nutch.bat
   sandbox/ivol/amdatu-searchandindex/nutch/nutch-1.2.jar   (contents, props 
changed)
   sandbox/ivol/amdatu-searchandindex/nutch/pom.xml
   sandbox/ivol/amdatu-searchandindex/nutch/src/
   sandbox/ivol/amdatu-searchandindex/nutch/src/main/
   sandbox/ivol/amdatu-searchandindex/nutch/src/main/java/
   sandbox/ivol/amdatu-searchandindex/nutch/src/main/java/org/
   sandbox/ivol/amdatu-searchandindex/nutch/src/main/java/org/amdatu/
   
sandbox/ivol/amdatu-searchandindex/nutch/src/main/java/org/amdatu/searchandindex/
   
sandbox/ivol/amdatu-searchandindex/nutch/src/main/java/org/amdatu/searchandindex/nutch/
   
sandbox/ivol/amdatu-searchandindex/nutch/src/main/java/org/amdatu/searchandindex/nutch/impl/
   
sandbox/ivol/amdatu-searchandindex/nutch/src/main/java/org/amdatu/searchandindex/nutch/impl/NutchIndex.java
   
sandbox/ivol/amdatu-searchandindex/nutch/src/main/java/org/amdatu/searchandindex/nutch/osgi/
   
sandbox/ivol/amdatu-searchandindex/nutch/src/main/java/org/amdatu/searchandindex/nutch/osgi/Activator.java
   sandbox/ivol/amdatu-searchandindex/nutch/src/main/resources/
   sandbox/ivol/amdatu-searchandindex/nutch/src/main/resources/conf/
   
sandbox/ivol/amdatu-searchandindex/nutch/src/main/resources/conf/common-terms.utf8
   
sandbox/ivol/amdatu-searchandindex/nutch/src/main/resources/conf/nutch-site.xml

Added: sandbox/ivol/amdatu-searchandindex/nutch/install_nutch.bat
==============================================================================
--- (empty file)
+++ sandbox/ivol/amdatu-searchandindex/nutch/install_nutch.bat  Wed Jan 26 
10:59:13 2011
@@ -0,0 +1,2 @@
+REM Installs the nutch-1.2.jar next to this script into the local Maven repository as org.apache.nutch:nutch:1.2 (see pom.xml)
+call mvn install:install-file -Dfile="%~dp0nutch-1.2.jar" -DgroupId=org.apache.nutch -DartifactId=nutch -Dversion=1.2 -Dpackaging=jar

Added: sandbox/ivol/amdatu-searchandindex/nutch/nutch-1.2.jar
==============================================================================
Binary file. No diff available.

Added: sandbox/ivol/amdatu-searchandindex/nutch/pom.xml
==============================================================================
--- (empty file)
+++ sandbox/ivol/amdatu-searchandindex/nutch/pom.xml    Wed Jan 26 10:59:13 2011
@@ -0,0 +1,116 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.amdatu</groupId>
+    <artifactId>org.amdatu.searchandindex</artifactId>
+    <version>0.1.0-SNAPSHOT</version>
+  </parent>
+  <groupId>org.amdatu.searchandindex</groupId>
+  <artifactId>nutch</artifactId>
+  <packaging>bundle</packaging>
+  <name>Amdatu Search and Index - Nutch Service</name>
+  <version>${platform.version}</version>
+
+  <dependencies>
+    <!-- Amdatu bundles: 'provided' scope, so they are not matched by Embed-Dependency (*;scope=compile) -->
+    <dependency>
+      <groupId>org.amdatu.searchandindex</groupId>
+      <artifactId>solr</artifactId>
+      <version>${platform.version}</version>
+      <scope>provided</scope>
+      <type>bundle</type>
+    </dependency>
+    <dependency>
+      <groupId>org.amdatu.core.config</groupId>
+      <artifactId>templates</artifactId>
+      <version>${platform.version}</version>
+      <scope>provided</scope>
+      <type>bundle</type>
+    </dependency>
+    <!-- Nutch 1.2: install nutch-1.2.jar into the local repository with install_nutch.bat first -->
+    <dependency>
+      <groupId>org.apache.nutch</groupId>
+      <artifactId>nutch</artifactId>
+      <version>1.2</version>
+      <scope>compile</scope>
+    </dependency>
+    <!-- Third-party jars: 'compile' scope is stated explicitly because Embed-Dependency selects on it; -->
+    <!-- the jar type is Maven's default and is therefore left out -->
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-core</artifactId>
+      <version>0.20.2</version>
+      <scope>compile</scope>
+    </dependency>
+    <!-- Lucene -->
+    <dependency>
+      <groupId>org.apache.lucene</groupId>
+      <artifactId>lucene-core</artifactId>
+      <version>3.0.1</version>
+      <scope>compile</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.lucene</groupId>
+      <artifactId>lucene-misc</artifactId>
+      <version>3.0.1</version>
+      <scope>compile</scope>
+    </dependency>
+    <!-- Apache Commons -->
+    <dependency>
+      <groupId>commons-lang</groupId>
+      <artifactId>commons-lang</artifactId>
+      <version>2.1</version>
+      <scope>compile</scope>
+    </dependency>
+    <dependency>
+      <groupId>commons-logging</groupId>
+      <artifactId>commons-logging</artifactId>
+      <version>1.0.4</version>
+      <scope>compile</scope>
+    </dependency>
+  </dependencies>
+
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.felix</groupId>
+        <artifactId>maven-bundle-plugin</artifactId>
+        <extensions>true</extensions>
+        <configuration>
+          <instructions>
+            <Bundle-Activator>org.amdatu.searchandindex.nutch.osgi.Activator</Bundle-Activator>
+            <Bundle-SymbolicName>org.amdatu.searchandindex.nutch</Bundle-SymbolicName>
+            <!-- Embed every compile-scoped dependency; all package imports are optional -->
+            <Embed-Dependency>*;scope=compile</Embed-Dependency>
+            <Import-Package>*;resolution:=optional</Import-Package>
+            <!-- 'conf' is a classpath root; presumably so that Nutch finds conf/nutch-site.xml (confirm) -->
+            <Bundle-ClassPath>conf,.</Bundle-ClassPath>
+          </instructions>
+        </configuration>
+      </plugin>
+      <!-- Copies the packaged bundle into a deploy directory; override it with -Damdatu.deploy.dir=... -->
+      <plugin>
+        <artifactId>maven-antrun-plugin</artifactId>
+        <executions>
+          <execution>
+            <phase>package</phase>
+            <goals>
+              <goal>run</goal>
+            </goals>
+            <configuration>
+              <tasks>
+                <!-- Default only: Ant does not overwrite a property that has already been set -->
+                <property name="amdatu.deploy.dir" value="D:\Amdatu-svn\trunk\amdatu-release\target\org.amdatu.amdatu-release-0.1.0-SNAPSHOT-dev\deploy" />
+                <copy
+                  file="${project.build.directory}/${project.build.finalName}.jar"
+                  todir="${amdatu.deploy.dir}" overwrite="true" />
+              </tasks>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+    </plugins>
+  </build>
+</project>

Added: 
sandbox/ivol/amdatu-searchandindex/nutch/src/main/java/org/amdatu/searchandindex/nutch/impl/NutchIndex.java
==============================================================================
--- (empty file)
+++ 
sandbox/ivol/amdatu-searchandindex/nutch/src/main/java/org/amdatu/searchandindex/nutch/impl/NutchIndex.java
 Wed Jan 26 10:59:13 2011
@@ -0,0 +1,115 @@
+package org.amdatu.searchandindex.nutch.impl;
+
+
+import java.io.File;
+import java.io.IOException;
+import java.net.URL;
+import java.util.Date;
+import java.util.Enumeration;
+
+import org.amdatu.core.config.templates.ConfigTemplateManager;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.searcher.Hit;
+import org.apache.nutch.searcher.HitDetails;
+import org.apache.nutch.searcher.Hits;
+import org.apache.nutch.searcher.NutchBean;
+import org.apache.nutch.searcher.Query;
+import org.apache.nutch.util.NutchConfiguration;
+import org.osgi.framework.BundleContext;
+import org.osgi.service.log.LogService;
+
+public class NutchIndex {
+    // Services injected by the Felix dependency manager
+    private volatile LogService m_logService;
+    private volatile BundleContext m_bundleContext;
+    private volatile ConfigTemplateManager m_configTemplateManager;
+
+    // Work directory <user.dir>/work/nutch; assigned by init() and read by start()
+    private File m_workDir;
+
+    /**
+     * Writes the bundle's 'conf' entries to the work directory (existing files are kept) and sets the system
+     * properties that conf/nutch-site.xml refers to for its plugin and searcher directories.
+     */
+    public void init() {
+        try {
+            m_logService.log(LogService.LOG_INFO, "Initializing Nutch configuration");
+
+            File workBaseDir = new File(System.getProperty("user.dir"), "work");
+            m_workDir = new File(workBaseDir, "nutch");
+            m_workDir.mkdirs();
+
+            // Find all entries in our 'conf' directory.
+            final Enumeration<URL> resources = m_bundleContext.getBundle().findEntries("conf", "*.*", true);
+            if (resources != null) {
+                while (resources.hasMoreElements()) {
+                    final URL resource = resources.nextElement();
+                    File coreConfFile = new File(m_workDir, resource.getFile().replace("conf/", ""));
+                    if (!coreConfFile.exists()) {
+                        // Only write this file if it does not yet exist
+                        m_configTemplateManager.writeConfiguration(resource, coreConfFile);
+                    }
+                }
+            }
+
+            System.setProperty("nutch.site.plugin.folders", new File(m_workDir, "plugins").getAbsolutePath());
+            System.setProperty("nutch.site.searcher.dir", new File(m_workDir, "crawl").getAbsolutePath());
+        } catch (IOException e) {
+            m_logService.log(LogService.LOG_ERROR, "Could not write Nutch configuration to " + m_workDir, e);
+        }
+    }
+
+    /**
+     * Runs a fixed sample query ("smart") through a NutchBean and prints the hits to stdout. While Nutch runs,
+     * this class's classloader is the thread context classloader; the previous one is restored afterwards.
+     */
+    public void start() {
+        // Save the current context classloader first
+        final Thread thread = Thread.currentThread();
+        final ClassLoader contextClassLoader = thread.getContextClassLoader();
+        NutchBean nutchBean = null;
+        try {
+            // overwrite the context classloader, restore it later
+            thread.setContextClassLoader(this.getClass().getClassLoader());
+            System.setProperty("hadoop.log.dir", m_workDir.getAbsolutePath());
+
+            // define a keyword for the search and configure nutch
+            String nutchSearchString = "smart";
+            Configuration nutchConf = NutchConfiguration.create();
+            nutchBean = new NutchBean(nutchConf);
+
+            // build the query; the maximum number of hits is left at its default (10)
+            Query nutchQuery = Query.parse(nutchSearchString, nutchConf);
+            Hits nutchHits = nutchBean.search(nutchQuery);
+            System.out.println("Found " + nutchHits.getLength() + " hits.\n");
+
+            // get the details about each hit (title, URL, a summary and the date when this was fetched)
+            for (int i = 0; i < nutchHits.getLength(); i++) {
+                Hit hit = nutchHits.getHit(i);
+                HitDetails details = nutchBean.getDetails(hit);
+                String title = details.getValue("title");
+                String url = details.getValue("url");
+                String summary = nutchBean.getSummary(details, nutchQuery).toString();
+                System.out.println("Title is: " + title);
+                System.out.println("(" + url + ")");
+                System.out.println("Date Fetched: " + new Date(nutchBean.getFetchDate(details)));
+                System.out.println(summary + "\n");
+                System.out.println("----------------------------------------");
+            }
+        } catch (Throwable e) {
+            // Report through the injected log service instead of printing the stack trace to stderr
+            m_logService.log(LogService.LOG_ERROR, "Nutch sample query failed", e);
+        } finally {
+            // Always release the searcher, also when the query failed half-way
+            if (nutchBean != null) {
+                try {
+                    nutchBean.close();
+                } catch (Exception e) {
+                    m_logService.log(LogService.LOG_WARNING, "Could not close the Nutch searcher", e);
+                }
+            }
+            // Restore classloader
+            thread.setContextClassLoader(contextClassLoader);
+        }
+    }
+}

Added: 
sandbox/ivol/amdatu-searchandindex/nutch/src/main/java/org/amdatu/searchandindex/nutch/osgi/Activator.java
==============================================================================
--- (empty file)
+++ 
sandbox/ivol/amdatu-searchandindex/nutch/src/main/java/org/amdatu/searchandindex/nutch/osgi/Activator.java
  Wed Jan 26 10:59:13 2011
@@ -0,0 +1,50 @@
+/*
+    Copyright (C) 2010 Amdatu.org
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+package org.amdatu.searchandindex.nutch.osgi;
+
+import org.amdatu.core.config.templates.ConfigTemplateManager;
+import org.amdatu.searchandindex.nutch.impl.NutchIndex;
+import org.amdatu.searchandindex.solr.SolrRestService;
+import org.apache.felix.dm.DependencyActivatorBase;
+import org.apache.felix.dm.DependencyManager;
+import org.osgi.framework.BundleContext;
+import org.osgi.service.log.LogService;
+
+/**
+ * OSGi activator of the Nutch index bundle; it registers {@link NutchIndex} with the Felix dependency manager.
+ *
+ * @author ivol
+ */
+public class Activator extends DependencyActivatorBase {
+    /**
+     * The resource identifier for this bundle. Resources are only considered to be 'ours' when they are prefixed
+     * with this id.
+     */
+    public static final String RESOURCE_ID = "index";
+
+    public void init(BundleContext context, DependencyManager manager) throws 
Exception {
+        // Create and register the NutchIndex component; it requires the log, config template and Solr REST services.
+        manager.add(createComponent()
+            .setImplementation(NutchIndex.class)
+            
.add(createServiceDependency().setService(LogService.class).setRequired(true))
+            
.add(createServiceDependency().setService(ConfigTemplateManager.class).setRequired(true))
+            
.add(createServiceDependency().setService(SolrRestService.class).setRequired(true)));
+    }
+
+    public void destroy(BundleContext context, DependencyManager manager) 
throws Exception {
+    }
+}

Added: 
sandbox/ivol/amdatu-searchandindex/nutch/src/main/resources/conf/common-terms.utf8
==============================================================================
--- (empty file)
+++ 
sandbox/ivol/amdatu-searchandindex/nutch/src/main/resources/conf/common-terms.utf8
  Wed Jan 26 10:59:13 2011
@@ -0,0 +1,28 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Common terms and phrases which will be indexed in n-grams
+# in order to optimize search.
+content:a
+content:and
+content:for
+content:in
+content:of
+content:the
+content:to
+url:com
+url:http
+url:http-www
+url:www

Added: 
sandbox/ivol/amdatu-searchandindex/nutch/src/main/resources/conf/nutch-site.xml
==============================================================================
--- (empty file)
+++ 
sandbox/ivol/amdatu-searchandindex/nutch/src/main/resources/conf/nutch-site.xml 
    Wed Jan 26 10:59:13 2011
@@ -0,0 +1,16 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!-- Both values refer to system properties that NutchIndex.init() sets (the plugins and crawl directories) -->
+<configuration>
+  <property>
+    <name>plugin.folders</name>
+    <value>${nutch.site.plugin.folders}</value>
+    <description/>
+  </property>
+
+  <property>
+    <name>searcher.dir</name>
+    <value>${nutch.site.searcher.dir}</value>
+    <description/>
+  </property>
+</configuration>
\ No newline at end of file

Reply via email to