http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/plugin.dtd ---------------------------------------------------------------------- diff --git a/src/plugin/plugin.dtd b/src/plugin/plugin.dtd deleted file mode 100644 index 9b67da7..0000000 --- a/src/plugin/plugin.dtd +++ /dev/null @@ -1,206 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> - -<!-- - ! Licensed to the Apache Software Foundation (ASF) under one or more - ! contributor license agreements. See the NOTICE file distributed with - ! this work for additional information regarding copyright ownership. - ! The ASF licenses this file to You under the Apache License, Version 2.0 - ! (the "License"); you may not use this file except in compliance with - ! the License. You may obtain a copy of the License at - ! - ! http://www.apache.org/licenses/LICENSE-2.0 - ! - ! Unless required by applicable law or agreed to in writing, software - ! distributed under the License is distributed on an "AS IS" BASIS, - ! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ! See the License for the specific language governing permissions and - ! limitations under the License. - ! - ! - ! Document : plugin.dtd - ! Created on : 14 avril 2006, 22:14 - ! Author : Chris Mattmann, Jerome Charron - ! Description: Nutch plug-in manifest DTD - ! - ! PUBLIC ID : -//Apache Software Fundation//DTD Nutch Plugin Manifest 1.0//EN - ! SYSTEM ID : http://lucene.apache.org/nutch/plugin.dtd ---> - - - -<!-- - ! The <plugin> element defines the body of the manifest. - ! It optionally contains definitions for the plug-in runtime, - ! definitions of other plug-ins required by this one, - ! declarations of any new extension points being introduced by the plug-in, - ! as well as configuration of functional extensions - ! (configured into extension points defined by other plug-ins, - ! or introduced by this plug-in). 
- !--> -<!ELEMENT plugin (runtime?, requires?, extension-point*, extension*)> - -<!-- A user displayable name for the plug-in --> -<!ATTLIST plugin name CDATA #REQUIRED> - -<!-- - ! A unique identifier for the plug-in. - ! To minimize potential for naming collisions, - ! the identifier should be derived from the internet domain id - ! of the supplying provider (reversing the domain name tokens and - ! appending additional name tokens separated by dot [.]). - ! For example, provider nutch.org could define plug-in identifier - ! org.nutch.myplugin - !--> -<!ATTLIST plugin id CDATA #REQUIRED> - -<!-- - ! The plug-in version number. - ! NOTE : Version numbers compatibility are not yet implemented. - !--> -<!ATTLIST plugin version CDATA #REQUIRED> - -<!-- The user-displayable name of the provider supplying the plug-in. --> -<!ATTLIST plugin provider-name CDATA #IMPLIED> - -<!-- - ! The name of the plug-in class for this plug-in. - ! The class must be a subclass of org.apache.nutch.plugin.Plugin - !--> -<!ATTLIST plugin class CDATA #IMPLIED> - - -<!-- - ! The <requires> section of the manifest declares - ! any dependencies on other plug-ins. - !--> -<!ELEMENT requires (import+)> - - -<!-- Each dependency is specified using an <import> element. --> -<!ELEMENT import EMPTY> - -<!-- The identifier of the required plug-in. --> -<!ATTLIST import plugin CDATA #REQUIRED> - - -<!-- - ! The <runtime> section of the manifest contains a definition of one or more - ! libraries that make up the plug-in runtime. - ! The referenced libraries are used by the plugin execution mechanisms - ! (the plug-in class loader) to load and execute the correct code required by - ! the plug-in. - !--> -<!ELEMENT runtime (library+)> - - -<!-- - !The <library> elements collectively define the plug-in runtime. - ! At least one <library> must be specified. - !--> -<!ELEMENT library (export*)> - -<!-- - ! A string reference to a library file or directory containing classes - ! 
(relative to the plug-in install directory). - ! Directory references must contain trailing file separator. - !--> -<!ATTLIST library name CDATA #REQUIRED> - - -<!-- - ! Each <library> element can specify which portion - ! of the library should be exported. - ! The export rules are specified as a set of export masks. - ! By default (no export rules specified), - ! the library is considered to be private. - ! Each export mask is specified using the name attribute. - !--> -<!ELEMENT export EMPTY> - -<!-- - ! The export mask can have the following values: - ! * - indicates all contents of library are exported (public) - ! package.name.* - indicates all classes in the specified package - ! are exported. The matching rules are the same as in the - ! Java import statement. - ! package.name.ClassName - fully qualified java class name - ! - ! NOTE : export mask is not yet implemented in Nutch. - !--> -<!ATTLIST export name CDATA #REQUIRED> - - -<!-- - ! Nutch's architecture is based on the notion of configurable extension points. - ! Nutch itself predefines a set of extension points that cover the task of - ! extending it (for example, adding parser, indexing filter, ...). - ! In addition to the predefined extension points, each supplied plug-in can - ! declare additional extension points. By declaring an extension point the - ! plug-in is essentially advertising the ability to configure the plug-in - ! function with externally supplied extensions. - !--> -<!ELEMENT extension-point EMPTY> - -<!-- A user-displayable name for the extension point. --> -<!ATTLIST extension-point name CDATA #REQUIRED> - -<!-- A simple id, unique within this plug-in --> -<!ATTLIST extension-point id CDATA #REQUIRED> - - -<!-- - ! Actual extensions are configured into extension points - ! (predefined, or newly declared in this plug-in) in the <extension> section. - ! - ! The configuration information is specified by at least one implementation - ! with some parameters. 
- !--> -<!ELEMENT extension (implementation+)> - -<!-- - ! A reference to an extension point being configured. - ! The extension point can be one defined in this plug-in or another plug-in. - !--> -<!ATTLIST extension point CDATA #REQUIRED> - -<!-- - ! Optional identifier for this extension point configuration instance. - ! This is used by extension points that need to uniquely identify - ! (rather than just enumerate) the specific configured extensions. - ! The identifier is specified as a simple token unique within the definition - ! of the declaring plug-in. When used globally, the extension identifier - ! is qualified by the plug-in identifier. - ! FIXME : Seems it is never read in the code. - !--> -<!ATTLIST extension id CDATA #IMPLIED> - -<!-- - ! A user-displayable name for the extension. - ! FIXME : Seems it is never read in the code. - !--> -<!ATTLIST extension name CDATA #IMPLIED> - - -<!-- - ! Defines a specific implementation for the extension. - ! This implementation can define some special name/value parameters - ! used at runtime. - !--> -<!ELEMENT implementation (parameter*)> - -<!-- A unique identifier for this implementation --> -<!ATTLIST implementation id CDATA #REQUIRED> - -<!-- The fully-qualified Java Class that implements this extension-point --> -<!ATTLIST implementation class CDATA #REQUIRED> - - -<!-- Defines a name/value parameter --> -<!ELEMENT parameter EMPTY> - -<!-- The parameter's name (should be unique for an extension) --> -<!ATTLIST parameter name CDATA #REQUIRED> - -<!-- The parameter's value --> -<!ATTLIST parameter value CDATA #REQUIRED> -
http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/protocol-file/build.xml ---------------------------------------------------------------------- diff --git a/src/plugin/protocol-file/build.xml b/src/plugin/protocol-file/build.xml deleted file mode 100644 index 121b1fe..0000000 --- a/src/plugin/protocol-file/build.xml +++ /dev/null @@ -1,29 +0,0 @@ -<?xml version="1.0"?> -<!-- - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---> -<project name="protocol-file" default="jar-core"> - - <import file="../build-plugin.xml"/> - - <!-- for junit test --> - <mkdir dir="${build.test}/data"/> - <copy todir="${build.test}/data"> - <fileset dir="sample"> - <include name="*.txt"/> - </fileset> - </copy> -</project> http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/protocol-file/ivy.xml ---------------------------------------------------------------------- diff --git a/src/plugin/protocol-file/ivy.xml b/src/plugin/protocol-file/ivy.xml deleted file mode 100644 index 1a86d68..0000000 --- a/src/plugin/protocol-file/ivy.xml +++ /dev/null @@ -1,41 +0,0 @@ -<?xml version="1.0" ?> - -<!-- - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. 
See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---> - -<ivy-module version="1.0"> - <info organisation="org.apache.nutch" module="${ant.project.name}"> - <license name="Apache 2.0"/> - <ivyauthor name="Apache Nutch Team" url="http://nutch.apache.org"/> - <description> - Apache Nutch - </description> - </info> - - <configurations> - <include file="../../..//ivy/ivy-configurations.xml"/> - </configurations> - - <publications> - <!--get the artifact from our module name--> - <artifact conf="master"/> - </publications> - - <dependencies> - </dependencies> - -</ivy-module> http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/protocol-file/plugin.xml ---------------------------------------------------------------------- diff --git a/src/plugin/protocol-file/plugin.xml b/src/plugin/protocol-file/plugin.xml deleted file mode 100644 index 1647ce4..0000000 --- a/src/plugin/protocol-file/plugin.xml +++ /dev/null @@ -1,46 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<!-- - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. 
You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---> -<plugin - id="protocol-file" - name="File Protocol Plug-in" - version="1.0.0" - provider-name="nutch.org"> - - - <runtime> - <library name="protocol-file.jar"> - <export name="*"/> - </library> - </runtime> - - <requires> - <import plugin="nutch-extensionpoints"/> - </requires> - - <extension id="org.apache.nutch.protocol.file" - name="FileProtocol" - point="org.apache.nutch.protocol.Protocol"> - - <implementation id="org.apache.nutch.protocol.file.File" - class="org.apache.nutch.protocol.file.File"> - <parameter name="protocolName" value="file"/> - </implementation> - - </extension> - -</plugin> http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/protocol-file/sample/testprotocolfile.txt ---------------------------------------------------------------------- diff --git a/src/plugin/protocol-file/sample/testprotocolfile.txt b/src/plugin/protocol-file/sample/testprotocolfile.txt deleted file mode 100644 index fbe8a8a..0000000 --- a/src/plugin/protocol-file/sample/testprotocolfile.txt +++ /dev/null @@ -1 +0,0 @@ -Protocol File Test http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/protocol-file/sample/testprotocolfile_(encoded).txt ---------------------------------------------------------------------- diff --git a/src/plugin/protocol-file/sample/testprotocolfile_(encoded).txt b/src/plugin/protocol-file/sample/testprotocolfile_(encoded).txt deleted file mode 100644 index fbe8a8a..0000000 --- a/src/plugin/protocol-file/sample/testprotocolfile_(encoded).txt +++ /dev/null @@ -1 +0,0 @@ -Protocol File Test 
http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java ---------------------------------------------------------------------- diff --git a/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java b/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java deleted file mode 100644 index 2712218..0000000 --- a/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java +++ /dev/null @@ -1,228 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.nutch.protocol.file; - -import java.net.URL; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.io.Text; - -import org.apache.nutch.crawl.CrawlDatum; -import org.apache.nutch.net.protocols.Response; -import org.apache.nutch.protocol.Content; -import org.apache.nutch.protocol.Protocol; -import org.apache.nutch.protocol.ProtocolOutput; -import org.apache.nutch.protocol.ProtocolStatus; -import org.apache.nutch.protocol.RobotRulesParser; -import org.apache.nutch.util.NutchConfiguration; - -import crawlercommons.robots.BaseRobotRules; - -/** - * This class is a protocol plugin used for file: scheme. It creates - * {@link FileResponse} object and gets the content of the url from it. - * Configurable parameters are {@code file.content.limit} and - * {@code file.crawl.parent} in nutch-default.xml defined under - * "file properties" section. - * - * @author John Xing - */ -public class File implements Protocol { - - public static final Logger LOG = LoggerFactory.getLogger(File.class); - - static final int MAX_REDIRECTS = 5; - - int maxContentLength; - boolean crawlParents; - - /** - * if true return a redirect for symbolic links and do not resolve the links - * internally - */ - boolean symlinksAsRedirects = true; - - private Configuration conf; - - public File() { - } - - /** - * Set the {@link Configuration} object - */ - public void setConf(Configuration conf) { - this.conf = conf; - this.maxContentLength = conf.getInt("file.content.limit", 64 * 1024); - this.crawlParents = conf.getBoolean("file.crawl.parent", true); - this.symlinksAsRedirects = conf.getBoolean( - "file.crawl.redirect_noncanonical", true); - } - - /** - * Get the {@link Configuration} object - */ - public Configuration getConf() { - return this.conf; - } - - /** - * Set the length after at which content is truncated. 
- */ - public void setMaxContentLength(int maxContentLength) { - this.maxContentLength = maxContentLength; - } - - /** - * Creates a {@link FileResponse} object corresponding to the url and return a - * {@link ProtocolOutput} object as per the content received - * - * @param url - * Text containing the url - * @param datum - * The CrawlDatum object corresponding to the url - * - * @return {@link ProtocolOutput} object for the content of the file indicated - * by url - */ - public ProtocolOutput getProtocolOutput(Text url, CrawlDatum datum) { - String urlString = url.toString(); - try { - URL u = new URL(urlString); - - int redirects = 0; - - while (true) { - FileResponse response; - response = new FileResponse(u, datum, this, getConf()); // make a - // request - - int code = response.getCode(); - - if (code == 200) { // got a good response - return new ProtocolOutput(response.toContent()); // return it - - } else if (code == 304) { // got not modified - return new ProtocolOutput(response.toContent(), - ProtocolStatus.STATUS_NOTMODIFIED); - - } else if (code == 401) { // access denied / no read permissions - return new ProtocolOutput(response.toContent(), new ProtocolStatus( - ProtocolStatus.ACCESS_DENIED)); - - } else if (code == 404) { // no such file - return new ProtocolOutput(response.toContent(), - ProtocolStatus.STATUS_NOTFOUND); - - } else if (code >= 300 && code < 400) { // handle redirect - u = new URL(response.getHeader("Location")); - if (LOG.isTraceEnabled()) { - LOG.trace("redirect to " + u); - } - if (symlinksAsRedirects) { - return new ProtocolOutput(response.toContent(), new ProtocolStatus( - ProtocolStatus.MOVED, u)); - } else if (redirects == MAX_REDIRECTS) { - LOG.trace("Too many redirects: {}", url); - return new ProtocolOutput(response.toContent(), new ProtocolStatus( - ProtocolStatus.REDIR_EXCEEDED, u)); - } - redirects++; - - } else { // convert to exception - throw new FileError(code); - } - } - } catch (Exception e) { - e.printStackTrace(); 
- return new ProtocolOutput(null, new ProtocolStatus(e)); - } - } - - /** - * Quick way for running this class. Useful for debugging. - */ - public static void main(String[] args) throws Exception { - int maxContentLength = Integer.MIN_VALUE; - String logLevel = "info"; - boolean dumpContent = false; - String urlString = null; - - String usage = "Usage: File [-logLevel level] [-maxContentLength L] [-dumpContent] url"; - - if (args.length == 0) { - System.err.println(usage); - System.exit(-1); - } - - for (int i = 0; i < args.length; i++) { - if (args[i].equals("-logLevel")) { - logLevel = args[++i]; - } else if (args[i].equals("-maxContentLength")) { - maxContentLength = Integer.parseInt(args[++i]); - } else if (args[i].equals("-dumpContent")) { - dumpContent = true; - } else if (i != args.length - 1) { - System.err.println(usage); - System.exit(-1); - } else - urlString = args[i]; - } - - File file = new File(); - file.setConf(NutchConfiguration.create()); - - if (maxContentLength != Integer.MIN_VALUE) // set maxContentLength - file.setMaxContentLength(maxContentLength); - - // set log level - // LOG.setLevel(Level.parse((new String(logLevel)).toUpperCase())); - - ProtocolOutput output = file.getProtocolOutput(new Text(urlString), - new CrawlDatum()); - Content content = output.getContent(); - - System.err.println("URL: " + content.getUrl()); - System.err.println("Status: " + output.getStatus()); - System.err.println("Content-Type: " + content.getContentType()); - System.err.println("Content-Length: " - + content.getMetadata().get(Response.CONTENT_LENGTH)); - System.err.println("Last-Modified: " - + content.getMetadata().get(Response.LAST_MODIFIED)); - String redirectLocation = content.getMetadata().get("Location"); - if (redirectLocation != null) { - System.err.println("Location: " + redirectLocation); - } - - if (dumpContent) { - System.out.print(new String(content.getContent())); - } - - file = null; - } - - /** - * No robots parsing is done for file protocol. 
So this returns a set of empty - * rules which will allow every url. - */ - public BaseRobotRules getRobotRules(Text url, CrawlDatum datum) { - return RobotRulesParser.EMPTY_RULES; - } -} http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileError.java ---------------------------------------------------------------------- diff --git a/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileError.java b/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileError.java deleted file mode 100644 index 4fef340..0000000 --- a/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileError.java +++ /dev/null @@ -1,36 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.nutch.protocol.file; - -/** - * Thrown for File error codes. 
- */ -public class FileError extends FileException { - - private int code; - - public int getCode(int code) { - return code; - } - - public FileError(int code) { - super("File Error: " + code); - this.code = code; - } - -} http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileException.java ---------------------------------------------------------------------- diff --git a/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileException.java b/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileException.java deleted file mode 100644 index f0467de..0000000 --- a/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileException.java +++ /dev/null @@ -1,40 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.nutch.protocol.file; - -import org.apache.nutch.protocol.ProtocolException; - -public class FileException extends ProtocolException { - - public FileException() { - super(); - } - - public FileException(String message) { - super(message); - } - - public FileException(String message, Throwable cause) { - super(message, cause); - } - - public FileException(Throwable cause) { - super(cause); - } - -} http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java ---------------------------------------------------------------------- diff --git a/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java b/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java deleted file mode 100644 index b6e74ff..0000000 --- a/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java +++ /dev/null @@ -1,317 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.nutch.protocol.file; - -// JDK imports -import java.net.URL; -import java.io.IOException; -import java.io.UnsupportedEncodingException; - -// Nutch imports -import org.apache.nutch.crawl.CrawlDatum; -import org.apache.nutch.protocol.Content; -import org.apache.nutch.util.MimeUtil; -import org.apache.nutch.metadata.Metadata; -import org.apache.nutch.net.protocols.HttpDateFormat; -import org.apache.nutch.net.protocols.Response; - -// Tika imports -import org.apache.tika.Tika; - -// Hadoop imports -import org.apache.hadoop.conf.Configuration; - -/************************************ - * FileResponse.java mimics file replies as http response. It tries its best to - * follow http's way for headers, response codes as well as exceptions. - * - * Comments: (1) java.net.URL and java.net.URLConnection can handle file: - * scheme. However they are not flexible enough, so not used in this - * implementation. - * - * (2) java.io.File is used for its abstractness across platforms. Warning: - * java.io.File API (1.4.2) does not elaborate on how special files, such as - * /dev/* in unix and /proc/* on linux, are treated. Tests show (a) - * java.io.File.isFile() returns false for /dev/* (b) java.io.File.isFile() - * returns true for /proc/* (c) java.io.File.length() returns 0 for /proc/* We are - * probably okay for now. Could be buggy here. How about special files on - * windows? - * - * (3) java.io.File API (1.4.2) does not seem to know unix hard link files. They - * are just treated as individual files. - * - * (4) No fancy POSIX file attributes yet. May never need?
- * - * @author John Xing - ***********************************/ -public class FileResponse { - - private String orig; - private String base; - private byte[] content; - private static final byte[] EMPTY_CONTENT = new byte[0]; - private int code; - private Metadata headers = new Metadata(); - - private final File file; - private Configuration conf; - - private MimeUtil MIME; - private Tika tika; - - /** Returns the response code. */ - public int getCode() { - return code; - } - - /** Returns the value of a named header. */ - public String getHeader(String name) { - return headers.get(name); - } - - public byte[] getContent() { - return content; - } - - public Content toContent() { - return new Content(orig, base, (content != null ? content : EMPTY_CONTENT), - getHeader(Response.CONTENT_TYPE), headers, this.conf); - } - - /** - * Default public constructor - * - * @param url - * @param datum - * @param file - * @param conf - * @throws FileException - * @throws IOException - */ - public FileResponse(URL url, CrawlDatum datum, File file, Configuration conf) - throws FileException, IOException { - - this.orig = url.toString(); - this.base = url.toString(); - this.file = file; - this.conf = conf; - - MIME = new MimeUtil(conf); - tika = new Tika(); - - if (!"file".equals(url.getProtocol())) - throw new FileException("Not a file url:" + url); - - if (File.LOG.isTraceEnabled()) { - File.LOG.trace("fetching " + url); - } - - if (url.getPath() != url.getFile()) { - if (File.LOG.isWarnEnabled()) { - File.LOG.warn("url.getPath() != url.getFile(): " + url); - } - } - - String path = "".equals(url.getPath()) ? "/" : url.getPath(); - - try { - // specify the encoding via the config later? 
- path = java.net.URLDecoder.decode(path, "UTF-8"); - } catch (UnsupportedEncodingException ex) { - } - - try { - - this.content = null; - - // url.toURI() is only in j2se 1.5.0 - // java.io.File f = new java.io.File(url.toURI()); - java.io.File f = new java.io.File(path); - - if (!f.exists()) { - this.code = 404; // http Not Found - return; - } - - if (!f.canRead()) { - this.code = 401; // http Unauthorized - return; - } - - // symbolic link or relative path on unix - // fix me: what's the consequence on windows platform - // where case is insensitive - if (!f.equals(f.getCanonicalFile())) { - // set headers - // hdrs.put("Location", f.getCanonicalFile().toURI()); - // - // we want to automatically escape characters that are illegal in URLs. - // It is recommended that new code convert an abstract pathname into a - // URL - // by first converting it into a URI, via the toURI method, and then - // converting the URI into a URL via the URI.toURL method. - headers.set(Response.LOCATION, f.getCanonicalFile().toURI().toURL() - .toString()); - - this.code = 300; // http redirect - return; - } - if (f.lastModified() <= datum.getModifiedTime()) { - this.code = 304; - this.headers.set("Last-Modified", - HttpDateFormat.toString(f.lastModified())); - return; - } - - if (f.isDirectory()) { - getDirAsHttpResponse(f); - } else if (f.isFile()) { - getFileAsHttpResponse(f); - } else { - this.code = 500; // http Internal Server Error - return; - } - - } catch (IOException e) { - throw e; - } - - } - - // get file as http response - private void getFileAsHttpResponse(java.io.File f) throws FileException, - IOException { - - // ignore file of size larger than - // Integer.MAX_VALUE = 2^31-1 = 2147483647 - long size = f.length(); - if (size > Integer.MAX_VALUE) { - throw new FileException("file is too large, size: " + size); - // or we can do this? 
- // this.code = 400; // http Bad request - // return; - } - - // capture content - int len = (int) size; - - if (this.file.maxContentLength >= 0 && len > this.file.maxContentLength) - len = this.file.maxContentLength; - - this.content = new byte[len]; - - java.io.InputStream is = new java.io.FileInputStream(f); - int offset = 0; - int n = 0; - while (offset < len - && (n = is.read(this.content, offset, len - offset)) >= 0) { - offset += n; - } - if (offset < len) { // keep whatever already have, but issue a warning - if (File.LOG.isWarnEnabled()) { - File.LOG.warn("not enough bytes read from file: " + f.getPath()); - } - } - is.close(); - - // set headers - headers.set(Response.CONTENT_LENGTH, new Long(size).toString()); - headers.set(Response.LAST_MODIFIED, - HttpDateFormat.toString(f.lastModified())); - - String mimeType = tika.detect(f); - - headers.set(Response.CONTENT_TYPE, mimeType != null ? mimeType : ""); - - // response code - this.code = 200; // http OK - } - - /** - * get dir list as http response - * - * @param f - * @throws IOException - */ - private void getDirAsHttpResponse(java.io.File f) throws IOException { - - String path = f.toString(); - if (this.file.crawlParents) - this.content = list2html(f.listFiles(), path, "/".equals(path) ? 
false - : true); - else - this.content = list2html(f.listFiles(), path, false); - - // set headers - headers.set(Response.CONTENT_LENGTH, - new Integer(this.content.length).toString()); - headers.set(Response.CONTENT_TYPE, "text/html"); - headers.set(Response.LAST_MODIFIED, - HttpDateFormat.toString(f.lastModified())); - - // response code - this.code = 200; // http OK - } - - /** - * generate html page from dir list - * - * @param list - * @param path - * @param includeDotDot - * @return - */ - private byte[] list2html(java.io.File[] list, String path, - boolean includeDotDot) { - - StringBuffer x = new StringBuffer("<html><head>"); - x.append("<title>Index of " + path + "</title></head>\n"); - x.append("<body><h1>Index of " + path + "</h1><pre>\n"); - - if (includeDotDot) { - x.append("<a href='../'>../</a>\t-\t-\t-\n"); - } - - // fix me: we might want to sort list here! but not now. - - java.io.File f; - for (int i = 0; i < list.length; i++) { - f = list[i]; - String name = f.getName(); - String time = HttpDateFormat.toString(f.lastModified()); - if (f.isDirectory()) { - // java 1.4.2 api says dir itself and parent dir are not listed - // so the following is not needed. 
- // if (name.equals(".") || name.equals("..")) - // continue; - x.append("<a href='" + name + "/" + "'>" + name + "/</a>\t"); - x.append(time + "\t-\n"); - } else if (f.isFile()) { - x.append("<a href='" + name + "'>" + name + "</a>\t"); - x.append(time + "\t" + f.length() + "\n"); - } else { - // ignore any other - } - } - - x.append("</pre></body></html>\n"); - - return new String(x).getBytes(); - } - -} http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/package.html ---------------------------------------------------------------------- diff --git a/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/package.html b/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/package.html deleted file mode 100644 index 221c79c..0000000 --- a/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/package.html +++ /dev/null @@ -1,5 +0,0 @@ -<html> -<body> -<p>Protocol plugin which supports retrieving local file resources.</p><p></p> -</body> -</html> http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/protocol-file/src/test/org/apache/nutch/protocol/file/TestProtocolFile.java ---------------------------------------------------------------------- diff --git a/src/plugin/protocol-file/src/test/org/apache/nutch/protocol/file/TestProtocolFile.java b/src/plugin/protocol-file/src/test/org/apache/nutch/protocol/file/TestProtocolFile.java deleted file mode 100644 index 5f95377..0000000 --- a/src/plugin/protocol-file/src/test/org/apache/nutch/protocol/file/TestProtocolFile.java +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.nutch.protocol.file; - -// Hadoop imports -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.io.Text; - -// Nutch imports -import org.apache.nutch.crawl.CrawlDatum; -import org.apache.nutch.net.protocols.Response; -import org.apache.nutch.protocol.Protocol; -import org.apache.nutch.protocol.ProtocolException; -import org.apache.nutch.protocol.ProtocolFactory; -import org.apache.nutch.protocol.ProtocolOutput; -import org.apache.nutch.protocol.ProtocolStatus; -import org.apache.nutch.util.NutchConfiguration; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; - -/** - * @author mattmann - * @version $Revision$ - * - * <p> - * Unit tests for the {@link File}Protocol. - * </p> - * . 
- */ -public class TestProtocolFile { - - private String fileSeparator = System.getProperty("file.separator"); - private String sampleDir = System.getProperty("test.data", "."); - - private static final String[] testTextFiles = new String[] { - "testprotocolfile.txt", "testprotocolfile_(encoded).txt", - "testprotocolfile_%28encoded%29.txt" }; - - private static final CrawlDatum datum = new CrawlDatum(); - - private static final String expectedMimeType = "text/plain"; - - private Configuration conf; - - @Before - public void setUp() { - conf = NutchConfiguration.create(); - } - - @Test - public void testSetContentType() throws ProtocolException { - for (String testTextFile : testTextFiles) { - setContentType(testTextFile); - } - } - - /** - * Tests the setting of the <code>Response.CONTENT_TYPE</code> metadata field. - * - * @since NUTCH-384 - * - */ - public void setContentType(String testTextFile) throws ProtocolException { - String urlString = "file:" + sampleDir + fileSeparator + testTextFile; - Assert.assertNotNull(urlString); - Protocol protocol = new ProtocolFactory(conf).getProtocol(urlString); - ProtocolOutput output = protocol.getProtocolOutput(new Text(urlString), - datum); - Assert.assertNotNull(output); - Assert.assertEquals("Status code: [" + output.getStatus().getCode() - + "], not equal to: [" + ProtocolStatus.SUCCESS + "]: args: [" - + output.getStatus().getArgs() + "]", ProtocolStatus.SUCCESS, output - .getStatus().getCode()); - Assert.assertNotNull(output.getContent()); - Assert.assertNotNull(output.getContent().getContentType()); - Assert.assertEquals(expectedMimeType, output.getContent().getContentType()); - Assert.assertNotNull(output.getContent().getMetadata()); - Assert.assertEquals(expectedMimeType, output.getContent().getMetadata() - .get(Response.CONTENT_TYPE)); - - } - -} http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/protocol-ftp/build.xml ---------------------------------------------------------------------- diff 
--git a/src/plugin/protocol-ftp/build.xml b/src/plugin/protocol-ftp/build.xml deleted file mode 100644 index 79314d4..0000000 --- a/src/plugin/protocol-ftp/build.xml +++ /dev/null @@ -1,22 +0,0 @@ -<?xml version="1.0"?> -<!-- - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---> -<project name="protocol-ftp" default="jar-core"> - - <import file="../build-plugin.xml"/> - -</project> http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/protocol-ftp/ivy.xml ---------------------------------------------------------------------- diff --git a/src/plugin/protocol-ftp/ivy.xml b/src/plugin/protocol-ftp/ivy.xml deleted file mode 100644 index 214c445..0000000 --- a/src/plugin/protocol-ftp/ivy.xml +++ /dev/null @@ -1,42 +0,0 @@ -<?xml version="1.0" ?> - -<!-- - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. 
You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---> - -<ivy-module version="1.0"> - <info organisation="org.apache.nutch" module="${ant.project.name}"> - <license name="Apache 2.0"/> - <ivyauthor name="Apache Nutch Team" url="http://nutch.apache.org"/> - <description> - Apache Nutch - </description> - </info> - - <configurations> - <include file="../../..//ivy/ivy-configurations.xml"/> - </configurations> - - <publications> - <!--get the artifact from our module name--> - <artifact conf="master"/> - </publications> - - <dependencies> - <dependency org="commons-net" name="commons-net" rev="1.2.2" conf="*->master"/> - </dependencies> - -</ivy-module> http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/protocol-ftp/plugin.xml ---------------------------------------------------------------------- diff --git a/src/plugin/protocol-ftp/plugin.xml b/src/plugin/protocol-ftp/plugin.xml deleted file mode 100644 index 1421e37..0000000 --- a/src/plugin/protocol-ftp/plugin.xml +++ /dev/null @@ -1,46 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<!-- - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. 
You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---> -<plugin - id="protocol-ftp" - name="Ftp Protocol Plug-in" - version="1.0.0" - provider-name="nutch.org"> - - <runtime> - <library name="protocol-ftp.jar"> - <export name="*"/> - </library> - <library name="commons-net-1.2.0-dev.jar"/> - </runtime> - - <requires> - <import plugin="nutch-extensionpoints"/> - </requires> - - <extension id="org.apache.nutch.protocol.ftp" - name="FtpProtocol" - point="org.apache.nutch.protocol.Protocol"> - - <implementation id="org.apache.nutch.protocol.ftp.Ftp" - class="org.apache.nutch.protocol.ftp.Ftp"> - <parameter name="protocolName" value="ftp"/> - </implementation> - - </extension> - -</plugin> http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Client.java ---------------------------------------------------------------------- diff --git a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Client.java b/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Client.java deleted file mode 100644 index da25d87..0000000 --- a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Client.java +++ /dev/null @@ -1,595 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.nutch.protocol.ftp; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.OutputStream; - -import java.net.InetAddress; -import java.net.Socket; - -import java.util.List; -//import java.util.LinkedList; - -import org.apache.commons.net.MalformedServerReplyException; - -import org.apache.commons.net.ftp.FTP; -import org.apache.commons.net.ftp.FTPCommand; -import org.apache.commons.net.ftp.FTPFile; -import org.apache.commons.net.ftp.FTPFileEntryParser; -import org.apache.commons.net.ftp.FTPReply; - -import org.apache.commons.net.ftp.FTPConnectionClosedException; - -/*********************************************** - * Client.java encapsulates functionalities necessary for nutch to get dir list - * and retrieve file from an FTP server. This class takes care of all low level - * details of interacting with an FTP server and provides a convenient higher - * level interface. - * - * Modified from FtpClient.java in apache commons-net. - * - * Notes by John Xing: ftp server implementations are hardly uniform and none - * seems to follow RFCs whole-heartedly. We have no choice, but assume common - * denominator as following: (1) Use stream mode for data transfer. Block mode - * will be better for multiple file downloading and partial file downloading. - * However not every ftpd has block mode support. (2) Use passive mode for data - * connection. So Nutch will work if we run behind firewall. 
(3) Data connection - * is opened/closed per ftp command for the reasons listed in (1). There are ftp - * servers out there, when partial downloading is enforced by closing data - * channel socket on our client side, the server side immediately closes control - * channel (socket). Our codes deal with such a bad behavior. (4) LIST is used - * to obtain remote file attributes if possible. MDTM & SIZE would be nice, but - * not as ubiquitously implemented as LIST. (5) Avoid using ABOR in single - * thread? Do not use it at all. - * - * About exceptions: Some specific exceptions are re-thrown as one of - * FtpException*.java In fact, each function throws FtpException*.java or pass - * IOException. - * - * @author John Xing - ***********************************************/ - -public class Client extends FTP { - private int __dataTimeout; - private int __passivePort; - private String __passiveHost; - // private int __fileType, __fileFormat; - private boolean __remoteVerificationEnabled; - // private FTPFileEntryParser __entryParser; - private String __systemName; - - /** Public default constructor */ - public Client() { - __initDefaults(); - __dataTimeout = -1; - __remoteVerificationEnabled = true; - } - - // defaults when initialize - private void __initDefaults() { - __passiveHost = null; - __passivePort = -1; - __systemName = null; - // __fileType = FTP.ASCII_FILE_TYPE; - // __fileFormat = FTP.NON_PRINT_TEXT_FORMAT; - // __entryParser = null; - } - - // parse reply for pass() - private void __parsePassiveModeReply(String reply) - throws MalformedServerReplyException { - int i, index, lastIndex; - String octet1, octet2; - StringBuffer host; - - reply = reply.substring(reply.indexOf('(') + 1, reply.indexOf(')')).trim(); - - host = new StringBuffer(24); - lastIndex = 0; - index = reply.indexOf(','); - host.append(reply.substring(lastIndex, index)); - - for (i = 0; i < 3; i++) { - host.append('.'); - lastIndex = index + 1; - index = reply.indexOf(',', lastIndex); - 
host.append(reply.substring(lastIndex, index)); - } - - lastIndex = index + 1; - index = reply.indexOf(',', lastIndex); - - octet1 = reply.substring(lastIndex, index); - octet2 = reply.substring(index + 1); - - // index and lastIndex now used as temporaries - try { - index = Integer.parseInt(octet1); - lastIndex = Integer.parseInt(octet2); - } catch (NumberFormatException e) { - throw new MalformedServerReplyException( - "Could not parse passive host information.\nServer Reply: " + reply); - } - - index <<= 8; - index |= lastIndex; - - __passiveHost = host.toString(); - __passivePort = index; - } - - /** - * open a passive data connection socket - * - * @param command - * @param arg - * @return - * @throws IOException - * @throws FtpExceptionCanNotHaveDataConnection - */ - protected Socket __openPassiveDataConnection(int command, String arg) - throws IOException, FtpExceptionCanNotHaveDataConnection { - Socket socket; - - // // 20040317, xing, accommodate ill-behaved servers, see below - // int port_previous = __passivePort; - - if (pasv() != FTPReply.ENTERING_PASSIVE_MODE) - throw new FtpExceptionCanNotHaveDataConnection("pasv() failed. " - + getReplyString()); - - try { - __parsePassiveModeReply(getReplyStrings()[0]); - } catch (MalformedServerReplyException e) { - throw new FtpExceptionCanNotHaveDataConnection(e.getMessage()); - } - - // // 20040317, xing, accommodate ill-behaved servers, see above - // int count = 0; - // System.err.println("__passivePort "+__passivePort); - // System.err.println("port_previous "+port_previous); - // while (__passivePort == port_previous) { - // // just quit if too many tries. make it an exception here? - // if (count++ > 10) - // return null; - // // slow down further for each new try - // Thread.sleep(500*count); - // if (pasv() != FTPReply.ENTERING_PASSIVE_MODE) - // throw new FtpExceptionCanNotHaveDataConnection( - // "pasv() failed. 
" + getReplyString()); - // //return null; - // try { - // __parsePassiveModeReply(getReplyStrings()[0]); - // } catch (MalformedServerReplyException e) { - // throw new FtpExceptionCanNotHaveDataConnection(e.getMessage()); - // } - // } - - socket = _socketFactory_.createSocket(__passiveHost, __passivePort); - - if (!FTPReply.isPositivePreliminary(sendCommand(command, arg))) { - socket.close(); - return null; - } - - if (__remoteVerificationEnabled && !verifyRemote(socket)) { - InetAddress host1, host2; - - host1 = socket.getInetAddress(); - host2 = getRemoteAddress(); - - socket.close(); - - // our precaution - throw new FtpExceptionCanNotHaveDataConnection( - "Host attempting data connection " + host1.getHostAddress() - + " is not same as server " + host2.getHostAddress() - + " So we intentionally close it for security precaution."); - } - - if (__dataTimeout >= 0) - socket.setSoTimeout(__dataTimeout); - - return socket; - } - - /*** - * Sets the timeout in milliseconds to use for data connection. set - * immediately after opening the data connection. - ***/ - public void setDataTimeout(int timeout) { - __dataTimeout = timeout; - } - - /*** - * Closes the connection to the FTP server and restores connection parameters - * to the default values. - * <p> - * - * @exception IOException - * If an error occurs while disconnecting. - ***/ - public void disconnect() throws IOException { - __initDefaults(); - super.disconnect(); - // no worry for data connection, since we always close it - // in every ftp command that invloves data connection - } - - /*** - * Enable or disable verification that the remote host taking part of a data - * connection is the same as the host to which the control connection is - * attached. The default is for verification to be enabled. You may set this - * value at any time, whether the FTPClient is currently connected or not. - * <p> - * - * @param enable - * True to enable verification, false to disable verification. 
- ***/ - public void setRemoteVerificationEnabled(boolean enable) { - __remoteVerificationEnabled = enable; - } - - /*** - * Return whether or not verification of the remote host participating in data - * connections is enabled. The default behavior is for verification to be - * enabled. - * <p> - * - * @return True if verification is enabled, false if not. - ***/ - public boolean isRemoteVerificationEnabled() { - return __remoteVerificationEnabled; - } - - /*** - * Login to the FTP server using the provided username and password. - * <p> - * - * @param username - * The username to login under. - * @param password - * The password to use. - * @return True if successfully completed, false if not. - * @exception FTPConnectionClosedException - * If the FTP server prematurely closes the connection as a - * result of the client being idle or some other reason causing - * the server to send FTP reply code 421. This exception may be - * caught either as an IOException or independently as itself. - * @exception IOException - * If an I/O error occurs while either sending a command to the - * server or receiving a reply from the server. - ***/ - public boolean login(String username, String password) throws IOException { - user(username); - - if (FTPReply.isPositiveCompletion(getReplyCode())) - return true; - - // If we get here, we either have an error code, or an intermediate - // reply requesting password. - if (!FTPReply.isPositiveIntermediate(getReplyCode())) - return false; - - return FTPReply.isPositiveCompletion(pass(password)); - } - - /*** - * Logout of the FTP server by sending the QUIT command. - * <p> - * - * @return True if successfully completed, false if not. - * @exception FTPConnectionClosedException - * If the FTP server prematurely closes the connection as a - * result of the client being idle or some other reason causing - * the server to send FTP reply code 421. This exception may be - * caught either as an IOException or independently as itself.
- * @exception IOException - * If an I/O error occurs while either sending a command to the - * server or receiving a reply from the server. - ***/ - public boolean logout() throws IOException { - return FTPReply.isPositiveCompletion(quit()); - } - - /** - * retrieve list reply for path - * - * @param path - * @param entries - * @param limit - * @param parser - * @throws IOException - * @throws FtpExceptionCanNotHaveDataConnection - * @throws FtpExceptionUnknownForcedDataClose - * @throws FtpExceptionControlClosedByForcedDataClose - */ - public void retrieveList(String path, List<FTPFile> entries, int limit, - FTPFileEntryParser parser) throws IOException, - FtpExceptionCanNotHaveDataConnection, FtpExceptionUnknownForcedDataClose, - FtpExceptionControlClosedByForcedDataClose { - Socket socket = __openPassiveDataConnection(FTPCommand.LIST, path); - - if (socket == null) - throw new FtpExceptionCanNotHaveDataConnection("LIST " - + ((path == null) ? "" : path)); - - BufferedReader reader = new BufferedReader(new InputStreamReader( - socket.getInputStream())); - - // force-close data channel socket, when download limit is reached - // boolean mandatory_close = false; - - // List entries = new LinkedList(); - int count = 0; - String line = parser.readNextEntry(reader); - while (line != null) { - FTPFile ftpFile = parser.parseFTPEntry(line); - // skip non-formatted lines - if (ftpFile == null) { - line = parser.readNextEntry(reader); - continue; - } - entries.add(ftpFile); - count += line.length(); - // impose download limit if limit >= 0, otherwise no limit - // here, cut off is up to the line when total bytes is just over limit - if (limit >= 0 && count > limit) { - // mandatory_close = true; - break; - } - line = parser.readNextEntry(reader); - } - - // if (mandatory_close) - // you always close here, no matter mandatory_close or not. - // however different ftp servers respond differently, see below. 
- socket.close(); - - // scenarios: - // (1) mandatory_close is false, download limit not reached - // no special care here - // (2) mandatory_close is true, download limit is reached - // different servers have different reply codes: - - try { - int reply = getReply(); - if (!_notBadReply(reply)) - throw new FtpExceptionUnknownForcedDataClose(getReplyString()); - } catch (FTPConnectionClosedException e) { - // some ftp servers will close control channel if data channel socket - // is closed by our end before all data has been read out. Check: - // tux414.q-tam.hp.com FTP server (hp.com version whp02) - // so must catch FTPConnectionClosedException thrown by getReply() above - // disconnect(); - throw new FtpExceptionControlClosedByForcedDataClose(e.getMessage()); - } - - } - - /** - * retrieve file for path - * - * @param path - * @param os - * @param limit - * @throws IOException - * @throws FtpExceptionCanNotHaveDataConnection - * @throws FtpExceptionUnknownForcedDataClose - * @throws FtpExceptionControlClosedByForcedDataClose - */ - public void retrieveFile(String path, OutputStream os, int limit) - throws IOException, FtpExceptionCanNotHaveDataConnection, - FtpExceptionUnknownForcedDataClose, - FtpExceptionControlClosedByForcedDataClose { - - Socket socket = __openPassiveDataConnection(FTPCommand.RETR, path); - - if (socket == null) - throw new FtpExceptionCanNotHaveDataConnection("RETR " - + ((path == null) ? "" : path)); - - InputStream input = socket.getInputStream(); - - // 20040318, xing, treat everything as BINARY_FILE_TYPE for now - // do we ever need ASCII_FILE_TYPE? - // if (__fileType == ASCII_FILE_TYPE) - // input = new FromNetASCIIInputStream(input); - - // fixme, should we instruct server here for binary file type? 
- - // force-close data channel socket - // boolean mandatory_close = false; - - int len; - int count = 0; - byte[] buf = new byte[org.apache.commons.net.io.Util.DEFAULT_COPY_BUFFER_SIZE]; - while ((len = input.read(buf, 0, buf.length)) != -1) { - count += len; - // impose download limit if limit >= 0, otherwise no limit - // here, cut off is exactly of limit bytes - if (limit >= 0 && count > limit) { - os.write(buf, 0, len - (count - limit)); - // mandatory_close = true; - break; - } - os.write(buf, 0, len); - os.flush(); - } - - // if (mandatory_close) - // you always close here, no matter mandatory_close or not. - // however different ftp servers respond differently, see below. - socket.close(); - - // scenarios: - // (1) mandatory_close is false, download limit not reached - // no special care here - // (2) mandatory_close is true, download limit is reached - // different servers have different reply codes: - - // do not need this - // sendCommand("ABOR"); - - try { - int reply = getReply(); - if (!_notBadReply(reply)) - throw new FtpExceptionUnknownForcedDataClose(getReplyString()); - } catch (FTPConnectionClosedException e) { - // some ftp servers will close control channel if data channel socket - // is closed by our end before all data has been read out. Check: - // tux414.q-tam.hp.com FTP server (hp.com version whp02) - // so must catch FTPConnectionClosedException thrown by getReply() above - // disconnect(); - throw new FtpExceptionControlClosedByForcedDataClose(e.getMessage()); - } - - } - - /** - * reply check after closing data connection - * - * @param reply - * @return - */ - private boolean _notBadReply(int reply) { - - if (FTPReply.isPositiveCompletion(reply)) { - // do nothing - } else if (reply == 426) { // FTPReply.TRANSFER_ABORTED - // some ftp servers reply 426, e.g., - // foggy FTP server (Version wu-2.6.2(2) - // there is second reply witing? no! 
- // getReply(); - } else if (reply == 450) { // FTPReply.FILE_ACTION_NOT_TAKEN - // some ftp servers reply 450, e.g., - // ProFTPD [ftp.kernel.org] - // there is second reply witing? no! - // getReply(); - } else if (reply == 451) { // FTPReply.ACTION_ABORTED - // some ftp servers reply 451, e.g., - // ProFTPD [ftp.kernel.org] - // there is second reply witing? no! - // getReply(); - } else if (reply == 451) { // FTPReply.ACTION_ABORTED - } else { - // what other kind of ftp server out there? - return false; - } - - return true; - } - - /*** - * Sets the file type to be transferred. This should be one of - * <code> FTP.ASCII_FILE_TYPE </code>, <code> FTP.IMAGE_FILE_TYPE </code>, - * etc. The file type only needs to be set when you want to change the type. - * After changing it, the new type stays in effect until you change it again. - * The default file type is <code> FTP.ASCII_FILE_TYPE </code> if this method - * is never called. - * <p> - * - * @param fileType - * The <code> _FILE_TYPE </code> constant indcating the type of file. - * @return True if successfully completed, false if not. - * @exception FTPConnectionClosedException - * If the FTP server prematurely closes the connection as a - * result of the client being idle or some other reason causing - * the server to send FTP reply code 421. This exception may be - * caught either as an IOException or independently as itself. - * @exception IOException - * If an I/O error occurs while either sending a command to the - * server or receiving a reply from the server. - ***/ - public boolean setFileType(int fileType) throws IOException { - if (FTPReply.isPositiveCompletion(type(fileType))) { - /* - * __fileType = fileType; __fileFormat = FTP.NON_PRINT_TEXT_FORMAT; - */ - return true; - } - return false; - } - - /*** - * Fetches the system type name from the server and returns the string. This - * value is cached for the duration of the connection after the first call to - * this method. 
In other words, only the first time that you invoke this - * method will it issue a SYST command to the FTP server. FTPClient will - * remember the value and return the cached value until a call to disconnect. - * <p> - * - * @return The system type name obtained from the server. null if the - * information could not be obtained. - * @exception FTPConnectionClosedException - * If the FTP server prematurely closes the connection as a - * result of the client being idle or some other reason causing - * the server to send FTP reply code 421. This exception may be - * caught either as an IOException or independently as itself. - * @exception IOException - * If an I/O error occurs while either sending a command to the - * server or receiving a reply from the server. - ***/ - public String getSystemName() throws IOException, FtpExceptionBadSystResponse { - // if (syst() == FTPReply.NAME_SYSTEM_TYPE) - // Technically, we should expect a NAME_SYSTEM_TYPE response, but - // in practice FTP servers deviate, so we soften the condition to - // a positive completion. - if (__systemName == null && FTPReply.isPositiveCompletion(syst())) { - __systemName = (getReplyStrings()[0]).substring(4); - } else { - throw new FtpExceptionBadSystResponse("Bad response of SYST: " - + getReplyString()); - } - - return __systemName; - } - - /*** - * Sends a NOOP command to the FTP server. This is useful for preventing - * server timeouts. - * <p> - * - * @return True if successfully completed, false if not. - * @exception FTPConnectionClosedException - * If the FTP server prematurely closes the connection as a - * result of the client being idle or some other reason causing - * the server to send FTP reply code 421. This exception may be - * caught either as an IOException or independently as itself. - * @exception IOException - * If an I/O error occurs while either sending a command to the - * server or receiving a reply from the server. 
- ***/ - public boolean sendNoOp() throws IOException { - return FTPReply.isPositiveCompletion(noop()); - } - - // client.stat(path); - // client.sendCommand("STAT"); - // client.sendCommand("STAT",path); - // client.sendCommand("MDTM",path); - // client.sendCommand("SIZE",path); - // client.sendCommand("HELP","SITE"); - // client.sendCommand("SYST"); - // client.setRestartOffset(120); - -} http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java ---------------------------------------------------------------------- diff --git a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java b/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java deleted file mode 100644 index 772f3bb..0000000 --- a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java +++ /dev/null @@ -1,267 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.nutch.protocol.ftp; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import org.apache.commons.net.ftp.FTPFileEntryParser; - -import org.apache.nutch.crawl.CrawlDatum; -import org.apache.hadoop.io.Text; -import org.apache.nutch.net.protocols.Response; - -import org.apache.hadoop.conf.Configuration; - -import org.apache.nutch.protocol.Content; -import org.apache.nutch.metadata.Nutch; -import org.apache.nutch.protocol.Protocol; -import org.apache.nutch.protocol.ProtocolOutput; -import org.apache.nutch.protocol.ProtocolStatus; -import crawlercommons.robots.BaseRobotRules; - -import java.net.URL; - -import java.io.IOException; - -/** - * This class is a protocol plugin used for ftp: scheme. It creates - * {@link FtpResponse} object and gets the content of the url from it. - * Configurable parameters are {@code ftp.username}, {@code ftp.password}, - * {@code ftp.content.limit}, {@code ftp.timeout}, {@code ftp.server.timeout}, - * {@code ftp.password}, {@code ftp.keep.connection} and {@code ftp.follow.talk} - * . For details see "FTP properties" section in {@code nutch-default.xml}. - */ -public class Ftp implements Protocol { - - public static final Logger LOG = LoggerFactory.getLogger(Ftp.class); - - private static final int BUFFER_SIZE = 16384; // 16*1024 = 16384 - - static final int MAX_REDIRECTS = 5; - - int timeout; - - int maxContentLength; - - String userName; - String passWord; - - // typical/default server timeout is 120*1000 millisec. - // better be conservative here - int serverTimeout; - - // when to have client start anew - long renewalTime = -1; - - boolean keepConnection; - - boolean followTalk; - - // ftp client - Client client = null; - // ftp dir list entry parser - FTPFileEntryParser parser = null; - - private Configuration conf; - - private FtpRobotRulesParser robots = null; - - // constructor - public Ftp() { - robots = new FtpRobotRulesParser(); - } - - /** Set the timeout. 
*/ - public void setTimeout(int to) { - timeout = to; - } - - /** Set the point at which content is truncated. */ - public void setMaxContentLength(int length) { - maxContentLength = length; - } - - /** Set followTalk */ - public void setFollowTalk(boolean followTalk) { - this.followTalk = followTalk; - } - - /** Set keepConnection */ - public void setKeepConnection(boolean keepConnection) { - this.keepConnection = keepConnection; - } - - /** - * Creates a {@link FtpResponse} object corresponding to the url and returns a - * {@link ProtocolOutput} object as per the content received - * - * @param url - * Text containing the ftp url - * @param datum - * The CrawlDatum object corresponding to the url - * - * @return {@link ProtocolOutput} object for the url - */ - public ProtocolOutput getProtocolOutput(Text url, CrawlDatum datum) { - String urlString = url.toString(); - try { - URL u = new URL(urlString); - - int redirects = 0; - - while (true) { - FtpResponse response; - response = new FtpResponse(u, datum, this, getConf()); // make a request - - int code = response.getCode(); - datum.getMetaData().put(Nutch.PROTOCOL_STATUS_CODE_KEY, - new Text(Integer.toString(code))); - - - if (code == 200) { // got a good response - return new ProtocolOutput(response.toContent()); // return it - - } else if (code >= 300 && code < 400) { // handle redirect - if (redirects == MAX_REDIRECTS) - throw new FtpException("Too many redirects: " + url); - u = new URL(response.getHeader("Location")); - redirects++; - if (LOG.isTraceEnabled()) { - LOG.trace("redirect to " + u); - } - } else { // convert to exception - throw new FtpError(code); - } - } - } catch (Exception e) { - return new ProtocolOutput(null, new ProtocolStatus(e)); - } - } - - protected void finalize() { - try { - if (this.client != null && this.client.isConnected()) { - this.client.logout(); - this.client.disconnect(); - } - } catch (IOException e) { - // do nothing - } - } - - /** For debugging. 
*/ - public static void main(String[] args) throws Exception { - int timeout = Integer.MIN_VALUE; - int maxContentLength = Integer.MIN_VALUE; - String logLevel = "info"; - boolean followTalk = false; - boolean keepConnection = false; - boolean dumpContent = false; - String urlString = null; - - String usage = "Usage: Ftp [-logLevel level] [-followTalk] [-keepConnection] [-timeout N] [-maxContentLength L] [-dumpContent] url"; - - if (args.length == 0) { - System.err.println(usage); - System.exit(-1); - } - - for (int i = 0; i < args.length; i++) { - if (args[i].equals("-logLevel")) { - logLevel = args[++i]; - } else if (args[i].equals("-followTalk")) { - followTalk = true; - } else if (args[i].equals("-keepConnection")) { - keepConnection = true; - } else if (args[i].equals("-timeout")) { - timeout = Integer.parseInt(args[++i]) * 1000; - } else if (args[i].equals("-maxContentLength")) { - maxContentLength = Integer.parseInt(args[++i]); - } else if (args[i].equals("-dumpContent")) { - dumpContent = true; - } else if (i != args.length - 1) { - System.err.println(usage); - System.exit(-1); - } else { - urlString = args[i]; - } - } - - Ftp ftp = new Ftp(); - - ftp.setFollowTalk(followTalk); - ftp.setKeepConnection(keepConnection); - - if (timeout != Integer.MIN_VALUE) // set timeout - ftp.setTimeout(timeout); - - if (maxContentLength != Integer.MIN_VALUE) // set maxContentLength - ftp.setMaxContentLength(maxContentLength); - - // set log level - // LOG.setLevel(Level.parse((new String(logLevel)).toUpperCase())); - - Content content = ftp.getProtocolOutput(new Text(urlString), - new CrawlDatum()).getContent(); - - System.err.println("Content-Type: " + content.getContentType()); - System.err.println("Content-Length: " - + content.getMetadata().get(Response.CONTENT_LENGTH)); - System.err.println("Last-Modified: " - + content.getMetadata().get(Response.LAST_MODIFIED)); - if (dumpContent) { - System.out.print(new String(content.getContent())); - } - - ftp = null; - } - - /** 
- * Set the {@link Configuration} object - */ - public void setConf(Configuration conf) { - this.conf = conf; - this.maxContentLength = conf.getInt("ftp.content.limit", 64 * 1024); - this.timeout = conf.getInt("ftp.timeout", 10000); - this.userName = conf.get("ftp.username", "anonymous"); - this.passWord = conf.get("ftp.password", "[email protected]"); - this.serverTimeout = conf.getInt("ftp.server.timeout", 60 * 1000); - this.keepConnection = conf.getBoolean("ftp.keep.connection", false); - this.followTalk = conf.getBoolean("ftp.follow.talk", false); - this.robots.setConf(conf); - } - - /** - * Get the {@link Configuration} object - */ - public Configuration getConf() { - return this.conf; - } - - /** - * Get the robots rules for a given url - */ - public BaseRobotRules getRobotRules(Text url, CrawlDatum datum) { - return robots.getRobotRulesSet(this, url); - } - - public int getBufferSize() { - return BUFFER_SIZE; - } -} http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpError.java ---------------------------------------------------------------------- diff --git a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpError.java b/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpError.java deleted file mode 100644 index b63a67e..0000000 --- a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpError.java +++ /dev/null @@ -1,36 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.nutch.protocol.ftp; - -/** - * Thrown for Ftp error codes. - */ -public class FtpError extends FtpException { - - private int code; - - public int getCode(int code) { - return code; - } - - public FtpError(int code) { - super("Ftp Error: " + code); - this.code = code; - } - -} http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpException.java ---------------------------------------------------------------------- diff --git a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpException.java b/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpException.java deleted file mode 100644 index 5a29668..0000000 --- a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpException.java +++ /dev/null @@ -1,46 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.nutch.protocol.ftp; - -import org.apache.nutch.protocol.ProtocolException; - -/*** - * Superclass for important exceptions thrown during FTP talk, that must be - * handled with care. - * - * @author John Xing - */ -public class FtpException extends ProtocolException { - - public FtpException() { - super(); - } - - public FtpException(String message) { - super(message); - } - - public FtpException(String message, Throwable cause) { - super(message, cause); - } - - public FtpException(Throwable cause) { - super(cause); - } - -} http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionBadSystResponse.java ---------------------------------------------------------------------- diff --git a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionBadSystResponse.java b/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionBadSystResponse.java deleted file mode 100644 index 689ac8e..0000000 --- a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionBadSystResponse.java +++ /dev/null @@ -1,29 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.nutch.protocol.ftp; - -/** - * Exception indicating bad reply of SYST command. - * - * @author John Xing - */ -public class FtpExceptionBadSystResponse extends FtpException { - FtpExceptionBadSystResponse(String msg) { - super(msg); - } -} http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionCanNotHaveDataConnection.java ---------------------------------------------------------------------- diff --git a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionCanNotHaveDataConnection.java b/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionCanNotHaveDataConnection.java deleted file mode 100644 index 9f35b74..0000000 --- a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionCanNotHaveDataConnection.java +++ /dev/null @@ -1,29 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.nutch.protocol.ftp; - -/** - * Exception indicating failure of opening data connection. 
- * - * @author John Xing - */ -public class FtpExceptionCanNotHaveDataConnection extends FtpException { - FtpExceptionCanNotHaveDataConnection(String msg) { - super(msg); - } -} http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionControlClosedByForcedDataClose.java ---------------------------------------------------------------------- diff --git a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionControlClosedByForcedDataClose.java b/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionControlClosedByForcedDataClose.java deleted file mode 100644 index c058fcb..0000000 --- a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionControlClosedByForcedDataClose.java +++ /dev/null @@ -1,30 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.nutch.protocol.ftp; - -/** - * Exception indicating control channel is closed by server end, due to forced - * closure of data channel at client (our) end. 
- * - * @author John Xing - */ -public class FtpExceptionControlClosedByForcedDataClose extends FtpException { - FtpExceptionControlClosedByForcedDataClose(String msg) { - super(msg); - } -} http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionUnknownForcedDataClose.java ---------------------------------------------------------------------- diff --git a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionUnknownForcedDataClose.java b/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionUnknownForcedDataClose.java deleted file mode 100644 index 9083d7c..0000000 --- a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionUnknownForcedDataClose.java +++ /dev/null @@ -1,30 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.nutch.protocol.ftp; - -/** - * Exception indicating unrecognizable reply from server after forced closure of - * data channel by client (our) side. - * - * @author John Xing - */ -public class FtpExceptionUnknownForcedDataClose extends FtpException { - FtpExceptionUnknownForcedDataClose(String msg) { - super(msg); - } -}
