Modified: nutch/trunk/src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/SWFParser.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/SWFParser.java?rev=1484634&r1=1484633&r2=1484634&view=diff ============================================================================== --- nutch/trunk/src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/SWFParser.java (original) +++ nutch/trunk/src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/SWFParser.java Tue May 21 01:19:26 2013 @@ -42,8 +42,6 @@ import com.anotherbigidea.io.InStream; /** * Parser for Flash SWF files. Loosely based on the sample in JavaSWF * distribution. - * - * @author Andrzej Bialecki */ public class SWFParser implements Parser { public static final Logger LOG = LoggerFactory.getLogger("org.apache.nutch.parse.swf"); @@ -63,7 +61,7 @@ public class SWFParser implements Parser public ParseResult getParse(Content content) { String text = null; - Vector outlinks = new Vector(); + Vector<Outlink> outlinks = new Vector<Outlink>(); try { @@ -120,6 +118,7 @@ public class SWFParser implements Parser byte[] buf = new byte[in.available()]; in.read(buf); + in.close(); SWFParser parser = new SWFParser(); ParseResult parseResult = parser.getParse(new Content("file:" + args[0], "file:" + args[0], buf, "application/x-shockwave-flash", @@ -153,13 +152,13 @@ class ExtractText extends SWFTagTypesImp * character codes for the corresponding font glyphs (An empty array denotes a * System Font). 
*/ - protected HashMap fontCodes = new HashMap(); + protected HashMap<Integer, int[]> fontCodes = new HashMap<Integer, int[]>(); - public ArrayList strings = new ArrayList(); + public ArrayList<String> strings = new ArrayList<String>(); - public HashSet actionStrings = new HashSet(); + public HashSet<String> actionStrings = new HashSet<String>(); - public ArrayList urls = new ArrayList(); + public ArrayList<String> urls = new ArrayList<String>(); public ExtractText() { super(null); @@ -167,7 +166,7 @@ class ExtractText extends SWFTagTypesImp public String getText() { StringBuffer res = new StringBuffer(); - Iterator it = strings.iterator(); + Iterator<String> it = strings.iterator(); while (it.hasNext()) { if (res.length() > 0) res.append(' '); res.append(it.next()); @@ -189,7 +188,7 @@ class ExtractText extends SWFTagTypesImp public String[] getUrls() { String[] res = new String[urls.size()]; int i = 0; - Iterator it = urls.iterator(); + Iterator<String> it = urls.iterator(); while (it.hasNext()) { res[i] = (String) it.next(); i++; @@ -350,26 +349,23 @@ class ExtractText extends SWFTagTypesImp * ActionScript parser. This parser tries to extract free text embedded inside * the script, but without polluting it too much with names of variables, * methods, etc. Not ideal, but it works. 
- * - * @author Andrzej Bialecki */ class NutchSWFActions extends SWFActionBlockImpl implements SWFActions { - private HashSet strings = null; + private HashSet<String> strings = null; - private ArrayList urls = null; + private ArrayList<String> urls = null; String[] dict = null; - Stack stack = null; + Stack<Object> stack = null; - public NutchSWFActions(HashSet strings, ArrayList urls) { + public NutchSWFActions(HashSet<String> strings, ArrayList<String> urls) { this.strings = strings; this.urls = urls; stack = new SmallStack(100, strings); } public void lookupTable(String[] values) throws IOException { - // System.out.println("-lookupTable: " + values.length); for (int i = 0; i < values.length; i++) { if (!strings.contains(values[i])) strings.add(values[i]); } @@ -378,7 +374,6 @@ class NutchSWFActions extends SWFActionB } public void defineLocal() throws IOException { - // System.out.println("-defineLocal"); stack.pop(); super.defineLocal(); } @@ -398,69 +393,58 @@ class NutchSWFActions extends SWFActionB } public SWFActionBlock.TryCatchFinally _try(String var) throws IOException { - // System.out.println("_try: var=" + var); // stack.push(var); strings.remove(var); return super._try(var); } public void comment(String var) throws IOException { - // System.out.println("-comment: var=" + var); // stack.push(var); strings.remove(var); super.comment(var); } public void goToFrame(String var) throws IOException { - // System.out.println("-goToFrame: var=" + var); stack.push(var); strings.remove(var); super.gotoFrame(var); } public void ifJump(String var) throws IOException { - // System.out.println("-ifJump: var=" + var); strings.remove(var); super.ifJump(var); } public void jump(String var) throws IOException { - // System.out.println("-jump: var=" + var); strings.remove(var); super.jump(var); } public void jumpLabel(String var) throws IOException { - // System.out.println("-jumpLabel: var=" + var); strings.remove(var); super.jumpLabel(var); } public void lookup(int 
var) throws IOException { - // System.out.println("-lookup: var=" + var); if (dict != null && var >= 0 && var < dict.length) { - // System.out.println(" push " + dict[var]); stack.push(dict[var]); } super.lookup(var); } public void push(String var) throws IOException { - // System.out.println("-push: var=" + var); stack.push(var); strings.remove(var); super.push(var); } public void setTarget(String var) throws IOException { - // System.out.println("-setTarget: var=" + var); stack.push(var); strings.remove(var); super.setTarget(var); } public SWFActionBlock startFunction(String var, String[] params) throws IOException { - // System.out.println("-startFunction1: var=" + var); stack.push(var); strings.remove(var); if (params != null) { @@ -472,7 +456,6 @@ class NutchSWFActions extends SWFActionB } public SWFActionBlock startFunction2(String var, int arg1, int arg2, String[] params, int[] arg3) throws IOException { - // System.out.println("-startFunction2: var=" + var); stack.push(var); strings.remove(var); if (params != null) { @@ -484,74 +467,61 @@ class NutchSWFActions extends SWFActionB } public void waitForFrame(int num, String var) throws IOException { - // System.out.println("-waitForFrame: var=" + var); stack.push(var); strings.remove(var); super.waitForFrame(num, var); } public void waitForFrame(String var) throws IOException { - // System.out.println("-waitForFrame: var=" + var); stack.push(var); strings.remove(var); super.waitForFrame(var); } public void done() throws IOException { - // System.out.println("-done"); while (stack.size() > 0) { strings.remove(stack.pop()); } } public SWFActionBlock start(int arg0, int arg1) throws IOException { - // System.out.println("-start"); return this; } public SWFActionBlock start(int arg0) throws IOException { - // System.out.println("-start"); return this; } public void add() throws IOException { - // System.out.println("-add"); super.add(); } public void asciiToChar() throws IOException { - // 
System.out.println("-asciitochar"); super.asciiToChar(); } public void asciiToCharMB() throws IOException { - // System.out.println("-asciitocharMB"); super.asciiToCharMB(); } public void push(int var) throws IOException { - // System.out.println("-push(int)"); if (dict != null && var >= 0 && var < dict.length) { - // System.out.println(" push " + dict[var]); stack.push(dict[var]); } super.push(var); } public void callFunction() throws IOException { - // System.out.println("-callFunction"); strings.remove(stack.pop()); super.callFunction(); } public void callMethod() throws IOException { - // System.out.println("-callMethod"); strings.remove(stack.pop()); super.callMethod(); } public void getMember() throws IOException { - // System.out.println("-getMember"); // 0: name String val = (String) stack.pop(); strings.remove(val); @@ -560,116 +530,97 @@ class NutchSWFActions extends SWFActionB public void setMember() throws IOException { // 0: value -1: name - String val = (String) stack.pop(); + stack.pop(); // value String name = (String) stack.pop(); - // System.out.println("-setMember: name=" + name + ", val=" + val); strings.remove(name); super.setMember(); } public void setProperty() throws IOException { - // System.out.println("-setProperty"); super.setProperty(); } public void setVariable() throws IOException { - // System.out.println("-setVariable"); super.setVariable(); } public void call() throws IOException { - // System.out.println("-call"); strings.remove(stack.pop()); super.call(); } public void setTarget() throws IOException { - // System.out.println("-setTarget"); strings.remove(stack.pop()); super.setTarget(); } public void pop() throws IOException { - // System.out.println("-pop"); strings.remove(stack.pop()); super.pop(); } public void push(boolean arg0) throws IOException { - // System.out.println("-push(b)"); stack.push("" + arg0); super.push(arg0); } public void push(double arg0) throws IOException { - // System.out.println("-push(d)"); 
stack.push("" + arg0); super.push(arg0); } public void push(float arg0) throws IOException { - // System.out.println("-push(f)"); stack.push("" + arg0); super.push(arg0); } public void pushNull() throws IOException { - // System.out.println("-push(null)"); stack.push(""); super.pushNull(); } public void pushRegister(int arg0) throws IOException { - // System.out.println("-push(reg)"); stack.push("" + arg0); super.pushRegister(arg0); } public void pushUndefined() throws IOException { - // System.out.println("-push(undef)"); stack.push("???"); super.pushUndefined(); } public void getProperty() throws IOException { - // System.out.println("-getProperty"); stack.pop(); super.getProperty(); } public void getVariable() throws IOException { - // System.out.println("-getVariable"); strings.remove(stack.pop()); super.getVariable(); } public void gotoFrame(boolean arg0) throws IOException { - // System.out.println("-gotoFrame(b)"); stack.push("" + arg0); super.gotoFrame(arg0); } public void gotoFrame(int arg0) throws IOException { - // System.out.println("-gotoFrame(int)"); stack.push("" + arg0); super.gotoFrame(arg0); } public void gotoFrame(String arg0) throws IOException { - // System.out.println("-gotoFrame(string)"); stack.push("" + arg0); strings.remove(arg0); super.gotoFrame(arg0); } public void newObject() throws IOException { - // System.out.println("-newObject"); stack.pop(); super.newObject(); } public SWFActionBlock startWith() throws IOException { - // System.out.println("-startWith"); return this; } @@ -678,13 +629,15 @@ class NutchSWFActions extends SWFActionB /* * Small bottom-less stack. 
*/ -class SmallStack extends Stack { +class SmallStack extends Stack<Object> { + + private static final long serialVersionUID = 1L; private int maxSize; - private HashSet strings = null; + private HashSet<String> strings = null; - public SmallStack(int maxSize, HashSet strings) { + public SmallStack(int maxSize, HashSet<String> strings) { this.maxSize = maxSize; this.strings = strings; }
Modified: nutch/trunk/src/plugin/parse-swf/src/test/org/apache/nutch/parse/swf/TestSWFParser.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/parse-swf/src/test/org/apache/nutch/parse/swf/TestSWFParser.java?rev=1484634&r1=1484633&r2=1484634&view=diff ============================================================================== --- nutch/trunk/src/plugin/parse-swf/src/test/org/apache/nutch/parse/swf/TestSWFParser.java (original) +++ nutch/trunk/src/plugin/parse-swf/src/test/org/apache/nutch/parse/swf/TestSWFParser.java Tue May 21 01:19:26 2013 @@ -32,13 +32,12 @@ import org.apache.nutch.parse.Parse; import org.apache.nutch.parse.ParseException; import org.apache.hadoop.conf.Configuration; import org.apache.nutch.util.NutchConfiguration; +import org.mortbay.log.Log; import junit.framework.TestCase; /** * Unit tests for SWFParser. - * - * @author Andrzej Bialecki */ public class TestSWFParser extends TestCase { @@ -48,7 +47,6 @@ public class TestSWFParser extends TestC private String[] sampleFiles = new String[]{"test1.swf", "test2.swf", "test3.swf"}; private String[] sampleTexts = new String[]{"test1.txt", "test2.txt", "test3.txt"}; - private String[] texts = new String[sampleTexts.length]; public TestSWFParser(String name) { super(name); @@ -94,5 +92,4 @@ public class TestSWFParser extends TestC assertTrue(sampleTexts[i].equals(text)); } } - } Modified: nutch/trunk/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/DOMBuilder.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/DOMBuilder.java?rev=1484634&r1=1484633&r2=1484634&view=diff ============================================================================== --- nutch/trunk/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/DOMBuilder.java (original) +++ nutch/trunk/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/DOMBuilder.java Tue May 21 01:19:26 2013 @@ -58,7 +58,7 @@ class DOMBuilder public DocumentFragment 
m_docFrag = null; /** Vector of element nodes */ - protected Stack m_elemStack = new Stack(); + protected Stack<Element> m_elemStack = new Stack<Element>(); /** * DOMBuilder instance constructor... it will add the DOM nodes Modified: nutch/trunk/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/TikaParser.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/TikaParser.java?rev=1484634&r1=1484633&r2=1484634&view=diff ============================================================================== --- nutch/trunk/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/TikaParser.java (original) +++ nutch/trunk/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/TikaParser.java Tue May 21 01:19:26 2013 @@ -59,7 +59,8 @@ public class TikaParser implements org.a private HtmlParseFilters htmlParseFilters; private String cachingPolicy; - public ParseResult getParse(Content content) { + @SuppressWarnings("deprecation") + public ParseResult getParse(Content content) { String mimeType = content.getContentType(); URL base; Modified: nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipParser.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipParser.java?rev=1484634&r1=1484633&r2=1484634&view=diff ============================================================================== --- nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipParser.java (original) +++ nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipParser.java Tue May 21 01:19:26 2013 @@ -18,15 +18,12 @@ package org.apache.nutch.parse.zip; import java.io.ByteArrayInputStream; -import java.io.InputStream; -import java.util.Properties; import java.util.ArrayList; import java.util.List; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.apache.nutch.metadata.Metadata; import 
org.apache.nutch.net.protocols.Response; import org.apache.nutch.parse.Outlink; import org.apache.nutch.parse.ParseData; @@ -40,8 +37,6 @@ import org.apache.hadoop.conf.Configurat /** * ZipParser class based on MSPowerPointParser class by Stephan Strittmatter. * Nutch parse plugin for zip files - Content Type : application/zip - * - * @author Rohit Kulkarni & Ashish Vaidya */ public class ZipParser implements Parser { @@ -57,17 +52,13 @@ public class ZipParser implements Parser String resultText = null; String resultTitle = null; Outlink[] outlinks = null; - List outLinksList = new ArrayList(); - Properties properties = null; + List<Outlink> outLinksList = new ArrayList<Outlink>(); try { final String contentLen = content.getMetadata().get(Response.CONTENT_LENGTH); final int len = Integer.parseInt(contentLen); if (LOG.isDebugEnabled()) { LOG.debug("ziplen: " + len); } final byte[] contentInBytes = content.getContent(); - final ByteArrayInputStream bainput = new ByteArrayInputStream( - contentInBytes); - final InputStream input = bainput; if (contentLen != null && contentInBytes.length != len) { return new ParseStatus(ParseStatus.FAILED, Modified: nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Client.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Client.java?rev=1484634&r1=1484633&r2=1484634&view=diff ============================================================================== --- nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Client.java (original) +++ nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Client.java Tue May 21 01:19:26 2013 @@ -77,9 +77,9 @@ public class Client extends FTP private int __dataTimeout; private int __passivePort; private String __passiveHost; - private int __fileType, __fileFormat; +// private int __fileType, __fileFormat; private boolean __remoteVerificationEnabled; - private 
FTPFileEntryParser __entryParser; +// private FTPFileEntryParser __entryParser; private String __systemName; // constructor @@ -95,10 +95,10 @@ public class Client extends FTP { __passiveHost = null; __passivePort = -1; - __fileType = FTP.ASCII_FILE_TYPE; - __fileFormat = FTP.NON_PRINT_TEXT_FORMAT; __systemName = null; - __entryParser = null; +// __fileType = FTP.ASCII_FILE_TYPE; +// __fileFormat = FTP.NON_PRINT_TEXT_FORMAT; +// __entryParser = null; } // parse reply for pass() @@ -315,7 +315,7 @@ public class Client extends FTP } // retrieve list reply for path - public void retrieveList(String path, List entries, int limit, + public void retrieveList(String path, List<FTPFile> entries, int limit, FTPFileEntryParser parser) throws IOException, FtpExceptionCanNotHaveDataConnection, @@ -331,7 +331,7 @@ public class Client extends FTP new BufferedReader(new InputStreamReader(socket.getInputStream())); // force-close data channel socket, when download limit is reached - boolean mandatory_close = false; +// boolean mandatory_close = false; //List entries = new LinkedList(); int count = 0; @@ -348,7 +348,7 @@ public class Client extends FTP // impose download limit if limit >= 0, otherwise no limit // here, cut off is up to the line when total bytes is just over limit if (limit >= 0 && count > limit) { - mandatory_close = true; +// mandatory_close = true; break; } line = parser.readNextEntry(reader); @@ -403,7 +403,7 @@ public class Client extends FTP // fixme, should we instruct server here for binary file type? 
// force-close data channel socket - boolean mandatory_close = false; + // boolean mandatory_close = false; int len; int count = 0; byte[] buf = @@ -414,7 +414,7 @@ public class Client extends FTP // here, cut off is exactly of limit bytes if (limit >= 0 && count > limit) { os.write(buf,0,len-(count-limit)); - mandatory_close = true; + // mandatory_close = true; break; } os.write(buf,0,len); @@ -502,8 +502,8 @@ public class Client extends FTP { if (FTPReply.isPositiveCompletion(type(fileType))) { - __fileType = fileType; - __fileFormat = FTP.NON_PRINT_TEXT_FORMAT; +/* __fileType = fileType; + __fileFormat = FTP.NON_PRINT_TEXT_FORMAT;*/ return true; } return false; Modified: nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java?rev=1484634&r1=1484633&r2=1484634&view=diff ============================================================================== --- nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java (original) +++ nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java Tue May 21 01:19:26 2013 @@ -55,7 +55,7 @@ public class Ftp implements Protocol { public static final Logger LOG = LoggerFactory.getLogger(Ftp.class); - static final int BUFFER_SIZE = 16384; // 16*1024 = 16384 + private static final int BUFFER_SIZE = 16384; // 16*1024 = 16384 static final int MAX_REDIRECTS = 5; @@ -257,5 +257,9 @@ public class Ftp implements Protocol { public BaseRobotRules getRobotRules(Text url, CrawlDatum datum) { return RobotRulesParser.EMPTY_RULES; } + + public int getBufferSize() { + return BUFFER_SIZE; + } } Modified: nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java?rev=1484634&r1=1484633&r2=1484634&view=diff ============================================================================== --- nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java (original) +++ nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java Tue May 21 01:19:26 2013 @@ -17,11 +17,9 @@ package org.apache.nutch.protocol.ftp; - import org.apache.commons.net.ftp.FTP; import org.apache.commons.net.ftp.FTPFile; import org.apache.commons.net.ftp.FTPReply; - import org.apache.commons.net.ftp.parser.DefaultFTPFileEntryParserFactory; import org.apache.commons.net.ftp.parser.ParserInitializationException; @@ -42,8 +40,7 @@ import java.util.LinkedList; import java.io.ByteArrayOutputStream; import java.io.IOException; - -/************************************ +/** * FtpResponse.java mimics ftp replies as http response. * It tries its best to follow http's way for headers, response codes * as well as exceptions. @@ -53,9 +50,7 @@ import java.io.IOException; * and some important commons-net exceptions passed by Client.java * must have been properly dealt with. They'd better not be leaked * to the caller of this class. - * - * @author John Xing - ***********************************/ + */ public class FtpResponse { private String orig; @@ -146,7 +141,7 @@ public class FtpResponse { // follow ftp talk? 
if (ftp.followTalk) ftp.client.addProtocolCommandListener( - new PrintCommandListener(ftp.LOG)); + new PrintCommandListener(Ftp.LOG)); } // quit from previous site if at a different site now @@ -284,8 +279,8 @@ public class FtpResponse { } } catch (Exception e) { - if (ftp.LOG.isWarnEnabled()) { - ftp.LOG.warn("Error: ", e); + if (Ftp.LOG.isWarnEnabled()) { + Ftp.LOG.warn("Error: ", e); } // for any un-foreseen exception (run time exception or not), // do ultimate clean and leave ftp.client for garbage collection @@ -312,11 +307,11 @@ public class FtpResponse { throws IOException { ByteArrayOutputStream os = null; - List list = null; + List<FTPFile> list = null; try { // first get its possible attributes - list = new LinkedList(); + list = new LinkedList<FTPFile>(); ftp.client.retrieveList(path, list, ftp.maxContentLength, ftp.parser); FTPFile ftpFile = (FTPFile) list.get(0); @@ -329,7 +324,7 @@ public class FtpResponse { code = 304; return; } - os = new ByteArrayOutputStream(ftp.BUFFER_SIZE); + os = new ByteArrayOutputStream(ftp.getBufferSize()); ftp.client.retrieveFile(path, os, ftp.maxContentLength); this.content = os.toByteArray(); @@ -414,7 +409,7 @@ public class FtpResponse { // get ftp dir list as http response private void getDirAsHttpResponse(String path, long lastModified) throws IOException { - List list = new LinkedList(); + List<FTPFile> list = new LinkedList<FTPFile>(); try { @@ -482,7 +477,7 @@ public class FtpResponse { } // generate html page from ftp dir list - private byte[] list2html(List list, String path, boolean includeDotDot) { + private byte[] list2html(List<FTPFile> list, String path, boolean includeDotDot) { //StringBuffer x = new StringBuffer("<!doctype html public \"-//ietf//dtd html//en\"><html><head>"); StringBuffer x = new StringBuffer("<html><head>"); Modified: nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java?rev=1484634&r1=1484633&r2=1484634&view=diff ============================================================================== --- nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java (original) +++ nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java Tue May 21 01:19:26 2013 @@ -42,6 +42,8 @@ import org.apache.commons.httpclient.NTC import org.apache.commons.httpclient.auth.AuthScope; import org.apache.commons.httpclient.params.HttpConnectionManagerParams; import org.apache.commons.httpclient.protocol.Protocol; +import org.apache.commons.httpclient.protocol.ProtocolSocketFactory; +import org.apache.commons.httpclient.protocol.SSLProtocolSocketFactory; // Nutch imports import org.apache.nutch.crawl.CrawlDatum; @@ -158,8 +160,8 @@ public class Http extends HttpBase { private void configureClient() { // Set up an HTTPS socket factory that accepts self-signed certs. 
- Protocol https = new Protocol("https", - new DummySSLProtocolSocketFactory(), 443); + ProtocolSocketFactory factory = new SSLProtocolSocketFactory(); + Protocol https = new Protocol("https", factory, 443); Protocol.registerProtocol("https", https); HttpConnectionManagerParams params = connectionManager.getParams(); @@ -174,7 +176,7 @@ public class Http extends HttpBase { client.getParams().setConnectionManagerTimeout(timeout); HostConfiguration hostConf = client.getHostConfiguration(); - ArrayList headers = new ArrayList(); + ArrayList<Header> headers = new ArrayList<Header>(); // Set the User Agent in the header headers.add(new Header("User-Agent", userAgent)); // prefer English @@ -199,7 +201,7 @@ public class Http extends HttpBase { NTCredentials proxyCredentials = new NTCredentials( this.proxyUsername, this.proxyPassword, - this.agentHost, this.proxyRealm); + Http.agentHost, this.proxyRealm); client.getState().setProxyCredentials( proxyAuthScope, proxyCredentials); Modified: nutch/trunk/src/plugin/protocol-httpclient/src/test/org/apache/nutch/protocol/httpclient/TestProtocolHttpClient.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-httpclient/src/test/org/apache/nutch/protocol/httpclient/TestProtocolHttpClient.java?rev=1484634&r1=1484633&r2=1484634&view=diff ============================================================================== --- nutch/trunk/src/plugin/protocol-httpclient/src/test/org/apache/nutch/protocol/httpclient/TestProtocolHttpClient.java (original) +++ nutch/trunk/src/plugin/protocol-httpclient/src/test/org/apache/nutch/protocol/httpclient/TestProtocolHttpClient.java Tue May 21 01:19:26 2013 @@ -23,7 +23,6 @@ import junit.framework.TestCase; import org.mortbay.jetty.Server; import org.mortbay.jetty.bio.SocketConnector; import org.mortbay.jetty.handler.ContextHandler; -import org.mortbay.jetty.handler.ResourceHandler; import org.mortbay.jetty.servlet.ServletHandler; import org.mortbay.jetty.servlet.SessionHandler; import 
org.apache.hadoop.conf.Configuration; @@ -32,8 +31,6 @@ import org.apache.nutch.net.protocols.Re /** * Test cases for protocol-httpclient. - * - * @author Susam Pal */ public class TestProtocolHttpClient extends TestCase { Modified: nutch/trunk/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/PrefixURLFilter.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/PrefixURLFilter.java?rev=1484634&r1=1484633&r2=1484634&view=diff ============================================================================== --- nutch/trunk/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/PrefixURLFilter.java (original) +++ nutch/trunk/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/PrefixURLFilter.java Tue May 21 01:19:26 2013 @@ -14,9 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - -// $Id$ - package org.apache.nutch.urlfilter.prefix; import org.slf4j.Logger; @@ -79,7 +76,7 @@ public class PrefixURLFilter implements throws IOException { BufferedReader in=new BufferedReader(reader); - List urlprefixes = new ArrayList(); + List<String> urlprefixes = new ArrayList<String>(); String line; while((line=in.readLine())!=null) { Modified: nutch/trunk/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java?rev=1484634&r1=1484633&r2=1484634&view=diff ============================================================================== --- nutch/trunk/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java (original) +++ nutch/trunk/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java Tue May 21 01:19:26 2013 @@ -181,7 +181,7 @@ public class SuffixURLFilter 
implements return; } BufferedReader in = new BufferedReader(reader); - List aSuffixes = new ArrayList(); + List<String> aSuffixes = new ArrayList<String>(); boolean allow = false; boolean ignore = false; String line; Modified: nutch/trunk/src/test/org/apache/nutch/crawl/TestCrawlDbMerger.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/test/org/apache/nutch/crawl/TestCrawlDbMerger.java?rev=1484634&r1=1484633&r2=1484634&view=diff ============================================================================== --- nutch/trunk/src/test/org/apache/nutch/crawl/TestCrawlDbMerger.java (original) +++ nutch/trunk/src/test/org/apache/nutch/crawl/TestCrawlDbMerger.java Tue May 21 01:19:26 2013 @@ -44,9 +44,9 @@ public class TestCrawlDbMerger extends T url21 }; - TreeSet init1 = new TreeSet(); - TreeSet init2 = new TreeSet(); - HashMap expected = new HashMap(); + TreeSet<String> init1 = new TreeSet<String>(); + TreeSet<String> init2 = new TreeSet<String>(); + HashMap<String, CrawlDatum> expected = new HashMap<String, CrawlDatum>(); CrawlDatum cd1, cd2, cd3; Configuration conf; FileSystem fs; @@ -83,6 +83,7 @@ public class TestCrawlDbMerger extends T fs.mkdirs(testDir); } + @SuppressWarnings("deprecation") public void tearDown() { try { if (fs.exists(testDir)) @@ -93,6 +94,7 @@ public class TestCrawlDbMerger extends T } catch (Exception e) { } } + @SuppressWarnings("deprecation") public void testMerge() throws Exception { Path crawldb1 = new Path(testDir, "crawldb1"); Path crawldb2 = new Path(testDir, "crawldb2"); @@ -105,11 +107,11 @@ public class TestCrawlDbMerger extends T LOG.fine("* reading crawldb: " + output); reader = new CrawlDbReader(); String crawlDb = output.toString(); - Iterator it = expected.keySet().iterator(); + Iterator<String> it = expected.keySet().iterator(); while (it.hasNext()) { - String url = (String)it.next(); + String url = it.next(); LOG.fine("url=" + url); - CrawlDatum cd = (CrawlDatum)expected.get(url); + CrawlDatum cd = expected.get(url); 
CrawlDatum res = reader.get(crawlDb, url, conf); LOG.fine(" -> " + res); System.out.println("url=" + url); @@ -123,13 +125,13 @@ public class TestCrawlDbMerger extends T fs.delete(testDir); } - private void createCrawlDb(Configuration config, FileSystem fs, Path crawldb, TreeSet init, CrawlDatum cd) throws Exception { + private void createCrawlDb(Configuration config, FileSystem fs, Path crawldb, TreeSet<String> init, CrawlDatum cd) throws Exception { LOG.fine("* creating crawldb: " + crawldb); Path dir = new Path(crawldb, CrawlDb.CURRENT_NAME); MapFile.Writer writer = new MapFile.Writer(config, fs, new Path(dir, "part-00000").toString(), Text.class, CrawlDatum.class); - Iterator it = init.iterator(); + Iterator<String> it = init.iterator(); while (it.hasNext()) { - String key = (String)it.next(); + String key = it.next(); writer.append(new Text(key), cd); } writer.close(); Modified: nutch/trunk/src/test/org/apache/nutch/crawl/TestLinkDbMerger.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/test/org/apache/nutch/crawl/TestLinkDbMerger.java?rev=1484634&r1=1484633&r2=1484634&view=diff ============================================================================== --- nutch/trunk/src/test/org/apache/nutch/crawl/TestLinkDbMerger.java (original) +++ nutch/trunk/src/test/org/apache/nutch/crawl/TestLinkDbMerger.java Tue May 21 01:19:26 2013 @@ -68,9 +68,9 @@ public class TestLinkDbMerger extends Te String[] urls20_expected = urls11_expected; String[] urls21_expected = urls21; - TreeMap init1 = new TreeMap(); - TreeMap init2 = new TreeMap(); - HashMap expected = new HashMap(); + TreeMap<String, String[]> init1 = new TreeMap<String, String[]>(); + TreeMap<String, String[]> init2 = new TreeMap<String, String[]>(); + HashMap<String, String[]> expected = new HashMap<String, String[]>(); Configuration conf; Path testDir; FileSystem fs; @@ -116,16 +116,16 @@ public class TestLinkDbMerger extends Te merger.merge(output, new Path[]{linkdb1, linkdb2}, false, false); 
LOG.fine("* reading linkdb: " + output); reader = new LinkDbReader(conf, output); - Iterator it = expected.keySet().iterator(); + Iterator<String> it = expected.keySet().iterator(); while (it.hasNext()) { - String url = (String)it.next(); + String url = it.next(); LOG.fine("url=" + url); - String[] vals = (String[])expected.get(url); + String[] vals = expected.get(url); Inlinks inlinks = reader.getInlinks(new Text(url)); // may not be null assertNotNull(inlinks); - ArrayList links = new ArrayList(); - Iterator it2 = inlinks.iterator(); + ArrayList<String> links = new ArrayList<String>(); + Iterator<?> it2 = inlinks.iterator(); while (it2.hasNext()) { Inlink in = (Inlink)it2.next(); links.add(in.getFromUrl()); @@ -139,15 +139,15 @@ public class TestLinkDbMerger extends Te fs.delete(testDir, true); } - private void createLinkDb(Configuration config, FileSystem fs, Path linkdb, TreeMap init) throws Exception { + private void createLinkDb(Configuration config, FileSystem fs, Path linkdb, TreeMap<String, String[]> init) throws Exception { LOG.fine("* creating linkdb: " + linkdb); Path dir = new Path(linkdb, LinkDb.CURRENT_NAME); MapFile.Writer writer = new MapFile.Writer(config, fs, new Path(dir, "part-00000").toString(), Text.class, Inlinks.class); - Iterator it = init.keySet().iterator(); + Iterator<String> it = init.keySet().iterator(); while (it.hasNext()) { - String key = (String)it.next(); + String key = it.next(); Inlinks inlinks = new Inlinks(); - String[] vals = (String[])init.get(key); + String[] vals = init.get(key); for (int i = 0; i < vals.length; i++) { Inlink in = new Inlink(vals[i], vals[i]); inlinks.add(in); Modified: nutch/trunk/src/test/org/apache/nutch/plugin/TestPluginSystem.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/test/org/apache/nutch/plugin/TestPluginSystem.java?rev=1484634&r1=1484633&r2=1484634&view=diff ============================================================================== --- 
nutch/trunk/src/test/org/apache/nutch/plugin/TestPluginSystem.java (original) +++ nutch/trunk/src/test/org/apache/nutch/plugin/TestPluginSystem.java Tue May 21 01:19:26 2013 @@ -35,13 +35,11 @@ import org.apache.nutch.util.NutchJob; /** * Unit tests for the plugin system - * - * @author joa23 */ public class TestPluginSystem extends TestCase { private int fPluginCount; - private LinkedList fFolders = new LinkedList(); + private LinkedList<File> fFolders = new LinkedList<File>(); private Configuration conf ; private PluginRepository repository; @@ -62,11 +60,10 @@ public class TestPluginSystem extends Te */ protected void tearDown() throws Exception { for (int i = 0; i < fFolders.size(); i++) { - File folder = (File) fFolders.get(i); + File folder = fFolders.get(i); delete(folder); folder.delete(); } - } /** Modified: nutch/trunk/src/test/org/apache/nutch/segment/TestSegmentMerger.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/test/org/apache/nutch/segment/TestSegmentMerger.java?rev=1484634&r1=1484633&r2=1484634&view=diff ============================================================================== --- nutch/trunk/src/test/org/apache/nutch/segment/TestSegmentMerger.java (original) +++ nutch/trunk/src/test/org/apache/nutch/segment/TestSegmentMerger.java Tue May 21 01:19:26 2013 @@ -42,11 +42,11 @@ public class TestSegmentMerger extends T public void setUp() throws Exception { conf = NutchConfiguration.create(); fs = FileSystem.get(conf); - long blkSize = fs.getDefaultBlockSize(); testDir = new Path(conf.get("hadoop.tmp.dir"), "merge-" + System.currentTimeMillis()); seg1 = new Path(testDir, "seg1"); seg2 = new Path(testDir, "seg2"); out = new Path(testDir, "out"); + // create large parse-text segments System.err.println("Creating large segment 1..."); DecimalFormat df = new DecimalFormat("0000000"); @@ -55,6 +55,9 @@ public class TestSegmentMerger extends T MapFile.Writer w = new MapFile.Writer(conf, fs, ptPath.toString(), Text.class, ParseText.class); long 
curSize = 0; countSeg1 = 0; + FileStatus fileStatus = fs.getFileStatus(ptPath); + long blkSize = fileStatus.getBlockSize(); + while (curSize < blkSize * 2) { k.set("seg1-" + df.format(countSeg1)); w.append(k, new ParseText("seg1 text " + countSeg1));
