Author: tejasp
Date: Tue Apr 30 19:36:23 2013
New Revision: 1477792
URL: http://svn.apache.org/r1477792
Log:
NUTCH-1273 Fix [deprecation] javac warnings
Modified:
nutch/branches/2.x/CHANGES.txt
nutch/branches/2.x/ivy/ivy.xml
nutch/branches/2.x/src/java/org/apache/nutch/plugin/PluginDescriptor.java
nutch/branches/2.x/src/java/org/apache/nutch/plugin/PluginManifestParser.java
nutch/branches/2.x/src/java/org/apache/nutch/util/MimeUtil.java
nutch/branches/2.x/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/TikaParser.java
nutch/branches/2.x/src/plugin/parse-tika/src/test/org/apache/nutch/parse/tika/DOMContentUtilsTest.java
nutch/branches/2.x/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java
nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java
Modified: nutch/branches/2.x/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1477792&r1=1477791&r2=1477792&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Tue Apr 30 19:36:23 2013
@@ -2,6 +2,8 @@ Nutch Change Log
Release 2.2 - Current Development
+* NUTCH-1273 Fix [deprecation] javac warnings (lewismc + tejasp)
+
* NUTCH-1031 Delegate parsing of robots.txt to crawler-commons (tejasp)
* NUTCH-346 Improve readability of logs/hadoop.log (Renaud Richardet via
tejasp)
Modified: nutch/branches/2.x/ivy/ivy.xml
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/ivy/ivy.xml?rev=1477792&r1=1477791&r2=1477792&view=diff
==============================================================================
--- nutch/branches/2.x/ivy/ivy.xml (original)
+++ nutch/branches/2.x/ivy/ivy.xml Tue Apr 30 19:36:23 2013
@@ -65,6 +65,7 @@
<dependency org="xerces" name="xercesImpl" rev="2.9.1" />
<dependency org="xerces" name="xmlParserAPIs" rev="2.6.2" />
+ <dependency org="xalan" name="serializer" rev="2.7.1" />
<dependency org="oro" name="oro" rev="2.0.8" />
<dependency org="org.jdom" name="jdom" rev="1.1" conf="*->default" />
Modified:
nutch/branches/2.x/src/java/org/apache/nutch/plugin/PluginDescriptor.java
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/plugin/PluginDescriptor.java?rev=1477792&r1=1477791&r2=1477792&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/plugin/PluginDescriptor.java
(original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/plugin/PluginDescriptor.java
Tue Apr 30 19:36:23 2013
@@ -219,7 +219,7 @@ public class PluginDescriptor {
*/
public void addExportedLibRelative(String pLibPath)
throws MalformedURLException {
- URL url = new File(getPluginPath() + File.separator + pLibPath).toURL();
+ URL url = new File(getPluginPath() + File.separator + pLibPath).toURI().toURL();
fExportedLibs.add(url);
}
@@ -248,7 +248,7 @@ public class PluginDescriptor {
*/
public void addNotExportedLibRelative(String pLibPath)
throws MalformedURLException {
- URL url = new File(getPluginPath() + File.separator + pLibPath).toURL();
+ URL url = new File(getPluginPath() + File.separator + pLibPath).toURI().toURL();
fNotExportedLibs.add(url);
}
@@ -279,7 +279,7 @@ public class PluginDescriptor {
try {
for (File file2 : file.listFiles()) {
if (file2.getAbsolutePath().endsWith("properties"))
- arrayList.add(file2.getParentFile().toURL());
+ arrayList.add(file2.getParentFile().toURI().toURL());
}
} catch (MalformedURLException e) {
LOG.debug(getPluginId() + " " + e.toString());
Modified:
nutch/branches/2.x/src/java/org/apache/nutch/plugin/PluginManifestParser.java
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/plugin/PluginManifestParser.java?rev=1477792&r1=1477791&r2=1477792&view=diff
==============================================================================
---
nutch/branches/2.x/src/java/org/apache/nutch/plugin/PluginManifestParser.java
(original)
+++
nutch/branches/2.x/src/java/org/apache/nutch/plugin/PluginManifestParser.java
Tue Apr 30 19:36:23 2013
@@ -147,7 +147,7 @@ public class PluginManifestParser {
private PluginDescriptor parseManifestFile(String pManifestPath)
throws MalformedURLException, SAXException, IOException,
ParserConfigurationException {
- Document document = parseXML(new File(pManifestPath).toURL());
+ Document document = parseXML(new File(pManifestPath).toURI().toURL());
String pPath = new File(pManifestPath).getParent();
return parsePlugin(document, pPath);
}
Modified: nutch/branches/2.x/src/java/org/apache/nutch/util/MimeUtil.java
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/MimeUtil.java?rev=1477792&r1=1477791&r2=1477792&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/util/MimeUtil.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/util/MimeUtil.java Tue Apr 30
19:36:23 2013
@@ -25,6 +25,7 @@ import org.apache.hadoop.conf.Configurat
// Tika imports
import org.apache.tika.Tika;
+import org.apache.tika.config.TikaConfig;
import org.apache.tika.mime.MimeType;
import org.apache.tika.mime.MimeTypeException;
import org.apache.tika.mime.MimeTypes;
@@ -33,7 +34,7 @@ import org.apache.tika.mime.MimeTypesFac
// Slf4j logging imports
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-
+
// imported for Javadoc
import org.apache.nutch.protocol.ProtocolOutput;
@@ -169,11 +170,19 @@ public final class MimeUtil {
|| (type != null && type.getName().equals(MimeTypes.OCTET_STREAM))) {
// If no mime-type header, or cannot find a corresponding registered
// mime-type, then guess a mime-type from the url pattern
- type = this.mimeTypes.getMimeType(url) != null ? this.mimeTypes
- .getMimeType(url) : type;
- }
- retType= type.getName();
+ try {
+ TikaConfig tikaConfig = TikaConfig.getDefaultConfig();
+ Tika tika = new Tika(tikaConfig);
+ retType = tika.detect(url) != null ? tika.detect(url) : null;
+ } catch (Exception e) {
+ String message = "Problem loading default Tika configuration";
+ LOG.error(message, e);
+ throw new RuntimeException(e);
+ }
+ } else {
+ retType = type.getName();
+ }
// if magic is enabled use mime magic to guess if the mime type returned
// from the magic guess is different than the one that's already set so far
@@ -257,6 +266,4 @@ public final class MimeUtil {
return null;
}
}
-
-
}
Modified:
nutch/branches/2.x/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/TikaParser.java
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/TikaParser.java?rev=1477792&r1=1477791&r2=1477792&view=diff
==============================================================================
---
nutch/branches/2.x/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/TikaParser.java
(original)
+++
nutch/branches/2.x/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/TikaParser.java
Tue Apr 30 19:36:23 2013
@@ -50,6 +50,7 @@ import org.apache.nutch.util.MimeUtil;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.util.TableUtil;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MimeType;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
@@ -164,7 +165,8 @@ public class TikaParser implements org.a
// populate Nutch metadata with Tika metadata
String[] TikaMDNames = tikamd.names();
for (String tikaMDName : TikaMDNames) {
- if (tikaMDName.equalsIgnoreCase(Metadata.TITLE)) continue;
+ if (tikaMDName.equalsIgnoreCase(TikaCoreProperties.TITLE.toString()))
+ continue;
// TODO what if multivalued?
page.putToMetadata(new Utf8(tikaMDName),
ByteBuffer.wrap(Bytes.toBytes(tikamd
.get(tikaMDName))));
Modified:
nutch/branches/2.x/src/plugin/parse-tika/src/test/org/apache/nutch/parse/tika/DOMContentUtilsTest.java
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/parse-tika/src/test/org/apache/nutch/parse/tika/DOMContentUtilsTest.java?rev=1477792&r1=1477791&r2=1477792&view=diff
==============================================================================
---
nutch/branches/2.x/src/plugin/parse-tika/src/test/org/apache/nutch/parse/tika/DOMContentUtilsTest.java
(original)
+++
nutch/branches/2.x/src/plugin/parse-tika/src/test/org/apache/nutch/parse/tika/DOMContentUtilsTest.java
Tue Apr 30 19:36:23 2013
@@ -29,7 +29,7 @@ import org.apache.nutch.util.NutchConfig
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
-import org.apache.xml.serialize.DOMSerializerImpl;
+import org.apache.xml.serializer.dom3.LSSerializerImpl;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -229,9 +229,9 @@ public class DOMContentUtilsTest extends
fail("caught exception: " + e);
}
testDOMs[i] = root;
- DOMSerializerImpl ds = new DOMSerializerImpl();
+ LSSerializerImpl lsi = new LSSerializerImpl();
System.out.println("input " + i + ": '" + testPages[i]
+ "'");
- System.out.println("output " + i + ": '" + ds.writeToString(root)
+ System.out.println("output " + i + ": '" + lsi.writeToString(root)
+ "'");
}
Modified:
nutch/branches/2.x/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java?rev=1477792&r1=1477791&r2=1477792&view=diff
==============================================================================
---
nutch/branches/2.x/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java
(original)
+++
nutch/branches/2.x/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java
Tue Apr 30 19:36:23 2013
@@ -143,7 +143,7 @@ public class FileResponse {
if (!f.equals(f.getCanonicalFile())) {
// set headers
//hdrs.put("Location", f.getCanonicalFile().toURI());
- headers.set(Response.LOCATION, f.getCanonicalFile().toURL().toString());
+ headers.set(Response.LOCATION, f.getCanonicalFile().toURI().toURL().toString());
this.code = 300; // http redirect
return;
Modified:
nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java?rev=1477792&r1=1477791&r2=1477792&view=diff
==============================================================================
---
nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java
(original)
+++
nutch/branches/2.x/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java
Tue Apr 30 19:36:23 2013
@@ -45,6 +45,8 @@ import org.apache.commons.httpclient.NTC
import org.apache.commons.httpclient.auth.AuthScope;
import org.apache.commons.httpclient.params.HttpConnectionManagerParams;
import org.apache.commons.httpclient.protocol.Protocol;
+import org.apache.commons.httpclient.protocol.ProtocolSocketFactory;
+import org.apache.commons.httpclient.protocol.SSLProtocolSocketFactory;
// Nutch imports
import org.apache.nutch.storage.WebPage;
@@ -124,7 +126,7 @@ public class Http extends HttpBase {
*/
public void setConf(Configuration conf) {
super.setConf(conf);
- this.conf = conf;
+ Http.conf = conf;
this.maxThreadsTotal = conf.getInt("fetcher.threads.fetch", 10);
this.proxyUsername = conf.get("http.proxy.username", "");
this.proxyPassword = conf.get("http.proxy.password", "");
@@ -178,8 +180,8 @@ public class Http extends HttpBase {
private void configureClient() {
// Set up an HTTPS socket factory that accepts self-signed
certs.
- Protocol https = new Protocol("https",
- new DummySSLProtocolSocketFactory(), 443);
+ ProtocolSocketFactory factory = new SSLProtocolSocketFactory();
+ Protocol https = new Protocol("https", factory, 443);
Protocol.registerProtocol("https", https);
HttpConnectionManagerParams params =
connectionManager.getParams();
@@ -195,7 +197,7 @@ public class Http extends HttpBase {
client.getParams().setConnectionManagerTimeout(timeout);
HostConfiguration hostConf = client.getHostConfiguration();
- ArrayList headers = new ArrayList();
+ ArrayList<Header> headers = new ArrayList<Header>();
// Set the User Agent in the header
headers.add(new Header("User-Agent", userAgent));
// prefer English
@@ -222,7 +224,7 @@ public class Http extends HttpBase {
this.proxyPort,
this.proxyRealm);
NTCredentials proxyCredentials = new
NTCredentials(
- this.proxyUsername, this.proxyPassword, this.agentHost,
+ this.proxyUsername, this.proxyPassword, Http.agentHost,
this.proxyRealm);
client.getState().setProxyCredentials(proxyAuthScope,