[ https://issues.apache.org/jira/browse/NUTCH-2494?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16343555#comment-16343555 ]
Hudson commented on NUTCH-2494: ------------------------------- SUCCESS: Integrated in Jenkins build Nutch-trunk #3499 (See [https://builds.apache.org/job/Nutch-trunk/3499/]) Fix for NUTCH-2494 contributed by Ashraful Islam, closes #274 (snagel: [https://github.com/apache/nutch/commit/87c7a2e57bdde875a0ab36f8586fb607970a4e1d]) * (edit) src/java/org/apache/nutch/parse/ParseOutputFormat.java * (edit) src/java/org/apache/nutch/fetcher/FetcherOutputFormat.java > Fetcher: java.lang.IllegalArgumentException: Wrong FS: s3 > --------------------------------------------------------- > > Key: NUTCH-2494 > URL: https://issues.apache.org/jira/browse/NUTCH-2494 > Project: Nutch > Issue Type: Bug > Components: fetcher, parser > Affects Versions: 1.14 > Environment: * AWS EMR Cluster > * AWS S3 > * Hadoop 2.2.7 > Reporter: Ashraful Islam > Assignee: Sebastian Nagel > Priority: Major > Fix For: 1.15 > > Attachments: NUTCH-2494.patch > > > We are using Nutch 1.14 on an AWS EMR cluster (Hadoop 2.2.7) and are trying to use S3 > as the main storage. > We are using the command below: > {code} > bin/crawl -s s3://nutch-emr-cluster/test/crawl/urls > s3://nutch-emr-cluster/test/crawl 1 > {code} > The Injector and Generator completed successfully without any errors, and the data > was written correctly to S3. 
But in the Fetcher and Parser steps we are getting > IllegalArgumentException > Full stacktrace > {code:java} > 18/01/11 07:16:52 ERROR fetcher.Fetcher: Fetcher: > java.lang.IllegalArgumentException: Wrong FS: > s3://nutch-emr-cluster/test/crawl/segments/20180111071602/crawl_fetch, > expected: hdfs://ip-172-31-26-180.eu-west-1.compute.internal:8020 > at org.apache.hadoop.fs.FileSystem.checkPath(FileSystem.java:653) > at > org.apache.hadoop.hdfs.DistributedFileSystem.getPathName(DistributedFileSystem.java:194) > at > org.apache.hadoop.hdfs.DistributedFileSystem.access$000(DistributedFileSystem.java:106) > at > org.apache.hadoop.hdfs.DistributedFileSystem$22.doCall(DistributedFileSystem.java:1305) > at > org.apache.hadoop.hdfs.DistributedFileSystem$22.doCall(DistributedFileSystem.java:1301) > at > org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81) > at > org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:1317) > at org.apache.hadoop.fs.FileSystem.exists(FileSystem.java:1430) > at > org.apache.nutch.fetcher.FetcherOutputFormat.checkOutputSpecs(FetcherOutputFormat.java:55) > at > org.apache.hadoop.mapreduce.JobSubmitter.checkSpecs(JobSubmitter.java:268) > at > org.apache.hadoop.mapreduce.JobSubmitter.submitJobInternal(JobSubmitter.java:139) > at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1290) > at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1287) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:422) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1698) > at org.apache.hadoop.mapreduce.Job.submit(Job.java:1287) > at org.apache.hadoop.mapred.JobClient$1.run(JobClient.java:575) > at org.apache.hadoop.mapred.JobClient$1.run(JobClient.java:570) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:422) > at > 
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1698) > at > org.apache.hadoop.mapred.JobClient.submitJobInternal(JobClient.java:570) > at org.apache.hadoop.mapred.JobClient.submitJob(JobClient.java:561) > at org.apache.hadoop.mapred.JobClient.runJob(JobClient.java:870) > at org.apache.nutch.fetcher.Fetcher.fetch(Fetcher.java:486) > at org.apache.nutch.fetcher.Fetcher.run(Fetcher.java:521) > at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:70) > at org.apache.nutch.fetcher.Fetcher.main(Fetcher.java:495) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at org.apache.hadoop.util.RunJar.run(RunJar.java:221) > at org.apache.hadoop.util.RunJar.main(RunJar.java:136) > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)