This is an automated email from the ASF dual-hosted git repository. lewismc pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/nutch.git
commit 54510e503f7da7301a59f5f0e5bf4509b37d35b4 Merge: a2f637e 405682e Author: Lewis John McGibbney <[email protected]> AuthorDate: Tue Feb 27 14:02:02 2018 -0800 Merge pull request #221 from Omkar20895/NUTCH-2375 NUTCH-2375 Upgrading nutch to use org.apache.hadoop.mapreduce src/java/org/apache/nutch/crawl/CrawlDb.java | 48 +- src/java/org/apache/nutch/crawl/CrawlDbFilter.java | 38 +- src/java/org/apache/nutch/crawl/CrawlDbMerger.java | 46 +- src/java/org/apache/nutch/crawl/CrawlDbReader.java | 315 ++++---- .../org/apache/nutch/crawl/CrawlDbReducer.java | 44 +- .../org/apache/nutch/crawl/DeduplicationJob.java | 121 ++- src/java/org/apache/nutch/crawl/Generator.java | 873 +++++++++++---------- src/java/org/apache/nutch/crawl/LinkDb.java | 226 +++--- src/java/org/apache/nutch/crawl/LinkDbFilter.java | 30 +- src/java/org/apache/nutch/crawl/LinkDbMerger.java | 90 +-- src/java/org/apache/nutch/crawl/LinkDbReader.java | 52 +- .../nutch/crawl/MimeAdaptiveFetchSchedule.java | 2 +- .../org/apache/nutch/crawl/URLPartitioner.java | 15 +- src/java/org/apache/nutch/fetcher/FetchNode.java | 2 +- src/java/org/apache/nutch/fetcher/FetchNodeDb.java | 2 +- src/java/org/apache/nutch/fetcher/Fetcher.java | 576 +++++++------- .../apache/nutch/fetcher/FetcherOutputFormat.java | 70 +- .../org/apache/nutch/fetcher/FetcherThread.java | 118 +-- src/java/org/apache/nutch/fetcher/QueueFeeder.java | 26 +- src/java/org/apache/nutch/hostdb/HostDatum.java | 2 +- src/java/org/apache/nutch/hostdb/ReadHostDb.java | 5 +- .../org/apache/nutch/hostdb/ResolverThread.java | 37 +- src/java/org/apache/nutch/hostdb/UpdateHostDb.java | 56 +- .../apache/nutch/hostdb/UpdateHostDbMapper.java | 50 +- .../apache/nutch/hostdb/UpdateHostDbReducer.java | 52 +- src/java/org/apache/nutch/indexer/CleaningJob.java | 76 +- src/java/org/apache/nutch/indexer/IndexWriter.java | 5 +- .../org/apache/nutch/indexer/IndexWriters.java | 6 +- .../org/apache/nutch/indexer/IndexerMapReduce.java | 497 ++++++------ .../apache/nutch/indexer/IndexerOutputFormat.java | 22 +- .../nutch/indexer/IndexingFiltersChecker.java | 6 +- src/java/org/apache/nutch/indexer/IndexingJob.java | 49 +- .../org/apache/nutch/net/URLExemptionFilters.java | 2 +- src/java/org/apache/nutch/parse/ParseCallable.java | 2 +- .../org/apache/nutch/parse/ParseOutputFormat.java | 117 ++- src/java/org/apache/nutch/parse/ParseSegment.java | 207 ++--- .../apache/nutch/scoring/webgraph/LinkDumper.java | 164 ++-- .../apache/nutch/scoring/webgraph/LinkRank.java | 484 ++++++------ .../apache/nutch/scoring/webgraph/NodeDumper.java | 317 ++++---- .../apache/nutch/scoring/webgraph/NodeReader.java | 7 +- .../nutch/scoring/webgraph/ScoreUpdater.java | 146 ++-- .../apache/nutch/scoring/webgraph/WebGraph.java | 656 +++++++++------- .../nutch/segment/ContentAsTextInputFormat.java | 50 +- .../org/apache/nutch/segment/SegmentChecker.java | 2 +- .../org/apache/nutch/segment/SegmentMerger.java | 587 +++++++------- src/java/org/apache/nutch/segment/SegmentPart.java | 2 +- .../org/apache/nutch/segment/SegmentReader.java | 183 +++-- .../org/apache/nutch/service/impl/JobFactory.java | 2 +- .../nutch/service/model/request/JobConfig.java | 2 +- src/java/org/apache/nutch/tools/Benchmark.java | 10 +- src/java/org/apache/nutch/tools/FreeGenerator.java | 179 +++-- .../org/apache/nutch/tools/arc/ArcInputFormat.java | 26 +- .../apache/nutch/tools/arc/ArcRecordReader.java | 22 +- .../apache/nutch/tools/arc/ArcSegmentCreator.java | 466 +++++------ .../org/apache/nutch/tools/warc/WARCExporter.java | 296 +++---- src/java/org/apache/nutch/util/JexlUtil.java | 2 +- src/java/org/apache/nutch/util/NutchJob.java | 17 +- src/java/org/apache/nutch/util/NutchTool.java | 2 +- .../util/{NutchJob.java => SegmentReaderUtil.java} | 25 +- .../nutch/webui/client/model/ConnectionStatus.java | 2 +- .../pages/components/ColorEnumLabelBuilder.java | 2 +- .../webui/pages/components/CpmIteratorAdapter.java | 2 +- .../apache/nutch/indexer/geoip/package-info.java | 2 +- .../indexer/links/TestLinksIndexingFilter.java | 2 +- .../test/org/apache/nutch/parse/TestOutlinks.java | 2 +- .../cloudsearch/CloudSearchIndexWriter.java | 9 +- .../nutch/indexwriter/dummy/DummyIndexWriter.java | 5 +- .../elasticrest/ElasticRestIndexWriter.java | 32 +- .../indexwriter/elastic/ElasticConstants.java | 2 +- .../indexwriter/elastic/ElasticIndexWriter.java | 17 +- .../elastic/TestElasticIndexWriter.java | 14 +- .../indexwriter/rabbit/RabbitIndexWriter.java | 3 +- .../nutch/indexwriter/solr/SolrIndexWriter.java | 17 +- .../apache/nutch/indexwriter/solr/SolrUtils.java | 10 +- .../nutch/analysis/lang/HTMLLanguageParser.java | 2 +- .../nutch/parsefilter/regex/RegexParseFilter.java | 2 +- .../parsefilter/regex/TestRegexParseFilter.java | 2 +- .../apache/nutch/protocol/http/HttpResponse.java | 2 +- .../nutch/scoring/similarity/cosine/Model.java | 2 +- .../scoring/similarity/cosine/package-info.java | 2 +- .../scoring/similarity/util/package-info.java | 2 +- .../net/urlnormalizer/ajax/AjaxURLNormalizer.java | 2 +- .../urlnormalizer/ajax/TestAjaxURLNormalizer.java | 2 +- .../protocol/ProtocolURLNormalizer.java | 2 +- .../urlnormalizer/slash/SlashURLNormalizer.java | 2 +- .../nutch/crawl/ContinuousCrawlTestUtil.java | 28 +- .../org/apache/nutch/crawl/CrawlDBTestUtil.java | 297 ++++++- .../nutch/crawl/CrawlDbUpdateTestDriver.java | 2 +- .../org/apache/nutch/crawl/CrawlDbUpdateUtil.java | 284 ++++++- .../apache/nutch/crawl/TODOTestCrawlDbStates.java | 31 +- .../org/apache/nutch/crawl/TestCrawlDbFilter.java | 20 +- .../org/apache/nutch/crawl/TestCrawlDbMerger.java | 4 +- .../org/apache/nutch/crawl/TestCrawlDbStates.java | 98 ++- src/test/org/apache/nutch/crawl/TestGenerator.java | 22 +- src/test/org/apache/nutch/crawl/TestInjector.java | 2 +- src/test/org/apache/nutch/fetcher/TestFetcher.java | 6 +- .../apache/nutch/indexer/TestIndexerMapReduce.java | 8 +- .../org/apache/nutch/plugin/TestPluginSystem.java | 14 +- .../apache/nutch/segment/TestSegmentMerger.java | 4 +- .../segment/TestSegmentMergerCrawlDatums.java | 4 +- .../apache/nutch/tools/proxy/SegmentHandler.java | 6 +- 101 files changed, 4781 insertions(+), 3724 deletions(-) -- To stop receiving notification emails like this one, please contact [email protected].
