This is an automated email from the ASF dual-hosted git repository. lewismc pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/nutch.git.
from a2f637e Merge pull request #284 from YossiTamari/master add c93d908 NUTCH-2375 Upgrading nutch to use org.apache.hadoop.mapreduce add fe5bfb4 Merge branch 'master' into NUTCH-2375 add 405682e Merge branch 'NUTCH-2375' of https://github.com/Omkar20895/nutch into NUTCH-2375 new 54510e5 Merge pull request #221 from Omkar20895/NUTCH-2375 The 1 revision listed above as "new" is entirely new to this repository and will be described in a separate email. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: src/java/org/apache/nutch/crawl/CrawlDb.java | 48 +- src/java/org/apache/nutch/crawl/CrawlDbFilter.java | 38 +- src/java/org/apache/nutch/crawl/CrawlDbMerger.java | 46 +- src/java/org/apache/nutch/crawl/CrawlDbReader.java | 315 ++++---- .../org/apache/nutch/crawl/CrawlDbReducer.java | 44 +- .../org/apache/nutch/crawl/DeduplicationJob.java | 121 ++- src/java/org/apache/nutch/crawl/Generator.java | 873 +++++++++++---------- src/java/org/apache/nutch/crawl/LinkDb.java | 226 +++--- src/java/org/apache/nutch/crawl/LinkDbFilter.java | 30 +- src/java/org/apache/nutch/crawl/LinkDbMerger.java | 90 +-- src/java/org/apache/nutch/crawl/LinkDbReader.java | 52 +- .../nutch/crawl/MimeAdaptiveFetchSchedule.java | 2 +- .../org/apache/nutch/crawl/URLPartitioner.java | 15 +- src/java/org/apache/nutch/fetcher/FetchNode.java | 2 +- src/java/org/apache/nutch/fetcher/FetchNodeDb.java | 2 +- src/java/org/apache/nutch/fetcher/Fetcher.java | 576 +++++++------- .../apache/nutch/fetcher/FetcherOutputFormat.java | 70 +- .../org/apache/nutch/fetcher/FetcherThread.java | 118 +-- src/java/org/apache/nutch/fetcher/QueueFeeder.java | 26 +- src/java/org/apache/nutch/hostdb/HostDatum.java | 2 +- src/java/org/apache/nutch/hostdb/ReadHostDb.java | 5 +- .../org/apache/nutch/hostdb/ResolverThread.java | 37 +- src/java/org/apache/nutch/hostdb/UpdateHostDb.java | 56 +- .../apache/nutch/hostdb/UpdateHostDbMapper.java | 50 +- 
.../apache/nutch/hostdb/UpdateHostDbReducer.java | 52 +- src/java/org/apache/nutch/indexer/CleaningJob.java | 76 +- src/java/org/apache/nutch/indexer/IndexWriter.java | 5 +- .../org/apache/nutch/indexer/IndexWriters.java | 6 +- .../org/apache/nutch/indexer/IndexerMapReduce.java | 497 ++++++------ .../apache/nutch/indexer/IndexerOutputFormat.java | 22 +- .../nutch/indexer/IndexingFiltersChecker.java | 6 +- src/java/org/apache/nutch/indexer/IndexingJob.java | 49 +- .../org/apache/nutch/net/URLExemptionFilters.java | 2 +- src/java/org/apache/nutch/parse/ParseCallable.java | 2 +- .../org/apache/nutch/parse/ParseOutputFormat.java | 117 ++- src/java/org/apache/nutch/parse/ParseSegment.java | 207 ++--- .../apache/nutch/scoring/webgraph/LinkDumper.java | 164 ++-- .../apache/nutch/scoring/webgraph/LinkRank.java | 484 ++++++------ .../apache/nutch/scoring/webgraph/NodeDumper.java | 317 ++++---- .../apache/nutch/scoring/webgraph/NodeReader.java | 7 +- .../nutch/scoring/webgraph/ScoreUpdater.java | 146 ++-- .../apache/nutch/scoring/webgraph/WebGraph.java | 656 +++++++++------- .../nutch/segment/ContentAsTextInputFormat.java | 50 +- .../org/apache/nutch/segment/SegmentChecker.java | 2 +- .../org/apache/nutch/segment/SegmentMerger.java | 587 +++++++------- src/java/org/apache/nutch/segment/SegmentPart.java | 2 +- .../org/apache/nutch/segment/SegmentReader.java | 183 +++-- .../org/apache/nutch/service/impl/JobFactory.java | 2 +- .../nutch/service/model/request/JobConfig.java | 2 +- src/java/org/apache/nutch/tools/Benchmark.java | 10 +- src/java/org/apache/nutch/tools/FreeGenerator.java | 179 +++-- .../org/apache/nutch/tools/arc/ArcInputFormat.java | 26 +- .../apache/nutch/tools/arc/ArcRecordReader.java | 22 +- .../apache/nutch/tools/arc/ArcSegmentCreator.java | 466 +++++------ .../org/apache/nutch/tools/warc/WARCExporter.java | 296 +++---- src/java/org/apache/nutch/util/JexlUtil.java | 2 +- src/java/org/apache/nutch/util/NutchJob.java | 17 +- 
src/java/org/apache/nutch/util/NutchTool.java | 2 +- .../util/{NutchJob.java => SegmentReaderUtil.java} | 25 +- .../nutch/webui/client/model/ConnectionStatus.java | 2 +- .../pages/components/ColorEnumLabelBuilder.java | 2 +- .../webui/pages/components/CpmIteratorAdapter.java | 2 +- .../apache/nutch/indexer/geoip/package-info.java | 2 +- .../indexer/links/TestLinksIndexingFilter.java | 2 +- .../test/org/apache/nutch/parse/TestOutlinks.java | 2 +- .../cloudsearch/CloudSearchIndexWriter.java | 9 +- .../nutch/indexwriter/dummy/DummyIndexWriter.java | 5 +- .../elasticrest/ElasticRestIndexWriter.java | 32 +- .../indexwriter/elastic/ElasticConstants.java | 2 +- .../indexwriter/elastic/ElasticIndexWriter.java | 17 +- .../elastic/TestElasticIndexWriter.java | 14 +- .../indexwriter/rabbit/RabbitIndexWriter.java | 3 +- .../nutch/indexwriter/solr/SolrIndexWriter.java | 17 +- .../apache/nutch/indexwriter/solr/SolrUtils.java | 10 +- .../nutch/analysis/lang/HTMLLanguageParser.java | 2 +- .../nutch/parsefilter/regex/RegexParseFilter.java | 2 +- .../parsefilter/regex/TestRegexParseFilter.java | 2 +- .../apache/nutch/protocol/http/HttpResponse.java | 2 +- .../nutch/scoring/similarity/cosine/Model.java | 2 +- .../scoring/similarity/cosine/package-info.java | 2 +- .../scoring/similarity/util/package-info.java | 2 +- .../net/urlnormalizer/ajax/AjaxURLNormalizer.java | 2 +- .../urlnormalizer/ajax/TestAjaxURLNormalizer.java | 2 +- .../protocol/ProtocolURLNormalizer.java | 2 +- .../urlnormalizer/slash/SlashURLNormalizer.java | 2 +- .../nutch/crawl/ContinuousCrawlTestUtil.java | 28 +- .../org/apache/nutch/crawl/CrawlDBTestUtil.java | 297 ++++++- .../nutch/crawl/CrawlDbUpdateTestDriver.java | 2 +- .../org/apache/nutch/crawl/CrawlDbUpdateUtil.java | 284 ++++++- .../apache/nutch/crawl/TODOTestCrawlDbStates.java | 31 +- .../org/apache/nutch/crawl/TestCrawlDbFilter.java | 20 +- .../org/apache/nutch/crawl/TestCrawlDbMerger.java | 4 +- .../org/apache/nutch/crawl/TestCrawlDbStates.java | 98 ++- 
src/test/org/apache/nutch/crawl/TestGenerator.java | 22 +- src/test/org/apache/nutch/crawl/TestInjector.java | 2 +- src/test/org/apache/nutch/fetcher/TestFetcher.java | 6 +- .../apache/nutch/indexer/TestIndexerMapReduce.java | 8 +- .../org/apache/nutch/plugin/TestPluginSystem.java | 14 +- .../apache/nutch/segment/TestSegmentMerger.java | 4 +- .../segment/TestSegmentMergerCrawlDatums.java | 4 +- .../apache/nutch/tools/proxy/SegmentHandler.java | 6 +- 101 files changed, 4781 insertions(+), 3724 deletions(-) copy src/java/org/apache/nutch/util/{NutchJob.java => SegmentReaderUtil.java} (56%) -- To stop receiving notification emails like this one, please contact lewi...@apache.org.