This is an automated email from the ASF dual-hosted git repository.
lewismc pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git.
from a2f637e Merge pull request #284 from YossiTamari/master
add c93d908 NUTCH-2375 Upgrading nutch to use org.apache.hadoop.mapreduce
add fe5bfb4 Merge branch 'master' into NUTCH-2375
add 405682e Merge branch 'NUTCH-2375' of https://github.com/Omkar20895/nutch into NUTCH-2375
new 54510e5 Merge pull request #221 from Omkar20895/NUTCH-2375
The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email. The revisions
listed as "add" were already present in the repository and have only
been added to this reference.
Summary of changes:
src/java/org/apache/nutch/crawl/CrawlDb.java | 48 +-
src/java/org/apache/nutch/crawl/CrawlDbFilter.java | 38 +-
src/java/org/apache/nutch/crawl/CrawlDbMerger.java | 46 +-
src/java/org/apache/nutch/crawl/CrawlDbReader.java | 315 ++++----
.../org/apache/nutch/crawl/CrawlDbReducer.java | 44 +-
.../org/apache/nutch/crawl/DeduplicationJob.java | 121 ++-
src/java/org/apache/nutch/crawl/Generator.java | 873 +++++++++++----------
src/java/org/apache/nutch/crawl/LinkDb.java | 226 +++---
src/java/org/apache/nutch/crawl/LinkDbFilter.java | 30 +-
src/java/org/apache/nutch/crawl/LinkDbMerger.java | 90 +--
src/java/org/apache/nutch/crawl/LinkDbReader.java | 52 +-
.../nutch/crawl/MimeAdaptiveFetchSchedule.java | 2 +-
.../org/apache/nutch/crawl/URLPartitioner.java | 15 +-
src/java/org/apache/nutch/fetcher/FetchNode.java | 2 +-
src/java/org/apache/nutch/fetcher/FetchNodeDb.java | 2 +-
src/java/org/apache/nutch/fetcher/Fetcher.java | 576 +++++++-------
.../apache/nutch/fetcher/FetcherOutputFormat.java | 70 +-
.../org/apache/nutch/fetcher/FetcherThread.java | 118 +--
src/java/org/apache/nutch/fetcher/QueueFeeder.java | 26 +-
src/java/org/apache/nutch/hostdb/HostDatum.java | 2 +-
src/java/org/apache/nutch/hostdb/ReadHostDb.java | 5 +-
.../org/apache/nutch/hostdb/ResolverThread.java | 37 +-
src/java/org/apache/nutch/hostdb/UpdateHostDb.java | 56 +-
.../apache/nutch/hostdb/UpdateHostDbMapper.java | 50 +-
.../apache/nutch/hostdb/UpdateHostDbReducer.java | 52 +-
src/java/org/apache/nutch/indexer/CleaningJob.java | 76 +-
src/java/org/apache/nutch/indexer/IndexWriter.java | 5 +-
.../org/apache/nutch/indexer/IndexWriters.java | 6 +-
.../org/apache/nutch/indexer/IndexerMapReduce.java | 497 ++++++------
.../apache/nutch/indexer/IndexerOutputFormat.java | 22 +-
.../nutch/indexer/IndexingFiltersChecker.java | 6 +-
src/java/org/apache/nutch/indexer/IndexingJob.java | 49 +-
.../org/apache/nutch/net/URLExemptionFilters.java | 2 +-
src/java/org/apache/nutch/parse/ParseCallable.java | 2 +-
.../org/apache/nutch/parse/ParseOutputFormat.java | 117 ++-
src/java/org/apache/nutch/parse/ParseSegment.java | 207 ++---
.../apache/nutch/scoring/webgraph/LinkDumper.java | 164 ++--
.../apache/nutch/scoring/webgraph/LinkRank.java | 484 ++++++------
.../apache/nutch/scoring/webgraph/NodeDumper.java | 317 ++++----
.../apache/nutch/scoring/webgraph/NodeReader.java | 7 +-
.../nutch/scoring/webgraph/ScoreUpdater.java | 146 ++--
.../apache/nutch/scoring/webgraph/WebGraph.java | 656 +++++++++-------
.../nutch/segment/ContentAsTextInputFormat.java | 50 +-
.../org/apache/nutch/segment/SegmentChecker.java | 2 +-
.../org/apache/nutch/segment/SegmentMerger.java | 587 +++++++-------
src/java/org/apache/nutch/segment/SegmentPart.java | 2 +-
.../org/apache/nutch/segment/SegmentReader.java | 183 +++--
.../org/apache/nutch/service/impl/JobFactory.java | 2 +-
.../nutch/service/model/request/JobConfig.java | 2 +-
src/java/org/apache/nutch/tools/Benchmark.java | 10 +-
src/java/org/apache/nutch/tools/FreeGenerator.java | 179 +++--
.../org/apache/nutch/tools/arc/ArcInputFormat.java | 26 +-
.../apache/nutch/tools/arc/ArcRecordReader.java | 22 +-
.../apache/nutch/tools/arc/ArcSegmentCreator.java | 466 +++++------
.../org/apache/nutch/tools/warc/WARCExporter.java | 296 +++----
src/java/org/apache/nutch/util/JexlUtil.java | 2 +-
src/java/org/apache/nutch/util/NutchJob.java | 17 +-
src/java/org/apache/nutch/util/NutchTool.java | 2 +-
.../util/{NutchJob.java => SegmentReaderUtil.java} | 25 +-
.../nutch/webui/client/model/ConnectionStatus.java | 2 +-
.../pages/components/ColorEnumLabelBuilder.java | 2 +-
.../webui/pages/components/CpmIteratorAdapter.java | 2 +-
.../apache/nutch/indexer/geoip/package-info.java | 2 +-
.../indexer/links/TestLinksIndexingFilter.java | 2 +-
.../test/org/apache/nutch/parse/TestOutlinks.java | 2 +-
.../cloudsearch/CloudSearchIndexWriter.java | 9 +-
.../nutch/indexwriter/dummy/DummyIndexWriter.java | 5 +-
.../elasticrest/ElasticRestIndexWriter.java | 32 +-
.../indexwriter/elastic/ElasticConstants.java | 2 +-
.../indexwriter/elastic/ElasticIndexWriter.java | 17 +-
.../elastic/TestElasticIndexWriter.java | 14 +-
.../indexwriter/rabbit/RabbitIndexWriter.java | 3 +-
.../nutch/indexwriter/solr/SolrIndexWriter.java | 17 +-
.../apache/nutch/indexwriter/solr/SolrUtils.java | 10 +-
.../nutch/analysis/lang/HTMLLanguageParser.java | 2 +-
.../nutch/parsefilter/regex/RegexParseFilter.java | 2 +-
.../parsefilter/regex/TestRegexParseFilter.java | 2 +-
.../apache/nutch/protocol/http/HttpResponse.java | 2 +-
.../nutch/scoring/similarity/cosine/Model.java | 2 +-
.../scoring/similarity/cosine/package-info.java | 2 +-
.../scoring/similarity/util/package-info.java | 2 +-
.../net/urlnormalizer/ajax/AjaxURLNormalizer.java | 2 +-
.../urlnormalizer/ajax/TestAjaxURLNormalizer.java | 2 +-
.../protocol/ProtocolURLNormalizer.java | 2 +-
.../urlnormalizer/slash/SlashURLNormalizer.java | 2 +-
.../nutch/crawl/ContinuousCrawlTestUtil.java | 28 +-
.../org/apache/nutch/crawl/CrawlDBTestUtil.java | 297 ++++++-
.../nutch/crawl/CrawlDbUpdateTestDriver.java | 2 +-
.../org/apache/nutch/crawl/CrawlDbUpdateUtil.java | 284 ++++++-
.../apache/nutch/crawl/TODOTestCrawlDbStates.java | 31 +-
.../org/apache/nutch/crawl/TestCrawlDbFilter.java | 20 +-
.../org/apache/nutch/crawl/TestCrawlDbMerger.java | 4 +-
.../org/apache/nutch/crawl/TestCrawlDbStates.java | 98 ++-
src/test/org/apache/nutch/crawl/TestGenerator.java | 22 +-
src/test/org/apache/nutch/crawl/TestInjector.java | 2 +-
src/test/org/apache/nutch/fetcher/TestFetcher.java | 6 +-
.../apache/nutch/indexer/TestIndexerMapReduce.java | 8 +-
.../org/apache/nutch/plugin/TestPluginSystem.java | 14 +-
.../apache/nutch/segment/TestSegmentMerger.java | 4 +-
.../segment/TestSegmentMergerCrawlDatums.java | 4 +-
.../apache/nutch/tools/proxy/SegmentHandler.java | 6 +-
101 files changed, 4781 insertions(+), 3724 deletions(-)
copy src/java/org/apache/nutch/util/{NutchJob.java => SegmentReaderUtil.java} (56%)
--
To stop receiving notification emails like this one, please contact
[email protected].