This is an automated email from the ASF dual-hosted git repository. lewismc pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/nutch.git
commit 7cb7abde6e3fae7c2ae70865e7ee2ee5ff95e2ba Merge: 31819b7 303fd19 Author: Lewis John McGibbney <[email protected]> AuthorDate: Tue Mar 27 06:49:09 2018 -0700 Merge pull request #295 from lewismc/NUTCH-2516 NUTCH-2516 Hadoop imports use wildcards .gitignore | 6 + ivy/ivy-2.4.0.jar | Bin 1282424 -> 0 bytes src/java/org/apache/nutch/crawl/CrawlDatum.java | 28 ++- src/java/org/apache/nutch/crawl/CrawlDb.java | 30 ++- src/java/org/apache/nutch/crawl/CrawlDbFilter.java | 1 - src/java/org/apache/nutch/crawl/CrawlDbMerger.java | 15 +- src/java/org/apache/nutch/crawl/CrawlDbReader.java | 4 - .../org/apache/nutch/crawl/CrawlDbReducer.java | 8 +- .../org/apache/nutch/crawl/DeduplicationJob.java | 4 - src/java/org/apache/nutch/crawl/Generator.java | 31 ++- src/java/org/apache/nutch/crawl/Inlink.java | 8 +- src/java/org/apache/nutch/crawl/Inlinks.java | 19 +- src/java/org/apache/nutch/crawl/LinkDbFilter.java | 2 - src/java/org/apache/nutch/crawl/LinkDbMerger.java | 1 - src/java/org/apache/nutch/crawl/LinkDbReader.java | 19 +- .../org/apache/nutch/crawl/SignatureFactory.java | 1 - .../org/apache/nutch/crawl/URLPartitioner.java | 3 +- src/java/org/apache/nutch/fetcher/FetchNodeDb.java | 1 - src/java/org/apache/nutch/fetcher/Fetcher.java | 28 ++- .../apache/nutch/fetcher/FetcherOutputFormat.java | 5 - .../org/apache/nutch/fetcher/FetcherThread.java | 2 - .../apache/nutch/fetcher/FetcherThreadEvent.java | 1 - src/java/org/apache/nutch/fetcher/QueueFeeder.java | 1 - src/java/org/apache/nutch/hostdb/HostDatum.java | 2 - src/java/org/apache/nutch/hostdb/ReadHostDb.java | 5 - src/java/org/apache/nutch/hostdb/UpdateHostDb.java | 6 - .../apache/nutch/hostdb/UpdateHostDbMapper.java | 4 - .../apache/nutch/hostdb/UpdateHostDbReducer.java | 3 - src/java/org/apache/nutch/indexer/CleaningJob.java | 3 - src/java/org/apache/nutch/indexer/IndexWriter.java | 1 - .../org/apache/nutch/indexer/IndexWriters.java | 1 - .../org/apache/nutch/indexer/IndexerMapReduce.java | 5 - .../apache/nutch/indexer/IndexerOutputFormat.java | 1 - .../org/apache/nutch/indexer/IndexingFilter.java | 2 - .../org/apache/nutch/indexer/IndexingFilters.java | 1 - .../nutch/indexer/IndexingFiltersChecker.java | 1 - src/java/org/apache/nutch/indexer/IndexingJob.java | 1 - src/java/org/apache/nutch/indexer/NutchField.java | 17 +- .../org/apache/nutch/metadata/CreativeCommons.java | 6 +- .../org/apache/nutch/metadata/HttpHeaders.java | 18 +- .../org/apache/nutch/net/URLExemptionFilter.java | 3 +- src/java/org/apache/nutch/net/URLFilter.java | 2 - .../org/apache/nutch/net/URLFilterChecker.java | 7 - .../org/apache/nutch/net/URLNormalizerChecker.java | 7 - .../org/apache/nutch/net/protocols/Response.java | 2 - .../org/apache/nutch/parse/HtmlParseFilter.java | 3 - src/java/org/apache/nutch/parse/ParseData.java | 16 +- src/java/org/apache/nutch/parse/ParseImpl.java | 7 +- .../org/apache/nutch/parse/ParseOutputFormat.java | 19 +- .../org/apache/nutch/parse/ParsePluginList.java | 1 - .../org/apache/nutch/parse/ParsePluginsReader.java | 4 - src/java/org/apache/nutch/parse/ParseSegment.java | 39 ++-- src/java/org/apache/nutch/parse/ParseText.java | 24 +- src/java/org/apache/nutch/parse/ParseUtil.java | 2 - src/java/org/apache/nutch/parse/Parser.java | 2 - src/java/org/apache/nutch/parse/ParserFactory.java | 4 - src/java/org/apache/nutch/protocol/Content.java | 3 - src/java/org/apache/nutch/protocol/Protocol.java | 2 - .../org/apache/nutch/protocol/ProtocolFactory.java | 6 +- .../apache/nutch/protocol/RobotRulesParser.java | 3 - .../apache/nutch/scoring/webgraph/LinkDumper.java | 4 - .../apache/nutch/scoring/webgraph/LinkRank.java | 2 - .../apache/nutch/scoring/webgraph/NodeDumper.java | 2 - .../apache/nutch/scoring/webgraph/NodeReader.java | 1 - .../nutch/scoring/webgraph/ScoreUpdater.java | 2 - .../apache/nutch/scoring/webgraph/WebGraph.java | 2 - .../org/apache/nutch/segment/SegmentReader.java | 9 +- src/java/org/apache/nutch/service/NutchReader.java | 1 - .../org/apache/nutch/service/impl/NodeReader.java | 1 - .../service/impl/NutchServerPoolExecutor.java | 2 - .../nutch/service/model/request/JobConfig.java | 1 - .../nutch/service/resources/ConfigResource.java | 3 - .../nutch/service/resources/SeedResource.java | 1 - .../nutch/tools/AbstractCommonCrawlFormat.java | 2 - .../apache/nutch/tools/CommonCrawlDataDumper.java | 5 - .../apache/nutch/tools/CommonCrawlFormatWARC.java | 4 +- src/java/org/apache/nutch/tools/DmozParser.java | 50 +++-- src/java/org/apache/nutch/tools/FileDumper.java | 5 - src/java/org/apache/nutch/tools/FreeGenerator.java | 2 - src/java/org/apache/nutch/tools/WARCUtils.java | 218 +++++++++--------- .../apache/nutch/tools/arc/ArcSegmentCreator.java | 1 - .../org/apache/nutch/tools/warc/WARCExporter.java | 2 - src/java/org/apache/nutch/util/DeflateUtils.java | 1 - src/java/org/apache/nutch/util/DomUtil.java | 1 - src/java/org/apache/nutch/util/GZIPUtils.java | 1 - src/java/org/apache/nutch/util/MimeUtil.java | 5 - .../nutch/util/ProtocolStatusStatistics.java | 1 - src/java/org/apache/nutch/util/URLUtil.java | 4 +- .../org/apache/nutch/webui/model/NutchConfig.java | 16 ++ .../creativecommons/nutch/CCIndexingFilter.java | 2 +- .../org/creativecommons/nutch/CCParseFilter.java | 38 ++-- .../nutch/indexer/feed/FeedIndexingFilter.java | 2 - .../org/apache/nutch/parse/feed/FeedParser.java | 4 - .../nutch/parse/headings/HeadingsParseFilter.java | 9 +- .../nutch/indexer/more/MoreIndexingFilter.java | 4 +- .../nutch/indexer/replace/FieldReplacer.java | 6 +- .../elasticrest/ElasticRestIndexWriter.java | 1 - .../indexwriter/elastic/ElasticIndexWriter.java | 3 +- .../indexwriter/rabbit/RabbitIndexWriter.java | 246 ++++++++++----------- .../indexwriter/rabbit/RabbitMQConstants.java | 24 +- .../nutch/indexwriter/rabbit/RabbitMessage.java | 72 +++--- .../nutch/indexwriter/solr/SolrIndexWriter.java | 7 - .../apache/nutch/indexwriter/solr/SolrUtils.java | 1 - .../nutch/analysis/lang/HTMLLanguageParser.java | 1 - .../analysis/lang/LanguageIndexingFilter.java | 2 - .../apache/nutch/protocol/http/api/HttpBase.java | 5 - .../nutch/protocol/http/api/HttpException.java | 1 - .../nutch/urlfilter/api/RegexURLFilterBase.java | 7 +- .../microformats/reltag/RelTagIndexingFilter.java | 2 - .../nutch/microformats/reltag/RelTagParser.java | 4 - .../indexer/filter/MimeTypeIndexingFilter.java | 1 - .../apache/nutch/parse/html/DOMContentUtils.java | 6 +- .../apache/nutch/parse/html/HTMLMetaProcessor.java | 4 +- .../org/apache/nutch/parse/html/HtmlParser.java | 40 +++- .../java/org/apache/nutch/parse/swf/SWFParser.java | 88 ++++---- .../parse/tika/BoilerpipeExtractorRepository.java | 6 +- .../apache/nutch/parse/tika/HTMLMetaProcessor.java | 4 +- .../org/apache/nutch/parse/tika/TikaParser.java | 1 - .../apache/nutch/parse/zip/ZipTextExtractor.java | 4 - .../nutch/parsefilter/naivebayes/Classify.java | 1 - .../nutch/parsefilter/regex/RegexParseFilter.java | 9 +- .../apache/nutch/protocol/file/FileResponse.java | 4 - .../java/org/apache/nutch/protocol/ftp/Client.java | 1 - .../nutch/protocol/ftp/FtpRobotRulesParser.java | 1 - .../java/org/apache/nutch/protocol/http/Http.java | 4 - .../org/apache/nutch/protocol/httpclient/Http.java | 6 - .../httpclient/HttpAuthenticationFactory.java | 4 - .../httpclient/HttpBasicAuthentication.java | 4 - .../nutch/protocol/httpclient/HttpResponse.java | 3 - .../nutch/protocol/interactiveselenium/Http.java | 1 - .../protocol/interactiveselenium/HttpResponse.java | 1 - .../handlers/DefaultHandler.java | 15 +- .../handlers/InteractiveSeleniumHandler.java | 2 +- .../org/apache/nutch/protocol/selenium/Http.java | 1 - .../nutch/protocol/selenium/HttpResponse.java | 1 - .../nutch/scoring/opic/OPICScoringFilter.java | 1 - .../similarity/SimilarityScoringFilter.java | 1 - .../similarity/cosine/CosineSimilarity.java | 1 - .../nutch/scoring/similarity/cosine/Model.java | 7 - .../similarity/util/LuceneAnalyzerUtil.java | 1 - .../urlfilter/automaton/AutomatonURLFilter.java | 4 - .../urlfilter/ignoreexempt/ExemptionUrlFilter.java | 4 - .../nutch/urlfilter/prefix/PrefixURLFilter.java | 6 +- .../nutch/urlfilter/regex/RegexURLFilter.java | 1 - .../nutch/urlfilter/suffix/SuffixURLFilter.java | 3 +- .../net/urlnormalizer/ajax/AjaxURLNormalizer.java | 2 - .../net/urlnormalizer/host/HostURLNormalizer.java | 1 - .../protocol/ProtocolURLNormalizer.java | 1 - .../querystring/QuerystringURLNormalizer.java | 4 - .../urlnormalizer/slash/SlashURLNormalizer.java | 7 +- 150 files changed, 706 insertions(+), 791 deletions(-) -- To stop receiving notification emails like this one, please contact [email protected].
