This is an automated email from the ASF dual-hosted git repository. claude pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/creadur-rat.git
commit 8fcb1cf497701ca0a3d12ebe61a2abbb516f6ac6 Merge: 261e5624 dd8fc904 Author: Claude Warren <[email protected]> AuthorDate: Sat May 4 16:50:45 2024 +0100 Merge pull request #240 from Claudenw/tika_based_document_analyzer RAT-54: Tika based document analyzer apache-rat-core/pom.xml | 8 +- .../src/main/java/org/apache/rat/Defaults.java | 53 ++- .../src/main/java/org/apache/rat/Report.java | 10 +- .../java/org/apache/rat/ReportConfiguration.java | 36 +- .../rat/analysis/DefaultAnalyserFactory.java | 32 +- .../org/apache/rat/analysis/TikaProcessor.java | 142 ++++++++ .../analysis/matchers/AbstractHeaderMatcher.java | 2 - .../apache/rat/analysis/matchers/NotMatcher.java | 2 - .../src/main/java/org/apache/rat/api/Document.java | 25 +- .../src/main/java/org/apache/rat/api/MetaData.java | 18 + .../rat/document/impl/guesser/ArchiveGuesser.java | 66 ---- .../rat/document/impl/guesser/BinaryGuesser.java | 372 --------------------- .../org/apache/rat/header/LineNumberReader.java | 130 +++---- .../apache/rat/report/claim/ClaimStatistic.java | 142 ++++++-- .../report/claim/impl/AbstractClaimReporter.java | 18 +- .../rat/report/claim/impl/ClaimAggregator.java | 43 +-- .../java/org/apache/rat/walker/ArchiveWalker.java | 28 +- .../org/apache/rat/walker/DirectoryWalker.java | 86 ++--- .../main/java/org/apache/rat/walker/Walker.java | 52 ++- .../org/apache/rat/ReportConfigurationTest.java | 40 ++- .../src/test/java/org/apache/rat/ReportTest.java | 15 +- .../src/test/java/org/apache/rat/ReporterTest.java | 26 +- .../apache/rat/analysis/AnalyserFactoryTest.java | 59 +++- .../org/apache/rat/analysis/TikaProcessorTest.java | 168 ++++++++++ .../document/impl/guesser/ArchiveGuesserTest.java | 66 ---- .../document/impl/guesser/BinaryGuesserTest.java | 150 --------- .../apache/rat/report/ConfigurationReportTest.java | 5 - .../rat/report/xml/XmlReportFactoryTest.java | 19 +- .../org/apache/rat/walker/DirectoryWalkerTest.java | 99 ++++-- .../resources/jira/RAT147/unix-newlines.txt.bin | 8 + .../resources/jira/RAT147/windows-newlines.txt.bin | 9 + .../RAT211/leader-election-message-arrives.dia | Bin 0 -> 5796 bytes .../src/test/resources/jira/RAT211/side_left.bmp | Bin 0 -> 345238 bytes .../src/test/resources/tikaFiles/README.md | 11 + .../src/test/resources/tikaFiles/archive/dummy.jar | Bin 0 -> 6615 bytes .../test/resources/tikaFiles/binary/Defaults.class | Bin 0 -> 7280 bytes .../src/test/resources/tikaFiles/binary/Image.gif | Bin 0 -> 74 bytes .../src/test/resources/tikaFiles/binary/Image.jpeg | Bin 0 -> 4803 bytes .../src/test/resources/tikaFiles/binary/Image.jpg | Bin 0 -> 4803 bytes .../src/test/resources/tikaFiles/binary/Image.png | Bin 0 -> 489 bytes .../src/test/resources/tikaFiles/binary/Image.psd | Bin 0 -> 2499 bytes .../src/test/resources/tikaFiles/binary/Image.xcf | Bin 0 -> 1565 bytes .../test/resources/tikaFiles/binary/KeyStore.jks | Bin 0 -> 4914 bytes .../src/test/resources/tikaFiles/binary/file.json | 7 + .../resources/tikaFiles/binary/maven_libjansi.so | Bin 0 -> 98380 bytes .../src/test/resources/tikaFiles/binary/test.aif | Bin 0 -> 3894 bytes .../src/test/resources/tikaFiles/binary/test.au | Bin 0 -> 3868 bytes .../src/test/resources/tikaFiles/binary/test.exe | Bin 0 -> 1024 bytes .../src/test/resources/tikaFiles/binary/test.flv | Bin 0 -> 90580 bytes .../src/test/resources/tikaFiles/binary/test.iff | Bin 0 -> 1060 bytes .../test/resources/tikaFiles/binary/test.keystore | Bin 0 -> 4914 bytes .../src/test/resources/tikaFiles/binary/test.mid | Bin 0 -> 322 bytes .../src/test/resources/tikaFiles/binary/test.mp3 | Bin 0 -> 2668637 bytes .../src/test/resources/tikaFiles/binary/test.ogg | Bin 0 -> 5418 bytes .../src/test/resources/tikaFiles/binary/test.swf | Bin 0 -> 180 bytes .../resources/tikaFiles/binary/test.truststore | Bin 0 -> 1302 bytes .../src/test/resources/tikaFiles/binary/test.wav | Bin 0 -> 3884 bytes .../src/test/resources/tikaFiles/binary/test.wma | Bin 0 -> 27747 bytes .../resources/tikaFiles/binary/testMP3i18n.mp3 | Bin 0 -> 40832 bytes .../tikaFiles/binary/testMP3i18n_truncated.mp3 | Bin 0 -> 40672 bytes .../resources/tikaFiles/binary/testMP3id3v1.mp3 | Bin 0 -> 39416 bytes .../resources/tikaFiles/binary/testMP3id3v1_v2.mp3 | Bin 0 -> 40960 bytes .../resources/tikaFiles/binary/testMP3id3v2.mp3 | Bin 0 -> 39577 bytes .../resources/tikaFiles/binary/testMP3id3v24.mp3 | Bin 0 -> 39471 bytes .../resources/tikaFiles/binary/testMP3lyrics.mp3 | Bin 0 -> 34688 bytes .../resources/tikaFiles/binary/testMP3noid3.mp3 | Bin 0 -> 39288 bytes .../tikaFiles/binary/testMP3truncated.mp3 | Bin 0 -> 65536 bytes .../test/resources/tikaFiles/binary/testMP4.m4a | Bin 0 -> 4770 bytes .../tikaFiles/binary/testMP4_truncated.m4a | Bin 0 -> 74 bytes .../tikaFiles/binary/testNakedUTF16BOM.mp3 | Bin 0 -> 2625 bytes .../test/resources/tikaFiles/binary/truststore.jks | Bin 0 -> 1302 bytes .../src/test/resources/tikaFiles/ca-cert | 22 ++ .../src/test/resources/tikaFiles/ca-key | 30 ++ .../src/test/resources/tikaFiles/cert-file | 19 ++ .../src/test/resources/tikaFiles/cert-signed | 21 ++ .../src/test/resources/tikaFiles/notice/LICENSE | 202 +++++++++++ .../src/test/resources/tikaFiles/notice/NOTICE | 7 + .../tikaFiles/standard/ChineseCommentsJava.java | 42 +++ .../resources/tikaFiles/standard/HelloWorld.groovy | 24 ++ .../resources/tikaFiles/standard/HelloWorld.java | 24 ++ .../test/resources/tikaFiles/standard/Image.pdf | Bin 0 -> 2390 bytes .../tikaFiles/standard/UTF16_with_signature.xml | Bin 0 -> 2402 bytes .../tikaFiles/standard/UTF8_with_signature.xml | 29 ++ .../src/test/resources/tikaFiles/standard/file.C | 22 ++ .../src/test/resources/tikaFiles/standard/file.CPP | 24 ++ .../src/test/resources/tikaFiles/standard/file.c | 22 ++ .../src/test/resources/tikaFiles/standard/file.c++ | 24 ++ .../src/test/resources/tikaFiles/standard/file.cc | 24 ++ .../src/test/resources/tikaFiles/standard/file.cp | 24 ++ .../src/test/resources/tikaFiles/standard/file.cpp | 24 ++ .../src/test/resources/tikaFiles/standard/file.csv | 1 + .../src/test/resources/tikaFiles/standard/file.cxx | 24 ++ .../test/resources/tikaFiles/standard/file.plain | 20 ++ .../src/test/resources/tikaFiles/standard/file.tsv | 1 + .../java/org/apache/rat/mp/AbstractRatMojo.java | 6 +- .../main/java/org/apache/rat/mp/RatCheckMojo.java | 2 +- .../java/org/apache/rat/mp/RatCheckMojoTest.java | 15 +- .../main/java/org/apache/rat/anttasks/Report.java | 2 +- checkstyle-suppressions.xml | 26 ++ pom.xml | 10 +- src/changes/changes.xml | 19 ++ 101 files changed, 1579 insertions(+), 1078 deletions(-)
