This is an automated email from the ASF dual-hosted git repository.
claude pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/creadur-rat.git
The following commit(s) were added to refs/heads/master by this push:
new 0a9559e1 RAT-81: Fixed encoding issue causing text files to not be
read properly (#395)
0a9559e1 is described below
commit 0a9559e1d8726ba16c933ec560f7e5d42400360e
Author: Claude Warren <[email protected]>
AuthorDate: Mon Nov 18 17:21:02 2024 +0000
RAT-81: Fixed encoding issue causing text files to not be read properly
(#395)
* Fixed encoding issue where text files not in UTF-8 encoding would not be
read properly.
Change adds charset to the metadata when it can be discovered. If not, UTF-8
is returned.
Added integration test RAT-81 to show reading of UTF8 and IBM037 encoding
works.
* Minor fixes
* RAT-81: Add changelog about encoding bugfix
* added logging and removed dead code
* fix for RAT-96
Added mediaType and encoding attributes to XML output.
Added updated DefaultAnalyserFactoryTests to account for change
Added integration tests for RAT-147 and RAT-211 based on code in
DefaultAnalyserFactoryTests
Updated ReportTest to add dependencies and package jar to classpath for
test.
Fixed testing issues in Ant unit caused by addition of mediaType and
encoding attributes.
renamed reportTest directories to use a '_' rather than a '-' to account
for java package names.
* RAT-81: groovify the test code, minor fixes
* RAT-81: Add mediaType and encoding to RAT report, minor fixes
---------
Co-authored-by: P. Ottlinger <[email protected]>
Co-authored-by: P. Ottlinger <[email protected]>
---
apache-rat-core/pom.xml | 4 +
.../src/it/java/org/apache/rat/ReportTest.java | 11 ++-
.../it/resources/ReportTest/RAT-246/src/.gitignore | 1 -
.../ReportTest/RAT-246/src/dir1/FileToIgnore | 0
.../resources/ReportTest/RAT_147/commandLine.txt | 4 +
.../ReportTest/RAT_147/src/unix-newlines.txt.bin | 8 ++
.../RAT_147/src/windows-newlines.txt.bin | 9 +++
.../it/resources/ReportTest/RAT_147/verify.groovy | 62 +++++++++++++++
.../resources/ReportTest/RAT_211/commandLine.txt | 6 ++
.../src/leader-election-message-arrives.dia | Bin 0 -> 5796 bytes
.../resources/ReportTest/RAT_211/src/side_left.bmp | Bin 0 -> 345238 bytes
.../it/resources/ReportTest/RAT_211/verify.groovy | 56 ++++++++++++++
.../{RAT-246 => RAT_246}/commandLine.txt | 0
.../ReportTest/{RAT-246 => RAT_246}/notes.md | 0
.../{RAT-246 => RAT_246}/src/dir1/FileToProcess | 0
.../ReportTest/{RAT-246 => RAT_246}/verify.groovy | 6 +-
.../{RAT-408 => RAT_408}/commandLine.txt | 0
.../{RAT-408 => RAT_408}/expected-message.txt | 0
.../src/karapace/anonymize_schemas/_init_.py | 0
.../it/resources/ReportTest/RAT_81/commandLine.txt | 2 +
.../it/resources/ReportTest/RAT_81/src/IBM037.txt | 1 +
.../it/resources/ReportTest/RAT_81/src/UTF8.txt | 20 +++++
.../it/resources/ReportTest/RAT_81/verify.groovy | 61 +++++++++++++++
.../src/it/resources/ReportTest/readme.md | 2 +
.../org/apache/rat/analysis/TikaProcessor.java | 46 ++++++++++-
.../src/main/java/org/apache/rat/api/Document.java | 12 ++-
.../src/main/java/org/apache/rat/api/MetaData.java | 19 +++++
.../apache/rat/document/ArchiveEntryDocument.java | 8 --
.../java/org/apache/rat/document/FileDocument.java | 8 +-
.../org/apache/rat/report/xml/XmlElements.java | 62 ++++++++++-----
.../main/resources/org/apache/rat/rat-report.xsd | 2 +
.../rat/analysis/DefaultAnalyserFactoryTest.java | 84 ++++++++++-----------
.../resources/antunit/report-normal-operation.xml | 2 +-
pom.xml | 8 +-
src/changes/changes.xml | 9 +++
35 files changed, 419 insertions(+), 94 deletions(-)
diff --git a/apache-rat-core/pom.xml b/apache-rat-core/pom.xml
index 145cd4f7..8a500a5f 100644
--- a/apache-rat-core/pom.xml
+++ b/apache-rat-core/pom.xml
@@ -196,6 +196,10 @@
<groupId>org.apache.tika</groupId>
<artifactId>tika-core</artifactId>
</dependency>
+ <dependency>
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-parser-text-module</artifactId>
+ </dependency>
<dependency>
<!-- this dependency is actually used by integeration testing code -->
<groupId>org.codehaus.groovy</groupId>
diff --git a/apache-rat-core/src/it/java/org/apache/rat/ReportTest.java
b/apache-rat-core/src/it/java/org/apache/rat/ReportTest.java
index 6654ed58..45d74840 100644
--- a/apache-rat-core/src/it/java/org/apache/rat/ReportTest.java
+++ b/apache-rat-core/src/it/java/org/apache/rat/ReportTest.java
@@ -18,7 +18,6 @@
*/
package org.apache.rat;
-import groovy.lang.Binding;
import groovy.lang.GroovyShell;
import java.io.File;
import java.io.FileReader;
@@ -48,6 +47,7 @@ import org.apache.rat.report.RatReport;
import org.apache.rat.utils.DefaultLog;
import org.apache.rat.utils.Log;
import org.apache.rat.walker.DirectoryWalker;
+import org.codehaus.groovy.control.CompilerConfiguration;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;
@@ -123,9 +123,12 @@ public class ReportTest {
File groovyScript = new File(baseDir, "verify.groovy");
if (groovyScript.exists()) {
// call groovy expressions from Java code
- Binding binding = new Binding();
- GroovyShell shell = new GroovyShell(binding);
+ CompilerConfiguration compilerConfiguration = new
CompilerConfiguration();
+ GroovyShell shell = new GroovyShell(compilerConfiguration);
+ for (String classPath :
System.getProperty("java.class.path").split(File.pathSeparator)) {
+ shell.getClassLoader().addClasspath(classPath);
+ }
Object value = shell.run(groovyScript, new
String[]{outputFile.getAbsolutePath(), logFile.getAbsolutePath()});
if (value != null) {
fail(String.format("%s",value));
@@ -194,7 +197,7 @@ public class ReportTest {
@Override
public void log(Level level, String msg) {
if (isEnabled(level)) {
- logFile.println(String.format("%s: %s", level, msg));
+ logFile.printf("%s: %s%n", level, msg);
}
}
diff --git a/apache-rat-core/src/it/resources/ReportTest/RAT-246/src/.gitignore
b/apache-rat-core/src/it/resources/ReportTest/RAT-246/src/.gitignore
deleted file mode 100644
index 4b2162f1..00000000
--- a/apache-rat-core/src/it/resources/ReportTest/RAT-246/src/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-**/FileToIgnore
diff --git
a/apache-rat-core/src/it/resources/ReportTest/RAT-246/src/dir1/FileToIgnore
b/apache-rat-core/src/it/resources/ReportTest/RAT-246/src/dir1/FileToIgnore
deleted file mode 100644
index e69de29b..00000000
diff --git
a/apache-rat-core/src/it/resources/ReportTest/RAT_147/commandLine.txt
b/apache-rat-core/src/it/resources/ReportTest/RAT_147/commandLine.txt
new file mode 100644
index 00000000..91180535
--- /dev/null
+++ b/apache-rat-core/src/it/resources/ReportTest/RAT_147/commandLine.txt
@@ -0,0 +1,4 @@
+--counter-max
+UNAPPROVED:2
+--output-style
+xml
diff --git
a/apache-rat-core/src/it/resources/ReportTest/RAT_147/src/unix-newlines.txt.bin
b/apache-rat-core/src/it/resources/ReportTest/RAT_147/src/unix-newlines.txt.bin
new file mode 100644
index 00000000..2c498da3
--- /dev/null
+++
b/apache-rat-core/src/it/resources/ReportTest/RAT_147/src/unix-newlines.txt.bin
@@ -0,0 +1,8 @@
+sentence 1.
+sentence 2.
+
+
+sentence 3.
+
+sentence 4.
+
diff --git
a/apache-rat-core/src/it/resources/ReportTest/RAT_147/src/windows-newlines.txt.bin
b/apache-rat-core/src/it/resources/ReportTest/RAT_147/src/windows-newlines.txt.bin
new file mode 100644
index 00000000..a0adb98f
--- /dev/null
+++
b/apache-rat-core/src/it/resources/ReportTest/RAT_147/src/windows-newlines.txt.bin
@@ -0,0 +1,9 @@
+sentence 1.
+sentence 2.
+
+
+sentence 3.
+
+sentence 4.
+
+
diff --git a/apache-rat-core/src/it/resources/ReportTest/RAT_147/verify.groovy
b/apache-rat-core/src/it/resources/ReportTest/RAT_147/verify.groovy
new file mode 100644
index 00000000..e531e870
--- /dev/null
+++ b/apache-rat-core/src/it/resources/ReportTest/RAT_147/verify.groovy
@@ -0,0 +1,62 @@
+package ReportTest.RAT_246
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.rat.testhelpers.TextUtils
+import org.apache.rat.testhelpers.XmlUtils
+import org.w3c.dom.NodeList
+
+import javax.xml.xpath.XPath
+import javax.xml.xpath.XPathFactory
+
+import static org.junit.jupiter.api.Assertions.assertEquals
+
+doc = XmlUtils.toDom(new FileInputStream(args[0]))
+XPath xPath = XPathFactory.newInstance().newXPath()
+
+NodeList nodeList = XmlUtils.getNodeList(doc, xPath,
"/rat-report/resource[@name='/unix-newlines.txt.bin']")
+assertEquals(1, nodeList.getLength())
+node = nodeList.item(0)
+attributes = node.getAttributes()
+assertEquals("UTF-8", attributes.getNamedItem("encoding").getNodeValue())
+assertEquals("text/plain", attributes.getNamedItem("mediaType").getNodeValue())
+assertEquals("STANDARD", attributes.getNamedItem("type").getNodeValue())
+nodeList = XmlUtils.getNodeList(node, xPath, "license")
+assertEquals(1, nodeList.getLength())
+node = nodeList.item(0)
+attributes = node.getAttributes()
+assertEquals("false", attributes.getNamedItem("approval").getNodeValue())
+
+
+nodeList = XmlUtils.getNodeList(doc, xPath,
"/rat-report/resource[@name='/windows-newlines.txt.bin']")
+assertEquals(1, nodeList.getLength())
+node = nodeList.item(0)
+attributes = node.getAttributes()
+assertEquals("UTF-8", attributes.getNamedItem("encoding").getNodeValue())
+assertEquals("text/plain", attributes.getNamedItem("mediaType").getNodeValue())
+assertEquals("STANDARD", attributes.getNamedItem("type").getNodeValue())
+nodeList = XmlUtils.getNodeList(node, xPath, "license")
+assertEquals(1, nodeList.getLength())
+node = nodeList.item(0)
+attributes = node.getAttributes()
+assertEquals("false", attributes.getNamedItem("approval").getNodeValue())
+
+logOutput = new File(args[1])
+log = logOutput.text
+
+TextUtils.assertPatternNotInTarget("^ERROR:", log)
+TextUtils.assertPatternNotInTarget("^WARN:", log)
\ No newline at end of file
diff --git
a/apache-rat-core/src/it/resources/ReportTest/RAT_211/commandLine.txt
b/apache-rat-core/src/it/resources/ReportTest/RAT_211/commandLine.txt
new file mode 100644
index 00000000..119348ed
--- /dev/null
+++ b/apache-rat-core/src/it/resources/ReportTest/RAT_211/commandLine.txt
@@ -0,0 +1,6 @@
+--counter-min
+LICENSE_CATEGORIES:0
+LICENSE_NAMES:0
+STANDARDS:0
+--output-style
+xml
diff --git
a/apache-rat-core/src/it/resources/ReportTest/RAT_211/src/leader-election-message-arrives.dia
b/apache-rat-core/src/it/resources/ReportTest/RAT_211/src/leader-election-message-arrives.dia
new file mode 100644
index 00000000..41fa5300
Binary files /dev/null and
b/apache-rat-core/src/it/resources/ReportTest/RAT_211/src/leader-election-message-arrives.dia
differ
diff --git
a/apache-rat-core/src/it/resources/ReportTest/RAT_211/src/side_left.bmp
b/apache-rat-core/src/it/resources/ReportTest/RAT_211/src/side_left.bmp
new file mode 100644
index 00000000..c1d3d69a
Binary files /dev/null and
b/apache-rat-core/src/it/resources/ReportTest/RAT_211/src/side_left.bmp differ
diff --git a/apache-rat-core/src/it/resources/ReportTest/RAT_211/verify.groovy
b/apache-rat-core/src/it/resources/ReportTest/RAT_211/verify.groovy
new file mode 100644
index 00000000..288fe4a6
--- /dev/null
+++ b/apache-rat-core/src/it/resources/ReportTest/RAT_211/verify.groovy
@@ -0,0 +1,56 @@
+package ReportTest.RAT_211
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.rat.testhelpers.TextUtils
+import org.apache.rat.testhelpers.XmlUtils
+import org.w3c.dom.NodeList
+
+import javax.xml.xpath.XPath
+import javax.xml.xpath.XPathFactory
+
+import static org.junit.jupiter.api.Assertions.assertEquals
+import static org.junit.jupiter.api.Assertions.assertNull
+
+doc = XmlUtils.toDom(new FileInputStream(args[0]))
+XPath xPath = XPathFactory.newInstance().newXPath()
+
+NodeList nodeList = XmlUtils.getNodeList(doc, xPath,
"/rat-report/resource[@name='/leader-election-message-arrives.dia']")
+assertEquals(1, nodeList.getLength())
+node = nodeList.item(0)
+attributes = node.getAttributes()
+assertNull(attributes.getNamedItem("encoding"), "There should not be an
encoding")
+assertEquals("application/gzip",
attributes.getNamedItem("mediaType").getNodeValue())
+assertEquals("ARCHIVE", attributes.getNamedItem("type").getNodeValue())
+nodeList = XmlUtils.getNodeList(node, xPath, "license")
+assertEquals(0, nodeList.getLength())
+
+nodeList = XmlUtils.getNodeList(doc, xPath,
"/rat-report/resource[@name='/side_left.bmp']")
+assertEquals(1, nodeList.getLength())
+node = nodeList.item(0)
+attributes = node.getAttributes()
+assertNull(attributes.getNamedItem("encoding"), "There should not be an
encoding")
+assertEquals("image/bmp", attributes.getNamedItem("mediaType").getNodeValue())
+assertEquals("BINARY", attributes.getNamedItem("type").getNodeValue())
+nodeList = XmlUtils.getNodeList(node, xPath, "license")
+assertEquals(0, nodeList.getLength())
+
+logOutput = new File(args[1])
+log = logOutput.text
+
+TextUtils.assertPatternNotInTarget("^ERROR:", log)
+TextUtils.assertPatternNotInTarget("^WARN:", log)
\ No newline at end of file
diff --git
a/apache-rat-core/src/it/resources/ReportTest/RAT-246/commandLine.txt
b/apache-rat-core/src/it/resources/ReportTest/RAT_246/commandLine.txt
similarity index 100%
rename from apache-rat-core/src/it/resources/ReportTest/RAT-246/commandLine.txt
rename to apache-rat-core/src/it/resources/ReportTest/RAT_246/commandLine.txt
diff --git a/apache-rat-core/src/it/resources/ReportTest/RAT-246/notes.md
b/apache-rat-core/src/it/resources/ReportTest/RAT_246/notes.md
similarity index 100%
rename from apache-rat-core/src/it/resources/ReportTest/RAT-246/notes.md
rename to apache-rat-core/src/it/resources/ReportTest/RAT_246/notes.md
diff --git
a/apache-rat-core/src/it/resources/ReportTest/RAT-246/src/dir1/FileToProcess
b/apache-rat-core/src/it/resources/ReportTest/RAT_246/src/dir1/FileToProcess
similarity index 100%
rename from
apache-rat-core/src/it/resources/ReportTest/RAT-246/src/dir1/FileToProcess
rename to
apache-rat-core/src/it/resources/ReportTest/RAT_246/src/dir1/FileToProcess
diff --git a/apache-rat-core/src/it/resources/ReportTest/RAT-246/verify.groovy
b/apache-rat-core/src/it/resources/ReportTest/RAT_246/verify.groovy
similarity index 90%
rename from apache-rat-core/src/it/resources/ReportTest/RAT-246/verify.groovy
rename to apache-rat-core/src/it/resources/ReportTest/RAT_246/verify.groovy
index 5d089962..1be9253b 100644
--- a/apache-rat-core/src/it/resources/ReportTest/RAT-246/verify.groovy
+++ b/apache-rat-core/src/it/resources/ReportTest/RAT_246/verify.groovy
@@ -16,14 +16,14 @@ package ReportTest.RAT_246
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-output = new File(args[0]);
+output = new File(args[0])
content = output.text
assert !content.contains('/dir1/FileToIgnore')
assert content.contains('/dir1/FileToProcess')
-logOutput = new File(args[1]);
+logOutput = new File(args[1])
log = logOutput.text
assert log.contains('INFO: Processing exclude file from GIT.')
-assert log.contains("INFO: Excluding GIT collection.")
+assert log.contains("INFO: Excluding GIT collection.")
\ No newline at end of file
diff --git
a/apache-rat-core/src/it/resources/ReportTest/RAT-408/commandLine.txt
b/apache-rat-core/src/it/resources/ReportTest/RAT_408/commandLine.txt
similarity index 100%
rename from apache-rat-core/src/it/resources/ReportTest/RAT-408/commandLine.txt
rename to apache-rat-core/src/it/resources/ReportTest/RAT_408/commandLine.txt
diff --git
a/apache-rat-core/src/it/resources/ReportTest/RAT-408/expected-message.txt
b/apache-rat-core/src/it/resources/ReportTest/RAT_408/expected-message.txt
similarity index 100%
rename from
apache-rat-core/src/it/resources/ReportTest/RAT-408/expected-message.txt
rename to
apache-rat-core/src/it/resources/ReportTest/RAT_408/expected-message.txt
diff --git
a/apache-rat-core/src/it/resources/ReportTest/RAT-408/src/karapace/anonymize_schemas/_init_.py
b/apache-rat-core/src/it/resources/ReportTest/RAT_408/src/karapace/anonymize_schemas/_init_.py
similarity index 100%
rename from
apache-rat-core/src/it/resources/ReportTest/RAT-408/src/karapace/anonymize_schemas/_init_.py
rename to
apache-rat-core/src/it/resources/ReportTest/RAT_408/src/karapace/anonymize_schemas/_init_.py
diff --git a/apache-rat-core/src/it/resources/ReportTest/RAT_81/commandLine.txt
b/apache-rat-core/src/it/resources/ReportTest/RAT_81/commandLine.txt
new file mode 100644
index 00000000..6981deb8
--- /dev/null
+++ b/apache-rat-core/src/it/resources/ReportTest/RAT_81/commandLine.txt
@@ -0,0 +1,2 @@
+--output-style
+xml
diff --git a/apache-rat-core/src/it/resources/ReportTest/RAT_81/src/IBM037.txt
b/apache-rat-core/src/it/resources/ReportTest/RAT_81/src/IBM037.txt
new file mode 100644
index 00000000..fc5a8995
--- /dev/null
+++ b/apache-rat-core/src/it/resources/ReportTest/RAT_81/src/IBM037.txt
@@ -0,0 +1 @@
+a\@\@Ӊ������@��@���@������@▆�����@Ɩ��������@M���]@�����@���@@@\@\@��@����@�����������@�������@����������K@@ⅅ@���@������@����@\@\@�����������@����@����@����@���@����������@�����������@@@@@@@@\@\@���������@���������@���������K@@㈅@���@��������@����@����@@@\@\@��@���@�����@���@������@Ӊ�����k@兙����@�K�@M���@@@@@@@@@@@@\@\@Ӊ�����]^@���@���@���@���@����@����@������@��@����������@@@\@\@����@���@Ӊ�����K@@薤@���@������@�@�
�����@��@���@Ӊ�����@��@@@\@\@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ [...]
\ No newline at end of file
diff --git a/apache-rat-core/src/it/resources/ReportTest/RAT_81/src/UTF8.txt
b/apache-rat-core/src/it/resources/ReportTest/RAT_81/src/UTF8.txt
new file mode 100644
index 00000000..175b1890
--- /dev/null
+++ b/apache-rat-core/src/it/resources/ReportTest/RAT_81/src/UTF8.txt
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one *
+ * or more contributor license agreements. See the NOTICE file *
+ * distributed with this work for additional information *
+ * regarding copyright ownership. The ASF licenses this file *
+ * to you under the Apache License, Version 2.0 (the *
+ * "License"); you may not use this file except in compliance *
+ * with the License. You may obtain a copy of the License at *
+ * *
+ * http://www.apache.org/licenses/LICENSE-2.0 *
+ * *
+ * Unless required by applicable law or agreed to in writing, *
+ * software distributed under the License is distributed on an *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
+ * KIND, either express or implied. See the License for the *
+ * specific language governing permissions and limitations *
+ * under the License. *
+ */
+
+This is a file that has the IBM037 encoding. Hopefully it will not fail.
diff --git a/apache-rat-core/src/it/resources/ReportTest/RAT_81/verify.groovy
b/apache-rat-core/src/it/resources/ReportTest/RAT_81/verify.groovy
new file mode 100644
index 00000000..a3a78c3a
--- /dev/null
+++ b/apache-rat-core/src/it/resources/ReportTest/RAT_81/verify.groovy
@@ -0,0 +1,61 @@
+package ReportTest.RAT_81
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.rat.testhelpers.TextUtils
+import org.apache.rat.testhelpers.XmlUtils
+import org.w3c.dom.NodeList
+
+import javax.xml.xpath.XPath
+import javax.xml.xpath.XPathFactory
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+doc = XmlUtils.toDom(new FileInputStream(args[0]))
+XPath xPath = XPathFactory.newInstance().newXPath()
+
+NodeList nodeList = XmlUtils.getNodeList(doc, xPath,
"/rat-report/resource[@name='/IBM037.txt']")
+assertEquals(1, nodeList.getLength())
+node = nodeList.item(0)
+attributes = node.getAttributes()
+assertEquals("IBM500", attributes.getNamedItem("encoding").getNodeValue())
+assertEquals("text/plain", attributes.getNamedItem("mediaType").getNodeValue())
+assertEquals("STANDARD", attributes.getNamedItem("type").getNodeValue())
+nodeList = XmlUtils.getNodeList(node, xPath, "license")
+assertEquals(1, nodeList.getLength())
+node = nodeList.item(0)
+attributes = node.getAttributes()
+assertEquals("true", attributes.getNamedItem("approval").getNodeValue())
+
+nodeList = XmlUtils.getNodeList(doc, xPath,
"/rat-report/resource[@name='/UTF8.txt']")
+assertEquals(1, nodeList.getLength())
+node = nodeList.item(0)
+attributes = node.getAttributes()
+assertEquals("ISO-8859-1", attributes.getNamedItem("encoding").getNodeValue())
+assertEquals("text/plain", attributes.getNamedItem("mediaType").getNodeValue())
+assertEquals("STANDARD", attributes.getNamedItem("type").getNodeValue())
+nodeList = XmlUtils.getNodeList(node, xPath, "license")
+assertEquals(1, nodeList.getLength())
+node = nodeList.item(0)
+attributes = node.getAttributes()
+assertEquals("true", attributes.getNamedItem("approval").getNodeValue())
+
+logOutput = new File(args[1])
+log = logOutput.text
+
+TextUtils.assertPatternNotInTarget("^ERROR:", log)
+TextUtils.assertPatternNotInTarget("^WARN:", log)
\ No newline at end of file
diff --git a/apache-rat-core/src/it/resources/ReportTest/readme.md
b/apache-rat-core/src/it/resources/ReportTest/readme.md
index 6c9937e4..f46013fc 100644
--- a/apache-rat-core/src/it/resources/ReportTest/readme.md
+++ b/apache-rat-core/src/it/resources/ReportTest/readme.md
@@ -1 +1,3 @@
Directories here are structured to be read by ReportTest.
+
+Directories should be named `RAT_###` to correspond with `RAT-###` jira
tickets where the association exists.
diff --git
a/apache-rat-core/src/main/java/org/apache/rat/analysis/TikaProcessor.java
b/apache-rat-core/src/main/java/org/apache/rat/analysis/TikaProcessor.java
index 12658eb4..c88328bc 100644
--- a/apache-rat-core/src/main/java/org/apache/rat/analysis/TikaProcessor.java
+++ b/apache-rat-core/src/main/java/org/apache/rat/analysis/TikaProcessor.java
@@ -18,18 +18,25 @@
*/
package org.apache.rat.analysis;
+import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.nio.charset.Charset;
+import java.nio.charset.UnsupportedCharsetException;
import java.util.HashMap;
import java.util.Map;
import org.apache.rat.api.Document;
+import org.apache.rat.document.DocumentName;
import org.apache.rat.document.RatDocumentAnalysisException;
import org.apache.rat.document.guesser.NoteGuesser;
+import org.apache.rat.utils.DefaultLog;
import org.apache.tika.Tika;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.txt.CharsetDetector;
+import org.apache.tika.parser.txt.CharsetMatch;
/**
* A wrapping around the tika processor.
@@ -105,6 +112,15 @@ public final class TikaProcessor {
return new HashMap<>(DOCUMENT_TYPE_MAP);
}
+ /**
+ * Ensures that the input stream supports {@code mark}.
+ * @param stream the stream to test.
+ * @return a stream that supports {@code mark}.
+ */
+ public static InputStream markSupportedInputStream(final InputStream
stream) {
+ return stream.markSupported() ? stream : new
BufferedInputStream(stream);
+ }
+
/**
* Process the input document.
* @param document the Document to process.
@@ -113,7 +129,7 @@ public final class TikaProcessor {
*/
public static String process(final Document document) throws
RatDocumentAnalysisException {
Metadata metadata = new Metadata();
- try (InputStream stream = document.inputStream()) {
+ try (InputStream stream =
markSupportedInputStream(document.inputStream())) {
metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY,
document.getName().getName());
String result = TIKA.detect(stream, metadata);
String[] parts = result.split("/");
@@ -122,6 +138,7 @@ public final class TikaProcessor {
document.getMetaData()
.setDocumentType(fromMediaType(mediaType));
if (Document.Type.STANDARD ==
document.getMetaData().getDocumentType()) {
+ document.getMetaData().setCharset(detectCharset(stream,
document.getName()));
if (NoteGuesser.isNote(document)) {
document.getMetaData().setDocumentType(Document.Type.NOTICE);
}
@@ -133,6 +150,33 @@ public final class TikaProcessor {
}
}
+ /**
+ * Determine the character set for the input stream. Input stream must
implement {@code mark}.
+ * @param stream the stream to check.
+ * @param documentName the name of the document being read.
+ * @return the detected character set or {@code null} if not detectable.
+ * @throws IOException on IO error.
+ */
+ private static Charset detectCharset(final InputStream stream, final
DocumentName documentName) throws IOException {
+ CharsetDetector encodingDetector = new CharsetDetector();
+ encodingDetector.setText(stream);
+ CharsetMatch charsetMatch = encodingDetector.detect();
+ if (charsetMatch != null) {
+ try {
+ return Charset.forName(charsetMatch.getName());
+ } catch (UnsupportedCharsetException e) {
+ DefaultLog.getInstance().warn(String.format("Unsupported
character set '%s' in file '%s'. Will use system default encoding.",
+ charsetMatch.getName(), documentName));
+ }
+ }
+ return null;
+ }
+
+ /**
+ * Gets the Document.Type based on the MediaType.
+ * @param mediaType the media type to check.
+ * @return The document type.
+ */
public static Document.Type fromMediaType(final MediaType mediaType) {
if ("text".equals(mediaType.getType())) {
return Document.Type.STANDARD;
diff --git a/apache-rat-core/src/main/java/org/apache/rat/api/Document.java
b/apache-rat-core/src/main/java/org/apache/rat/api/Document.java
index 7628e6b4..f5fd2e94 100644
--- a/apache-rat-core/src/main/java/org/apache/rat/api/Document.java
+++ b/apache-rat-core/src/main/java/org/apache/rat/api/Document.java
@@ -23,8 +23,10 @@ import java.io.InputStream;
import java.io.Reader;
import java.util.SortedSet;
+import org.apache.rat.analysis.TikaProcessor;
import org.apache.rat.document.DocumentName;
import org.apache.rat.document.DocumentNameMatcher;
+import org.apache.tika.parser.txt.CharsetDetector;
/**
* The representation of a document being scanned.
@@ -46,7 +48,7 @@ public abstract class Document implements
Comparable<Document> {
/** A standard document. */
STANDARD,
/** An unknown document type. */
- UNKNOWN;
+ UNKNOWN
}
/** The path matcher used by this document */
@@ -104,14 +106,16 @@ public abstract class Document implements
Comparable<Document> {
/**
* Reads the contents of this document.
* @return <code>Reader</code> not null
- * @throws IOException if this document cannot be read
+ * @throws IOException if this document cannot be read.
*/
- public abstract Reader reader() throws IOException;
+ public Reader reader() throws IOException {
+ return new
CharsetDetector().getReader(TikaProcessor.markSupportedInputStream(inputStream()),
getMetaData().getCharset().name());
+ }
/**
* Streams the document's contents.
* @return a non-null input stream of the document.
- * @throws IOException when stream could not be opened
+ * @throws IOException when stream could not be opened.
*/
public abstract InputStream inputStream() throws IOException;
diff --git a/apache-rat-core/src/main/java/org/apache/rat/api/MetaData.java
b/apache-rat-core/src/main/java/org/apache/rat/api/MetaData.java
index 806e0c0d..e73bb86e 100644
--- a/apache-rat-core/src/main/java/org/apache/rat/api/MetaData.java
+++ b/apache-rat-core/src/main/java/org/apache/rat/api/MetaData.java
@@ -18,6 +18,7 @@
*/
package org.apache.rat.api;
+import java.nio.charset.Charset;
import java.util.HashSet;
import java.util.Set;
import java.util.SortedSet;
@@ -38,6 +39,8 @@ public class MetaData {
private final SortedSet<ILicense> matchedLicenses;
/** The list of License Family Categories that are approved */
private final Set<String> approvedLicenses;
+ /** The charset for this document */
+ private Charset charset;
/** The media type for this document */
private MediaType mediaType;
/** The document type for this document */
@@ -53,6 +56,22 @@ public class MetaData {
this.approvedLicenses = new HashSet<>();
}
+ /**
+ * Gets the charset for the document. If the charset was not set, the
system default charset is returned.
+ * @return the charset for the document
+ */
+ public Charset getCharset() {
+ return charset == null ? Charset.defaultCharset() : charset;
+ }
+
+ /**
+ * Sets the charset for the document. If set to {@code null} the system
default charset will be used.
+ * @param charset the charset to use.
+ */
+ public void setCharset(final Charset charset) {
+ this.charset = charset;
+ }
+
/**
* Gets the defined media type.
* @return the media type.
diff --git
a/apache-rat-core/src/main/java/org/apache/rat/document/ArchiveEntryDocument.java
b/apache-rat-core/src/main/java/org/apache/rat/document/ArchiveEntryDocument.java
index e25e44db..0553e433 100644
---
a/apache-rat-core/src/main/java/org/apache/rat/document/ArchiveEntryDocument.java
+++
b/apache-rat-core/src/main/java/org/apache/rat/document/ArchiveEntryDocument.java
@@ -21,9 +21,6 @@ package org.apache.rat.document;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.io.Reader;
-import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.SortedSet;
@@ -62,9 +59,4 @@ public class ArchiveEntryDocument extends Document {
public SortedSet<Document> listChildren() {
return Collections.emptySortedSet();
}
-
- @Override
- public Reader reader() {
- return new InputStreamReader(new ByteArrayInputStream(contents),
StandardCharsets.UTF_8);
- }
}
diff --git
a/apache-rat-core/src/main/java/org/apache/rat/document/FileDocument.java
b/apache-rat-core/src/main/java/org/apache/rat/document/FileDocument.java
index 37bbfe79..31a7deb3 100644
--- a/apache-rat-core/src/main/java/org/apache/rat/document/FileDocument.java
+++ b/apache-rat-core/src/main/java/org/apache/rat/document/FileDocument.java
@@ -19,10 +19,8 @@
package org.apache.rat.document;
import java.io.File;
-import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
-import java.io.Reader;
import java.nio.file.Files;
import java.util.Arrays;
import java.util.Collections;
@@ -33,7 +31,7 @@ import org.apache.rat.api.Document;
import org.apache.rat.config.exclusion.ExclusionUtils;
/**
- * Document wrapping a File object
+ * Document wrapping a File object.
*/
public class FileDocument extends Document {
@@ -69,10 +67,6 @@ public class FileDocument extends Document {
return Collections.emptySortedSet();
}
- public Reader reader() throws IOException {
- return new FileReader(file);
- }
-
public InputStream inputStream() throws IOException {
return Files.newInputStream(file.toPath());
}
diff --git
a/apache-rat-core/src/main/java/org/apache/rat/report/xml/XmlElements.java
b/apache-rat-core/src/main/java/org/apache/rat/report/xml/XmlElements.java
index 6b1ef68b..82ea5ea1 100644
--- a/apache-rat-core/src/main/java/org/apache/rat/report/xml/XmlElements.java
+++ b/apache-rat-core/src/main/java/org/apache/rat/report/xml/XmlElements.java
@@ -30,11 +30,22 @@ import org.apache.rat.api.MetaData;
import org.apache.rat.api.RatException;
import org.apache.rat.license.ILicense;
import org.apache.rat.report.xml.writer.IXmlWriter;
+import org.apache.rat.utils.CasedString;
/**
* Creates the elements in the XML report.
*/
public class XmlElements {
+ /**
+ * Converts an enum name (upper snake case) to camel case.
+ * @param name the enum name to convert.
+ * @return a camel cased name.
+ */
+ private static String normalizeName(final String name) {
+ CasedString casedName = new CasedString(CasedString.StringCase.SNAKE,
name.toLowerCase(Locale.ROOT));
+ return casedName.toCase(CasedString.StringCase.CAMEL);
+ }
+
/**
* The elements in the report.
*/
@@ -42,28 +53,28 @@ public class XmlElements {
/** The start of the Rat report */
RAT_REPORT("rat-report"),
/** The version of Rat being run */
- VERSION("version"),
+ VERSION(),
/** A resource element */
- RESOURCE("resource"),
+ RESOURCE(),
/** A license element */
- LICENSE("license"),
+ LICENSE(),
/** A notes element */
- NOTES("notes"),
+ NOTES(),
/** A sample from the file */
- SAMPLE("sample"),
+ SAMPLE(),
/** A statistics element */
- STATISTICS("statistics"),
+ STATISTICS(),
/** A statistic entry */
- STATISTIC("statistic"),
+ STATISTIC(),
/** A license name entry */
- LICENSE_NAME("licenseName"),
+ LICENSE_NAME(),
/** A license category entry */
- LICENSE_CATEGORY("licenseCategory"),
+ LICENSE_CATEGORY(),
/** A document type entry */
- DOCUMENT_TYPE("documentType");
+ DOCUMENT_TYPE();
/** The XML name for the element */
- private String elementName;
+ private final String elementName;
/**
* Constructor.
@@ -73,6 +84,10 @@ public class XmlElements {
this.elementName = elementName;
}
+ Elements() {
+ this.elementName = normalizeName(name());
+ }
+
/**
* Gets the XML element name.
* @return the XML element name.
@@ -80,7 +95,7 @@ public class XmlElements {
public String getElementName() {
return elementName;
}
- };
+ }
/**
* The attributes of elements in the report.
@@ -98,17 +113,21 @@ public class XmlElements {
APPROVAL,
/** The family category */
FAMILY,
- /** The type */
+ /** The document type */
TYPE,
- /** The Id */
+ /** The id */
ID,
/** The name */
NAME,
/** A counter */
COUNT,
/** A description */
- DESCRIPTION
- };
+ DESCRIPTION,
+ /** The media type for a document */
+ MEDIA_TYPE,
+ /** The encoding for a text document */
+ ENCODING
+ }
/** The XMLWriter that we write to */
private final IXmlWriter writer;
@@ -187,9 +206,14 @@ public class XmlElements {
*/
public XmlElements document(final Document document) throws RatException {
final MetaData metaData = document.getMetaData();
- return write(Elements.RESOURCE)
+ XmlElements result = write(Elements.RESOURCE)
.write(Attributes.NAME, document.getName().localized("/"))
- .write(Attributes.TYPE, metaData.getDocumentType().toString());
+ .write(Attributes.TYPE, metaData.getDocumentType().toString())
+ .write(Attributes.MEDIA_TYPE,
metaData.getMediaType().toString());
+ if (Document.Type.STANDARD == metaData.getDocumentType()) {
+ result = result.write(Attributes.ENCODING,
metaData.getCharset().displayName());
+ }
+ return result;
}
/**
@@ -314,7 +338,7 @@ public class XmlElements {
*/
public XmlElements write(final Attributes attribute, final String value)
throws RatException {
try {
- writer.attribute(attribute.name().toLowerCase(Locale.ROOT), value);
+ writer.attribute(normalizeName(attribute.name()), value);
return this;
} catch (IOException e) {
throw new RatException("Cannot open add attribute: " + attribute,
e);
diff --git a/apache-rat-core/src/main/resources/org/apache/rat/rat-report.xsd
b/apache-rat-core/src/main/resources/org/apache/rat/rat-report.xsd
index 5d002bc4..fdd530da 100644
--- a/apache-rat-core/src/main/resources/org/apache/rat/rat-report.xsd
+++ b/apache-rat-core/src/main/resources/org/apache/rat/rat-report.xsd
@@ -46,6 +46,8 @@
</xs:sequence>
<xs:attribute type="xs:string" name="name"
use="required"/>
<xs:attribute type="xs:string" name="type"
use="required"/>
+ <xs:attribute type="xs:string" name="mediaType"
use="required" />
+ <xs:attribute type="xs:string" name="encoding"
use="optional" />
</xs:complexType>
</xs:element>
<xs:element name="statistics" maxOccurs="1" minOccurs="0">
diff --git
a/apache-rat-core/src/test/java/org/apache/rat/analysis/DefaultAnalyserFactoryTest.java
b/apache-rat-core/src/test/java/org/apache/rat/analysis/DefaultAnalyserFactoryTest.java
index d72fb423..bb724630 100644
---
a/apache-rat-core/src/test/java/org/apache/rat/analysis/DefaultAnalyserFactoryTest.java
+++
b/apache-rat-core/src/test/java/org/apache/rat/analysis/DefaultAnalyserFactoryTest.java
@@ -25,6 +25,9 @@ import static org.junit.jupiter.api.Assertions.assertTrue;
import java.io.File;
import java.io.StringWriter;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.stream.Stream;
import org.apache.rat.Defaults;
import org.apache.rat.ReportConfiguration;
import org.apache.rat.api.Document;
@@ -38,6 +41,9 @@ import org.apache.rat.testhelpers.TextUtils;
import org.assertj.core.util.Files;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
public class DefaultAnalyserFactoryTest {
@@ -63,9 +69,6 @@ public class DefaultAnalyserFactoryTest {
@Test
public void standardTypeAnalyser() throws Exception {
String[] expected = {
- "<resource name='/elements/Text.txt' type='STANDARD'>"
- + "<license id='?????' name='Unknown license'
approval='false' family='?????'/>"
- + "<sample><![CDATA[ /*", //
" * Licensed to the Apache Software Foundation (ASF) under
one", //
" * or more contributor license agreements. See the NOTICE
file", //
" * distributed with this work for additional information", //
@@ -79,16 +82,19 @@ public class DefaultAnalyserFactoryTest {
" * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY",
//
" * KIND, either express or implied. See the License for
the", //
" * specific language governing permissions and limitations",
//
- " * under the License.", //
- " ]]></sample></resource>" };
+ " * under the License."
+ };
final Document document = new FileDocument(basedir,
Resources.getResourceFile("/elements/Text.txt"), p -> true);
analyser.analyse(document);
- reporter.report(document);
- String result = out.toString();
+ assertEquals(Document.Type.STANDARD,
document.getMetaData().getDocumentType());
+ assertEquals("text/plain",
document.getMetaData().getMediaType().toString());
+ assertEquals(1, document.getMetaData().licenses().count());
+ document.getMetaData().licenses().forEach(lic ->
assertEquals(UnknownLicense.INSTANCE, lic));
+ String result = document.getMetaData().getSampleHeader();
for (String exp : expected) {
- assertTrue(result.contains(exp), () -> exp);
+ assertTrue(result.contains(exp), exp);
}
}
@@ -97,8 +103,8 @@ public class DefaultAnalyserFactoryTest {
final Document document = new FileDocument(basedir,
Resources.getResourceFile("/elements/LICENSE"), p -> true);
analyser.analyse(document);
- reporter.report(document);
- assertEquals("<resource name='/elements/LICENSE' type='NOTICE'/>",
out.toString());
+ assertEquals(Document.Type.NOTICE,
document.getMetaData().getDocumentType());
+ assertEquals("text/plain",
document.getMetaData().getMediaType().toString());
}
@Test
@@ -106,8 +112,8 @@ public class DefaultAnalyserFactoryTest {
final Document document = new FileDocument(basedir,
Resources.getResourceFile("/elements/Image.png"), p -> true);
analyser.analyse(document);
- reporter.report(document);
- assertEquals("<resource name='/elements/Image.png' type='BINARY'/>",
out.toString());
+ assertEquals(Document.Type.BINARY,
document.getMetaData().getDocumentType());
+ assertEquals("image/png",
document.getMetaData().getMediaType().toString());
}
@Test
@@ -119,42 +125,32 @@ public class DefaultAnalyserFactoryTest {
config.setFrom(defaults);
analyser = DefaultAnalyserFactory.createDefaultAnalyser(config);
analyser.analyse(document);
- reporter.report(document);
- assertEquals("<resource name='/elements/dummy.jar' type='ARCHIVE'/>",
out.toString());
+ assertEquals(Document.Type.ARCHIVE,
document.getMetaData().getDocumentType());
+ assertEquals("application/java-archive",
document.getMetaData().getMediaType().toString());
}
- @Test
- public void archivesAbsenceTest() throws Exception {
- final Document document = new FileDocument(basedir,
- Resources.getResourceFile("/elements/dummy.jar"), p -> true);
- Defaults defaults = Defaults.builder().build();
- ReportConfiguration config = new ReportConfiguration();
- config.setFrom(defaults);
- config.setArchiveProcessing(ReportConfiguration.Processing.ABSENCE);
- analyser = DefaultAnalyserFactory.createDefaultAnalyser(config);
- analyser.analyse(document);
- reporter.report(document);
- String result = out.toString();
- TextUtils.assertContains("<resource name='/elements/dummy.jar'
type='ARCHIVE'>", result);
- TextUtils.assertContains("<license id='?????' name='Unknown license'
approval='false' family='?????'/>", result);
- TextUtils.assertContains("<license id='ASL' name='Applied Apache
License Version 2.0' approval='false' family='AL '/>", result);
+ private static Stream<Arguments> archivesAbsenceTestData() {
+ List<Arguments> lst = new ArrayList<>();
+ lst.add(Arguments.of(ReportConfiguration.Processing.NOTIFICATION, 0));
+ lst.add(Arguments.of(ReportConfiguration.Processing.PRESENCE, 2));
+ lst.add(Arguments.of(ReportConfiguration.Processing.ABSENCE, 3));
+ return lst.stream();
}
- @Test
- public void archivesPresenceTest() throws Exception {
+ @ParameterizedTest
+ @MethodSource("archivesAbsenceTestData")
+ public void archivesAbsenceTest(ReportConfiguration.Processing
archiveProcessing, int expectedLicenseCount) throws Exception {
final Document document = new FileDocument(basedir,
Resources.getResourceFile("/elements/dummy.jar"), p -> true);
Defaults defaults = Defaults.builder().build();
ReportConfiguration config = new ReportConfiguration();
config.setFrom(defaults);
- config.setArchiveProcessing(ReportConfiguration.Processing.PRESENCE);
+ config.setArchiveProcessing(archiveProcessing);
analyser = DefaultAnalyserFactory.createDefaultAnalyser(config);
analyser.analyse(document);
- reporter.report(document);
- String result = out.toString();
- TextUtils.assertContains("<resource name='/elements/dummy.jar'
type='ARCHIVE'>", result);
- TextUtils.assertNotContains("<license id='?????' name='Unknown
license' approval='false' family='?????'/>", result);
- TextUtils.assertContains("<license id='ASL' name='Applied Apache
License Version 2.0' approval='false' family='AL '/>", result);
+ assertEquals(Document.Type.ARCHIVE,
document.getMetaData().getDocumentType());
+ assertEquals("application/java-archive",
document.getMetaData().getMediaType().toString());
+ assertEquals(expectedLicenseCount,
document.getMetaData().licenses().count());
}
@Test
@@ -162,8 +158,8 @@ public class DefaultAnalyserFactoryTest {
final Document document = new FileDocument(basedir,
Resources.getResourceFile("/elements/dummy.jar"), p -> true);
analyser.analyse(document);
- reporter.report(document);
- assertEquals("<resource name='/elements/dummy.jar' type='ARCHIVE'/>",
out.toString());
+ assertEquals(Document.Type.ARCHIVE,
document.getMetaData().getDocumentType());
+ assertEquals("application/java-archive",
document.getMetaData().getMediaType().toString());
}
@Test
@@ -171,8 +167,8 @@ public class DefaultAnalyserFactoryTest {
final Document document = new FileDocument(basedir,
Resources.getResourceFile("/jira/RAT211/side_left.bmp"), p ->
true);
analyser.analyse(document);
- reporter.report(document);
- assertEquals("<resource name='/jira/RAT211/side_left.bmp'
type='BINARY'/>", out.toString());
+ assertEquals(Document.Type.BINARY,
document.getMetaData().getDocumentType());
+ assertEquals("image/bmp",
document.getMetaData().getMediaType().toString());
}
@Test
@@ -180,10 +176,8 @@ public class DefaultAnalyserFactoryTest {
final Document document = new FileDocument(basedir,
Resources.getResourceFile("/jira/RAT211/leader-election-message-arrives.dia"),
p -> true);
analyser.analyse(document);
- reporter.report(document);
- assertEquals(
- "<resource
name='/jira/RAT211/leader-election-message-arrives.dia' type='ARCHIVE'/>",
- out.toString());
+ assertEquals(Document.Type.ARCHIVE,
document.getMetaData().getDocumentType());
+ assertEquals("application/gzip",
document.getMetaData().getMediaType().toString());
}
@Test
diff --git
a/apache-rat-tasks/src/test/resources/antunit/report-normal-operation.xml
b/apache-rat-tasks/src/test/resources/antunit/report-normal-operation.xml
index db495417..10cfd859 100644
--- a/apache-rat-tasks/src/test/resources/antunit/report-normal-operation.xml
+++ b/apache-rat-tasks/src/test/resources/antunit/report-normal-operation.xml
@@ -75,7 +75,7 @@ SPDX-License-Identifier: Apache-2.0
</pathconvert>
<property name="expectedOutput" value=" S
/report-normal-operation.xml" />
<property name="expectedOutputXML"
- value='<resource
name="/report-normal-operation.xml"' />
+ value='<resource encoding="ISO-8859-1"
mediaType="application/xml" name="/report-normal-operation.xml"
type="STANDARD"' />
<property name="expectedOutputXML2" value='family="AL "' />
</target>
diff --git a/pom.xml b/pom.xml
index 4801ddab..eb5f216d 100644
--- a/pom.xml
+++ b/pom.xml
@@ -50,6 +50,7 @@ agnostic home for software distribution comprehension and
audit tools.
<ant.version>1.10.15</ant.version>
<mockito.version>4.11.0</mockito.version>
<javaVersion>1.8</javaVersion>
+ <tika.version>2.9.2</tika.version>
<maven.compiler.source>${javaVersion}</maven.compiler.source>
<maven.compiler.target>${javaVersion}</maven.compiler.target>
<!-- This is the version of Maven required to use the Rat Maven Plugin -->
@@ -198,7 +199,12 @@ agnostic home for software distribution comprehension and
audit tools.
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-core</artifactId>
- <version>2.9.2</version>
+ <version>${tika.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-parser-text-module</artifactId>
+ <version>${tika.version}</version>
</dependency>
<dependency>
<groupId>org.codehaus.groovy</groupId>
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index f0b8008d..cee0945f 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -72,6 +72,15 @@ The <action> type attribute can be one of:
</release>
-->
<release version="0.17-SNAPSHOT" date="xxxx-yy-zz" description="Current
SNAPSHOT - release to be done">
+ <action issue="RAT-81" type="add" dev="claudenw">
+ Added encoding information of the file being read to the RAT report in
case of STANDARD document files.
+ Added media type attribute in report for all files.
+ </action>
+ <action issue="RAT-81" type="fix" dev="claudenw">
+ Fixed encoding issue where text files not in UTF-8 encoding would not
be read properly.
+ Change adds charset to the metadata when it can be discovered. If not,
UTF-8 is returned.
+ Added integration test to show reading of UTF-8 and IBM037 encoding
works.
+ </action>
<action issue="RAT-408" type="fix" dev="claudenw">
Added core integration tests and verified RAT-408 is fixed with the
new exclusion engine.
</action>