This is an automated email from the ASF dual-hosted git repository.

nick pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 1d7a113cdecf64a97a349d8ff74cad1ecd9127d3
Author: Nick Burch <[email protected]>
AuthorDate: Thu May 10 13:48:03 2018 +0100

    CSV assert as best we can (no dedicated parser), start on XLS and SAS7BDAT consistency tests
---
 .../org/apache/tika/parser/TabularFormatsTest.java | 65 ++++++++++++++++++++--
 1 file changed, 59 insertions(+), 6 deletions(-)

diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/TabularFormatsTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/TabularFormatsTest.java
index 4dc7336..8574d37 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/TabularFormatsTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/TabularFormatsTest.java
@@ -17,6 +17,10 @@
 package org.apache.tika.parser;
 
 
+import static org.junit.Assert.assertEquals;
+
+import java.util.Arrays;
+
 import org.apache.tika.TikaTest;
 import org.junit.Test;
 
@@ -57,21 +61,70 @@ public class TabularFormatsTest extends TikaTest {
         }
     };
 
-    protected void assertHeaders(String xml, boolean isTH) {
-        // TODO Check for the first row, then TR or TH
+    protected void assertHeaders(String xml, boolean isTH, boolean hasLabel, boolean hasName) {
+        // Find the first row
+        int splitAt = xml.indexOf("</tr>");
+        String hRow = xml.substring(0, splitAt);
+        splitAt = xml.indexOf("<tr>");
+        hRow = hRow.substring(splitAt+4);
+
+        // Split into cells, ignoring stuff before first cell
+        String[] cells;
+        if (isTH) {
+            cells = hRow.split("<th");
+        } else {
+            cells = hRow.split("<td");
+        }
+        cells = Arrays.copyOfRange(cells, 1, cells.length);
+        for (int i=0; i<cells.length; i++) {
+            splitAt = cells[i].lastIndexOf("</");
+            cells[i] = cells[i].substring(0, splitAt).trim();
+        }
+
+        // Check we got the right number
+        assertEquals("Wrong number of cells in header row " + hRow,
+                     columnLabels.length, cells.length);
+
+        // Check we got the right stuff
+        // TODO
     }
     protected void assertContents(String xml, boolean hasHeader) {
         // TODO Check the rows
     }
 
     @Test
+    public void testSAS7BDAT() throws Exception {
+        XMLResult result = getXML("test-columnar.sas7bdat");
+        String xml = result.xml;
+        assertHeaders(xml, true, true, true);
+        assertContents(xml, true);
+    }
+    @Test
+    public void testXLS() throws Exception {
+        XMLResult result = getXML("test-columnar.xls");
+        String xml = result.xml;
+        assertHeaders(xml, false, true, false);
+        assertContents(xml, true);
+    }
+    // TODO Other formats
+
+    /**
+     * Note - we don't have a dedicated CSV parser
+     * 
+     * This means we don't get proper HTML out...
+     */
+    @Test
     public void testCSV() throws Exception {
         XMLResult result = getXML("test-columnar.csv");
         String xml = result.xml;
 
-        assertHeaders(xml, false);
-        assertContents(xml, true);
+        for (String label : columnLabels) {
+            assertContains(label, xml);
+        }
+        for (String[] vals : table) {
+            for (String val : vals) {
+                assertContains(val, xml);
+            }
+        }
     }
-    // TODO SAS7BDAT
-    // TODO Other formats
 }

-- 
To stop receiving notification emails like this one, please contact
[email protected].

Reply via email to