This is an automated email from the ASF dual-hosted git repository.

nick pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git

commit d0324f8e4fa70fce67d56dc70f611f5535fe229b
Author: Nick Burch <[email protected]>
AuthorDate: Wed May 9 18:19:34 2018 +0100

    Add a test .sas7bdat file with labels, and generate the columnar/tabular test file in a few more formats
---
 .../apache/tika/parser/sas/SAS7BDATParserTest.java |  51 +++++++----
 .../resources/test-documents/test-columnar.sas.xml | 102 +++++++++++++++++++++
 .../test-documents/test-columnar.sas7bdat          | Bin 9216 -> 17408 bytes
 .../resources/test-documents/test-columnar.xpt     | Bin 0 -> 4560 bytes
 .../src/test/resources/test-documents/testSAS2.sas |  48 ++++++++++
 5 files changed, 182 insertions(+), 19 deletions(-)

diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/sas/SAS7BDATParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/sas/SAS7BDATParserTest.java
index 2657ac2..3bb3e01 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/sas/SAS7BDATParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/sas/SAS7BDATParserTest.java
@@ -82,36 +82,36 @@ public class SAS7BDATParserTest extends TikaTest {
         Metadata metadata = new Metadata();
 
         try (InputStream stream = SAS7BDATParserTest.class.getResourceAsStream(
-                "/test-documents/test-columnar.sas7bdat")) {
+                "/test-documents/test-columnar.sas7bdat")) {            
             parser.parse(stream, handler, metadata, new ParseContext());
         }
 
         assertEquals("application/x-sas-data", metadata.get(Metadata.CONTENT_TYPE));
-        assertEquals("SHEET1", metadata.get(TikaCoreProperties.TITLE));
+        assertEquals("TESTING", metadata.get(TikaCoreProperties.TITLE));
 
-        // Fri Mar 06 19:10:19 GMT 2015
-        assertEquals("2015-03-06T19:10:19Z", metadata.get(TikaCoreProperties.CREATED));
-        assertEquals("2015-03-06T19:10:19Z", metadata.get(TikaCoreProperties.MODIFIED));
+        assertEquals("2018-05-09T16:42:04Z", metadata.get(TikaCoreProperties.CREATED));
+        assertEquals("2018-05-09T16:42:04Z", metadata.get(TikaCoreProperties.MODIFIED));
         
         assertEquals("1", metadata.get(PagedText.N_PAGES));
-        assertEquals("5", metadata.get(Database.COLUMN_COUNT));
-        assertEquals("31", metadata.get(Database.ROW_COUNT));
+        assertEquals("7", metadata.get(Database.COLUMN_COUNT));
+        assertEquals("11", metadata.get(Database.ROW_COUNT));
         assertEquals("windows-1252", metadata.get(HttpHeaders.CONTENT_ENCODING));
-        assertEquals("XP_PRO", metadata.get(OfficeOpenXMLExtended.APPLICATION));
-        assertEquals("9.0101M3", metadata.get(OfficeOpenXMLExtended.APP_VERSION));
+        assertEquals("W32_7PRO", metadata.get(OfficeOpenXMLExtended.APPLICATION));
+        assertEquals("9.0301M2", metadata.get(OfficeOpenXMLExtended.APP_VERSION));
         assertEquals("32", metadata.get(MachineMetadata.ARCHITECTURE_BITS));
         assertEquals("Little", metadata.get(MachineMetadata.ENDIAN));
-        assertEquals(Arrays.asList("A","B","C","D","E"),
+        assertEquals(Arrays.asList("Record Number","Square of the Record Number",
+                                   "Description of the Row","Percent Done",
+                                   "Percent Increment","date","datetime"),
                      Arrays.asList(metadata.getValues(Database.COLUMN_NAME)));
         
         String content = handler.toString();
-        assertContains("SHEET1", content);
-        assertContains("A\tB\tC", content);
-        assertContains("Num=0\t", content);
-        assertContains("Num=404242\t", content);
-        assertContains("\t0\t", content);
-        assertContains("\t404242\t", content);
-        assertContains("\t08Feb1904\t", content);
+        assertContains("TESTING", content);
+        assertContains("0\t0\tThis", content);
+        assertContains("2\t4\tThis", content);
+        assertContains("4\t16\tThis", content);
+        assertContains("\t01-01-1960\t", content);
+        assertContains("\t01Jan1960:00:00", content);
     }
 
     @Test
@@ -129,7 +129,20 @@ public class SAS7BDATParserTest extends TikaTest {
         assertContains("<td>This is row", xml);
         assertContains("10</td>", xml);
     }
+    
+    @Test
+    public void testHTML2() throws Exception {
+        XMLResult result = getXML("test-columnar.sas7bdat");
+        String xml = result.xml;
 
-    // TODO Column names vs labels, with a different test file
-    // TODO Columnar consistency test
+        // Check the title came through
+        assertContains("<h1>TESTING</h1>", xml);
+        // Check the headings
+        assertContains("<th title=\"recnum\">Record Number</th>", xml);
+        assertContains("<th title=\"square\">Square of the Record Number</th>", xml);
+        assertContains("<th title=\"date\">date</th>", xml);
+        // Check formatting of dates
+        assertContains("<td>01-01-1960</td>", xml);
+        assertContains("<td>01Jan1960:00:00:10.00</td>", xml);
+    }
 }
diff --git a/tika-parsers/src/test/resources/test-documents/test-columnar.sas.xml b/tika-parsers/src/test/resources/test-documents/test-columnar.sas.xml
new file mode 100644
index 0000000..ae12fc5
--- /dev/null
+++ b/tika-parsers/src/test/resources/test-documents/test-columnar.sas.xml
@@ -0,0 +1,102 @@
+<?xml version="1.0" encoding="windows-1252" ?>
+<TABLE>
+   <TESTXML>
+      <recnum>0</recnum>
+      <square>0</square>
+      <desc>This is row            0 of           10</desc>
+      <pctdone>0</pctdone>
+      <pctincr missing="M" />
+      <date>0</date>
+      <datetime>1960-01-01T00:00:01</datetime>
+   </TESTXML>
+   <TESTXML>
+      <recnum>1</recnum>
+      <square>1</square>
+      <desc>This is row            1 of           10</desc>
+      <pctdone>0.1</pctdone>
+      <pctincr>0</pctincr>
+      <date>1</date>
+      <datetime>1960-01-01T00:00:10</datetime>
+   </TESTXML>
+   <TESTXML>
+      <recnum>2</recnum>
+      <square>4</square>
+      <desc>This is row            2 of           10</desc>
+      <pctdone>0.2</pctdone>
+      <pctincr>0.5</pctincr>
+      <date>16</date>
+      <datetime>1960-01-01T00:01:40</datetime>
+   </TESTXML>
+   <TESTXML>
+      <recnum>3</recnum>
+      <square>9</square>
+      <desc>This is row            3 of           10</desc>
+      <pctdone>0.3</pctdone>
+      <pctincr>0.6666666667</pctincr>
+      <date>81</date>
+      <datetime>1960-01-01T00:16:40</datetime>
+   </TESTXML>
+   <TESTXML>
+      <recnum>4</recnum>
+      <square>16</square>
+      <desc>This is row            4 of           10</desc>
+      <pctdone>0.4</pctdone>
+      <pctincr>0.75</pctincr>
+      <date>256</date>
+      <datetime>1960-01-01T02:46:40</datetime>
+   </TESTXML>
+   <TESTXML>
+      <recnum>5</recnum>
+      <square>25</square>
+      <desc>This is row            5 of           10</desc>
+      <pctdone>0.5</pctdone>
+      <pctincr>0.8</pctincr>
+      <date>625</date>
+      <datetime>1960-01-02T03:46:40</datetime>
+   </TESTXML>
+   <TESTXML>
+      <recnum>6</recnum>
+      <square>36</square>
+      <desc>This is row            6 of           10</desc>
+      <pctdone>0.6</pctdone>
+      <pctincr>0.8333333333</pctincr>
+      <date>1296</date>
+      <datetime>1960-01-12T13:46:40</datetime>
+   </TESTXML>
+   <TESTXML>
+      <recnum>7</recnum>
+      <square>49</square>
+      <desc>This is row            7 of           10</desc>
+      <pctdone>0.7</pctdone>
+      <pctincr>0.8571428571</pctincr>
+      <date>2401</date>
+      <datetime>1960-04-25T17:46:40</datetime>
+   </TESTXML>
+   <TESTXML>
+      <recnum>8</recnum>
+      <square>64</square>
+      <desc>This is row            8 of           10</desc>
+      <pctdone>0.8</pctdone>
+      <pctincr>0.875</pctincr>
+      <date>4096</date>
+      <datetime>1963-03-03T09:46:40</datetime>
+   </TESTXML>
+   <TESTXML>
+      <recnum>9</recnum>
+      <square>81</square>
+      <desc>This is row            9 of           10</desc>
+      <pctdone>0.9</pctdone>
+      <pctincr>0.8888888889</pctincr>
+      <date>6561</date>
+      <datetime>1991-09-09T01:46:40</datetime>
+   </TESTXML>
+   <TESTXML>
+      <recnum>10</recnum>
+      <square>100</square>
+      <desc>This is row           10 of           10</desc>
+      <pctdone>1</pctdone>
+      <pctincr>0.9</pctincr>
+      <date>10000</date>
+      <datetime>2276-11-19T17:46:40</datetime>
+   </TESTXML>
+</TABLE>
diff --git a/tika-parsers/src/test/resources/test-documents/test-columnar.sas7bdat b/tika-parsers/src/test/resources/test-documents/test-columnar.sas7bdat
index 250b3b8..553c45c 100644
Binary files a/tika-parsers/src/test/resources/test-documents/test-columnar.sas7bdat and b/tika-parsers/src/test/resources/test-documents/test-columnar.sas7bdat differ
diff --git a/tika-parsers/src/test/resources/test-documents/test-columnar.xpt b/tika-parsers/src/test/resources/test-documents/test-columnar.xpt
new file mode 100644
index 0000000..d908228
Binary files /dev/null and b/tika-parsers/src/test/resources/test-documents/test-columnar.xpt differ
diff --git a/tika-parsers/src/test/resources/test-documents/testSAS2.sas b/tika-parsers/src/test/resources/test-documents/testSAS2.sas
new file mode 100644
index 0000000..bc8c1fe
--- /dev/null
+++ b/tika-parsers/src/test/resources/test-documents/testSAS2.sas
@@ -0,0 +1,48 @@
+data testing;
+begin=0;
+end=10;
+msg="This is row %x of %y";
+do i = begin to end by 1;
+drop msg begin end i;
+recnum=i;
+square=i*i;
+desc=tranwrd(tranwrd(msg,"%x",i),"%y",end);
+format pctdone percent8.0;
+format pctincr percent7.1;
+pctdone=divide(i,end);
+pctincr=divide(i-1,i);
+format date ddmmyyd10.;
+format datetime datetime.;
+date=i**4;
+datetime=10**i;
+output;
+end;
+label recnum="Record Number"
+      square="Square of the Record Number"
+         desc="Description of the Row"
+         pctdone="Percent Done"
+         pctincr="Percent Increment";
+run;
+
+libname out          '/home/tika/testing/sas';
+libname outxpt XPORT '/home/tika/testing/sas/testing.xpt';
+libname outv6 v6     '/home/tika/testing/sas';
+libname outxml xmlv2 '/home/tika/testing/sas';
+
+data out.testing;
+set testing;
+run;
+data outv6.testv6;
+set testing;
+run;
+data outxml.testxml;
+set testing;
+run;
+proc copy in=out out=outxpt;
+select testing;
+run;
+
+
+proc print data=testing;
+run;
+

-- 
To stop receiving notification emails like this one, please contact
[email protected].

Reply via email to