This is an automated email from the ASF dual-hosted git repository. nick pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/tika.git
commit ca2f5bc63b7595730e53e95758dc9aaf6b567daa Author: Nick Burch <[email protected]> AuthorDate: Thu May 10 11:35:04 2018 +0100 Add a time column to the test columnar files --- .../org/apache/tika/parser/TabularFormatsTest.java | 22 +++++++----- .../apache/tika/parser/sas/SAS7BDATParserTest.java | 8 ++--- .../resources/test-documents/test-columnar.csv | 37 +++++++-------------- .../resources/test-documents/test-columnar.sas.xml | 11 ++++++ .../test-documents/test-columnar.sas7bdat | Bin 17408 -> 17408 bytes .../resources/test-documents/test-columnar.xls | Bin 0 -> 6656 bytes .../resources/test-documents/test-columnar.xlsx | Bin 0 -> 4941 bytes .../resources/test-documents/test-columnar.xpt | Bin 4560 -> 4720 bytes .../src/test/resources/test-documents/testSAS2.sas | 27 ++++++++++++--- 9 files changed, 64 insertions(+), 41 deletions(-) diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/TabularFormatsTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/TabularFormatsTest.java index 61fcca2..4dc7336 100644 --- a/tika-parsers/src/test/java/org/apache/tika/parser/TabularFormatsTest.java +++ b/tika-parsers/src/test/java/org/apache/tika/parser/TabularFormatsTest.java @@ -26,25 +26,31 @@ import org.junit.Test; * This is mostly focused on the XHTML output */ public class TabularFormatsTest extends TikaTest { - protected static final String[] headers = new String[] { - "String (Num=)","Number","Date","Datetime","Number" + protected static final String[] columnNames = new String[] { + "recnum","square","desc","pctdone","pctinc", + "date","datetime","time" }; + protected static final String[] columnLabels = new String[] { + "Record Number","Square of the Record Number", + "Description of the Row","Percent Done", + "Percent Increment","date","datetime","time" + }; + /** * Expected values, by <em>column</em> */ protected static final String[][] table = new String[][] { // TODO All values new String[] { - "Num=0" + 
"0","1","2","3","4","5","6","7","8","9","10" }, new String[] { - "0.0" + "0","1","4" // etc }, - new String[] { - "1899-12-30" + new String[] { // etc + "01-01-1960" }, - new String[] { - "1900-01-01 11:00:00" + new String[] { // etc }, new String[] { "" diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/sas/SAS7BDATParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/sas/SAS7BDATParserTest.java index 3bb3e01..610ffc3 100644 --- a/tika-parsers/src/test/java/org/apache/tika/parser/sas/SAS7BDATParserTest.java +++ b/tika-parsers/src/test/java/org/apache/tika/parser/sas/SAS7BDATParserTest.java @@ -89,11 +89,11 @@ public class SAS7BDATParserTest extends TikaTest { assertEquals("application/x-sas-data", metadata.get(Metadata.CONTENT_TYPE)); assertEquals("TESTING", metadata.get(TikaCoreProperties.TITLE)); - assertEquals("2018-05-09T16:42:04Z", metadata.get(TikaCoreProperties.CREATED)); - assertEquals("2018-05-09T16:42:04Z", metadata.get(TikaCoreProperties.MODIFIED)); + assertEquals("2018-05-09T17:59:33Z", metadata.get(TikaCoreProperties.CREATED)); + assertEquals("2018-05-09T17:59:33Z", metadata.get(TikaCoreProperties.MODIFIED)); assertEquals("1", metadata.get(PagedText.N_PAGES)); - assertEquals("7", metadata.get(Database.COLUMN_COUNT)); + assertEquals("8", metadata.get(Database.COLUMN_COUNT)); assertEquals("11", metadata.get(Database.ROW_COUNT)); assertEquals("windows-1252", metadata.get(HttpHeaders.CONTENT_ENCODING)); assertEquals("W32_7PRO", metadata.get(OfficeOpenXMLExtended.APPLICATION)); @@ -102,7 +102,7 @@ public class SAS7BDATParserTest extends TikaTest { assertEquals("Little", metadata.get(MachineMetadata.ENDIAN)); assertEquals(Arrays.asList("Record Number","Square of the Record Number", "Description of the Row","Percent Done", - "Percent Increment","date","datetime"), + "Percent Increment","date","datetime","time"), Arrays.asList(metadata.getValues(Database.COLUMN_NAME))); String content = handler.toString(); diff --git 
a/tika-parsers/src/test/resources/test-documents/test-columnar.csv b/tika-parsers/src/test/resources/test-documents/test-columnar.csv index 8de4097..5ef57bb 100644 --- a/tika-parsers/src/test/resources/test-documents/test-columnar.csv +++ b/tika-parsers/src/test/resources/test-documents/test-columnar.csv @@ -1,25 +1,12 @@ -"String (Num=)","Number","Date","Datetime","Number" -Num=0,0.0,1899-12-30,1900-01-01 11:00:00, -Num=0.1,0.1,1899-12-30,1899-12-30 02:24:00,0.1 -Num=0.25,0.25,1899-12-30,1899-12-30 06:00:00,0.25 -Num=0.5,0.5,1899-12-30,1899-12-30 12:00:00,0.5 -Num=1,1.0,1900-01-01,1900-01-01 00:00:00, -Num=1.1,1.1,1900-01-01,1900-01-01 02:24:00,1.1 -Num=1.2,1.2,1900-01-01,1900-01-01 04:48:00,1.2 -Num=1.5,1.5,1900-01-01,1900-01-01 12:00:00,1.5 -Num=2,2.0,1900-01-02,1900-01-02 00:00:00,2.0 -Num=2.5,2.5,1900-01-02,1900-01-02 12:00:00,2.5 -Num=3,3.0,1900-01-03,1900-01-03 00:00:00,3.0 -Num=4,4.0,1900-01-04,1900-01-04 00:00:00,4.0 -Num=5,5.0,1900-01-05,1900-01-05 00:00:00,5.0 -Num=10,10.0,1900-01-10,1900-01-10 00:00:00,10.0 -Num=15,15.0,1900-01-15,1900-01-15 00:00:00,15.0 -Num=25,25.0,1900-01-25,1900-01-25 00:00:00,25.0 -Num=50,50.0,1900-02-19,1900-02-19 00:00:00,50.0 -Num=60,60.0,1900-02-28,1900-02-28 00:00:00,60.0 -Num=65,65.0,1900-03-05,1900-03-05 00:00:00,65.0 -Num=100,100.0,1900-04-09,1900-04-09 00:00:00,100.0 -Num=120,120.0,1900-04-29,1900-04-29 00:00:00,120.0 -Num=1500,1500.0,1904-02-08,1904-02-08 00:00:00,1500.0 -Num=20222,20222.0,1955-05-13,1955-05-13 00:00:00,20222.0 -Num=404242,404242.0,3006-10-10,3006-10-10 00:00:00,404242.0 +"Record Number","Square of the Record Number","Description of the Row","Percent Done","Percent Increment","date","datetime","time" +0,0,This is row 0 of 10,0%,M,01-01-1960,01JAN60:00:00:01,0:00:01 +1,1,This is row 1 of 10,10%,0.0%,02-01-1960,01JAN60:00:00:10,0:00:03 +2,4,This is row 2 of 10,20%,50.0%,17-01-1960,01JAN60:00:01:40,0:00:09 +3,9,This is row 3 of 10,30%,66.7%,22-03-1960,01JAN60:00:16:40,0:00:27 +4,16,This is row 4 of 
10,40%,75.0%,13-09-1960,01JAN60:02:46:40,0:01:21 +5,25,This is row 5 of 10,50%,80.0%,17-09-1961,02JAN60:03:46:40,0:04:03 +6,36,This is row 6 of 10,60%,83.3%,20-07-1963,12JAN60:13:46:40,0:12:09 +7,49,This is row 7 of 10,70%,85.7%,29-07-1966,25APR60:17:46:40,0:36:27 +8,64,This is row 8 of 10,80%,87.5%,20-03-1971,03MAR63:09:46:40,1:49:21 +9,81,This is row 9 of 10,90%,88.9%,18-12-1977,09SEP91:01:46:40,5:28:03 +10,100,This is row 10 of 10,100%,90.0%,19-05-1987,19NOV76:17:46:40,16:24:09 diff --git a/tika-parsers/src/test/resources/test-documents/test-columnar.sas.xml b/tika-parsers/src/test/resources/test-documents/test-columnar.sas.xml index ae12fc5..45df965 100644 --- a/tika-parsers/src/test/resources/test-documents/test-columnar.sas.xml +++ b/tika-parsers/src/test/resources/test-documents/test-columnar.sas.xml @@ -8,6 +8,7 @@ <pctincr missing="M" /> <date>0</date> <datetime>1960-01-01T00:00:01</datetime> + <time>00:00:01</time> </TESTXML> <TESTXML> <recnum>1</recnum> @@ -17,6 +18,7 @@ <pctincr>0</pctincr> <date>1</date> <datetime>1960-01-01T00:00:10</datetime> + <time>00:00:03</time> </TESTXML> <TESTXML> <recnum>2</recnum> @@ -26,6 +28,7 @@ <pctincr>0.5</pctincr> <date>16</date> <datetime>1960-01-01T00:01:40</datetime> + <time>00:00:09</time> </TESTXML> <TESTXML> <recnum>3</recnum> @@ -35,6 +38,7 @@ <pctincr>0.6666666667</pctincr> <date>81</date> <datetime>1960-01-01T00:16:40</datetime> + <time>00:00:27</time> </TESTXML> <TESTXML> <recnum>4</recnum> @@ -44,6 +48,7 @@ <pctincr>0.75</pctincr> <date>256</date> <datetime>1960-01-01T02:46:40</datetime> + <time>00:01:21</time> </TESTXML> <TESTXML> <recnum>5</recnum> @@ -53,6 +58,7 @@ <pctincr>0.8</pctincr> <date>625</date> <datetime>1960-01-02T03:46:40</datetime> + <time>00:04:03</time> </TESTXML> <TESTXML> <recnum>6</recnum> @@ -62,6 +68,7 @@ <pctincr>0.8333333333</pctincr> <date>1296</date> <datetime>1960-01-12T13:46:40</datetime> + <time>00:12:09</time> </TESTXML> <TESTXML> <recnum>7</recnum> @@ -71,6 +78,7 @@ 
<pctincr>0.8571428571</pctincr> <date>2401</date> <datetime>1960-04-25T17:46:40</datetime> + <time>00:36:27</time> </TESTXML> <TESTXML> <recnum>8</recnum> @@ -80,6 +88,7 @@ <pctincr>0.875</pctincr> <date>4096</date> <datetime>1963-03-03T09:46:40</datetime> + <time>01:49:21</time> </TESTXML> <TESTXML> <recnum>9</recnum> @@ -89,6 +98,7 @@ <pctincr>0.8888888889</pctincr> <date>6561</date> <datetime>1991-09-09T01:46:40</datetime> + <time>05:28:03</time> </TESTXML> <TESTXML> <recnum>10</recnum> @@ -98,5 +108,6 @@ <pctincr>0.9</pctincr> <date>10000</date> <datetime>2276-11-19T17:46:40</datetime> + <time>16:24:09</time> </TESTXML> </TABLE> diff --git a/tika-parsers/src/test/resources/test-documents/test-columnar.sas7bdat b/tika-parsers/src/test/resources/test-documents/test-columnar.sas7bdat index 553c45c..33ee412 100644 Binary files a/tika-parsers/src/test/resources/test-documents/test-columnar.sas7bdat and b/tika-parsers/src/test/resources/test-documents/test-columnar.sas7bdat differ diff --git a/tika-parsers/src/test/resources/test-documents/test-columnar.xls b/tika-parsers/src/test/resources/test-documents/test-columnar.xls new file mode 100644 index 0000000..1d7b2cf Binary files /dev/null and b/tika-parsers/src/test/resources/test-documents/test-columnar.xls differ diff --git a/tika-parsers/src/test/resources/test-documents/test-columnar.xlsx b/tika-parsers/src/test/resources/test-documents/test-columnar.xlsx new file mode 100644 index 0000000..58ffd47 Binary files /dev/null and b/tika-parsers/src/test/resources/test-documents/test-columnar.xlsx differ diff --git a/tika-parsers/src/test/resources/test-documents/test-columnar.xpt b/tika-parsers/src/test/resources/test-documents/test-columnar.xpt index d908228..bbb59b5 100644 Binary files a/tika-parsers/src/test/resources/test-documents/test-columnar.xpt and b/tika-parsers/src/test/resources/test-documents/test-columnar.xpt differ diff --git a/tika-parsers/src/test/resources/test-documents/testSAS2.sas 
b/tika-parsers/src/test/resources/test-documents/testSAS2.sas index bc8c1fe..96a9121 100644 --- a/tika-parsers/src/test/resources/test-documents/testSAS2.sas +++ b/tika-parsers/src/test/resources/test-documents/testSAS2.sas @@ -2,6 +2,7 @@ data testing; begin=0; end=10; msg="This is row %x of %y"; + do i = begin to end by 1; drop msg begin end i; recnum=i; @@ -11,10 +12,13 @@ format pctdone percent8.0; format pctincr percent7.1; pctdone=divide(i,end); pctincr=divide(i-1,i); +/* Days / Seconds since Epoch / Seconds since midnight */ format date ddmmyyd10.; format datetime datetime.; +format time time.; date=i**4; datetime=10**i; +time=3**i; output; end; label recnum="Record Number" @@ -24,10 +28,11 @@ label recnum="Record Number" pctincr="Percent Increment"; run; -libname out '/home/tika/testing/sas'; -libname outxpt XPORT '/home/tika/testing/sas/testing.xpt'; -libname outv6 v6 '/home/tika/testing/sas'; -libname outxml xmlv2 '/home/tika/testing/sas'; +%let outpath = /home/tika/testing/sas; +libname out "&outpath"; +libname outxpt XPORT "&outpath./testing.xpt"; +libname outv6 v6 "&outpath"; +libname outxml xmlv2 "&outpath"; data out.testing; set testing; @@ -46,3 +51,17 @@ run; proc print data=testing; run; +proc export data=testing label + outfile="&outpath./testing.csv" + dbms=CSV REPLACE; +putnames=yes; +run; + +proc export data=testing label + outfile="&outpath./testing.xls" + dbms=XLS; +run; +proc export data=testing label + outfile="&outpath./testing.xlsx" + dbms=XLSX; +run; -- To stop receiving notification emails like this one, please contact [email protected].
