JENA-625: (1) update the Google Guava dependency in the jena-csv pom.xml; (2) add tests that use real-world CSV data
git-svn-id: http://svn.apache.org/repos/asf/jena/Experimental/jena-csv@1617106 13f79535-47bb-0310-9956-ffa450edef68 Project: http://git-wip-us.apache.org/repos/asf/jena/repo Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/e7e92294 Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/e7e92294 Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/e7e92294 Branch: refs/heads/master Commit: e7e92294caa889aa66dc82975edc526611bd7f89 Parents: 973eab6 Author: Ying Jiang <[email protected]> Authored: Sun Aug 10 13:33:14 2014 +0000 Committer: Ying Jiang <[email protected]> Committed: Sun Aug 10 13:33:14 2014 +0000 ---------------------------------------------------------------------- pom.xml | 28 ++------ .../jena/propertytable/impl/GraphCSVTest.java | 72 ++++++++++++++++++++ .../HEFCE_organogram_senior_data_31032011.csv | 5 ++ src/test/resources/PLOSone-search-results.csv | 6 ++ src/test/resources/Palo_Alto_Trees.csv | 6 ++ 5 files changed, 96 insertions(+), 21 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/jena/blob/e7e92294/pom.xml ---------------------------------------------------------------------- diff --git a/pom.xml b/pom.xml index 00dd6e5..ad246ac 100644 --- a/pom.xml +++ b/pom.xml @@ -47,22 +47,22 @@ <dependency> <groupId>org.apache.jena</groupId> <artifactId>apache-jena-libs</artifactId> - <version>2.12.0-SNAPSHOT</version> + <version>2.12.1-SNAPSHOT</version> <type>pom</type> </dependency> <!-- Google Code Guava --> <dependency> - <groupId>com.googlecode.guava-osgi</groupId> - <artifactId>guava-osgi</artifactId> - <version>11.0.0</version> - </dependency> + <groupId>com.google.guava</groupId> + <artifactId>guava</artifactId> + <version>17.0</version> + </dependency> <!-- Testing support --> <dependency> <groupId>org.apache.jena</groupId> <artifactId>jena-arq</artifactId> - <version>2.12.0-SNAPSHOT</version> + <version>2.12.1-SNAPSHOT</version> <type>jar</type> 
<classifier>tests</classifier> <scope>test</scope> @@ -162,19 +162,5 @@ </plugins> </build> - - - <repositories> - <repository> - <id>maven-restlet</id> - <name>Public online Restlet repository</name> - <url>http://maven.restlet.org</url> - </repository> - <repository> - <id>maven2-repository.dev.java.net</id> - <name>Java.net repository</name> - <url>http://download.java.net/maven/2</url> - </repository> - </repositories> - + </project> http://git-wip-us.apache.org/repos/asf/jena/blob/e7e92294/src/test/java/org/apache/jena/propertytable/impl/GraphCSVTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/jena/propertytable/impl/GraphCSVTest.java b/src/test/java/org/apache/jena/propertytable/impl/GraphCSVTest.java index 04634c7..c5fa87e 100644 --- a/src/test/java/org/apache/jena/propertytable/impl/GraphCSVTest.java +++ b/src/test/java/org/apache/jena/propertytable/impl/GraphCSVTest.java @@ -32,11 +32,13 @@ import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.ModelFactory; import com.hp.hpl.jena.sparql.engine.main.StageBuilder; import com.hp.hpl.jena.sparql.engine.main.StageGenerator; +import com.hp.hpl.jena.util.PrintUtil; public class GraphCSVTest extends Assert { @Test public void testGraphCSV() throws Exception { + //String file = "src/test/resources/HEFCE_organogram_senior_data_31032011.csv";test.csv String file = "src/test/resources/test.csv"; Model csv = ModelFactory.createModelForGraph(new GraphCSV(file)); @@ -67,4 +69,74 @@ public class GraphCSVTest extends Assert { StageGenerator stageGenerator = new StageGeneratorPropertyTable(orig) ; StageBuilder.setGenerator(ARQ.getContext(), stageGenerator) ; } + + //http://www.w3.org/TR/csvw-ucr/#UC-OrganogramData + //2.4 Use Case #4 - Publication of public sector roles and salaries + @Test + public void testUseCase4(){ + String file = "src/test/resources/HEFCE_organogram_senior_data_31032011.csv"; + + Model csv = 
ModelFactory.createModelForGraph(new GraphCSV(file)); + assertEquals(72, csv.size()); + + Query query = QueryFactory + .create("PREFIX : <src/test/resources/HEFCE_organogram_senior_data_31032011.csv#> SELECT ?name ?unit {?x :Name ?name ; :Unit ?unit ; :Actual%20Pay%20Floor%20%28%A3%29 ?floor ; :Actual%20Pay%20Ceiling%20%28%A3%29 ?ceiling . FILTER(?floor > 100000 && ?ceiling <120000 )}"); + + QueryExecution qexec = QueryExecutionFactory.create(query, csv); + ResultSet results = qexec.execSelect(); + + assertTrue(results.hasNext()); + QuerySolution soln = results.nextSolution(); + assertEquals( "David Sweeney", soln.getLiteral("name").getString()); + assertEquals( "Research, Innovation and Skills", soln.getLiteral("unit").getString()); + + assertFalse(results.hasNext()); + } + + + //http://www.w3.org/TR/csvw-ucr/#UC-JournalArticleSearch + //2.6 Use Case #6 - Journal Article Solr Search Results + @Test + public void testUseCase6(){ + String file = "src/test/resources/PLOSone-search-results.csv"; + + Model csv = ModelFactory.createModelForGraph(new GraphCSV(file)); + assertEquals(30, csv.size()); + + Query query = QueryFactory + .create("PREFIX : <src/test/resources/PLOSone-search-results.csv#> SELECT ?author {?x :author ?author ; :doi '10.1371/journal.pone.0095156' }"); + + QueryExecution qexec = QueryExecutionFactory.create(query, csv); + ResultSet results = qexec.execSelect(); + + assertTrue(results.hasNext()); + QuerySolution soln = results.nextSolution(); + assertEquals( "Oshrat Raz,Dorit L Lev,Alexander Battler,Eli I Lev", soln.getLiteral("author").getString()); + + assertFalse(results.hasNext()); + } + + //http://www.w3.org/TR/csvw-ucr/#UC-PaloAltoTreeData + //2.11 Use Case #11 - City of Palo Alto Tree Data + @Test + public void testUseCase11(){ + String file = "src/test/resources/Palo_Alto_Trees.csv"; + + Model csv = ModelFactory.createModelForGraph(new GraphCSV(file)); + assertEquals(199, csv.size()); + + Query query = QueryFactory + .create("PREFIX : 
<src/test/resources/Palo_Alto_Trees.csv#> SELECT ?longitude ?latitude {?x :Longitude ?longitude ; :Latitude ?latitude ; :Distance%20from%20Property ?distance . FILTER(?distance > 50 )}"); + + QueryExecution qexec = QueryExecutionFactory.create(query, csv); + ResultSet results = qexec.execSelect(); + + assertTrue(results.hasNext()); + QuerySolution soln = results.nextSolution(); + assertEquals( -122.1566921, soln.getLiteral("longitude").getDouble(), 0); + assertEquals( 37.4408948, soln.getLiteral("latitude").getDouble(), 0); + + assertFalse(results.hasNext()); + } + } http://git-wip-us.apache.org/repos/asf/jena/blob/e7e92294/src/test/resources/HEFCE_organogram_senior_data_31032011.csv ---------------------------------------------------------------------- diff --git a/src/test/resources/HEFCE_organogram_senior_data_31032011.csv b/src/test/resources/HEFCE_organogram_senior_data_31032011.csv new file mode 100644 index 0000000..77df38f --- /dev/null +++ b/src/test/resources/HEFCE_organogram_senior_data_31032011.csv @@ -0,0 +1,5 @@ +Post Unique Reference,Name,Grade,Job Title,Job/Team Function,Parent Department,Organisation,Unit,Contact Phone,Contact E-mail,Reports to Senior Post,Salary Cost of Reports (£),FTE,Actual Pay Floor (£),Actual Pay Ceiling (£),,Profession,Notes,Valid? 
+90115,Steve Egan,SCS1A,Deputy Chief Executive,Finance and Corporate Resources,Department for Business Innovation and Skills,Higher Education Funding Council for England,Finance and Corporate Resources,0117 931 7408,[email protected],90334,5883433,1,120000,124999,,Finance,,1 +90250,David Sweeney,SCS1A,Director,"Research, Innovation and Skills",Department for Business Innovation and Skills,Higher Education Funding Council for England,"Research, Innovation and Skills",0117 931 7304,[email protected],90334,1207171,1,110000,114999,,Policy,,1 +90284,Heather Fry,SCS1A,Director,Education and Participation,Department for Business Innovation and Skills,Higher Education Funding Council for England,Education and Participation,0117 931 7280,[email protected],90334,1645195,1,100000,104999,,Policy,,1 +90334,Sir Alan Langlands,SCS4,Chief Executive,Chief Executive,Department for Business Innovation and Skills,Higher Education Funding Council for England,HEFCE,0117 931 7300/7341,[email protected],xx,0,1,230000,234999,,Policy,,1 http://git-wip-us.apache.org/repos/asf/jena/blob/e7e92294/src/test/resources/PLOSone-search-results.csv ---------------------------------------------------------------------- diff --git a/src/test/resources/PLOSone-search-results.csv b/src/test/resources/PLOSone-search-results.csv new file mode 100644 index 0000000..ab6ae2c --- /dev/null +++ b/src/test/resources/PLOSone-search-results.csv @@ -0,0 +1,6 @@ +id,doi,publication_date,title_display,author +10.1371/journal.pone.0095131,10.1371/journal.pone.0095131,2014-06-05T00:00:00Z,"Genotyping of French <i>Bacillus anthracis</i> Strains Based on 31-Loci Multi Locus VNTR Analysis: Epidemiology, Marker Evaluation, and Update of the Internet Genotype Database","Simon Thierry,Christophe Tourterel,Philippe Le Flèche,Sylviane Derzelle,Neira Dekhil,Christiane Mendy,Cécile Colaneri,Gilles Vergnaud,Nora Madani" +10.1371/journal.pone.0095156,10.1371/journal.pone.0095156,2014-06-05T00:00:00Z,Pathways Mediating the 
Interaction between Endothelial Progenitor Cells (EPCs) and Platelets,"Oshrat Raz,Dorit L Lev,Alexander Battler,Eli I Lev" +10.1371/journal.pone.0095275,10.1371/journal.pone.0095275,2014-06-05T00:00:00Z,Identification of Divergent Protein Domains by Combining HMM-HMM Comparisons and Co-Occurrence Detection,"Amel Ghouila,Isabelle Florent,Fatma Zahra Guerfali,Nicolas Terrapon,Dhafer Laouini,Sadok Ben Yahia,Olivier Gascuel,Laurent Bréhélin" +10.1371/journal.pone.0096098,10.1371/journal.pone.0096098,2014-06-05T00:00:00Z,Baseline CD4 Cell Counts of Newly Diagnosed HIV Cases in China: 2006–2012,"Houlin Tang,Yurong Mao,Cynthia X Shi,Jing Han,Liyan Wang,Juan Xu,Qianqian Qin,Roger Detels,Zunyou Wu" +10.1371/journal.pone.0097475,10.1371/journal.pone.0097475,2014-06-05T00:00:00Z,Crystal Structure of the Open State of the <i>Neisseria gonorrhoeae</i> MtrE Outer Membrane Channel,"Hsiang-Ting Lei,Tsung-Han Chou,Chih-Chia Su,Jani Reddy Bolla,Nitin Kumar,Abhijith Radhakrishnan,Feng Long,Jared A Delmar,Sylvia V Do,Kanagalaghatta R Rajashankar,William M Shafer,Edward W Yu" \ No newline at end of file http://git-wip-us.apache.org/repos/asf/jena/blob/e7e92294/src/test/resources/Palo_Alto_Trees.csv ---------------------------------------------------------------------- diff --git a/src/test/resources/Palo_Alto_Trees.csv b/src/test/resources/Palo_Alto_Trees.csv new file mode 100644 index 0000000..c534c47 --- /dev/null +++ b/src/test/resources/Palo_Alto_Trees.csv @@ -0,0 +1,6 @@ +GID,Private,Tree ID,Admin Area,Side of Street,On Street,From Street,To Street,Street_Name,Situs Number,Address Estimated,Lot Side,Serial Number,Tree Site,Species,Trim Cycle,Diameter at Breast Ht,Trunk Count,Height Code,Canopy Width,Trunk Condition,Structure Condition,Crown Condition,Pest Condition,Condition Calced,Condition Rating,Vigor,Cable Presence,Stake Presence,Grow Space,Utility Presence,Distance from Property,Inventory Date,Staff Name,Comments,Zip,City 
Name,Longitude,Latitude,Protected,Designated,Heritage,Appraised Value,Hardscape,Identifier,Location Feature ID,Install Date,Feature Name,KML,FusionMarkerIcon +1,True,29,,,ADDISON AV,EMERSON ST,RAMONA ST,ADDISON AV,203,,Front,,2,Celtis australis,Large Tree Routine Prune,11,1,25-30,15-30,,Good,5,,,Good,2,False,False,Planting Strip,,44,10/18/2010,BK,,,Palo Alto,-122.1565172,37.4409561,False,False,False,,None,40,13872,,"Tree: 29 site 2 at 203 ADDISON AV, on ADDISON AV 44 from pl","<Point><coordinates>-122.156485,37.440963</coordinates></Point>",small_green +2,True,30,,,EMERSON ST,CHANNING AV,ADDISON AV,ADDISON AV,203,,Left,,1,Liquidambar styraciflua,Large Tree Routine Prune,11,1,50-55,15-30,Good,Good,5,,,Good,2,False,False,Planting Strip,,21,6/2/2010,BK,,,Palo Alto,-122.1567812,37.440951,False,False,False,,None,41,13872,,"Tree: 30 site 1 at 203 ADDISON AV, on EMERSON ST 21 from pl","<Point><coordinates>-122.156749,37.440958</coordinates></Point>",small_green +3,True,31,,,EMERSON ST,CHANNING AV,ADDISON AV,ADDISON AV,203,,Left,,2,Liquidambar styraciflua,Large Tree Routine Prune,11,1,40-45,15-30,Good,Good,5,,,Good,2,False,False,Planting Strip,,54,6/2/2010,BK,,,Palo Alto,-122.1566921,37.4408948,False,False,False,,Low,42,13872,,"Tree: 31 site 2 at 203 ADDISON AV, on EMERSON ST 54 from pl","<Point><coordinates>-122.156659,37.440902</coordinates></Point>",small_green +4,True,32,,,ADDISON AV,EMERSON ST,RAMONA ST,ADDISON AV,209,,Front,,1,Ulmus parvifolia,Large Tree Routine Prune,18,1,35-40,30-45,Good,Good,5,,,Good,2,False,False,Planting Strip,,21,6/2/2010,BK,,,Palo Alto,-122.1564595,37.4410143,False,False,False,,Medium,43,13873,,"Tree: 32 site 1 at 209 ADDISON AV, on ADDISON AV 21 from pl","<Point><coordinates>-122.156427,37.441022</coordinates></Point>",small_green +5,True,33,,,ADDISON AV,EMERSON ST,RAMONA ST,ADDISON AV,219,,Front,,1,Eriobotrya japonica,Large Tree Routine Prune,7,1,15-20,0-15,Good,Good,3,,,Good,1,False,False,Planting Strip,,16,6/1/2010,BK,,,Palo 
Alto,-122.1563676,37.441107,False,False,False,,None,44,13874,,"Tree: 33 site 1 at 219 ADDISON AV, on ADDISON AV 16 from pl","<Point><coordinates>-122.156335,37.441114</coordinates></Point>",small_green
