Revision: 5482 http://sourceforge.net/p/jump-pilot/code/5482 Author: edso Date: 2017-07-30 16:26:14 +0000 (Sun, 30 Jul 2017) Log Message: ----------- speeding up JML/GML reader when reading time attributes by parsing them lazily (on later access) - adding FlexibleFeature, FlexibleFeatureSchema - porting GMLReader to use the flexible classes
Modified Paths: -------------- core/trunk/src/com/vividsolutions/jump/io/GMLInputTemplate.java core/trunk/src/com/vividsolutions/jump/io/GMLReader.java Added Paths: ----------- core/trunk/src/com/vividsolutions/jump/feature/FlexibleFeature.java core/trunk/src/com/vividsolutions/jump/feature/FlexibleFeatureSchema.java Added: core/trunk/src/com/vividsolutions/jump/feature/FlexibleFeature.java =================================================================== --- core/trunk/src/com/vividsolutions/jump/feature/FlexibleFeature.java (rev 0) +++ core/trunk/src/com/vividsolutions/jump/feature/FlexibleFeature.java 2017-07-30 16:26:14 UTC (rev 5482) @@ -0,0 +1,156 @@ +package com.vividsolutions.jump.feature; + +import java.security.InvalidParameterException; +import java.text.ParseException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Date; +import java.util.List; + +import com.vividsolutions.jts.geom.Geometry; +import com.vividsolutions.jump.util.FlexibleDateParser; + +/** + * a FlexibleFeature based on {@link BasicFeature} originally used by the + * GeoJSON reader. currently adding + * - "autoextends" by returning null for undefined attribs + * - lazy conversion of attributes (see {@link #getAttribute(int)}) + * currently types String, Date, Time, Timestamp + */ +public class FlexibleFeature extends BasicFeature { + private FlexibleFeatureSchema featureSchema; + + public FlexibleFeature(FlexibleFeatureSchema featureSchema) { + super(featureSchema); + this.featureSchema = featureSchema; + } + + @Override + public Object getAttribute(int i) { + if (i < 0) + throw new InvalidParameterException("index must be greater or equal zero"); + + Object attrib = null; + + Object[] attributes = super.getAttributes(); + // only grab attrib if stack holds it already + if (i < attributes.length) + attrib = attributes[i]; + + // OJ does not allow null geoms! 
+ if (i == featureSchema.getGeometryIndex()) { + // create and set an empty geom + if (attrib == null) { + attrib = featureSchema.createEmptyGeometry(); + setGeometry((Geometry) attrib); + } + } + // enforce String if schema says so + else if (featureSchema.getAttributeType(i).equals(AttributeType.STRING) && attrib != null + && !(attrib instanceof String)) { + attrib = String.valueOf(attrib); + } + // enforce date object if not converted already + else if (featureSchema.getAttributeType(i).equals(AttributeType.DATE) && attrib != null + && !AttributeType.DATE.toJavaClass().isInstance(attrib)) { + try { + Date d; + // the celleditor replaces us w/ a java.util.Date object + if (java.util.Date.class.isInstance(attrib)) + d = (java.util.Date) attrib; + else + d = FlexibleDateParser.getDefaultInstance().parse(attrib.toString(), false); + attrib = (d==null) ? null : new java.sql.Date(d.getTime()); + // update the attribute object, so the conversion does not happen on + // every getAttrib() + setAttribute(i, attrib); + } catch (ParseException e) { + // TODO: we should find a way to tell the user + attrib = null; + } + } + // enforce time object if not converted already + else if (featureSchema.getAttributeType(i).equals(AttributeType.TIME) && attrib != null + && !AttributeType.TIME.toJavaClass().isInstance(attrib)) { + try { + Date d; + // the celleditor replaces us w/ a java.util.Date object + if (java.util.Date.class.isInstance(attrib)) + d = (java.util.Date) attrib; + else + d = FlexibleDateParser.getDefaultInstance().parse(attrib.toString(), false); + attrib = (d==null) ? 
null : new java.sql.Time(d.getTime()); + // update the attribute object, so the conversion does not happen on + // every getAttrib() + setAttribute(i, attrib); + } catch (ParseException e) { + // TODO: we should find a way to tell the user + attrib = null; + } + } + // enforce timestamp object if not converted already + else if (featureSchema.getAttributeType(i).equals(AttributeType.TIMESTAMP) && attrib != null + && !AttributeType.TIMESTAMP.toJavaClass().isInstance(attrib)) { + try { + Date d; + // the celleditor replaces us w/ a java.util.Date object + if (java.util.Date.class.isInstance(attrib)) + d = (java.util.Date) attrib; + else + d = FlexibleDateParser.getDefaultInstance().parse(attrib.toString(), false); + attrib = (d==null) ? null : new java.sql.Timestamp(d.getTime()); + // update the attribute object, so the conversion does not happen on + // every getAttrib() + setAttribute(i, attrib); + } catch (ParseException e) { + // TODO: we should find a way to tell the user + attrib = null; + } + } + + return attrib; + } + + /** + * setting an attribute, fixing the underlying array in case the schema + * changed inbetween + */ + public void setAttribute(int attributeIndex, Object newAttribute) { + FeatureSchema schema = super.getSchema(); + Object[] oldAttribs = super.getAttributes(); + // add fields if schema changed in between + int diffCount = schema.getAttributeCount() - oldAttribs.length; + if (diffCount > 0) { + List attributes = new ArrayList(Arrays.asList(oldAttribs)); + attributes.addAll(Arrays.asList(new Object[diffCount])); + super.setAttributes(attributes.toArray()); + } + super.setAttribute(attributeIndex, newAttribute); + } + + /** + * setting the geometry by explicitly using the flexible setAttribute() method + * above + */ + public void setGeometry(Geometry geometry) { + setAttribute(getSchema().getGeometryIndex(), geometry); + } + + /** + * getting the geometry by explicitly using the flexible getAttribute() method + * above + */ + public Geometry 
getGeometry() { + return (Geometry) getAttribute(featureSchema.getGeometryIndex()); + } + + /** + * TODO: the method shouldn't be used anyway, still maybe we will have to + * implement it later + */ + @Override + public Object[] getAttributes() { + throw new UnsupportedOperationException("currently not implemented"); + } + +} Property changes on: core/trunk/src/com/vividsolutions/jump/feature/FlexibleFeature.java ___________________________________________________________________ Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Added: core/trunk/src/com/vividsolutions/jump/feature/FlexibleFeatureSchema.java =================================================================== --- core/trunk/src/com/vividsolutions/jump/feature/FlexibleFeatureSchema.java (rev 0) +++ core/trunk/src/com/vividsolutions/jump/feature/FlexibleFeatureSchema.java 2017-07-30 16:26:14 UTC (rev 5482) @@ -0,0 +1,61 @@ +package com.vividsolutions.jump.feature; + +import org.openjump.core.ui.util.GeometryUtils; + +import com.vividsolutions.jts.geom.Geometry; +import com.vividsolutions.jts.geom.GeometryFactory; +import com.vividsolutions.jump.workbench.Logger; + +/** + * a FlexibleFeatureSchema originally used by the GeoJSON reader. 
+ * extends the basic {@link FeatureSchema} by + * - allow changing attrib types on the fly + * - creates empty geoms matching a previous set geomType + */ +public class FlexibleFeatureSchema extends FeatureSchema { + Class geometryClass = null; + GeometryFactory geometryFactory = new GeometryFactory(); + + public FlexibleFeatureSchema() { + } + + public FlexibleFeatureSchema(FeatureSchema featureSchema) { + super(featureSchema); + } + + public void setAttributeType(int attributeIndex, AttributeType type) { + attributeTypes.set(attributeIndex, type); + } + + public void setAttributeType(String name, AttributeType type) { + setAttributeType(super.getAttributeIndex(name), type); + } + + public void setGeometryType(Class clazz) { + geometryClass = clazz; + } + + public Class getGeometryType() { + return geometryClass; + } + + /** + * creates an empty geometry matching the geom type set already or an empty + * geom collection if that fails + * + * @return geometry + */ + public Geometry createEmptyGeometry() { + if (geometryClass != null) { + try { + return GeometryUtils + .createEmptyGeometry(geometryClass, geometryFactory); + } catch (Exception e) { + Logger.debug(e); + } + } + + return geometryFactory.createGeometryCollection(null); + } + +} Property changes on: core/trunk/src/com/vividsolutions/jump/feature/FlexibleFeatureSchema.java ___________________________________________________________________ Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Modified: core/trunk/src/com/vividsolutions/jump/io/GMLInputTemplate.java =================================================================== --- core/trunk/src/com/vividsolutions/jump/io/GMLInputTemplate.java 2017-07-30 16:11:49 UTC (rev 5481) +++ core/trunk/src/com/vividsolutions/jump/io/GMLInputTemplate.java 2017-07-30 16:26:14 UTC (rev 5482) @@ -36,7 +36,10 @@ import java.io.IOException; import java.io.InputStream; import java.io.Reader; +import java.lang.reflect.Constructor; +import 
java.lang.reflect.InvocationTargetException; import java.util.ArrayList; +import java.util.HashMap; import org.xml.sax.Attributes; import org.xml.sax.InputSource; @@ -46,7 +49,7 @@ import org.xml.sax.helpers.DefaultHandler; import com.vividsolutions.jump.feature.AttributeType; -import com.vividsolutions.jump.feature.FeatureSchema; +import com.vividsolutions.jump.feature.FlexibleFeatureSchema; import com.vividsolutions.jump.util.FlexibleDateParser; @@ -87,6 +90,16 @@ private String lastStartTag_qName; private Attributes lastStartTag_atts; + // a list of date attrib types that are treated differently + static private ArrayList<AttributeType> dateAttributeTypes = new ArrayList(); + static { + dateAttributeTypes.add(AttributeType.DATE); + dateAttributeTypes.add(AttributeType.TIMESTAMP); + dateAttributeTypes.add(AttributeType.TIME); + } + + private HashMap<AttributeType, Constructor> constructorCache = new HashMap(); + /** * constructor - makes a new org.apache.xerces.parser and makes this class be the SAX * content and error handler. @@ -114,13 +127,13 @@ /** * Converts this GMLInputTemplate to a feature schema. 
*/ - public FeatureSchema toFeatureSchema() throws ParseException { + public FlexibleFeatureSchema toFeatureSchema() throws ParseException { if (!(loaded)) { throw new ParseException( "requested toFeatureSchema w/o loading the template"); } - FeatureSchema fcmd = new FeatureSchema(); + FlexibleFeatureSchema fcmd = new FlexibleFeatureSchema(); fcmd.addAttribute("GEOMETRY", AttributeType.GEOMETRY); @@ -291,86 +304,110 @@ val = xmlAtts.getValue(cd.valueAttribute); } - //have the value as a string, make it an object - if (cd.type == AttributeType.STRING || - cd.type == AttributeType.VARCHAR || - cd.type == AttributeType.LONGVARCHAR || - cd.type == AttributeType.CHAR || - cd.type == AttributeType.TEXT) { - return val; - } - - if (cd.type == AttributeType.INTEGER || - cd.type == AttributeType.SMALLINT || - cd.type == AttributeType.TINYINT) { - try { - //Was Long, but JUMP expects AttributeType.INTEGER to hold Integers. - //e.g. open JML file then save as Shapefile => get ClassCastException. - //Dave Blasby says there was a reason for changing it to Long, but - //can't remember -- suspects there were datasets whose INTEGER - //values didn't fit in an Integer. [Jon Aquino 1/13/2004] - - //Compromise -- try Long if Integer fails. Some other parts of JUMP - //won't like it (exceptions), but it's better than null. Actually I don't like - //this null business -- future: warn the user. 
[Jon Aquino 1/13/2004] - return Integer.parseInt(val); - } catch (Exception e) { - return null; + //try running the default constructor from string for the class defined in AttributeType + if (!dateAttributeTypes.contains(cd.type)) + try { + Constructor c = null; + c = constructorCache.get(cd.type); + if (c == null) { + c = cd.type.toJavaClass().getConstructor(new Class[] { String.class }); + constructorCache.put(cd.type, c); } + + return c.newInstance(val); + } catch (InstantiationException | IllegalAccessException | IllegalArgumentException | InvocationTargetException + | SecurityException e) { + // TODO not sure if ignoring is safe, on the other hand we are currently + // processing lots of attributes, we can't just spit out a plethora of + // error + } catch (NoSuchMethodException e) { + // TODO maybe printing warning for classes without a String + // constructor?; + } + + // we can return the string safely as the attrib is lazily converted by FlexibleFeature + // REASON: utilizing FlexibleDateParser slowed down reading JML by magnitudes here + if (dateAttributeTypes.contains(cd.type)) { + return val; } - - if (cd.type == AttributeType.LONG || cd.type == AttributeType.BIGINT) { - try { - return Long.parseLong(val); - } catch (Exception e) { - return null; - } - } - - if (cd.type == AttributeType.DOUBLE || - cd.type == AttributeType.REAL || - cd.type == AttributeType.FLOAT || - cd.type == AttributeType.NUMERIC || - cd.type == AttributeType.DECIMAL || - cd.type == AttributeType.BIGDECIMAL) { - try { - return new Double(val); - } catch (Exception e) { - return null; - } - } - //Adding date support. Can we throw an exception if an exception - //occurs or if the type is unrecognized? 
[Jon Aquino] - if (cd.type == AttributeType.DATE || - cd.type == AttributeType.TIMESTAMP || - cd.type == AttributeType.TIME) { - try { - return dateParser.parse(val, false); - } catch (Exception e) { - return null; - } - } + // previous code for attrib parsing, rplaced by the above 07.2017 [ede] +// //have the value as a string, make it an object +// if (cd.type == AttributeType.STRING || +// cd.type == AttributeType.VARCHAR || +// cd.type == AttributeType.LONGVARCHAR || +// cd.type == AttributeType.CHAR || +// cd.type == AttributeType.TEXT) { +// return val; +// } +// +// if (cd.type == AttributeType.INTEGER || +// cd.type == AttributeType.SMALLINT || +// cd.type == AttributeType.TINYINT) { +// try { +// //Was Long, but JUMP expects AttributeType.INTEGER to hold Integers. +// //e.g. open JML file then save as Shapefile => get ClassCastException. +// //Dave Blasby says there was a reason for changing it to Long, but +// //can't remember -- suspects there were datasets whose INTEGER +// //values didn't fit in an Integer. [Jon Aquino 1/13/2004] +// +// //Compromise -- try Long if Integer fails. Some other parts of JUMP +// //won't like it (exceptions), but it's better than null. Actually I don't like +// //this null business -- future: warn the user. [Jon Aquino 1/13/2004] +// return Integer.parseInt(val); +// } catch (Exception e) { +// return null; +// } +// } +// +// if (cd.type == AttributeType.LONG || cd.type == AttributeType.BIGINT) { +// try { +// return Long.parseLong(val); +// } catch (Exception e) { +// return null; +// } +// } +// +// if (cd.type == AttributeType.DOUBLE || +// cd.type == AttributeType.REAL || +// cd.type == AttributeType.FLOAT || +// cd.type == AttributeType.NUMERIC || +// cd.type == AttributeType.DECIMAL || +// cd.type == AttributeType.BIGDECIMAL) { +// try { +// return new Double(val); +// } catch (Exception e) { +// return null; +// } +// } +// +//// //Adding date support. 
Can we throw an exception if an exception +//// //occurs or if the type is unrecognized? [Jon Aquino] +//// if (dateAttributeTypes.contains(cd.type)) { +//// try { +//// return dateParser.parse(val, false); +//// } catch (Exception e) { +//// return null; +//// } +//// } +// +// if (cd.type == AttributeType.BOOLEAN || cd.type == AttributeType.BIT) { +// try { +// return Boolean.parseBoolean(val); +// } catch (Exception e) { +// return null; +// } +// } +// +// if (cd.type == AttributeType.OBJECT) +// { +// return val; // the GML file has text in it and we want to convert it to an "object" +// // just return a String since we dont know anything else about it! +// } - if (cd.type == AttributeType.BOOLEAN || cd.type == AttributeType.BIT) { - try { - return Boolean.parseBoolean(val); - } catch (Exception e) { - return null; - } - } - - if (cd.type == AttributeType.OBJECT) - { - return val; // the GML file has text in it and we want to convert it to an "object" - // just return a String since we dont know anything else about it! - } - return null; //unknown type } - private FlexibleDateParser dateParser = new FlexibleDateParser(); - //////////////////////////////////////////////////////////////////// // Error handlers. 
//////////////////////////////////////////////////////////////////// Modified: core/trunk/src/com/vividsolutions/jump/io/GMLReader.java =================================================================== --- core/trunk/src/com/vividsolutions/jump/io/GMLReader.java 2017-07-30 16:11:49 UTC (rev 5481) +++ core/trunk/src/com/vividsolutions/jump/io/GMLReader.java 2017-07-30 16:26:14 UTC (rev 5482) @@ -31,9 +31,39 @@ */ package com.vividsolutions.jump.io; -import com.vividsolutions.jts.geom.*; +import java.io.BufferedInputStream; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.Reader; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.StringTokenizer; +import java.util.regex.Pattern; + +import org.xml.sax.Attributes; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; +import org.xml.sax.SAXParseException; +import org.xml.sax.XMLReader; +import org.xml.sax.helpers.DefaultHandler; + +import com.vividsolutions.jts.geom.Coordinate; +import com.vividsolutions.jts.geom.Geometry; +import com.vividsolutions.jts.geom.GeometryFactory; +import com.vividsolutions.jts.geom.LineString; +import com.vividsolutions.jts.geom.LinearRing; +import com.vividsolutions.jts.geom.Point; +import com.vividsolutions.jts.geom.Polygon; +import com.vividsolutions.jts.geom.PrecisionModel; import com.vividsolutions.jump.I18N; -import com.vividsolutions.jump.feature.*; +import com.vividsolutions.jump.feature.AttributeType; +import com.vividsolutions.jump.feature.Feature; +import com.vividsolutions.jump.feature.FeatureCollection; +import com.vividsolutions.jump.feature.FeatureDataset; +import com.vividsolutions.jump.feature.FlexibleFeature; +import com.vividsolutions.jump.feature.FlexibleFeatureSchema; import com.vividsolutions.jump.task.DummyTaskMonitor; import com.vividsolutions.jump.task.TaskMonitor; import com.vividsolutions.jump.task.TaskMonitorSupport; @@ -40,15 +70,6 @@ import 
com.vividsolutions.jump.task.TaskMonitorUtil; import com.vividsolutions.jump.util.Timer; -import org.xml.sax.*; -import org.xml.sax.helpers.DefaultHandler; - -import java.io.*; -import java.util.Collection; -import java.util.List; -import java.util.ArrayList; -import java.util.StringTokenizer; - /** * GMLReader is a {@link JUMPReader} specialized to read GML files. * @@ -264,7 +285,7 @@ private Feature currentFeature; private int currentGeometryNumb = 1; private FeatureCollection fc; - private FeatureSchema fcmd; // list of geometries + private FlexibleFeatureSchema fcmd; // list of geometries private Geometry finalGeometry; // list of geometrycollections - list of list of // geometry private String current_geom_qname = ""; @@ -287,7 +308,7 @@ // (Coordinate) private Polygon polygon; // polygon - // higherlevel geomery object + // higher level geometry object private ArrayList<ArrayList> recursivegeometry = new ArrayList<>(); // low-level geometry objects @@ -305,6 +326,15 @@ */ public boolean multiItemsAsLists = false; + // precompiled patterns for performance reasons + private static Pattern regex_geomMultiPoint, regex_geomMultiLineString, regex_geomMultiPolygon, regex_geomLinearRing; + static { + regex_geomMultiPoint = Pattern.compile("^(?i)(gml:)?multipoint$"); + regex_geomMultiLineString = Pattern.compile("^(?i)(gml:)?multilinestring$"); + regex_geomMultiPolygon = Pattern.compile("^(?i)(gml:)?multipolygon$"); + regex_geomLinearRing = Pattern.compile("^(?i)(gml:)?linearring$"); + } + /** * Constructor - make a SAXParser and have this GMLReader be its * ContentHandler and ErrorHandler. 
@@ -435,13 +465,13 @@ linearRing = null; } - if (current_geom_qname.matches("^(?i)(gml:)?multipoint$")) + if (regex_geomMultiPoint.matcher(current_geom_qname).matches()) finalGeometry = geometryFactory .createMultiPoint(geometry.toArray(new Point[0])); - else if (current_geom_qname.matches("^(?i)(gml:)?multilinestring$")) + else if (regex_geomMultiLineString.matcher(current_geom_qname).matches()) finalGeometry = geometryFactory .createMultiLineString((geometry.toArray(new LineString[0]))); - else if (current_geom_qname.matches("^(?i)(gml:)?multipolygon$")) + else if (regex_geomMultiPolygon.matcher(current_geom_qname).matches()) finalGeometry = geometryFactory .createMultiPolygon((geometry.toArray(new Polygon[0]))); // else if (current_geom_qname.matches("^(?i)(gml:)?linearring$")) @@ -841,7 +871,7 @@ && (qName.compareToIgnoreCase(GMLinput.featureTag) == 0)) { // found the feature tag // System.out.println("found feature"); - currentFeature = new BasicFeature(fcmd); + currentFeature = new FlexibleFeature(fcmd); STATE = STATE_GET_COLUMNS; SRID = 0;// default SRID (reset for each feature, but should be constant // for a featurecollection) ------------------------------------------------------------------------------ Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot _______________________________________________ Jump-pilot-devel mailing list Jump-pilot-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/jump-pilot-devel