Hi,
in order to speed up rendering in QGIS as a part of my GSoC project,
I've took some time to profile reading of shapefiles in OGR. From the
results I'd like to suggest some changes that significantly contribute
to the speed of data retrieval. On a test shapefile of a road network
(about 100 thousand polylines), I have seen 3-4 times faster retrieval
when I've implemented the following changes:
1. allow users of OGR library set which fields they really need. Most
of time is wasted by fetching all the attributes, but typically none
or just one attribute is necessary when rendering. For that, I've
added the following call:
OGRLayer::SetDesiredFields(int numFields, int* fields);
The user passes an array of ints, each item tells whether the field
should be fetched (1) or not (0). The numFields tells the size of the
array. If numFields < 0 then the layer will return all fields (default
behavior). The driver implementation then just before fetching a field
checks whether to fetch the field or not. This optimization could be
easily used in any driver, I've implemented it only for shapefiles.
The speedup will vary depending on the size of the attribute table and
number of desired fields. On my test shapefile containing 16 fields,
the data has been fetched up to 3x faster when no fields were set as
desired.
2. reuse allocated memory. When a new shape is going to be read within
shapelib, new OGRShape object and its coordinate arrays are allocated.
By reusing one such temporary OGRShape object within a layer together
with the coordinate arrays (only allowing them to grow - to
accommodate larger shapes), I have obtained further speedup of about
30%.
I'm attaching patch for both cases. I'd like to hear from you if there
is interest in making the OGR library faster using the suggested
strategies. I don't expect the patch to be applied as-is since it is
kind of a quick hack, though I hope it can serve well for a
discussion.
If there is interest, I think of some further optimizations by reusing
OGRFeature instances and possibly also geometries - I expect further
performance improvement of 10-20% in read access to features for all
drivers.
Regards
Martin
Index: ogr/ogr_feature.h
===================================================================
--- ogr/ogr_feature.h (revision 20106)
+++ ogr/ogr_feature.h (working copy)
@@ -130,6 +130,7 @@
int GetFieldCount() { return nFieldCount; }
OGRFieldDefn *GetFieldDefn( int i );
+ OGRFieldDefn**GetFieldDefns() { return papoFieldDefn; }
int GetFieldIndex( const char * );
void AddFieldDefn( OGRFieldDefn * );
Index: ogr/ogrsf_frmts/shape/shapefil.h
===================================================================
--- ogr/ogrsf_frmts/shape/shapefil.h (revision 20106)
+++ ogr/ogrsf_frmts/shape/shapefil.h (working copy)
@@ -344,6 +344,10 @@
double dfMMax;
int bMeasureIsUsed;
+
+ int nMaxParts;
+ int nMaxPoints;
+
} SHPObject;
/* -------------------------------------------------------------------- */
@@ -367,7 +371,7 @@
double * padfMinBound, double * padfMaxBound );
SHPObject SHPAPI_CALL1(*)
- SHPReadObject( SHPHandle hSHP, int iShape );
+ SHPReadObject( SHPHandle hSHP, int iShape, SHPObject * oldObject );
int SHPAPI_CALL
SHPWriteObject( SHPHandle hSHP, int iShape, SHPObject * psObject );
Index: ogr/ogrsf_frmts/shape/ogrshapelayer.cpp
===================================================================
--- ogr/ogrsf_frmts/shape/ogrshapelayer.cpp (revision 20106)
+++ ogr/ogrsf_frmts/shape/ogrshapelayer.cpp (working copy)
@@ -55,6 +55,17 @@
OGRwkbGeometryType eReqType )
{
+ m_psTmpShape = (SHPObject *) calloc(1,sizeof(SHPObject));
+ m_psTmpShape->panPartStart = NULL;
+ m_psTmpShape->panPartType = NULL;
+ m_psTmpShape->padfX = NULL;
+ m_psTmpShape->padfY = NULL;
+ m_psTmpShape->padfZ = NULL;
+ m_psTmpShape->padfM = NULL;
+ m_psTmpShape->nMaxParts = 0;
+ m_psTmpShape->nMaxPoints = 0;
+ //m_psTmpShape = NULL;
+
poSRS = poSRSIn;
pszFullName = CPLStrdup(pszName);
@@ -80,6 +91,7 @@
hSHP, hDBF );
eRequestedGeomType = eReqType;
+
}
/************************************************************************/
@@ -95,6 +107,8 @@
(int) m_nFeaturesRead,
poFeatureDefn->GetName() );
}
+
+ //SHPDestroyObject(m_psTmpShape);
CPLFree( panMatchingFIDs );
panMatchingFIDs = NULL;
@@ -291,7 +305,7 @@
{
SHPObject *psShape;
- psShape = SHPReadObject( hSHP, iShapeId );
+ psShape = SHPReadObject( hSHP, iShapeId, m_psTmpShape );
// do not trust degenerate bounds or bounds on null shapes.
if( psShape == NULL || psShape->dfXMin == psShape->dfXMax
@@ -299,26 +313,26 @@
|| psShape->nSHPType == SHPT_NULL )
{
poFeature = SHPReadOGRFeature( hSHP, hDBF, poFeatureDefn,
- iShapeId, psShape );
+ iShapeId, psShape, m_psTmpShape, m_nDesiredCount, m_pnDesiredFields );
}
else if( m_sFilterEnvelope.MaxX < psShape->dfXMin
|| m_sFilterEnvelope.MaxY < psShape->dfYMin
|| psShape->dfXMax < m_sFilterEnvelope.MinX
|| psShape->dfYMax < m_sFilterEnvelope.MinY )
{
- SHPDestroyObject(psShape);
+ //SHPDestroyObject(psShape);
poFeature = NULL;
}
else
{
poFeature = SHPReadOGRFeature( hSHP, hDBF, poFeatureDefn,
- iShapeId, psShape );
+ iShapeId, psShape, m_psTmpShape, m_nDesiredCount, m_pnDesiredFields );
}
}
else
{
poFeature = SHPReadOGRFeature( hSHP, hDBF, poFeatureDefn,
- iShapeId, NULL );
+ iShapeId, NULL, m_psTmpShape, m_nDesiredCount, m_pnDesiredFields );
}
return poFeature;
@@ -413,7 +427,7 @@
{
OGRFeature *poFeature = NULL;
- poFeature = SHPReadOGRFeature( hSHP, hDBF, poFeatureDefn, nFeatureId, NULL);
+ poFeature = SHPReadOGRFeature( hSHP, hDBF, poFeatureDefn, nFeatureId, NULL, m_psTmpShape, m_nDesiredCount, m_pnDesiredFields );
if( poFeature != NULL )
{
@@ -1240,7 +1254,7 @@
{
SHPObject *hObject;
- hObject = SHPReadObject( hSHP, iShape );
+ hObject = SHPReadObject( hSHP, iShape, NULL );
if( hObject == NULL )
eErr = OGRERR_FAILURE;
else if( SHPWriteObject( hNewSHP, -1, hObject ) == -1 )
Index: ogr/ogrsf_frmts/shape/shape2ogr.cpp
===================================================================
--- ogr/ogrsf_frmts/shape/shape2ogr.cpp (revision 20106)
+++ ogr/ogrsf_frmts/shape/shape2ogr.cpp (working copy)
@@ -95,14 +95,14 @@
/* representation. */
/************************************************************************/
-OGRGeometry *SHPReadOGRObject( SHPHandle hSHP, int iShape, SHPObject *psShape )
+OGRGeometry *SHPReadOGRObject( SHPHandle hSHP, int iShape, SHPObject *psShape, SHPObject* psTmpShape )
{
// CPLDebug( "Shape", "SHPReadOGRObject( iShape=%d )\n", iShape );
OGRGeometry *poOGR = NULL;
if( psShape == NULL )
- psShape = SHPReadObject( hSHP, iShape );
+ psShape = SHPReadObject( hSHP, iShape, psTmpShape );
if( psShape == NULL )
{
@@ -442,7 +442,8 @@
/* -------------------------------------------------------------------- */
/* Cleanup shape, and set feature id. */
/* -------------------------------------------------------------------- */
- SHPDestroyObject( psShape );
+ if (!psTmpShape)
+ SHPDestroyObject( psShape );
return poOGR;
}
@@ -965,7 +966,8 @@
OGRFeature *SHPReadOGRFeature( SHPHandle hSHP, DBFHandle hDBF,
OGRFeatureDefn * poDefn, int iShape,
- SHPObject *psShape )
+ SHPObject *psShape, SHPObject* psTmpShape,
+ int nDesiredCount, int* pnDesiredFields)
{
if( iShape < 0
@@ -994,7 +996,7 @@
if( hSHP != NULL )
{
OGRGeometry* poGeometry = NULL;
- poGeometry = SHPReadOGRObject( hSHP, iShape, psShape );
+ poGeometry = SHPReadOGRObject( hSHP, iShape, psShape, psTmpShape );
/*
* NOTE - mloskot:
@@ -1014,6 +1016,9 @@
for( int iField = 0; iField < poDefn->GetFieldCount(); iField++ )
{
+ if (nDesiredCount >= 0 && iField < nDesiredCount && !pnDesiredFields[iField])
+ continue;
+
// Skip null fields.
if( DBFIsAttributeNULL( hDBF, iShape, iField ) )
continue;
Index: ogr/ogrsf_frmts/shape/shptree.c
===================================================================
--- ogr/ogrsf_frmts/shape/shptree.c (revision 20106)
+++ ogr/ogrsf_frmts/shape/shptree.c (working copy)
@@ -261,7 +261,7 @@
{
SHPObject *psShape;
- psShape = SHPReadObject( hSHP, iShape );
+ psShape = SHPReadObject( hSHP, iShape, NULL );
if( psShape != NULL )
{
SHPTreeAddShapeId( psTree, psShape );
Index: ogr/ogrsf_frmts/shape/shpopen.c
===================================================================
--- ogr/ogrsf_frmts/shape/shpopen.c (revision 20106)
+++ ogr/ogrsf_frmts/shape/shpopen.c (working copy)
@@ -1535,7 +1535,7 @@
/************************************************************************/
SHPObject SHPAPI_CALL1(*)
-SHPReadObject( SHPHandle psSHP, int hEntity )
+SHPReadObject( SHPHandle psSHP, int hEntity, SHPObject* oldShape )
{
int nEntitySize, nRequiredSize;
@@ -1615,7 +1615,10 @@
/* -------------------------------------------------------------------- */
/* Allocate and minimally initialize the object. */
/* -------------------------------------------------------------------- */
- psShape = (SHPObject *) calloc(1,sizeof(SHPObject));
+ if (oldShape)
+ psShape = oldShape;
+ else
+ psShape = (SHPObject *) calloc(1,sizeof(SHPObject));
psShape->nShapeId = hEntity;
psShape->bMeasureIsUsed = FALSE;
@@ -1624,7 +1627,7 @@
snprintf(pszErrorMsg, 128, "Corrupted .shp file : shape %d : nEntitySize = %d",
hEntity, nEntitySize);
psSHP->sHooks.Error( pszErrorMsg );
- SHPDestroyObject(psShape);
+ if (!oldShape) SHPDestroyObject(psShape);
return NULL;
}
memcpy( &psShape->nSHPType, psSHP->pabyRec + 8, 4 );
@@ -1649,7 +1652,7 @@
snprintf(pszErrorMsg, 128, "Corrupted .shp file : shape %d : nEntitySize = %d",
hEntity, nEntitySize);
psSHP->sHooks.Error( pszErrorMsg );
- SHPDestroyObject(psShape);
+ if (!oldShape) SHPDestroyObject(psShape);
return NULL;
}
/* -------------------------------------------------------------------- */
@@ -1681,7 +1684,7 @@
snprintf(pszErrorMsg, 128, "Corrupted .shp file : shape %d, nPoints=%d, nParts=%d.",
hEntity, nPoints, nParts);
psSHP->sHooks.Error( pszErrorMsg );
- SHPDestroyObject(psShape);
+ if (!oldShape) SHPDestroyObject(psShape);
return NULL;
}
@@ -1704,19 +1707,35 @@
snprintf(pszErrorMsg, 128, "Corrupted .shp file : shape %d, nPoints=%d, nParts=%d, nEntitySize=%d.",
hEntity, nPoints, nParts, nEntitySize);
psSHP->sHooks.Error( pszErrorMsg );
- SHPDestroyObject(psShape);
+ if (!oldShape) SHPDestroyObject(psShape);
return NULL;
}
+ if (oldShape)
+ {
+ if (nPoints > psShape->nMaxPoints)
+ {
+ free(psShape->padfX);
+ free(psShape->padfY);
+ free(psShape->padfZ);
+ free(psShape->padfM);
+ psShape->padfX = (double *) calloc(nPoints,sizeof(double));
+ psShape->padfY = (double *) calloc(nPoints,sizeof(double));
+ psShape->padfZ = (double *) calloc(nPoints,sizeof(double));
+ psShape->padfM = (double *) calloc(nPoints,sizeof(double));
+ psShape->nMaxPoints = nPoints;
+ }
+ if (nParts > psShape->nMaxParts)
+ {
+ free(psShape->panPartStart);
+ free(psShape->panPartType);
+ psShape->panPartStart = (int *) calloc(nParts,sizeof(int));
+ psShape->panPartType = (int *) calloc(nParts,sizeof(int));
+ psShape->nMaxParts = nParts;
+ }
+ }
psShape->nVertices = nPoints;
- psShape->padfX = (double *) calloc(nPoints,sizeof(double));
- psShape->padfY = (double *) calloc(nPoints,sizeof(double));
- psShape->padfZ = (double *) calloc(nPoints,sizeof(double));
- psShape->padfM = (double *) calloc(nPoints,sizeof(double));
-
psShape->nParts = nParts;
- psShape->panPartStart = (int *) calloc(nParts,sizeof(int));
- psShape->panPartType = (int *) calloc(nParts,sizeof(int));
if (psShape->padfX == NULL ||
psShape->padfY == NULL ||
@@ -1729,7 +1748,7 @@
"Not enough memory to allocate requested memory (nPoints=%d, nParts=%d) for shape %d. "
"Probably broken SHP file", hEntity, nPoints, nParts );
psSHP->sHooks.Error( pszErrorMsg );
- SHPDestroyObject(psShape);
+ if (!oldShape) SHPDestroyObject(psShape);
return NULL;
}
@@ -1751,7 +1770,7 @@
snprintf(pszErrorMsg, 128, "Corrupted .shp file : shape %d : panPartStart[%d] = %d, nVertices = %d",
hEntity, i, psShape->panPartStart[i], psShape->nVertices);
psSHP->sHooks.Error( pszErrorMsg );
- SHPDestroyObject(psShape);
+ if (!oldShape) SHPDestroyObject(psShape);
return NULL;
}
if (i > 0 && psShape->panPartStart[i] <= psShape->panPartStart[i-1])
@@ -1759,7 +1778,7 @@
snprintf(pszErrorMsg, 128, "Corrupted .shp file : shape %d : panPartStart[%d] = %d, panPartStart[%d] = %d",
hEntity, i, psShape->panPartStart[i], i - 1, psShape->panPartStart[i - 1]);
psSHP->sHooks.Error( pszErrorMsg );
- SHPDestroyObject(psShape);
+ if (!oldShape) SHPDestroyObject(psShape);
return NULL;
}
}
@@ -1861,7 +1880,7 @@
snprintf(pszErrorMsg, 128, "Corrupted .shp file : shape %d : nEntitySize = %d",
hEntity, nEntitySize);
psSHP->sHooks.Error( pszErrorMsg );
- SHPDestroyObject(psShape);
+ if (!oldShape) SHPDestroyObject(psShape);
return NULL;
}
memcpy( &nPoints, psSHP->pabyRec + 44, 4 );
@@ -1873,7 +1892,7 @@
snprintf(pszErrorMsg, 128, "Corrupted .shp file : shape %d : nPoints = %d",
hEntity, nPoints);
psSHP->sHooks.Error( pszErrorMsg );
- SHPDestroyObject(psShape);
+ if (!oldShape) SHPDestroyObject(psShape);
return NULL;
}
@@ -1887,15 +1906,26 @@
snprintf(pszErrorMsg, 128, "Corrupted .shp file : shape %d : nPoints = %d, nEntitySize = %d",
hEntity, nPoints, nEntitySize);
psSHP->sHooks.Error( pszErrorMsg );
- SHPDestroyObject(psShape);
+ if (!oldShape) SHPDestroyObject(psShape);
return NULL;
}
+ if (oldShape)
+ {
+ if (nPoints > psShape->nMaxPoints)
+ {
+ free(psShape->padfX);
+ free(psShape->padfY);
+ free(psShape->padfZ);
+ free(psShape->padfM);
+ psShape->padfX = (double *) calloc(nPoints,sizeof(double));
+ psShape->padfY = (double *) calloc(nPoints,sizeof(double));
+ psShape->padfZ = (double *) calloc(nPoints,sizeof(double));
+ psShape->padfM = (double *) calloc(nPoints,sizeof(double));
+ psShape->nMaxPoints = nPoints;
+ }
+ }
psShape->nVertices = nPoints;
- psShape->padfX = (double *) calloc(nPoints,sizeof(double));
- psShape->padfY = (double *) calloc(nPoints,sizeof(double));
- psShape->padfZ = (double *) calloc(nPoints,sizeof(double));
- psShape->padfM = (double *) calloc(nPoints,sizeof(double));
if (psShape->padfX == NULL ||
psShape->padfY == NULL ||
@@ -1988,18 +2018,29 @@
{
int nOffset;
+ if (oldShape)
+ {
+ if (1 > psShape->nMaxPoints)
+ {
+ free(psShape->padfX);
+ free(psShape->padfY);
+ free(psShape->padfZ);
+ free(psShape->padfM);
+ psShape->padfX = (double *) calloc(1,sizeof(double));
+ psShape->padfY = (double *) calloc(1,sizeof(double));
+ psShape->padfZ = (double *) calloc(1,sizeof(double));
+ psShape->padfM = (double *) calloc(1,sizeof(double));
+ psShape->nMaxPoints = 1;
+ }
+ }
psShape->nVertices = 1;
- psShape->padfX = (double *) calloc(1,sizeof(double));
- psShape->padfY = (double *) calloc(1,sizeof(double));
- psShape->padfZ = (double *) calloc(1,sizeof(double));
- psShape->padfM = (double *) calloc(1,sizeof(double));
if (20 + 8 + (( psShape->nSHPType == SHPT_POINTZ ) ? 8 : 0)> nEntitySize)
{
snprintf(pszErrorMsg, 128, "Corrupted .shp file : shape %d : nEntitySize = %d",
hEntity, nEntitySize);
psSHP->sHooks.Error( pszErrorMsg );
- SHPDestroyObject(psShape);
+ if (!oldShape) SHPDestroyObject(psShape);
return NULL;
}
memcpy( psShape->padfX, psSHP->pabyRec + 12, 8 );
Index: ogr/ogrsf_frmts/shape/ogrshape.h
===================================================================
--- ogr/ogrsf_frmts/shape/ogrshape.h (revision 20106)
+++ ogr/ogrsf_frmts/shape/ogrshape.h (working copy)
@@ -39,8 +39,9 @@
/* ==================================================================== */
OGRFeature *SHPReadOGRFeature( SHPHandle hSHP, DBFHandle hDBF,
OGRFeatureDefn * poDefn, int iShape,
- SHPObject *psShape );
-OGRGeometry *SHPReadOGRObject( SHPHandle hSHP, int iShape, SHPObject *psShape );
+ SHPObject *psShape, SHPObject* psTmpShape,
+ int nDesiredCount, int* pnDesiredFields );
+OGRGeometry *SHPReadOGRObject( SHPHandle hSHP, int iShape, SHPObject *psShape, SHPObject* psTmpShape );
OGRFeatureDefn *SHPReadOGRFeatureDefn( const char * pszName,
SHPHandle hSHP, DBFHandle hDBF );
OGRErr SHPWriteOGRFeature( SHPHandle hSHP, DBFHandle hDBF,
@@ -79,6 +80,8 @@
FILE *fpQIX;
int CheckForQIX();
+
+ SHPObject *m_psTmpShape;
public:
OGRErr CreateSpatialIndex( int nMaxDepth );
Index: ogr/ogrsf_frmts/ogrsf_frmts.h
===================================================================
--- ogr/ogrsf_frmts/ogrsf_frmts.h (revision 20106)
+++ ogr/ogrsf_frmts/ogrsf_frmts.h (working copy)
@@ -107,6 +107,8 @@
virtual const char *GetFIDColumn();
virtual const char *GetGeometryColumn();
+
+ virtual void SetDesiredFields(int num, int* fields);
int Reference();
int Dereference();
@@ -126,6 +128,9 @@
int m_nRefCount;
GIntBig m_nFeaturesRead;
+
+ int m_nDesiredCount;
+ int* m_pnDesiredFields;
};
Index: ogr/ogrsf_frmts/generic/ogrlayer.cpp
===================================================================
--- ogr/ogrsf_frmts/generic/ogrlayer.cpp (revision 20106)
+++ ogr/ogrsf_frmts/generic/ogrlayer.cpp (working copy)
@@ -50,6 +50,9 @@
m_poFilterGeom = NULL;
m_bFilterIsEnvelope = FALSE;
+
+ m_nDesiredCount = -1; // all fields
+ m_pnDesiredFields = NULL;
}
/************************************************************************/
@@ -1031,3 +1034,36 @@
((OGRLayer *) hLayer)->SetStyleTable( (OGRStyleTable *) hStyleTable);
}
+
+/************************************************************************/
+/* SetDesiredFields() */
+/************************************************************************/
+
+void OGRLayer::SetDesiredFields(int num, int* fields)
+{
+ if (m_pnDesiredFields)
+ free(m_pnDesiredFields);
+
+ if (num < 0)
+ {
+ m_pnDesiredFields = NULL;
+ m_nDesiredCount = -1;
+ }
+ else
+ {
+ m_nDesiredCount = num;
+ m_pnDesiredFields = (int*) malloc(sizeof(int) * num);
+ memcpy(m_pnDesiredFields, fields, sizeof(int) * num);
+ }
+}
+
+/************************************************************************/
+/* OGR_L_SetDesiredFields() */
+/************************************************************************/
+
+void OGR_L_SetDesiredFields( OGRLayerH hLayer, int num, int* fields)
+{
+ VALIDATE_POINTER0( hLayer, "OGR_L_SetDesiredFields" );
+
+ ((OGRLayer *) hLayer)->SetDesiredFields(num, fields);
+}
Index: ogr/ogrfeature.cpp
===================================================================
--- ogr/ogrfeature.cpp (revision 20106)
+++ ogr/ogrfeature.cpp (working copy)
@@ -114,9 +114,10 @@
if( poGeometry != NULL )
delete poGeometry;
+ OGRFieldDefn** poFDefns = poDefn->GetFieldDefns();
for( int i = 0; i < poDefn->GetFieldCount(); i++ )
{
- OGRFieldDefn *poFDefn = poDefn->GetFieldDefn(i);
+ OGRFieldDefn *poFDefn = poFDefns[i];
if( !IsFieldSet(i) )
continue;
Index: ogr/ogr_api.h
===================================================================
--- ogr/ogr_api.h (revision 20106)
+++ ogr/ogr_api.h (working copy)
@@ -342,6 +342,7 @@
OGRStyleTableH CPL_DLL OGR_L_GetStyleTable( OGRLayerH );
void CPL_DLL OGR_L_SetStyleTableDirectly( OGRLayerH, OGRStyleTableH );
void CPL_DLL OGR_L_SetStyleTable( OGRLayerH, OGRStyleTableH );
+void CPL_DLL OGR_L_SetDesiredFields( OGRLayerH, int, int* );
/* OGRDataSource */
_______________________________________________
gdal-dev mailing list
[email protected]
http://lists.osgeo.org/mailman/listinfo/gdal-dev