Hello,
I've attached an ugly python script that does some manipulations to
relations in an OSM file it reads from stdin. In particular, for
relations that aren't degenerate, it puts role=outer on the largest
polygon and role=inner on all others. It also removes tags from inner
ways that are the same as on the outer way.
It only works on multipolygons that consist of closed ways. There are a
lot of multipolygons with two opposing riverbanks in the database.
There are also some worse ones, like
http://api.openstreetmap.org/api/0.5/relation/240. These really aren't
multipolygons and should be relations of a different type -- should I
modify them to type=multi_way_area or something?
There are both relations with fewer than two members and ways with fewer
than two nodes in here. Is it OK if I delete these?
Here are some statistics for the relations with id up to 500:
393 relations
235 with non-closed ways
53 with less than two members
32 containing ways with less than two nodes
56 modified relations
122 modified ways
Cheers
Robert
from osmxml import *
# Module-level lookup tables keyed by integer element id. splitosm()
# rebinds all three every time a document is loaded.
nodes, ways, relations = {}, {}, {}
def area(way):
    """Return the unsigned planar area enclosed by a closed way.

    The shoelace formula is applied directly to the (lat, lon) pairs of
    the way's nodes, so the result is in squared degrees.

    way -- object with an integer ``id`` and ``nodes``, a list of node ids
           that are looked up in the module-level ``nodes`` table.

    Raises AssertionError when the way has fewer than three node
    references or when its first and last node differ; fixmultipolygon()
    catches that exception type to skip such ways. A node id that is not
    present in ``nodes`` raises KeyError.
    """
    refs = way.nodes
    # Raise explicitly rather than via ``assert`` so the checks still run
    # when Python is started with -O.
    if len(refs) <= 2:
        raise AssertionError("less than three nodes in way %d" % way.id)
    if refs[0] != refs[-1]:
        raise AssertionError(
            "trying to compute area for non-closed way: %d" % way.id)
    coords = [(nodes[ref].lat, nodes[ref].lon) for ref in refs]
    # The way is closed (first == last), so pairing each vertex with its
    # successor already covers every edge of the ring.
    twice_area = 0
    for (lat0, lon0), (lat1, lon1) in zip(coords, coords[1:]):
        twice_area += lat1 * lon0 - lat0 * lon1
    return 0.5 * abs(twice_area)
from xml.dom import minidom
def splitosm(dom):
    """Index the elements of a parsed OSM document by id.

    Rebinds the module-level ``nodes``, ``ways`` and ``relations`` tables
    to fresh dicts and fills them with Node, Way and Relation wrappers for
    the direct children of the document's root element, keyed by each
    wrapper's ``id``.

    dom -- a minidom document whose root element is <osm>.
    """
    assert dom.documentElement.nodeName == u'osm'
    global nodes
    global ways
    global relations
    nodes = {}
    ways = {}
    relations = {}
    wrappers = {
        'node': (Node, nodes),
        'way': (Way, ways),
        'relation': (Relation, relations),
    }
    for elem in dom.documentElement.childNodes:
        # Skip whitespace text nodes, comments and the like.
        if elem.nodeType != minidom.Node.ELEMENT_NODE:
            continue
        entry = wrappers.get(elem.nodeName)
        if entry is None:
            # Any other child element (for instance one without an "id"
            # attribute, such as a bounding-box element) is left alone;
            # parsing an id from it up front used to raise ValueError.
            continue
        wrapper, table = entry
        obj = wrapper(elem)
        table[obj.id] = obj
def fix(filein, fileout):
    """Read an OSM file, repair its multipolygon relations and write it out.

    filein  -- file name or file object accepted by minidom.parse().
    fileout -- file object that receives the UTF-8 encoded XML.

    Every relation tagged type=multipolygon is passed to
    fixmultipolygon(); relations without a type tag are reported on stderr
    and skipped. Relations and ways whose wrappers were modified get
    action="modify". The per-key count of removed tags is kept in the
    module-level ``deleted_tags`` dict and printed to stderr at the end.
    """
    dom = minidom.parse(filein)
    splitosm(dom)
    global deleted_tags
    deleted_tags = {}
    for r in relations.values():
        try:
            rel_type = r["type"]
        except KeyError:
            sys.stderr.write("rel %d: no type, delete?\n" % r.id)
            continue
        if rel_type == "multipolygon":
            fixmultipolygon(r)
    for r in relations.values():
        if r._modified:
            r.action = "modify"
    for w in ways.values():
        if w._modified:
            w.action = "modify"
    sys.stderr.write("%s\n" % str(deleted_tags))
    # toxml() with an encoding yields encoded bytes. Text streams that
    # expose an underlying binary ``buffer`` (e.g. Python 3's sys.stdout)
    # need the bytes written there; other file objects take them directly.
    getattr(fileout, "buffer", fileout).write(dom.toxml("utf-8"))
# some more things we could check:
# * tags on the relation
# * member ways with roles other than "", inner, outer
# Tag keys that are never removed from inner ways, even when the inner way
# carries the same value as the outer way (see fixmultipolygon).
ignored_tags = ["created_by"]
def fixmultipolygon(rel):
    """Assign outer/inner roles in a multipolygon and strip duplicate tags.

    rel -- Relation whose "type" tag is "multipolygon".

    The relation is reported on stderr and left untouched when it has
    fewer than two members, contains node or relation members, or has
    more than one outer way. It is also reported and skipped when no outer
    can be chosen, or when a member way is degenerate (see area()) or
    missing from the input.

    Otherwise, if exactly one way already has role "outer", every way
    with an empty role becomes "inner". If no way is "outer", the
    empty-role way with the largest area becomes "outer" and the remaining
    empty-role ways become "inner".

    Finally every tag on an inner way that has the same value as on the
    outer way (keys in ``ignored_tags`` excepted) is deleted from the
    inner way and counted in the module-level ``deleted_tags`` dict.
    """
    assert rel["type"] == "multipolygon"
    if len(rel.members) < 2:
        sys.stderr.write("rel %d: multipolygon with less than two members, delete?\n" % rel.id)
        return
    for kind, label in (('node', 'nodes'), ('relation', 'relations')):
        found = [m for m in rel.members if m.type == kind]
        if found:
            sys.stderr.write("rel %d: multipolygon with %s: %s\n" % (rel.id, label, found))
            return
    mways = [m for m in rel.members if m.type == 'way']
    rempty = [m for m in mways if m.role == '']
    router = [m for m in mways if m.role == 'outer']
    if len(router) > 1:
        sys.stderr.write("rel %d: more than one outer way\n" % rel.id)
        return
    if router:
        outer_member = router[0]
        if rempty:
            sys.stderr.write("rel %d: one outer, some empty\n" % rel.id)
        for m in rempty:
            sys.stderr.write("rel %d: setting role: inner\n" % rel.id)
            m.role = 'inner'
    else:
        if not rempty:
            # Nothing to promote: every way already has some non-outer role.
            sys.stderr.write("rel %d: no outer way and no ways without role\n" % rel.id)
            return
        try:
            byarea = [(m, area(ways[m.ref])) for m in rempty]
        except AssertionError as e:
            sys.stderr.write("rel %d: %s\n" % (rel.id, str(e)))
            return
        except KeyError as e:
            # A member way, or one of its nodes, is not in the input file.
            sys.stderr.write("rel %d: missing way or node %s\n" % (rel.id, str(e)))
            return
        # Largest area first; ties keep their order in the relation.
        byarea.sort(key=lambda pair: pair[1], reverse=True)
        outer_member = byarea[0][0]
        outer_member.role = 'outer'
        for m, _ in byarea[1:]:
            m.role = 'inner'
    # now delete outer tags from inner ways
    outer = ways.get(outer_member.ref)
    if outer is None:
        sys.stderr.write("rel %d: outer way %d not in input\n" % (rel.id, outer_member.ref))
        return
    # Snapshot the outer tags once so the comparison does not depend on
    # the state of the outer way while inner ways are being edited.
    outer_tags = [(k, outer[k]) for k in outer.keys() if k not in ignored_tags]
    for m in mways:
        if m.role != 'inner':
            continue
        inner = ways.get(m.ref)
        if inner is None:
            sys.stderr.write("rel %d: inner way %d not in input\n" % (rel.id, m.ref))
            continue
        for k, outer_value in outer_tags:
            value = inner.get(k)
            if value is not None and value == outer_value:
                del inner[k]
                deleted_tags[k] = deleted_tags.get(k, 0) + 1
import sys
if __name__ == "__main__":
    # Used as a filter: OSM XML is read from stdin and the result is
    # written to stdout.
    source, sink = sys.stdin, sys.stdout
    fix(source, sink)
class Element(object):
    """Wrapper around a minidom element that carries <tag k=.. v=..> children.

    Tags are exposed through a dict-like interface (``obj[key]``,
    ``obj[key] = value``, ``del obj[key]``, ``keys()``, ``get()``). Any
    change made through this wrapper sets ``_modified`` to True.
    """

    def __init__(self, elem):
        self._elem = elem
        self._modified = False
        # Maps tag key -> the <tag> DOM element holding it.
        self._tags = {}
        for t in elem.getElementsByTagName("tag"):
            self._tags[t.attributes["k"].value] = t

    def _getattr(self, name):
        "returns an attribute's value (unicode); KeyError if it is absent"
        return self._elem.attributes[name].value

    def _setattr(self, name, value):
        "sets an attribute's value (unicode)"
        self._elem.setAttribute(name, value)
        self._modified = True

    def getid(self):
        return int(self._getattr("id"))
    id = property(getid)

    def getaction(self):
        return self._getattr("action")

    def setaction(self, value):
        self._setattr("action", value)
    action = property(getaction, setaction)

    # handling of tags
    def __getitem__(self, name):
        "returns the value of the given tag; KeyError if it is absent"
        return self._tags[name].attributes["v"].value

    def __setitem__(self, name, value):
        "sets the given tag; an empty value deletes the tag instead"
        if value == "":
            # Deleting is all there is to do; do not fall through and
            # re-create the tag with an empty value.
            del self[name]
            return
        t = self._tags.get(name)
        if t is None:
            t = self._elem.ownerDocument.createElement("tag")
            t.setAttribute("k", name)
            t.setAttribute("v", value)
            self._elem.appendChild(t)
            # Register the new tag so lookups and keys() can see it.
            self._tags[name] = t
            self._modified = True
        elif t.attributes["v"].value != value:
            t.attributes["v"].value = value
            self._modified = True

    def __delitem__(self, name):
        "delete given tag if present, no error otherwise"
        # Drop the index entry together with the DOM node so the tag is
        # really gone and a repeated delete stays a no-op.
        t = self._tags.pop(name, None)
        if t is not None:
            self._elem.removeChild(t)
            self._modified = True

    def keys(self):
        "returns a list of the tag keys currently present"
        return list(self._tags)

    def get(self, name, default=None):
        "returns the value of the given tag, or default if it is absent"
        try:
            return self[name]
        except KeyError:
            return default
class Node(Element):
    """Element wrapper exposing the "lat" and "lon" attributes as floats."""

    # Construction is inherited unchanged from Element.

    def getlat(self):
        raw = self._getattr("lat")
        return float(raw)
    lat = property(getlat)

    def getlon(self):
        raw = self._getattr("lon")
        return float(raw)
    lon = property(getlon)
class Way(Element):
    """Element wrapper that also holds the way's node references.

    ``nodes`` is the list of integer "ref" values of the <nd> children,
    in document order.
    """

    def __init__(self, elem):
        Element.__init__(self, elem)
        self.nodes = [int(n.attributes["ref"].value)
                      for n in elem.getElementsByTagName("nd")]

    def isclosed(self):
        "True if the way has nodes and its first and last node are the same"
        # A way without any <nd> children is not closed; indexing the
        # empty list would raise IndexError.
        return bool(self.nodes) and self.nodes[0] == self.nodes[-1]
    closed = property(isclosed)
class Relation(Element):
    """Element wrapper that also holds the relation's members.

    ``members`` is a list with one Member per <member> element, in
    document order; each Member keeps a reference back to this relation.
    """

    def __init__(self, elem):
        Element.__init__(self, elem)
        self.members = []
        for member_elem in elem.getElementsByTagName("member"):
            self.members.append(Member(member_elem, self))
class Member(object):
    """One <member> entry of a relation.

    domnode -- the <member> DOM element.
    rel     -- the owning relation wrapper; its ``_modified`` flag is set
               when the member's role is changed.
    """

    def __init__(self, domnode, rel):
        self._node = domnode
        self._rel = rel

    def __repr__(self):
        # Readable form for diagnostics that print lists of members. The
        # raw attribute strings are used so that repr() itself cannot fail.
        return "Member(type=%r, ref=%r, role=%r)" % (
            self._node.getAttribute('type'),
            self._node.getAttribute('ref'),
            self._node.getAttribute('role'))

    def setrole(self, role):
        # Like Element.__setitem__, only an actual change marks the owner
        # as modified.
        if self._node.getAttribute('role') == role:
            return
        self._node.setAttribute('role', role)
        self._rel._modified = True

    def getrole(self):
        return self._node.getAttribute('role')
    role = property(getrole, setrole)

    def gettype(self):
        return self._node.getAttribute('type')
    type = property(gettype)

    def getref(self):
        return int(self._node.getAttribute('ref'))
    ref = property(getref)
_______________________________________________
talk mailing list
[email protected]
http://lists.openstreetmap.org/cgi-bin/mailman/listinfo/talk