Hello,

I've attached an ugly python script that does some manipulations to relations in an OSM file it reads from stdin. In particular, for relations that aren't degenerate, it puts role=outer on the largest polygon and role=inner on all others. It also removes tags from inner ways that are the same as on the outer way.

It only works on multipolygons that consist of closed ways. There are a lot of multipolygons in the database that consist of two opposing riverbanks. There are also some worse ones, like http://api.openstreetmap.org/api/0.5/relation/240. These really aren't multipolygons and should be relations of a different type -- should I modify them to type=multi_way_area or something?

There are both relations with fewer than two members and ways with fewer than two nodes in here. Is it OK if I delete these?

Here are some statistics for the relations with ids up to 500:

393 relations
235 with non-closed ways
 53 with less than two members
 32 containing ways with less than two nodes
 56 modified relations

122 modified ways

Cheers
Robert

from osmxml import *

# Module-level indexes of the parsed OSM objects, keyed by integer id.
# splitosm() rebinds all three to fresh dicts every time it is called.
nodes = {}
ways = {}
relations = {}

def area(way):
	"""Return the unsigned area enclosed by a closed way.

	The node coordinates are looked up in the module-level ``nodes`` index
	and fed into the shoelace formula with (lat, lon) treated as plain
	planar coordinates, so the result is in square degrees.

	way -- object with ``nodes`` (list of node ids) and ``id``.

	Raises AssertionError if the way has fewer than three node references
	or if its first and last node differ.  The exception is raised
	explicitly (not via ``assert``) so the check survives ``python -O``;
	fixmultipolygon() depends on catching it.
	"""
	nds = way.nodes
	if len(nds) <= 2:
		raise AssertionError("less than three nodes in way %d" % way.id)
	if nds[0] != nds[-1]:
		raise AssertionError("trying to compute area for non-closed way: %d" % way.id)
	coords = [(nodes[ref].lat, nodes[ref].lon) for ref in nds]
	twice_area = 0
	# the way is closed (first == last), so consecutive pairs cover every edge
	for (lat0, lon0), (lat1, lon1) in zip(coords, coords[1:]):
		twice_area += lat1 * lon0 - lat0 * lon1
	return 0.5 * abs(twice_area)

from xml.dom import minidom

def splitosm(dom):
	"""Index the top-level objects of an OSM DOM document.

	Rebinds the module-level ``nodes``, ``ways`` and ``relations`` dicts
	and fills them with Node / Way / Relation wrappers keyed by each
	wrapper's ``id``.  Top-level elements of any other kind (for example
	``<bounds>``, which carries no id) are skipped.

	dom -- a parsed minidom document.

	Raises AssertionError if the document element is not ``<osm>``.
	"""
	global nodes
	global ways
	global relations
	root = dom.documentElement
	# raised explicitly so the check is not stripped by ``python -O``
	if root.nodeName != 'osm':
		raise AssertionError("document element is <%s>, expected <osm>" % root.nodeName)
	nodes = {}
	ways = {}
	relations = {}
	for elem in root.childNodes:
		# skip whitespace text nodes, comments, ...
		if elem.nodeType != minidom.Node.ELEMENT_NODE:
			continue
		# no id is parsed here: only the three known element kinds are
		# required to have one, and their wrappers read it themselves
		if elem.nodeName == 'node':
			obj = Node(elem)
			nodes[obj.id] = obj
		elif elem.nodeName == 'way':
			obj = Way(elem)
			ways[obj.id] = obj
		elif elem.nodeName == 'relation':
			obj = Relation(elem)
			relations[obj.id] = obj

def fix(filein, fileout):
	"""Read OSM XML from filein, fix its multipolygons, write XML to fileout.

	Every relation tagged type=multipolygon is handed to fixmultipolygon();
	relations without a type tag are only reported on stderr.  Afterwards
	each relation and way whose ``_modified`` flag is set gets
	action="modify", the per-key counts in the global ``deleted_tags`` are
	printed to stderr, and the document is serialised as UTF-8.
	"""
	global deleted_tags

	dom = minidom.parse(filein)
	splitosm(dom)
	deleted_tags = {}

	for rel in relations.values():
		try:
			reltype = rel["type"]
		except KeyError:
			sys.stderr.write("rel %d: no type, delete?\n" % rel.id)
			continue
		if reltype == "multipolygon":
			fixmultipolygon(rel)

	# relations first, then ways: flag whatever has been changed
	for collection in (relations, ways):
		for obj in collection.values():
			if obj._modified:
				obj.action = "modify"

	sys.stderr.write("%s\n" % str(deleted_tags))
	fileout.write(dom.toxml("utf-8"))

# some more things we could check:
#  * tags on the relation
#  * member ways with roles other than "", inner, outer

# tags that should not be removed from inner polygons
# (fixmultipolygon() leaves these keys out when it compares the outer way's
# tags against each inner way)
ignored_tags = ["created_by"]

def fixmultipolygon(rel):
	"""Assign outer/inner roles in a multipolygon and drop duplicated tags.

	Relations with fewer than two members, with node or relation members,
	or with more than one outer way are reported on stderr and left alone.
	Otherwise:

	* if exactly one way has role=outer, every way with an empty role
	  becomes role=inner;
	* if no way has role=outer, the ways with an empty role are ranked by
	  area(); the largest becomes outer, the rest inner.

	Finally every tag of the outer way (except keys in ``ignored_tags``)
	that an inner way carries with the same value is deleted from that
	inner way, and counted per key in the global ``deleted_tags`` dict
	(which fix() creates).

	Member ways with roles other than "", "inner" and "outer" are ignored.
	"""
	assert rel["type"] == "multipolygon"

	if len(rel.members) < 2:
		sys.stderr.write("rel %d: multipolygon with less than two members, delete?\n" % rel.id)
		return

	mnodes = [m for m in rel.members if m.type == 'node']
	if mnodes:
		sys.stderr.write("rel %d: multipolygon with nodes: %s\n" % (rel.id, mnodes))
		return

	mrels = [m for m in rel.members if m.type == 'relation']
	if mrels:
		sys.stderr.write("rel %d: multipolygon with relations: %s\n" % (rel.id, mrels))
		return

	mways = [m for m in rel.members if m.type == 'way']
	rempty = [m for m in mways if m.role == '']
	router = [m for m in mways if m.role == 'outer']

	if len(router) > 1:
		sys.stderr.write("rel %d: more than one outer way\n" % rel.id)
		return
	if len(router) == 1:
		if rempty:
			sys.stderr.write("rel %d: one outer, some empty\n" % rel.id)
		for m in rempty:
			sys.stderr.write("rel %d: setting role: inner\n" % rel.id)
			m.role = 'inner'
	else:
		# no explicit outer way: one has to be picked among the role-less ways
		if not rempty:
			sys.stderr.write("rel %d: no outer way and no way without role\n" % rel.id)
			return
		try:
			byarea = [(m, area(ways[m.ref])) for m in rempty]
		except AssertionError as e:
			sys.stderr.write("rel %d: %s\n" % (rel.id, str(e)))
			return
		except KeyError as e:
			# a member way, or one of its nodes, is not part of the input
			sys.stderr.write("rel %d: referenced object %s not in input\n" % (rel.id, str(e)))
			return
		# largest first; the sort is stable, so ties keep member order
		byarea.sort(key=lambda pair: pair[1], reverse=True)
		byarea[0][0].role = 'outer'
		for m, _ in byarea[1:]:
			m.role = 'inner'

	# now delete outer tags from inner ways
	rinner = [m for m in mways if m.role == 'inner']
	router = [m for m in mways if m.role == 'outer']
	assert not [m for m in mways if m.role == '']
	assert len(router) == 1

	outer = ways.get(router[0].ref)
	if outer is None:
		sys.stderr.write("rel %d: outer way %d not in input\n" % (rel.id, router[0].ref))
		return
	rkeys = [k for k in outer.keys() if k not in ignored_tags]
	for m in rinner:
		inner = ways.get(m.ref)
		if inner is None:
			sys.stderr.write("rel %d: inner way %d not in input\n" % (rel.id, m.ref))
			continue
		for k in rkeys:
			# default None never equals a tag value, so a tag the inner way
			# does not have is neither "deleted" nor counted
			if inner.get(k) == outer[k]:
				del inner[k]
				deleted_tags[k] = deleted_tags.get(k, 0) + 1

import sys

# Script entry point: the OSM document is read from stdin and the result of
# fix() is written to stdout (diagnostics go to stderr).
if __name__ == "__main__":
	fix(sys.stdin, sys.stdout)

class Element(object):
	"""Thin wrapper around a DOM element that represents an OSM object.

	XML attributes are reached through ``_getattr`` / ``_setattr`` and the
	``id`` / ``action`` properties.  The object's ``<tag k=... v=...>``
	children are exposed dict-style (``obj[key]``, ``obj[key] = value``,
	``del obj[key]``, ``keys()``, ``get()``).  ``_modified`` becomes True
	as soon as an attribute or tag is changed through this wrapper.
	"""

	def __init__(self, elem):
		object.__init__(self)
		self._elem = elem
		self._modified = False
		# tag key -> <tag> DOM element; kept in sync by __setitem__/__delitem__
		self._tags = {}
		for t in elem.getElementsByTagName("tag"):
			self._tags[t.attributes["k"].value] = t

	def _getattr(self, name):
		"returns an attribute's value (unicode); KeyError if it is missing"
		return self._elem.attributes[name].value

	def _setattr(self, name, value):
		"sets an attribute's value (unicode)"
		self._elem.setAttribute(name, value)
		self._modified = True

	def getid(self):
		return int(self._getattr("id"))
	id = property(getid)

	def getaction(self):
		return self._getattr("action")
	def setaction(self, value):
		self._setattr("action", value)
	action = property(getaction, setaction)

	# handling of tags

	def __getitem__(self, name):
		"returns the value of the given tag; KeyError if there is none"
		return self._tags[name].attributes["v"].value

	def __setitem__(self, name, value):
		"sets the given tag; assigning the empty string deletes it"
		if value == "":
			self.__delitem__(name)
			# stop here, otherwise the tag would be re-created with v=""
			return
		t = self._tags.get(name)
		if t is not None:
			if t.getAttribute("v") != value:
				t.setAttribute("v", value)
				self._modified = True
		else:
			t = self._elem.ownerDocument.createElement("tag")
			t.setAttribute("k", name)
			t.setAttribute("v", value)
			self._elem.appendChild(t)
			# index the new tag so that later reads and deletes see it
			self._tags[name] = t
			self._modified = True

	def __delitem__(self, name):
		"delete given tag if present, no error otherwise"
		# pop so that the index no longer points at the removed DOM node
		t = self._tags.pop(name, None)
		if t is not None:
			self._elem.removeChild(t)
			self._modified = True

	def keys(self):
		"returns the tag keys as a list (safe to iterate while deleting)"
		return list(self._tags)

	def get(self, name, default=None):
		"returns the tag's value, or default if the tag does not exist"
		try:
			return self[name]
		except KeyError:
			return default

class Node(Element):
	"""Element wrapper for a <node>; adds read-only float coordinates."""

	def __init__(self, elem):
		super(Node, self).__init__(elem)

	def getlat(self):
		"returns the 'lat' attribute converted to float"
		raw = self._getattr("lat")
		return float(raw)

	def getlon(self):
		"returns the 'lon' attribute converted to float"
		raw = self._getattr("lon")
		return float(raw)

	lat = property(getlat)
	lon = property(getlon)

class Way(Element):
	"""Element wrapper for a <way>.

	``nodes`` holds the integer ``ref`` of every <nd> child in document
	order.
	"""

	def __init__(self, elem):
		Element.__init__(self, elem)
		self.nodes = [ int(n.attributes["ref"].value) for n in elem.getElementsByTagName("nd") ]

	def isclosed(self):
		"returns True if the first and the last node ref are the same"
		# a way without any <nd> is reported as not closed instead of
		# raising IndexError
		return bool(self.nodes) and self.nodes[0] == self.nodes[-1]

	closed = property(isclosed)

class Relation(Element):
	"""Element wrapper for a <relation>.

	``members`` holds one Member per <member> element found below the
	relation, in document order.
	"""

	def __init__(self, elem):
		super(Relation, self).__init__(elem)
		wrapped = []
		for member_elem in elem.getElementsByTagName("member"):
			wrapped.append(Member(member_elem, self))
		self.members = wrapped

class Member(object):
	"""A single <member> of a relation, backed by its DOM node.

	``type`` and ``ref`` are read-only; assigning ``role`` rewrites the
	DOM attribute and sets ``_modified`` on the owning relation.
	"""

	def __init__(self, domnode, rel):
		self._rel = rel
		self._node = domnode

	def gettype(self):
		"returns the 'type' attribute of the member"
		return self._node.getAttribute('type')

	def getref(self):
		"returns the 'ref' attribute of the member as an int"
		raw = self._node.getAttribute('ref')
		return int(raw)

	def getrole(self):
		"returns the 'role' attribute of the member"
		return self._node.getAttribute('role')

	def setrole(self, role):
		"stores the role and flags the owning relation as modified"
		self._node.setAttribute('role', role)
		self._rel._modified = True

	type = property(gettype)
	ref = property(getref)
	role = property(getrole, setrole)
_______________________________________________
talk mailing list
[email protected]
http://lists.openstreetmap.org/cgi-bin/mailman/listinfo/talk

Reply via email to