Hi Folks,
Dan Mosedale over at Mozilla asked if I had anything that could diff ics
files. Last week I decided to whip together a script to compare ics
files, ignoring details like order that mess up normal diff tools.
The tool turned out to be useful to me today when I was tweaking the way
iCalendar data gets saved and re-serialized (although I realized I had a
bug which I just fixed). I'll probably add this script to the vobject
package one day, but for now, I thought other people might eventually
have a use for it, so it's attached.
This script:
- ignores timezones, i.e. it doesn't diff VTIMEZONEs, and it ignores
differences between 2PM PST and 5PM EST. In fact, now that I think
about it, if you happen to try to diff an event with floating time and an
identical event in a timezone, it'll barf, because Python's timezone
support is sort of broken
- Pays attention to VEVENTs and VTODOs, not VJOURNALs or random crap at
the start of the ics file (X-CALENDAR-NAME and the like). Those
would be reasonably easy to add; it just didn't seem that useful at
first blush
- treats a UID/RECURRENCE-ID/SEQUENCE triple as being a unique event, so
if you have two events that differ only in, say, their sequence, it'll
just see them as two totally different events
- considers the order of parameters and content lines WITHIN a component
to be significant. This didn't seem like a big deal to me.
- I output only the bits that are different between similar
vevents/vtodos, with the exception that I go ahead and always output a
UID (maybe I should output SEQUENCE and RECURRENCE-ID, too) so you have
some hope of finding which event was different.
- The output is a pretty-print, not the original ics.
Sincerely,
Jeffrey
# Copyright (c) 2006 Open Source Applications Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from vobject.base import Component, getBehavior, newFromBehavior
def getSortKey(component):
    """
    Build the string used to order and match components.

    The key is the component's UID, followed by its SEQUENCE zero-padded to
    five digits, followed by its RECURRENCE-ID in ISO format (or the
    placeholder '0000-00-00' when there is none).  Two components therefore
    only get the same key when all three values agree.

    component -- an object offering getChildValue(name, default)

    Returns the concatenated key as a string.
    """
    def getUID(component):
        return component.getChildValue('uid', '')

    # it's not quite as simple as getUID, need to account for recurrenceID and
    # sequence
    def getSequence(component):
        sequence = component.getChildValue('sequence', 0)
        return "%05d" % int(sequence)

    def getRecurrenceID(component):
        recurrence_id = component.getChildValue('recurrence_id', None)
        if recurrence_id is None:
            return '0000-00-00'
        else:
            return recurrence_id.isoformat()

    # parenthesized so the expression can legally span two lines
    return (getUID(component) + getSequence(component) +
            getRecurrenceID(component))
def sortByUID(components):
    """
    Return a new list holding the given components ordered by getSortKey().
    The input sequence itself is left untouched.
    """
    ordered = list(components)
    ordered.sort(key=getSortKey)
    return ordered
def deleteExtraneous(component):
    """
    Recursively walk the component's children, deleting extraneous details like
    X-VOBJ-ORIGINAL-TZID.

    The component tree is modified in place; nothing is returned.
    """
    for comp in component.components():
        deleteExtraneous(comp)
    for line in component.lines():
        # "in" rather than dict.has_key(), which no longer exists in Python 3
        if 'X-VOBJ-ORIGINAL-TZID' in line.params:
            del line.params['X-VOBJ-ORIGINAL-TZID']
def diff(left, right):
    """
    Take two VCALENDAR components, compare VEVENTs and VTODOs in them,
    return a list of object pairs containing just UID and the bits
    that didn't match, using None for objects that weren't present in one
    version or the other.

    When there are multiple ContentLines in one VEVENT, for instance many
    DESCRIPTION lines, such lines' original order is assumed to be
    meaningful.  Order is also preserved when comparing (the unlikely case
    of) multiple parameters of the same type in a ContentLine.
    """

    def processComponentLists(leftList, rightList):
        """
        Merge-walk two lists that are already ordered by getSortKey().

        Returns a list of (left, right) pairs: (comp, None) for a component
        found only in leftList, (None, comp) for one found only in
        rightList, and whatever processComponentPair() returns for two
        components with the same key that are not identical.
        """
        output = []
        rightIndex = 0
        rightListSize = len(rightList)

        for comp in leftList:
            leftKey = getSortKey(comp)
            # right-hand components sorting before this one have no partner
            while (rightIndex < rightListSize and
                   getSortKey(rightList[rightIndex]) < leftKey):
                output.append((None, rightList[rightIndex]))
                rightIndex += 1

            if (rightIndex < rightListSize and
                    getSortKey(rightList[rightIndex]) == leftKey):
                matchResult = processComponentPair(comp, rightList[rightIndex])
                rightIndex += 1
                if matchResult is not None:
                    output.append(matchResult)
            else:
                output.append((comp, None))

        # Anything still unread on the right exists only on the right.
        # Without this, additions sorting after the last left-hand component
        # (or any addition when leftList is empty) would never be reported.
        for rightComp in rightList[rightIndex:]:
            output.append((None, rightComp))

        return output

    def processComponentPair(leftComp, rightComp):
        """
        Return None if a match, or a pair of components including UIDs and
        any differing children.
        """
        differentContentLines = []  # (contents key, left lines, right lines)
        differentComponents = {}    # contents key -> list of (left, right)

        for key in leftComp.contents.keys():
            leftList = leftComp.contents[key]
            rightList = rightComp.contents.get(key, [])
            if isinstance(leftList[0], Component):
                compDifference = processComponentLists(leftList, rightList)
                if len(compDifference) > 0:
                    differentComponents[key] = compDifference
            elif leftList != rightList:
                differentContentLines.append((key, leftList, rightList))

        for key in rightComp.contents.keys():
            if key not in leftComp.contents:
                rightList = rightComp.contents[key]
                if isinstance(rightList[0], Component):
                    # same list-of-pairs shape processComponentLists returns,
                    # so the unpacking below treats both cases alike
                    differentComponents[key] = [(None, comp)
                                                for comp in rightList]
                else:
                    differentContentLines.append((key, [], rightList))

        if not differentContentLines and not differentComponents:
            return None

        leftDiff = newFromBehavior(leftComp.name)
        rightDiff = newFromBehavior(leftComp.name)
        # add a UID, if one existed, despite the fact that they'll always be
        # the same
        uid = leftComp.getChildValue('uid')
        if uid is not None:
            leftDiff.add('uid').value = uid
            rightDiff.add('uid').value = uid

        for key, childPairList in differentComponents.items():
            # drop the None placeholders; build real lists (not lazy filter
            # objects) and skip a side that ends up with nothing
            leftComponents = [pair[0] for pair in childPairList
                              if pair[0] is not None]
            rightComponents = [pair[1] for pair in childPairList
                               if pair[1] is not None]
            if leftComponents:
                leftDiff.contents[key] = leftComponents
            if rightComponents:
                rightDiff.contents[key] = rightComponents

        for key, leftLines, rightLines in differentContentLines:
            # file the lines under the same contents key they were found
            # under, and leave out a side that has no such lines at all
            if leftLines:
                leftDiff.contents[key] = leftLines
            if rightLines:
                rightDiff.contents[key] = rightLines

        return leftDiff, rightDiff

    vevents = processComponentLists(
        sortByUID(getattr(left, 'vevent_list', [])),
        sortByUID(getattr(right, 'vevent_list', [])))

    vtodos = processComponentLists(
        sortByUID(getattr(left, 'vtodo_list', [])),
        sortByUID(getattr(right, 'vtodo_list', [])))

    return vevents + vtodos
def prettyDiff(leftObj, rightObj):
    """
    Print every difference diff() reports between the two objects.

    Each differing pair is written between conflict-style markers: the left
    side (if present) after the "<" line, the right side (if present) after
    the "=" line, then a ">" line and a blank line.
    """
    # Single-argument print(...) calls produce the same output whether print
    # is the Python 2 statement or the Python 3 function.
    for left, right in diff(leftObj, rightObj):
        print("<<<<<<<<<<<<<<<")
        if left is not None:
            left.prettyPrint()
        print("===============")
        if right is not None:
            right.prettyPrint()
        print(">>>>>>>>>>>>>>>")
        print("")
from optparse import OptionParser
import vobject
import os
import codecs
def main():
    """
    Command-line entry point: read the two ics files named on the command
    line, strip extraneous details from both and print their differences.

    Does nothing further when getOptions() reports a usage error.
    """
    # take the file names from the command line instead of the hard-coded
    # desktop paths that were left in from debugging
    options = getOptions()
    if options:
        ics_file1, ics_file2 = options
        # open() replaces the Python-2-only file() builtin; the with-blocks
        # make sure both handles get closed
        with open(ics_file1) as stream1:
            cal1 = vobject.readOne(stream1)
        with open(ics_file2) as stream2:
            cal2 = vobject.readOne(stream2)
        deleteExtraneous(cal1)
        deleteExtraneous(cal2)
        prettyDiff(cal1, cal2)
def getOptions():
    """
    Parse the command line.

    Returns the list of the two positional arguments (the ics file names).
    When the number of positional arguments is not exactly two, prints an
    error followed by the usage text and returns False.
    """
    ##### Configuration options #####
    usage = "usage: %prog ics_file1 ics_file2"
    parser = OptionParser(usage=usage)
    parser.set_description("ics_diff will print a comparison of two ics files ")

    (cmdline_options, args) = parser.parse_args()
    # the caller unpacks exactly two names, so reject extras as well
    if len(args) != 2:
        if len(args) < 2:
            print("error: too few arguments given")
        else:
            print("error: too many arguments given")
        print("")
        print(parser.format_help())
        return False

    return args
# Run the command-line tool when executed as a script; Ctrl-C ends it with a
# short message instead of a traceback.
if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        print("Aborted")
# _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
Open Source Applications Foundation "chandler-dev" mailing list
http://lists.osafoundation.org/mailman/listinfo/chandler-dev