Hi Folks,

Dan Mosedale over at Mozilla asked if I had anything that could diff ics
files.  Last week I decided to whip together a script to compare ics
files, ignoring details like order that mess up normal diff tools.

The tool turned out to be useful to me today when I was tweaking the way
iCalendar data gets saved and re-serialized (although I realized I had a
bug which I just fixed).  I'll probably add this script to the vobject
package one day, but for now, I thought other people might eventually
have a use for it, so it's attached.

This script:

- ignores timezones, i.e. it doesn't diff VTIMEZONEs, and it ignores
differences between 2PM PST and 5PM EST.  In fact, now that I think
about it, if you happen to try to diff an event with floating time and an
identical event in a timezone, it'll barf, because Python's timezone
support is sort of broken

- Pays attention to VEVENTs and VTODOs, not VJOURNALs or random crap at
the start of the ics file (X-CALENDAR-NAME and the like), although those
would be reasonably easy to add, it just didn't seem that useful at
first blush

- treats a UID/RECURRENCE-ID/SEQUENCE triple as being a unique event, so
if you have two events that differ only in, say, their sequence, it'll
just see them as two totally different events

- considers the order of parameters and content lines WITHIN a component
to be significant.  This didn't seem like a big deal to me.

- I output only the bits that are different between similar
vevents/vtodos, with the exception that I go ahead and always output a
UID (maybe I should output SEQUENCE and RECURRENCE-ID, too) so you have
some hope of finding which event was different.

- The output is a pretty-print, not the original ics.

Sincerely,
Jeffrey
#   Copyright (c) 2006 Open Source Applications Foundation
#
#   Licensed under the Apache License, Version 2.0 (the "License");
#   you may not use this file except in compliance with the License.
#   You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#   Unless required by applicable law or agreed to in writing, software
#   distributed under the License is distributed on an "AS IS" BASIS,
#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#   See the License for the specific language governing permissions and
#   limitations under the License.

from vobject.base import Component, getBehavior, newFromBehavior

def getSortKey(component):
    """
    Build the string key used to order and match components.

    The key is the concatenation of three parts read with getChildValue:
    the UID ('' when absent), the SEQUENCE zero-padded to five digits
    (0 when absent) and the RECURRENCE-ID in ISO format ('0000-00-00' when
    absent).  Two components therefore get the same key only when all three
    parts agree.

    """
    def getUID(component):
        return component.getChildValue('uid', '')

    # it's not quite as simple as getUID, need to account for recurrenceID and
    # sequence

    def getSequence(component):
        sequence = component.getChildValue('sequence', 0)
        # zero-pad so that string comparison orders sequences numerically
        return "%05d" % int(sequence)

    def getRecurrenceID(component):
        recurrence_id = component.getChildValue('recurrence_id', None)
        if recurrence_id is None:
            return '0000-00-00'
        else:
            return recurrence_id.isoformat()

    # Parenthesized so the concatenation can legally span two lines.
    return (getUID(component) + getSequence(component) +
            getRecurrenceID(component))

def sortByUID(components):
    """
    Return a new list holding the given components ordered by getSortKey.

    A list passed in is not modified; the ordering is done on a copy.
    """
    ordered = list(components)
    ordered.sort(key=getSortKey)
    return ordered

def deleteExtraneous(component):
    """
    Recursively walk the component's children, deleting extraneous details like
    X-VOBJ-ORIGINAL-TZID.

    The component is modified in place; nothing is returned.
    """
    for comp in component.components():
        deleteExtraneous(comp)
    for line in component.lines():
        # "in" instead of dict.has_key(): has_key no longer exists on Python 3
        if 'X-VOBJ-ORIGINAL-TZID' in line.params:
            del line.params['X-VOBJ-ORIGINAL-TZID']
    

def diff(left, right):
    """
    Take two VCALENDAR components, compare VEVENTs and VTODOs in them,
    return a list of object pairs containing just UID and the bits
    that didn't match, using None for objects that weren't present in one
    version or the other.

    When there are multiple ContentLines in one VEVENT, for instance many
    DESCRIPTION lines, such lines' original order is assumed to be
    meaningful.  Order is also preserved when comparing (the unlikely case
    of) multiple parameters of the same type in a ContentLine

    The returned list holds the VEVENT pairs first, followed by the VTODO
    pairs.

    """

    def processComponentLists(leftList, rightList):
        """
        Walk two component lists, pairing components by getSortKey.

        Both lists are expected to be ordered by getSortKey.  Return a list
        of pairs: (comp, None) for a component found only on the left,
        (None, comp) for one found only on the right, and the result of
        processComponentPair for components with equal keys that differ.
        Components with equal keys and no differences contribute nothing.

        """
        output = []
        rightIndex = 0
        rightListSize = len(rightList)

        for comp in leftList:
            if rightIndex >= rightListSize:
                output.append((comp, None))
            else:
                leftKey = getSortKey(comp)
                rightComp = rightList[rightIndex]
                rightKey = getSortKey(rightComp)
                # skip over right-hand components that sort before comp;
                # they have no counterpart on the left
                while leftKey > rightKey:
                    output.append((None, rightComp))
                    rightIndex += 1
                    if rightIndex >= rightListSize:
                        output.append((comp, None))
                        break
                    else:
                        rightComp = rightList[rightIndex]
                        rightKey = getSortKey(rightComp)

                # if the while loop ran off the end of rightList, rightKey is
                # still smaller than leftKey, so neither branch below fires
                if leftKey < rightKey:
                    output.append((comp, None))
                elif leftKey == rightKey:
                    rightIndex += 1
                    matchResult = processComponentPair(comp, rightComp)
                    if matchResult is not None:
                        output.append(matchResult)

        # whatever is still unconsumed on the right has no counterpart on the
        # left and must be reported too, not silently dropped
        for rightComp in rightList[rightIndex:]:
            output.append((None, rightComp))

        return output

    def processComponentPair(leftComp, rightComp):
        """
        Return None if a match, or a pair of components including UIDs and
        any differing children.

        """
        # pairs of (left lines, right lines); a side without lines is []
        differentContentLines = []
        # contents key -> list of (left, right) pairs, None for a missing side
        differentComponents = {}

        for key in leftComp.contents:
            leftChildren = leftComp.contents[key]
            rightList = rightComp.contents.get(key, [])
            if isinstance(leftChildren[0], Component):
                compDifference = processComponentLists(leftChildren, rightList)
                if len(compDifference) > 0:
                    differentComponents[key] = compDifference

            elif leftChildren != rightList:
                differentContentLines.append((leftChildren, rightList))

        for key in rightComp.contents:
            if key not in leftComp.contents:
                rightChildren = rightComp.contents[key]
                if isinstance(rightChildren[0], Component):
                    # same (left, right) pair shape that processComponentLists
                    # produces, so the unzip below works for this case as well
                    differentComponents[key] = [(None, child)
                                                for child in rightChildren]
                else:
                    differentContentLines.append(([], rightChildren))

        if len(differentContentLines) == 0 and len(differentComponents) == 0:
            return None

        left = newFromBehavior(leftComp.name)
        right = newFromBehavior(leftComp.name)
        # add a UID, if one existed, despite the fact that they'll always be
        # the same
        uid = leftComp.getChildValue('uid')
        if uid is not None:
            left.add('uid').value = uid
            right.add('uid').value = uid

        for name, childPairList in differentComponents.items():
            # split the pairs into their two sides, filtering out None
            leftComponents = [pair[0] for pair in childPairList
                              if pair[0] is not None]
            rightComponents = [pair[1] for pair in childPairList
                               if pair[1] is not None]
            if leftComponents:
                left.contents[name] = leftComponents
            if rightComponents:
                right.contents[name] = rightComponents

        for leftChildLine, rightChildLine in differentContentLines:
            nonEmpty = leftChildLine or rightChildLine
            name = nonEmpty[0].name
            # only store a side that actually has lines, so no empty list
            # ends up in contents
            if leftChildLine:
                left.contents[name] = leftChildLine
            if rightChildLine:
                right.contents[name] = rightChildLine

        return left, right

    vevents = processComponentLists(
        sortByUID(getattr(left, 'vevent_list', [])),
        sortByUID(getattr(right, 'vevent_list', [])))

    vtodos = processComponentLists(
        sortByUID(getattr(left, 'vtodo_list', [])),
        sortByUID(getattr(right, 'vtodo_list', [])))

    return vevents + vtodos

def prettyDiff(leftObj, rightObj):
    """
    Print the differences between two VCALENDAR components to stdout.

    Every pair returned by diff() is written as one block: a "<" marker line,
    the left side pretty-printed (omitted when it is None), a "=" separator,
    the right side pretty-printed (omitted when it is None), a ">" marker
    line and a blank line.
    """
    # Single-argument print(...) calls produce the same output whether print
    # is a statement (Python 2) or a function (Python 3).
    for left, right in diff(leftObj, rightObj):
        print("<<<<<<<<<<<<<<<")
        if left is not None:
            left.prettyPrint()
        print("===============")
        if right is not None:
            right.prettyPrint()
        print(">>>>>>>>>>>>>>>")
        print("")
        
        
from optparse import OptionParser
import vobject
import os
import codecs

def main():
    """
    Command-line entry point: print a diff of two ics files.

    The two file names come from getOptions(); when it reports a usage error
    (a false return value) nothing is done.  Both files are parsed with
    vobject.readOne, stripped with deleteExtraneous and compared with
    prettyDiff.
    """
    def readCalendar(path):
        # open() instead of the Python-2-only file(), and close the handle
        # once the calendar has been read
        stream = open(path)
        try:
            return vobject.readOne(stream)
        finally:
            stream.close()

    options = getOptions()
    if options:
        # getOptions returns at least two arguments; any extras are ignored
        ics_file1, ics_file2 = options[:2]
        cal1 = readCalendar(ics_file1)
        cal2 = readCalendar(ics_file2)
        deleteExtraneous(cal1)
        deleteExtraneous(cal2)
        prettyDiff(cal1, cal2)

def getOptions():
    """
    Parse the command line and return the positional arguments.

    Returns the list of positional arguments when at least two were given.
    Otherwise an error message and the usage help are printed to stdout and
    False is returned.
    """
    ##### Configuration options #####

    usage = "usage: %prog ics_file1 ics_file2"
    parser = OptionParser(usage=usage)
    parser.set_description("ics_diff will print a comparison of two ics files ")

    # no options are defined, so only the positional arguments are of interest
    (cmdline_options, args) = parser.parse_args()
    if len(args) < 2:
        # Single-argument print(...) calls produce the same output whether
        # print is a statement (Python 2) or a function (Python 3).
        print("error: too few arguments given")
        print("")
        print(parser.format_help())
        return False

    return args

# Run the diff when executed as a script; Ctrl-C ends it with a short message
# instead of a traceback.
if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        # print(...) with one argument behaves the same on Python 2 and 3
        print("Aborted")
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

Open Source Applications Foundation "chandler-dev" mailing list
http://lists.osafoundation.org/mailman/listinfo/chandler-dev

Reply via email to