Update of /cvsroot/freevo/kaa/metadata/src/image
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv14035/src/image

Added Files:
        .cvsignore EXIF.py IPTC.py __init__.py bins.py core.py 
        jpginfo.py pilinfo.py pnginfo.py tiffinfo.py 
Log Message:
move current mmpython cvs to kaa.metadata

--- NEW FILE: pilinfo.py ---
# -*- coding: iso-8859-1 -*-
# -----------------------------------------------------------------------------
# pilinfo.py - basic image parsing using Imaging (PIL)
# -----------------------------------------------------------------------------
# $Id: pilinfo.py,v 1.1 2005/07/02 16:33:11 dischi Exp $
#
# -----------------------------------------------------------------------------
# kaa-Metadata - Media Metadata for Python
# Copyright (C) 2003-2005 Thomas Schueppel, Dirk Meyer
#
# First Edition: Thomas Schueppel <[EMAIL PROTECTED]>
# Maintainer:    Dirk Meyer <[EMAIL PROTECTED]>
#
# Please see the file doc/CREDITS for a complete list of authors.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MER-
# CHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# -----------------------------------------------------------------------------

# python imports
import os

# kaa imports
from kaa.metadata import factory
from kaa.metadata import mediainfo
from core import ImageInfo, PIL

class PILInfo(ImageInfo):
    """
    Parser for GIF and BMP files. All information is taken from the Python
    Imaging Library (PIL) by the helper functions of ImageInfo.
    """
    def __init__(self, file):
        ImageInfo.__init__(self)
        extension = os.path.splitext(file.name)[1].lower()
        # refuse files with a foreign extension and give up without PIL
        if extension not in ('.gif', '.bmp') or not PIL:
            raise mediainfo.KaaMetadataParseError()
        # start with empty values before asking PIL about the file
        self.mime  = ''
        self.type  = ''
        filename = file.name
        self.add_imaging_information(filename)
        self.parse_external_files(filename)

# Register PILInfo with the metadata factory for gif and bmp files.
# NOTE(review): the arguments look like mime type, file extensions, media
# type and parser class -- confirm against factory.register.
factory.register( 'image/gif', ('gif',), mediainfo.TYPE_IMAGE, PILInfo )
factory.register( 'image/bmp', ('bmp',), mediainfo.TYPE_IMAGE, PILInfo )

--- NEW FILE: .cvsignore ---
*.pyc *.pyo

--- NEW FILE: IPTC.py ---
# -*- coding: iso-8859-1 -*-
# -----------------------------------------------------------------------------
# IPTC.py
# -----------------------------------------------------------------------------
# $Id: IPTC.py,v 1.1 2005/07/02 16:33:11 dischi Exp $
#
# -----------------------------------------------------------------------------
# kaa-Metadata - Media Metadata for Python
# Copyright (C) 2003-2005 Thomas Schueppel, Dirk Meyer
#
# First Edition: Thomas Schueppel <[EMAIL PROTECTED]>
# Maintainer:    Dirk Meyer <[EMAIL PROTECTED]>
#
# Please see the file doc/CREDITS for a complete list of authors.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MER-
# CHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# -----------------------------------------------------------------------------


# http://www.ap.org/apserver/userguide/codes.htm

from struct import unpack

def flatten(list):
    """
    Simplify the values of a parsed IPTC dictionary in place.

    Every value is expected to be a list as built by parseiptc(). An empty
    list is replaced by None, a list with one item by that item and a
    longer list by a tuple.

    Returns the modified dictionary. If the argument is no dictionary (e.g.
    the None parseiptc() returns when it finds no data) or a value has no
    length, an empty list is returned.
    """
    # The parameter shadows the builtin 'list'. The name is kept because it
    # is part of the signature of this function.
    try:
        for key in list.keys():
            val = list[key]
            if len(val) == 0:
                list[key] = None
            elif len(val) == 1:
                list[key] = val[0]
            else:
                list[key] = tuple(val)
        return list
    except (AttributeError, TypeError):
        # only the errors caused by an unexpected argument are handled,
        # everything else (e.g. KeyboardInterrupt) is passed on
        return []


def parseiptc(app):
    """
    Extract the IPTC records from a list of Photoshop image resources.

    app is a byte string (a str in Python 2) with a sequence of '8BIM'
    image resource blocks. A leading 'Photoshop 3.0' header as found in the
    APP13 segment of a JPEG file is skipped.

    Returns None if there is no non-empty resource 0x0404 (IPTC/NAA).
    Otherwise a dictionary is returned. The key is the two byte record and
    dataset number read as one big-endian number (e.g. 517 for 0x0205) and
    the value is the list of all values found for that key. Parsing stops
    at the first byte that does not start a record and at truncated data;
    everything found up to that point is returned.
    """
    # The b'' literals need Python 2.6 or later; in Python 2 they are
    # plain strings.
    header = b"Photoshop 3.0\x00"
    if app[:len(header)] == header:
        app = app[len(header):]

    # search the image resource blocks for the one with the IPTC data
    offset = 0
    data = None
    while app[offset:offset + 4] == b"8BIM":
        offset += 4
        # resource id and length of the resource name (usually empty),
        # image resources are stored in big-endian byte order
        head = app[offset:offset + 3]
        if len(head) < 3:
            break
        (code, name_len) = unpack(">HB", head)
        # skip id, length byte and name and go to the next even offset
        offset += 3 + name_len
        if offset & 1:
            offset += 1
        # size of the resource data
        raw_size = app[offset:offset + 4]
        if len(raw_size) < 4:
            break
        size = unpack(">L", raw_size)[0]
        offset += 4
        if code == 0x0404:
            # 0x0404 contains IPTC/NAA data
            data = app[offset:offset + size]
            break
        # skip the data and go to the next even offset
        offset += size
        if offset & 1:
            offset += 1
    if not data:
        return None

    # split the data into records: 0x1c, key (2 bytes), size (2 bytes) and
    # the value
    iptc = {}
    offset = 0
    while data[offset:offset + 1] == b"\x1c":
        record = data[offset + 1:offset + 5]
        if len(record) < 4:
            # truncated record header
            break
        (key, size) = unpack(">HH", record)
        iptc.setdefault(key, []).append(data[offset + 5:offset + 5 + size])
        offset += size + 5
    return iptc

--- NEW FILE: pnginfo.py ---
# -*- coding: iso-8859-1 -*-
# -----------------------------------------------------------------------------
# pnginfo.py - png file parsing
# -----------------------------------------------------------------------------
# $Id: pnginfo.py,v 1.1 2005/07/02 16:33:12 dischi Exp $
#
# -----------------------------------------------------------------------------
# kaa-Metadata - Media Metadata for Python
# Copyright (C) 2003-2005 Thomas Schueppel, Dirk Meyer
#
# First Edition: Thomas Schueppel <[EMAIL PROTECTED]>
# Maintainer:    Dirk Meyer <[EMAIL PROTECTED]>
#
# Please see the file doc/CREDITS for a complete list of authors.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MER-
# CHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# -----------------------------------------------------------------------------

# python imports
import struct
import zlib
import logging

# kaa imports
from kaa.metadata import mediainfo
from kaa.metadata import factory

# image imports
import IPTC
import EXIF
import core

# get logging object
log = logging.getLogger('metadata')

# interesting file format info:
# http://www.libpng.org/pub/png/png-sitemap.html#programming
# http://pmt.sourceforge.net/pngmeta/

# the eight bytes PNGInfo expects at the beginning of a png file
PNGSIGNATURE = "\211PNG\r\n\032\n"


class PNGInfo(core.ImageInfo):
    """
    PNG image parser.

    After the signature check all chunks of the file are scanned and the
    text chunks (tEXt, zTXt and iTXt) are collected in the dictionary
    self.meta. All other information is added by the helper functions of
    core.ImageInfo.
    """

    def __init__(self,file):
        """
        Parse the opened png file. Raises KaaMetadataParseError if the file
        does not start with the png signature.
        """
        core.ImageInfo.__init__(self)
        self.iptc = None
        self.mime = 'image/png'
        self.type = 'PNG image'

        signature = file.read(8)
        if signature != PNGSIGNATURE:
            raise mediainfo.KaaMetadataParseError()

        self.meta = {}
        while self._readChunk(file):
            pass
        if self.meta:
            self.appendtable( 'PNGMETA', self.meta )
        # some of the text chunks are also provided as attributes
        for key, value in self.meta.items():
            if key.startswith('Thumb:') or key == 'Software':
                setattr(self, key, value)
                if key not in self.keys:
                    self.keys.append(key)

        # core stuff
        self.add_imaging_information(file.name)
        if core.PIL:
            self.parse_external_files(file.name)


    def _readChunk(self,file):
        """
        Read the chunk at the current position of the file and store it in
        self.meta if it is a text chunk. Returns 1 if a chunk was read and
        0 at the end of the file or if the file is truncated.
        """
        header = file.read(8)
        if len(header) != 8:
            return 0
        (length, chunktype) = struct.unpack('>I4s', header)

        if chunktype not in ('tEXt', 'zTXt', 'iTXt'):
            # skip the data and the crc of the chunk
            file.seek(length+4,1)
            log.debug("%s of length %d ignored." % (chunktype, length))
            return 1

        data = file.read(length+4)
        if len(data) != length+4:
            log.debug("%s chunk is truncated." % chunktype)
            return 0
        # the crc in the last four bytes is not checked
        data = data[:length]
        if '\0' not in data:
            log.debug("%s chunk without keyword ignored." % chunktype)
            return 1
        # Split only at the first zero byte: compressed data may contain
        # more of them.
        (key, value) = data.split('\0', 1)

        if chunktype == 'tEXt':
            log.debug('latin-1 Text found.')
        elif chunktype == 'zTXt':
            log.debug('Compressed Text found.')
            value = self._compressed_text(key, value)
        else:
            log.debug('International Text found.')
            value = self._international_text(key, value)

        if value is not None:
            self.meta[key] = value
        return 1


    def _compressed_text(self, key, value):
        """
        Return the text of a zTXt chunk or None if it can not be read.
        value is the chunk data behind the keyword: one byte with the
        compression method followed by the compressed text.
        """
        if not value:
            return None
        return self._decompress(key, ord(value[0]), value[1:])


    def _international_text(self, key, value):
        """
        Return the text of an iTXt chunk or None if it can not be read.
        value is the chunk data behind the keyword: compression flag,
        compression method, language tag, translated keyword and the text.
        Language tag and translated keyword end with a zero byte.
        """
        if len(value) < 2:
            return None
        flag = ord(value[0])
        method = ord(value[1])
        fields = value[2:].split('\0', 2)
        if len(fields) != 3:
            return None
        text = fields[2]
        if flag:
            return self._decompress(key, method, text)
        return text


    def _decompress(self, key, method, data):
        """
        Return the inflated data of a compressed text chunk or None if the
        compression method is unknown or the data is broken.
        """
        if method != 0:
            log.debug("%s has unknown Compression %c" % (key,method))
            return None
        try:
            text = zlib.decompress(data)
        except zlib.error:
            log.debug("%s can not be decompressed" % key)
            return None
        log.debug("%s (Compressed %i) -> %s" % (key,method,text))
        return text


# Register PNGInfo with the metadata factory for png files.
# NOTE(review): the arguments look like mime type, file extensions, media
# type and parser class -- confirm against factory.register.
factory.register( 'image/png', ('png',), mediainfo.TYPE_IMAGE, PNGInfo )

--- NEW FILE: jpginfo.py ---
# -*- coding: iso-8859-1 -*-
# -----------------------------------------------------------------------------
# jpginfo.py - jpg file parsing
# -----------------------------------------------------------------------------
# $Id: jpginfo.py,v 1.1 2005/07/02 16:33:11 dischi Exp $
#
# -----------------------------------------------------------------------------
# kaa-Metadata - Media Metadata for Python
# Copyright (C) 2003-2005 Thomas Schueppel, Dirk Meyer
#
# First Edition: Thomas Schueppel <[EMAIL PROTECTED]>
# Maintainer:    Dirk Meyer <[EMAIL PROTECTED]>
#
# Please see the file doc/CREDITS for a complete list of authors.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MER-
# CHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# -----------------------------------------------------------------------------

# python imports
import struct
import logging

# kaa imports
from kaa.metadata import mediainfo
from kaa.metadata import factory

# image imports
import IPTC
import EXIF
import core

# get logging object
log = logging.getLogger('metadata')

# interesting file format info:
# http://www.dcs.ed.ac.uk/home/mxr/gfx/2d-hi.html
# http://www.funducode.com/freec/Fileformats/format3/format3b.htm

# Start of frame (SOF) segments. The key is the segment type, the byte
# behind the 0xff of the segment marker; the value names the encoding.
# JPGInfo reads the size of the image from a segment with one of these types.
SOF = { 0xC0 : "Baseline",
        0xC1 : "Extended sequential",
        0xC2 : "Progressive",
        0xC3 : "Lossless",
        0xC5 : "Differential sequential",
        0xC6 : "Differential progressive",
        0xC7 : "Differential lossless",
        0xC9 : "Extended sequential, arithmetic coding",
        0xCA : "Progressive, arithmetic coding",
        0xCB : "Lossless, arithmetic coding",
        0xCD : "Differential sequential, arithmetic coding",
        0xCE : "Differential progressive, arithmetic coding",
        0xCF : "Differential lossless, arithmetic coding",
}


class JPGInfo(core.ImageInfo):
    """
    JPEG image parser.

    The segments of the file are scanned for the size of the image (SOF
    segment), IPTC data (APP13 segment) and key/value pairs written by
    libs like epeg (APP7 segment). EXIF data is read by the EXIF module.
    """

    def __init__(self,file):
        """
        Parse the opened jpeg file. Raises KaaMetadataParseError if the
        file does not start with the jpeg start marker.
        """
        core.ImageInfo.__init__(self)
        iptc_info = None
        self.mime = 'image/jpeg'
        self.type = 'jpeg image'

        if file.read(2) != '\xff\xd8':
            raise mediainfo.KaaMetadataParseError()

        file.seek(-2,2)
        if file.read(2) != '\xff\xd9':
            # A JPEG file should end with ffd9. This one does not, but it
            # is still handled as JPEG.
            log.info("Wrong encode found for jpeg")

        file.seek(2)
        app = file.read(4)
        self.meta = {}

        while (len(app) == 4):
            (ff,segtype,seglen) = struct.unpack(">BBH", app)
            if ff != 0xff: break
            log.debug("SEGMENT: 0x%x%x, len=%d" % (ff,segtype,seglen))
            if segtype in (0xd9, 0xda):
                # End of the image or start of scan. The compressed image
                # data behind a start of scan has no segment structure.
                break
            if seglen < 2:
                # the length includes its own two bytes, the file is broken
                break
            if segtype in SOF:
                data = file.read(seglen-2)
                if len(data) >= 6:
                    (precision,self.height,self.width,\
                     num_comp) = struct.unpack('>BHHB', data[:6])
            elif segtype == 0xed:
                # Keep on scanning behind this segment, the segment with
                # the size of the image may still follow.
                data = file.read(seglen-2)
                if not iptc_info:
                    iptc_info = IPTC.flatten(IPTC.parseiptc(data))
            elif segtype == 0xe7:
                # information created by libs like epeg
                data = file.read(seglen-2)
                if data.count('\n') == 1:
                    key, value = data.split('\n')
                    self.meta[key] = value
            else:
                file.seek(seglen-2,1)
            app = file.read(4)
        file.seek(0)
        exif_info = EXIF.process_file(file)

        if exif_info:
            self.setitem( 'date', exif_info, 'Image DateTime', True )
            self.setitem( 'artist', exif_info, 'Image Artist', True )
            self.setitem( 'hardware', exif_info, 'Image Model', True )
            self.setitem( 'software', exif_info, 'Image Software', True )
            self.setitem( 'thumbnail', exif_info, 'JPEGThumbnail', True )
            self.appendtable( 'EXIF', exif_info )

        if iptc_info:
            self.setitem( 'title', iptc_info, 517, True )
            self.setitem( 'date' , iptc_info, 567, True )
            self.setitem( 'comment', iptc_info, 617, True )
            self.setitem( 'keywords', iptc_info, 537, True )
            self.setitem( 'artist', iptc_info, 592, True )
            self.setitem( 'country', iptc_info, 612, True )
            self.setitem( 'caption', iptc_info, 632, True )
            self.appendtable( 'IPTC', iptc_info )

        if self.meta:
            self.appendtable( 'JPGMETA', self.meta )

        # some of the key/value pairs are also provided as attributes
        for key, value in self.meta.items():
            if key.startswith('Thumb:') or key == 'Software':
                setattr(self, key, value)
                if key not in self.keys:
                    self.keys.append(key)

        # core stuff
        self.add_imaging_information(file.name)
        if core.PIL:
            self.parse_external_files(file.name)


# Register JPGInfo with the metadata factory for jpeg files.
# NOTE(review): the arguments look like mime type, file extensions, media
# type and parser class -- confirm against factory.register.
factory.register( 'image/jpeg', ('jpg','jpeg'), mediainfo.TYPE_IMAGE,
                       JPGInfo )

--- NEW FILE: core.py ---
# -*- coding: iso-8859-1 -*-
# -----------------------------------------------------------------------------
# core.py - basic image parsing using Imaging
# -----------------------------------------------------------------------------
# $Id: core.py,v 1.1 2005/07/02 16:33:11 dischi Exp $
#
# -----------------------------------------------------------------------------
# kaa-Metadata - Media Metadata for Python
# Copyright (C) 2003-2005 Thomas Schueppel, Dirk Meyer
#
# First Edition: Thomas Schueppel <[EMAIL PROTECTED]>
# Maintainer:    Dirk Meyer <[EMAIL PROTECTED]>
#
# Please see the file doc/CREDITS for a complete list of authors.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MER-
# CHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# -----------------------------------------------------------------------------

# python imports
import os
import gzip
import logging
from xml.utils import qp_xml

# kaa imports
from kaa.metadata import factory
from kaa.metadata import mediainfo
import bins

# get logging object
log = logging.getLogger('metadata')

# The Python Imaging Library is optional. PIL is None if the import fails
# and ImageInfo.add_imaging_information does nothing in that case.
try:
    import Image as PIL
except:
    log.info('Python Imaging not found')
    PIL = None


# attributes for image files, ImageInfo.__init__ creates every attribute
# with the value None and adds its name to the list of keys
ATTRIBUTES = ['description', 'people', 'location', 'event', 'width', 'height',
              'thumbnail','software','hardware', 'dpi']


class ImageInfo(mediainfo.MediaInfo):
    """
    Digital Images, Photos, Pictures.
    """
    def __init__(self):
        mediainfo.MediaInfo.__init__(self)
        for k in ATTRIBUTES:
            setattr(self,k,None)
            self.keys.append(k)

    def parse_external_files(self, filename):
        """
        Parse external files like bins and .comments.

        The bins file is filename + '.xml', the comment file is a gzip
        compressed xml file with the same name in the subdirectory
        '.comments'. Problems with one of the files are logged and do not
        stop the parsing of the image.
        """
        if os.path.isfile(filename + '.xml'):
            try:
                binsinfo = bins.get_bins_desc(filename)
                known = ATTRIBUTES + mediainfo.MEDIACORE
                # get needed keys from exif infos
                for key in known:
                    if not self[key] and key in binsinfo['exif']:
                        self[key] = binsinfo['exif'][key]
                # get _all_ infos from description
                for key in binsinfo['desc']:
                    self[key] = binsinfo['desc'][key]
                    if key not in known and key not in self.keys:
                        # if it's in desc it must be important
                        self.keys.append(key)
            except Exception:
                log.exception('problem reading the image information')

        comment_file = os.path.join(os.path.dirname(filename), '.comments',
                                    os.path.basename(filename) + '.xml')
        if os.path.isfile(comment_file):
            try:
                f = gzip.open(comment_file)
                try:
                    tree = qp_xml.Parser().parse(f)
                finally:
                    # close the file even if the parser fails
                    f.close()
                for c in tree.children:
                    if c.name == 'Place':
                        self.location = c.textof()
                    if c.name == 'Note':
                        self.description = c.textof()
            except Exception:
                # a broken comment file is no reason to reject the image
                log.exception('problem reading the comment file')


    def add_imaging_information(self, filename):
        """
        Add informations based on imaging (PIL)

        Sets type and mode, and fills in mime, dpi, width and height if
        they are not known yet. Does nothing without PIL. Raises
        KaaMetadataParseError if PIL can not open the file.
        """
        if not PIL:
            return
        try:
            i = PIL.open(filename)
        except Exception:
            raise mediainfo.KaaMetadataParseError()

        if not self.mime:
            self.mime = 'image/%s' % i.format.lower()

        self.type = i.format_description

        if 'dpi' in i.info:
            self['dpi'] = '%sx%s' % i.info['dpi']

        for info in i.info:
            if info != 'exif':
                log.debug('%s: %s' % (info, i.info[info]))

        self.mode = i.mode
        # keep the size found by the parser of the file format
        if not self.height:
            self.width, self.height = i.size

--- NEW FILE: EXIF.py ---
# Library to extract EXIF information in digital camera image files
#
# To use this library call with:
#    f=open(path_name, 'rb')
#    tags=EXIF.process_file(f)
# tags will now be a dictionary mapping names of EXIF tags to their
# values in the file named by path_name.  You can process the tags
# as you wish.  In particular, you can iterate through all the tags with:
#     for tag in tags.keys():
#         if tag not in ('JPEGThumbnail', 'TIFFThumbnail', 'Filename',
#                        'EXIF MakerNote'):
#             print "Key: %s, value %s" % (tag, tags[tag])
# (This code uses the if statement to avoid printing out a few of the
# tags that tend to be long or boring.)
#
# The tags dictionary will include keys for all of the usual EXIF
# tags, and will also include keys for Makernotes used by some
# cameras, for which we have a good specification.
#
[...1155 lines suppressed...]
        print filename+':'
        # data=process_file(file, 1) # with debug info
        data=process_file(file)
        if not data:
            print 'No EXIF information found'
            continue

        x=data.keys()
        x.sort()
        for i in x:
            if i in ('JPEGThumbnail', 'TIFFThumbnail'):
                continue
            try:
                print '   %s (%s): %s' % \
                      (i, FIELD_TYPES[data[i].field_type][2], data[i].printable)
            except:
                print 'error', i, '"', data[i], '"'
        if data.has_key('JPEGThumbnail'):
            print 'File has JPEG thumbnail'
        print

--- NEW FILE: __init__.py ---

--- NEW FILE: tiffinfo.py ---
# -*- coding: iso-8859-1 -*-
# -----------------------------------------------------------------------------
# tiffinfo.py - tiff file parsing
# -----------------------------------------------------------------------------
# $Id: tiffinfo.py,v 1.1 2005/07/02 16:33:12 dischi Exp $
#
# -----------------------------------------------------------------------------
# kaa-Metadata - Media Metadata for Python
# Copyright (C) 2003-2005 Thomas Schueppel, Dirk Meyer
#
# First Edition: Thomas Schueppel <[EMAIL PROTECTED]>
# Maintainer:    Dirk Meyer <[EMAIL PROTECTED]>
#
# Please see the file doc/CREDITS for a complete list of authors.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MER-
# CHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# -----------------------------------------------------------------------------

# python imports
import struct
import zlib
import logging

# kaa imports
from kaa.metadata import mediainfo
from kaa.metadata import factory

# image imports
import IPTC
import EXIF
import core

# get logging object
log = logging.getLogger('metadata')


# The first four bytes of a tiff file: the byte order mark and the number
# 42 in that byte order. TIFFInfo reads files starting with 'MM' as
# big-endian and files starting with 'II' as little-endian.
MOTOROLASIGNATURE = 'MM\x00\x2a'
INTELSIGNATURE = 'II\x2a\x00'

# http://partners.adobe.com/asn/developer/pdfs/tn/TIFF6.pdf

class TIFFInfo(core.ImageInfo):
    """
    TIFF image parser.

    Reads the size of the image and the Photoshop image resources with the
    IPTC data from the first image file directory (IFD) of the file.
    """

    # size in bytes of one value for every field type of an IFD entry
    _TYPE_SIZES = { 1: 1, 2: 1, 3: 2, 4: 4, 5: 8, 6: 1,
                    7: 1, 8: 2, 9: 4, 10: 8, 11: 4, 12: 8 }

    def __init__(self,file):
        """
        Parse the opened tiff file. Raises KaaMetadataParseError if the
        file has no tiff signature or ends inside of the header.
        """
        core.ImageInfo.__init__(self)
        self.iptc = None
        self.mime = 'image/tiff'
        self.type = 'TIFF image'
        self.intel = 0
        iptc = {}
        header = file.read(8)

        # both byte orders share the same code, only the prefix for the
        # struct format differs
        if header[:4] == MOTOROLASIGNATURE:
            self.intel = 0
            endian = '>'
        elif header[:4] == INTELSIGNATURE:
            self.intel = 1
            endian = '<'
        else:
            raise mediainfo.KaaMetadataParseError()
        if len(header) < 8:
            raise mediainfo.KaaMetadataParseError()

        # position of the first IFD
        (ifd_offset,) = struct.unpack(endian + 'I', header[4:8])
        file.seek(ifd_offset)
        raw_count = file.read(2)
        if len(raw_count) < 2:
            raise mediainfo.KaaMetadataParseError()
        (entries,) = struct.unpack(endian + 'H', raw_count)
        app = file.read(entries*12)

        # An entry has 12 bytes: tag, field type, number of values and a
        # four byte field with the values or the offset to the values. An
        # incomplete entry at the end of a truncated file is ignored.
        for pos in range(0, len(app) - 11, 12):
            (tag, field_type, count) = \
                  struct.unpack(endian + 'HHI', app[pos:pos+8])
            field = app[pos+8:pos+12]
            if tag == 0x8649:
                size = count * self._TYPE_SIZES.get(field_type, 1)
                if size > 4:
                    # the field holds the offset to the data
                    (data_offset,) = struct.unpack(endian + 'I', field)
                    file.seek(data_offset)
                    data = file.read(size)
                else:
                    data = field[:size]
                iptc = IPTC.flatten(IPTC.parseiptc(data))
            elif tag == 0x0100:
                value = self._number(endian, field_type, field)
                if value is not None:
                    self.width = value
            elif tag == 0x0101:
                value = self._number(endian, field_type, field)
                if value is not None:
                    self.height = value

        if iptc:
            self.setitem( 'title', iptc, 517 )
            self.setitem( 'date' , iptc, 567 )
            self.setitem( 'comment', iptc, 617 )
            self.setitem( 'keywords', iptc, 537 )
            self.setitem( 'artist', iptc, 592 )
            self.setitem( 'country', iptc, 612 )
            self.setitem( 'caption', iptc, 632 )
            self.appendtable('IPTC', iptc)

        # core stuff
        self.add_imaging_information(file.name)
        if core.PIL:
            self.parse_external_files(file.name)


    def _number(self, endian, field_type, field):
        """
        Return the number stored in the value field of an IFD entry or None
        if the field type is neither SHORT (3) nor LONG (4).
        """
        if field_type == 3:
            # a SHORT uses the first two bytes of the field
            return struct.unpack(endian + 'H', field[:2])[0]
        if field_type == 4:
            return struct.unpack(endian + 'I', field)[0]
        return None


# Register TIFFInfo with the metadata factory for tiff files.
# NOTE(review): the arguments look like mime type, file extensions, media
# type and parser class -- confirm against factory.register.
factory.register( 'image/tiff', ('tif','tiff'), mediainfo.TYPE_IMAGE,
                       TIFFInfo )

--- NEW FILE: bins.py ---
# -*- coding: iso-8859-1 -*-
# -----------------------------------------------------------------------------
# bins.py - bins xml parser
# -----------------------------------------------------------------------------
# $Id: bins.py,v 1.1 2005/07/02 16:33:11 dischi Exp $
#
# -----------------------------------------------------------------------------
# kaa-Metadata - Media Metadata for Python
# Copyright (C) 2003-2005 Thomas Schueppel, Dirk Meyer
#
# First Edition: John Cooper <[EMAIL PROTECTED]>
# Maintainer:    Dirk Meyer <[EMAIL PROTECTED]>
#
# Please see the file doc/CREDITS for a complete list of authors.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MER-
# CHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# -----------------------------------------------------------------------------


from xml.sax import make_parser, ContentHandler
from xml.sax.handler import feature_namespaces
import string
import os
import re


def norm_whitespace(text):
    """
    Return the text with every run of whitespace replaced by one space and
    without whitespace at the beginning and the end.
    """
    words = text.split()
    return ' '.join(words)

# Matches a text with at least one character that is no space, tab or
# newline. Group 1 is the text without the spaces, tabs and newlines around
# it. DOTALL is needed to let '.' match the newlines inside of the text.
RE_TEXT = re.compile("^[ \n\t]*(.*[^ \n\t])[ \n\t]*$", re.DOTALL).match

def norm_text(text):
    """
    Remove redundant whitespaces/tabs/newlines at the beginning and the
    end of the text. A text without any other character is returned
    unchanged.
    """
    m = RE_TEXT(text)
    if m:
        return m.group(1)
    return text


def format_text(text):
    """
    Return the text as one block: spaces, tabs and newlines at both ends
    are removed and a newline followed by a tab, a space or a '*' is
    replaced by one space.
    """
    border = ' \t\n'
    joined = re.sub('\n[\t *]', ' ', text.lstrip(border))
    return joined.rstrip(border)


class BinsDiscription(ContentHandler):
    """
    This is a handler for getting the information from a bins Album.

    The text of the 'field' elements inside of a 'description' element is
    collected in the dictionary desc, the text of the 'tag' elements inside
    of an 'exif' element in the dictionary exif. The key is the attribute
    'name' of the element.
    """
    def __init__(self):
        ContentHandler.__init__(self)
        self.desc = {}
        self.exif = {}
        # flags for the elements the parser is inside of
        self.inDisc = 0
        self.inField = 0
        self.inExif = 0
        self.inTag = 0
        # names of the field and the tag seen last
        self.thisField = None
        self.thisTag = None

    def startElement(self,name,attrs):
        # Check that we have a description section
        if name == u'description':
            self.inDisc = 1
        if name == u'field':
            self.thisField = norm_whitespace(attrs.get('name', ''))
            self.inField = 1
            self.desc[self.thisField] = ''
        if name == u'exif':
            self.inExif = 1
        if name == u'tag':
            self.inTag = 1
            self.thisTag = norm_whitespace(attrs.get('name', ''))
            self.exif[self.thisTag] = ''


    def characters(self,ch):
        # the parser may deliver the text of one element in several parts
        if self.inDisc:
            if self.inField:
                self.desc[self.thisField] = self.desc[self.thisField] + ch
        if self.inExif:
            if self.inTag:
                self.exif[self.thisTag] = self.exif[self.thisTag] + ch


    def endElement(self,name):
        if name == 'description':
            self.inDisc = 0
        if name == 'field':
            self.desc[self.thisField] = norm_text(self.desc[self.thisField])
            self.inField = 0
        if name == 'exif':
            self.inExif = 0
        if name == 'tag':
            # the text of the tag is complete now
            self.exif[self.thisTag] = norm_text(self.exif[self.thisTag])
            self.inTag = 0

def get_bins_desc(binsname):
    """
    Read the bins information for a directory or an image.

    The information is taken from binsname + '/album.xml' or from
    binsname + '.xml'. Returns a dictionary with the two dictionaries
    'desc' (fields of the description) and 'exif' (exif tags). If none of
    the files exists, the base name of binsname is returned as title.
    """
    # check that the xml file exists for a dir or image
    if os.path.isfile(binsname + '/album.xml'):
        xmlfile = binsname + '/album.xml'
    elif os.path.isfile(binsname + '.xml'):
        xmlfile = binsname + '.xml'
    else:
        # nothing to parse
        return {'desc': {'title': os.path.basename(binsname)}, 'exif': {}}

    parser = make_parser()
    parser.setFeature(feature_namespaces,0)
    dh = BinsDiscription()
    parser.setContentHandler(dh)
    parser.parse(xmlfile)

    # remove whitespace at the beginning
    for d in dh.desc:
        dh.desc[d] = format_text(dh.desc[d])
    for d in dh.exif:
        dh.exif[d] = format_text(dh.exif[d])

    return {'desc':dh.desc , 'exif':dh.exif}


if __name__ == '__main__':
    # small test: print the description fields of the file album.xml in
    # the current directory
    parser = make_parser()
    parser.setFeature(feature_namespaces,0)
    dh = BinsDiscription()
    parser.setContentHandler(dh)
    parser.parse('album.xml')
    print(dh.desc)


-------------------------------------------------------
SF.Net email is sponsored by: Discover Easy Linux Migration Strategies
from IBM. Find simple to follow Roadmaps, straightforward articles,
informative Webcasts and more! Get everything you need to get up to
speed, fast. http://ads.osdn.com/?ad_id=7477&alloc_id=16492&op=click
_______________________________________________
Freevo-cvslog mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/freevo-cvslog

Reply via email to