Update of /cvsroot/freevo/kaa/metadata/src/image
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv14035/src/image
Added Files:
.cvsignore EXIF.py IPTC.py __init__.py bins.py core.py
jpginfo.py pilinfo.py pnginfo.py tiffinfo.py
Log Message:
move current mmpython cvs to kaa.metadata
--- NEW FILE: pilinfo.py ---
# -*- coding: iso-8859-1 -*-
# -----------------------------------------------------------------------------
# pilinfo.py - basic image parsing using Imaging (PIL)
# -----------------------------------------------------------------------------
# $Id: pilinfo.py,v 1.1 2005/07/02 16:33:11 dischi Exp $
#
# -----------------------------------------------------------------------------
# kaa-Metadata - Media Metadata for Python
# Copyright (C) 2003-2005 Thomas Schueppel, Dirk Meyer
#
# First Edition: Thomas Schueppel <[EMAIL PROTECTED]>
# Maintainer: Dirk Meyer <[EMAIL PROTECTED]>
#
# Please see the file doc/CREDITS for a complete list of authors.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MER-
# CHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# -----------------------------------------------------------------------------
# python imports
import os
# kaa imports
from kaa.metadata import factory
from kaa.metadata import mediainfo
from core import ImageInfo, PIL
class PILInfo(ImageInfo):
    """
    Image parser for gif and bmp files that relies completely on PIL.
    """
    def __init__(self, file):
        ImageInfo.__init__(self)
        filename = file.name
        extension = os.path.splitext(filename)[1].lower()
        # only .gif / .bmp files are accepted and PIL has to be available
        if extension not in ('.gif', '.bmp') or not PIL:
            raise mediainfo.KaaMetadataParseError()
        # start with empty mime and type before asking the imaging library
        self.mime = ''
        self.type = ''
        self.add_imaging_information(filename)
        self.parse_external_files(filename)
# register PILInfo with the factory for the gif and bmp mime types and
# file extensions
factory.register( 'image/gif', ('gif',), mediainfo.TYPE_IMAGE, PILInfo )
factory.register( 'image/bmp', ('bmp',), mediainfo.TYPE_IMAGE, PILInfo )
--- NEW FILE: .cvsignore ---
*.pyc *.pyo
--- NEW FILE: IPTC.py ---
# -*- coding: iso-8859-1 -*-
# -----------------------------------------------------------------------------
# IPTC.py
# -----------------------------------------------------------------------------
# $Id: IPTC.py,v 1.1 2005/07/02 16:33:11 dischi Exp $
#
# -----------------------------------------------------------------------------
# kaa-Metadata - Media Metadata for Python
# Copyright (C) 2003-2005 Thomas Schueppel, Dirk Meyer
#
# First Edition: Thomas Schueppel <[EMAIL PROTECTED]>
# Maintainer: Dirk Meyer <[EMAIL PROTECTED]>
#
# Please see the file doc/CREDITS for a complete list of authors.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MER-
# CHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# -----------------------------------------------------------------------------
# http://www.ap.org/apserver/userguide/codes.htm
from struct import unpack
def flatten(list):
    """
    Collapse the value lists of a dictionary in place.

    Each value is replaced depending on its length: an empty value becomes
    None, a value with one item becomes that item and everything else
    becomes a tuple.

    Returns the (modified) dictionary. If the argument is not a dictionary
    of sized values, an empty list is returned and the argument is left
    untouched.

    The parameter name shadows the builtin; it is kept so that existing
    callers are not affected.
    """
    flat = {}
    try:
        for key in list.keys():
            val = list[key]
            count = len(val)
            if count == 0:
                flat[key] = None
            elif count == 1:
                flat[key] = val[0]
            else:
                flat[key] = tuple(val)
    except (AttributeError, TypeError, LookupError):
        # not a mapping of sequences (e.g. None from a failed parse)
        return []
    # only touch the argument once every value was converted
    for key in flat:
        list[key] = flat[key]
    return list
def parseiptc(app):
    """
    Extract the IPTC records from a Photoshop image resource section.

    app is the raw byte string of the section, with or without the leading
    'Photoshop 3.0' marker. The '8BIM' resource blocks are walked until the
    block with code 0x0404 (IPTC/NAA data) is found.

    Returns a dictionary mapping the 16 bit record key to the list of raw
    values found for that key, or None if there is no IPTC resource block.
    Truncated input ends the parsing and returns what was read so far.
    """
    if app[:14] == b"Photoshop 3.0\x00":
        app = app[14:]

    # parse the image resource blocks (all numbers are big-endian)
    offset = 0
    data = None
    while app[offset:offset+4] == b"8BIM":
        offset += 4
        # resource code followed by the length byte of the resource name
        head = app[offset:offset+3]
        if len(head) < 3:
            break
        (code, name_len) = unpack(">HB", head)
        # skip the name (usually empty), padded to an even offset
        offset += 3 + name_len
        if offset & 1:
            offset += 1
        # resource data block
        size_field = app[offset:offset+4]
        if len(size_field) < 4:
            break
        size = unpack(">L", size_field)[0]
        offset += 4
        if code == 0x0404:
            # 0x0404 contains IPTC/NAA data
            data = app[offset:offset+size]
            break
        # the data is padded to an even offset as well
        offset += size
        if offset & 1:
            offset += 1
    if not data:
        return None

    # every record starts with the marker 0x1c followed by key and length
    iptc = {}
    offset = 0
    while data[offset:offset+1] == b"\x1c":
        head = data[offset+1:offset+5]
        if len(head) < 4:
            break
        (key, length) = unpack('>HH', head)
        val = data[offset+5:offset+5+length]
        iptc.setdefault(key, []).append(val)
        offset += length + 5
    return iptc
--- NEW FILE: pnginfo.py ---
# -*- coding: iso-8859-1 -*-
# -----------------------------------------------------------------------------
# pnginfo.py - png file parsing
# -----------------------------------------------------------------------------
# $Id: pnginfo.py,v 1.1 2005/07/02 16:33:12 dischi Exp $
#
# -----------------------------------------------------------------------------
# kaa-Metadata - Media Metadata for Python
# Copyright (C) 2003-2005 Thomas Schueppel, Dirk Meyer
#
# First Edition: Thomas Schueppel <[EMAIL PROTECTED]>
# Maintainer: Dirk Meyer <[EMAIL PROTECTED]>
#
# Please see the file doc/CREDITS for a complete list of authors.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MER-
# CHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# -----------------------------------------------------------------------------
# python imports
import struct
import zlib
import logging
# kaa imports
from kaa.metadata import mediainfo
from kaa.metadata import factory
# image imports
import IPTC
import EXIF
import core
# get logging object
log = logging.getLogger('metadata')
# interesting file format info:
# http://www.libpng.org/pub/png/png-sitemap.html#programming
# http://pmt.sourceforge.net/pngmeta/
PNGSIGNATURE = "\211PNG\r\n\032\n"
class PNGInfo(core.ImageInfo):
    """
    Parser for png files.

    After checking the file signature all chunks are read and the content
    of the text chunks (tEXt, zTXt and iTXt) is collected in self.meta.
    """
    def __init__(self,file):
        core.ImageInfo.__init__(self)
        self.iptc = None
        self.mime = 'image/png'
        self.type = 'PNG image'

        signature = file.read(8)
        if signature != PNGSIGNATURE:
            raise mediainfo.KaaMetadataParseError()

        self.meta = {}
        while self._readChunk(file):
            pass
        if self.meta:
            self.appendtable( 'PNGMETA', self.meta )
        for key, value in self.meta.items():
            if key.startswith('Thumb:') or key == 'Software':
                setattr(self, key, value)
                if key not in self.keys:
                    self.keys.append(key)

        # core stuff
        self.add_imaging_information(file.name)
        if core.PIL:
            self.parse_external_files(file.name)

    def _readChunk(self,file):
        """
        Read the next chunk from the file.

        Text chunks are stored in self.meta, all other chunks are skipped.
        Returns 1 if a chunk was read and 0 if no complete chunk is left.
        """
        try:
            (length, chunk_type) = struct.unpack('>I4s', file.read(8))
        except struct.error:
            # less than a complete chunk header left
            return 0

        if chunk_type not in ('tEXt', 'zTXt', 'iTXt'):
            file.seek(length+4,1)
            log.debug("%s of length %d ignored." % (chunk_type, length))
            return 1

        # the chunk data is followed by a 4 byte crc (not verified)
        raw = file.read(length+4)
        if len(raw) != length+4:
            log.debug("%s chunk is truncated." % chunk_type)
            return 0

        try:
            (key, value) = self._parseText(chunk_type, raw[:length])
        except (ValueError, IndexError, zlib.error):
            # one broken text chunk should not make the whole file fail
            log.debug("malformed %s chunk ignored." % chunk_type)
            return 1
        self.meta[key] = value
        return 1

    def _parseText(self, chunk_type, data):
        """
        Split the data of a text chunk into keyword and text.

        Raises ValueError or IndexError if a separator or flag byte is
        missing and zlib.error if compressed text can not be inflated.
        """
        # the keyword ends at the first null byte; only split once because
        # compressed text may contain null bytes itself
        (key, rest) = data.split('\0', 1)

        if chunk_type == 'tEXt':
            log.debug('latin-1 Text found.')
            return key, rest

        if chunk_type == 'zTXt':
            log.debug('Compressed Text found.')
            compression = ord(rest[0])
            value = rest[1:]
            if compression == 0:
                value = zlib.decompress(value)
                log.debug("%s (Compressed %i) -> %s" % \
                          (key,compression,value))
            else:
                # unknown method: keep the raw bytes
                log.debug("%s has unknown Compression %c" % (key,compression))
            return key, value

        # iTXt: compression flag, compression method, language tag,
        # translated keyword and finally the text itself
        log.debug('International Text found.')
        compressed = ord(rest[0])
        compression = ord(rest[1])
        (language, translated, value) = rest[2:].split('\0', 2)
        if compressed:
            if compression == 0:
                value = zlib.decompress(value)
            else:
                log.debug("%s has unknown Compression %c" % (key,compression))
        return key, value
# register PNGInfo with the factory for the png mime type and extension
factory.register( 'image/png', ('png',), mediainfo.TYPE_IMAGE, PNGInfo )
--- NEW FILE: jpginfo.py ---
# -*- coding: iso-8859-1 -*-
# -----------------------------------------------------------------------------
# jpginfo.py - jpg file parsing
# -----------------------------------------------------------------------------
# $Id: jpginfo.py,v 1.1 2005/07/02 16:33:11 dischi Exp $
#
# -----------------------------------------------------------------------------
# kaa-Metadata - Media Metadata for Python
# Copyright (C) 2003-2005 Thomas Schueppel, Dirk Meyer
#
# First Edition: Thomas Schueppel <[EMAIL PROTECTED]>
# Maintainer: Dirk Meyer <[EMAIL PROTECTED]>
#
# Please see the file doc/CREDITS for a complete list of authors.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MER-
# CHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# -----------------------------------------------------------------------------
# python imports
import struct
import logging
# kaa imports
from kaa.metadata import mediainfo
from kaa.metadata import factory
# image imports
import IPTC
import EXIF
import core
# get logging object
log = logging.getLogger('metadata')
# interesting file format info:
# http://www.dcs.ed.ac.uk/home/mxr/gfx/2d-hi.html
# http://www.funducode.com/freec/Fileformats/format3/format3b.htm
# Segment types of the start-of-frame markers with a description of the
# encoding. JPGInfo reads height and width from segments whose type is a
# key of this table.
SOF = { 0xC0 : "Baseline",
0xC1 : "Extended sequential",
0xC2 : "Progressive",
0xC3 : "Lossless",
0xC5 : "Differential sequential",
0xC6 : "Differential progressive",
0xC7 : "Differential lossless",
0xC9 : "Extended sequential, arithmetic coding",
0xCA : "Progressive, arithmetic coding",
0xCB : "Lossless, arithmetic coding",
0xCD : "Differential sequential, arithmetic coding",
0xCE : "Differential progressive, arithmetic coding",
0xCF : "Differential lossless, arithmetic coding",
}
class JPGInfo(core.ImageInfo):
    """
    Parser for jpeg files.

    The segments of the file are walked to get the image size (SOF), the
    IPTC information (APP13) and key/value pairs stored in APP7 segments.
    After that the EXIF parser is run on the whole file.
    """
    def __init__(self,file):
        core.ImageInfo.__init__(self)
        iptc_info = None
        self.mime = 'image/jpeg'
        self.type = 'jpeg image'

        if file.read(2) != '\xff\xd8':
            raise mediainfo.KaaMetadataParseError()

        file.seek(-2,2)
        if file.read(2) != '\xff\xd9':
            # Normally an JPEG should end in ffd9. This does not however
            # we assume it's an jpeg for now
            log.info("Wrong encode found for jpeg")

        file.seek(2)
        header = file.read(4)
        self.meta = {}
        while len(header) == 4:
            (ff,segtype,seglen) = struct.unpack(">BBH", header)
            if ff != 0xff:
                break
            log.debug("SEGMENT: 0x%x%x, len=%d" % (ff,segtype,seglen))
            if segtype in (0xd9, 0xda):
                # end of image or start of scan: what follows the scan
                # header is image data and not a list of segments
                break
            if seglen < 2:
                # the length includes its own two bytes, anything less is
                # broken and would result in reading the rest of the file
                break
            if segtype in SOF:
                data = file.read(seglen-2)
                if len(data) >= 6:
                    (precision,self.height,self.width,\
                     num_comp) = struct.unpack('>BHHB', data[:6])
            elif segtype == 0xed:
                # keep on reading after this segment, the SOF segment with
                # the image size may still follow
                found = IPTC.flatten(IPTC.parseiptc(file.read(seglen-2)))
                if found and not iptc_info:
                    iptc_info = found
            elif segtype == 0xe7:
                # information created by libs like epeg
                data = file.read(seglen-2)
                if data.count('\n') == 1:
                    key, value = data.split('\n')
                    self.meta[key] = value
            else:
                file.seek(seglen-2,1)
            header = file.read(4)

        file.seek(0)
        exif_info = EXIF.process_file(file)
        if exif_info:
            self.setitem( 'date', exif_info, 'Image DateTime', True )
            self.setitem( 'artist', exif_info, 'Image Artist', True )
            self.setitem( 'hardware', exif_info, 'Image Model', True )
            self.setitem( 'software', exif_info, 'Image Software', True )
            self.setitem( 'thumbnail', exif_info, 'JPEGThumbnail', True )
            self.appendtable( 'EXIF', exif_info )

        if iptc_info:
            self.setitem( 'title', iptc_info, 517, True )
            self.setitem( 'date' , iptc_info, 567, True )
            self.setitem( 'comment', iptc_info, 617, True )
            self.setitem( 'keywords', iptc_info, 537, True )
            self.setitem( 'artist', iptc_info, 592, True )
            self.setitem( 'country', iptc_info, 612, True )
            self.setitem( 'caption', iptc_info, 632, True )
            self.appendtable( 'IPTC', iptc_info )

        if self.meta:
            self.appendtable( 'JPGMETA', self.meta )
        for key, value in self.meta.items():
            if key.startswith('Thumb:') or key == 'Software':
                setattr(self, key, value)
                if key not in self.keys:
                    self.keys.append(key)

        # core stuff
        self.add_imaging_information(file.name)
        if core.PIL:
            self.parse_external_files(file.name)
# register JPGInfo with the factory for the jpeg mime type and the
# extensions jpg and jpeg
factory.register( 'image/jpeg', ('jpg','jpeg'), mediainfo.TYPE_IMAGE,
JPGInfo )
--- NEW FILE: core.py ---
# -*- coding: iso-8859-1 -*-
# -----------------------------------------------------------------------------
# core.py - basic image parsing using Imaging
# -----------------------------------------------------------------------------
# $Id: core.py,v 1.1 2005/07/02 16:33:11 dischi Exp $
#
# -----------------------------------------------------------------------------
# kaa-Metadata - Media Metadata for Python
# Copyright (C) 2003-2005 Thomas Schueppel, Dirk Meyer
#
# First Edition: Thomas Schueppel <[EMAIL PROTECTED]>
# Maintainer: Dirk Meyer <[EMAIL PROTECTED]>
#
# Please see the file doc/CREDITS for a complete list of authors.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MER-
# CHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# -----------------------------------------------------------------------------
# python imports
import os
import gzip
import logging
from xml.utils import qp_xml
# kaa imports
from kaa.metadata import factory
from kaa.metadata import mediainfo
import bins
# get logging object
log = logging.getLogger('metadata')

# The imaging library is optional. PIL is None if it can not be imported.
try:
    import Image as PIL
except ImportError:
    log.info('Python Imaging not found')
    PIL = None
# attributes for image files; ImageInfo sets each of them to None and adds
# the name to its list of keys
ATTRIBUTES = ['description', 'people', 'location', 'event', 'width', 'height',
'thumbnail','software','hardware', 'dpi']
class ImageInfo(mediainfo.MediaInfo):
    """
    Digital Images, Photos, Pictures.
    """
    def __init__(self):
        mediainfo.MediaInfo.__init__(self)
        for k in ATTRIBUTES:
            setattr(self,k,None)
            self.keys.append(k)


    def parse_external_files(self, filename):
        """
        Parse external files like bins and .comments.

        Both files are optional and problems reading them are only logged.
        """
        if os.path.isfile(filename + '.xml'):
            try:
                binsinfo = bins.get_bins_desc(filename)
                known = ATTRIBUTES + mediainfo.MEDIACORE
                # get needed keys from exif infos
                for key in known:
                    if not self[key] and key in binsinfo['exif']:
                        self[key] = binsinfo['exif'][key]
                # get _all_ infos from description
                for key in binsinfo['desc']:
                    self[key] = binsinfo['desc'][key]
                    if key not in known:
                        # if it's in desc it must be important
                        self.keys.append(key)
            except Exception:
                log.exception('problem reading the image information')

        comment_file = os.path.join(os.path.dirname(filename), '.comments',
                                    os.path.basename(filename) + '.xml')
        if os.path.isfile(comment_file):
            try:
                f = gzip.open(comment_file)
                try:
                    tree = qp_xml.Parser().parse(f)
                finally:
                    # close the file even if the parser fails
                    f.close()
                for c in tree.children:
                    if c.name == 'Place':
                        self.location = c.textof()
                    if c.name == 'Note':
                        self.description = c.textof()
            except Exception:
                # the comment file is optional, a broken one is ignored
                log.exception('problem reading the image comments')


    def add_imaging_information(self, filename):
        """
        Add informations based on imaging (PIL)

        Does nothing if PIL is not available. Raises KaaMetadataParseError
        if PIL can not open the file.
        """
        if not PIL:
            return

        try:
            i = PIL.open(filename)
        except Exception:
            raise mediainfo.KaaMetadataParseError()

        if not self.mime:
            self.mime = 'image/%s' % i.format.lower()

        self.type = i.format_description

        if 'dpi' in i.info:
            self['dpi'] = '%sx%s' % i.info['dpi']

        for info in i.info:
            if not info == 'exif':
                log.debug('%s: %s' % (info, i.info[info]))

        self.mode = i.mode
        if not self.height:
            # no size found by the parser, use the one from PIL
            self.width, self.height = i.size
--- NEW FILE: EXIF.py ---
# Library to extract EXIF information in digital camera image files
#
# To use this library call with:
# f=open(path_name, 'rb')
# tags=EXIF.process_file(f)
# tags will now be a dictionary mapping names of EXIF tags to their
# values in the file named by path_name. You can process the tags
# as you wish. In particular, you can iterate through all the tags with:
# for tag in tags.keys():
# if tag not in ('JPEGThumbnail', 'TIFFThumbnail', 'Filename',
# 'EXIF MakerNote'):
# print "Key: %s, value %s" % (tag, tags[tag])
# (This code uses the if statement to avoid printing out a few of the
# tags that tend to be long or boring.)
#
# The tags dictionary will include keys for all of the usual EXIF
# tags, and will also include keys for Makernotes used by some
# cameras, for which we have a good specification.
#
[...1155 lines suppressed...]
print filename+':'
# data=process_file(file, 1) # with debug info
data=process_file(file)
if not data:
print 'No EXIF information found'
continue
x=data.keys()
x.sort()
for i in x:
if i in ('JPEGThumbnail', 'TIFFThumbnail'):
continue
try:
print ' %s (%s): %s' % \
(i, FIELD_TYPES[data[i].field_type][2], data[i].printable)
except:
print 'error', i, '"', data[i], '"'
if data.has_key('JPEGThumbnail'):
print 'File has JPEG thumbnail'
print
--- NEW FILE: __init__.py ---
--- NEW FILE: tiffinfo.py ---
# -*- coding: iso-8859-1 -*-
# -----------------------------------------------------------------------------
# tiffinfo.py - tiff file parsing
# -----------------------------------------------------------------------------
# $Id: tiffinfo.py,v 1.1 2005/07/02 16:33:12 dischi Exp $
#
# -----------------------------------------------------------------------------
# kaa-Metadata - Media Metadata for Python
# Copyright (C) 2003-2005 Thomas Schueppel, Dirk Meyer
#
# First Edition: Thomas Schueppel <[EMAIL PROTECTED]>
# Maintainer: Dirk Meyer <[EMAIL PROTECTED]>
#
# Please see the file doc/CREDITS for a complete list of authors.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MER-
# CHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# -----------------------------------------------------------------------------
# python imports
import struct
import zlib
import logging
# kaa imports
from kaa.metadata import mediainfo
from kaa.metadata import factory
# image imports
import IPTC
import EXIF
import core
# get logging object
log = logging.getLogger('metadata')
MOTOROLASIGNATURE = 'MM\x00\x2a'
INTELSIGNATURE = 'II\x2a\x00'
# http://partners.adobe.com/asn/developer/pdfs/tn/TIFF6.pdf
class TIFFInfo(core.ImageInfo):
    """
    Parser for tiff files.

    Reads the entries of the first image file directory to get the image
    size and the IPTC information (tag 0x8649).
    """
    def __init__(self,file):
        core.ImageInfo.__init__(self)
        self.iptc = None
        self.mime = 'image/tiff'
        self.type = 'TIFF image'
        self.intel = 0
        iptc = {}

        # the signature defines the byte order of all following numbers
        header = file.read(8)
        if header[:4] == MOTOROLASIGNATURE:
            self.intel = 0
            order = '>'
        elif header[:4] == INTELSIGNATURE:
            self.intel = 1
            order = '<'
        else:
            raise mediainfo.KaaMetadataParseError()

        try:
            (offset,) = struct.unpack(order + 'I', header[4:8])
            file.seek(offset)
            (count,) = struct.unpack(order + 'H', file.read(2))
        except struct.error:
            # file ends before the directory
            raise mediainfo.KaaMetadataParseError()

        app = file.read(count*12)
        for i in range(count):
            entry = app[i*12:i*12+12]
            if len(entry) < 12:
                break
            # the last four bytes are a value or an offset to the data
            (tag, field_type, length) = struct.unpack(order + 'HHI', entry[:8])
            raw = entry[8:]
            if tag == 0x8649:
                (data_offset,) = struct.unpack(order + 'I', raw)
                file.seek(data_offset)
                iptc = IPTC.flatten(IPTC.parseiptc(file.read(1000)))
            elif tag == 0x0100:
                self.width = self._getSize(order, field_type, raw)
            elif tag == 0x0101:
                self.height = self._getSize(order, field_type, raw)

        if iptc:
            self.setitem( 'title', iptc, 517 )
            self.setitem( 'date' , iptc, 567 )
            self.setitem( 'comment', iptc, 617 )
            self.setitem( 'keywords', iptc, 537 )
            self.setitem( 'artist', iptc, 592 )
            self.setitem( 'country', iptc, 612 )
            self.setitem( 'caption', iptc, 632 )
            self.appendtable('IPTC', iptc)

        # core stuff
        self.add_imaging_information(file.name)
        if core.PIL:
            self.parse_external_files(file.name)
        return

    def _getSize(self, order, field_type, raw):
        """
        Return the number stored in the four value bytes of an entry.
        """
        if field_type == 3:
            # SHORT: only the first two bytes are used
            return struct.unpack(order + 'H', raw[:2])[0]
        return struct.unpack(order + 'I', raw)[0]
# register TIFFInfo with the factory for the tiff mime type and the
# extensions tif and tiff
factory.register( 'image/tiff', ('tif','tiff'), mediainfo.TYPE_IMAGE,
TIFFInfo )
--- NEW FILE: bins.py ---
# -*- coding: iso-8859-1 -*-
# -----------------------------------------------------------------------------
# bins.py - bins xml parser
# -----------------------------------------------------------------------------
# $Id: bins.py,v 1.1 2005/07/02 16:33:11 dischi Exp $
#
# -----------------------------------------------------------------------------
# kaa-Metadata - Media Metadata for Python
# Copyright (C) 2003-2005 Thomas Schueppel, Dirk Meyer
#
# First Edition: John Cooper <[EMAIL PROTECTED]>
# Maintainer: Dirk Meyer <[EMAIL PROTECTED]>
#
# Please see the file doc/CREDITS for a complete list of authors.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MER-
# CHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# -----------------------------------------------------------------------------
from xml.sax import make_parser, ContentHandler
from xml.sax.handler import feature_namespaces
import string
import os
import re
def norm_whitespace(text):
    # Split on any run of whitespace and glue the words together again
    # with a single blank
    words = text.split()
    return ' '.join(words)
# matches a text and puts everything between the whitespaces/tabs/newlines
# at the beginning and the end into the first group
RE_TEXT = re.compile("^[ \n\t]*(.*[^ \n\t])[ \n\t]*$").match

def norm_text(text):
    # return the text without the surrounding whitespace; a text the
    # expression does not match is returned as it is
    found = RE_TEXT(text)
    if found is None:
        return text
    return found.group(1)
def format_text(text):
    # drop blanks, tabs and newlines at the beginning, join continuation
    # lines (newline followed by tab, blank or '*') with one blank and
    # finally drop blanks, tabs and newlines at the end
    text = text.lstrip(' \t\n')
    text = re.sub('\n[\t *]', ' ', text)
    return text.rstrip(' \t\n')
class BinsDiscription(ContentHandler):
    """
    This is a handler for getting the information from a bins Album.

    The text of the field elements inside a description element is stored
    in self.desc and the text of the tag elements inside an exif element
    in self.exif, both with the 'name' attribute of the element as key.
    """
    def __init__(self):
        ContentHandler.__init__(self)
        self.desc = {}
        self.exif = {}
        self.inDisc = 0
        self.inField = 0
        self.inExif = 0
        self.inTag = 0
        # name of the field / tag that is currently read
        self.thisField = None
        self.thisTag = None

    def startElement(self,name,attrs):
        # Check that we have a description section
        if name == u'description':
            self.inDisc = 1
        if name == u'field':
            self.thisField = norm_whitespace(attrs.get('name', ''))
            self.inField = 1
            self.desc[self.thisField] = ''
        if name == u'exif':
            self.inExif = 1
        if name == u'tag':
            self.inTag = 1
            self.thisTag = norm_whitespace(attrs.get('name', ''))
            self.exif[self.thisTag] = ''

    def characters(self,ch):
        # the text may be delivered in several pieces
        if self.inDisc:
            if self.inField:
                self.desc[self.thisField] = self.desc[self.thisField] + ch
        if self.inExif:
            if self.inTag:
                self.exif[self.thisTag] = self.exif[self.thisTag] + ch

    def endElement(self,name):
        if name == 'description':
            self.inDisc = 0
        if name == 'field':
            self.desc[self.thisField] = norm_text(self.desc[self.thisField])
            self.inField = 0
        if name == 'exif':
            self.inExif = 0
        if name == 'tag':
            # the text of this tag is complete now
            self.exif[self.thisTag] = norm_text(self.exif[self.thisTag])
            self.inTag = 0
def get_bins_desc(binsname):
    """
    Read the bins information for an album directory or an image.

    binsname is the name of the directory (the information is read from
    album.xml inside of it) or of the image (the information is read from
    the file with '.xml' added to the name).

    Returns a dictionary with the keys 'desc' and 'exif', each holding a
    dictionary with the values found in the xml file. Without an xml file
    'desc' only has the title based on binsname and 'exif' is empty.
    """
    # check that the xml file exists for a dir or image
    if os.path.isfile(binsname + '/album.xml'):
        xmlfile = binsname + '/album.xml'
    elif os.path.isfile(binsname + '.xml'):
        xmlfile = binsname + '.xml'
    else:
        # nothing to parse, the name is the only thing we know
        return {'desc': {'title': os.path.basename(binsname)}, 'exif': {}}

    parser = make_parser()
    parser.setFeature(feature_namespaces,0)
    dh = BinsDiscription()
    parser.setContentHandler(dh)
    parser.parse(xmlfile)

    # remove whitespace at the beginning
    for d in dh.desc:
        dh.desc[d] = format_text(dh.desc[d])
    for d in dh.exif:
        dh.exif[d] = format_text(dh.exif[d])
    return {'desc':dh.desc , 'exif':dh.exif}
if __name__ == '__main__':
    # small test: print the description found in album.xml in the
    # current directory
    parser = make_parser()
    parser.setFeature(feature_namespaces,0)
    dh = BinsDiscription()
    parser.setContentHandler(dh)
    parser.parse('album.xml')
    print(dh.desc)
-------------------------------------------------------
SF.Net email is sponsored by: Discover Easy Linux Migration Strategies
from IBM. Find simple to follow Roadmaps, straightforward articles,
informative Webcasts and more! Get everything you need to get up to
speed, fast. http://ads.osdn.com/?ad_id=7477&alloc_id=16492&op=click
_______________________________________________
Freevo-cvslog mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/freevo-cvslog