Update of /cvsroot/freevo/freevo/src/mediadb
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv22053
Modified Files:
__init__.py db.py debug.py item.py listing.py
Added Files:
audio_parser.py cache.py parser.py video_parser.py
Log Message:
improve parser
Index: debug.py
===================================================================
RCS file: /cvsroot/freevo/freevo/src/mediadb/debug.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -d -r1.1 -r1.2
*** debug.py 13 Mar 2005 10:13:35 -0000 1.1
--- debug.py 4 Apr 2005 18:28:15 -0000 1.2
***************
*** 49,52 ****
--- 49,53 ----
import util.cache as cache
from util.callback import *
+ from listing import Listing
log = logging.getLogger('mediainfo')
--- NEW FILE: parser.py ---
# python imports
import os
import stat
import mmpython
import pickle
import cPickle
import re
# freevo imports
import config
import util.fxdparser
import util.vfs as vfs
# list of external parser modules
_parser = []
def _init():
    """
    Init the parser module.

    Search the directory of this file for files with a name ending with
    '_parser.py', import each of them as module and append the module
    object to the module level list _parser.
    """
    # dirname is an empty string when __file__ has no directory part,
    # os.listdir needs a real path in that case
    directory = os.path.dirname(__file__) or '.'
    # os.listdir returns the names in arbitrary order, sort them to
    # register the parsers in the same order on every run
    names = os.listdir(directory)
    names.sort()
    for name in names:
        if name.endswith('_parser.py'):
            # import by name with the globals of this module instead of
            # building python source for exec and eval from a filename
            module = __import__(name[:-3], globals(), locals(), [])
            _parser.append(module)
def _simplify(object):
    """
    Reduce a mmpython object to a plain dict.

    mmpython has huge objects to cache and we don't need them. The
    returned dict holds the attributes listed in object.keys (without
    'thumbnail' and 'url'), the simplified 'video', 'audio' and 'tracks'
    lists and the attributes 'subtitles', 'chapters', 'mime' and 'id'.
    Attributes with an empty value are left out.
    """
    simple = {}
    for key in object.keys:
        if key in [ 'thumbnail', 'url' ]:
            continue
        value = getattr(object, key)
        if value != None:
            if isstring(value):
                # remove zero bytes and surrounding whitespace
                value = Unicode(value.replace('\0', '').strip())
            if value:
                simple[key] = value

    # an AVCORE object has lists of video and audio objects, they are
    # simplified the same way
    for key in ( 'video', 'audio' ):
        streams = getattr(object, key, None)
        if streams:
            simple[key] = [ _simplify(stream) for stream in streams ]

    # track informations for dvd, every track gets an 'audio' and a
    # 'subtitles' entry
    tracks = getattr(object, 'tracks', None)
    if tracks:
        simple['tracks'] = []
        for entry in tracks:
            track = _simplify(entry)
            track.setdefault('audio', [])
            track.setdefault('subtitles', [])
            simple['tracks'].append(track)

    # these attributes are copied as they are
    for key in ( 'subtitles', 'chapters', 'mime', 'id' ):
        value = getattr(object, key, None)
        if value:
            simple[key] = value
    return simple
def _parse_fxd_node(node):
    """
    Convert a fxd node into a tuple (name, attrs, children, text,
    first_cdata, following_cdata). The children are converted the same
    way by calling this function for each of them.
    """
    subtree = [ _parse_fxd_node(child) for child in node.children ]
    return (node.name, node.attrs, subtree, node.textof(),
            node.first_cdata, node.following_cdata)
def _parse_fxd(filename):
    """
    Parse a fxd file.

    Return a tuple (is_skin_fxd, tree). is_skin_fxd is True when one of
    the nodes below the root node has the name 'skin', tree is a list
    with the result of _parse_fxd_node for every node below the root.
    An empty dict is returned when the root node is not 'freevo'.
    """
    root = util.fxdparser.FXDtree(filename, False).tree
    if root.name != 'freevo':
        return {}
    is_skin_fxd = False
    tree = []
    for node in root.children:
        if node.name == 'skin':
            is_skin_fxd = True
        tree.append(_parse_fxd_node(node))
    return is_skin_fxd, tree
# regexp used in _getname: the text up to the first underscore, the
# character behind that underscore and the rest of the name
_FILENAME_REGEXP = re.compile("^(.*?)_(.)(.*)$")

def _getname(file):
    """
    Make a nicer display name from file.

    The directory and the extension are removed and the first character
    is converted to upper case. If the filename has an underscore behind
    its first character, every underscore followed by a character is
    replaced by a space with that character in upper case. An underscore
    at the end of the name is removed.
    """
    if len(file) < 2:
        return Unicode(file)

    # basename without ext
    slash = file.rfind('/')
    dot = file.rfind('.')
    if slash < dot:
        name = file[slash+1:dot]
    else:
        name = file[slash+1:]
    if not name:
        # nothing between the last slash and the last dot (a dot file),
        # return the complete filename
        return Unicode(file)

    name = name[0].upper() + name[1:]
    if file.find('_') > 0:
        match = _FILENAME_REGEXP.match(name)
        while match:
            name = match.group(1) + ' ' + match.group(2).upper() + \
                   match.group(3)
            match = _FILENAME_REGEXP.match(name)
    if name.endswith('_'):
        name = name[:-1]
    return Unicode(name)
def cover_filter(x):
    """
    Filter function to get valid cover names. Return the result of a
    case insensitive search for config.AUDIO_COVER_REGEXP in x.
    """
    pattern = re.compile(config.AUDIO_COVER_REGEXP, re.IGNORECASE)
    return pattern.search(x)
def parse(filename, object):
    """
    Add additional informations to filename, object.

    object is changed in place: the title, the pickled mmpython and fxd
    data and, for a directory, the cover and overlay informations are
    stored in it. After that the parse function of every external
    parser module is called with filename, object and the mmpython
    result (None for xml and fxd files).
    """
    if not _parser:
        _init()

    # xml and fxd files are not given to mmpython
    mminfo = None
    if object['ext'] not in [ 'xml', 'fxd' ]:
        mminfo = mmpython.parse(filename)

    title = _getname(filename)
    object['title:filename'] = title
    if mminfo:
        # store mmpython data as pickle for faster loading
        simple = _simplify(mminfo)
        object['mminfo'] = cPickle.dumps(simple, pickle.HIGHEST_PROTOCOL)
        # the title based on the filename is only the fallback
        object['title'] = mminfo.title or title
    else:
        # remove mmpython data of a previous run
        if 'mminfo' in object:
            del object['mminfo']
        object['title'] = title

    if filename.endswith('.fxd'):
        # store fxd tree as pickle for faster loading
        fxd = _parse_fxd(filename)
        object['fxd'] = cPickle.dumps(fxd, pickle.HIGHEST_PROTOCOL)

    if not os.path.isdir(filename):
        if 'isdir' in object:
            del object['isdir']
    else:
        object['isdir'] = True
        listing = vfs.listdir(filename, include_overlay=True)

        # get directory cover, the first file with a matching name wins
        cover = None
        for entry in listing:
            if entry.endswith('/cover.png') or \
                   entry.endswith('/cover.jpg') or \
                   entry.endswith('/cover.gif'):
                cover = entry
                break
        if cover is not None:
            object['cover'] = cover
        elif 'cover' in object:
            del object['cover']

        # get audio cover: the only image in the directory or, with up
        # to nine images, the first one accepted by cover_filter
        if 'audiocover' in object:
            del object['audiocover']
        images = util.find_matches(listing, ('jpg', 'gif', 'png' ))
        count = len(images)
        if count == 1:
            object['audiocover'] = images[0]
        elif 1 < count < 10:
            images = filter(cover_filter, images)
            if images:
                object['audiocover'] = images[0]

        # save directory overlay mtime, 0 without overlay directory
        overlay = vfs.getoverlay(filename)
        mtime = 0
        if os.path.isdir(overlay):
            mtime = os.stat(overlay)[stat.ST_MTIME]
        object['overlay_mtime'] = mtime

    # call external parser
    for module in _parser:
        module.parse(filename, object, mminfo)
def cache():
    """
    Function for the 'cache' helper. Load the external parser modules
    if that is not done yet and call the cache function of each one.
    """
    if len(_parser) == 0:
        _init()
    for module in _parser:
        module.cache()
--- NEW FILE: video_parser.py ---
def parse(filename, object, mminfo):
    """
    Parse additional data for video files.

    A 'type' entry with a dvd value and the 'url' entry belonging to it
    are removed from object first. If mminfo has the type 'DVD', a
    'dvd://' url and the type 'dvd' are stored in object again.
    """
    if 'type' in object:
        stored = object['type']
        # this function stores the type as 'dvd', compare without case
        # to find that value again and not only 'DVD'
        if hasattr(stored, 'lower') and stored.lower() == 'dvd':
            if 'url' in object:
                del object['url']
            del object['type']
    if mminfo and mminfo.type == 'DVD':
        object['url'] = 'dvd://' + filename
        object['type'] = 'dvd'
def cache():
    """
    Function for the 'cache' helper. This parser has nothing to do
    here.
    """
    return None
--- NEW FILE: cache.py ---
import os
import sys
import util.fileops as fileops
import notifier
from listing import Listing
class ProgressBox:
def __init__(self, msg, max):
self.msg = msg
self.max = max
self.pos = 0
print '\r%-70s 0%%' % msg,
sys.__stdout__.flush()
def callback(self):
self.pos += 1
progress = '%3d%%' % (self.pos * 100 / self.max)
print '\r%-70s %s' % (self.msg, progress),
sys.__stdout__.flush()
def cache_directories(directories, rebuild=False):
"""
cache all directories with mmpython
"""
if rebuild:
print 'deleting cache files..................................',
sys.__stdout__.flush()
mediainfo.del_cache()
print 'done'
print 'checking mmpython cache files.........................',
sys.__stdout__.flush()
listings = []
for d in directories:
if d.num_changes:
listings.append(d)
print '%s changes' % len(listings)
# cache all dirs
for l in listings:
name = l.dirname
if len(name) > 55:
name = name[:15] + ' [...] ' + name[-35:]
msg = ProgressBox(' %4d/%-4d %s' % (listings.index(l) + 1,
len(listings), name),
l.num_changes)
l.update(msg.callback)
l.cache.save()
print
# MAIN
import config
import parser
import db
def get_direcories(dirlist, msg):
"""
cache a list of directories recursive
"""
if not dirlist:
return
all_dirs = []
all_listing = []
# create a list of all subdirs
for dir in dirlist:
progress = '%3d%%' % (dirlist.index(dir) * 100 / len(dirlist))
print '\r%s %s' % (msg, progress),
sys.__stdout__.flush()
for dirname in fileops.get_subdirs_recursively(dir):
if not dirname in all_dirs:
all_dirs.append(dirname)
all_listing.append(Listing(dirname))
if not dir in all_dirs:
all_dirs.append(dir)
all_listing.append(Listing(dir))
return all_dirs, all_listing
notifier.init( notifier.GENERIC )
all_dirs = []
msg = 'scanning directory structure..........................'
print msg,
sys.__stdout__.flush()
config.AUDIO_ITEMS = [ ( 'foo', '/local/mp3/Artists/Dixie Chicks' ) ]
config.VIDEO_ITEMS = [ ( 'foo', '/local/video/movie' ) ]
config.IMAGE_ITEMS = [ ( 'foo', '/local/images/fotos/misc' ) ]
for d in config.VIDEO_ITEMS + config.AUDIO_ITEMS + config.IMAGE_ITEMS:
if os.path.isdir(d[1]) and d[1] != '/':
all_dirs.append(d[1])
all_dirs, all_listing = get_direcories(all_dirs, msg)
print '\r%s done' % msg
cache_directories(all_listing)
parser.cache()
db.save()
Index: item.py
===================================================================
RCS file: /cvsroot/freevo/freevo/src/mediadb/item.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -d -r1.1 -r1.2
*** item.py 13 Mar 2005 10:13:35 -0000 1.1
--- item.py 4 Apr 2005 18:28:15 -0000 1.2
***************
*** 55,59 ****
self.filename = self.dirname + '/' + self.basename
self.cache = cache
!
def __str__(self):
--- 55,62 ----
self.filename = self.dirname + '/' + self.basename
self.cache = cache
! if self.attr.has_key('url'):
! self.url = self.attr['url']
! else:
! self.url = 'file://' + self.filename
def __str__(self):
Index: db.py
===================================================================
RCS file: /cvsroot/freevo/freevo/src/mediadb/db.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -d -r1.1 -r1.2
*** db.py 13 Mar 2005 10:13:35 -0000 1.1
--- db.py 4 Apr 2005 18:28:15 -0000 1.2
***************
*** 36,42 ****
import os
import stat
- import mmpython
- import pickle
- import cPickle
import re
import logging
--- 36,39 ----
***************
*** 44,217 ****
# freevo imports
import config
- import util.fxdparser
import util.vfs as vfs
import util.cache as cache
from util.callback import *
# get logging object
log = logging.getLogger('mediadb')
! VERSION = 1.6
!
! def _simplify(object):
! """
! mmpython has huge objects to cache, we don't need them.
! This function simplifies them to be only string, integer, dict or
! list of one of those above. This makes the caching much faster
! """
! ret = {}
! for k in object.keys:
! if not k in [ 'thumbnail', 'url' ] and getattr(object,k) != None:
! value = getattr(object,k)
! if isstring(value):
! value = Unicode(value.replace('\0', '').lstrip().rstrip())
! if value:
! ret[k] = value
!
! for k in ( 'video', 'audio'):
! # if it's an AVCORE object, also simplify video and audio
! # lists to string and it
! if hasattr(object, k) and getattr(object, k):
! ret[k] = []
! for o in getattr(object, k):
! ret[k].append(_simplify(o))
!
! if hasattr(object, 'tracks') and object.tracks:
! # read track informations for dvd
! ret['tracks'] = []
! for o in object.tracks:
! track = _simplify(o)
! if not track.has_key('audio'):
! track['audio'] = []
! if not track.has_key('subtitles'):
! track['subtitles'] = []
! ret['tracks'].append(track)
!
! for k in ('subtitles', 'chapters', 'mime', 'id' ):
! if hasattr(object, k) and getattr(object, k):
! ret[k] = getattr(object, k)
!
! return ret
!
! def _parse_fxd_node(node):
! children = []
! for c in node.children:
! children.append(_parse_fxd_node(c))
! return (node.name, node.attrs, children, node.textof(), node.first_cdata,
! node.following_cdata)
!
!
! def _parse_fxd(filename):
! data = util.fxdparser.FXDtree(filename, False)
! if data.tree.name != 'freevo':
! return {}
! is_skin_fxd = False
! for node in data.tree.children:
! if node.name == 'skin':
! is_skin_fxd = True
! break
! tree = []
! for node in data.tree.children:
! tree.append(_parse_fxd_node(node))
! return is_skin_fxd, tree
!
!
! def _cover_filter(x):
! """
! filter function to get valid cover names
! """
! return re.search(config.AUDIO_COVER_REGEXP, x, re.IGNORECASE)
!
!
! def _add_info(filename, object):
! mminfo = None
! if not object['ext'] in [ 'xml', 'fxd' ]:
! mminfo = mmpython.parse(filename)
! title = _getname(filename)
! object['title:filename'] = title
! if mminfo:
! # store mmpython data as pickle for faster loading
! object['mminfo'] = cPickle.dumps(_simplify(mminfo),
! pickle.HIGHEST_PROTOCOL)
! if mminfo.title:
! object['title'] = mminfo.title
! else:
! object['title'] = title
! elif object.has_key('mminfo'):
! del object['mminfo']
! object['title'] = title
! else:
! object['title'] = title
!
! if filename.endswith('.fxd'):
! # store fxd tree as pickle for faster loading
! object['fxd'] = cPickle.dumps(_parse_fxd(filename),
! pickle.HIGHEST_PROTOCOL)
!
! if os.path.isdir(filename):
! object['isdir'] = True
! listing = vfs.listdir(filename, include_overlay=True)
! # get directory cover
! for l in listing:
! if l.endswith('/cover.png') or l.endswith('/cover.jpg') or \
! l.endswith('/cover.gif'):
! object['cover'] = l
! break
! else:
! if object.has_key('cover'):
! del object['cover']
! if object.has_key('audiocover'):
! del object['audiocover']
! files = util.find_matches(listing, ('jpg', 'gif', 'png' ))
! if len(files) == 1:
! object['audiocover'] = files[0]
! elif len(files) > 1 and len(files) < 10:
! files = filter(_cover_filter, files)
! if files:
! object['audiocover'] = files[0]
!
! # save directory overlay mtime
! overlay = vfs.getoverlay(filename)
! if os.path.isdir(overlay):
! mtime = os.stat(overlay)[stat.ST_MTIME]
! object['overlay_mtime'] = mtime
! else:
! object['overlay_mtime'] = 0
! else:
! if object.has_key('isdir'):
! del object['isdir']
!
!
! _FILENAME_REGEXP = re.compile("^(.*?)_(.)(.*)$")
!
! def _getname(file):
! """
! make a nicer display name from file
! """
! if len(file) < 2:
! return Unicode(file)
!
! # basename without ext
! if file.rfind('/') < file.rfind('.'):
! name = file[file.rfind('/')+1:file.rfind('.')]
! else:
! name = file[file.rfind('/')+1:]
! if not name:
! # Strange, it is a dot file, return the complete
! # filename, I don't know what to do here. This should
! # never happen
! return Unicode(file)
!
! name = name[0].upper() + name[1:]
!
! while file.find('_') > 0 and _FILENAME_REGEXP.match(name):
! m = _FILENAME_REGEXP.match(name)
! if m:
! name = m.group(1) + ' ' + m.group(2).upper() + m.group(3)
! if name.endswith('_'):
! name = name[:-1]
! return Unicode(name)
!
!
class CacheList:
--- 41,55 ----
# freevo imports
import config
import util.vfs as vfs
import util.cache as cache
from util.callback import *
+ # mediadb imports
+ import parser
+
# get logging object
log = logging.getLogger('mediadb')
! VERSION = 2.4
class CacheList:
***************
*** 384,388 ****
if not self.changed:
return
! log.info('save %s' % self.file)
cache.save(self.file, self.data, VERSION)
self.mtime = os.stat(self.file)[stat.ST_MTIME]
--- 222,226 ----
if not self.changed:
return
! log.debug('save %s' % self.file)
cache.save(self.file, self.data, VERSION)
self.mtime = os.stat(self.file)[stat.ST_MTIME]
***************
*** 411,415 ****
'mtime_dep': []
}
! _add_info(filename, info)
prefix = basename[:-len(ext)]
--- 249,253 ----
'mtime_dep': []
}
! parser.parse(filename, info)
prefix = basename[:-len(ext)]
***************
*** 438,442 ****
# check changed files
log.debug('changed: %s' % filename)
! _add_info(filename, info)
log.debug(info['mtime_dep'])
for key in info['mtime_dep']:
--- 276,280 ----
# check changed files
log.debug('changed: %s' % filename)
! parser.parse(filename, info)
log.debug(info['mtime_dep'])
for key in info['mtime_dep']:
***************
*** 473,477 ****
self.data['audiocover'] = cover[0]
else:
! cover = filter(_cover_filter, cover)
if cover:
self.data['audiocover'] = cover[0]
--- 311,315 ----
self.data['audiocover'] = cover[0]
else:
! cover = filter(parser.cover_filter, cover)
if cover:
self.data['audiocover'] = cover[0]
***************
*** 499,503 ****
greater than zero, parse _must_ be called.
"""
! if self.check_time + 2 < time.time():
self.check()
changes = len(self.__added) + len(self.__changed)
--- 337,341 ----
greater than zero, parse _must_ be called.
"""
! if self.reduce_files or self.check_time + 2 < time.time():
self.check()
changes = len(self.__added) + len(self.__changed)
***************
*** 508,511 ****
--- 346,357 ----
return changes
+
+ def __str__(self):
+ """
+ Return string for debugging.
+ """
+ return 'mediadb.db.Cache for %s' % self.dirname
+
+
class FileCache:
"""
***************
*** 531,535 ****
# is saved to a new file.
self.changed = True
! _add_info(filename, self.data)
self.save()
--- 377,381 ----
# is saved to a new file.
self.changed = True
! parser.parse(filename, self.data)
self.save()
--- NEW FILE: audio_parser.py ---
import sys
import os
import stat
import md5
import config
import util
import util.vfs as vfs
from listing import Listing, FileListing
#
# Interface
#
def cache():
"""
Function for the 'cache' helper.
"""
print 'creating audio metadata...............................',
sys.__stdout__.flush()
for dir in config.AUDIO_ITEMS:
if os.path.isdir(dir[1]):
AudioParser(dir[1], rescan=True)
print 'done'
def parse(filename, object, mminfo):
    """
    Add additional audio based data. Nothing is added right now, the
    arguments are not used.
    """
    return None
#
# Internal helper functions and classes
#
_VARIOUS = u'__various__'
class AudioParser:
    """
    Collect artist, album, year and length of the audio files in a
    directory and all its subdirectories and store the result in the
    directory item. A value which is not the same for all files is
    replaced by _VARIOUS.
    """
    def __init__(self, dirname, force=False, rescan=False):
        """
        Scan dirname.

        dirname: directory to scan
        force:   load the stored values when nothing has changed
        rescan:  scan again without checking for changes first
        """
        self.artist = ''
        self.album = ''
        self.year = ''
        self.length = 0
        self.changed = False
        self.force = force

        cachefile = vfs.getoverlay(os.path.join(dirname, '..',
                                                'freevo.cache'))
        subdirs = util.getdirnames(dirname, softlinks=False)

        # the item for dirname is part of the listing of its parent
        parent = FileListing( [ dirname ] )
        if parent.num_changes:
            parent.update()
        dirinfo = parent.get_by_name(os.path.basename(dirname))

        if not rescan:
            for subdir in subdirs:
                # rescan is given by name, as second argument it would
                # be used as force
                d = AudioParser(subdir, rescan=rescan)
                if d.changed:
                    break
            else:
                # no changes in all subdirs, looks good
                if os.path.isfile(cachefile) and \
                       os.stat(dirname)[stat.ST_MTIME] <= \
                       os.stat(cachefile)[stat.ST_MTIME]:
                    # and no changes in here. Do not parse everything again
                    if force:
                        # forces? We need to load our current values
                        # from the directory item.
                        # NOTE(review): assumes dirinfo[key] is empty
                        # for a missing key like the 'coverscan' check
                        # below does -- TODO confirm
                        for key in ('artist', 'album', 'year', 'length'):
                            value = dirinfo[key]
                            if value:
                                setattr(self, key, value)
                    return

        filelist = util.match_files(dirname, config.AUDIO_SUFFIX)
        if not filelist and not subdirs:
            # no files in here? We are done
            return

        # ok, something changed here, too bad :-(
        self.changed = True
        self.force = False

        # scan all subdirs
        for subdir in subdirs:
            d = AudioParser(subdir, force=True, rescan=rescan)
            for key in ('artist', 'album', 'year'):
                setattr(self, key, self.strcmp(getattr(self, key),
                                               getattr(d, key)))
            self.length += d.length

        # cache dir first
        listing = Listing(dirname)
        if listing.num_changes:
            listing.update()

        # use_tracks stays set as long as every file has a track number
        use_tracks = True
        for data in listing.match_suffix(config.AUDIO_SUFFIX):
            try:
                for key in ('artist', 'album'):
                    setattr(self, key, self.strcmp(getattr(self, key),
                                                   data[key]))
                self.year = self.strcmp(self.year, data['date'])
                if data['length']:
                    self.length += int(data['length'])
                use_tracks = use_tracks and data['trackno']
            except OSError:
                # files causing an OSError are skipped
                pass

        if use_tracks and (self.album or self.artist):
            dirinfo.store_with_mtime('audio_advanced_sort', True)

        if not self.length:
            return

        for key in ('artist', 'album', 'year', 'length'):
            if getattr(self, key):
                dirinfo.store_with_mtime(key, getattr(self, key))

        # search for covers again when the directory has changed since
        # the last scan
        modtime = os.stat(dirname)[stat.ST_MTIME]
        if not dirinfo['coverscan'] or dirinfo['coverscan'] != modtime:
            dirinfo.store('coverscan', modtime)
            self.extract_image(dirname)

    def strcmp(self, s1, s2):
        """
        Return the common value of s1 and s2: the other one if one is
        empty, s1 if both are equal without looking at spaces and case
        and _VARIOUS if they differ or one of them is _VARIOUS.
        """
        s1 = Unicode(s1)
        s2 = Unicode(s2)
        if not s1 or not s2:
            return s1 or s2
        if s1 == _VARIOUS or s2 == _VARIOUS:
            return _VARIOUS
        if s1.replace(u' ', u'').lower() == s2.replace(u' ', u'').lower():
            return s1
        return _VARIOUS

    def get_md5(self, obj):
        """
        Return the md5 digest of obj. obj is a string or an open file,
        a file is read to its end and not closed.
        """
        m = md5.new()
        if hasattr(obj, 'readlines'):
            # every object with readlines is handled as file, not only
            # objects of the builtin file type
            for line in obj.readlines():
                m.update(line)
        else:
            m.update(obj)
        return m.digest()

    def extract_image(self, path):
        """
        Write the images stored in the id3 tags of the mp3 files in
        path as cover to the overlay directory.
        """
        for i in util.match_files(path, ['mp3']):
            try:
                # NOTE(review): no import of eyeD3 is visible in this
                # file. If it is missing, the NameError is also caught
                # here and no image is ever written -- TODO confirm
                id3 = eyeD3.Mp3AudioFile( i )
            except:
                # best effort, files that can't be read are skipped
                continue
            myname = vfs.getoverlay(os.path.join(path, 'cover.jpg'))
            if not id3.tag:
                continue
            for img in id3.tag.getImages():
                if vfs.isfile(myname):
                    # close the existing cover again after reading it
                    f = vfs.open(myname, 'rb')
                    try:
                        known = self.get_md5(f)
                    finally:
                        f.close()
                    if known == self.get_md5(img.imageData):
                        # Image already there and has identical md5, skip
                        continue
                    # image exists, but sums are different, write a
                    # unique cover
                    iname = os.path.splitext(os.path.basename(i))[0]+'.jpg'
                    myname = vfs.getoverlay(os.path.join(path, iname))
                f = vfs.open(myname, "wb")
                try:
                    f.write(img.imageData)
                finally:
                    f.close()
Index: listing.py
===================================================================
RCS file: /cvsroot/freevo/freevo/src/mediadb/listing.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -d -r1.1 -r1.2
*** listing.py 13 Mar 2005 10:13:35 -0000 1.1
--- listing.py 4 Apr 2005 18:28:15 -0000 1.2
***************
*** 110,113 ****
--- 110,125 ----
+ def match_type(self, type_list):
+ visible = self.visible
+ self.visible = []
+ ret = []
+ for v in visible:
+ if v.attr.has_key('type') and \
+ v.attr['type'].lower() == type.lower():
+ ret.append(v)
+ else:
+ self.visible.append(v)
+ return ret
+
class FileListing(Listing):
***************
*** 138,142 ****
cache.reduce(files)
self.num_changes += cache.num_changes()
!
self.data = []
if self.num_changes > 0:
--- 150,154 ----
cache.reduce(files)
self.num_changes += cache.num_changes()
!
self.data = []
if self.num_changes > 0:
***************
*** 159,162 ****
self.num_changes = 0
self.visible = self.data
-
-
--- 171,172 ----
Index: __init__.py
===================================================================
RCS file: /cvsroot/freevo/freevo/src/mediadb/__init__.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -d -r1.1 -r1.2
*** __init__.py 13 Mar 2005 10:13:35 -0000 1.1
--- __init__.py 4 Apr 2005 18:28:04 -0000 1.2
***************
*** 97,105 ****
- def cache_recursive(dirlist, verbose=False):
- # used by cache.py
- log.error('cache_recursive not defined anymore')
-
-
def cache_dir(dirname, callback=None):
# used by cache.py, extendedmeta.py
--- 97,100 ----
-------------------------------------------------------
SF email is sponsored by - The IT Product Guide
Read honest & candid reviews on hundreds of IT Products from real users.
Discover which products truly live up to the hype. Start reading now.
http://ads.osdn.com/?ad_id=6595&alloc_id=14396&op=click
_______________________________________________
Freevo-cvslog mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/freevo-cvslog