Author: tack
Date: Sat Apr 29 21:33:57 2006
New Revision: 1528

Modified:
   trunk/metadata/src/mediainfo.py
   trunk/metadata/src/video/mkvinfo.py

Log:
Fixed a few bugs and added several more features to matroska parser: title
and date attributes, new cover attribute which is exposed as an imlib2
image, chapter support, add title and track number to individual tracks;
created a SubtitleInfo class and create SubtitleInfo objects for the
subtitles list (some of the other parsers will have to be updated);
usurped maintainership of matroska parser.


Modified: trunk/metadata/src/mediainfo.py
==============================================================================
--- trunk/metadata/src/mediainfo.py     (original)
+++ trunk/metadata/src/mediainfo.py     Sat Apr 29 21:33:57 2006
@@ -53,14 +53,14 @@
 TYPE_HYPERTEXT = 8
 TYPE_MISC      = 10
 
-MEDIACORE = ['title', 'caption', 'comment', 'artist', 'size', 'type',
+MEDIACORE = ['title', 'caption', 'comment', 'artist', 'size', 'type', 'cover',
              'subtype', 'date', 'keywords', 'country', 'language', 'url']
 
 AUDIOCORE = ['channels', 'samplerate', 'length', 'encoder', 'codec', 'format',
-             'samplebits', 'bitrate', 'language']
+             'samplebits', 'bitrate', 'language', 'title', 'trackno']
 
 VIDEOCORE = ['length', 'encoder', 'bitrate', 'samplerate', 'codec', 'format',
-             'samplebits', 'width', 'height', 'fps', 'aspect']
+             'samplebits', 'width', 'height', 'fps', 'aspect', 'title', 'trackno']
 
 MUSICCORE = ['trackno', 'trackof', 'album', 'genre','discs', 'image',
              'raw_image']
@@ -309,10 +309,21 @@
     """
     Chapter in a Multiplexed Container.
     """
-    def __init__(self, name, pos=0):
-        self.keys = ['name', 'pos']
+    def __init__(self, name="", pos=0):
+        self.keys = ['name', 'pos', 'enabled']
         setattr(self,'name', name)
         setattr(self,'pos', pos)
+        setattr(self,'enabled', True)
+
+
+class SubtitleInfo(MediaInfo):
+    """
+    Subtitle Tracks in a Multiplexed Container.
+    """
+    def __init__(self):
+        self.keys = ['language', 'trackno', 'title']
+        for k in self.keys:
+            setattr(self, k, None)
 
 
 class AVInfo(MediaInfo):
@@ -352,11 +363,12 @@
                (os.path.isfile(base+'.sub') or os.path.isfile(base+'.rar')):
             file = open(base+'.idx')
             if file.readline().find('VobSub index file') > 0:
-                line = file.readline()
-                while (line):
+                for line in file.readlines():
                     if line.find('id') == 0:
-                        self.subtitles.append(line[4:6])
-                    line = file.readline()
+                        sub = SubtitleInfo()
+                        sub.language = line[4:6]
+                        sub.trackno = base + '.idx'  # Maybe not?
+                        self.subtitles.append(sub)
             file.close()
 
 
@@ -374,12 +386,15 @@
             if len(self.subtitles):
                 result += reduce( lambda a,b: a + u'  \n   Subtitle Stream:' +\
                                   unicode(b), self.subtitles, u'' )
+
         if not isinstance(self.chapters, int) and len(self.chapters) > 0:
             result += u'\n Chapter list:'
             for i in range(len(self.chapters)):
-                result += u'\n   %2s: "%s" %s' % \
+                pos = self.chapters[i]['pos']
+                result += u'\n   %2s: "%s" %02d:%02d:%02d.%03d' % \
                           (i+1, unicode(self.chapters[i]['name']),
-                           self.chapters[i]['pos'])
+                           int(pos)/60/60, int(pos/60) % 60, 
+                           int(pos)%60, (pos-int(pos))*1000)
         return result
 
 

Modified: trunk/metadata/src/video/mkvinfo.py
==============================================================================
--- trunk/metadata/src/video/mkvinfo.py (original)
+++ trunk/metadata/src/video/mkvinfo.py Sat Apr 29 21:33:57 2006
@@ -6,9 +6,9 @@
 #
 # -----------------------------------------------------------------------------
 # kaa-Metadata - Media Metadata for Python
-# Copyright (C) 2003-2005 Thomas Schueppel, Dirk Meyer
+# Copyright (C) 2003-2005 Thomas Schueppel, Dirk Meyer, Jason Tackaberry
 #
-# Maintainer:    Dirk Meyer <[EMAIL PROTECTED]>
+# Maintainer:    Jason Tackaberry <[EMAIL PROTECTED]>
 #
 # Please see the file AUTHORS for a complete list of authors.
 #
@@ -47,38 +47,58 @@
 log = logging.getLogger('metadata')
 
 # Main IDs for the Matroska streams
-MATROSKA_VIDEO_TRACK     = 0x01
-MATROSKA_AUDIO_TRACK     = 0x02
-MATROSKA_SUBTITLES_TRACK = 0x11
-
-MATROSKA_HEADER_ID  = 0x1A45DFA3
-MATROSKA_TRACKS_ID  = 0x1654AE6B
-MATROSKA_SEGMENT_ID = 0x18538067
-MATROSKA_SEGMENT_INFO_ID      = 0x1549A966
-MATROSKA_CLUSTER_ID           = 0x1F43B675
-MATROSKA_VOID_ID              = 0xEC
-MATROSKA_CRC_ID               = 0xBF
-MATROSKA_TIMECODESCALE_ID     = 0x2AD7B1
-MATROSKA_DURATION_ID          = 0x4489
-MATROSKA_CRC32_ID             = 0xBF
-MATROSKA_TRACK_TYPE_ID        = 0x83
-MATROSKA_TRACK_LANGUAGE_ID    = 0x22B59C
-MATROSKA_TIMECODESCALE_ID     = 0x4489
-MATROSKA_MUXING_APP_ID        = 0x4D80
-MATROSKA_WRITING_APP_ID       = 0x5741
-MATROSKA_CODEC_ID             = 0x86
-MATROSKA_CODEC_NAME_ID        = 0x258688
-MATROSKA_FRAME_DURATION_ID    = 0x23E383
-MATROSKA_VIDEO_SETTINGS_ID    = 0xE0
-MATROSKA_VID_WIDTH_ID         = 0xB0
-MATROSKA_VID_HEIGHT_ID        = 0xBA
-MATROSKA_DISPLAY_VID_WIDTH_ID = 0x54B0
-MATROSKA_DISPLAY_VID_HEIGHT_ID= 0x54BA
-MATROSKA_AUDIO_SETTINGS_ID    = 0xE1
-MATROSKA_AUDIO_SAMPLERATE_ID  = 0xB5
-MATROSKA_AUDIO_CHANNELS_ID    = 0x9F
-MATROSKA_TRACK_UID_ID         = 0x73C5
-MATROSKA_TRACK_NUMBER_ID      = 0xD7
+MATROSKA_VIDEO_TRACK              = 0x01
+MATROSKA_AUDIO_TRACK              = 0x02
+MATROSKA_SUBTITLES_TRACK          = 0x11
+
+MATROSKA_HEADER_ID                = 0x1A45DFA3
+MATROSKA_TRACKS_ID                = 0x1654AE6B
+MATROSKA_SEGMENT_ID               = 0x18538067
+MATROSKA_SEGMENT_INFO_ID          = 0x1549A966
+MATROSKA_CLUSTER_ID               = 0x1F43B675
+MATROSKA_VOID_ID                  = 0xEC
+MATROSKA_CRC_ID                   = 0xBF
+MATROSKA_TIMECODESCALE_ID         = 0x2AD7B1
+MATROSKA_DURATION_ID              = 0x4489
+MATROSKA_CRC32_ID                 = 0xBF
+MATROSKA_TRACK_TYPE_ID            = 0x83
+MATROSKA_TRACK_LANGUAGE_ID        = 0x22B59C
+MATROSKA_TIMECODESCALE_ID         = 0x2AD7B1
+MATROSKA_MUXING_APP_ID            = 0x4D80
+MATROSKA_WRITING_APP_ID           = 0x5741
+MATROSKA_CODEC_ID                 = 0x86
+MATROSKA_CODEC_PRIVATE_ID         = 0x63A2
+MATROSKA_FRAME_DURATION_ID        = 0x23E383
+MATROSKA_VIDEO_SETTINGS_ID        = 0xE0
+MATROSKA_VID_WIDTH_ID             = 0xB0
+MATROSKA_VID_HEIGHT_ID            = 0xBA
+MATROSKA_DISPLAY_VID_WIDTH_ID     = 0x54B0
+MATROSKA_DISPLAY_VID_HEIGHT_ID    = 0x54BA
+MATROSKA_AUDIO_SETTINGS_ID        = 0xE1
+MATROSKA_AUDIO_SAMPLERATE_ID      = 0xB5
+MATROSKA_AUDIO_CHANNELS_ID        = 0x9F
+MATROSKA_TRACK_UID_ID             = 0x73C5
+MATROSKA_TRACK_NUMBER_ID          = 0xD7
+MATROSKA_TITLE_ID                 = 0x7BA9
+MATROSKA_DATE_UTC_ID              = 0x4461
+MATROSKA_NAME_ID                  = 0x536E
+
+MATROSKA_CHAPTERS_ID              = 0x1043A770
+MATROSKA_EDITION_ENTRY_ID         = 0x45B9
+MATROSKA_CHAPTER_ATOM_ID          = 0xB6
+MATROSKA_CHAPTER_TIME_START_ID    = 0x91
+MATROSKA_CHAPTER_TIME_END_ID      = 0x92
+MATROSKA_CHAPTER_FLAG_ENABLED_ID  = 0x4598
+MATROSKA_CHAPTER_DISPLAY_ID       = 0x80
+MATROSKA_CHAPTER_LANGUAGE_ID      = 0x437C
+MATROSKA_CHAPTER_STRING_ID        = 0x85
+
+MATROSKA_ATTACHMENTS_ID           = 0x1941A469
+MATROSKA_ATTACHED_FILE_ID         = 0x61A7
+MATROSKA_FILE_DESC_ID             = 0x467E
+MATROSKA_FILE_NAME_ID             = 0x466E
+MATROSKA_FILE_MIME_TYPE_ID        = 0x4660
+MATROSKA_FILE_DATA_ID             = 0x465C
 
 class EbmlEntity:
     """
@@ -116,15 +136,9 @@
 
         # if the size is 1, 2 3 or 4 it could be a numeric value, so do the job
         self.value = 0
-        if self.entity_len == 1:
-            self.value = ord(self.entity_data[0])
-        if self.entity_len == 2:
-            self.value = unpack('!H', self.entity_data)[0]
-        if self.entity_len == 3:
-            self.value = ord(self.entity_data[0])<<16 | \
-                         ord(self.entity_data[1])<<8 | ord(self.entity_data[2])
-        if self.entity_len == 4:
-            self.value = unpack('!I', self.entity_data)[0]
+        if self.entity_len <= 8:
+            for pos, shift in zip(range(self.entity_len), range((self.entity_len-1)*8, -1, -8)):
+                self.value |= ord(self.entity_data[pos]) << shift
 
     def compute_id(self, inbuf):
         first = ord(inbuf[0])
@@ -162,7 +176,8 @@
             return ((c1-0x20)<<16) | (c2<<8) | (c3)
         if first & 0x10:
             self.len_size = 4
-            return unpack('!I',inbuf[:4])
+            (c1, c2, c3, c4) = unpack('BBBB',inbuf[:4])
+            return ((c1-0x10)<<24) | (c2<<16) | (c3<<8) | c4
         if first & 0x08:
             self.len_size = 5
             return -1
@@ -180,8 +195,7 @@
         return self.crc_len
 
     def get_value(self):
-        value = self.value
-        return value
+        return self.value
 
     def get_data(self):
         return self.entity_data
@@ -230,24 +244,36 @@
             try:
                 # Express scalecode in ms instead of ns
                 # Rescale it to the second
-                tc = seginfotab[MATROSKA_TIMECODESCALE_ID].get_value()
-                scalecode = float(tc / (1000*1000))
+                scalecode = seginfotab[MATROSKA_TIMECODESCALE_ID].get_value() / 1000.0
             except (ZeroDivisionError, KeyError, IndexError):
-                scalecode = 1000
+                scalecode = 1000.0
+
             try:
                 md = seginfotab[MATROSKA_DURATION_ID].get_data()
-                duration = float(unpack('!f', md)[0])
-                duration = float(duration / scalecode)
-                # Express the time in minutes
-                self.length = int(duration/60)
+                duration = unpack('!f', md)[0]
+                self.length = duration / scalecode
             except (ZeroDivisionError, KeyError, IndexError):
                 pass
+
+            if MATROSKA_TITLE_ID in seginfotab:
+                self.title = seginfotab[MATROSKA_TITLE_ID].get_data()
+
+            if MATROSKA_DATE_UTC_ID in seginfotab:
+                self.date =  unpack('!q', seginfotab[MATROSKA_DATE_UTC_ID].get_data())[0] / 10.0**9
+
             try:
                 log.debug("Searching for id : %X" % MATROSKA_TRACKS_ID)
                 entity = segtab[MATROSKA_TRACKS_ID]
                 self.process_tracks(entity)
             except (ZeroDivisionError, KeyError, IndexError):
                 log.debug("TRACKS ID not found !!" )
+
+            if MATROSKA_CHAPTERS_ID in segtab:
+                self.process_chapters(segtab[MATROSKA_CHAPTERS_ID])
+
+            if MATROSKA_ATTACHMENTS_ID in segtab:
+                self.process_attachments(segtab[MATROSKA_ATTACHMENTS_ID])
+
         else:
             log.debug("SEGMENT ID not found %08X" % segment.get_id())
 
@@ -280,69 +306,157 @@
         type = tabelem[MATROSKA_TRACK_TYPE_ID]
         mytype = type.get_value()
         log.debug ("Track type found with UID %d" % mytype)
+        track = None
+
         if mytype == MATROSKA_VIDEO_TRACK:
             log.debug("VIDEO TRACK found !!")
-            vi = mediainfo.VideoInfo()
+            track = mediainfo.VideoInfo()
             try:
                 elem = tabelem[MATROSKA_CODEC_ID]
-                vi.codec = elem.get_data()
-                if vi.codec.startswith('V_'):
-                    vi.codec = vi.codec[2:]
+                track.codec = elem.get_data()
+                if track.codec.startswith('V_'):
+                    track.codec = track.codec[2:]
             except (ZeroDivisionError, KeyError, IndexError):
-                vi.codec = 'Unknown'
+                track.codec = 'Unknown'
+
+            if MATROSKA_CODEC_PRIVATE_ID in tabelem:
+                if tabelem[MATROSKA_CODEC_PRIVATE_ID].get_len() == 40:
+                    # Assuming it's a BITMAPINFOHEADER, grab fourcc
+                    track.format = tabelem[MATROSKA_CODEC_PRIVATE_ID].get_data()[16:20]
+
             try:
                 elem = tabelem[MATROSKA_FRAME_DURATION_ID]
-                vi.fps = 1 / (pow(10, -9) * (elem.get_value()))
+                track.fps = 1 / (pow(10, -9) * (elem.get_value()))
             except (ZeroDivisionError, KeyError, IndexError):
-                vi.fps = 0
+                track.fps = 0
+
             try:
                 vinfo = tabelem[MATROSKA_VIDEO_SETTINGS_ID]
                 vidtab = self.process_one_level(vinfo)
-                vi.width  = vidtab[MATROSKA_VID_WIDTH_ID].get_value()
-                vi.height = vidtab[MATROSKA_VID_HEIGHT_ID].get_value()
+                track.width  = vidtab[MATROSKA_VID_WIDTH_ID].get_value()
+                track.height = vidtab[MATROSKA_VID_HEIGHT_ID].get_value()
                 if vidtab.has_key(MATROSKA_DISPLAY_VID_WIDTH_ID) and \
                    vidtab.has_key(MATROSKA_DISPLAY_VID_HEIGHT_ID):
-                    vi.aspect = float(vidtab[MATROSKA_DISPLAY_VID_WIDTH_ID].get_value()) / \
+                    track.aspect = float(vidtab[MATROSKA_DISPLAY_VID_WIDTH_ID].get_value()) / \
                                 vidtab[MATROSKA_DISPLAY_VID_HEIGHT_ID].get_value()
             except Exception, e:
                 log.debug("No other info about video track !!!")
-            self.video.append(vi)
+            self.video.append(track)
 
         elif mytype == MATROSKA_AUDIO_TRACK:
             log.debug("AUDIO TRACK found !!")
-            ai = mediainfo.AudioInfo()
-            try:
-                elem = tabelem[MATROSKA_TRACK_LANGUAGE_ID]
-                ai.language = elem.get_data()
-                ai['language'] = elem.get_data()
-            except (KeyError, IndexError):
-                ai.language = 'en'
-                ai['language'] = 'en'
+            track = mediainfo.AudioInfo()
+
             try:
                 elem = tabelem[MATROSKA_CODEC_ID]
-                ai.codec = elem.get_data()
-                if ai.codec.startswith('A_'):
-                    ai.codec = ai.codec[2:]
+                track.codec = elem.get_data()
+                if track.codec.startswith('A_'):
+                    track.codec = track.codec[2:]
             except (KeyError, IndexError):
-                ai.codec = "Unknown"
+                track.codec = "Unknown"
+
             try:
                 ainfo = tabelem[MATROSKA_AUDIO_SETTINGS_ID]
                 audtab = self.process_one_level(ainfo)
-                as = audtab[MATROSKA_AUDIO_SAMPLERATE_ID].get_value()
-                ai.samplerate  = unpack('!f', pack("!I", as))[0]
-                ai.channels = audtab[MATROSKA_AUDIO_CHANNELS_ID].get_value()
+                track.samplerate  = unpack('!f', audtab[MATROSKA_AUDIO_SAMPLERATE_ID].get_data())[0]
+                track.channels = audtab[MATROSKA_AUDIO_CHANNELS_ID].get_value()
             except (KeyError, IndexError):
                 log.debug("No other info about audio track !!!")
-            self.audio.append(ai)
+
+            self.audio.append(track)
 
         elif mytype == MATROSKA_SUBTITLES_TRACK:
+            track = mediainfo.SubtitleInfo()
+            self.subtitles.append(track)
+
+        if not track:
+            return
+
+        if MATROSKA_TRACK_LANGUAGE_ID in tabelem:
+            track.language = tabelem[MATROSKA_TRACK_LANGUAGE_ID].get_data()
+            log.debug("Track language found: %s" % track.language)
+        else:
+            track.language = "und"
+
+        if MATROSKA_NAME_ID in tabelem:
+            track.title = tabelem[MATROSKA_NAME_ID].get_data()
+
+        if MATROSKA_TRACK_NUMBER_ID in tabelem:
+            track.trackno = tabelem[MATROSKA_TRACK_NUMBER_ID].get_value()
+
+
+    def process_chapters(self, chapters):
+        tabelem = self.process_one_level(chapters)
+        if MATROSKA_EDITION_ENTRY_ID not in tabelem:
+            return
+
+        entry = tabelem[MATROSKA_EDITION_ENTRY_ID]
+        buf = entry.get_data()
+        indice = 0
+        while indice < entry.get_len():
+            elem = EbmlEntity(buf[indice:])
+            if elem.get_id() == MATROSKA_CHAPTER_ATOM_ID:
+                self.process_chapter_atom(elem)
+            indice += elem.get_total_len() + elem.get_crc_len()
+ 
+        
+    def process_chapter_atom(self, atom):
+        tabelem = self.process_one_level(atom)
+        chap = mediainfo.ChapterInfo()
+
+        if MATROSKA_CHAPTER_TIME_START_ID in tabelem:
+            # Scale timecode to seconds (float)
+            chap.pos = tabelem[MATROSKA_CHAPTER_TIME_START_ID].get_value() / 1000000 / 1000.0
+
+        if MATROSKA_CHAPTER_FLAG_ENABLED_ID in tabelem:
+            chap.enabled = tabelem[MATROSKA_CHAPTER_FLAG_ENABLED_ID].get_value()
+
+        if MATROSKA_CHAPTER_DISPLAY_ID in tabelem:
+            # Matroska supports multiple (chapter name, language) pairs for
+            # each chapter, so chapter names can be internationalized.  This
+            # logic will only take the last one in the list.
+            tabelem = self.process_one_level(tabelem[MATROSKA_CHAPTER_DISPLAY_ID])
+            if MATROSKA_CHAPTER_STRING_ID in tabelem:
+                chap.name = tabelem[MATROSKA_CHAPTER_STRING_ID].get_data()
+
+        log.debug('Chapter "%s" found' % str(chap.name))
+        self.chapters.append(chap)
+
+
+    def process_attachments(self, attachments):
+        buf = attachments.get_data()
+        indice = 0
+        while indice < attachments.get_len():
+            elem = EbmlEntity(buf[indice:])
+            if elem.get_id() == MATROSKA_ATTACHED_FILE_ID:
+                self.process_attachment(elem)
+            indice += elem.get_total_len() + elem.get_crc_len()
+
+
+    def process_attachment(self, attachment):
+        tabelem = self.process_one_level(attachment)
+        name = desc = mimetype = ""
+
+        if MATROSKA_FILE_NAME_ID in tabelem:
+            name = tabelem[MATROSKA_FILE_NAME_ID].get_data()
+        if MATROSKA_FILE_DESC_ID in tabelem:
+            desc = tabelem[MATROSKA_DESC_NAME_ID].get_data()
+        if MATROSKA_FILE_MIME_TYPE_ID in tabelem:
+            mimetype = tabelem[MATROSKA_FILE_MIME_TYPE_ID].get_data()
+        if MATROSKA_FILE_DATA_ID in tabelem:
+            data = tabelem[MATROSKA_FILE_DATA_ID].get_data()
+        else:
+            data = None
+
+        # Right now we only support attachments that could be cover images.
+        # Make a guess to see if this attachment is a cover image.
+        if mimetype.startswith("image/") and "cover" in (name+desc).lower() and data:
             try:
-                elem = tabelem[MATROSKA_TRACK_LANGUAGE_ID]
-                language = elem.get_data()
-                log.debug ("Subtitle language found : %s" % elem.get_data() )
-            except (KeyError, IndexError):
-                language = "en" # By default
-            self.subtitles.append(language)
+                import kaa.imlib2
+                self.cover = kaa.imlib2.open_from_memory(data)
+            except:
+                pass
 
+        log.debug('Attachment "%s" found' % name)
 
 factory.register( 'application/mkv', ('mkv', 'mka',), mediainfo.TYPE_AV, MkvInfo )


-------------------------------------------------------
Using Tomcat but need to do more? Need to support web services, security?
Get stuff done quickly with pre-integrated technology to make your job easier
Download IBM WebSphere Application Server v.1.0.1 based on Apache Geronimo
http://sel.as-us.falkag.net/sel?cmd=lnk&kid=120709&bid=263057&dat=121642
_______________________________________________
Freevo-cvslog mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/freevo-cvslog

Reply via email to