http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11705

Revision: 11705
Author:   drtrigon
Date:     2013-07-01 11:30:01 +0000 (Mon, 01 Jul 2013)
Log Message:
-----------
improvement; refactored the individual '_cat_meta_*' methods into one single
'_cat_meta_general' method
improvement; renamed 'Pages' to 'Length' in 'Properties' so that it can also
reflect temporal ranges (e.g. the duration of audio/video files)

Modified Paths:
--------------
    trunk/pywikipedia/catimages.py

Modified: trunk/pywikipedia/catimages.py
===================================================================
--- trunk/pywikipedia/catimages.py      2013-06-30 17:01:59 UTC (rev 11704)
+++ trunk/pywikipedia/catimages.py      2013-07-01 11:30:01 UTC (rev 11705)
@@ -151,7 +151,7 @@
         self.image_size = (None, None)
 
         # available file properties and metadata
-        self._properties = { 'Properties':   [{'Format': u'-', 'Pages': 0}],
+        self._properties = { 'Properties':   [{'Format': u'-', 'Length': -1}],
                              'Metadata':     [], }
         # available feature to extract
         self._features   = { 'ColorAverage': [],
@@ -163,7 +163,7 @@
                              'History':      [],
                              'Text':         [],
                              'Streams':      [],
-                             'Audio':        [],
+                             #'Audio':        [],
                              'Legs':         [],
                              'Hands':        [],
                              'Torsos':       [],
@@ -193,35 +193,31 @@
 
         exif = self._util_get_DataTags_EXIF()
         #print exif
+
+        misc = []
+        misc += [exif['Output_extension']] if 'Output_extension' in exif else 
[]
+        misc += [exif['DescProducer']] if 'DescProducer' in exif else []
+        misc += [exif['DescCreator']] if 'DescCreator' in exif else []
+
         result = { 'Software':         exif['Software'] if 'Software' in exif 
else u'-',
-                   'Output_Extension': exif['Output_extension'] if 
'Output_extension' in exif else u'-',
                    'Desc':             exif['Desc'] if 'Desc' in exif else 
u'-',
-                   'DescProducer':     exif['DescProducer'] if 'DescProducer' 
in exif else u'-',
-                   'DescCreator':      exif['DescCreator'] if 'DescCreator' in 
exif else u'-',
                    'Comment':          exif['Comment'] if 'Comment' in exif 
else u'-',
-                   'Producer':         exif['Producer'] if 'Producer' in exif 
else u'-',}
+                   'Producer':         exif['Producer'] if 'Producer' in exif 
else u'-',
+                   'Misc':             u'\n'.join(misc) if misc else u'-',}
+                   #'Output_Extension': exif['Output_extension'] if 
'Output_extension' in exif else u'-',
+                   #'DescProducer':     exif['DescProducer'] if 'DescProducer' 
in exif else u'-',
+                   #'DescCreator':      exif['DescCreator'] if 'DescCreator' 
in exif else u'-',
                    #'Comments':         exif['Comments'] if 'Comments' in exif 
else u'-',
                    #'WorkDesc':         exif['WorkDescription'] if 
'WorkDescription' in exif else u'-',
                    ##'Dimensions':       tuple(map(int, 
exif['ImageSize'].split(u'x'))),}
                    #'Dimensions':       tuple(exif['ImageSize'].split(u'x')) 
if 'ImageSize' in exif else (None, None),}
                    #'Mode':             exif['ColorType'], }
 
-# TODO: vvv
-#* metadata template in commons has to be worked out and code adopted
-#* like in 'Streams' a nice content listing of MIDI (exif or music21 - if 
needed at all?)
-#* docu all this stuff in commons
-#* docu and do all open things on "commons TODO list"
-#
-#
-#
-#(* initial audio midi support (music21))
-#[TODO: docu on Commons ... / template ...]
-
 # TODO: if '_detect_History' is not needed here, moveit back into _JpegFile !!!
         #print "self._detect_History()"
         #print self._detect_History()
 
-        # https://pypi.python.org/pypi/hachoir-metadata (needs 'core' and 
'parser')
+        ## https://pypi.python.org/pypi/hachoir-metadata (needs 'core' and 
'parser')
         #
         #from hachoir_core.error import HachoirError
         #from hachoir_core.stream import InputStreamError
@@ -416,9 +412,9 @@
         self._detect_Faces_EXIF()
         # Faces and eyes (opencv pre-trained haar)
         self._detect_Faces()
-# TODO: test and use or switch off
         # Face via Landmark(s)
-#        self._detect_FaceLandmark_xBOB()
+        # SWITCHED OFF; needs lots of libraries and disk space for minor 
improvement
+        #self._detect_FaceLandmark_xBOB()
         # exclude duplicates (CV and EXIF)
         faces = [item['Position'] for item in self._features['Faces']]
         for i in self._util_merge_Regions(faces)[1]:
@@ -495,7 +491,7 @@
            as commons does in order to compare if those libraries (ImageMagick,
            ...) are buggy (thus explicitely use other software for 
independence)"""
 
-        result = {'Format': u'-', 'Pages': 0}
+        result = {'Format': u'-', 'Length': -1}
 
         try:
             i = Image.open(self.image_path)
@@ -529,7 +525,7 @@
                         #'info':       i.info,
                         #'stat':       os.stat(self.image_path),
                         'Palette':    str(len(i.palette.palette)) if i.palette 
else u'-',
-                        'Pages':      pc,
+                        'Length':     pc,   # num. of pages
                         'Dimensions': self.image_size,
                         'Filesize':   os.path.getsize(self.file_name),
                         'MIME':       u'%s/%s' % tuple(self.file_mime[:2]), })
@@ -2364,7 +2360,7 @@
 
         result =      { 'Format':     u'%s' % self.file_mime[1].upper(),
         # DO NOT use ImageMagick (identify) instead of PIL to get these info !!
-                        'Pages':      0,
+                        'Length':     -1,   # pages/layers
                         'Dimensions': self.image_size,
                         'Filesize':   os.path.getsize(self.file_name),
                         'MIME':       u'%s/%s' % tuple(self.file_mime[:2]), }
@@ -2408,7 +2404,7 @@
            as commons does in order to compare if those libraries (ImageMagick,
            ...) are buggy (thus explicitely use other software for 
independence)"""
 
-        result = {'Format': u'-', 'Pages': 0}
+        result = {'Format': u'-', 'Length': -1}
 
         # similar to PDF page count OR use BeautifulSoup
         svgcountpages = re.compile("<page>")
@@ -2434,7 +2430,7 @@
         result.update({ 'Format':     valid,
                         'Mode':       u'-',
                         'Palette':    u'-',
-                        'Pages':      pc,
+                        'Length':     pc,   # pages
         # may be set {{validSVG}} also or do something in bot template to
         # recognize 'Format=SVG (valid)' ...
                         'Dimensions': self.image_size,
@@ -2480,7 +2476,7 @@
         result =      { 'Format':     u'PDF',
                         'Mode':       u'-',
                         'Palette':    u'-',
-                        'Pages':      pc,
+                        'Length':     pc,   # pages
                         'Dimensions': self.image_size,
                         'Filesize':   os.path.getsize(self.file_name),
                         'MIME':       u'%s/%s' % tuple(self.file_mime[:2]), }
@@ -2648,10 +2644,10 @@
 class _OggFile(_JpegFile):
     def getFeatures(self):
         # general handling of all audio and video formats
-        self._detect_Streams()
+        self._detect_Streams()          # Streams
 
         # general audio feature extraction
-#        self._detect_AudioFeatures()
+#        self._detect_AudioFeatures()    # Audio
 
         return self._features
 
@@ -2665,8 +2661,10 @@
         d = self._util_get_DataStreams_FFMPEG()
         #print d
 
+        #print self._util_get_DataTags_EXIF()['Duration']
+
         result =      { 'Format':     u'%s' % 
d['format']['format_name'].upper(),
-                        'Pages':      0,
+                        'Length':     float(d['format']['duration']),   # 
secs/frames
                         'Dimensions': self.image_size,
                         'Filesize':   os.path.getsize(self.file_name),
                         'MIME':       u'%s/%s' % tuple(self.file_mime[:2]), }
@@ -2701,6 +2699,7 @@
                             'Format':     u'%s/%s' % (s["codec_type"], 
s.get("codec_name",u'?')),
                             'Rate':       rate or u'-',
                             'Dimensions': dim or (None, None),
+                            'Duration':   float(s['duration']),
                             })
 
         if 'image' in d["format"]["format_name"]:
@@ -2893,25 +2892,21 @@
         return
 
 
-class _MidiFile(_UnknownFile):
-    def getFeatures(self):
-        self._detect_AudioFeatures()    # Audio
-        return self._features
-
+#class _MidiFile(_UnknownFile):
+class _MidiFile(_OggFile):
     def _detect_HeaderAndMetadata(self):
         #_UnknownFile._detect_HeaderAndMetadata(self)
         #result = {'Desc': 
self._properties['Metadata'][0]['Desc'].splitlines()}
 
-        result = {'Desc': []}
-
         # extract data from midi file
         # http://valentin.dasdeck.com/midi/midifile.htm
         # 
http://stackoverflow.com/questions/3943149/reading-and-interpreting-data-from-a-binary-file-in-python
         ba = bytearray(open(self.file_name, 'rb').read())
         i = -1
+        res = {'Desc': []}
         for key, data in [('Text', '\x01'), ('Copyright', '\x02')]:#, 
('Lyrics', '\x05')]:
             key = 'Desc'
-            #result[key] = []
+            #res[key] = []
             while True:
                 i = ba.find('\xff%s' % data, i+1)
                 if i < 0:       # something found?
@@ -2919,15 +2914,12 @@
                 e = (i+3+ba[i+2])
                 if ba[e] != 0:  # length match with string end (00)?
                     e = ba.find('\x00', (i+3+ba[i+2]))
-                result[key].append(ba[i+3:e].decode('latin-1').strip())
-            #result[key] = u'\n'.join(result[key])
-        result[key] = u'\n'.join(result[key])
-        if not result['Desc']:
-            result['Desc'] = u'-'
+                res[key].append(ba[i+3:e].decode('latin-1').strip())
+            #res[key] = u'\n'.join(res[key])
+        res['Desc'] = u'\n'.join(res['Desc'])
 
         ## find specific info in extracted data
         #print [item.strip() for item in re.findall('Generated .*?\n', 
result['Text'])]
-        ##u"Cr'eateur: GNU LilyPond 2.0.1"
         #import dateutil.parser
         #dates = []
         #for line in result['Text'].splitlines():
@@ -2938,6 +2930,12 @@
         #        pass
         #print dates
 
+        result = { 'Software': u'-',
+                   'Desc':     res['Desc'] if res['Desc'] else u'-',
+                   'Comment':  u'-',
+                   'Producer': u'-',
+                   'Misc':     u'-', }
+
         import _music21 as music21
         try:
             s = music21.converter.parse(self.file_name)
@@ -2956,8 +2954,11 @@
            as commons does in order to compare if those libraries (ImageMagick,
            ...) are buggy (thus explicitely use other software for 
independence)"""
 
+        # 'ffprobe' (ffmpeg); audio and video streams files (ogv, oga, ...)
+        d = self._util_get_DataStreams_MUSIC21()
+
         result =      { 'Format':     u'%s' % self.file_mime[1].upper(),
-                        'Pages':      0,
+                        'Length':     d["duration"],    # secs
                         'Dimensions': self.image_size,
                         'Filesize':   os.path.getsize(self.file_name),
                         'MIME':       u'%s/%s' % tuple(self.file_mime[:2]), }
@@ -2966,8 +2967,37 @@
         self._properties['Properties'][0].update(result)
         return
 
-    # midi audio feature extraction
-    def _detect_AudioFeatures(self):
+    # midi audio stream/feature extraction, detect streams of notes; parts
+    def _detect_Streams(self):
+        # like in '_OggFile' (streams) a nice content listing of MIDI (music21)
+        d = self._util_get_DataStreams_MUSIC21()
+        if not d:
+            return
+
+        data = []
+        for i, part in enumerate(d["parts"]):
+            #print part.elements
+            mm     = part.elements[0]   # MetronomeMark
+            ts     = part.elements[1]   # TimeSignature
+            stream = part.notes         # Stream - containing all Note(s)
+            #print mm.secondsPerQuarter()
+            #print mm.durationToSeconds(part.duration.quarterLength)
+            #print sum([item.seconds for item in stream])    # sum over all 
Note(s)
+            #print part.metadata
+            data.append( {'ID':        (i+1), 
+                          'Format':    u'(audio/midi)', 
+                          # note rate / noteduration ...??
+                          'Rate':      u'%s/-/-' % d["channels"][i],
+                          'Dimension': (None, None),
+                          'Duration':  part.seconds,} )
+
+        self._features['Streams'] = data
+        return
+
+    def _util_get_DataStreams_MUSIC21(self):
+        if hasattr(self, '_buffer_MUSIC21'):
+            return self._buffer_MUSIC21
+
         import _music21 as music21
 
         #music21.features.jSymbolic.getCompletionStats()
@@ -2976,7 +3006,7 @@
             #s = music21.midi.translate.midiFilePathToStream(self.file_name)
             s = music21.converter.parse(self.file_name)
         except music21.midi.base.MidiException:
-            pywikibot.warning(u'unknown file type [_detect_AudioFeatures]')
+            pywikibot.warning(u'unknown file type [_detect_Streams]')
             return
 
         #fs = music21.features.jSymbolic.extractorsById
@@ -2995,29 +3025,37 @@
         #                    print f.name, f.vector
         #                except AttributeError:
         #                    print "ERROR"
-        data = {'RegisterImportance': 
(music21.features.jSymbolic.ImportanceOfBassRegisterFeature(s).extract().vector[0],
-                                       
music21.features.jSymbolic.ImportanceOfMiddleRegisterFeature(s).extract().vector[0],
-                                       
music21.features.jSymbolic.ImportanceOfHighRegisterFeature(s).extract().vector[0],),
-                      'NoteDuration': 
(music21.features.jSymbolic.AverageNoteDurationFeature(s).extract().vector[0],
-                                       
music21.features.jSymbolic.MaximumNoteDurationFeature(s).extract().vector[0],),
-                 'IndependentVoices': 
(music21.features.jSymbolic.AverageNumberOfIndependentVoicesFeature(s).extract().vector[0],
-                                       
music21.features.jSymbolic.MaximumNumberOfIndependentVoicesFeature(s).extract().vector[0],),
-                   'MostCommonPitch': 
music21.features.jSymbolic.MostCommonPitchFeature(s).extract().vector[0],
-                             'Tempo': 
music21.features.jSymbolic.InitialTempoFeature(s).extract().vector[0],
-                          'Duration': s.highestTime,
-                          #'Metadata': s.metadata if s.metadata else u'',
-                            'Lyrics': s.lyrics(recurse=True) if 
s.lyrics(recurse=True) else u'',}
+# TODO: do we extract "streams" and/or features here ... ???!?
+#        data = [{'RegisterImportance': 
(music21.features.jSymbolic.ImportanceOfBassRegisterFeature(s).extract().vector[0],
+#                                        
music21.features.jSymbolic.ImportanceOfMiddleRegisterFeature(s).extract().vector[0],
+#                                        
music21.features.jSymbolic.ImportanceOfHighRegisterFeature(s).extract().vector[0],),
+#                       'NoteDuration': 
(music21.features.jSymbolic.AverageNoteDurationFeature(s).extract().vector[0],
+#                                        
music21.features.jSymbolic.MaximumNoteDurationFeature(s).extract().vector[0],),
+#                  'IndependentVoices': 
(music21.features.jSymbolic.AverageNumberOfIndependentVoicesFeature(s).extract().vector[0],
+#                                        
music21.features.jSymbolic.MaximumNumberOfIndependentVoicesFeature(s).extract().vector[0],),
+#                    'MostCommonPitch': 
music21.features.jSymbolic.MostCommonPitchFeature(s).extract().vector[0],
+#                              'Tempo': 
music21.features.jSymbolic.InitialTempoFeature(s).extract().vector[0],
+#                           #'Duration': s.highestTime,
+#                           #'Metadata': s.metadata if s.metadata else u'',
+#                             'Lyrics': s.lyrics(recurse=True) if 
s.lyrics(recurse=True) else u'',}]
         #print music21.text.assembleLyrics(s)
-        #print s.duration
-        #print s.offsetMap
-        #print s.measureOffsetMap()
-        #print s.seconds
-        #print s.secondsMap
 
-        self._features['Audio'] = [data]
-        return
+        #print s.show('text')
+        #midi = [item for item in s.recurse()]
+        #print midi
 
+        mf = music21.midi.translate.streamToMidiFile(s)
 
+        res = {}
+
+        res["channels"] = [ len(t.getChannels()) for t in mf.tracks ]
+        res["parts"]    = [ p for p in s.elements ]
+        res["duration"] = max([ p.seconds for p in s.elements ])
+        self._buffer_MUSIC21 = res
+
+        return self._buffer_MUSIC21
+
+
 # http://commons.wikimedia.org/wiki/File_formats
 _FILETYPES = {                        '*': _UnknownFile,
               (      'image',     'jpeg'): _JpegFile,
@@ -3221,7 +3259,7 @@
         pdf    = u'PDF' in self._info_filter['Properties'][0]['Format']
         result = self._info_filter['Text']
         relevance = pdf and len(result) and \
-                    (self._info_filter['Properties'][0]['Pages'] >= 10) and \
+                    (self._info_filter['Properties'][0]['Length'] >= 10) and \
                     (result[0]['Size'] >= 5E4) and (result[0]['Lines'] >= 1000)
 
         return (u'Books (literature) in PDF', relevance)
@@ -3231,7 +3269,7 @@
     # (Category:Animated SVG‎)
     def _cat_prop_Animated_general(self):
         result = self._info_filter['Properties']
-        relevance = result and (result[0]['Pages'] > 1) and \
+        relevance = result and (result[0]['Length'] > 1) and \
                     (result[0]['Format'] in [u'GIF', u'PNG'])
 
         return (u'Animated %s' % result[0]['Format'], relevance)
@@ -3270,259 +3308,70 @@
         return (u'Graphics', bool(relevance))
 
     # Category:MIDI files created with GNU LilyPond
-    def _cat_meta_MIDIfilescreatedwithGNULilyPond(self):
-        result = self._info_filter['Metadata']
-        relevance = len(result) and ('Desc' in result[0]) and \
-                    (u"Generated automatically by: GNU LilyPond" in
-                     result[0]['Desc'])
-
-        return (u'MIDI files created with GNU LilyPond', bool(relevance))
-
     # Category:Bitmap_from_Inkscape (png)
-    def _cat_meta_BitmapfromInkscape(self):
-        result = self._info_filter['Metadata']
-        relevance = len(result) and ('Software' in result[0]) and \
-                    (u"www.inkscape.org" in
-                     result[0]['Software'].lower())
-
-        return (u'Bitmap from Inkscape', bool(relevance))
-
     # Category:Created_with_Inkscape (svg)
-    def _cat_meta_CreatedwithInkscape(self):
-        result = self._info_filter['Metadata']
-        relevance = len(result) and ('Output_Extension' in result[0]) and \
-                    (u"org.inkscape.output.svg.inkscape" in
-                     result[0]['Output_Extension'].lower())
-
-        return (u'Created with Inkscape', bool(relevance))
-
     # Category:Created_with_MATLAB (png)
     # Category:Created_with_MATLAB (svg)
-    def _cat_meta_CreatedwithMATLAB(self):
-        result = self._info_filter['Metadata']
-        relevance = len(result) and \
-                    ((('Software' in result[0]) and \
-                    (u"MATLAB, The Mathworks, Inc." in 
-                     result[0]['Software'])) \
-                    or \
-                     (('Desc' in result[0]) and \
-                    (u"Matlab Figure" in 
-                     result[0]['Desc'])) )
-
-        return (u'Created with MATLAB', bool(relevance))
-
     # Category:Created_with_PLOT2SVG (svg) [new]
-    def _cat_meta_CreatedwithPLOT2SVG(self):
-        result = self._info_filter['Metadata']
-        relevance = len(result) and ('Desc' in result[0]) and \
-                    (u"Converted by PLOT2SVG" in
-                     result[0]['Desc'])
-
-        return (u'Created with PLOT2SVG', bool(relevance))
-
     # Category:Created_with_ImageMagick (jpg)
-    def _cat_meta_CreatedwithImageMagick(self):
-        result = self._info_filter['Metadata']
-        relevance = len(result) and ('Software' in result[0]) and \
-                    (u"ImageMagick" in
-                     result[0]['Software'])
-
-        return (u'Created with ImageMagick', bool(relevance))
-
     # Category:Created_with_Adobe_ImageReady (png)
-    def _cat_meta_CreatedwithAdobeImageReady(self):
-        result = self._info_filter['Metadata']
-        relevance = len(result) and ('Software' in result[0]) and \
-                    (u"Adobe ImageReady" in
-                     result[0]['Software'])
-
-        return (u'Created with Adobe ImageReady', bool(relevance))
-
     # Category:Created_with_Adobe_Photoshop (jpg)
-    def _cat_meta_CreatedwithAdobePhotoshop(self):
-        result = self._info_filter['Metadata']
-        relevance = len(result) and ('Software' in result[0]) and \
-                    (u"Adobe Photoshop" in
-                     result[0]['Software'])
-
-        return (u'Created with Adobe Photoshop', bool(relevance))
-
     # Category:Created_with_Picasa (jpg)
-    def _cat_meta_CreatedwithPicasa(self):
-        result = self._info_filter['Metadata']
-        relevance = len(result) and ('Software' in result[0]) and \
-                    (u"Picasa" in
-                     result[0]['Software'])
-
-        return (u'Created with Picasa', bool(relevance))
-
     # Category:Created_with_Qtpfsgui (jpg)
-    def _cat_meta_CreatedwithQtpfsgui(self):
-        result = self._info_filter['Metadata']
-        relevance = len(result) and ('Software' in result[0]) and \
-                    (u"Created with opensource tool Qtpfsgui" in
-                     result[0]['Software'])
-
-        return (u'Created with Qtpfsgui', bool(relevance))
-
     # Category:Created_with_Autopano (jpg)
-    def _cat_meta_CreatedwithAutopano(self):
-        result = self._info_filter['Metadata']
-        relevance = len(result) and ('Software' in result[0]) and \
-                    (u"Autopano" in
-                     result[0]['Software'])
-
-        return (u'Created with Autopano', bool(relevance))
-
     # Category:Created_with_Xmgrace (png)
-    def _cat_meta_CreatedwithXmgrace(self):
-        result = self._info_filter['Metadata']
-        relevance = len(result) and ('Software' in result[0]) and \
-                    (u"Grace" in
-                     result[0]['Software'])
-
-        return (u'Created with Xmgrace', bool(relevance))
-
     # Category:Created_with_darktable (jpg)
-    def _cat_meta_Createdwithdarktable(self):
-        result = self._info_filter['Metadata']
-        relevance = len(result) and ('Software' in result[0]) and \
-                    (u"darktable" in
-                     result[0]['Software'].lower())
-
-        return (u'Created with darktable', bool(relevance))
-
     # Category:Created_with_easyHDR (jpg)
-    def _cat_meta_CreatedwitheasyHDR(self):
-        result = self._info_filter['Metadata']
-        relevance = len(result) and \
-                    ((('Software' in result[0]) and \
-                    (u"easyHDR" in
-                     result[0]['Software'])) \
-                    or \
-                     (('Comment' in result[0]) and \
-                    (u"easyHDR" in
-                     result[0]['Comment'])) )
-
-        return (u'Created with easyHDR', bool(relevance))
-
     # Category:Created_with_GIMP (jpg) [new]
-    def _cat_meta_CreatedwithGIMP(self):
-        result = self._info_filter['Metadata']
-        relevance = len(result) and \
-                    ((('Software' in result[0]) and \
-                    (u"GIMP" in
-                     result[0]['Software'])) \
-                    or \
-                     (('Comment' in result[0]) and \
-                    (u"Created with GIMP" in
-                     result[0]['Comment'])) )
-
-        return (u'Created with GIMP', bool(relevance))
-
     # Category:Created_with_R (svg)
-    def _cat_meta_CreatedwithR(self):
-        result = self._info_filter['Metadata']
-        relevance = len(result) and ('Desc' in result[0]) and \
-                    (u"R SVG" in
-                     result[0]['Desc'])
-
-        return (u'Created with R', bool(relevance))
-
     # Category:Created_with_VectorFieldPlot (svg)
-    def _cat_meta_CreatedwithVectorFieldPlot(self):
-        result = self._info_filter['Metadata']
-        relevance = len(result) and ('Desc' in result[0]) and \
-                    (u"created with VectorFieldPlot" in
-                     result[0]['Desc'])
-
-        return (u'Created with VectorFieldPlot', bool(relevance))
-
     # Category:Created_with_Chemtool (svg)
-    def _cat_meta_CreatedwithChemtool(self):
-        result = self._info_filter['Metadata']
-        relevance = len(result) and ('Desc' in result[0]) and \
-                    (u"Created with Chemtool" in
-                     result[0]['Desc'])
-
-        return (u'Created with Chemtool', bool(relevance))
-
     # Category:Created_with_GNU_Octave (svg)
-    def _cat_meta_CreatedwithGNUOctave(self):
-        result = self._info_filter['Metadata']
-        relevance = len(result) and ('Desc' in result[0]) and \
-                    (u"Produced by GNUPLOT" in
-                     result[0]['Desc'])
-
-        return (u'Created with GNU Octave', bool(relevance))
-
     # Category:Created_with_GeoGebra (svg)
-    def _cat_meta_CreatedwithGeoGebra(self):
-        result = self._info_filter['Metadata']
-        relevance = len(result) and ('DescProducer' in result[0]) and \
-                    (u"geogebra.d.W" in
-                     result[0]['DescProducer']) #and \
-                    #(u"FreeHEP Graphics2D Driver" in
-                    # result[0]['DescCreator'])
-
-        return (u'Created with GeoGebra', bool(relevance))
-
     # Category:Created_with_Stella (png)
-    def _cat_meta_CreatedwithStella(self):
-        result = self._info_filter['Metadata']
-        relevance = len(result) and ('Comment' in result[0]) and \
-                    (u"Created using Stella4D" in
-                     result[0]['Comment'])
-
-        return (u'Created with Stella', bool(relevance))
-
     # Category:Created_with_PhotoStitch (jpg)
-    def _cat_meta_CreatedwithPhotoStitch(self):
-        result = self._info_filter['Metadata']
-        relevance = len(result) and ('Comment' in result[0]) and \
-                    (u"LEAD Technologies Inc." in
-                     result[0]['Comment'])
-
-        return (u'Created with PhotoStitch', bool(relevance))
-
     # Category:Created_with_Scribus (pdf)
-    def _cat_meta_CreatedwithScribus(self):
-        result = self._info_filter['Metadata']
-        relevance = len(result) and ('Producer' in result[0]) and \
-                    (u"Scribus PDF Library" in
-                     result[0]['Producer'])
-
-        return (u'Created with Scribus', bool(relevance))
-
     # Category:Created_with_OpenOffice.org (pdf)
-    def _cat_meta_CreatedwithOpenOfficeorg(self):
-        result = self._info_filter['Metadata']
-        relevance = len(result) and ('Producer' in result[0]) and \
-                    (u"OpenOffice.org" in
-                     result[0]['Producer'])
-
-        return (u'Created with OpenOffice.org', bool(relevance))
-
     # Category:Created_with_Tux_Paint (pdf)
-    def _cat_meta_CreatedwithTuxPaint(self):
-        result = self._info_filter['Metadata']
-        relevance = len(result) and ('Software' in result[0]) and \
-                    (u"Tux Paint" in
-                     result[0]['Software'])
-
-        return (u'Created with Tux Paint', bool(relevance))
-
     # Category:Created_with_Microsoft_Image_Composite_Editor (jpg)
-    def _cat_meta_CreatedwithMicrosoftImageCompositeEditor(self):
+    def _cat_meta_general(self):
         result = self._info_filter['Metadata']
-        relevance = len(result) and ('Software' in result[0]) and \
-                    (u"Microsoft ICE" in
-                     result[0]['Software'])
+        for key, magic, cat in [('Desc',             u"Generated automatically 
by: GNU LilyPond", u'MIDI files created with GNU LilyPond'),
+                                ('Software',         u"www.inkscape.org",      
                   u'Bitmap from Inkscape'),
+                                ('Misc',             
u"org.inkscape.output.svg.inkscape",         u'Created with Inkscape'), # 
'Output_extension'
+                                ('Software',         u"MATLAB, The Mathworks, 
Inc.",              u'Created with MATLAB'),
+                                ('Desc',             u"Matlab Figure",         
                   u'Created with MATLAB'),
+                                ('Desc',             u"Converted by PLOT2SVG", 
                   u'Created with PLOT2SVG'),
+                                ('Software',         u"ImageMagick",           
                   u'Created with ImageMagick'),
+                                ('Software',         u"Adobe ImageReady",      
                   u'Created with Adobe ImageReady'),
+                                ('Software',         u"Adobe Photoshop",       
                   u'Created with Adobe Photoshop'),
+                                ('Software',         u"Picasa",                
                   u'Created with Picasa'),
+                                ('Software',         u"Created with opensource 
tool Qtpfsgui",    u'Created with Qtpfsgui'),
+                                ('Software',         u"Autopano",              
                   u'Created with Autopano'),
+                                ('Software',         u"Grace",                 
                   u'Created with Xmgrace'),
+                                ('Software',         u"darktable",             
                   u'Created with darktable'),
+                                ('Software',         u"Tux Paint",             
                   u'Created with Tux Paint'),
+                                ('Software',         u"Microsoft ICE",         
                   u'Created with Microsoft Image Composite Editor'),
+                                ('Software',         u"easyHDR",               
                   u'Created with easyHDR'),
+                                ('Comment',          u"easyHDR",               
                   u'Created with easyHDR'),
+                                ('Software',         u"GIMP",                  
                   u'Created with GIMP'),
+                                ('Comment',          u"Created with GIMP",     
                   u'Created with GIMP'),
+                                ('Desc',             u"R SVG",                 
                   u'Created with R'),
+                                ('Desc',             u"created with 
VectorFieldPlot",             u'Created with VectorFieldPlot'),
+                                ('Desc',             u"Created with Chemtool", 
                   u'Created with Chemtool'),
+                                ('Desc',             u"Produced by GNUPLOT",   
                   u'Created with GNU Octave'),
+                                ('Misc',             u"geogebra.d.W",          
                   u'Created with GeoGebra'), # 'DescProducer'
+                                ('Comment',          u"Created using 
Stella4D",                   u'Created with Stella'),
+                                ('Comment',          u"LEAD Technologies 
Inc.",                   u'Created with PhotoStitch'),
+                                ('Producer',         u"Scribus PDF Library",   
                   u'Created with Scribus'),
+                                ('Producer',         u"OpenOffice.org",        
                   u'Created with OpenOffice.org'),]:
+            relevance = len(result) and (key in result[0]) and \
+                        (magic in result[0][key])
+            if relevance:
+                break
 
-        return (u'Created with Microsoft Image Composite Editor', 
bool(relevance))
+        return (cat, bool(relevance))
 
-# TODO: make '_cat_meta_general(self)'
-
     # Category:Categorized by DrTrigonBot
     def _addcat_BOT(self):
         # - ALWAYS -
@@ -4101,9 +3950,12 @@
         return {'Properties': result}
 
     def _filter_Metadata(self):
-        # >>> never drop <<<
-        result = self._info['Metadata']
-        return {'Metadata': result}
+        ## >>> never drop <<<
+        #result = self._info['Metadata']
+        ok = False
+        for item in self._info['Metadata'][0]:
+            ok = ok or (self._info['Metadata'][0][item] != u'-')
+        return {'Metadata': self._info['Metadata'] if ok else []}
 
     def _filter_Faces(self):
         result = self._info['Faces']


_______________________________________________
Pywikipedia-svn mailing list
Pywikipedia-svn@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/pywikipedia-svn

Reply via email to