Revision: 46297
Author:   dale
Date:     2009-01-26 23:21:36 +0000 (Mon, 26 Jan 2009)

Log Message:
-----------
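Capture software infrastructure updates: build stream titles with the 'Stream:' prefix, replace the archive.org file-list scrape with a direct link to the archive.org details page, and make video_ocr_thumb_insert.php create the stream if it does not exist and default the frame interval to 5 seconds.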

Modified Paths:
--------------
    trunk/extensions/MetavidWiki/maintenance/metavid2mvWiki.inc.php
    trunk/extensions/MetavidWiki/maintenance/video_ocr_thumb_insert.php

Modified: trunk/extensions/MetavidWiki/maintenance/metavid2mvWiki.inc.php
===================================================================
--- trunk/extensions/MetavidWiki/maintenance/metavid2mvWiki.inc.php     
2009-01-26 23:18:51 UTC (rev 46296)
+++ trunk/extensions/MetavidWiki/maintenance/metavid2mvWiki.inc.php     
2009-01-26 23:21:36 UTC (rev 46297)
@@ -175,7 +175,7 @@
                // init the stream
                $MVStreams[$stream->name] = new MV_Stream( $stream );
                // check if the stream has already been added to the wiki (if 
not add it)
-               $mvTitle = new MV_Title( 'MvStream:' . $stream->name );
+               $mvTitle = new MV_Title( 'Stream:' . $stream->name );
                if ( !$mvTitle->doesStreamExist() ) {
                        // print 'do stream desc'."\n";
                        do_add_stream( $mvTitle, $stream );
@@ -463,7 +463,7 @@
        $res = $dbr->query($sql);
        //echo "\n" . $sql . "\n";
        $stream = $dbr->fetchObject($res);*/
-       $stream_id = $stream->id;
+       //$stream_id = $stream->id;
        $out = '';      
        //(if we have old version of stream copy over is properties) 
        if( isset( $stream->org_start_time ) )
@@ -495,78 +495,80 @@
        $dbw = wfGetDB( DB_WRITE );
        
        //clear out existing archive.org files for the current stream           
        
-       $sql = "DELETE FROM  `mv_stream_files` WHERE 
`stream_id`='{$stream->id}' AND `file_desc_msg` LIKE 'ao_file_%' LIMIT 10";
-       $dbw->query( $sql );
-       print "removed existing archive.org files for $stream->name \n";
-                       
-       if ( $stream->archive_org != '' ) {
-               // grab file list from archive.org:
-               require_once( 'scrape_and_insert.inc.php' );
-               $aos = new MV_ArchiveOrgScrape();
-               
-               $file_list = $aos->getFileList( $stream->name );
-               if($file_list===false || count($file_list)==0) {
-                       print 'no files on archive.org for'. $stream->name 
."\n\n";
-                       return '';
-               }
-               $out .= '==More Media Sources==' . "\n";
-               // all streams have congretional cronical:
-               $out .= 
'*[http://www.c-spanarchives.org/congress/?q=node/69850&date=' . $cspan_date . 
'&hors=' . $ch_type .
-               ' CSPAN\'s Congressional Chronicle]' . "\n";
-               
-               if ( $file_list ) {                                     
-                       $out .= '*[http://www.archive.org/details/mv_' . 
$stream->name . 
-                       ' Archive.org hosted version]' . "\n";
-                       // also output 'direct' semantic links to alternate 
file qualities:
-                       $out .= "\n===Full File Links===\n";            
-                       $found_ogg=false;
-                       foreach ( $file_list as $file ) {
-                               $name = str_replace( ' ', '_', $file[2] );
-                               $url = 'http://archive.org'.$file[1];
-                               $size = $file[3];               
-                                                                               
-                               // add these files into the mv_files table:
-                               // @@todo in the future we should tie the 
mv_files table to the semantic properties.
-                               // check if already present:
-                               
-                               $quality_msg = 'ao_file_' . $name;
-                               
-                               if($name=='Ogg_Video'){
-                                       $found_ogg=true;
-                               }
-                               $path_type = 'url_file';
-                               if($found_ogg && $name=='512Kb_MPEG4'){
-                                       $quality_msg = 'mv_archive_org_mp4';
-                                       $path_type = 'mp4_stream';
-                               }
-                               //print "found ogg $found_ogg name: $name  
qm:$quality_msg\n";
+       //$sql = "DELETE FROM  `mv_stream_files` WHERE 
`stream_id`='{$stream->id}' AND `file_desc_msg` LIKE 'ao_file_%' LIMIT 10";
+       //$dbw->query( $sql );
+       //print "removed existing archive.org files for $stream->name \n";
 
-                               //output stream to wiki text: 
-                               $out .= "*[{$url} $name] {$size}\n";    
-                               
-                               $dbr = wfGetDB( DB_SLAVE );
-                               $res = $dbr->query( "SELECT * FROM 
`mv_stream_files`
-                                               WHERE 
`stream_id`={$mvTitle->getStreamId()}
-                                               AND 
`file_desc_msg`='{$quality_msg}'" );
-                               if ( $dbr->numRows( $res ) == 0 ) {
-                                       $sql = "INSERT INTO `mv_stream_files` 
(`stream_id`,`duration`, `file_desc_msg`, `path_type`, `path`)" . 
-                                       " VALUES 
('{$mvTitle->getStreamId()}','{$mvTitle->getDuration()}', '{$quality_msg}', 
'{$path_type}','{$url}' )";
-                               } else {
-                                       $row = $dbr->fetchObject( $res );
-                                       // update that msg key *just in case*
-                                       $sql = "UPDATE  `mv_stream_files` SET 
`path_type`='{$path_type}', `path`='$url' WHERE `id`={$row->id}";
-                               }
-                               $dbw->query( $sql );
+       //just do a forced link to the archive.org details page
+       //if ( $stream->archive_org != '' ) {
+       // grab file list from archive.org:
+       //require_once( 'scrape_and_insert.inc.php' );
+       //$aos = new MV_ArchiveOrgScrape();
+       
+       //$file_list = $aos->getFileList( $stream->name );
+       //if($file_list===false || count($file_list)==0) {
+       //      print 'no files on archive.org for'. $stream->name ."\n\n";
+       //      return '';
+       //}
+       $out .= '==More Media Sources==' . "\n";
+       // all streams get a link to C-SPAN's Congressional Chronicle:
+       $out .= '*[http://www.c-spanarchives.org/congress/?q=node/69850&date=' 
. $cspan_date . '&hors=' . $ch_type .
+       ' CSPAN\'s Congressional Chronicle]' . "\n";
+       
+       //if ( $file_list ) {                                   
+               $out .= '*[http://www.archive.org/details/mv_' . $stream->name 
. 
+               ' Archive.org hosted version]' . "\n";
+               // also output 'direct' semantic links to alternate file 
qualities:
+               /*$out .= "\n===Full File Links===\n";          
+               $found_ogg=false;
+               foreach ( $file_list as $file ) {
+                       $name = str_replace( ' ', '_', $file[2] );
+                       $url = 'http://archive.org'.$file[1];
+                       $size = $file[3];               
+                                                                       
+                       // add these files into the mv_files table:
+                       // @@todo in the future we should tie the mv_files 
table to the semantic properties.
+                       // check if already present:
+                       
+                       $quality_msg = 'ao_file_' . $name;
+                       
+                       if($name=='Ogg_Video'){
+                               $found_ogg=true;
                        }
-                       $dbw->commit();
-                       // more semantic properties
-                       $out .= "\n\n";
-                       $out .= '[[stream_duration::' . ( 
$mvTitle->getDuration() ) . '| ]]' . "\n";
-                       if ( $stream->date_start_time ) {
-                               $out .= '[[original_date::' . 
$stream->date_start_time . '| ]]';
+                       $path_type = 'url_file';
+                       if($found_ogg && $name=='512Kb_MPEG4'){
+                               $quality_msg = 'mv_archive_org_mp4';
+                               $path_type = 'mp4_stream';
                        }
+                       //print "found ogg $found_ogg name: $name  
qm:$quality_msg\n";
+
+                       //output stream to wiki text: 
+                       $out .= "*[{$url} $name] {$size}\n";    
+                       
+                       $dbr = wfGetDB( DB_SLAVE );
+                       $res = $dbr->query( "SELECT * FROM `mv_stream_files`
+                                       WHERE 
`stream_id`={$mvTitle->getStreamId()}
+                                       AND `file_desc_msg`='{$quality_msg}'" );
+                       if ( $dbr->numRows( $res ) == 0 ) {
+                               $sql = "INSERT INTO `mv_stream_files` 
(`stream_id`,`duration`, `file_desc_msg`, `path_type`, `path`)" . 
+                               " VALUES 
('{$mvTitle->getStreamId()}','{$mvTitle->getDuration()}', '{$quality_msg}', 
'{$path_type}','{$url}' )";
+                       } else {
+                               $row = $dbr->fetchObject( $res );
+                               // update that msg key *just in case*
+                               $sql = "UPDATE  `mv_stream_files` SET 
`path_type`='{$path_type}', `path`='$url' WHERE `id`={$row->id}";
+                       }
+                       $dbw->query( $sql );
                }
-       }
+               $dbw->commit();
+               */
+               // more semantic properties
+               $out .= "\n\n";
+               $out .= '[[stream_duration::' . ( $mvTitle->getDuration() ) . 
'| ]]' . "\n";
+               if ( $stream->date_start_time ) {
+                       $out .= '[[original_date::' . $stream->date_start_time 
. '| ]]';
+               }
+               //}
+       //}
        // add stream category (based on sync status)
        //(only add if the wiki page does not exist)
        $wStreamTitle = Title::newFromText($stream->name, MV_NS_STREAM);
@@ -583,11 +585,7 @@
                                // other options [stream high quality sync ];
                        break;
                }
-       }
-       // add in semantic stream properties
-       //$out = mv_proccess_attr( 'stream_attr_varchar', $stream_id );
-       //$out .= mv_proccess_attr( 'stream_attr_int', $stream_id );
-       
+       }       
        return $out;
 }
 function do_bill_insert( $bill_key ) {

Modified: trunk/extensions/MetavidWiki/maintenance/video_ocr_thumb_insert.php
===================================================================
--- trunk/extensions/MetavidWiki/maintenance/video_ocr_thumb_insert.php 
2009-01-26 23:18:51 UTC (rev 46296)
+++ trunk/extensions/MetavidWiki/maintenance/video_ocr_thumb_insert.php 
2009-01-26 23:21:36 UTC (rev 46297)
@@ -19,12 +19,12 @@
 require_once( 'maintenance_util.inc.php' );
 
 if ( count( $args ) == 0 || isset ( $options['help'] ) ) {
-       print'
+       print '
 USAGE
- php ogg_thumb_insert.php stream_name filename interval
+ php video_thumb_insert.php stream_name interval
 
 EXAMPLE we get a frame every 5 seconds from input file stream.mpeg: 
- video2image2mvwiki.php stream_name stream.ogg 5
+ video2image2mvwiki.php stream_name stream.mpeg2 [5]
 
 DURATION is scraped from ffmpeg
 
@@ -36,42 +36,96 @@
 }
 
 
-//maybe we derive stream name from filename? one less thing to think about.
-$stream_name = $args[0];
-$filename = $args[1];
-$interval = $args[2];
+if(isset($args[0])){
+       $stream_name = $args[0];
+}else{
+       die('no stream name provided'."\n");    
+}
 
+if(isset($args[1])){
+       $interval = $args[1];
+}else{
+       $interval = 5;
+}
+$workingdir = '/video/metavid/raw_mpeg2';
 
-$MV_Stream = MV_Stream::newStreamByName( $stream_name );
-$stream_id = $MV_Stream->getStreamId();
 
-$filedir = '../stream_images/' . MV_StreamImage::getRelativeImagePath( 
$stream_id );
-$workingdir = '/metavid/raw_mpeg';
+$filename = $workingdir .'/'. $stream_name . '.mpeg'; 
 $duration = getDuration($filename);
 
-$ocrfile = "";
+$mvTitle = new MV_Title( 'Stream:' . $stream_name );
+if ( !$mvTitle->doesStreamExist() ) {
+       print $stream_name . " does not exist ... creating\n";
+       // print 'do stream desc'."\n";
+       include_once('metavid2mvWiki.inc.php');
+       
+       //read the timestamp from the .srt (this should be unified)
+       $srt_file = $workingdir . '/' . $stream_name . '.srt';
+       $srt_ary = file( $srt_file );
+       if($srt_ary === false)
+               die(' could not find srt file: ' . $srt_file); 
+                               
+       //time stamp: 
+       $org_start_time = intval( trim( str_replace( 'starttime' , '', 
$srt_ary[2] )) );         
+       class streamObject{
+       
+       }
+       $stream = new streamObject();
+       $stream->name = $stream_name;
+       $stream->org_start_time =       $org_start_time; 
+       $stream->sync_status    =       'in_sync';
+       $stream->duration               =       $duration;
+               
+       if(!isset($MVStreams))
+               $MVStreams = array();
+       
+       // init the stream (legacy from old stream insert system)  
+       $MVStreams[ $stream->name ] = new MV_Stream( $stream );         
+       
+       do_add_stream( $mvTitle, $stream );
+}
+$stream_id = $mvTitle->getStreamId();
+print 'got stream id: '. $stream_id . "\n";
+$filedir = '/video/metavid/mvprime_stream_images/' . 
MV_StreamImage::getRelativeImagePath( $stream_id );
 
+echo "working on: $filename \n";
+$ocroutput = "";
+//@@TODO we should do sequential output and parse the OCR file if it already 
exists. 
 
+//make sure we can write to the ocr file: 
+$ocrfileloc = "$workingdir/$stream_name.ocr";
+$fh = @fopen($ocrfileloc, 'a') or die ("\nError: can't write to ocr file\n");
+fclose($fh);
 //gets duration from ffmpeg
 
 $dbw = $dbr = wfGetDB( DB_MASTER );
 for ( $i = 0; $i < $duration; $i += $interval ) {
-  shell_exec( "ffmpeg -ss $i -i {$filename} -vcodec mjpeg -vframes 1 -an -f 
rawvideo -y {$filedir}/{$i}.jpg 2>&1" );
+  //only run the ffmpeg cmd if we have to: 
+  if(!is_file("{$filedir}/{$i}.jpg"))
+       shell_exec( "ffmpeg -ss $i -i {$filename} -vcodec mjpeg -vframes 1 -an 
-f rawvideo -y {$filedir}/{$i}.jpg 2>&1" );
+  
   if(is_file("{$filedir}/{$i}.jpg")){
-       //$dbw->query( "INSERT INTO `mv_stream_images` (`stream_id`, `time`) 
VALUES ($stream_id, $i)" );
-       shell_exec("convert $filedir/$i.jpg -crop 457x30+63+358  
$workingdir/temp.ocr.tif && convert $workingdir/temp.ocr.tif -resize 300% 
-level 10%,1,20% -monochrome +compress $workingdir/temp.ocr.tif");
-    shell_exec("tesseract $workingdir/temp.ocr.tif $workingdir/ocrtemp nobatch 
lettersonly 2>&1");
-    $ocr = shell_exec("tail $workingdir/ocrtemp.txt") ." at " .sec2hms($i) ." 
\n";
-    echo $ocr;
-    $ocrfile .= $ocr;  
+       //insert the image into the db:
+       $dbw->query( "INSERT INTO `mv_stream_images` (`stream_id`, `time`) 
VALUES ($stream_id, $i)" );
+       
+       //get ocr:
+       shell_exec("convert {$filedir}/{$i}.jpg -crop 457x30+63+358  
{$workingdir}/temp.{$stream_id}.ocr.tif && convert 
{$workingdir}/temp.{$stream_id}.ocr.tif -resize 300% -level 10%,1,20% 
-monochrome +compress {$workingdir}/temp.{$stream_id}.ocr.tif");
+    shell_exec("tesseract {$workingdir}/temp.{$stream_id}.ocr.tif 
{$workingdir}/ocrtemp{$i} nobatch lettersonly 2>&1");
+    $ocr = shell_exec("tail {$workingdir}/ocrtemp{$i}.txt") ." at " . 
seconds2ntp($i) ." \n";
+    echo 'got ocr:'.  $ocr;
+    $ocroutput .= $ocr;        
+    
   }else{
        print "failed to create file: {$filedir}/{$i}.jpg \n";  
   }
 }
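+//remove temporary files (FIXME: $i is already past the last frame here and the first path lacks a '/' after $workingdir, so the per-frame ocrtemp{$i}.txt files are not removed by this): 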
+shell_exec("rm {$workingdir}ocrtemp{$i}.txt");
+shell_exec("rm {$workingdir}/temp.{$stream_id}.ocr.tif");
 
 $ocrfileloc = "$workingdir/$stream_name.ocr";
 $fh = fopen($ocrfileloc, 'w') or die ("can't write ocr file");
-fwrite($fh, $ocrfile);
+fwrite($fh, $ocroutput);
 fclose($fh);
 
 function getDuration($filename)
@@ -82,40 +136,8 @@
     $result = $reg_array[0];
     $hms = explode(" ", $result);
     $durationhms = $hms[1];
-    echo "duration is $durationhms \n";
+    echo "$filename duration is $durationhms \n";
     $durarray = explode(":", $durationhms);
     return ($durarray[0]* 3600) + ($durarray[1]* 60) + $durarray[2];
 }
 
-function sec2hms ($sec, $padHours = false) {
-
-    $hms = "";
-    
-    // there are 3600 seconds in an hour, so if we
-    // divide total seconds by 3600 and throw away
-    // the remainder, we've got the number of hours
-    $hours = intval(intval($sec) / 3600); 
-
-    // add to $hms, with a leading 0 if asked for
-    $hms .= ($padHours) 
-          ? str_pad($hours, 2, "0", STR_PAD_LEFT). ':'
-          : $hours. ':';
-     
-    // dividing the total seconds by 60 will give us
-    // the number of minutes, but we're interested in 
-    // minutes past the hour: to get that, we need to 
-    // divide by 60 again and keep the remainder
-    $minutes = intval(($sec / 60) % 60); 
-
-    // then add to $hms (with a leading 0 if needed)
-    $hms .= str_pad($minutes, 2, "0", STR_PAD_LEFT). ':';
-
-    // seconds are simple - just divide the total
-    // seconds by 60 and keep the remainder
-    $seconds = intval($sec % 60); 
-
-    // add to $hms, again with a leading 0 if needed
-    $hms .= str_pad($seconds, 2, "0", STR_PAD_LEFT);
-
-    return $hms;
-}



_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Reply via email to