Revision: 46297
Author: dale
Date: 2009-01-26 23:21:36 +0000 (Mon, 26 Jan 2009)
Log Message:
-----------
capture software infrastructure updates.
Modified Paths:
--------------
trunk/extensions/MetavidWiki/maintenance/metavid2mvWiki.inc.php
trunk/extensions/MetavidWiki/maintenance/video_ocr_thumb_insert.php
Modified: trunk/extensions/MetavidWiki/maintenance/metavid2mvWiki.inc.php
===================================================================
--- trunk/extensions/MetavidWiki/maintenance/metavid2mvWiki.inc.php	2009-01-26 23:18:51 UTC (rev 46296)
+++ trunk/extensions/MetavidWiki/maintenance/metavid2mvWiki.inc.php	2009-01-26 23:21:36 UTC (rev 46297)
@@ -175,7 +175,7 @@
// init the stream
$MVStreams[$stream->name] = new MV_Stream( $stream );
// check if the stream has already been added to the wiki (if not add it)
- $mvTitle = new MV_Title( 'MvStream:' . $stream->name );
+ $mvTitle = new MV_Title( 'Stream:' . $stream->name );
if ( !$mvTitle->doesStreamExist() ) {
// print 'do stream desc'."\n";
do_add_stream( $mvTitle, $stream );
@@ -463,7 +463,7 @@
$res = $dbr->query($sql);
//echo "\n" . $sql . "\n";
$stream = $dbr->fetchObject($res);*/
- $stream_id = $stream->id;
+ //$stream_id = $stream->id;
$out = '';
//(if we have old version of stream copy over is properties)
if( isset( $stream->org_start_time ) )
@@ -495,78 +495,80 @@
$dbw = wfGetDB( DB_WRITE );
//clear out existing archive.org files for the current stream
- $sql = "DELETE FROM `mv_stream_files` WHERE
`stream_id`='{$stream->id}' AND `file_desc_msg` LIKE 'ao_file_%' LIMIT 10";
- $dbw->query( $sql );
- print "removed existing archive.org files for $stream->name \n";
-
- if ( $stream->archive_org != '' ) {
- // grab file list from archive.org:
- require_once( 'scrape_and_insert.inc.php' );
- $aos = new MV_ArchiveOrgScrape();
-
- $file_list = $aos->getFileList( $stream->name );
- if($file_list===false || count($file_list)==0) {
- print 'no files on archive.org for'. $stream->name
."\n\n";
- return '';
- }
- $out .= '==More Media Sources==' . "\n";
- // all streams have congretional cronical:
- $out .=
'*[http://www.c-spanarchives.org/congress/?q=node/69850&date=' . $cspan_date .
'&hors=' . $ch_type .
- ' CSPAN\'s Congressional Chronicle]' . "\n";
-
- if ( $file_list ) {
- $out .= '*[http://www.archive.org/details/mv_' .
$stream->name .
- ' Archive.org hosted version]' . "\n";
- // also output 'direct' semantic links to alternate
file qualities:
- $out .= "\n===Full File Links===\n";
- $found_ogg=false;
- foreach ( $file_list as $file ) {
- $name = str_replace( ' ', '_', $file[2] );
- $url = 'http://archive.org'.$file[1];
- $size = $file[3];
-
- // add these files into the mv_files table:
- // @@todo in the future we should tie the
mv_files table to the semantic properties.
- // check if already present:
-
- $quality_msg = 'ao_file_' . $name;
-
- if($name=='Ogg_Video'){
- $found_ogg=true;
- }
- $path_type = 'url_file';
- if($found_ogg && $name=='512Kb_MPEG4'){
- $quality_msg = 'mv_archive_org_mp4';
- $path_type = 'mp4_stream';
- }
- //print "found ogg $found_ogg name: $name
qm:$quality_msg\n";
+ //$sql = "DELETE FROM `mv_stream_files` WHERE `stream_id`='{$stream->id}' AND `file_desc_msg` LIKE 'ao_file_%' LIMIT 10";
+ //$dbw->query( $sql );
+ //print "removed existing archive.org files for $stream->name \n";
- //output stream to wiki text:
- $out .= "*[{$url} $name] {$size}\n";
-
- $dbr = wfGetDB( DB_SLAVE );
- $res = $dbr->query( "SELECT * FROM
`mv_stream_files`
- WHERE
`stream_id`={$mvTitle->getStreamId()}
- AND
`file_desc_msg`='{$quality_msg}'" );
- if ( $dbr->numRows( $res ) == 0 ) {
- $sql = "INSERT INTO `mv_stream_files`
(`stream_id`,`duration`, `file_desc_msg`, `path_type`, `path`)" .
- " VALUES
('{$mvTitle->getStreamId()}','{$mvTitle->getDuration()}', '{$quality_msg}',
'{$path_type}','{$url}' )";
- } else {
- $row = $dbr->fetchObject( $res );
- // update that msg key *just in case*
- $sql = "UPDATE `mv_stream_files` SET
`path_type`='{$path_type}', `path`='$url' WHERE `id`={$row->id}";
- }
- $dbw->query( $sql );
+ //just do a forced link to the archive.org details page
+ //if ( $stream->archive_org != '' ) {
+ // grab file list from archive.org:
+ //require_once( 'scrape_and_insert.inc.php' );
+ //$aos = new MV_ArchiveOrgScrape();
+
+ //$file_list = $aos->getFileList( $stream->name );
+ //if($file_list===false || count($file_list)==0) {
+ // print 'no files on archive.org for'. $stream->name ."\n\n";
+ // return '';
+ //}
+ $out .= '==More Media Sources==' . "\n";
+ // all streams have congretional cronical:
+ $out .= '*[http://www.c-spanarchives.org/congress/?q=node/69850&date=' . $cspan_date . '&hors=' . $ch_type .
+ ' CSPAN\'s Congressional Chronicle]' . "\n";
+
+ //if ( $file_list ) {
+ $out .= '*[http://www.archive.org/details/mv_' . $stream->name .
+ ' Archive.org hosted version]' . "\n";
+ // also output 'direct' semantic links to alternate file qualities:
+ /*$out .= "\n===Full File Links===\n";
+ $found_ogg=false;
+ foreach ( $file_list as $file ) {
+ $name = str_replace( ' ', '_', $file[2] );
+ $url = 'http://archive.org'.$file[1];
+ $size = $file[3];
+
+ // add these files into the mv_files table:
+ // @@todo in the future we should tie the mv_files
table to the semantic properties.
+ // check if already present:
+
+ $quality_msg = 'ao_file_' . $name;
+
+ if($name=='Ogg_Video'){
+ $found_ogg=true;
}
- $dbw->commit();
- // more semantic properties
- $out .= "\n\n";
- $out .= '[[stream_duration::' . (
$mvTitle->getDuration() ) . '| ]]' . "\n";
- if ( $stream->date_start_time ) {
- $out .= '[[original_date::' .
$stream->date_start_time . '| ]]';
+ $path_type = 'url_file';
+ if($found_ogg && $name=='512Kb_MPEG4'){
+ $quality_msg = 'mv_archive_org_mp4';
+ $path_type = 'mp4_stream';
}
+ //print "found ogg $found_ogg name: $name
qm:$quality_msg\n";
+
+ //output stream to wiki text:
+ $out .= "*[{$url} $name] {$size}\n";
+
+ $dbr = wfGetDB( DB_SLAVE );
+ $res = $dbr->query( "SELECT * FROM `mv_stream_files`
+ WHERE
`stream_id`={$mvTitle->getStreamId()}
+ AND `file_desc_msg`='{$quality_msg}'" );
+ if ( $dbr->numRows( $res ) == 0 ) {
+ $sql = "INSERT INTO `mv_stream_files`
(`stream_id`,`duration`, `file_desc_msg`, `path_type`, `path`)" .
+ " VALUES
('{$mvTitle->getStreamId()}','{$mvTitle->getDuration()}', '{$quality_msg}',
'{$path_type}','{$url}' )";
+ } else {
+ $row = $dbr->fetchObject( $res );
+ // update that msg key *just in case*
+ $sql = "UPDATE `mv_stream_files` SET
`path_type`='{$path_type}', `path`='$url' WHERE `id`={$row->id}";
+ }
+ $dbw->query( $sql );
}
- }
+ $dbw->commit();
+ */
+ // more semantic properties
+ $out .= "\n\n";
+ $out .= '[[stream_duration::' . ( $mvTitle->getDuration() ) . '| ]]' . "\n";
+ if ( $stream->date_start_time ) {
+ $out .= '[[original_date::' . $stream->date_start_time . '| ]]';
+ }
+ //}
+ //}
// add stream category (based on sync status)
//(only add if the wiki page does not exist)
$wStreamTitle = Title::newFromText($stream->name, MV_NS_STREAM);
@@ -583,11 +585,7 @@
// other options [stream high quality sync ];
break;
}
- }
- // add in semantic stream properties
- //$out = mv_proccess_attr( 'stream_attr_varchar', $stream_id );
- //$out .= mv_proccess_attr( 'stream_attr_int', $stream_id );
-
+ }
return $out;
}
function do_bill_insert( $bill_key ) {
Modified: trunk/extensions/MetavidWiki/maintenance/video_ocr_thumb_insert.php
===================================================================
--- trunk/extensions/MetavidWiki/maintenance/video_ocr_thumb_insert.php	2009-01-26 23:18:51 UTC (rev 46296)
+++ trunk/extensions/MetavidWiki/maintenance/video_ocr_thumb_insert.php	2009-01-26 23:21:36 UTC (rev 46297)
@@ -19,12 +19,12 @@
require_once( 'maintenance_util.inc.php' );
if ( count( $args ) == 0 || isset ( $options['help'] ) ) {
- print'
+ print '
USAGE
- php ogg_thumb_insert.php stream_name filename interval
+ php video_thumb_insert.php stream_name interval
EXAMPLE we get a frame every 5 seconds from input file stream.mpeg:
- video2image2mvwiki.php stream_name stream.ogg 5
+ video2image2mvwiki.php stream_name stream.mpeg2 [5]
DURATION is scraped from ffmpeg
@@ -36,42 +36,96 @@
}
-//maybe we derive stream name from filename? one less thing to think about.
-$stream_name = $args[0];
-$filename = $args[1];
-$interval = $args[2];
+if(isset($args[0])){
+ $stream_name = $args[0];
+}else{
+ die('no stream name provided'."\n");
+}
+if(isset($args[1])){
+ $interval = $args[1];
+}else{
+ $interval = 5;
+}
+$workingdir = '/video/metavid/raw_mpeg2';
-$MV_Stream = MV_Stream::newStreamByName( $stream_name );
-$stream_id = $MV_Stream->getStreamId();
-$filedir = '../stream_images/' . MV_StreamImage::getRelativeImagePath( $stream_id );
-$workingdir = '/metavid/raw_mpeg';
+$filename = $workingdir .'/'. $stream_name . '.mpeg';
$duration = getDuration($filename);
-$ocrfile = "";
+$mvTitle = new MV_Title( 'Stream:' . $stream_name );
+if ( !$mvTitle->doesStreamExist() ) {
+ print $stream_name . " does not exist ... creating\n";
+ // print 'do stream desc'."\n";
+ include_once('metavid2mvWiki.inc.php');
+
+ //read the timestamp from the .srt (this should be unified)
+ $srt_file = $workingdir . '/' . $stream_name . '.srt';
+ $srt_ary = file( $srt_file );
+ if($srt_ary === false)
+ die(' could not find srt file: ' . $srt_file);
+
+ //time stamp:
+ $org_start_time = intval( trim( str_replace( 'starttime' , '', $srt_ary[2] )) );
+ class streamObject{
+
+ }
+ $stream = new streamObject();
+ $stream->name = $stream_name;
+ $stream->org_start_time = $org_start_time;
+ $stream->sync_status = 'in_sync';
+ $stream->duration = $duration;
+
+ if(!isset($MVStreams))
+ $MVStreams = array();
+
+ // init the stream (legacy from old stream insert system)
+ $MVStreams[ $stream->name ] = new MV_Stream( $stream );
+
+ do_add_stream( $mvTitle, $stream );
+}
+$stream_id = $mvTitle->getStreamId();
+print 'got stream id: '. $stream_id . "\n";
+$filedir = '/video/metavid/mvprime_stream_images/' . MV_StreamImage::getRelativeImagePath( $stream_id );
+echo "working on: $filename \n";
+$ocroutput = "";
+//@@TODO we should do sequential output and parse the OCR file if it already exists.
+//make sure we can write to the ocr file:
+$ocrfileloc = "$workingdir/$stream_name.ocr";
+$fh = @fopen($ocrfileloc, 'a') or die ("\nError: can't write to ocr file\n");
+fclose($fh);
//gets duration from ffmpeg
$dbw = $dbr = wfGetDB( DB_MASTER );
for ( $i = 0; $i < $duration; $i += $interval ) {
- shell_exec( "ffmpeg -ss $i -i {$filename} -vcodec mjpeg -vframes 1 -an -f rawvideo -y {$filedir}/{$i}.jpg 2>&1" );
+ //only run the ffmpeg cmd if we have to:
+ if(!is_file("{$filedir}/{$i}.jpg"))
+ shell_exec( "ffmpeg -ss $i -i {$filename} -vcodec mjpeg -vframes 1 -an -f rawvideo -y {$filedir}/{$i}.jpg 2>&1" );
+
if(is_file("{$filedir}/{$i}.jpg")){
- //$dbw->query( "INSERT INTO `mv_stream_images` (`stream_id`, `time`) VALUES ($stream_id, $i)" );
- shell_exec("convert $filedir/$i.jpg -crop 457x30+63+358 $workingdir/temp.ocr.tif && convert $workingdir/temp.ocr.tif -resize 300% -level 10%,1,20% -monochrome +compress $workingdir/temp.ocr.tif");
- shell_exec("tesseract $workingdir/temp.ocr.tif $workingdir/ocrtemp nobatch lettersonly 2>&1");
- $ocr = shell_exec("tail $workingdir/ocrtemp.txt") ." at " .sec2hms($i) ." \n";
- echo $ocr;
- $ocrfile .= $ocr;
+ //insert the image into the db:
+ $dbw->query( "INSERT INTO `mv_stream_images` (`stream_id`, `time`) VALUES ($stream_id, $i)" );
+
+ //get ocr:
+ shell_exec("convert {$filedir}/{$i}.jpg -crop 457x30+63+358 {$workingdir}/temp.{$stream_id}.ocr.tif && convert {$workingdir}/temp.{$stream_id}.ocr.tif -resize 300% -level 10%,1,20% -monochrome +compress {$workingdir}/temp.{$stream_id}.ocr.tif");
+ shell_exec("tesseract {$workingdir}/temp.{$stream_id}.ocr.tif {$workingdir}/ocrtemp{$i} nobatch lettersonly 2>&1");
+ $ocr = shell_exec("tail {$workingdir}/ocrtemp{$i}.txt") ." at " . seconds2ntp($i) ." \n";
+ echo 'got ocr:'. $ocr;
+ $ocroutput .= $ocr;
+
}else{
print "failed to create file: {$filedir}/{$i}.jpg \n";
}
}
+//remove temporary files:
+shell_exec("rm {$workingdir}ocrtemp{$i}.txt");
+shell_exec("rm {$workingdir}/temp.{$stream_id}.ocr.tif");
$ocrfileloc = "$workingdir/$stream_name.ocr";
$fh = fopen($ocrfileloc, 'w') or die ("can't write ocr file");
-fwrite($fh, $ocrfile);
+fwrite($fh, $ocroutput);
fclose($fh);
function getDuration($filename)
@@ -82,40 +136,8 @@
$result = $reg_array[0];
$hms = explode(" ", $result);
$durationhms = $hms[1];
- echo "duration is $durationhms \n";
+ echo "$filename duration is $durationhms \n";
$durarray = explode(":", $durationhms);
return ($durarray[0]* 3600) + ($durarray[1]* 60) + $durarray[2];
}
-function sec2hms ($sec, $padHours = false) {
-
- $hms = "";
-
- // there are 3600 seconds in an hour, so if we
- // divide total seconds by 3600 and throw away
- // the remainder, we've got the number of hours
- $hours = intval(intval($sec) / 3600);
-
- // add to $hms, with a leading 0 if asked for
- $hms .= ($padHours)
- ? str_pad($hours, 2, "0", STR_PAD_LEFT). ':'
- : $hours. ':';
-
- // dividing the total seconds by 60 will give us
- // the number of minutes, but we're interested in
- // minutes past the hour: to get that, we need to
- // divide by 60 again and keep the remainder
- $minutes = intval(($sec / 60) % 60);
-
- // then add to $hms (with a leading 0 if needed)
- $hms .= str_pad($minutes, 2, "0", STR_PAD_LEFT). ':';
-
- // seconds are simple - just divide the total
- // seconds by 60 and keep the remainder
- $seconds = intval($sec % 60);
-
- // add to $hms, again with a leading 0 if needed
- $hms .= str_pad($seconds, 2, "0", STR_PAD_LEFT);
-
- return $hms;
-}
_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs