Chad has uploaded a new change for review. https://gerrit.wikimedia.org/r/171014
Change subject: DumpHTML: Minor code fixes ...................................................................... DumpHTML: Minor code fixes - Remove disabled-by-hardcoding profiling. Especially since maintenance scripts no longer disable profiling - Remove --udp-profile option and related code for the same reason--just use normal core profiling - Remove MW_FORCE_PROFILE, it does nothing - Inline callback functions for xml_set_element_handler() - Remove wfMkdirParents() wrapper Change-Id: Ia38aa9048814787c49a1108813a1d9e0bc00fce1 --- M dumpHTML.inc M dumpHTML.php M wm-scripts/queueSlave 3 files changed, 23 insertions(+), 88 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/DumpHTML refs/changes/14/171014/1 diff --git a/dumpHTML.inc b/dumpHTML.inc index 6d8b110..5ff77b7 100644 --- a/dumpHTML.inc +++ b/dumpHTML.inc @@ -76,9 +76,6 @@ # Max page ID, lazy initialised var $maxPageID = false; - # UDP profiling - var $udpProfile, $udpProfileCounter = 0, $udpProfileInit = false; - # Debugging options var $showTitles = false; @@ -481,7 +478,8 @@ print $title->getPrefixedDBkey() . "\n"; } - $this->profile(); + // In case we're profiling + Profiler::instance()->setProfileID( 'dumpHTML' ); $this->rawPages = array(); $text = $this->getArticleHTML( $title ); @@ -498,7 +496,7 @@ $this->writeArticle( $title, $text ); # Do raw pages - $this->mkdir( "{$this->dest}/raw", 0755 ); + wfMkdirParents( "{$this->dest}/raw", 0755 ); foreach( $this->rawPages as $record ) { list( $file, $ftitle, $params ) = $record; @@ -525,7 +523,8 @@ /** Write the given text to the file identified by the given title object */ function writeArticle( $title, $text ) { - wfProfileIn( __METHOD__ ); + $profile = new ProfileSection( __METHOD__ ); + $filename = $this->getHashedFilename( $title ); $fullName = "{$this->dest}/$filename"; @@ -551,7 +550,7 @@ $this->escapeForVBScript( $fullName ) ) ); } } else { - if ( !$this->mkdir( $fullDir ) ) { + if ( !wfMkdirParents( $fullDir ) ) { print "Error: unable to create directory '$fullDir'.\n"; } #wfSuppressWarnings(); @@ -562,7 +561,6 @@ if ( !$success ) { die("Can't open file '$fullName' for writing.\nCheck permissions or use another destination (-d).\n"); } - wfProfileOut( __METHOD__ ); } /** Escape a UTF-8 string for VBScript's Unescape() */ @@ -583,7 +581,7 @@ /** Copy a directory recursively, not including .svn */ function copyDirectory( $source, $dest ) { if ( !is_dir( $dest ) ) { - if ( !mkdir( $dest ) ) { + if ( !wfMkdirParents( $dest ) ) { echo "Warning: unable to create directory \"$dest\"\n"; return false; } @@ -617,10 +615,10 @@ return; } echo "Initialising destination directory...\n"; - if ( !$this->mkdir( "{$this->dest}/skins" ) ) { + if ( !wfMkdirParents( "{$this->dest}/skins" ) ) { throw new MWException( "Unable to create destination skin directory." ); } - if ( !$this->mkdir( "{$this->dest}/temp" ) ) { + if ( !wfMkdirParents( "{$this->dest}/temp" ) ) { throw new MWException( "Unable to create destination temp directory." ); } @@ -765,7 +763,7 @@ */ function makeUrlSnapshot( $url ) { global $wgServer; - $this->mkdir( "{$this->dest}/misc" ); + wfMkdirParents( "{$this->dest}/misc" ); $destName = urldecode( basename( $url ) ); $destPath = "{$this->dest}/misc/$destName"; if ( !file_exists( $destPath ) ) { @@ -845,7 +843,15 @@ function findImages( $text ) { global $wgDumpImages; $parser = xml_parser_create( 'UTF-8' ); - xml_set_element_handler( $parser, 'wfDumpStartTagHandler', 'wfDumpEndTagHandler' ); + xml_set_element_handler( $parser, + function( $parser, $name, $attribs ) use ( $wgDumpImages ) { + if ( $name == 'IMG' && isset( $attribs['SRC'] ) ) { + $wgDumpImages[$attribs['SRC']] = true; + } + }, + function( $parser, $name ) { + } + ); $wgDumpImages = array(); xml_parse( $parser, $text ); @@ -879,7 +885,7 @@ $destLoc = "$destDirBase/$rel"; #print "Copying $sourceLoc to $destLoc\n"; if ( !$this->pathExists( $destLoc ) ) { - $this->mkdir( dirname( $destLoc ), 0755 ); + wfMkdirParents( dirname( $destLoc ), 0755 ); if ( !copy( $sourceLoc, $destLoc ) ) { print "Warning: unable to copy $sourceLoc to $destLoc\n"; } @@ -1082,40 +1088,8 @@ return $this->maxPageID; } - function profile() { - global $wgRequestTime, $wgRUstart; - - if ( !$this->udpProfile ) { - return; - } - if ( !$this->udpProfileInit ) { - $this->udpProfileInit = true; - $this->udpProfileCounter = 0; - } elseif ( $this->udpProfileCounter == 1 % $this->udpProfile ) { - Profiler::instance()->logData(); - Profiler::setInstance( new ProfilerStub( array() ) ); - } - if ( $this->udpProfileCounter == 0 ) { - $wgRequestTime = microtime( true ); - $wgRUstart = getrusage(); - Profiler::setInstance( new ProfilerSimpleUDP( array() ) ); - Profiler::instance()->setProfileID( 'dumpHTML' ); - } - $this->udpProfileCounter = ( $this->udpProfileCounter + 1 ) % $this->udpProfile; - } - function debug( $text ) { print "$text\n"; - } - - function mkdir( $dir ) { - //if ( wfIsWindows() ) { - return wfMkdirParents( $dir, 0755, __METHOD__ ); - /*} else { - $dir = escapeshellarg( $dir ); - `mkdir -p -- $dir`; - return true; - }*/ } } @@ -1222,7 +1196,7 @@ return; } if ( !is_dir( dirname( $dest ) ) ) { - $this->dump->mkdir( dirname( $dest ) ); + wfMkdirParents( dirname( $dest ) ); } #$this->dump->debug( "Copying $source to $dest" ); @@ -1325,7 +1299,7 @@ } } else { if ( !is_dir( dirname( $dest ) ) ) { - $this->dump->mkdir( dirname( $dest ) ); + wfMkdirParents( dirname( $dest ) ); } $tmpFile = $this->repo->getLocalCopy( $source ); @@ -1335,19 +1309,6 @@ } } } - - -/** XML parser callback */ -function wfDumpStartTagHandler( $parser, $name, $attribs ) { - global $wgDumpImages; - - if ( $name == 'IMG' && isset( $attribs['SRC'] ) ) { - $wgDumpImages[$attribs['SRC']] = true; - } -} - -/** XML parser callback */ -function wfDumpEndTagHandler( $parser, $name ) {} /** * Workaround for bug 30921; extends FauxRequest to return a fake current URL. diff --git a/dumpHTML.php b/dumpHTML.php index f112a2b..61af372 100644 --- a/dumpHTML.php +++ b/dumpHTML.php @@ -27,7 +27,6 @@ --interlang allow interlanguage links --image-snapshot copy all images used to the destination directory --compress generate compressed version of the html pages - --udp-profile <N> profile 1/N rendering operations using ProfilerSimpleUDP --oom-adj <N> set /proc/<pid>/oom_adj --show-titles write each article title to stdout --munge-title <HOW> available munging algorithms: none, md5, windows @@ -37,24 +36,8 @@ define( 'MW_HTML_FOR_DUMP', 1 ); -$optionsWithArgs = array( 's', 'd', 'e', 'k', 'checkpoint', 'slice', 'udp-profile', 'oom-adj', 'munge-title', 'group' ); +$optionsWithArgs = array( 's', 'd', 'e', 'k', 'checkpoint', 'slice', 'oom-adj', 'munge-title', 'group' ); $options = array( 'help' ); -$profiling = false; - -if ( $profiling ) { - define( 'MW_CMDLINE_CALLBACK', 'wfSetupDump' ); - function wfSetupDump() { - global $wgProfileToDatabase, $wgProfileSampleRate; - Profiler::instance(); - // Override disabled profiling in maintenance scripts - $wgProfileToDatabase = false; - $wgProfileSampleRate = 1; - } -} - -if ( in_array( '--udp-profile', $argv ) ) { - define( 'MW_FORCE_PROFILE', 1 ); -} $IP = getenv( 'MW_INSTALL_PATH' ); if ( $IP === false ) { @@ -125,7 +108,6 @@ 'noOverwrite' => $options['no-overwrite'], 'compress' => $options['compress'], 'noSharedDesc' => $options['no-shared-desc'], - 'udpProfile' => $options['udp-profile'], 'showTitles' => $options['show-titles'], 'group' => $options['group'], 'mungeTitle' => $options['munge-title'], @@ -171,9 +153,3 @@ printf( "%9d %s\n", $size, $name ); } } - -if ( $profiling ) { - echo Profiler::instance()->getOutput(); -} - - diff --git a/wm-scripts/queueSlave b/wm-scripts/queueSlave index e704133..9651717 100755 --- a/wm-scripts/queueSlave +++ b/wm-scripts/queueSlave @@ -146,7 +146,6 @@ msg(wiki + ' articles ' + slice) dumpHTML(outputFile, wiki,"--no-shared-desc", "--image-snapshot", "--interlang","-d",dest,"--slice",slice, - "--udp-profile","50", "--oom-adj", "6", #"--show-titles", "--checkpoint",checkpoint,"--no-overwrite") @@ -163,7 +162,6 @@ msg(wiki + ' shared ' + slice) dumpHTML(outputFile, wiki,"--shared-desc", "--image-snapshot", "--interlang","-d",dest,"--slice",slice, - "--udp-profile", "50", "--oom-adj", "4", "--checkpoint",checkpoint,"--no-overwrite") if isDone(checkpoint, 'shared image'): -- To view, visit https://gerrit.wikimedia.org/r/171014 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Ia38aa9048814787c49a1108813a1d9e0bc00fce1 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/DumpHTML Gerrit-Branch: master Gerrit-Owner: Chad <ch...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits