goba            Fri Dec 28 09:49:29 2001 EDT

  Added files:                 
    /phpdoc/scripts     dbtags.php 
  Log:
  A new script to check what tags we use, what tags are important, and what
  are used only once or twice (may be omitted to clear up XML code)
  
  

Index: phpdoc/scripts/dbtags.php
+++ phpdoc/scripts/dbtags.php
#!/usr/bin/php -q
<?php

// Print the usage text and stop when too many arguments are given or when
// help is explicitly requested. $argv[1] only exists if an argument was
// actually passed, so it is guarded with isset() before being inspected.
if ($argc > 2 ||
    (isset($argv[1]) && in_array($argv[1], array('--help', '-help', '-h', '-?'), true))) {
?>

Process XML files for used DocBook tags
and give statistics

  Usage:
  <?=$argv[0]?> [<language-code>]

  <language-code> can be a valid language code
  used in the repository, or 'all' for all
  languages. Defaults to en.

  The script will generate a tag_usage.txt
  file, containing the tags used and the numbers.
  
  Written by Gabor Hojtsy <[EMAIL PROTECTED]>, 2001-12-28

<?php
  exit;
}

// CONFIG SECTION
// Main directory of the PHP documentation, given as a relative path
// (the parent directory, i.e. one dir up in cvs)
$docdir = "../";

/*********************************************************************/
/* Nothing to modify below this line                                 */
/*********************************************************************/

// The scan can take a long time, so remove the execution time limit
set_time_limit(0);

// Map of tag name => number of occurrences, filled while scanning
$used_tags = array();

// Languages to process: "en" unless exactly one argument was given.
// That argument is either 'all' (every known language) or one language code.
if ($argc != 2) {
    $langcodes = array("en");
} elseif ($argv[1] === 'all') {
    $langcodes = array("ar", "cs", "de", "en", "es", "fr",
                       "hk", "hu", "it", "ja", "kr", "nl",
                       "pl", "pt_BR", "ru", "tr", "tw");
} else {
    $langcodes = array($argv[1]);
}

/*********************************************************************/
/* Here starts the functions part                                    */
/*********************************************************************/

// Recursively checks a directory of phpdoc XML files.
//
//   $dir       - directory path; it must end with a slash, because entry
//                names are appended to it directly
//   $used_tags - tag name => count map, passed by reference and handed on
//                to check_file() for every XML file found
//
// The XML files of a directory are processed first (sorted by name), then
// its subdirectories (also sorted). Entries named 'CVS', '.' and '..' are
// skipped. A directory that cannot be opened is silently ignored.
function check_dir($dir, &$used_tags)
{
    // Collect files and directories in these arrays
    $directories = array();
    $files = array();

    // Open the directory; warnings stay suppressed as a best effort,
    // but a failed open must not reach readdir()
    $handle = @opendir($dir);
    if ($handle === false) {
        return;
    }

    // Compare against false explicitly: an entry named "0" is falsy and
    // would otherwise end the traversal early
    while (($file = readdir($handle)) !== false) {

      // Never descend into (or process) CVS metadata and the dot entries
      if ($file == 'CVS' || $file == '.' || $file == '..') {
        continue;
      }

      // Collect directories and XML files
      if (is_dir($dir.$file)) {
        $directories[] = $file;
      }
      // Only names ending in ".xml" count, so backups such as
      // "foo.xml.bak" or "foo.xml~" are not scanned
      elseif (substr($file, -4) === '.xml') {
        $files[] = $file;
      }

    }
    closedir($handle);

    // Sort files and directories
    sort($directories);
    sort($files);

    // Files first...
    foreach ($files as $file) {
      check_file($dir.$file, $used_tags);
    }

    // then the subdirs
    foreach ($directories as $file) {
      check_dir($dir.$file."/", $used_tags);
    }
} // check_dir() function end

// Counts the tags used in one XML file.
//
//   $filename  - path of the file to scan
//   $used_tags - tag name => occurrence count map, passed by reference;
//                counts found in this file are added to it
//
// Every "<name" that is followed by whitespace, ">" or "/>" is counted, so
// opening and self-closing tags are included while closing tags are not.
// CDATA sections and comments are removed before counting. Unreadable
// files are skipped without touching $used_tags.
function check_file ($filename, &$used_tags)
{
    // Nothing to count if the file cannot be read
    if (!is_readable($filename)) { return; }

    // Read in file contents; line breaks are kept so that a tag name and
    // an attribute on the following line do not get glued together
    $contents = join("", file($filename));

    // Drop out CDATA sections, they do not contain any DocBook tags
    // (the "s" modifier lets "." span line breaks)
    $contents = preg_replace("/<!\\[CDATA\\[.*?\\]\\]>/s", "", $contents);

    // Drop out comments, they do not contain any DocBook tags
    $contents = preg_replace("/<!--.*?-->/s", "", $contents);

    // Find all tags in this file; "/>" is accepted as a terminator so
    // self-closing tags like <void/> are counted too
    preg_match_all("!<([^\\s>/]+)(?:[\\s>]|/>)!", $contents, $tags_found);

    // New occurrences found, so increase the numbers; a tag seen for the
    // first time starts from zero instead of relying on an undefined key
    foreach ($tags_found[1] as $tag_name) {
        if (!isset($used_tags[$tag_name])) {
            $used_tags[$tag_name] = 0;
        }
        $used_tags[$tag_name]++;
    }

} // check_file() function end
  
/*********************************************************************/
/* Here starts the program                                           */
/*********************************************************************/

// Checking all requested languages. $tested_trees collects the language
// codes whose directory exists; it is initialised here so that the report
// header can always be built from an array.
$tested_trees = array();
foreach ($langcodes as $langcode) {

    // Check for directory validity
    if (!@is_dir($docdir . $langcode)) {
        print("The $langcode language code is not valid\n");
        continue;
    }
    $tested_trees[] = $langcode;

    // If directory is OK, start with the header
    echo "Searching in $docdir$langcode ...\n";

    // Check the requested directory
    check_dir("$docdir$langcode/", $used_tags);

}

// Without a single valid tree there is nothing to report
if (count($tested_trees) == 0) {
    echo "No valid language tree found, tag_usage.txt was not generated\n";
    exit(1);
}

echo "Generating tag_usage.txt ...\n";

// Stop with a message instead of writing to an invalid handle
$fp = fopen("tag_usage.txt", "w");
if ($fp === false) {
    echo "Unable to open tag_usage.txt for writing\n";
    exit(1);
}

fwrite($fp, "TAG USAGE STATISTICS

=========================================================
In this file you can find tag usage stats compiled
from the following tree[s] at phpdoc:\n" .
join(", ", $tested_trees) . ".

You may find some rarely used tags here, and find out
what tags others use to write documentation.
=========================================================

");

// Most frequently used tags first, one "name count" pair per line
arsort($used_tags);
foreach ($used_tags as $tag_name => $number) {
    fwrite($fp, sprintf("%-30s %d", $tag_name, $number). "\n");
}

fclose($fp);

echo "Done!\n";

?>


Reply via email to