goba Fri Dec 28 09:49:29 2001 EDT
Added files:
/phpdoc/scripts dbtags.php
Log:
A new script to check what tags we use, what tags are important, and what
are used only once or twice (may be omitted to clear up XML code)
Index: phpdoc/scripts/dbtags.php
+++ phpdoc/scripts/dbtags.php
#!/usr/bin/php -q
<?php
// Print the usage text and stop when too many arguments are given or when
// the single argument is a help switch. $argv[1] is only inspected when it
// actually exists, so running the script without arguments raises no
// "undefined array key" warning.
if ($argc > 2 ||
    ($argc == 2 && in_array($argv[1], array('--help', '-help', '-h', '-?'), true))) {
?>
Process XML files for used DocBook tags
and give statistics
Usage:
<?=$argv[0]?> [<language-code>]
<language-code> can be a valid language code
used in the repository, or 'all' for all
languages. Defaults to en.
The script will generate a tag_usage.txt
file, containing the tags used and the numbers.
Written by Gabor Hojtsy <[EMAIL PROTECTED]>, 2001-12-28
<?php
    exit;
}
// CONFIG SECTION
// Root of the PHP documentation checkout (one directory up in cvs)
$docdir = "../";
/*********************************************************************/
/* Nothing to modify below this line */
/*********************************************************************/
// Scanning every tree can take a while, so lift the execution time limit
set_time_limit(0);
// Tag name => number of occurrences, filled in while scanning
$used_tags = array();
// Scan the English tree unless a language code was passed on the command line
$langcodes = array("en");
if ($argc == 2) {
    // 'all' expands to the full list of language trees; any other value is
    // taken as a single language code
    $langcodes = ($argv[1] === 'all')
        ? array("ar", "cs", "de", "en", "es", "fr",
                "hk", "hu", "it", "ja", "kr", "nl",
                "pl", "pt_BR", "ru", "tr", "tw")
        : array($argv[1]);
}
/*********************************************************************/
/* Function definitions start here */
/*********************************************************************/
/**
 * Recursively scans a directory of phpdoc XML files and tallies the tags used.
 *
 * XML files directly inside $dir are handed to check_file() first, in sorted
 * order; after that every subdirectory is visited, also in sorted order.
 * Directories named "CVS" are skipped. A directory that cannot be opened is
 * ignored silently.
 *
 * @param string $dir       Directory to scan. Must end with a slash, because
 *                          entry names are appended to it directly.
 * @param array  $used_tags Tag name => occurrence count map, updated in place
 *                          through check_file().
 */
function check_dir($dir, &$used_tags)
{
    // Collect files and directories in these arrays
    $directories = array();
    $files = array();

    // Nothing to do when the directory is missing or unreadable
    $handle = @opendir($dir);
    if ($handle === false) {
        return;
    }

    // Compare with false explicitly: an entry named "0" is falsy and would
    // otherwise end the listing early
    while (($file = readdir($handle)) !== false) {
        if ($file != 'CVS' && $file != '.' &&
            $file != '..' && is_dir($dir.$file)) {
            $directories[] = $file;
        } elseif (substr($file, -4) === '.xml') {
            // Only names that END in .xml count, so leftovers such as
            // foo.xml.bak or foo.xml~ are not parsed as documentation
            $files[] = $file;
        }
    }
    closedir($handle);

    // Sort files and directories
    sort($directories);
    sort($files);

    // Files first...
    foreach ($files as $file) {
        check_file($dir.$file, $used_tags);
    }

    // ...then the subdirectories
    foreach ($directories as $file) {
        check_dir($dir.$file."/", $used_tags);
    }
} // check_dir() function end
/**
 * Counts the opening tags found in one XML file.
 *
 * CDATA sections and comments are removed before matching, so markup inside
 * them is not counted. Closing tags are never counted. Declarations such as
 * <?xml ...?> or <!DOCTYPE ...> also match the tag pattern and are tallied
 * under the names "?xml" and "!DOCTYPE".
 *
 * @param string $filename  Path of the file to examine.
 * @param array  $used_tags Tag name => occurrence count map, updated in place.
 */
function check_file ($filename, &$used_tags)
{
    // Read in file contents; an unreadable file is skipped
    $contents = file_get_contents($filename);
    if ($contents === false) {
        return;
    }

    // Drop CDATA sections first, then comments. Line breaks are kept in the
    // text (deleting them would glue "<para" to an attribute on the next
    // line), so the "s" modifier lets "." span lines. ".*?" also removes
    // empty sections.
    $patterns = array('/<!\[CDATA\[.*?\]\]>/s', '/<!--.*?-->/s');
    foreach ($patterns as $pattern) {
        $stripped = preg_replace($pattern, '', $contents);
        // preg_replace() returns null on a PCRE error; keep the text as is then
        if ($stripped !== null) {
            $contents = $stripped;
        }
    }

    // A tag name runs from "<" up to whitespace, ">" or the "/" of a
    // self-closing tag such as <void/>
    $found = preg_match_all('!<([^\s>/]+)[\s>/]!', $contents, $tags_found);

    // No tags found
    if (!$found) {
        return;
    }

    // New occurrences found, so increase the numbers
    foreach ($tags_found[1] as $tag_name) {
        $used_tags[$tag_name] = isset($used_tags[$tag_name])
            ? $used_tags[$tag_name] + 1
            : 1;
    }
} // check_file() function end
/*********************************************************************/
/* The main program starts here */
/*********************************************************************/
// Check all requested languages. $tested_trees is initialised up front so
// the report can still be written when no language code turns out valid.
$tested_trees = array();
foreach ($langcodes as $langcode) {
    // Check for directory validity
    if (!@is_dir($docdir . $langcode)) {
        print("The $langcode language code is not valid\n");
        continue;
    }
    $tested_trees[] = $langcode;
    // If directory is OK, start with the header
    echo "Searching in $docdir$langcode ...\n";
    // Check the requested directory
    check_dir("$docdir$langcode/", $used_tags);
}
echo "Generating tag_usage.txt ...\n";
$fp = fopen("tag_usage.txt", "w");
if ($fp === false) {
    // Without a writable report file there is nothing left to do
    echo "Could not open tag_usage.txt for writing\n";
    exit(1);
}
fwrite($fp, "TAG USAGE STATISCTICS
=========================================================
In this file you can find tag usage stats compiled
from the following tree[s] at phpdoc:\n" .
join(", ", $tested_trees) . ".
You may find some rarely used tags here, and find out
what tags others use to write documentation.
=========================================================
");
// Most frequently used tags first
arsort($used_tags);
foreach ($used_tags as $tag_name => $number) {
    fwrite($fp, sprintf("%-30s %d", $tag_name, $number). "\n");
}
fclose($fp);
echo "Done!\n";
?>