OK, I was bored and made some changes to the new spell-checker Dave wrote. Here is the list of changes:
- autodetect the cvsroot directory (the root of the phpdoc checkout) instead of assuming ../
- ensure custom.pws is loaded from the cvsroot of phpdoc.
- allow filename(s) to be passed on the command line.
- fixed a bug with words containing apostrophes.
- preview of the paragraph with the *word* highlighted in context.
- list choices from pspell_suggest().
- allow the user to enter a new word, skip, or add to the word list.
- if spelling changes were made, allow you to save the changes back to disk.

Any objections to committing these changes? See attached patch.

Curt
--
First, let me assure you that this is not one of those shady pyramid schemes you've been hearing about. No, sir. Our model is the trapezoid!
Index: spell-checker.php =================================================================== RCS file: /repository/phpdoc/scripts/spell-checker.php,v retrieving revision 1.2 diff -u -r1.2 spell-checker.php --- spell-checker.php 13 Aug 2004 22:33:03 -0000 1.2 +++ spell-checker.php 17 Aug 2004 04:33:01 -0000 @@ -24,24 +24,46 @@ */ -/* path to phpdoc CVS checkout. if this file is in the scripts/ directory - * then the value below will be correct! +/* + * autodetect cvs cvsroot */ -$phpdoc = '../'; +$phpdoc = get_cvsroot_dir('phpdoc'); /* english! */ $lang = 'en'; /* (immediate) tags to check for spelling mistakes */ -$check_tags = array( 'para', - 'simpara', - 'title', - ); +$check_tags = array( + 'para', + 'simpara', + 'title', + ); $element = ''; $current_file = ''; $word_count = 0; +/* autodectects repostiory root, assumes repository name isn't + * in the path name twice */ +function get_cvsroot_dir($repository) { + + /* TODO: what to do with windows.... */ + $dirs = explode('/', posix_getcwd()); + array_shift($dirs); + + $cvsdir = '/'; + foreach($dirs as $path) { + $dir = array_shift($dirs); + if ($dir == $repository) { + break; + } + $cvsdir .= $dir . '/'; + } + $cvsdir .= $dir . '/'; + + return $cvsdir; +} + /* prompt a user and return the response */ function read_line($prompt) { echo $prompt; @@ -74,10 +96,67 @@ return; } +/* Give the user some multiple choice options.. for choosing words */ +function choose_word($dict, $word, &$safechange) { + + + $safechange = true; + $ok = false; + do { + + // grab a suggestion list. + $spelt = pspell_suggest($dict, $word); + + $i = 0; + foreach($spelt as $correctly) { + if (! ($i%3) ) { + echo "\n"; + } + echo "$i) $correctly\t"; + $i++; + } + print "\n\n"; + $response = read_line("what to do? 
([a]dd/s[k]ip/[digit]/[n]ew word): "); + switch ($response{0}) { + case 'a': + pspell_add_to_personal($dict, $word); + echo "Added '$word' to personal wordlist.\n"; + + $ok = true; + break; + case 'k': + $ok = true; + echo "Skpping '$word'\n"; + break; + + case 'n': + $word = read_line("=> "); + echo "got $word\n"; + $safechange = false; /* forces recheck */ + $ok = true; + break; + + default: + if (is_numeric($response) ) { + if (!empty($spelt[$response]) ) { + $ok = true; + $word = $spelt[$response]; + + } + } + } + + } while (! $ok); + + + return $word; + +} + /* spell check a chunk of data */ function check_data($xml, $data) { - global $element, $dict, $check_tags, $current_file, $word_count; + global $element, $dict, $check_tags, $current_file, $word_count, $replace_file; if (!in_array($element, $check_tags)) return; @@ -85,43 +164,72 @@ if (trim($data) == '') return; - $words = preg_split('/\W+/', trim($data)); + /* look for: word's */ + $words = preg_split('/[^\w\']+/', trim($data)); + + if (is_array($words)) { + + $replace = array(); + $with = array(); + + $spelldata = $data; /* current spell buffer */ + $replaced = false; + foreach ($words as $word) { if (trim($word) == '' || is_numeric($word) || preg_match('/[^a-z]/', $word)) continue; $word_count++; - $word = strtolower($word); - if (!pspell_check($dict, $word)) { - /* known bug: due to trim()ing and whitespace removal, the - * line number shown here might not match the actual line - * number in the file, but it's usually pretty close - */ - echo "$current_file:" . xml_get_current_line_number($xml) . ": $word (in element $element)\n"; - do { - $response = read_line("Add this word to personal wordlist? (yes/no/save): "); - if ($response{0} == 's') { - pspell_save_wordlist($dict); - echo "Wordlist saved.\n"; - } - } while ($response{0} != 'y' && $response{0} != 'n'); + // should we fix case? 
+ //$word = strtolower($word); - if ($response{0} == 'y') { - pspell_add_to_personal($dict, $word); - echo "Added '$word' to personal wordlist.\n"; + $spellword = $word; /* current spelling word */ + $new = ''; + + do { // A loop for rechecking changed word + + $wordok = true; + + if (!pspell_check($dict, $spellword)) { + $new = ''; + + /* known bug: due to trim()ing and whitespace removal, the + * line number shown here might not match the actual line + * number in the file, but it's usually pretty close + */ + echo "$current_file [" . xml_get_current_line_number($xml) . "]: $word \n"; + echo "---------------<$element>", preg_replace("/($spellword)/ms", '*$1*', $spelldata, 1). "</$element>\n--------------\n"; + $new = choose_word($dict, $word, $wordok); + + $spelldata = preg_replace("/($spellword)/ms", $new, $spelldata, 1); + $spellword = $new; } + + + } while(! $wordok); + + if (!empty($new) && $new != $word) { + $replaced = true; } } + + /* save all replaced paragraphs to be saved to file. */ + if ($replaced) { + $replace_file['from'][] = $data; + $replace_file['to'][] = $spelldata; + } + } return; } +/* spell check a file and correct the file on disk if changed */ function check_file($filename) { - global $phpdoc, $lang, $current_file; + global $phpdoc, $lang, $current_file, $replace_file; if (!fnmatch('*.xml', $filename) || fnmatch("$phpdoc$lang/functions/*", $filename)) return; @@ -129,6 +237,7 @@ echo "checking $filename...\n"; $current_file = $filename; + $replace_file = array('from' => array(), 'to'=> array()); $file = file_get_contents($filename); if (!$file) @@ -144,10 +253,31 @@ if (!xml_parse($xml, $file, true)) { printf("%s: XML error: %s at line %d\n", - $filename, - xml_error_string(xml_get_error_code($xml)), - xml_get_current_line_number($xml) - ); + $filename, + xml_error_string(xml_get_error_code($xml)), + xml_get_current_line_number($xml) + ); + } + + /* if changes prompt to save file */ + if (! 
empty($replace_file['from']) ) { + do { + $l = read_line('file changed.. save? [y/n]: '); + } while (! ($l{0} == 'y' || $l{0} == 'n') ); + if ($l{0} == 'y') { + + $newfile = str_replace($replace_file['from'], $replace_file['to'], $file); + + $fp = fopen($filename, 'w'); + if ($fp ) { + fwrite($fp, $newfile, strlen($newfile)); + fclose($fp); + echo "ok.\n"; + } else { + echo "write failed!\n"; + } + + } } xml_parser_free($xml); @@ -158,10 +288,35 @@ return; } -$dict = pspell_new_personal('custom.pws', 'en'); +$dict = pspell_new_personal($phpdoc.'custom.pws', 'en'); + +array_shift($argv); +$files = array(); +foreach($argv as $arg) { + $files[] = $arg; +} + +if ($files) { + + $wd = posix_getcwd(); + foreach ($files as $file) { + + // file relative? + if ($file{0} != '/') { + check_file("$wd/$file"); + } else { + check_file($file); + } + + } + +} else { + /* glob for everything */ + globbetyglob("$phpdoc$lang", 'check_file'); +} -globbetyglob("$phpdoc$lang", 'check_file'); pspell_save_wordlist($dict); + echo "Wordlist saved.\n"; echo "Processed $word_count words.\n"; ?>