OK, I was bored and made some changes to the new spell-checker
Dave wrote. Here is the list of changes:
- autodetect cvsroot directory instead of ../
- ensure custom.pws is in cvsroot of phpdoc.
- allow filename(s) to be passed on the command prompt
- fixed bug with words containing apostrophes (e.g. "word's").
- preview of paragraph with *word* highlighted in context.
- list choices from pspell_suggest().
- allow user to enter new word, skip, or add to word list.
- if spelling changes were made, allow you to save the changes back to disk.
Any objections to committing these changes? See attached patch.
Curt
--
First, let me assure you that this is not one of those shady pyramid schemes
you've been hearing about. No, sir. Our model is the trapezoid!
Index: spell-checker.php
===================================================================
RCS file: /repository/phpdoc/scripts/spell-checker.php,v
retrieving revision 1.2
diff -u -r1.2 spell-checker.php
--- spell-checker.php 13 Aug 2004 22:33:03 -0000 1.2
+++ spell-checker.php 17 Aug 2004 04:33:01 -0000
@@ -24,24 +24,46 @@
*/
-/* path to phpdoc CVS checkout. if this file is in the scripts/ directory
- * then the value below will be correct!
+/*
+ * autodetect the root of the phpdoc CVS checkout
*/
-$phpdoc = '../';
+$phpdoc = get_cvsroot_dir('phpdoc');
/* english! */
$lang = 'en';
/* (immediate) tags to check for spelling mistakes */
-$check_tags = array( 'para',
- 'simpara',
- 'title',
- );
+$check_tags = array(
+ 'para',
+ 'simpara',
+ 'title',
+ );
$element = '';
$current_file = '';
$word_count = 0;
+/* Autodetects the repository root from the current working directory.
+ * Returns the absolute, '/'-terminated path up to and including the first
+ * path component named $repository, or '../' when no component matches. */
+function get_cvsroot_dir($repository) {
+    /* TODO: what to do with windows.... */
+    $dirs = explode('/', posix_getcwd());
+    array_shift($dirs); /* drop the empty piece before the leading '/' */
+
+    /* Walk the components without modifying $dirs while iterating over it. */
+    $cvsdir = '/';
+    foreach ($dirs as $dir) {
+        $cvsdir .= $dir . '/';
+        if ($dir === $repository) {
+            return $cvsdir;
+        }
+    }
+
+    /* Not inside a $repository checkout: assume we run from scripts/. */
+    return '../';
+}
+
/* prompt a user and return the response */
function read_line($prompt) {
echo $prompt;
@@ -74,10 +96,67 @@
return;
}
+/* Give the user some multiple choice options.. for choosing words.
+ * Lists the pspell suggestions for $word, then prompts until the answer is
+ * [a]dd (to the personal wordlist), s[k]ip, [n]ew word, or the number of a
+ * suggestion. Returns the word to use; $safechange is set to false only
+ * when a new word was typed in, which the caller uses to force a recheck. */
+function choose_word($dict, $word, &$safechange) {
+    $safechange = true;
+    $ok = false;
+    // grab a suggestion list once; $word only changes when the loop ends.
+    $spelt = pspell_suggest($dict, $word);
+    if (!is_array($spelt)) {
+        $spelt = array(); // nothing to list, a/k/n still work
+    }
+
+    do {
+        $i = 0;
+        foreach ($spelt as $correctly) {
+            if (! ($i%3) ) {
+                echo "\n";
+            }
+            echo "$i) $correctly\t";
+            $i++;
+        }
+        print "\n\n";
+        $response = trim(read_line("what to do? ([a]dd/s[k]ip/[digit]/[n]ew word): "));
+        // substr() yields an empty/false value for an empty answer instead of
+        // reading a missing string offset, so a bare <enter> just re-prompts.
+        switch (substr($response, 0, 1)) {
+            case 'a':
+                pspell_add_to_personal($dict, $word);
+                echo "Added '$word' to personal wordlist.\n";
+                $ok = true;
+                break;
+            case 'k':
+                $ok = true;
+                echo "Skipping '$word'\n";
+                break;
+
+            case 'n':
+                $word = read_line("=> ");
+                echo "got $word\n";
+                $safechange = false; /* forces recheck */
+                $ok = true;
+                break;
+
+            default:
+                // only a plain index of an existing suggestion is accepted
+                if (preg_match('/^[0-9]+$/', $response) && isset($spelt[(int) $response])) {
+                    $ok = true;
+                    $word = $spelt[(int) $response];
+                }
+        }
+    } while (! $ok);
+
+    return $word;
+}
+
/* spell check a chunk of data */
function check_data($xml, $data)
{
- global $element, $dict, $check_tags, $current_file, $word_count;
+ global $element, $dict, $check_tags, $current_file, $word_count, $replace_file;
if (!in_array($element, $check_tags))
return;
@@ -85,43 +164,72 @@
if (trim($data) == '')
return;
- $words = preg_split('/\W+/', trim($data));
+ /* look for: word's */
+ $words = preg_split('/[^\w\']+/', trim($data));
+
+
if (is_array($words)) {
+
+ $replace = array();
+ $with = array();
+
+ $spelldata = $data; /* current spell buffer */
+ $replaced = false;
+
foreach ($words as $word) {
if (trim($word) == '' || is_numeric($word) || preg_match('/[^a-z]/',
$word))
continue;
$word_count++;
- $word = strtolower($word);
- if (!pspell_check($dict, $word)) {
- /* known bug: due to trim()ing and whitespace removal, the
- * line number shown here might not match the actual line
- * number in the file, but it's usually pretty close
- */
- echo "$current_file:" . xml_get_current_line_number($xml) . ": $word
(in element $element)\n";
- do {
- $response = read_line("Add this word to personal wordlist?
(yes/no/save): ");
- if ($response{0} == 's') {
- pspell_save_wordlist($dict);
- echo "Wordlist saved.\n";
- }
- } while ($response{0} != 'y' && $response{0} != 'n');
+ // should we fix case?
+ //$word = strtolower($word);
- if ($response{0} == 'y') {
- pspell_add_to_personal($dict, $word);
- echo "Added '$word' to personal wordlist.\n";
+ $spellword = $word; /* current spelling word */
+ $new = '';
+
+ do { // A loop for rechecking changed word
+
+ $wordok = true;
+
+ if (!pspell_check($dict, $spellword)) {
+ $new = '';
+
+ /* known bug: due to trim()ing and whitespace removal, the
+ * line number shown here might not match the actual line
+ * number in the file, but it's usually pretty close
+ */
+ echo "$current_file [" . xml_get_current_line_number($xml) . "]:
$word \n";
+ echo "---------------<$element>",
preg_replace("/($spellword)/ms", '*$1*', $spelldata, 1).
"</$element>\n--------------\n";
+ $new = choose_word($dict, $word, $wordok);
+
+ $spelldata = preg_replace("/($spellword)/ms", $new, $spelldata,
1);
+ $spellword = $new;
}
+
+
+ } while(! $wordok);
+
+ if (!empty($new) && $new != $word) {
+ $replaced = true;
}
}
+
+ /* save all replaced paragraphs to be saved to file. */
+ if ($replaced) {
+ $replace_file['from'][] = $data;
+ $replace_file['to'][] = $spelldata;
+ }
+
}
return;
}
+/* spell check a file and correct the file on disk if changed */
function check_file($filename)
{
- global $phpdoc, $lang, $current_file;
+ global $phpdoc, $lang, $current_file, $replace_file;
if (!fnmatch('*.xml', $filename) || fnmatch("$phpdoc$lang/functions/*",
$filename))
return;
@@ -129,6 +237,7 @@
echo "checking $filename...\n";
$current_file = $filename;
+ $replace_file = array('from' => array(), 'to'=> array());
$file = file_get_contents($filename);
if (!$file)
@@ -144,10 +253,31 @@
if (!xml_parse($xml, $file, true)) {
printf("%s: XML error: %s at line %d\n",
- $filename,
- xml_error_string(xml_get_error_code($xml)),
- xml_get_current_line_number($xml)
- );
+ $filename,
+ xml_error_string(xml_get_error_code($xml)),
+ xml_get_current_line_number($xml)
+ );
+ }
+
+ /* if changes prompt to save file */
+ if (! empty($replace_file['from']) ) {
+ do {
+ $l = read_line('file changed.. save? [y/n]: ');
+ } while (! ($l{0} == 'y' || $l{0} == 'n') );
+ if ($l{0} == 'y') {
+
+ $newfile = str_replace($replace_file['from'], $replace_file['to'], $file);
+
+ $fp = fopen($filename, 'w');
+ if ($fp ) {
+ fwrite($fp, $newfile, strlen($newfile));
+ fclose($fp);
+ echo "ok.\n";
+ } else {
+ echo "write failed!\n";
+ }
+
+ }
}
xml_parser_free($xml);
@@ -158,10 +288,35 @@
return;
}
-$dict = pspell_new_personal('custom.pws', 'en');
+$dict = pspell_new_personal($phpdoc.'custom.pws', 'en');
+
+array_shift($argv);
+$files = array();
+foreach($argv as $arg) {
+ $files[] = $arg;
+}
+
+if ($files) {
+
+ $wd = posix_getcwd();
+ foreach ($files as $file) {
+
+ // file relative?
+ if ($file{0} != '/') {
+ check_file("$wd/$file");
+ } else {
+ check_file($file);
+ }
+
+ }
+
+} else {
+ /* glob for everything */
+ globbetyglob("$phpdoc$lang", 'check_file');
+}
-globbetyglob("$phpdoc$lang", 'check_file');
pspell_save_wordlist($dict);
+
echo "Wordlist saved.\n";
echo "Processed $word_count words.\n";
?>