Ok.. I was bored and made some changes to the new spell-checker
Dave wrote. Here is the list of changes:

- autodetect cvsroot directory instead of ../
- ensure custom.pws is in cvsroot of phpdoc.
- allow filename(s) to be passed on the command prompt
- fixed bug with words containing apostrophes (e.g. word's).
- preview of paragraph with *word* highlighted in context.
- list choices from pspell_suggest().
- allow user to enter new word, skip, or add to word list.
- if spelling changes were made, allow you to save the changes back to disk.


Any objections to committing these changes? See attached patch.



Curt
-- 
First, let me assure you that this is not one of those shady pyramid schemes
you've been hearing about.  No, sir.  Our model is the trapezoid!
Index: spell-checker.php
===================================================================
RCS file: /repository/phpdoc/scripts/spell-checker.php,v
retrieving revision 1.2
diff -u -r1.2 spell-checker.php
--- spell-checker.php   13 Aug 2004 22:33:03 -0000      1.2
+++ spell-checker.php   17 Aug 2004 04:33:01 -0000
@@ -24,24 +24,46 @@
 
 */
 
-/* path to phpdoc CVS checkout. if this file is in the scripts/ directory
- * then the value below will be correct!
+/*
+ * autodetect the phpdoc cvs root directory
  */
-$phpdoc = '../';
+$phpdoc = get_cvsroot_dir('phpdoc');
 
 /* english! */
 $lang = 'en';
 
 /* (immediate) tags to check for spelling mistakes */
-$check_tags = array(    'para',
-                        'simpara',
-                        'title',
-                    );
+$check_tags = array(
+        'para',
+        'simpara',
+        'title',
+        );
 
 $element = '';
 $current_file = '';
 $word_count = 0;
 
+/* Autodetects the repository root: the current working directory's path
+ * up to and including the first component named $repository, with a
+ * trailing slash. If no component matches, the working directory itself
+ * is returned. Assumes the repository name isn't in the path name twice. */
+function get_cvsroot_dir($repository) {
+    /* TODO: what to do with windows.... */
+    $dirs = explode('/', posix_getcwd());
+    array_shift($dirs); /* drop the empty entry before the leading '/' */
+    $cvsdir = '/';
+    foreach ($dirs as $dir) {
+        if ($dir === '') {
+            continue; /* cwd is '/': nothing to append */
+        }
+        $cvsdir .= $dir . '/';
+        if ($dir === $repository) {
+            break; /* stop right after the repository component */
+        }
+    }
+    return $cvsdir;
+}
+
 /* prompt a user and return the response */
 function read_line($prompt) {
     echo $prompt;
@@ -74,10 +96,67 @@
     return;
 }
 
+/* Show the suggestions for $word and ask the user what to do with it.
+ *
+ * $dict        pspell dictionary link used for suggestions and additions
+ * $word        the word that failed the spell check
+ * $safechange  (out) set to false when the user typed a replacement by
+ *              hand, so that the caller rechecks it; true otherwise
+ *
+ * Returns the word to use: the original (added or skipped), the chosen
+ * suggestion, or whatever the user typed. */
+function choose_word($dict, $word, &$safechange) {
+
+    $safechange = true;
+    $ok = false;
+
+    // grab a suggestion list once; $word only changes when we leave the loop.
+    $spelt = pspell_suggest($dict, $word);
+
+    do {
+        $i = 0;
+        foreach($spelt as $correctly) {
+            if (! ($i%3) ) {
+                echo "\n";
+            }
+            echo "$i) $correctly\t";
+            $i++;
+        }
+        print "\n\n";
+        $response = read_line("what to do? ([a]dd/s[k]ip/[digit]/[n]ew word): ");
+
+        // substr() copes with an empty answer, which simply re-prompts.
+        switch (substr($response, 0, 1)) {
+            case 'a':
+                pspell_add_to_personal($dict, $word);
+                echo "Added '$word' to personal wordlist.\n";
+                $ok = true;
+                break;
+            case 'k':
+                $ok = true;
+                echo "Skpping '$word'\n";
+                break;
+            case 'n':
+                $word = read_line("=> ");
+                echo "got $word\n";
+                $safechange = false; /* forces recheck */
+                $ok = true;
+                break;
+            default:
+                // a digit picks the suggestion with that index, if any.
+                if (is_numeric($response) && !empty($spelt[$response])) {
+                    $ok = true;
+                    $word = $spelt[$response];
+                }
+        }
+    } while (! $ok);
+    return $word;
+}
+
 /* spell check a chunk of data */
 function check_data($xml, $data)
 {
-    global $element, $dict, $check_tags, $current_file, $word_count;
+    global $element, $dict, $check_tags, $current_file, $word_count, $replace_file;
 
     if (!in_array($element, $check_tags))
         return;
@@ -85,43 +164,72 @@
     if (trim($data) == '')
         return;
 
-    $words = preg_split('/\W+/', trim($data));
+    /* look for: word's */
+    $words = preg_split('/[^\w\']+/', trim($data));
+
+
     if (is_array($words)) {
+
+        $replace = array();
+        $with = array();
+
+        $spelldata = $data; /* current spell buffer */
+        $replaced = false; 
+
         foreach ($words as $word) {
             if (trim($word) == '' || is_numeric($word) || preg_match('/[^a-z]/', 
$word))
                 continue;
 
             $word_count++;
-            $word = strtolower($word);
 
-            if (!pspell_check($dict, $word)) {
-                /* known bug: due to trim()ing and whitespace removal, the
-                 * line number shown here might not match the actual line
-                 * number in the file, but it's usually pretty close
-                 */
-                echo "$current_file:" . xml_get_current_line_number($xml) . ": $word  
 (in element $element)\n";
-                do {
-                    $response = read_line("Add this word to personal wordlist? 
(yes/no/save): ");
-                    if ($response{0} == 's') {
-                        pspell_save_wordlist($dict);
-                        echo "Wordlist saved.\n";
-                    }
-                } while ($response{0} != 'y' && $response{0} != 'n');
+            // should we fix case?
+            //$word = strtolower($word);
 
-                if ($response{0} == 'y') {
-                    pspell_add_to_personal($dict, $word);
-                    echo "Added '$word' to personal wordlist.\n";
+            $spellword = $word; /* current spelling word */
+            $new = '';
+
+            do { // A loop for rechecking changed word
+
+                $wordok = true;
+
+                if (!pspell_check($dict, $spellword)) {
+                    $new = '';
+
+                    /* known bug: due to trim()ing and whitespace removal, the
+                     * line number shown here might not match the actual line
+                     * number in the file, but it's usually pretty close
+                     */
+                    echo "$current_file [" . xml_get_current_line_number($xml) . "]: 
$word \n";
+                    echo "---------------<$element>", 
preg_replace("/($spellword)/ms", '*$1*', $spelldata, 1). 
"</$element>\n--------------\n";
+                    $new = choose_word($dict, $word, $wordok);
+
+                    $spelldata = preg_replace("/($spellword)/ms", $new, $spelldata, 
1);
+                    $spellword = $new;
                 }
+
+
+            } while(! $wordok);
+
+            if (!empty($new) && $new != $word) {
+                $replaced = true;
             }
         }
+
+        /* save all replaced paragraphs to be saved to file. */
+        if ($replaced) {
+            $replace_file['from'][] = $data;
+            $replace_file['to'][] = $spelldata;
+        }
+
     }
 
     return;
 }
 
+/* spell check a file and correct the file on disk if changed */
 function check_file($filename)
 {
-    global $phpdoc, $lang, $current_file;
+    global $phpdoc, $lang, $current_file, $replace_file;
 
     if (!fnmatch('*.xml', $filename) || fnmatch("$phpdoc$lang/functions/*", 
$filename))
         return;
@@ -129,6 +237,7 @@
     echo "checking $filename...\n";
     $current_file = $filename;
 
+    $replace_file = array('from' => array(), 'to'=> array());
     $file = file_get_contents($filename);
 
     if (!$file)
@@ -144,10 +253,31 @@
 
     if (!xml_parse($xml, $file, true)) {
         printf("%s: XML error: %s at line %d\n",
-            $filename,
-            xml_error_string(xml_get_error_code($xml)),
-            xml_get_current_line_number($xml)
-        );
+                $filename,
+                xml_error_string(xml_get_error_code($xml)),
+                xml_get_current_line_number($xml)
+              );
+    }
+
+    /* if changes prompt to save file */
+    if (! empty($replace_file['from']) ) {
+        do {
+            $l = read_line('file changed.. save? [y/n]: ');
+        } while (! ($l{0} == 'y' || $l{0} == 'n') );
+        if ($l{0} == 'y') {
+
+            $newfile = str_replace($replace_file['from'], $replace_file['to'], $file);
+
+            $fp = fopen($filename, 'w');
+            if ($fp ) {
+                fwrite($fp, $newfile, strlen($newfile));
+                fclose($fp);
+                echo "ok.\n";
+            } else {
+                echo "write failed!\n";
+            }
+
+        }
     }
 
     xml_parser_free($xml);
@@ -158,10 +288,35 @@
     return;
 }
 
-$dict = pspell_new_personal('custom.pws', 'en');
+/* the personal word list lives in the root of the phpdoc checkout */
+$dict = pspell_new_personal($phpdoc.'custom.pws', 'en');
+
+/* any arguments after the script name are the files to check */
+array_shift($argv);
+$files = $argv;
+
+if ($files) {
+
+    /* relative names are resolved against the current directory */
+    $wd = posix_getcwd();
+    foreach ($files as $file) {
+
+        // file relative? (substr() is safe on an empty argument)
+        if (substr($file, 0, 1) != '/') {
+            check_file("$wd/$file");
+        } else {
+            check_file($file);
+        }
+
+    }
+
+} else {
+    /* no files given: glob for everything */
+    globbetyglob("$phpdoc$lang", 'check_file');
+}
 
-globbetyglob("$phpdoc$lang", 'check_file');
 pspell_save_wordlist($dict);
+
 echo "Wordlist saved.\n";
 echo "Processed $word_count words.\n";
 ?>

Reply via email to