nicos           Sun Nov 17 11:11:54 2002 EDT

  Modified files:              
    /phpdoc/chm make_chm.php 
  Log:
  adding patch of Moriyoshi Koizumi see phpdoc mailing for details
  
  
Index: phpdoc/chm/make_chm.php
diff -u phpdoc/chm/make_chm.php:1.5 phpdoc/chm/make_chm.php:1.6
--- phpdoc/chm/make_chm.php:1.5 Sun Oct  6 05:35:03 2002
+++ phpdoc/chm/make_chm.php     Sun Nov 17 11:11:54 2002
@@ -14,25 +14,79 @@
 $FANCY_PATH    = getenv("PHP_HELP_COMPILE_FANCYDIR");
 $LANGUAGE      = getenv("PHP_HELP_COMPILE_LANG");
 $INDEX_IN_HTML = "index.html";
+$INTERNAL_CHARSET = "UTF-8";
+$DEFAULT_FONT = "Arial,10,0";
 
 if (empty($FANCY_PATH)) { $FANCY_PATH = $HTML_PATH; }
 
 // Array to manual code -> HTML Help Code conversion
 // Code list: http://www.helpware.net/htmlhelp/hh_info.htm
 $LANGUAGES = array(
-    "tw"    => "0x404 Traditional Chinese",
-    "cs"    => "0x405 Czech",
-    "de"    => "0x407 German (Germany)",
-    "en"    => "0x809 Enlish (United Kingdom)",
-    "es"    => "0xc0a Spanish (International Sort)",
-    "fr"    => "0x40c French (France)",
-    "hu"    => "0x40e Hungarian",
-    "it"    => "0x410 Italian (Italy)",
-    "ja"    => "0x411 Japanese",
-    "kr"    => "0x412 Korean",
-    "nl"    => "0x413 Dutch (Netherlands)",
-    "pt_BR" => "0x416 Portuguese (Brazil)",
-    "zh"    => "0x804 Simplified Chinese"
+    "tw"    => array(
+                   "langcode" => "0x404 Traditional Chinese",
+                   "preferred_charset" => "CP950",
+                   "preferred_font" => $DEFAULT_FONT
+               ),
+    "cs"    => array(
+                   "langcode" => "0x405 Czech",
+                   "preferred_charset" => "Windows-1250",
+                   "preferred_font" => $DEFAULT_FONT
+               ),
+    "de"    => array(
+                   "langcode" => "0x407 German (Germany)",
+                   "preferred_charset" => "Windows-1252",
+                   "preferred_font" => $DEFAULT_FONT
+               ),
+    "en"    => array(
+                   "langcode" => "0x809 English (United Kingdom)",
+                   "preferred_charset" => "Windows-1252",
+                   "preferred_font" => $DEFAULT_FONT
+               ),
+    "es"    => array(
+                   "langcode" => "0xc0a Spanish (International Sort)",
+                   "preferred_charset" => "Windows-1252",
+                   "preferred_font" => $DEFAULT_FONT
+               ),
+    "fr"    => array(
+                   "langcode" => "0x40c French (France)",
+                   "preferred_charset" => "Windows-1252",
+                   "preferred_font" => $DEFAULT_FONT
+               ),
+    "hu"    => array(
+                   "langcode" => "0x40e Hungarian",
+                   "preferred_charset" => "Windows-1250",
+                   "preferred_font" => $DEFAULT_FONT
+               ),
+    "it"    => array(
+                   "langcode" => "0x410 Italian (Italy)",
+                   "preferred_charset" => "Windows-1252",
+                   "preferred_font" => $DEFAULT_FONT
+               ),
+    "ja"    => array(
+                   "langcode" => "0x411 Japanese",
+                   "preferred_charset" => "CP932",
+                   "preferred_font" => "MS P Gothic,10,0"
+               ),
+    "kr"    => array(
+                   "langcode" => "0x412 Korean",
+                   "preferred_charset" => "CP949",
+                   "preferred_font" => $DEFAULT_FONT
+               ),
+    "nl"    => array(
+                   "langcode" => "0x413 Dutch (Netherlands)",
+                   "preferred_charset" => "Windows-1252",
+                   "preferred_font" => $DEFAULT_FONT
+               ),
+    "pt_BR" => array(
+                   "langcode" => "0x416 Portuguese (Brazil)",
+                   "preferred_charset" => "Windows-1252",
+                   "preferred_font" => $DEFAULT_FONT
+               ),
+    "zh"    => array(
+                   "langcode" => "0x804 Simplified Chinese",
+                   "preferred_charset" => "CP936",
+                   "preferred_font" => $DEFAULT_FONT
+               )
 );
 
 // Files on the top level of the TOC
@@ -46,6 +100,18 @@
     "appendixes.html"
 );
 
+// backwards compatibility
+if (!function_exists("file_get_contents")) {
+    function file_get_contents($file)
+    {
+        $cnt = file($file);
+        if ($cnt !== false) {
+            return join('', $cnt);
+        }
+        return false;
+    }
+}
+
 // Header for index and toc 
 $HEADER = '<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN">
 <html>
@@ -72,8 +138,8 @@
     $index = fopen("php_manual_$LANGUAGE.hhk", "w");
 
     // Write out file headers
-    fputs($toc,   $HEADER);
-    fputs($index, $HEADER);
+    fputs_wrapper($toc,   $HEADER);
+    fputs_wrapper($index, $HEADER);
 
     // Read original index file and drop out newlines
     $indexline = oneLiner("$HTML_PATH/$INDEX_IN_HTML");
@@ -85,7 +151,7 @@
     if ($FIRST_PAGE != $INDEX_IN_HTML) {
 
         // Find the name of the Table of Contents
-        preg_match('|CLASS=\"TOC\" ><DL ><DT ><B >(.*)</B|U', $indexline, $match);
+        preg_match('|CLASS=\"TOC\" *><DL *><DT *><B *>(.*)</B|U', $indexline, $match);
         if (empty($match[1])) { // Fallback
             $match[1] = "Table of Contents";
         }
@@ -94,40 +160,43 @@
     }
 
     // Find the name of the Preface
-    preg_match('|<A HREF="preface.html" >(.*)</A >|U', $indexline, $match);
+    preg_match('|<A +HREF="preface.html" *>([^<]*)</A *>|U', $indexline, $match);
     if (empty($match[1])) { // Fallback
         $match[1] = "Preface";
     }
     mapAndIndex($match[1], "preface.html", "    ", $toc, $index);
 
     // Now autofind the main pages
+
     $MAIN_REGEXP = join("|", $MAIN_FILES);
-    preg_match_all("![IVX]+\. <A HREF=\"($MAIN_REGEXP)\" >(.+)</A >(.+)(?=[IVX]+\. <A 
HREF=\"($MAIN_REGEXP)|</DT ></DL ></DD ></DL)!U", $indexline, $matches, 
PREG_SET_ORDER);
+
+    
+preg_match_all("![IVX]+[^<]*<A\\s+HREF=\"($MAIN_REGEXP)\"\\s*>([^<]+)</A\\s*>(.+)</DT\\s*></DL\\s*></DD\\s*><DT\\s*>!Ui",
+ $indexline, $matches, PREG_SET_ORDER);
     
     // Go through the main files, and link in subpages
     foreach ($matches as $matchinfo) {
         mapAndIndex($matchinfo[2], $matchinfo[1], "    ", $toc, $index);
 
-        fputs($toc, "\n      <ul>\n");
-        preg_match_all("!<A HREF=\"(.+)\" >(.+)</A >!U", $matchinfo[3], $subpages, 
PREG_SET_ORDER);
+        fputs_wrapper($toc, "\n      <ul>\n");
+        preg_match_all("!<A\\s+HREF=\"([^\"]+)\"\\s*>([^<]*)</A\\s*>!iU", 
+$matchinfo[3], $subpages, PREG_SET_ORDER);
+
         foreach ($subpages as $spinfo) {
             mapAndIndex($spinfo[2], $spinfo[1], "        ", $toc, $index);
             findDeeperLinks($spinfo[1], $toc, $index);
         }
-        fputs($toc, "\n      </ul>\n");
+        fputs_wrapper($toc, "\n      </ul>\n");
     }
 
     // Link in directly the copyright page
     $copyline = oneLiner("$HTML_PATH/copyright.html");
-    preg_match('|<A NAME="copyright" ></A ><P ><B >(.*)</B|U', $copyline, $match);
+    preg_match('|<A\\s+NAME="copyright"\\s*></A\\s*><P\\s*><B\\s*>([^<]*)</B|U', 
+$copyline, $match);
     if (empty($match[1])) { // Fallback
         $match[1] = "Copyright";
     }
     mapAndIndex($match[1], "copyright.html", "    ", $toc, $index, 17);
 
     // Write out closing line, and end files
-    fputs($index, "  </ul>\n</body>\n</html>");
-    fputs($toc,   "  </ul>\n</body>\n</html>");
+    fputs_wrapper($index, "  </ul>\n</body>\n</html>");
+    fputs_wrapper($toc,   "  </ul>\n</body>\n</html>");
     fclose($index);
     fclose($toc);
 } // makeContentfiles() function end
@@ -148,41 +217,41 @@
            
     // Start writing the project file
     $project = fopen("php_manual_$LANGUAGE.hhp", "w");
-    fputs($project, "[OPTIONS]\n");
-    fputs($project, "Compatibility=1.1 or later\n");
-    fputs($project, "Compiled file=php_manual_$LANGUAGE.chm\n");
-    fputs($project, "Contents file=php_manual_$LANGUAGE.hhc\n");
-    fputs($project, "Index file=php_manual_$LANGUAGE.hhk\n");
-    fputs($project, "Default Font=Arial,10,0\n");
-    fputs($project, "Default Window=phpdoc\n");
-    fputs($project, "Default topic=$FANCY_PATH\\$FIRST_PAGE\n");
-    fputs($project, "Display compile progress=Yes\n");
-    fputs($project, "Full-text search=Yes\n");
+    fputs_wrapper($project, "[OPTIONS]\n");
+    fputs_wrapper($project, "Compatibility=1.1 or later\n");
+    fputs_wrapper($project, "Compiled file=php_manual_$LANGUAGE.chm\n");
+    fputs_wrapper($project, "Contents file=php_manual_$LANGUAGE.hhc\n");
+    fputs_wrapper($project, "Index file=php_manual_$LANGUAGE.hhk\n");
+    fputs_wrapper($project, "Default 
+Font={$LANGUAGES[$LANGUAGE]['preferred_font']}\n");
+    fputs_wrapper($project, "Default Window=phpdoc\n");
+    fputs_wrapper($project, "Default topic=$FANCY_PATH\\$FIRST_PAGE\n");
+    fputs_wrapper($project, "Display compile progress=Yes\n");
+    fputs_wrapper($project, "Full-text search=Yes\n");
 
     // Get the proper language code from the array
-    fputs($project, "Language=" . $LANGUAGES[$LANGUAGE] . "\n");
+    fputs_wrapper($project, "Language={$LANGUAGES[$LANGUAGE]["langcode"]}\n");
 
     // Now try to find out how the manual named in the actual language
     // this must be in the index.html file as the title (DSSSL generated)
     $content = oneLiner("$HTML_PATH/$INDEX_IN_HTML");
-    if (preg_match("|<TITLE >(.*)</TITLE >|U", $content, $found)) {
+    if (preg_match("|<TITLE\s*>([^<]*)</TITLE\s*>|U", $content, $found)) {
         $MANUAL_TITLE = $found[1];
     } else { // Fallback
         $MANUAL_TITLE = "PHP Manual";
     }
 
-    fputs($project, "Title=$MANUAL_TITLE\n");
+    fputs_wrapper($project, "Title=$MANUAL_TITLE\n");
 
     // Define the phpdoc window style (adds more functionality)
-    fputs($project, 
"\n[WINDOWS]\nphpdoc=\"$MANUAL_TITLE\",\"php_manual_$LANGUAGE.hhc\",\"php_manual_$LANGUAGE.hhk\","
 .
+    fputs_wrapper($project, 
+"\n[WINDOWS]\nphpdoc=\"$MANUAL_TITLE\",\"php_manual_$LANGUAGE.hhc\",\"php_manual_$LANGUAGE.hhk\","
+ .
           
"\"$FANCY_PATH\\$FIRST_PAGE\",\"$FANCY_PATH\\$FIRST_PAGE\",,,,,0x23520,,0x386e,,,,,,,,0\n");
 
     // Write out all the filenames as in FANCY_PATH
-    fputs($project, "\n[FILES]\n");
+    fputs_wrapper($project, "\n[FILES]\n");
     $handle = opendir($FANCY_PATH);
     while (false !== ($file = readdir($handle))) {
         if ($file != "." && $file != "..") {
-            fputs($project, "$FANCY_PATH\\$file\n");
+            fputs_wrapper($project, "$FANCY_PATH\\$file\n");
         }
     }
     closedir($handle);
@@ -195,18 +264,18 @@
     global $FANCY_PATH;
     $name = str_replace('"', '&quot;', $name);
 
-    fputs($toc, "
+    fputs_wrapper($toc, "
 $tabs<li><object type=\"text/sitemap\">
 $tabs  <param name=\"Name\" value=\"$name\">
 $tabs  <param name=\"Local\" value=\"$FANCY_PATH\\$local\">
 ");
 
     if ($imgnum != "auto") {
-        fputs($toc, "$tabs  <param name=\"ImageNumber\" value=\"$imgnum\">\n");
+        fputs_wrapper($toc, "$tabs  <param name=\"ImageNumber\" 
+value=\"$imgnum\">\n");
     }
-    fputs($toc, "$tabs  </object>\n");
+    fputs_wrapper($toc, "$tabs  </object>\n");
 
-    fputs($index, "
+    fputs_wrapper($index, "
     <li><object type=\"text/sitemap\">
       <param name=\"Local\" value=\"$FANCY_PATH\\$local\">
       <param name=\"Name\" value=\"$name\">
@@ -223,10 +292,10 @@
     $contents = oneLiner("$HTML_PATH/$filename");
     
     // Find all sublinks
-    if 
(preg_match_all("!<DT\s+><A\s+HREF=\"(([\w\.-]+\.)+html)(\#[\w\.-]+)?\"\s+>(.*)</A\s+>!U",
 $contents, $matches, PREG_SET_ORDER)) {
+    if 
+(preg_match_all("!<DT\\s*><A\\s+HREF=\"(([\\w\\.-]+\\.)+html)(\\#[\\w\\.-]+)?\"\\s*>([^<]*)</A\\s*>!U",
+ $contents, $matches, PREG_SET_ORDER)) {
         
         // Print out the file informations for all the links
-        fputs($toc, "\n        <ul>");
+        fputs_wrapper($toc, "\n        <ul>");
         foreach ($matches as $onematch) {
             $param["html"] = $onematch[1];
             if (!empty($onematch[3])) {
@@ -235,7 +304,7 @@
             $param["title"] = strip_tags($onematch[4]);
             mapAndIndex($param["title"], $param["html"], "          ", $toc, $index);
         }
-        fputs($toc, "        </ul>\n");
+        fputs_wrapper($toc, "        </ul>\n");
 
     } else {
 
@@ -250,11 +319,61 @@
     
 } // findDeeperLinks() function end
 
+function fputs_wrapper($fp, $str)
+{
+    fputs($fp, convertCharset($str));
+}
 
 // Return a file joined on one line
 function oneLiner($filename)
 {
-    return preg_replace("/[\r|\n]{1,2}/", " ", join("", file($filename)));
+    global $INTERNAL_CHARSET;
+
+    $buf = preg_replace("/[\r|\n]{1,2}/U", " ", file_get_contents($filename));
+    $charset = detectDocumentCharset($buf);
+
+    if ($charset === false) $charset = "UTF-8";
+
+    if ($charset != $INTERNAL_CHARSET) {
+        if (function_exists("iconv")) {
+            $buf = iconv($charset, $INTERNAL_CHARSET, $buf);
+        } elseif (function_exists("mb_convert_encoding")) {
+            $buf = mb_convert_encoding($buf, $INTERNAL_CHARSET, $charset);
+        } elseif (preg_match("/^UTF-?8$/i", $INTERNAL_CHARSET) && 
+preg_match("/^(ISO-8859-1|WINDOWS-1252)$/i", $charset)) {
+            $buf = utf8_encode($buf);
+        } else {
+            die("charset conversion function is not available.");
+        }
+    }
+    return $buf;
+}
+
+function convertCharset($buf)
+{
+    global $LANGUAGE, $LANGUAGES, $INTERNAL_CHARSET;
+
+    $charset = $LANGUAGES[$LANGUAGE]['preferred_charset'];
+
+    if ($charset != $INTERNAL_CHARSET) {
+        if (function_exists("iconv")) {
+            $buf = iconv($INTERNAL_CHARSET, $charset, $buf);
+        } elseif (function_exists("mb_convert_encoding")) {
+            $buf = mb_convert_encoding($buf, $charset, $INTERNAL_CHARSET);
+        } elseif (preg_match("/^UTF-?8$/i", $INTERNAL_CHARSET) && 
+preg_match("/^(ISO-8859-1|WINDOWS-1252)$/i", $charset)) {
+            $buf = utf8_decode($buf);
+        } else {
+            die("$LANGUAGE locale is not supported.");
+        }
+    }
+    return $buf;
 } // oneLiner() function end
 
+// Returns the name of character set in the given document
+function detectDocumentCharset($doc)
+{
+    if 
+(preg_match("/<META\\s+HTTP-EQUIV=\"CONTENT-TYPE\"\\s+CONTENT=\"TEXT\\/HTML;\\s+CHARSET=([\\w\\d-]*)\"\\s*>/iU",
+ $doc, $reg)) {
+        return $reg[1];
+    }
+    return false;
+}
 ?>

-- 
PHP Documentation Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to