Oops... Thanks for your notification.
A corrected version is attached.
I'm becoming less sure whether this is the right list to post this kind of patch
to. Would [EMAIL PROTECTED] be more appropriate?
Moriyoshi
<[EMAIL PROTECTED]> wrote:
> Hello,
>
> Can you fix the typos please?
>
> + "langcode" => "0x809 Enlish (United Kingdom)",
>
> + "langcode" => "0x40c Frcsh (France)",
>
> English, French.
>
> Then +1.
>
> Regards.
> M.CHAILLAN Nicolas
> [EMAIL PROTECTED]
> www.WorldAKT.com Hébergement de sites internets.
>
> "Moriyoshi Koizumi" <[EMAIL PROTECTED]> a écrit dans le message de
> news: 20021117230541A0?[EMAIL PROTECTED]
> > Hi guys,
> >
> > Attached is a patch for make_chm.php which solves character
> > set issues and enables you to make chm help files in any supported
> > language.
> >
> > You have to get either iconv or mbstring enabled to make the chm files
> > that are encoded in a character set other than iso-8859-1 or
> > Windows-1252. And you should specify the proper font by modifying the
> > appropriate part of the code.
> >
> > Hm, well, I'm aware that the header of the file says
> > "PLEASE DO NOT MAKE ANY MAJOR MODIFICATIONS TO THIS CODE!" and
> > I actually did heavy modifications on this, but I hope it helps
> > the ongoing development.
> >
> > Anyway, comments are welcome.
> >
> >
> > Cheers,
> >
> > Moriyoshi
> >
> >
>
>
>
> --
> PHP Documentation Mailing List (http://www.php.net/)
> To unsubscribe, visit: http://www.php.net/unsub.php
>
>
>
Index: make_chm.php
===================================================================
RCS file: /repository/phpdoc/chm/make_chm.php,v
retrieving revision 1.5
diff -u -r1.5 make_chm.php
--- make_chm.php 6 Oct 2002 09:35:03 -0000 1.5
+++ make_chm.php 17 Nov 2002 15:38:54 -0000
@@ -14,25 +14,79 @@
$FANCY_PATH = getenv("PHP_HELP_COMPILE_FANCYDIR");
$LANGUAGE = getenv("PHP_HELP_COMPILE_LANG");
$INDEX_IN_HTML = "index.html";
+$INTERNAL_CHARSET = "UTF-8";
+$DEFAULT_FONT = "Arial,10,0";
if (empty($FANCY_PATH)) { $FANCY_PATH = $HTML_PATH; }
// Array to manual code -> HTML Help Code conversion
// Code list: http://www.helpware.net/htmlhelp/hh_info.htm
$LANGUAGES = array(
- "tw" => "0x404 Traditional Chinese",
- "cs" => "0x405 Czech",
- "de" => "0x407 German (Germany)",
- "en" => "0x809 Enlish (United Kingdom)",
- "es" => "0xc0a Spanish (International Sort)",
- "fr" => "0x40c French (France)",
- "hu" => "0x40e Hungarian",
- "it" => "0x410 Italian (Italy)",
- "ja" => "0x411 Japanese",
- "kr" => "0x412 Korean",
- "nl" => "0x413 Dutch (Netherlands)",
- "pt_BR" => "0x416 Portuguese (Brazil)",
- "zh" => "0x804 Simplified Chinese"
+ "tw" => array(
+ "langcode" => "0x404 Traditional Chinese",
+ "preferred_charset" => "CP950",
+ "preferred_font" => $DEFAULT_FONT
+ ),
+ "cs" => array(
+ "langcode" => "0x405 Czech",
+ "preferred_charset" => "Windows-1250",
+ "preferred_font" => $DEFAULT_FONT
+ ),
+ "de" => array(
+ "langcode" => "0x407 German (Germany)",
+ "preferred_charset" => "Windows-1252",
+ "preferred_font" => $DEFAULT_FONT
+ ),
+ "en" => array(
+ "langcode" => "0x809 English (United Kingdom)",
+ "preferred_charset" => "Windows-1252",
+ "preferred_font" => $DEFAULT_FONT
+ ),
+ "es" => array(
+ "langcode" => "0xc0a Spanish (International Sort)",
+ "preferred_charset" => "Windows-1252",
+ "preferred_font" => $DEFAULT_FONT
+ ),
+ "fr" => array(
+ "langcode" => "0x40c French (France)",
+ "preferred_charset" => "Windows-1252",
+ "preferred_font" => $DEFAULT_FONT
+ ),
+ "hu" => array(
+ "langcode" => "0x40e Hungarian",
+ "preferred_charset" => "Windows-1250",
+ "preferred_font" => $DEFAULT_FONT
+ ),
+ "it" => array(
+ "langcode" => "0x410 Italian (Italy)",
+ "preferred_charset" => "Windows-1252",
+ "preferred_font" => $DEFAULT_FONT
+ ),
+ "ja" => array(
+ "langcode" => "0x411 Japanese",
+ "preferred_charset" => "CP932",
+ "preferred_font" => "MS P Gothic,10,0"
+ ),
+ "kr" => array(
+ "langcode" => "0x412 Korean",
+ "preferred_charset" => "CP949",
+ "preferred_font" => $DEFAULT_FONT
+ ),
+ "nl" => array(
+ "langcode" => "0x413 Dutch (Netherlands)",
+ "preferred_charset" => "Windows-1252",
+ "preferred_font" => $DEFAULT_FONT
+ ),
+ "pt_BR" => array(
+ "langcode" => "0x416 Portuguese (Brazil)",
+ "preferred_charset" => "Windows-1252",
+ "preferred_font" => $DEFAULT_FONT
+ ),
+ "zh" => array(
+ "langcode" => "0x804 Simplified Chinese",
+ "preferred_charset" => "CP936",
+ "preferred_font" => $DEFAULT_FONT
+ )
);
// Files on the top level of the TOC
@@ -46,6 +100,18 @@
"appendixes.html"
);
+// backwards compatibility
+if (!function_exists("file_get_contents")) {
+ function file_get_contents($file)
+ {
+ $cnt = file($file);
+ if ($cnt !== false) {
+ return join('', $cnt);
+ }
+ return false;
+ }
+}
+
// Header for index and toc
$HEADER = '<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN">
<html>
@@ -72,8 +138,8 @@
$index = fopen("php_manual_$LANGUAGE.hhk", "w");
// Write out file headers
- fputs($toc, $HEADER);
- fputs($index, $HEADER);
+ fputs_wrapper($toc, $HEADER);
+ fputs_wrapper($index, $HEADER);
// Read original index file and drop out newlines
$indexline = oneLiner("$HTML_PATH/$INDEX_IN_HTML");
@@ -85,7 +151,7 @@
if ($FIRST_PAGE != $INDEX_IN_HTML) {
// Find the name of the Table of Contents
- preg_match('|CLASS=\"TOC\" ><DL ><DT ><B >(.*)</B|U', $indexline, $match);
+ preg_match('|CLASS=\"TOC\" *><DL *><DT *><B *>(.*)</B|U', $indexline, $match);
if (empty($match[1])) { // Fallback
$match[1] = "Table of Contents";
}
@@ -94,40 +160,43 @@
}
// Find the name of the Preface
- preg_match('|<A HREF="preface.html" >(.*)</A >|U', $indexline, $match);
+ preg_match('|<A +HREF="preface.html" *>([^<]*)</A *>|U', $indexline, $match);
if (empty($match[1])) { // Fallback
$match[1] = "Preface";
}
mapAndIndex($match[1], "preface.html", " ", $toc, $index);
// Now autofind the main pages
+
$MAIN_REGEXP = join("|", $MAIN_FILES);
- preg_match_all("![IVX]+\. <A HREF=\"($MAIN_REGEXP)\" >(.+)</A >(.+)(?=[IVX]+\. <A
HREF=\"($MAIN_REGEXP)|</DT ></DL ></DD ></DL)!U", $indexline, $matches,
PREG_SET_ORDER);
+
+
+preg_match_all("![IVX]+[^<]*<A\\s+HREF=\"($MAIN_REGEXP)\"\\s*>([^<]+)</A\\s*>(.+)</DT\\s*></DL\\s*></DD\\s*><DT\\s*>!Ui",
+ $indexline, $matches, PREG_SET_ORDER);
// Go through the main files, and link in subpages
foreach ($matches as $matchinfo) {
mapAndIndex($matchinfo[2], $matchinfo[1], " ", $toc, $index);
- fputs($toc, "\n <ul>\n");
- preg_match_all("!<A HREF=\"(.+)\" >(.+)</A >!U", $matchinfo[3], $subpages,
PREG_SET_ORDER);
+ fputs_wrapper($toc, "\n <ul>\n");
+ preg_match_all("!<A\\s+HREF=\"([^\"]+)\"\\s*>([^<]*)</A\\s*>!iU",
+$matchinfo[3], $subpages, PREG_SET_ORDER);
+
foreach ($subpages as $spinfo) {
mapAndIndex($spinfo[2], $spinfo[1], " ", $toc, $index);
findDeeperLinks($spinfo[1], $toc, $index);
}
- fputs($toc, "\n </ul>\n");
+ fputs_wrapper($toc, "\n </ul>\n");
}
// Link in directly the copyright page
$copyline = oneLiner("$HTML_PATH/copyright.html");
- preg_match('|<A NAME="copyright" ></A ><P ><B >(.*)</B|U', $copyline, $match);
+ preg_match('|<A\\s+NAME="copyright"\\s*></A\\s*><P\\s*><B\\s*>([^<]*)</B|U',
+$copyline, $match);
if (empty($match[1])) { // Fallback
$match[1] = "Copyright";
}
mapAndIndex($match[1], "copyright.html", " ", $toc, $index, 17);
// Write out closing line, and end files
- fputs($index, " </ul>\n</body>\n</html>");
- fputs($toc, " </ul>\n</body>\n</html>");
+ fputs_wrapper($index, " </ul>\n</body>\n</html>");
+ fputs_wrapper($toc, " </ul>\n</body>\n</html>");
fclose($index);
fclose($toc);
} // makeContentfiles() function end
@@ -148,41 +217,41 @@
// Start writing the project file
$project = fopen("php_manual_$LANGUAGE.hhp", "w");
- fputs($project, "[OPTIONS]\n");
- fputs($project, "Compatibility=1.1 or later\n");
- fputs($project, "Compiled file=php_manual_$LANGUAGE.chm\n");
- fputs($project, "Contents file=php_manual_$LANGUAGE.hhc\n");
- fputs($project, "Index file=php_manual_$LANGUAGE.hhk\n");
- fputs($project, "Default Font=Arial,10,0\n");
- fputs($project, "Default Window=phpdoc\n");
- fputs($project, "Default topic=$FANCY_PATH\\$FIRST_PAGE\n");
- fputs($project, "Display compile progress=Yes\n");
- fputs($project, "Full-text search=Yes\n");
+ fputs_wrapper($project, "[OPTIONS]\n");
+ fputs_wrapper($project, "Compatibility=1.1 or later\n");
+ fputs_wrapper($project, "Compiled file=php_manual_$LANGUAGE.chm\n");
+ fputs_wrapper($project, "Contents file=php_manual_$LANGUAGE.hhc\n");
+ fputs_wrapper($project, "Index file=php_manual_$LANGUAGE.hhk\n");
+ fputs_wrapper($project, "Default
+Font={$LANGUAGES[$LANGUAGE]['preferred_font']}\n");
+ fputs_wrapper($project, "Default Window=phpdoc\n");
+ fputs_wrapper($project, "Default topic=$FANCY_PATH\\$FIRST_PAGE\n");
+ fputs_wrapper($project, "Display compile progress=Yes\n");
+ fputs_wrapper($project, "Full-text search=Yes\n");
// Get the proper language code from the array
- fputs($project, "Language=" . $LANGUAGES[$LANGUAGE] . "\n");
+ fputs_wrapper($project, "Language={$LANGUAGES[$LANGUAGE]["langcode"]}\n");
// Now try to find out how the manual named in the actual language
// this must be in the index.html file as the title (DSSSL generated)
$content = oneLiner("$HTML_PATH/$INDEX_IN_HTML");
- if (preg_match("|<TITLE >(.*)</TITLE >|U", $content, $found)) {
+ if (preg_match("|<TITLE\s*>([^<]*)</TITLE\s*>|U", $content, $found)) {
$MANUAL_TITLE = $found[1];
} else { // Fallback
$MANUAL_TITLE = "PHP Manual";
}
- fputs($project, "Title=$MANUAL_TITLE\n");
+ fputs_wrapper($project, "Title=$MANUAL_TITLE\n");
// Define the phpdoc window style (adds more functionality)
- fputs($project,
"\n[WINDOWS]\nphpdoc=\"$MANUAL_TITLE\",\"php_manual_$LANGUAGE.hhc\",\"php_manual_$LANGUAGE.hhk\","
.
+ fputs_wrapper($project,
+"\n[WINDOWS]\nphpdoc=\"$MANUAL_TITLE\",\"php_manual_$LANGUAGE.hhc\",\"php_manual_$LANGUAGE.hhk\","
+ .
"\"$FANCY_PATH\\$FIRST_PAGE\",\"$FANCY_PATH\\$FIRST_PAGE\",,,,,0x23520,,0x386e,,,,,,,,0\n");
// Write out all the filenames as in FANCY_PATH
- fputs($project, "\n[FILES]\n");
+ fputs_wrapper($project, "\n[FILES]\n");
$handle = opendir($FANCY_PATH);
while (false !== ($file = readdir($handle))) {
if ($file != "." && $file != "..") {
- fputs($project, "$FANCY_PATH\\$file\n");
+ fputs_wrapper($project, "$FANCY_PATH\\$file\n");
}
}
closedir($handle);
@@ -195,18 +264,18 @@
global $FANCY_PATH;
$name = str_replace('"', '"', $name);
- fputs($toc, "
+ fputs_wrapper($toc, "
$tabs<li><object type=\"text/sitemap\">
$tabs <param name=\"Name\" value=\"$name\">
$tabs <param name=\"Local\" value=\"$FANCY_PATH\\$local\">
");
if ($imgnum != "auto") {
- fputs($toc, "$tabs <param name=\"ImageNumber\" value=\"$imgnum\">\n");
+ fputs_wrapper($toc, "$tabs <param name=\"ImageNumber\"
+value=\"$imgnum\">\n");
}
- fputs($toc, "$tabs </object>\n");
+ fputs_wrapper($toc, "$tabs </object>\n");
- fputs($index, "
+ fputs_wrapper($index, "
<li><object type=\"text/sitemap\">
<param name=\"Local\" value=\"$FANCY_PATH\\$local\">
<param name=\"Name\" value=\"$name\">
@@ -223,10 +292,10 @@
$contents = oneLiner("$HTML_PATH/$filename");
// Find all sublinks
- if
(preg_match_all("!<DT\s+><A\s+HREF=\"(([\w\.-]+\.)+html)(\#[\w\.-]+)?\"\s+>(.*)</A\s+>!U",
$contents, $matches, PREG_SET_ORDER)) {
+ if
+(preg_match_all("!<DT\\s*><A\\s+HREF=\"(([\\w\\.-]+\\.)+html)(\\#[\\w\\.-]+)?\"\\s*>([^<]*)</A\\s*>!U",
+ $contents, $matches, PREG_SET_ORDER)) {
// Print out the file informations for all the links
- fputs($toc, "\n <ul>");
+ fputs_wrapper($toc, "\n <ul>");
foreach ($matches as $onematch) {
$param["html"] = $onematch[1];
if (!empty($onematch[3])) {
@@ -235,7 +304,7 @@
$param["title"] = strip_tags($onematch[4]);
mapAndIndex($param["title"], $param["html"], " ", $toc, $index);
}
- fputs($toc, " </ul>\n");
+ fputs_wrapper($toc, " </ul>\n");
} else {
@@ -250,11 +319,61 @@
} // findDeeperLinks() function end
+function fputs_wrapper($fp, $str)
+{
+ fputs($fp, convertCharset($str));
+}
// Return a file joined on one line
function oneLiner($filename)
{
- return preg_replace("/[\r|\n]{1,2}/", " ", join("", file($filename)));
+ global $INTERNAL_CHARSET;
+
+ $buf = preg_replace("/[\r|\n]{1,2}/U", " ", file_get_contents($filename));
+ $charset = detectDocumentCharset($buf);
+
+ if ($charset === false) $charset = "UTF-8";
+
+ if ($charset != $INTERNAL_CHARSET) {
+ if (function_exists("iconv")) {
+ $buf = iconv($charset, $INTERNAL_CHARSET, $buf);
+ } elseif (function_exists("mb_convert_encoding")) {
+ $buf = mb_convert_encoding($buf, $INTERNAL_CHARSET, $charset);
+ } elseif (preg_match("/^UTF-?8$/i", $INTERNAL_CHARSET) &&
+preg_match("/^(ISO-8859-1|WINDOWS-1252)$/i", $charset)) {
+ $buf = utf8_encode($buf);
+ } else {
+ die("charset conversion function is not available.");
+ }
+ }
+ return $buf;
+}
+
+function convertCharset($buf)
+{
+ global $LANGUAGE, $LANGUAGES, $INTERNAL_CHARSET;
+
+ $charset = $LANGUAGES[$LANGUAGE]['preferred_charset'];
+
+ if ($charset != $INTERNAL_CHARSET) {
+ if (function_exists("iconv")) {
+ $buf = iconv($INTERNAL_CHARSET, $charset, $buf);
+ } elseif (function_exists("mb_convert_encoding")) {
+ $buf = mb_convert_encoding($buf, $charset, $INTERNAL_CHARSET);
+ } elseif (preg_match("/^UTF-?8$/i", $INTERNAL_CHARSET) &&
+preg_match("/^(ISO-8859-1|WINDOWS-1252)$/i", $charset)) {
+ $buf = utf8_decode($buf);
+ } else {
+ die("$LANGUAGE locale is not supported.");
+ }
+ }
+ return $buf;
} // oneLiner() function end
+// Returns the name of character set in the given document
+function detectDocumentCharset($doc)
+{
+ if
+(preg_match("/<META\\s+HTTP-EQUIV=\"CONTENT-TYPE\"\\s+CONTENT=\"TEXT\\/HTML;\\s+CHARSET=([\\w\\d-]*)\"\\s*>/iU",
+ $doc, $reg)) {
+ return $reg[1];
+ }
+ return false;
+}
?>
--
PHP Documentation Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php