Bernard Bel (2010-01-16 11:25):
> I almost completed a powerful PhpWiki to PmWiki converter with many options
> for renaming pages and reorganizing the site. Notably it replaces redirection
> plugin pages with AutoLinks and creates group headers, footers and sidebars
> as instructed.
>
> For a short while you can see the old and new versions of a 500-page site
> that is converted by a single PHP procedure call.
>
> Here is the old one :
> http://wiki.naissance.asso.fr/index.php/PageAccueil
>
> and the new one :
> http://crdo.fr/cianewiki/CianeWiki/PageAccueil
>
> I still need to convert 2 out of the 3 table formats. :-/
>
> Once completed I suggest to publish the code on PmWiki.org.
I have also created a PhpWiki to PmWiki converter in Spring 2009. I don't
think that it was powerful, but it worked pretty well for my specific case
(the wiki I converted had 900 pages). I was too lazy to publish it then.
My script managed to convert OldStyle and DefinitionStyle tables, but
I believe that the conversion had some quirks. I have commented and
attached the script (hope it goes through), in case you are interested.
I haven't seen many requests for such a script in the community, but
PhpWiki has been mentioned a few times. I guess a cookbook recipe able to
convert most of PhpWiki's markup would be a very good addition and would
prove useful for a few newcomers searching through pmwiki.org for this
specific use case.
--
-- Rogutės Sparnuotos
<?php if (!defined('PmWiki')) exit(); # stop unless the 'PmWiki' constant is defined (presumably set by the PmWiki core before it loads this file - confirm)
# This is a PhpWiki to PmWiki converter.
# It was created to convert a 900-page wiki.
# The conversion was successful, but the wiki didn't use many features (and the
# markup used was rather consistent).
#
# Requirements: UTF-8 and the iconv PHP extension
#
# To use this script,
# 1. Put your exported PhpWiki files in a "phpwiki.in" directory and create
# a writable "phpwiki.out" directory for the resulting PmWiki pages.
# 2. Change the defaults of variables defined below (if needed). E.g., before
# including this script in "config.php", define the default group for
# converted pages and specify an additional markup conversion:
# $OutGroup = 'Main';
# $PWC_MarkupConversions=array('/wolf/'=>'lupus');
# 3. Include this script in "config.php" and execute your wiki with
# "?action=phpwikic":
# include_once("$FarmD/cookbook/phpwikiconv.php");
#
# Notes:
# 1. Files in $OutWikiDir will be overwritten!
# 2. WikiWords will be disabled.
# 3. No PhpWiki plugins are converted (extend the $PWC_MarkupConversions array
# to convert them).
# 4. Only Old Style and Definition Style tables are supported.
# 5. Definition style table conversion might be quirky. Moreover, tables within
# tables and spanning are not supported for these.
# 6. There are 3 arrays that control the behaviour of markup conversion:
# * main conversion happens for markup specified in $PWC_MarkupConversions,
# * $PWC_MarkupConversionsPre might be used for things requiring a 2 step
# conversion,
# * $PWC_MarkupConversionsPost might be used to change the resulting markup
# (MarkupRestore() is called before doing these)
# 7. One can create a $PWC_MarkupTestString variable and put some PhpWiki
# markup in it, and run PmWiki with ?action=phpwikit to test markup conversion.
## Default settings. Every value below is only a default: define the variable
## before including this script to override it.
SDV($InWikiDir,'phpwiki.in');     # directory holding the exported PhpWiki files
SDV($OutWikiDir,'phpwiki.out');   # directory receiving the converted pages
SDV($InEncoding,'windows-1252');  # encoding of the exported files
SDV($OutEncoding,'utf-8');        # encoding of the converted pages

# If the PmWiki function MatchPageNames will match the page patterns given by
# SkipPageFilenames, the conversion function will skip them.
SDV($SkipPageFilenames,'PhpWiki*');

# Converted pages will be put to this group
# (except the exceptions defined in $PWC_PageNameConversions)
SDV($OutGroup,'PhpWiki');

# A facility to rename pages, or put them to a different group than the
# $OutGroup
SDVA($PWC_PageNameConversions, array(
  # example: '/^(John|Judas)$/' => 'Profiles.$1'
));

SDV($PHPWikiWikiWordPattern,
  '(?<![[:alnum:]])(?:[[:upper:]][[:lower:]]+){2,}(?![[:alnum:]])');

## Markup conversion rules: regular expression => replacement, applied in the
## order listed by a single preg_replace() call.
## NOTE(review): many rules use the /e (eval) pattern modifier, which PHP
## deprecated in 5.5 and removed in 7.0; on newer PHP these rules need to be
## ported to preg_replace_callback() together with the code that applies them.
## The replacement code of the table rules refers to $pagename, so it relies on
## a variable of that name existing where preg_replace() is called.
SDV($PWC_MarkupConversionsPre, array());
SDVA($PWC_MarkupConversions, array(
  ## Anchors:
  ## #[foo]: An anchor around the text "foo" with id "foo"
  '/(?>#\[([A-Za-z][-\w:.]*)\])/e' => "Keep('$1[[#$1]]')"
  ## #[|foo]: An empty anchor with id "foo"
  ## #[howdy|foo]: An anchor around the text "howdy" with id "foo"
  ,'/(?>#\[([^\|\]]*)\|([A-Za-z][-\w:.]+)\])/e' => "Keep('$1'=='' ? '[[#$2]]' : '$1[[#$2]]')"
  ## image links [http://site.com/xxx.jpg|bla bla]
  ,"#t?\[\ *((?:https?|ftp)://(?:[^\s$UrlExcludeChars]*))\.(jpg|png|gif)\ *(?:\\n\|{1,2} | \|{1,2}\\n | \| |) ([^\[\]|\\n]*) \]#xie" => "pwc_convertImageLinks('$1', '$2', '$3')"
  ## apply styling to link text before Keep()
  #,'!\[\ *_([^\|\]\n_]+)_!' => "[''$1''" # emphasis, pmwiki
  ,'!\[\ *_([^\|\]\n_]+)_!' => "[//$1//" # emphasis, creole
  ##,'!\[\ *\*([^\|\]\n*]+)\*!' => "['''$1'''" # strong, pmwiki
  ,'!\[\ *\*([^\|\]\n*]+)\*!' => "[**$1**" # strong, creole
  ## links [bla bla | local/url link]
  ,'!\[ ([^\|\]\n]+) \| \s* ([^\[\]]+) \]!xe' => "Keep('[[$2 | $1]]')"
  ## [free links]
  ,'!(?>\[\s*(.+?)\ *\])!e' => "Keep('[[$1]]')"
  ## bare links
  ,"#\b(?:https?|ftp)://[^\s$UrlExcludeChars]*[^\s.,?!$UrlExcludeChars]#e" => "Keep('$0')"
  ## old style tables
  ,'!<\?plugin OldStyleTable\s*(.+?)\?>!se' => "pwc_OldStyleTableConvert('$1', \$pagename)"
  ## get rid of WikiWords
  #,"/(~*)($PHPWikiWikiWordPattern)/e" => "('$1'==''||strlen('$1')%2==0) ? Keep('[[$2]]') : '$2'"
  ,"/(~*)($PHPWikiWikiWordPattern)/e" => "('$1'==''||strlen('$1')%2==0) ? '[['.AsSpaced('$2').']]' : '$2'"
  ## get rid of WikiWord escaping ~
  ,'/~~/' => '~'
  ## bold
  ,'!\*(.*?)\*!' => "'''$1'''" # pmwiki
  ,'!<b>(.*?)</b>!' => "'''$1'''" # pmwiki
  #,'!\*(.*?)\*!' => "**$1**" # creole
  #,'!<b>(.*?)</b>!' => "**$1**" # creole
  ## italics
  ,'!_(.*?)_!' => "''$1''" # pmwiki
  ,'!<i>(.*?)</i>!' => "''$1''" # pmwiki
  #,'!_(.*?)_!' => "//$1//" # creole
  #,'!<i>(.*?)</i>!' => "//$1//" # creole
  ## bold italics
  ,'!(?:_\*|\*_)(.+?)(?:_\*|\*_)!' => "'''''$1'''''" # pmwiki
  #,'!(?:_\*|\*_)(.+?)(?:_\*|\*_)!' => "//**$1**//" # creole
  ## fixed width
  ,'!=(.+?)=!' => '@@\1@@'
  ## headings
  ,'/^(!{1,3})\s?(.*)$/me' => "'$1'=='!' ? '!!! $2' : ('$1'=='!!' ? '!!! $2' : '!! $2' )"
  ## line breaks
  ,'/(?<!%)%%%(?!%)|<br>/' => "\\\\\\\\\n"
  ## Definition list style tables
  ,'/(^[^|\n]*\| *(?:\n(?: *\n)*(?: *[^|\n ][^|\n]*\|?|[^|\n]*\|) *)+)/me' => "pwc_DefStyleTableConvert('$1', \$pagename)"
));
SDV($PWC_MarkupConversionsPost, array());

## Register the two actions: conversion and markup test.
SDV($PHPWikiConvertAction,'phpwikic');
SDV($PHPWikiMarkupTestAction,'phpwikit');
SDV($HandleActions[$PHPWikiConvertAction],'HandlePHPWikiMigrate');
SDV($HandleActions[$PHPWikiMarkupTestAction],'HandlePHPWikiMarkupTest');

# This check runs whenever the script is included, not only for the actions
# registered above.
if (!is_dir($OutWikiDir) || !is_writable($OutWikiDir)) {
  echo "OutWikiDir ($OutWikiDir) must be a writable directory!";
  exit;
}

# Put the output directory first in the list of page stores.
array_unshift($WikiLibDirs, new PageStore($OutWikiDir.'/{$FullName}', 1));

# A pattern that is also listed in the Pre or Post array is dropped from the
# main conversion array.
$PWC_MarkupConversions =
  array_diff_key($PWC_MarkupConversions,$PWC_MarkupConversionsPre,$PWC_MarkupConversionsPost);
## Gets called by ?action=phpwikic: converts every file found in $InWikiDir
## into a PmWiki page file in $OutWikiDir, prints a report and exits.
##
## Each input file must consist of a header block, an empty line and the page
## text. The header block has to end with "Content-Transfer-Encoding: binary"
## and carry the page attributes as ';'-separated key=value pairs in its
## Content-Type header. Files that do not fit are reported as errors and
## skipped; the remaining files are still processed.
##
## $pagename is reused as the name of the page being converted, because the
## replacement code of some conversion rules refers to a variable of that name.
##
## NOTE(review): iconv_set_encoding() is deprecated in recent PHP releases -
## confirm against the PHP version in use.
function HandlePHPWikiMigrate($pagename) {
  global $FarmD
    , $InWikiDir, $OutWikiDir
    , $InEncoding, $OutEncoding
    , $OutGroup
    , $PWC_MarkupConversionsPre, $PWC_MarkupConversions, $PWC_MarkupConversionsPost
    , $PWC_PageNameConversions
    , $SkipPageFilenames;

  header('Content-Type: text/html; charset=UTF-8');
  iconv_set_encoding('internal_encoding', 'utf-8');
  include_once($FarmD.'/scripts/xlpage-utf-8.php');

  $found = glob("$InWikiDir/*");
  if (empty($found)) {
    echo "No files found for $InWikiDir/*\n";
    exit;
  }
  # Index the input files by base name. A separate array is filled so that a
  # file with a numeric name cannot collide with the numeric keys of glob().
  $infiles = array();
  foreach ($found as $f) $infiles[basename($f)] = $f;

  $skippages = MatchPageNames(array_keys($infiles), $SkipPageFilenames);
  $converted = 0;
  $skipped = 0;
  foreach ($infiles as $pagefile=>$pagepath) {
    if (in_array($pagefile, $skippages)) {
      $skipped++;
      pwc_show("match with SkipPageFilenames...", 'skip', $pagefile);
      continue;
    }
    pwc_show('Processing file <span style="color: blue;">'.$pagepath."</span>...");
    if (!$inpagestr=file_get_contents($pagepath)) {
      pwc_show("couldn't read it!", 'error', $pagepath);
      continue;
    }
    $pagefile=basename($pagepath);
    $inpagestr=iconv($InEncoding, $OutEncoding, $inpagestr);
    $inpagestr=str_replace("\r", '', $inpagestr);

    # The first empty line separates the headers from the page text.
    $sep=strpos($inpagestr, "\n\n");
    $pageattr = ($sep===false) ? '' : substr($inpagestr, 0, $sep);
    $pagetext = ($sep===false) ? '' : substr($inpagestr, $sep+2);
    if (empty($pageattr) || empty($pagetext)) {
      pwc_show("couldn't separate headers from text!", 'error', $pagefile);
      continue;
    }
    if (substr($pageattr, -33)!="Content-Transfer-Encoding: binary") {
      pwc_show("page attributes ended with an unexpected string", 'error', $pagefile);
      continue;
    }

    $mime=iconv_mime_decode_headers($pageattr);
    if (!isset($mime['Content-Type'])) {
      pwc_show("couldn't process page headers!", 'error', $pagefile);
      continue;
    }
    $mime=$mime['Content-Type'];
    $mime=iconv($InEncoding, $OutEncoding, urldecode($mime));
    $mime=explode(';', $mime);
    $page = array();
    $pagename='';
    foreach ($mime as $m) {
      $m=explode('=', trim($m), 2);
      if (count($m)!=2) continue;   # not a key=value pair
      list ($k, $v) = $m;
      switch ($k) {
        case 'pagename':
          # The PhpWiki name is renamed (if a conversion applies) and then
          # passed through MakePageName() to obtain the PmWiki page name.
          $pn = $pn0 = urldecode($v);
          if (!empty($PWC_PageNameConversions)) {
            $pn = preg_replace(array_keys($PWC_PageNameConversions),
              array_values($PWC_PageNameConversions), $pn, -1, $rcount);
            if ($rcount > 0)
              pwc_show("$pn0 has been renamed to $pn", 'rename', $pagefile);
          }
          else
            $pn = "$OutGroup.$pn";
          $pagename=MakePageName("$OutGroup.$OutGroup", $pn);
          if (empty($pagename))
            pwc_show("couldn't convert pagename $pn", 'error', $pagefile);
          else pwc_show("\tpagename: $pagename");
          break;
        case 'flags':
          if ($v!='""' && $v!='PAGE_LOCKED')
            pwc_show("unknown flags found: $v ", 'error', $pagefile);
          break;
        case 'author':
          if (trim($v)=='The PhpWiki programming team') {
            $skipped++;
            pwc_show("author string indicates this an untouched sys page...",
              'skip', $pagefile);
            continue 3;   # leave the switch and the attribute loop: next file
          }
          $page['author']=$v; pwc_show("\tauthor: $v ");
          break;
        case 'version': $page['rev']=$v; break;
        case 'lastmodified': $page['time']=$v; break;
        case 'created': $page['ctime']=$v; break;
        case 'author_id':
          # A value that survives the ip2long()/long2ip() round trip is a
          # dotted-quad IPv4 address and is stored as the host. Anything else
          # is compared with the author, when that attribute was already seen.
          $ip = ip2long($v);
          if ($ip!==false && long2ip($ip)==$v) $page['host']=$v;
          elseif (isset($page['author']) && $page['author']!=$v)
            pwc_show("author_id!=author ($v!=".$page['author'].")", 'error', $pagefile);
          break;
        case 'markup':
          if ($v!=2) pwc_show("markup!=2", 'error', $pagefile);
          break;
        case 'summary': $page['csum']=$v; break;
        case 'charset':
          # The text has been converted already, so the page is always UTF-8.
          $page['charset'] = 'UTF-8';
          if ($v!='iso-8859-1')
            pwc_show("charset!=iso-8859-1 ($v)", 'error', $pagefile);
          break;
        case 'hits': case 'acl':
          break;   # known attributes that are not carried over
        default: pwc_show("unknown key: $k ($v)", 'error', $pagefile);
      }
    }
    if (empty($pagename)) {
      pwc_show("couldn't find pagename attribute!", 'error', $pagefile);
      continue;
    }
    if (empty($page['author'])) {
      pwc_show("couldn't find author attribute!", 'error', $pagefile);
      continue;
    }

    # Markup conversion: optional Pre rules, the main rules, MarkupRestore(),
    # then the optional Post rules.
    if (!empty($PWC_MarkupConversionsPre))
      $pagetext=preg_replace(array_keys($PWC_MarkupConversionsPre),
        array_values($PWC_MarkupConversionsPre), $pagetext);
    $pagetext=preg_replace(array_keys($PWC_MarkupConversions),
      array_values($PWC_MarkupConversions), $pagetext);
    $pagetext=MarkupRestore($pagetext);
    if (!empty($PWC_MarkupConversionsPost))
      $pagetext=preg_replace(array_keys($PWC_MarkupConversionsPost),
        array_values($PWC_MarkupConversionsPost), $pagetext);

    $page['text'] = $pagetext;
    $page['name'] = $pagename;
    ksort($page);
    # The version line has to come first in the written file.
    $page=array_merge(array('version'=>'pmwiki-2.2.0 ordered=1 urlencoded=1'),
      $page);
    if (!pwc_write($pagename, $page))
      pwc_show("couldn't save the converted page", 'error', $pagefile);
    else
      $converted++;
  }

  # Totals are printed right away, then the collected message lists.
  pwc_show("total files considered: ".count($infiles), '');
  pwc_show("total converted: $converted", '');
  pwc_show('', 'errors');
  pwc_show('', 'skips');
  pwc_show('', 'renames');
  pwc_show('', 'infos');
  exit;
}
## Collects conversion messages and prints them on request.
##   $type 'info', 'error', 'skip', 'rename': the message is stored in the
##     matching list (nothing is printed); a non-empty $context is appended
##     in parentheses, and errors are wrapped in a red <span>.
##   $type 'infos', 'errors', 'skips', 'renames': prints the matching list
##     inside <pre>, preceded by a line giving its name and size.
##   any other $type: prints $msg (and $context, if not empty) immediately.
function pwc_show($msg, $type='info', $context='') {
  # One list per message kind, kept between calls.
  static $log = array(
    'errors'=>array(), 'infos'=>array(), 'skips'=>array(), 'renames'=>array(),
  );
  switch ($type) {
    case 'info':
      if (empty($context)) $log['infos'][] = "$msg\n";
      else $log['infos'][] = "$msg ($context)\n";
      break;
    case 'error':
      $entry = "<span style='color:red;'>$msg</span>";
      if (!empty($context)) $entry = "$entry ($context)";
      $log['errors'][] = "$entry\n";
      break;
    case 'skip': case 'rename':
      if (empty($context)) $log[$type.'s'][] = "$msg\n";
      else $log[$type.'s'][] = "$msg ($context)\n";
      break;
    case 'errors': case 'infos': case 'skips': case 'renames':
      $lines = $log[$type];
      $total = count($lines);
      echo '<pre>'
        ."\n$type ($total lines)\n=======================\n"
        .implode('', $lines)
        .'</pre>';
      break;
    default:
      $dump = print_r($msg, true);
      if (!empty($context)) $dump .= '('.print_r($context, true).')';
      echo "<pre>".$dump."</pre>\n";
  }
}
## Handler for ?action=phpwikit: runs $PWC_MarkupTestString through the same
## conversion stages as a real page (Pre rules, main rules, MarkupRestore(),
## Post rules), prints the HTML-escaped text before and after, and exits.
function HandlePHPWikiMarkupTest($pagename) {
  global $PWC_MarkupTestString,
    $PWC_MarkupConversionsPre, $PWC_MarkupConversions,
    $PWC_MarkupConversionsPost;

  header('Content-Type: text/html; charset=UTF-8');

  $sample = $PWC_MarkupTestString;
  echo "<pre>before conversion:\n", htmlspecialchars($sample);

  # The Pre and Post stages only run when rules were defined for them.
  if (!empty($PWC_MarkupConversionsPre)) {
    $sample = preg_replace(array_keys($PWC_MarkupConversionsPre),
      array_values($PWC_MarkupConversionsPre), $sample);
  }
  $sample = preg_replace(array_keys($PWC_MarkupConversions),
    array_values($PWC_MarkupConversions), $sample);
  $sample = MarkupRestore($sample);
  if (!empty($PWC_MarkupConversionsPost)) {
    $sample = preg_replace(array_keys($PWC_MarkupConversionsPost),
      array_values($PWC_MarkupConversionsPost), $sample);
  }

  echo "\n\n\nafter conversion:\n", htmlspecialchars($sample), '</pre>';
  exit;
}
## Writes a page to disk as "$OutWikiDir/$pagename".
##   $pagename - name of the page, used verbatim as the file name
##   $page     - associative array of page attributes (key => value)
## Returns true when the file was written, false when writing failed.
##
## The file holds one "key=value" attribute per line, so '%', newline and '<'
## are escaped in every value: a raw newline in any attribute (not only in
## 'text') would otherwise break the line structure.
## NOTE(review): this assumes PmWiki decodes all attributes of a page stored
## with "urlencoded=1" - confirm against the PmWiki page file format.
function pwc_write($pagename, $page) {
  global $OutWikiDir;
  $raw = array('%', "\n", '<');
  $escaped = array('%25', '%0a', '%3c');
  $str = '';
  foreach ($page as $k=>$v)
    $str .= "$k=".str_replace($raw, $escaped, $v)."\n";
  # file_put_contents() returns the number of bytes written, which may be 0;
  # only a strict false signals a failure.
  return file_put_contents("$OutWikiDir/$pagename", $str) !== false;
}
## Builds the replacement for an image link matched by the conversion rules.
##   $link  - image URL without the dot and extension
##   $ext   - file extension; it is lower-cased in the result
##   $title - optional link text; when not empty it is appended in double quotes
## The assembled markup is passed through Keep() before it is returned.
## (A hook should be added here to enable conversion to gallery recipes, e.g.
## links pointing below '/phpwiki/files' could be turned into Mini: markup.)
function pwc_convertImageLinks($link, $ext, $title) {
  $url = $link.strtolower(".$ext");
  $caption = empty($title) ? '' : '"'.$title.'"';
  return Keep($url.$caption);
}
## Converts the body of a PhpWiki OldStyleTable plugin call.
##   $table    - the text found between "<?plugin OldStyleTable" and "?>"
##   $pagename - not used here
## Every non-empty line becomes one row: its cells are rewritten by the
## $OldStyleTableColPattern / $OldStyleTableColReplacement pair (both can be
## predefined to override the defaults) and the row is closed with "||".
## The result starts with a "|| class=border" line.
## NOTE(review): the default pattern uses the /e modifier, which PHP removed
## in version 7.0.
function pwc_OldStyleTableConvert($table, $pagename) {
  global $OldStyleTableColPattern, $OldStyleTableColReplacement;
  SDV($OldStyleTableColPattern, '/\s*\|(v+|>|<|\^|)\s*([^\|$]*)/e');
  SDV($OldStyleTableColReplacement,
"'||'.(('$2'=='') ? '__' :
(
(('$1'=='>' || '$1'=='^') ? ' ' : '')
.'$2'
.(('$1'=='<' || '$1'=='^') ? ' ' :
(substr('$1',0,1)=='v'?str_repeat('+',strlen('$1')+1):''))
))"
  );

  # Keep the trimmed, non-empty lines only.
  $rows = array();
  foreach (explode("\n", $table) as $idx=>$raw) {
    $cells = trim($raw);
    if (!empty($cells)) $rows[$idx] = $cells;
  }

  $rows = preg_replace($OldStyleTableColPattern, $OldStyleTableColReplacement,
    $rows);
  return "\n|| class=border\n".implode("||\n", $rows)."||\n";
}
## Gets called when converting definition style tables.
##   $table    - the matched block of lines
##   $pagename - not used here
## Blank lines are dropped. Every remaining line becomes one table cell whose
## text is the line without surrounding spaces and '|' characters. A line that
## does not start with a space and ends with '|' opens a new row ("(:cellnr");
## any other line continues the current row ("(:cell").
## Returns the cells wrapped in "(:table class=border:)" ... "(:tableend:)".
## Column and row spanning is not produced.
function pwc_DefStyleTableConvert($table, $pagename) {
  $tablestr = '';
  foreach (explode("\n", $table) as $line) {
    if (trim($line)=='') continue;
    # Trailing blanks are ignored when looking for the closing '|', because
    # the rule that calls this function accepts spaces after it.
    $opensrow = ($line[0]!=' ' && substr(rtrim($line, " \t"), -1)=='|');
    $tablestr .= ($opensrow ? '(:cellnr' : '(:cell')
      .':) '.trim($line, "| \n")."\n";
  }
  return "(:table class=border:)\n$tablestr(:tableend:)";
}
# Datestamp of the original programming: Spring 2009
_______________________________________________
pmwiki-users mailing list
[email protected]
http://www.pmichaud.com/mailman/listinfo/pmwiki-users