Am Donnerstag, 23. Mai 2002 06:50 schrieb Denis B. Roegel:
> \begin{itemize} by \startitemize, etc. Many of these things could
> be automated, and I wonder if someone already wrote a script for that?

Try the attached one. It's anything but sophisticated or complete, 
but it does some of the tedious work.

Grüßlis vom Hraban!
-- 
http://www.fiee.net/texnique/
---
#!/usr/bin/perl -w
#
# HTML2ConTeXt: crude, line-by-line conversion of an HTML file into ConTeXt
# markup, done by applying a fixed sequence of regex substitutions to $_.
#
# Usage:  SCRIPT FILE     (FILE.htm / FILE.html is tried when FILE itself is
#                          not a readable text file)
# Output: FILE with a trailing .htm/.html replaced by .tex; when there is no
#         such suffix, ".tex" is appended so the input is never overwritten.
#
# All substitutions use ASCII delimiters ("!") and ASCII-only patterns, so
# the script no longer depends on the encoding of its own source file.
# The ORDER of the rule groups matters; see the comment on each group.

use strict;

print "\nThis is HTML2ConTeXt. Version 2002-05-15\n";
print "I'll try to convert your HTML file for ConTeXt.\n";
print "copyleft Henning Hraban Ramm, http://www.fiee.net/texnique/\n\n";

# No system call has failed at this point, so $! is not part of the message.
unless (defined $ARGV[0] && length $ARGV[0]) {
	die "You must name a file to convert!";
}
my $HTMLDatei = $ARGV[0];
unless (-T $HTMLDatei) {
	print "$HTMLDatei not found!\n";
	# Try exactly one of the two suffixes; never stack ".htm.html".
	if    (-T "$HTMLDatei.htm")  { $HTMLDatei .= ".htm"; }
	elsif (-T "$HTMLDatei.html") { $HTMLDatei .= ".html"; }
} # unless
$HTMLDatei =~ s/\\/\//g;	# backslashes in the path become forward slashes

my $Table = "n";	# "y": convert tables to \bTABLE...; otherwise drop table tags
my $Encod = "win";	# "win": emit sharp s as the single byte 0xDF; otherwise \ss{}

open(my $in, '<', $HTMLDatei) or die "Can't open $HTMLDatei! $!";

# Derive the output name. It must differ from the input name, otherwise
# opening it for writing would truncate the file we are about to read.
my $TeXDatei = $HTMLDatei;
$TeXDatei .= ".tex" unless $TeXDatei =~ s/\.html?$/.tex/i;
print $TeXDatei."\n";
open(my $out, '>', $TeXDatei) or die "Can't make $TeXDatei! $!";

while (<$in>) {
# percent first: every "%" from the source is escaped here, so any "%" that a
# later rule emits is a real TeX comment character and stays untouched
	s!%!|~|\\%{}!g;	# percent

# Links and images with a quoted attribute value. These must run BEFORE the
# quote rules below, which would otherwise turn the closing '"' of the
# attribute into '}' and keep these patterns from ever matching.
	s!<A\s[^<>]*?HREF="([^"<>]*)"[^<>]*>(.*?)</A>!\\goto{$2}[URL($1)]!gi;
	s!<A\s[^<>]*?NAME="([^"<>]*)"[^<>]*>(.*?)</A>!\\reference[$1]{$2}!gi;
	s!<IMG\s[^<>]*?SRC="([^"<>]*)"[^<>]*>!\\externalfigure[$1]!gi;
	# IMG tag that is not closed on this line: keep the other attributes
	# in a TeX comment at the end of the line
	s!<IMG\s([^"=<>]*)SRC="([^"<>]*)"([^<>]*)$!\\externalfigure[$2]\t\% $1 $3!gi;

# single entities and chars
	s!&(.)uml;!\\"$1!g;
	s!&(.)acute;!\\'$1!g;
	s!&(.)grave;!\\`$1!g;
	s!&(.)circ;!\\^$1!g;
	# NOTE(review): the original replacement character was lost in transit;
	# \r{...} is the usual ring-accent command -- confirm for your ConTeXt.
	s!&(.)ring;!\\r{$1}!g;
	if ($Encod eq "win") {
		s!&szlig;!\xDF!g;	# raw byte, as in the Windows/Latin-1 code pages
	} else {
		s!&szlig;!\\ss{}!g;
	} # if Encoding
	s!&(?:\#150|ndash|endash);!--!g;	# endash
	s! - ! -- !g;			# endash
	s!&nbsp;!~!g;	# non breaking space
	s!&quot;([^<>]*)&quot;!\\quotation{$1}!g;
	s!&[rl]aquo;([^<>]*)&[lr]aquo;!\\quotation{$1}!g;
	s!&\#132;([^<>]*)&quot;!\\quotation{$1}!g;
	s!&\#132;([^<>]*)$!\\quotation{$1!g; # uncompleted line
	s!\s(?:&quot;|")! \\quotation{!g;	# begin quote
	s!&quot;\s!} !g;	# end quote
	s!&quot;!"!g;		# quote
	s!([^\\=\s])"!$1}!g;	# end quote
	s!&sup(.);!^$1!g;
	s!&frac(.)(.);!\\frac{$1}{$2}!g;
	# NOTE(review): &#133; (ellipsis in cp1252) is replaced by a blank, as in
	# the text this script was recovered from; the original replacement may
	# have been a non-ASCII character that was lost.
	s!&\#133;! !g;

# TeX words and marks
	s!T<SUB>E</SUB>X!TeX!g;
	s!pdfTeX!\\pdfTeX{}!gi;
	# was mapped to \pdfTeX{} (copy-paste slip);
	# NOTE(review): assumes ConTeXt provides the \PPCHTeX logo macro
	s!ppchTeX!\\PPCHTeX{}!gi;
	s!ConTeXt!\\ConTeXt{}!g;
	s!CONTEXT!\\ConTeXt{}!g;
	s!(\s)TeX!$1\\TeX{}!g;

# environments
	s!<BODY[^<>]*>!\\starttext!gi;
	s!</BODY>!\\stoptext!gi;
	s!<(?:BLOCKQUOTE|QUOTE)>!\\startquotation!gi;
	s!</(?:BLOCKQUOTE|QUOTE)>!\\stopquotation!gi;
	s!</?DIV[^<>]*>!!gi;	# delete all divs
	s!</?FONT[^<>]*(?:>|$)!!gi;	# delete all font tags

# Headers (attributes such as ALIGN are ignored)
	s!<H1\b[^<>]*>!\\chapter{!gi;
	s!<H2\b[^<>]*>!\\section{!gi;
	s!<H3\b[^<>]*>!\\subsection{!gi;
	s!<H4\b[^<>]*>!\\subsubsection{!gi;
	s!</H\d>!}!gi;

# Tables
	if ($Table eq "y") {
		# the attributes go into a TeX comment; the newline ends that
		# comment so the rest of the line is not swallowed by it
		s!<TABLE([^<>]*)>!\\bTABLE \%$1\n!gi;
		s!</TABLE>!\\eTABLE!gi;
		s!</TD>!\\eTD !gi;
		s!<TD[^<>]*>!\\bTD !gi;
		s!</TR>!\\eTR !gi;
		s!<TR[^<>]*>!\\bTR !gi;
	} else {
		s!</?T(?:ABLE|D|R|BODY)[^<>]*>!!gi;	# delete all table tags
	} # if Table

# Images without a quoted SRC attribute: pass the attribute text through
	s!<IMG\s([^<>]*)>!\\externalfigure[$1]!gi;

# Lists
	s!<UL>!\\startitemize[1]!gi;
	s!<OL>!\\startitemize[n]!gi;
	s!<DL>!\\startitemize[1]!gi; # ?
	s!</[UOD]L>!\\stopitemize!gi;
	s!<LI>!\\item !gi;
	s!<DT>!\\item !gi; #
	s!<DD>!\\item !gi; #
	# \item opens no group, so the closing tags must vanish rather than
	# fall through to the catch-all rule that emits "}"
	s!</(?:LI|DT|DD)>!!gi;

# Paragraphs and breaks (\b keeps <P...> from also deleting <PRE>)
	s!<P\b[^<>]*>!!gi;
	s!</P>!\n\n!gi;
	s!<BR[^<>]*>!\n!gi;
	s!<HR[^<>]*>!\\blank !gi;

	s!<(?:PRE|TT|CODE)>!\\type{!gi;
	s!<(?:STRONG|B)>!{\\bf !gi;
	s!<(?:EM|I|U)>!{\\em !gi;

# Everything else: a tag alone on its line becomes \start.../\stop...,
# any other tag becomes a braced group. Each pattern is confined to a
# single tag ([^<>]) so several tags on one line are handled separately.
	s!^</([^\s<>]+)>$!\\stop$1!gi;
	s!^<([^\s<>]+)([^<>]*)>$!\\start$1\[$2\]!gi;
	s!</[^<>]*>!}!g; # all other closing tags become }
	s!<([^\s<>]+)\s([^<>]*)>!\\$1\[$2\]\{!g; # all other opening tags become {
	s!<([^\s<>]+)>!\\$1\{!g; # all other opening tags become {

# Escaped markup characters are decoded only now, after all tag rules, so
# that "&lt;b&gt;" stays text instead of being treated as a tag. &amp; must
# come last so that "&amp;lt;" is not decoded twice.
	s!&lt;!<!g;
	s!&gt;!>!g;
	s!&amp;!\\&!g;	# "\\&" yields \& ; a single backslash would yield a bare &

	s!^\s*!!;	# remove leading whitespace (a whitespace-only line vanishes)

	print {$out} $_;
	print ".";	# progress indicator: one dot per input line
} # while
print "\n";

# Buffered write errors only surface at close, so check it.
close($out) or die "Can't write $TeXDatei! $!";
close($in);


# \goto{text}[URL(Link)]
#!/usr/bin/perl -w
#
# LaTeX2ConTeXt: crude, line-by-line conversion of a LaTeX file into ConTeXt
# markup, done by applying a fixed sequence of regex substitutions to $_.
#
# Usage:  SCRIPT FILE     (FILE.tex is tried when FILE itself is not a
#                          readable text file)
# Effect: FILE is renamed to FILE.bak and the converted text is written
#         under the original name, preceded by a fixed ConTeXt header.
#
# All substitutions use ASCII delimiters ("!" or braces), and every literal
# "{" / "}" in a pattern is escaped, as current Perl versions expect.

use strict;

print "\nThis is LaTeX2ConTeXt. Version 2002-03-13\n";
print "I'll try to convert your LaTeX file for ConTeXt.\n";
print "copyleft Henning Hraban Ramm, http://www.fiee.net/texnique/\n\n";

# No system call has failed at this point, so $! is not part of the message.
unless (defined $ARGV[0] && length $ARGV[0]) {
	die "You must name a file to convert!";
}
my $LaTeXDatei = $ARGV[0];
unless (-T $LaTeXDatei) {
	print "$LaTeXDatei not found!\n";
	if (-T "$LaTeXDatei.tex")  { $LaTeXDatei .= ".tex"; }
} # unless
$LaTeXDatei =~ s/\\/\//g;	# backslashes in the path become forward slashes

# Keep the original as a backup; stop right here if that fails instead of
# reporting a confusing "can't open" for the backup name afterwards.
my $Sicherung = $LaTeXDatei.".bak";
rename($LaTeXDatei, $Sicherung)
	or die "Can't rename $LaTeXDatei to $Sicherung! $!";

open(my $in, '<', $Sicherung) or die "Can't open $Sicherung! $!";

my $ConTeXtDatei = $LaTeXDatei;
open(my $out, '>', $ConTeXtDatei) or die "Can't make $ConTeXtDatei! $!";

# Header (should be an extra file)
print {$out} "% interface=english output=pdftex\n";
print {$out} "%D this \\ConTeXt\\ file is automatically converted from \\LaTeX.\n";
print {$out} "\\enableregime[windows]\n";
print {$out} "\\mainlanguage[de]\n";
print {$out} "\\setupencoding[default=texnansi]\n";
print {$out} "\n";

while (<$in>) {
# disable LaTeX header and some other commands by commenting them out
	s{^%!}{%%% };
	s!^\s?(\\document(?:style|class))!%%% $1!;
	s!^\s?(\\usepackage|\\setlength|\\addtolength|\\normalfont)!%%% $1!;

	unless (m/^%/) { # if no comment line
# raw convert environments
		s!\\begin\{([^{}]*)\}!\\start$1 !g;	# \begin{environment} --> \startenvironment
		s!\\end\{([^{}]*)\}!\\stop$1 !g;	# \end{environment} --> \stopenvironment
		s!\\begingroup!\\bgroup!g;
		s!\\endgroup!\\egroup!g;
		s!\\include\s*\{([^{}<>\[\]]*)\}!\\input $1 !g;	# \include{} --> \input

# convert single commands
		# NOTE(review): only the command name is changed; an argument like
		# \newcommand{\foo}[1]{...} still needs manual rework for \def.
		s!\\newcommand!\\def!g;	# \newcommand --> \def
		s!\\newenvironment\s*\{([^{}]*)\}!\\defineenvironment[$1]!g;	# \newenvironment{} --> \defineenvironment[]
		s!\\label!\\reference!g;	# \label --> \reference
		s!\\\\\*!\\\\!g;	# \\* --> \\
		s!\\(big|small)skip!\\blank[$1]!g;	# \...skip --> \blank[...]
		s!\\medskip!\\blank[medium]!g;	# \medskip --> \blank[medium]
		# non-greedy, so two quoted passages on one line stay separate
		s!"`(.*?)"'!\\quote{$1}!g;
		s!``(.*?)''!\\quotation{$1}!g;
		s!\\selectlanguage\{([^{}]*)\}!\\language[$1]!g;	# \selectlanguage{lang} --> \language[lang]
		s!\\newpage!\\page!g;	# \newpage --> \page
		s!\\(?:this)?pagestyle\{empty\}!\\setupheaders[state=stop]!g;
		s!\\(?:this)?pagestyle\{([^{}]*)\}!\\setupheaders[state=start] \% $1!g;
		s!\\areaset\{([^{}]*)\}\{([^{}]*)\}!\\setuplayout[width=$1, height=$2]!g;

# correct environments (these act on the \start.../\stop... names made above)
		s!\\(start|stop)document!\\${1}text!g;
		s!\\startitemize!\\startitemize[1]!g;
		# the LaTeX environment is "enumerate"; "enumeration" is accepted too.
		# Only the start command takes the [n] option.
		s!\\startenumerat(?:e|ion)!\\startitemize[n]!g;
		s!\\stopenumerat(?:e|ion)!\\stopitemize!g;
		s!\\(start|stop)(?:verbatim|alltt)!\\${1}typing!g;
		s!\\(start|stop)verse!\\${1}lines!g;
		s!\\(start|stop)center!\\${1}centered!g;
		s!\\(start|stop)tabularx*!\\${1}tabulate!g;
		s!\\(start|stop)longtable!\\${1}table!g;
		# the column count is a number in braces after the start command;
		# the stop command carries no argument
		s!\\startmulticols\s*\{(\d+)\}!\\startcolumns[n=$1]!g;
		s!\\stopmulticols!\\stopcolumns!g;
		s!\\twocolumn!\\startcolumns[n=2]!g;
		s!\\onecolumn!\\stopcolumns!g;

# correct titles
		s!\\part\*+!\\part!g;	# \part* --> \part
		s!\\chapter\*!\\title!g;
		s!\\((?:sub){0,2})section\*!\\${1}subject!g;	# \(sub)(sub)section* --> \...subject
		s!\\paragraph!\\subsubsubsection!g;

# correct tables
		s!(\\starttabulate)(\s*)\{([^{}]*)\}!$1$2\[\|$3\|\]!g;
		s{(?<!\\)&}{\\NC }g;	# column separator; an escaped \& is left alone
		s!\\hline!\\HL!g;
		s!\\(?:top|bottom|mid)rule!\\HL!g;

# correct typography
		# the closing delimiter must be the same character as the opening one
		s!\\verb([|+])(.*?)\1!\\type{$2}!g;
		s!\\verb\b!\\type!g;	# \b: leave longer names like \verbatiminput alone
		s!\\type\{\\!\\tex{!g;
		s!\\texttt!\\type!g;
		s!\\emph\{!{\\em !g;
		s!\\text(bf|sc|it|sl)\{!{\\$1 !g;
		s!\\sffamily!\\switchtobodyfont[ss]!g;	# LaTeX "sf" is "ss" here
		s!\\(rm|ss|tt)family!\\switchtobodyfont[$1]!g;
		s!\\small !\\tfx !g;
		s!\\Small !\\tfxx !g;
		s!\\large !\\tfa !g;
		s!\\Large !\\tfb !g;
	} # unless comment line
	print {$out} $_;
	print ".";	# progress indicator: one dot per input line
} # while
print "\n";

# Buffered write errors only surface at close, so check it.
close($out) or die "Can't write $ConTeXtDatei! $!";
close($in);

Reply via email to