Physikerwelt has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/90748


Change subject: Breakdown of texvc
......................................................................

Breakdown of texvc

texvc had several tasks in the past:
1 checking the input
2 convert mediawiki custom syntax to standard LaTeX
3 run LaTeX
4 convert dvi2png

This change provides a simplified version that performs
only steps 1+2. This is required to avoid security problems
with tools like MathJax, especially if these tools are
run on the server side.

Change-Id: I1650e6ec2ccefff6335fbc36bbe8ca8f59db0faa
---
A texvccheck/.gitignore
A texvccheck/Makefile
A texvccheck/README
A texvccheck/lexer.mll
A texvccheck/parser.mly
A texvccheck/test.pl
A texvccheck/tex.mli
A texvccheck/texutil.ml
A texvccheck/texvccheck.ml
A texvccheck/util.ml
10 files changed, 1,206 insertions(+), 0 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Math refs/changes/48/90748/1

diff --git a/texvccheck/.gitignore b/texvccheck/.gitignore
new file mode 100644
index 0000000..be7dcb9
--- /dev/null
+++ b/texvccheck/.gitignore
@@ -0,0 +1,9 @@
+# Compiled source
+*.mli
+!tex.mli
+*.cmi
+*.cmx
+*.o
+
+# The binaries
+texvccheck
diff --git a/texvccheck/Makefile b/texvccheck/Makefile
new file mode 100644
index 0000000..f2eaf0c
--- /dev/null
+++ b/texvccheck/Makefile
@@ -0,0 +1,65 @@
+.PHONY: clean all
+PREFIX = /usr/local
+DESTDIR = /usr
+SOURCEDIR = $(PWD)
+INSTALL = /usr/bin/install
+OBJ= tex.cmo texutil.cmo parser.cmo lexer.cmo texvccheck.cmo \
+tex.cmx texutil.cmx parser.cmx lexer.cmx texvccheck.cmx \
+lexer.cmi parser.cmi tex.cmi texutil.cmi texvccheck.cmi \
+lexer.o parser.o tex.o texutil.o texvccheck.o \
+lexer.ml parser.ml parser.mli texvccheck texvccheck.bc util.o \
+util.cmo util.cmx util.cmi \
+
+COMMON_NATIVE_OBJ  =util.cmx parser.cmx texutil.cmx lexer.cmx
+COMMON_BYTECODE_OBJ=util.cmo parser.cmo texutil.cmo lexer.cmo
+
+all: texvccheck
+clean:
+       rm -f $(OBJ)
+
+# Native versions
+texvccheck: $(COMMON_NATIVE_OBJ) texvccheck.cmx
+       ocamlopt -o $@ unix.cmxa $^
+
+# Bytecode version
+texvccheck.bc: $(COMMON_BYTECODE_OBJ) texvccheck.cmo
+       ocamlc -o $@ unix.cma $^
+
+install: texvccheck  # installed 0755: a world-writable (0777) binary could be replaced by any local user
+       $(INSTALL) -dm755 $(DESTDIR)/bin
+       $(INSTALL) -m755 texvccheck $(DESTDIR)/bin
+
+remove:
+       rm -f $(DESTDIR)/bin/texvccheck
+
+#
+# Pattern rules
+#
+
+#  .ml source  .mli interface
+#  .cmi compiled interface
+#  .cmo object       .cma library object
+#  .cmx object file  .cmxa library object file
+%.ml: %.mll
+       ocamllex $<
+%.mli %.ml: %.mly
+       ocamlyacc $<
+%.cmo: %.ml
+       ocamlc -c $<
+%.cmx: %.ml
+       ocamlopt -c $<
+%.cmi: %.mli
+       ocamlc -c $<
+
+# Various dependencies
+
+lexer.cmo: parser.cmi tex.cmi texutil.cmi
+lexer.cmx: parser.cmx tex.cmi texutil.cmx
+parser.cmo: tex.cmi parser.cmi 
+parser.cmx: tex.cmi parser.cmi 
+parser.cmi: tex.cmi 
+texutil.cmo: parser.cmi tex.cmi util.cmo texutil.cmi
+texutil.cmx: parser.cmx tex.cmi util.cmx texutil.cmi
+texutil.cmi: parser.cmi tex.cmi
+texvccheck.cmo: lexer.cmo parser.cmi texutil.cmi util.cmo
+texvccheck.cmx: lexer.cmx parser.cmx texutil.cmx util.cmx
diff --git a/texvccheck/README b/texvccheck/README
new file mode 100644
index 0000000..fd0f041
--- /dev/null
+++ b/texvccheck/README
@@ -0,0 +1,77 @@
+== About texvc-lite ==
+
+texvc takes the user input, validates it, and replaces MediaWiki-specific functions.
+Input data is parsed and scrutinized for safety.
+
+texvc was written by Tomasz Wegrzanowski for use with MediaWiki; it's
+included as part of the MediaWiki package (http://www.mediawiki.org) and is
+under the GPL license.
+
+texvc-lite was extracted from the original texvc program in 2013 by
+Moritz Schubotz and uses only the sanitizing and customization part.
+
+The list of all commands can be viewed by browsing the source files.
+Most commands are listed at
+http://sdrv.ms/15w2gVw
+There is also a tool for convenient whitelisting of special commands
+that are used in local wikis.
+
+Please report bugs at: https://bugzilla.wikimedia.org/
+with "MediaWiki extensions" as product and "Math" as component.
+
+== Setup ==
+
+=== Requirements ===
+
+OCaml 3.06 or later is required to compile texvc; this can be acquired from
+http://caml.inria.fr/ if your system doesn't have it available.
+
+The makefile requires GNU make.
+
+In Ubuntu Precise, all dependencies can be installed using:
+
+  $ sudo apt-get install ocaml
+
+=== Installation ===
+
+Run 'make' (or 'gmake' if GNU make is not your default make). This should
+produce the texvc executable.
+
+By default,
+MediaWiki will search in this directory for texvc, if you moved it elsewhere,
+you'll have to modify $wgTexvc and set it to the path of the texvc executable.
+
+== Usage ==
+
+Normally texvc is called from MediaWiki's Math modules and everything
+Just Works. It can be run manually for testing or for use in another app.
+
+=== Command-line parameters ===
+
+    texvc '\TeX input string'
+
+Be sure to properly quote the TeX code!
+
+Example:
+
+    texvc "y=x+2"
+
+=== Output format ===
+
+The output is the sanitized and customized TeX string.
+
+texvc output format is like this:
+    +%s                ok 
+    S          syntax error
+    E          lexing error
+    F%s                unknown function %s
+    -          other error
+
+== Hacking ==
+
+Before you start hacking on the math package it's good to know the workflow,
+which is basically:
+
+1. texvc gets called by Math/Math.body.php (check out the line beginning with "$cmd")
+2. texvc does its magic, which is basically to check for invalid LaTeX code.
+3. texvc takes the user input, validates it, and replaces MediaWiki-specific functions
diff --git a/texvccheck/lexer.mll b/texvccheck/lexer.mll
new file mode 100644
index 0000000..1cd9579
--- /dev/null
+++ b/texvccheck/lexer.mll
@@ -0,0 +1,119 @@
+{
+    open Parser
+    open Tex
+}
+let space = [' ' '\t' '\n' '\r']
+let alpha = ['a'-'z' 'A'-'Z']
+let literal_id = ['a'-'z' 'A'-'Z']
+let literal_mn = ['0'-'9']
+let literal_uf_lt = [',' ':' ';' '?' '!' '\'']
+let delimiter_uf_lt = ['(' ')' '.']
+let literal_uf_op = ['+' '-' '*' '=']
+let delimiter_uf_op = ['/' '|']
+let boxchars  = ['0'-'9' 'a'-'z' 'A'-'Z' '+' '-' '*' ',' '=' '(' ')' ':' '/' 
';' '?' '.' '!' '\'' '`' ' ' '\128'-'\255']
+let aboxchars = ['0'-'9' 'a'-'z' 'A'-'Z' '+' '-' '*' ',' '=' '(' ')' ':' '/' 
';' '?' '.' '!' '\'' '`' ' ']
+let latex_function_names = "arccos" | "arcsin" | "arctan" | "arg" | "cos" | 
"cosh" | "cot" | "coth" | "csc"| "deg" | "det" | "dim" | "exp" | "gcd" | "hom" 
| "inf" | "ker" | "lg" | "lim" | "liminf" | "limsup" | "ln" | "log" | "max" | 
"min" | "Pr" | "sec" | "sin" | "sinh" | "sup" | "tan" | "tanh"
+let mediawiki_function_names = "arccot" | "arcsec" | "arccsc" | "sgn" | "sen"
+
+rule token = parse
+    space +                    { token lexbuf }
+  | "\\text" space * '{' aboxchars + '}'
+                               {  let str = Lexing.lexeme lexbuf in
+                                 let n = String.index str '{' + 1 in
+                                 BOX ("\\text", String.sub str n 
(String.length str - n - 1)) }
+  | "\\mbox" space * '{' aboxchars + '}'
+                               { let str = Lexing.lexeme lexbuf in
+                                 let n = String.index str '{' + 1 in
+                                 BOX ("\\mbox", String.sub str n 
(String.length str - n - 1)) }
+  | "\\hbox" space * '{' aboxchars + '}'
+                               { let str = Lexing.lexeme lexbuf in
+                                 let n = String.index str '{' + 1 in
+                                 BOX ("\\hbox", String.sub str n 
(String.length str - n - 1)) }
+  | "\\vbox" space * '{' aboxchars + '}'
+                               { let str = Lexing.lexeme lexbuf in
+                                 let n = String.index str '{' + 1 in
+                                 BOX ("\\vbox", String.sub str n 
(String.length str - n - 1)) }
+  | "\\text" space * '{' boxchars + '}'
+                               {  let str = Lexing.lexeme lexbuf in
+                                 let n = String.index str '{' + 1 in           
                  
+                                 BOX ("\\text", String.sub str n 
(String.length str - n - 1)) }
+  | "\\mbox" space * '{' boxchars + '}'
+                               { let str = Lexing.lexeme lexbuf in
+                                 let n = String.index str '{' + 1 in           
          
+                                 BOX ("\\mbox", String.sub str n 
(String.length str - n - 1)) }
+  | "\\hbox" space * '{' boxchars + '}'
+                               { let str = Lexing.lexeme lexbuf in
+                                 let n = String.index str '{' + 1 in
+                                 BOX ("\\hbox", String.sub str n 
(String.length str - n - 1)) }
+  | "\\vbox" space * '{' boxchars + '}'
+                               { let str = Lexing.lexeme lexbuf in
+                                 let n = String.index str '{' + 1 in
+                                 BOX ("\\vbox", String.sub str n 
(String.length str - n - 1)) }
+  | literal_id                 { let str = Lexing.lexeme lexbuf in LITERAL 
(TEX_ONLY str) }
+  | literal_mn                 { let str = Lexing.lexeme lexbuf in LITERAL 
(TEX_ONLY str) }
+  | literal_uf_lt              { let str = Lexing.lexeme lexbuf in LITERAL 
(TEX_ONLY str) }
+  | delimiter_uf_lt            { let str = Lexing.lexeme lexbuf in DELIMITER 
(TEX_ONLY str) }
+  | "-"                                { let str = Lexing.lexeme lexbuf in 
LITERAL (TEX_ONLY str)}
+  | literal_uf_op              { let str = Lexing.lexeme lexbuf in LITERAL 
(TEX_ONLY str) }
+  | delimiter_uf_op            { let str = Lexing.lexeme lexbuf in DELIMITER 
(TEX_ONLY str) }
+  | "\\operatorname"            {  FUN_AR1nb "\\operatorname" }
+  | "\\sqrt" space * "["       { FUN_AR1opt "\\sqrt" }
+  | "\\xleftarrow" space * "[" {  FUN_AR1opt "\\xleftarrow" }
+  | "\\xrightarrow" space * "["        {  FUN_AR1opt "\\xrightarrow" }
+  | "\\" (latex_function_names as name) space * "("  { LITERAL (TEX_ONLY ("\\" 
^ name ^ "(")) }
+  | "\\" (latex_function_names as name) space * "["  { LITERAL (TEX_ONLY ("\\" 
^ name ^ "[") )}  
+  | "\\" (latex_function_names as name) space * "\\{"  { LITERAL (TEX_ONLY 
("\\" ^ name ^ "\\{")) }
+  | "\\" (latex_function_names as name) space * { LITERAL (TEX_ONLY("\\" ^ 
name ^ " ")) }
+  | "\\" (mediawiki_function_names as name) space * "("    { ( LITERAL 
(TEX_ONLY ("\\operatorname{" ^ name ^ "}("))) }  
+  | "\\" (mediawiki_function_names as name) space * "["    { ( LITERAL 
(TEX_ONLY ("\\operatorname{" ^ name ^ "}[")))}
+  | "\\" (mediawiki_function_names as name) space * "\\{"  { ( LITERAL 
(TEX_ONLY ("\\operatorname{" ^ name ^ "}\\{")))}
+  | "\\" (mediawiki_function_names as name) space *        { ( LITERAL 
(TEX_ONLY ("\\operatorname{" ^ name ^ "} "))) }
+  | "\\" alpha +               { Texutil.find (Lexing.lexeme lexbuf) }
+  | "\\,"                      { LITERAL (TEX_ONLY "\\,") }
+  | "\\ "                      { LITERAL (TEX_ONLY "\\ ") }
+  | "\\;"                      { LITERAL (TEX_ONLY "\\;") }
+  | "\\!"                      { LITERAL (TEX_ONLY "\\!") }
+  | "\\{"                      { DELIMITER (TEX_ONLY "\\{") }
+  | "\\}"                      { DELIMITER (TEX_ONLY "\\}") }
+  | "\\|"                      { DELIMITER (TEX_ONLY "\\|") }
+  | "\\_"                      { LITERAL (TEX_ONLY "\\_") }
+  | "\\#"                      { LITERAL (TEX_ONLY "\\#") }
+  | "\\%"                      { LITERAL (TEX_ONLY "\\%") }
+  | "\\$"                      { LITERAL (TEX_ONLY "\\$") }
+  | "\\&"                      { LITERAL (TEX_ONLY "\\&") }
+  | "&"                                { NEXT_CELL }
+  | "\\\\"                     { NEXT_ROW }
+  | "\\begin{matrix}"          {  BEGIN__MATRIX }
+  | "\\end{matrix}"            { END__MATRIX }
+  | "\\begin{pmatrix}"         {  BEGIN_PMATRIX }
+  | "\\end{pmatrix}"           { END_PMATRIX }
+  | "\\begin{bmatrix}"         {  BEGIN_BMATRIX }
+  | "\\end{bmatrix}"           { END_BMATRIX }
+  | "\\begin{Bmatrix}"         {  BEGIN_BBMATRIX }
+  | "\\end{Bmatrix}"           { END_BBMATRIX }
+  | "\\begin{vmatrix}"         {  BEGIN_VMATRIX }
+  | "\\end{vmatrix}"           { END_VMATRIX }
+  | "\\begin{Vmatrix}"         {  BEGIN_VVMATRIX }
+  | "\\end{Vmatrix}"           { END_VVMATRIX }
+  | "\\begin{array}"           {  BEGIN_ARRAY }
+  | "\\end{array}"             { END_ARRAY }
+  | "\\begin{align}"           {  BEGIN_ALIGN }
+  | "\\end{align}"             { END_ALIGN }
+  | "\\begin{alignat}"         {  BEGIN_ALIGNAT }
+  | "\\end{alignat}"           { END_ALIGNAT }
+  | "\\begin{smallmatrix}"     {  BEGIN_SMALLMATRIX }
+  | "\\end{smallmatrix}"       { END_SMALLMATRIX }
+  | "\\begin{cases}"           {  BEGIN_CASES }
+  | "\\end{cases}"             { END_CASES }
+  | '>'                                { LITERAL (TEX_ONLY ">") }
+  | '<'                                { LITERAL (TEX_ONLY "<") }
+  | '%'                                { LITERAL (TEX_ONLY "\\%") }
+  | '$'                                { LITERAL (TEX_ONLY "\\$") }
+  | '~'                                { LITERAL (TEX_ONLY "~") }
+  | '['                                { DELIMITER (TEX_ONLY "[") }
+  | ']'                                { SQ_CLOSE }
+  | '{'                                { CURLY_OPEN }
+  | '}'                                { CURLY_CLOSE }
+  | '^'                                { SUP }
+  | '_'                                { SUB }
+  | eof                                { EOF }
diff --git a/texvccheck/parser.mly b/texvccheck/parser.mly
new file mode 100644
index 0000000..d1254bb
--- /dev/null
+++ b/texvccheck/parser.mly
@@ -0,0 +1,118 @@
+%{
+    open Tex
+
+    let sq_close_ri = HTMLABLEC(FONT_UFH,"]", "]")
+%}
+%token <Tex.render_t> LITERAL DELIMITER
+%token <string> FUN_AR2 FUN_INFIX FUN_AR1 DECL FUN_AR1nb FUN_AR1opt BIG 
FUN_AR2nb
+%token <string*string> BOX
+%token <string*(string*string)> FUN_AR1hl
+%token <string*Tex.font_force> FUN_AR1hf DECLh
+%token <string*(Tex.t->Tex.t->string*string*string)> FUN_AR2h
+%token <string*(Tex.t list->Tex.t list->string*string*string)> FUN_INFIXh
+%token EOF CURLY_OPEN CURLY_CLOSE SUB SUP SQ_CLOSE NEXT_CELL NEXT_ROW
+%token BEGIN__MATRIX BEGIN_PMATRIX BEGIN_BMATRIX BEGIN_BBMATRIX BEGIN_VMATRIX 
BEGIN_VVMATRIX BEGIN_CASES BEGIN_ARRAY BEGIN_ALIGN BEGIN_ALIGNAT 
BEGIN_SMALLMATRIX
+%token END__MATRIX END_PMATRIX END_BMATRIX END_BBMATRIX END_VMATRIX 
END_VVMATRIX END_CASES END_ARRAY END_ALIGN END_ALIGNAT END_SMALLMATRIX
+%token LEFT RIGHT
+%type <Tex.t list> tex_expr
+%start tex_expr
+
+%%
+tex_expr:
+    expr EOF                   { $1 }
+  | ne_expr FUN_INFIX ne_expr EOF
+                               { [TEX_INFIX($2,$1,$3)] }
+  | ne_expr FUN_INFIXh ne_expr EOF
+                               { let t,h=$2 in [TEX_INFIXh(t,h,$1,$3)] }
+expr:
+    /* */                      { [] }
+  | ne_expr                    { $1 }
+ne_expr:
+    lit_aq expr                        { $1 :: $2 }
+  | litsq_aq expr              { $1 :: $2 }
+  | DECLh expr                 { let t,h = $1 in [TEX_DECLh(t,h,$2)] }
+litsq_aq:
+    litsq_zq                   { $1 }
+  | litsq_dq                   { let base,downi = $1 in TEX_DQ(base,downi) }
+  | litsq_uq                   { let base,upi = $1   in TEX_UQ(base,upi)}
+  | litsq_fq                   { $1 }
+litsq_fq:
+    litsq_dq SUP lit           { let base,downi = $1 in TEX_FQ(base,downi,$3) }
+  | litsq_uq SUB lit           { let base,upi = $1   in TEX_FQ(base,$3,upi) }
+litsq_uq:
+    litsq_zq SUP lit           { $1,$3 }
+litsq_dq:
+    litsq_zq SUB lit           { $1,$3 }
+litsq_zq:
+  | SQ_CLOSE                   { TEX_LITERAL sq_close_ri }
+expr_nosqc:
+    /* */                      { [] }
+  | lit_aq expr_nosqc          { $1 :: $2 }
+lit_aq:
+    lit                                { $1 }
+  | lit_dq                     { let base,downi = $1 in TEX_DQ(base,downi) }
+  | lit_uq                     { let base,upi = $1   in TEX_UQ(base,upi)}
+  | lit_dqn                    { TEX_DQN($1) }
+  | lit_uqn                    { TEX_UQN($1) }
+  | lit_fq                     { $1 }
+
+lit_fq:
+    lit_dq SUP lit             { let base,downi = $1 in TEX_FQ(base,downi,$3) }
+  | lit_uq SUB lit             { let base,upi = $1   in TEX_FQ(base,$3,upi) }
+  | lit_dqn SUP lit     { TEX_FQN($1, $3) }
+
+lit_uq:
+    lit SUP lit                        { $1,$3 }
+lit_dq:
+    lit SUB lit                        { $1,$3 }
+lit_uqn:
+    SUP lit             { $2 }
+lit_dqn:
+    SUB lit             { $2 }
+
+
+left:
+    LEFT DELIMITER             { $2 }
+  | LEFT SQ_CLOSE              { sq_close_ri }
+right:
+    RIGHT DELIMITER            { $2 }
+  | RIGHT SQ_CLOSE             { sq_close_ri }
+lit:
+    LITERAL                    { TEX_LITERAL $1 }
+  | DELIMITER                  { TEX_LITERAL $1 }
+  | BIG DELIMITER              { TEX_BIG ($1,$2) }
+  | BIG SQ_CLOSE               { TEX_BIG ($1,sq_close_ri) }
+  | left expr right            { TEX_LR ($1,$3,$2) }
+  | FUN_AR1 lit                        { TEX_FUN1($1,$2) }
+  | FUN_AR1nb lit              { TEX_FUN1nb($1,$2) }
+  | FUN_AR1hl lit              { let t,h=$1 in TEX_FUN1hl(t,h,$2) }
+  | FUN_AR1hf lit              { let t,h=$1 in TEX_FUN1hf(t,h,$2) }
+  | FUN_AR1opt expr_nosqc SQ_CLOSE lit { TEX_FUN2sq($1,TEX_CURLY $2,$4) }
+  | FUN_AR2 lit lit            { TEX_FUN2($1,$2,$3) }
+  | FUN_AR2nb lit lit          { TEX_FUN2nb($1,$2,$3) }
+  | FUN_AR2h lit lit           { let t,h=$1 in TEX_FUN2h(t,h,$2,$3) }
+  | BOX                                { let bt,s = $1 in TEX_BOX (bt,s) }
+  | CURLY_OPEN expr CURLY_CLOSE
+                               { TEX_CURLY $2 }
+  | CURLY_OPEN ne_expr FUN_INFIX ne_expr CURLY_CLOSE
+                               { TEX_INFIX($3,$2,$4) }
+  | CURLY_OPEN ne_expr FUN_INFIXh ne_expr CURLY_CLOSE
+                               { let t,h=$3 in TEX_INFIXh(t,h,$2,$4) }
+  | BEGIN__MATRIX  matrix END__MATRIX  { TEX_MATRIX ("matrix", $2) }
+  | BEGIN_PMATRIX  matrix END_PMATRIX  { TEX_MATRIX ("pmatrix", $2) }
+  | BEGIN_BMATRIX  matrix END_BMATRIX  { TEX_MATRIX ("bmatrix", $2) }
+  | BEGIN_BBMATRIX matrix END_BBMATRIX { TEX_MATRIX ("Bmatrix", $2) }
+  | BEGIN_VMATRIX  matrix END_VMATRIX  { TEX_MATRIX ("vmatrix", $2) }
+  | BEGIN_VVMATRIX matrix END_VVMATRIX { TEX_MATRIX ("Vmatrix", $2) }
+  | BEGIN_ARRAY    matrix END_ARRAY        { TEX_MATRIX ("array", $2) }
+  | BEGIN_ALIGN    matrix END_ALIGN        { TEX_MATRIX ("aligned", $2) }
+  | BEGIN_ALIGNAT  matrix END_ALIGNAT  { TEX_MATRIX ("alignedat", $2) }
+  | BEGIN_SMALLMATRIX  matrix END_SMALLMATRIX { TEX_MATRIX ("smallmatrix", $2) 
}
+  | BEGIN_CASES    matrix END_CASES    { TEX_MATRIX ("cases", $2) }
+matrix:
+    line                       { [$1] }
+  | line NEXT_ROW matrix       { $1::$3 }
+line:
+    expr                       { [$1] }
+  | expr NEXT_CELL line                { $1::$3 }
+;;
diff --git a/texvccheck/test.pl b/texvccheck/test.pl
new file mode 100644
index 0000000..343ee56
--- /dev/null
+++ b/texvccheck/test.pl
@@ -0,0 +1,9 @@
+#!/usr/local/bin/perl
+my $result = `texvc '\\sin(x)+{}{}\\cos(x)^2 newcommand'`;  # capture the checker's stdout
+if (substr($result,0,1) eq "+") {  # the first output character is the status code; "+" means ok
+       print "good";
+} else {
+       print "bad";
+}
+print $result;
+my $svg = `tex2svg '\\sin(x)+{}{}\\cos(x)^2 newcommand'`;  # same input through tex2svg; output is not inspected
\ No newline at end of file
diff --git a/texvccheck/tex.mli b/texvccheck/tex.mli
new file mode 100644
index 0000000..834df14
--- /dev/null
+++ b/texvccheck/tex.mli
@@ -0,0 +1,48 @@
+type font_force =
+    FONTFORCE_IT
+  | FONTFORCE_RM
+
+type font_class =
+    FONT_IT  (* IT default, may be forced to be RM *)
+  | FONT_RM  (* RM default, may be forced to be IT *)
+  | FONT_UF  (* not affected by IT/RM setting *)
+  | FONT_RTI (* RM - any, IT - not available in HTML *)
+  | FONT_UFH (* in TeX UF, in HTML RM *)
+
+type math_class =
+    MN
+  | MI
+  | MO
+
+type render_t =
+      HTMLABLEC of font_class * string * string
+    | HTMLABLEM of font_class * string * string
+    | HTMLABLE of font_class * string * string
+    | MHTMLABLEC of font_class * string * string * math_class * string
+    | HTMLABLE_BIG of string * string
+    | TEX_ONLY of string
+
+type t =
+      TEX_LITERAL of render_t
+    | TEX_CURLY of t list
+    | TEX_FQ of t * t * t
+    | TEX_DQ of t * t
+    | TEX_UQ of t * t
+    | TEX_FQN of t * t
+    | TEX_DQN of t
+    | TEX_UQN of t
+    | TEX_LR of render_t * render_t * t list
+    | TEX_BOX of string * string
+    | TEX_BIG of string * render_t
+    | TEX_FUN1 of string * t
+    | TEX_FUN1nb of string * t
+    | TEX_FUN2 of string * t * t
+    | TEX_FUN2nb of string * t * t
+    | TEX_INFIX of string * t list * t list
+    | TEX_FUN2sq of string * t * t
+    | TEX_FUN1hl  of string * (string * string) * t
+    | TEX_FUN1hf  of string * font_force * t
+    | TEX_FUN2h  of string * (t -> t -> string * string * string) * t * t
+    | TEX_INFIXh of string * (t list -> t list -> string * string * string) * 
t list * t list
+    | TEX_MATRIX of string * t list list list
+    | TEX_DECLh  of string * font_force * t list
diff --git a/texvccheck/texutil.ml b/texvccheck/texutil.ml
new file mode 100644
index 0000000..b2d2812
--- /dev/null
+++ b/texvccheck/texutil.ml
@@ -0,0 +1,712 @@
+(* vim: set sw=8 ts=8 et: *)
+open Parser
+open Tex
+open Util
+
+
+let tex_part = function
+  (* select the string field each render_t constructor binds as [tex] below *)
+  | HTMLABLE (_, tex, _)
+  | HTMLABLEM (_, tex, _)
+  | HTMLABLEC (_, tex, _) -> tex
+  | MHTMLABLEC (_, tex, _, _, _) -> tex
+  | HTMLABLE_BIG (tex, _) | TEX_ONLY tex -> tex
+
+let rec render_tex = function
+    TEX_FQ (a,b,c) -> (render_tex a) ^ "_{" ^ (render_tex  b) ^ "}^{" ^ 
(render_tex  c) ^ "}"
+  | TEX_DQ (a,b) -> (render_tex a) ^ "_{" ^ (render_tex  b) ^ "}"
+  | TEX_UQ (a,b) -> (render_tex a) ^ "^{" ^ (render_tex  b) ^ "}"
+  | TEX_FQN (a,b) -> "_{" ^ (render_tex  a) ^ "}^{" ^ (render_tex  b) ^ "}"
+  | TEX_DQN (a) -> "_{" ^ (render_tex  a) ^ "}"
+  | TEX_UQN (a) -> "^{" ^ (render_tex  a) ^ "}"
+  | TEX_LITERAL s -> tex_part s
+  | TEX_FUN1 (f,a) -> "{" ^ f ^ " " ^ (render_tex a) ^ "}"
+  | TEX_FUN1nb (f,a) -> f ^ " " ^ (render_tex a)
+  | TEX_FUN1hl (f,_,a) -> "{" ^ f ^ " " ^ (render_tex a) ^ "}"
+  | TEX_FUN1hf (f,_,a) -> "{" ^ f ^ " " ^ (render_tex a) ^ "}"
+  | TEX_DECLh (f,_,a) -> "{" ^ f ^ "{" ^ (mapjoin render_tex a) ^ "}}"
+  | TEX_FUN2 (f,a,b)    -> "{" ^ f ^ " " ^ (render_tex a) ^ (render_tex b) ^ 
"}"
+  | TEX_FUN2h (f,_,a,b) -> "{" ^ f ^ " " ^ (render_tex a) ^ (render_tex b) ^ 
"}"
+  | TEX_FUN2nb (f,a,b) -> f ^ (render_tex a) ^ (render_tex b)
+
+  | TEX_FUN2sq (f,a,b) -> "{" ^ f ^ "[ " ^ (render_tex a) ^ "]" ^ (render_tex 
b) ^ "}"
+  | TEX_CURLY (tl) -> "{" ^ (mapjoin render_tex tl) ^ "}"
+  | TEX_INFIX (s,ll,rl) -> "{" ^ (mapjoin render_tex ll) ^ " " ^ s ^ "" ^ 
(mapjoin render_tex rl) ^ "}"
+  | TEX_INFIXh (s,_,ll,rl) -> "{" ^ (mapjoin render_tex ll) ^ " " ^ s ^ "" ^ 
(mapjoin render_tex rl) ^ "}"
+  | TEX_BOX (bt,s) -> "{"^bt^"{" ^ s ^ "}}"
+  | TEX_BIG (bt,d) -> "{"^bt^(tex_part d)^"}"
+  | TEX_MATRIX (t,rows) -> "{\\begin{"^t^"}"^(mapjoine "\\\\" (mapjoine "&" 
(mapjoin render_tex)) rows)^"\\end{"^t^"}}"
+  | TEX_LR (l,r,tl) -> "\\left"^(tex_part l)^(mapjoin render_tex 
tl)^"\\right"^(tex_part r)
+
+
+(* Turn that into hash table lookup *)
+exception Illegal_tex_function of string
+
+let find cmd = match cmd with
+  "\\AA"
+  | "\\aleph"
+  | "\\alpha"
+  | "\\amalg"
+  | "\\And"
+  | "\\angle"
+  | "\\approx"
+  | "\\approxeq"
+  | "\\ast"
+  | "\\asymp"
+  | "\\backepsilon"
+  | "\\backprime"
+  | "\\backsim"
+  | "\\backsimeq"
+  | "\\barwedge"
+  | "\\Bbbk"
+  | "\\because"
+  | "\\beta"
+  | "\\beth"
+  | "\\between"
+  | "\\bigcap"
+  | "\\bigcirc"
+  | "\\bigcup"
+  | "\\bigodot"
+  | "\\bigoplus"
+  | "\\bigotimes"
+  | "\\bigsqcup"
+  | "\\bigstar"
+  | "\\bigtriangledown"
+  | "\\bigtriangleup"
+  | "\\biguplus"
+  | "\\bigvee"
+  | "\\bigwedge"
+  | "\\blacklozenge"
+  | "\\blacksquare"
+  | "\\blacktriangle"
+  | "\\blacktriangledown"
+  | "\\blacktriangleleft"
+  | "\\blacktriangleright"
+  | "\\bot"
+  | "\\bowtie"
+  | "\\Box"
+  | "\\boxdot"
+  | "\\boxminus"
+  | "\\boxplus"
+  | "\\boxtimes"
+  | "\\bullet"
+  | "\\bumpeq"
+  | "\\Bumpeq"
+  | "\\cap"
+  | "\\Cap"
+  | "\\cdot"
+  | "\\cdots"
+  | "\\centerdot"
+  | "\\checkmark"
+  | "\\chi"
+  | "\\circ"
+  | "\\circeq"
+  | "\\circlearrowleft"
+  | "\\circlearrowright"
+  | "\\circledast"
+  | "\\circledcirc"
+  | "\\circleddash"
+  | "\\circledS"
+  | "\\clubsuit"
+  | "\\colon"
+  | "\\color"
+  | "\\complement"
+  | "\\cong"
+  | "\\coprod"
+  | "\\cup"
+  | "\\Cup"
+  | "\\curlyeqprec"
+  | "\\curlyeqsucc"
+  | "\\curlyvee"
+  | "\\curlywedge"
+  | "\\curvearrowleft"
+  | "\\curvearrowright"
+  | "\\dagger"
+  | "\\daleth"
+  | "\\dashv"
+  | "\\ddagger"
+  | "\\ddots"
+  | "\\definecolor"
+  | "\\delta"
+  | "\\Delta"
+  | "\\diagdown"
+  | "\\diagup"
+  | "\\diamond"
+  | "\\Diamond"
+  | "\\diamondsuit"
+  | "\\digamma"
+  | "\\displaystyle"
+  | "\\div"
+  | "\\divideontimes"
+  | "\\doteq"
+  | "\\doteqdot"
+  | "\\dotplus"
+  | "\\dots"
+  | "\\dotsb"
+  | "\\dotsc"
+  | "\\dotsi"
+  | "\\dotsm"
+  | "\\dotso"
+  | "\\doublebarwedge"
+  | "\\downdownarrows"
+  | "\\downharpoonleft"
+  | "\\downharpoonright"
+  | "\\ell"
+  | "\\emptyset"
+  | "\\epsilon"
+  | "\\eqcirc"
+  | "\\eqsim"
+  | "\\eqslantgtr"
+  | "\\eqslantless"
+  | "\\equiv"
+  | "\\eta"
+  | "\\eth"
+  | "\\exists"
+  | "\\fallingdotseq"
+  | "\\Finv"
+  | "\\flat"
+  | "\\forall"
+  | "\\frown"
+  | "\\Game"
+  | "\\gamma"
+  | "\\Gamma"
+  | "\\geq"
+  | "\\geqq"
+  | "\\geqslant"
+  | "\\gets"
+  | "\\gg"
+  | "\\ggg"
+  | "\\gimel"
+  | "\\gnapprox"
+  | "\\gneq"
+  | "\\gneqq"
+  | "\\gnsim"
+  | "\\gtrapprox"
+  | "\\gtrdot"
+  | "\\gtreqless"
+  | "\\gtreqqless"
+  | "\\gtrless"
+  | "\\gtrsim"
+  | "\\gvertneqq"
+  | "\\hbar"
+  | "\\heartsuit"
+  | "\\hline"
+  | "\\hookleftarrow"
+  | "\\hookrightarrow"
+  | "\\hslash"
+  | "\\iff"
+  | "\\iiiint"
+  | "\\iiint"
+  | "\\iint"
+  | "\\Im"
+  | "\\imath"
+  | "\\implies"
+  | "\\in"
+  | "\\infty"
+  | "\\injlim"
+  | "\\int"
+  | "\\intercal"
+  | "\\iota"
+  | "\\jmath"
+  | "\\kappa"
+  | "\\lambda"
+  | "\\Lambda"
+  | "\\land"
+  | "\\lbrack"
+  | "\\ldots"
+  | "\\leftarrow"
+  | "\\Leftarrow"
+  | "\\leftarrowtail"
+  | "\\leftharpoondown"
+  | "\\leftharpoonup"
+  | "\\leftleftarrows"
+  | "\\leftrightarrow"
+  | "\\Leftrightarrow"
+  | "\\leftrightarrows"
+  | "\\leftrightharpoons"
+  | "\\leftrightsquigarrow"
+  | "\\leftthreetimes"
+  | "\\leq"
+  | "\\leqq"
+  | "\\leqslant"
+  | "\\lessapprox"
+  | "\\lessdot"
+  | "\\lesseqgtr"
+  | "\\lesseqqgtr"
+  | "\\lessgtr"
+  | "\\lesssim"
+  | "\\limits"
+  | "\\ll"
+  | "\\Lleftarrow"
+  | "\\lll"
+  | "\\lnapprox"
+  | "\\lneq"
+  | "\\lneqq"
+  | "\\lnot"
+  | "\\lnsim"
+  | "\\longleftarrow"
+  | "\\Longleftarrow"
+  | "\\longleftrightarrow"
+  | "\\Longleftrightarrow"
+  | "\\longmapsto"
+  | "\\longrightarrow"
+  | "\\Longrightarrow"
+  | "\\looparrowleft"
+  | "\\looparrowright"
+  | "\\lor"
+  | "\\lozenge"
+  | "\\Lsh"
+  | "\\ltimes"
+  | "\\lVert"
+  | "\\lvertneqq"
+  | "\\mapsto"
+  | "\\measuredangle"
+  | "\\mho"
+  | "\\mid"
+  | "\\mod"
+  | "\\models"
+  | "\\mp"
+  | "\\mu"
+  | "\\multimap"
+  | "\\nabla"
+  | "\\natural"
+  | "\\ncong"
+  | "\\nearrow"
+  | "\\neg"
+  | "\\neq"
+  | "\\nexists"
+  | "\\ngeq"
+  | "\\ngeqq"
+  | "\\ngeqslant"
+  | "\\ngtr"
+  | "\\ni"
+  | "\\nleftarrow"
+  | "\\nLeftarrow"
+  | "\\nleftrightarrow"
+  | "\\nLeftrightarrow"
+  | "\\nleq"
+  | "\\nleqq"
+  | "\\nleqslant"
+  | "\\nless"
+  | "\\nmid"
+  | "\\nolimits"
+  | "\\not"
+  | "\\notin"
+  | "\\nparallel"
+  | "\\nprec"
+  | "\\npreceq"
+  | "\\nrightarrow"
+  | "\\nRightarrow"
+  | "\\nshortmid"
+  | "\\nshortparallel"
+  | "\\nsim"
+  | "\\nsubseteq"
+  | "\\nsubseteqq"
+  | "\\nsucc"
+  | "\\nsucceq"
+  | "\\nsupseteq"
+  | "\\nsupseteqq"
+  | "\\ntriangleleft"
+  | "\\ntrianglelefteq"
+  | "\\ntriangleright"
+  | "\\ntrianglerighteq"
+  | "\\nu"
+  | "\\nvdash"
+  | "\\nVdash"
+  | "\\nvDash"
+  | "\\nVDash"
+  | "\\nwarrow"
+  | "\\odot"
+  | "\\oint"
+  | "\\omega"
+  | "\\Omega"
+  | "\\ominus"
+  | "\\oplus"
+  | "\\oslash"
+  | "\\otimes"
+  | "\\overbrace"
+  | "\\overleftarrow"
+  | "\\overleftrightarrow"
+  | "\\overline"
+  | "\\overrightarrow"
+  | "\\P"
+  | "\\pagecolor"
+  | "\\parallel"
+  | "\\partial"
+  | "\\perp"
+  | "\\phi"
+  | "\\Phi"
+  | "\\pi"
+  | "\\Pi"
+  | "\\pitchfork"
+  | "\\pm"
+  | "\\prec"
+  | "\\precapprox"
+  | "\\preccurlyeq"
+  | "\\preceq"
+  | "\\precnapprox"
+  | "\\precneqq"
+  | "\\precnsim"
+  | "\\precsim"
+  | "\\prime"
+  | "\\prod"
+  | "\\projlim"
+  | "\\propto"
+  | "\\psi"
+  | "\\Psi"
+  | "\\qquad"
+  | "\\quad"
+  | "\\rbrack"
+  | "\\Re"
+  | "\\rho"
+  | "\\rightarrow"
+  | "\\Rightarrow"
+  | "\\rightarrowtail"
+  | "\\rightharpoondown"
+  | "\\rightharpoonup"
+  | "\\rightleftarrows"
+  | "\\rightrightarrows"
+  | "\\rightsquigarrow"
+  | "\\rightthreetimes"
+  | "\\risingdotseq"
+  | "\\Rrightarrow"
+  | "\\Rsh"
+  | "\\rtimes"
+  | "\\rVert"
+  | "\\S"
+  | "\\scriptscriptstyle"
+  | "\\scriptstyle"
+  | "\\searrow"
+  | "\\setminus"
+  | "\\sharp"
+  | "\\shortmid"
+  | "\\shortparallel"
+  | "\\sigma"
+  | "\\Sigma"
+  | "\\sim"
+  | "\\simeq"
+  | "\\smallfrown"
+  | "\\smallsetminus"
+  | "\\smallsmile"
+  | "\\smile"
+  | "\\spadesuit"
+  | "\\sphericalangle"
+  | "\\sqcap"
+  | "\\sqcup"
+  | "\\sqsubset"
+  | "\\sqsubseteq"
+  | "\\sqsupset"
+  | "\\sqsupseteq"
+  | "\\square"
+  | "\\star"
+  | "\\subset"
+  | "\\Subset"
+  | "\\subseteq"
+  | "\\subseteqq"
+  | "\\subsetneq"
+  | "\\subsetneqq"
+  | "\\succ"
+  | "\\succapprox"
+  | "\\succcurlyeq"
+  | "\\succeq"
+  | "\\succnapprox"
+  | "\\succneqq"
+  | "\\succnsim"
+  | "\\succsim"
+  | "\\sum"
+  | "\\supset"
+  | "\\Supset"
+  | "\\supseteq"
+  | "\\supseteqq"
+  | "\\supsetneq"
+  | "\\supsetneqq"
+  | "\\surd"
+  | "\\swarrow"
+  | "\\tau"
+  | "\\textstyle"
+  | "\\textvisiblespace"
+  | "\\therefore"
+  | "\\theta"
+  | "\\Theta"
+  | "\\thickapprox"
+  | "\\thicksim"
+  | "\\times"
+  | "\\to"
+  | "\\top"
+  | "\\triangle"
+  | "\\triangledown"
+  | "\\triangleleft"
+  | "\\trianglelefteq"
+  | "\\triangleq"
+  | "\\triangleright"
+  | "\\trianglerighteq"
+  | "\\underbrace"
+  | "\\underline"
+  | "\\upharpoonleft"
+  | "\\upharpoonright"
+  | "\\uplus"
+  | "\\upsilon"
+  | "\\Upsilon"
+  | "\\upuparrows"
+  | "\\varepsilon"
+  | "\\varinjlim"
+  | "\\varkappa"
+  | "\\varliminf"
+  | "\\varlimsup"
+  | "\\varnothing"
+  | "\\varphi"
+  | "\\varpi"
+  | "\\varprojlim"
+  | "\\varpropto"
+  | "\\varrho"
+  | "\\varsigma"
+  | "\\varsubsetneq"
+  | "\\varsubsetneqq"
+  | "\\varsupsetneq"
+  | "\\varsupsetneqq"
+  | "\\vartheta"
+  | "\\vartriangle"
+  | "\\vartriangleleft"
+  | "\\vartriangleright"
+  | "\\vdash"
+  | "\\Vdash"
+  | "\\vDash"
+  | "\\vdots"
+  | "\\vee"
+  | "\\veebar"
+  | "\\vline"
+  | "\\Vvdash"
+  | "\\wedge"
+  | "\\widehat"
+  | "\\widetilde"
+  | "\\wp"
+  | "\\wr"
+  | "\\xi"
+  | "\\Xi"
+  | "\\zeta"
+  -> LITERAL ( TEX_ONLY( cmd ^ " " ) )
+
+  | "\\big"
+  | "\\Big"
+  | "\\bigg"
+  | "\\Bigg"
+  | "\\biggl"
+  | "\\Biggl"
+  | "\\biggr"
+  | "\\Biggr"
+  | "\\bigl"
+  | "\\Bigl"
+  | "\\bigr"
+  | "\\Bigr"
+  -> BIG (  cmd ^ " " )
+
+  | "\\backslash"
+  | "\\downarrow"
+  | "\\Downarrow"
+  | "\\langle"
+  | "\\lbrace"
+  | "\\lceil"
+  | "\\lfloor"
+  | "\\llcorner"
+  | "\\lrcorner"
+  | "\\rangle"
+  | "\\rbrace"
+  | "\\rceil"
+  | "\\rfloor"
+  | "\\rightleftharpoons"
+  | "\\twoheadleftarrow"
+  | "\\twoheadrightarrow"
+  | "\\ulcorner"
+  | "\\uparrow"
+  | "\\Uparrow"
+  | "\\updownarrow"
+  | "\\Updownarrow"
+  | "\\urcorner"
+  | "\\Vert"
+  | "\\vert"
+  -> DELIMITER( TEX_ONLY( cmd ^ " ") )
+  | "\\acute"
+  | "\\bar"
+  | "\\bcancel"
+  | "\\bmod"
+  | "\\boldsymbol"
+  | "\\breve"
+  | "\\cancel"
+  | "\\check"
+  | "\\ddot"
+  | "\\dot"
+  | "\\emph"
+  | "\\grave"
+  | "\\hat"
+  | "\\mathbb"
+  | "\\mathbf"
+  | "\\mathbin"
+  | "\\mathcal"
+  | "\\mathclose"
+  | "\\mathfrak"
+  | "\\mathit"
+  | "\\mathop"
+  | "\\mathopen"
+  | "\\mathord"
+  | "\\mathpunct"
+  | "\\mathrel"
+  | "\\mathrm"
+  | "\\mathsf"
+  | "\\mathtt"
+  | "\\operatorname"
+  | "\\pmod"
+  | "\\sqrt"
+  | "\\textbf"
+  | "\\textit"
+  | "\\textrm"
+  | "\\textsf"
+  | "\\texttt"
+  | "\\tilde"
+  | "\\vec"
+  | "\\xcancel"
+  | "\\xleftarrow"
+  | "\\xrightarrow"
+  -> FUN_AR1( cmd ^ " " )
+
+  | "\\binom"
+  | "\\cancelto"
+  | "\\cfrac"
+  | "\\dbinom"
+  | "\\dfrac"
+  | "\\frac"
+  | "\\overset"
+  | "\\stackrel"
+  | "\\tbinom"
+  | "\\tfrac"
+  | "\\underset"
+  -> FUN_AR2( cmd ^ " " )
+  | "\\atop"
+  | "\\choose"
+  | "\\over"
+  -> FUN_INFIX( cmd ^ " " )
+
+  | "\\Coppa"
+  | "\\coppa"
+  | "\\Digamma"
+  | "\\euro"
+  | "\\geneuro"
+  | "\\geneuronarrow"
+  | "\\geneurowide"
+  | "\\Koppa"
+  | "\\koppa"
+  | "\\officialeuro"
+  | "\\Sampi"
+  | "\\sampi"
+  | "\\Stigma"
+  | "\\stigma"
+  | "\\varstigma"
+  -> LITERAL ( TEX_ONLY( "\\mbox{" ^ cmd ^ "} " ) )
+
+  | "\\C"
+  | "\\H"
+  | "\\N"
+  | "\\Q"
+  | "\\R"
+  | "\\Z"
+  -> LITERAL ( TEX_ONLY( "\\mathbb{" ^ cmd ^ "} " ) )
+
+  | "\\darr" -> DELIMITER( TEX_ONLY( "\\downarrow" ^ " " ) )
+  | "\\dArr" -> DELIMITER( TEX_ONLY( "\\Downarrow" ^ " " ) )
+  | "\\Darr" -> DELIMITER( TEX_ONLY( "\\Downarrow" ^ " " ) )
+  | "\\lang" -> DELIMITER( TEX_ONLY( "\\langle" ^ " " ) )
+  | "\\rang" -> DELIMITER( TEX_ONLY( "\\rangle" ^ " " ) )
+  | "\\uarr" -> DELIMITER( TEX_ONLY( "\\uparrow" ^ " " ) )
+  | "\\uArr" -> DELIMITER( TEX_ONLY( "\\Uparrow" ^ " " ) )
+  | "\\Uarr" -> DELIMITER( TEX_ONLY( "\\Uparrow" ^ " " ) )
+
+  | "\\Bbb" -> FUN_AR1( "\\mathbb" ^ " " )
+  | "\\bold" -> FUN_AR1( "\\mathbf" ^ " " )
+
+  | "\\alef" -> LITERAL ( TEX_ONLY( "\\aleph" ^ " " ) )
+  | "\\alefsym" -> LITERAL ( TEX_ONLY( "\\aleph" ^ " " ) )
+  | "\\Alpha" -> LITERAL ( TEX_ONLY( "\\mathrm{A}" ^ " " ) )
+  | "\\and" -> LITERAL ( TEX_ONLY( "\\land" ^ " " ) )
+  | "\\ang" -> LITERAL ( TEX_ONLY( "\\angle" ^ " " ) )
+  | "\\Beta" -> LITERAL ( TEX_ONLY( "\\mathrm{B}" ^ " " ) )
+  | "\\bull" -> LITERAL ( TEX_ONLY( "\\bullet" ^ " " ) )
+  | "\\Chi" -> LITERAL ( TEX_ONLY( "\\mathrm{X}" ^ " " ) )
+  | "\\clubs" -> LITERAL ( TEX_ONLY( "\\clubsuit" ^ " " ) )
+  | "\\cnums" -> LITERAL ( TEX_ONLY( "\\mathbb{C}" ^ " " ) )
+  | "\\Complex" -> LITERAL ( TEX_ONLY( "\\mathbb{C}" ^ " " ) )
+  | "\\Dagger" -> LITERAL ( TEX_ONLY( "\\ddagger" ^ " " ) )
+  | "\\diamonds" -> LITERAL ( TEX_ONLY( "\\diamondsuit" ^ " " ) )
+  | "\\Doteq" -> LITERAL ( TEX_ONLY( "\\doteqdot" ^ " " ) )
+  | "\\doublecap" -> LITERAL ( TEX_ONLY( "\\Cap" ^ " " ) )
+  | "\\doublecup" -> LITERAL ( TEX_ONLY( "\\Cup" ^ " " ) )
+  | "\\empty" -> LITERAL ( TEX_ONLY( "\\emptyset" ^ " " ) )
+  | "\\Epsilon" -> LITERAL ( TEX_ONLY( "\\mathrm{E}" ^ " " ) )
+  | "\\Eta" -> LITERAL ( TEX_ONLY( "\\mathrm{H}" ^ " " ) )
+  | "\\exist" -> LITERAL ( TEX_ONLY( "\\exists" ^ " " ) )
+  | "\\ge" -> LITERAL ( TEX_ONLY( "\\geq" ^ " " ) )
+  | "\\gggtr" -> LITERAL ( TEX_ONLY( "\\ggg" ^ " " ) )
+  | "\\hAar" -> LITERAL ( TEX_ONLY( "\\Leftrightarrow" ^ " " ) )
+  | "\\harr" -> LITERAL ( TEX_ONLY( "\\leftrightarrow" ^ " " ) )
+  | "\\Harr" -> LITERAL ( TEX_ONLY( "\\Leftrightarrow" ^ " " ) )
+  | "\\hearts" -> LITERAL ( TEX_ONLY( "\\heartsuit" ^ " " ) )
+  | "\\image" -> LITERAL ( TEX_ONLY( "\\Im" ^ " " ) )
+  | "\\infin" -> LITERAL ( TEX_ONLY( "\\infty" ^ " " ) )
+  | "\\Iota" -> LITERAL ( TEX_ONLY( "\\mathrm{I}" ^ " " ) )
+  | "\\isin" -> LITERAL ( TEX_ONLY( "\\in" ^ " " ) )
+  | "\\Kappa" -> LITERAL ( TEX_ONLY( "\\mathrm{K}" ^ " " ) )
+  | "\\larr" -> LITERAL ( TEX_ONLY( "\\leftarrow" ^ " " ) )
+  | "\\Larr" -> LITERAL ( TEX_ONLY( "\\Leftarrow" ^ " " ) )
+  | "\\lArr" -> LITERAL ( TEX_ONLY( "\\Leftarrow" ^ " " ) )
+  | "\\le" -> LITERAL ( TEX_ONLY( "\\leq" ^ " " ) )
+  | "\\lrarr" -> LITERAL ( TEX_ONLY( "\\leftrightarrow" ^ " " ) )
+  | "\\Lrarr" -> LITERAL ( TEX_ONLY( "\\Leftrightarrow" ^ " " ) )
+  | "\\lrArr" -> LITERAL ( TEX_ONLY( "\\Leftrightarrow" ^ " " ) )
+  | "\\Mu" -> LITERAL ( TEX_ONLY( "\\mathrm{M}" ^ " " ) )
+  | "\\natnums" -> LITERAL ( TEX_ONLY( "\\mathbb{N}" ^ " " ) )
+  | "\\ne" -> LITERAL ( TEX_ONLY( "\\neq" ^ " " ) )
+  | "\\Nu" -> LITERAL ( TEX_ONLY( "\\mathrm{N}" ^ " " ) )
+  | "\\O" -> LITERAL ( TEX_ONLY( "\\emptyset" ^ " " ) )
+  | "\\omicron" -> LITERAL ( TEX_ONLY( "\\mathrm{o}" ^ " " ) )
+  | "\\Omicron" -> LITERAL ( TEX_ONLY( "\\mathrm{O}" ^ " " ) )
+  | "\\or" -> LITERAL ( TEX_ONLY( "\\lor" ^ " " ) )
+  | "\\part" -> LITERAL ( TEX_ONLY( "\\partial" ^ " " ) )
+  | "\\plusmn" -> LITERAL ( TEX_ONLY( "\\pm" ^ " " ) )
+  | "\\rarr" -> LITERAL ( TEX_ONLY( "\\rightarrow" ^ " " ) )
+  | "\\Rarr" -> LITERAL ( TEX_ONLY( "\\Rightarrow" ^ " " ) )
+  | "\\rArr" -> LITERAL ( TEX_ONLY( "\\Rightarrow" ^ " " ) )
+  | "\\real" -> LITERAL ( TEX_ONLY( "\\Re" ^ " " ) )
+  | "\\reals" -> LITERAL ( TEX_ONLY( "\\mathbb{R}" ^ " " ) )
+  | "\\Reals" -> LITERAL ( TEX_ONLY( "\\mathbb{R}" ^ " " ) )
+  | "\\restriction" -> LITERAL ( TEX_ONLY( "\\upharpoonright" ^ " " ) )
+  | "\\Rho" -> LITERAL ( TEX_ONLY( "\\mathrm{P}" ^ " " ) )
+  | "\\sdot" -> LITERAL ( TEX_ONLY( "\\cdot" ^ " " ) )
+  | "\\sect" -> LITERAL ( TEX_ONLY( "\\S" ^ " " ) )
+  | "\\spades" -> LITERAL ( TEX_ONLY( "\\spadesuit" ^ " " ) )
+  | "\\sub" -> LITERAL ( TEX_ONLY( "\\subset" ^ " " ) )
+  | "\\sube" -> LITERAL ( TEX_ONLY( "\\subseteq" ^ " " ) )
+  | "\\supe" -> LITERAL ( TEX_ONLY( "\\supseteq" ^ " " ) )
+  | "\\Tau" -> LITERAL ( TEX_ONLY( "\\mathrm{T}" ^ " " ) )
+  | "\\thetasym" -> LITERAL ( TEX_ONLY( "\\vartheta" ^ " " ) )
+  | "\\varcoppa" -> LITERAL ( TEX_ONLY( "\\mbox{coppa}" ^ " " ) )
+  | "\\weierp" -> LITERAL ( TEX_ONLY( "\\wp" ^ " " ) )
+  | "\\Zeta" -> LITERAL ( TEX_ONLY( "\\mathrm{Z}" ^ " " ) )
+
+  | "\\rm"
+  | "\\it"
+  | "\\cal"
+  | "\\bf"
+  -> DECLh ( cmd ^ " ", FONTFORCE_RM ) (* see bug 54818 *)
+
+  | "\\sideset" -> FUN_AR2nb "\\sideset "
+  | "\\left" -> LEFT
+  | "\\right" -> RIGHT
+
+  | "\\text"
+  | "\\mbox"
+  | "\\vbox"
+  | "\\hbox"
+  -> raise (Failure ("malformatted " ^ cmd))
+
+  | s -> raise (Illegal_tex_function s)
+
+
+
diff --git a/texvccheck/texvccheck.ml b/texvccheck/texvccheck.ml
new file mode 100644
index 0000000..a0528c9
--- /dev/null
+++ b/texvccheck/texvccheck.ml
@@ -0,0 +1,40 @@
+(* vim: set sw=8 ts=8 et: *)
+exception LexerException of string
+
+(* Lexer.token, but with any Failure re-raised as LexerException (same text). *)
+let lexer_token_safe lexbuf =
+    try Lexer.token lexbuf with
+    | Failure reason -> raise (LexerException reason)
+
+(* Render every node of tree with Texutil.render_tex, join the pieces via
+ * Util.mapjoin, and write the result to stdout behind a leading "+". *)
+let render tree =
+    let rendered = Util.mapjoin Texutil.render_tex tree in
+    print_char '+';
+    print_string rendered
+
+(* Entry point: validate the TeX passed on the command line and print the
+ * outcome to stdout.
+ *
+ * Arguments:
+ * 1st : input string handed to the lexer and parser (Sys.argv.(1))
+ *
+ * The first character written identifies the outcome:
+ *  + : success, followed by the rendered TeX (written by render)
+ *  E : Lexer exception raised
+ *  F : TeX function not recognized, followed by the offending name
+ *  S : Parsing error
+ *  - : Generic/Default failure code, e.g. an invalid or missing
+ *      argument; used for every exception not listed above
+ *)
+let _ =
+    let check input =
+        let lexbuf = Lexing.from_string input in
+        render (Parser.tex_expr lexer_token_safe lexbuf)
+    in
+    (* Sys.argv.(1) is read inside the try so a missing argument yields "-" *)
+    try check Sys.argv.(1) with
+    | Parsing.Parse_error -> print_string "S"
+    | LexerException _ -> print_string "E"
+    | Texutil.Illegal_tex_function name -> print_string ("F" ^ name)
+    | _ -> print_string "-"
diff --git a/texvccheck/util.ml b/texvccheck/util.ml
new file mode 100644
index 0000000..e380279
--- /dev/null
+++ b/texvccheck/util.ml
@@ -0,0 +1,9 @@
+(* vim: set sw=8 ts=8 et: *)
+
+(* mapjoin f l: concatenate f x for each x of l, in list order ("" for []); one String.concat instead of repeated (^), tail-recursive. *)
+let mapjoin f l = String.concat "" (List.rev (List.rev_map f l))
+
+(* mapjoine e f l: apply f to each element of l, in list order, and join the
+ * results with separator e between neighbours; "" for the empty list. Uses
+ * one String.concat (tail-recursive map) instead of repeated (^) copies. *)
+let mapjoine e f l = String.concat e (List.rev (List.rev_map f l))
\ No newline at end of file

-- 
To view, visit https://gerrit.wikimedia.org/r/90748
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I1650e6ec2ccefff6335fbc36bbe8ca8f59db0faa
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Math
Gerrit-Branch: master
Gerrit-Owner: Physikerwelt <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to