This is an automated email from the git hooks/post-receive script. mehdi pushed a commit to branch master in repository tyxml.
commit f36354ddf0c9b4f4f60feb91f9b908fbec85436b Author: Mehdi Dogguy <me...@debian.org> Date: Sun Oct 18 11:16:08 2015 +0200 Imported Upstream version 3.3.0 --- .merlin | 2 ++ CHANGES | 4 ++++ _oasis | 5 ++-- _tags | 3 ++- lib/META | 13 ++++++----- lib/xml_print.ml | 70 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ lib/xml_print.mli | 32 +++++++++++++++++++++++++ setup.ml | 54 +++++++++++++++++++++++------------------- 8 files changed, 150 insertions(+), 33 deletions(-) diff --git a/.merlin b/.merlin index 453a22c..4b95b98 100644 --- a/.merlin +++ b/.merlin @@ -4,3 +4,5 @@ S syntax B _build B _build/lib B _build/syntax + +PKG uutf \ No newline at end of file diff --git a/CHANGES b/CHANGES index 82bc0d0..a1269e4 100644 --- a/CHANGES +++ b/CHANGES @@ -1,5 +1,9 @@ ===== dev ==== +===== 3.3.0 ==== + + * Add `Xml_print.Utf8` to encode html elements to utf8 properly. + ===== 3.2.1 ==== * Add signature functors to ease export of module created with the functorial interface. diff --git a/_oasis b/_oasis index 675e323..7dedb20 100644 --- a/_oasis +++ b/_oasis @@ -1,6 +1,6 @@ OASISFormat: 0.4 Name: tyxml -Version: 3.2.1 +Version: 3.3.0 Homepage: http://ocsigen.org/tyxml/ Authors: Thorsten Ohl, @@ -45,7 +45,7 @@ Library tyxml Xml_print, Svg_f, Html5_f - BuildDepends: str + BuildDepends: str, uutf Library tyxml_f FindlibName: functor @@ -64,6 +64,7 @@ Library tyxml_f Html5_sigs, Html5_types, Html5_f + BuildDepends: uutf Library pa_tyxml Build$: flag(syntax) diff --git a/_tags b/_tags index b8b2305..0f090b9 100644 --- a/_tags +++ b/_tags @@ -1,5 +1,5 @@ # OASIS_START -# DO NOT EDIT (digest: a8b2c1c0d619924a93a606114a692f33) +# DO NOT EDIT (digest: 9b16479512c40fcf68605f52a13f8d62) # Ignore VCS directories, you can use the same kind of rule outside # OASIS_START/STOP if you want to exclude directories that contains # useless stuff for the build process @@ -18,6 +18,7 @@ <lib/*.ml{,i}>: pkg_str # Library tyxml_f "lib/tyxml_f.cmxs": use_tyxml_f +<lib/*.ml{,i}>: pkg_uutf 
# Library pa_tyxml "syntax/pa_tyxml.cmxs": use_pa_tyxml <syntax/*.ml{,i}>: pkg_camlp4 diff --git a/lib/META b/lib/META index 844a010..44d9fba 100644 --- a/lib/META +++ b/lib/META @@ -1,15 +1,15 @@ # OASIS_START -# DO NOT EDIT (digest: a6fd6e6521a0512852d47b4b8b60917f) -version = "3.2.1" +# DO NOT EDIT (digest: c97aacbecdef7d92d60ce4f8423803b4) +version = "3.3.0" description = "HTML5 pages typed with polymorphic variants" -requires = "str" +requires = "str uutf" archive(byte) = "tyxml.cma" archive(byte, plugin) = "tyxml.cma" archive(native) = "tyxml.cmxa" archive(native, plugin) = "tyxml.cmxs" exists_if = "tyxml.cma" package "syntax" ( - version = "3.2.1" + version = "3.3.0" description = "HTML5 and SVG syntax extension" requires = "camlp4" archive(syntax, preprocessor) = "pa_tyxml.cma" @@ -21,7 +21,7 @@ package "syntax" ( ) package "parser" ( - version = "3.2.1" + version = "3.3.0" description = "Simple XML parser" requires = "camlp4.lib" archive(byte) = "tymlx_p.cma" @@ -32,9 +32,10 @@ package "parser" ( ) package "functor" ( - version = "3.2.1" + version = "3.3.0" description = "HTML5 pages typed with polymorphic variants (Functor version)" + requires = "uutf" archive(byte) = "tyxml_f.cma" archive(byte, plugin) = "tyxml_f.cma" archive(native) = "tyxml_f.cmxa" diff --git a/lib/xml_print.ml b/lib/xml_print.ml index 6e19134..65a0d16 100644 --- a/lib/xml_print.ml +++ b/lib/xml_print.ml @@ -90,6 +90,76 @@ let string_of_number v = then s2 else Printf.sprintf "%.18g" v + +module Utf8 = struct + type utf8 = string + type encoding = [ `UTF_16 | `UTF_16BE | `UTF_16LE | `UTF_8 | `US_ASCII | `ISO_8859_1] + let normalize_from ~encoding src = + let warn = ref false in + let rec loop d e = match Uutf.decode d with + | `Uchar _ as u -> ignore (Uutf.encode e u); loop d e + | `End -> ignore (Uutf.encode e `End) + | `Malformed _ -> ignore (Uutf.encode e (`Uchar Uutf.u_rep)); warn:=true;loop d e + | `Await -> assert false + in + let d = Uutf.decoder ~encoding (`String src) in + let 
buffer = Buffer.create (String.length src) in + let e = Uutf.encoder `UTF_8 (`Buffer buffer) in + loop d e; + Buffer.contents buffer, !warn + + let normalize src = normalize_from ~encoding:`UTF_8 src + + let normalize_html src = + let warn = ref false in + let str e s = + for i = 0 to String.length s - 1 do + ignore (Uutf.encode e (`Uchar (Char.code s.[i]))) + done in + let rec loop d e = match Uutf.decode d with + | `Uchar 34 -> str e "&quot;"; loop d e + | `Uchar 38 -> str e "&amp;"; loop d e + | `Uchar 60 -> str e "&lt;"; loop d e + | `Uchar 62 -> str e "&gt;"; loop d e + | `Uchar code as u -> + let u = + (* Illegal characters in html + http://en.wikipedia.org/wiki/Character_encodings_in_HTML + http://www.w3.org/TR/html5/syntax.html *) + if (* A. control C0 *) + (code <= 31 && code <> 9 && code <> 10 && code <> 13) + (* B. DEL + control C1 + - invalid in html + - discouraged in xml; + exept 0x85 see http://www.w3.org/TR/newline + but let's discard it anyway *) + || (code >= 127 && code <= 159) + (* C. UTF-16 surrogate halves : already discarded by uutf *) + (* || (code >= 0xD800 && code <= 0xDFFF) *) + (* D. 
BOOM related *) + || code land 0xFFFF = 0xFFFE + || code land 0xFFFF = 0xFFFF + + then (warn:=true;`Uchar Uutf.u_rep) + else u in + ignore (Uutf.encode e u); + loop d e + | `End -> ignore (Uutf.encode e `End) + | `Malformed _ -> + ignore (Uutf.encode e (`Uchar Uutf.u_rep)); + warn:=true; + loop d e + | `Await -> assert false + in + let d = Uutf.decoder ~encoding:`UTF_8 (`String src) in + let buffer = Buffer.create (String.length src) in + let e = Uutf.encoder `UTF_8 (`Buffer buffer) in + loop d e; + Buffer.contents buffer, !warn + +end + + module Make (Xml : Xml_sigs.Iterable) (F : sig val emptytags : string list end) diff --git a/lib/xml_print.mli b/lib/xml_print.mli index cdbf606..f254231 100644 --- a/lib/xml_print.mli +++ b/lib/xml_print.mli @@ -45,6 +45,38 @@ val compose_doctype : string -> string list -> string val string_of_number : float -> string (** Convert a float to a string using a compact representation compatible with Javascript norme. *) +(** Utf8 normalizer and encoder for HTML. + +Given a module [Htmlprinter] produced by one of the functors in {!Xml_print}, this modules is used as following: + {[ + let encode x = fst (Utf8.normalize_html x) in + Htmlprinter.print ~encode document + ]} *) +module Utf8 : sig + + type utf8 = string + (** [normalize str] take a possibly invalid utf-8 string + and return a valid utf-8 string + where invalid bytes have been replaced by + the replacement character [U+FFFD]. + The returned boolean is true if invalid bytes were found *) + val normalize : string -> utf8 * bool + + (** Same as [normalize] plus some extra work : + It encode '<' , '>' , '"' , '&' characters with + corresponding entities and replaced invalid html + character by [U+FFFD] *) + val normalize_html : string -> utf8 * bool + + type encoding = [ `UTF_16 | `UTF_16BE | `UTF_16LE | `UTF_8 | `US_ASCII | `ISO_8859_1] + + (** [normalize_from ~encoding str] convert the string [str] into an uft-8 string. 
+ It assumes the [encoding] encoding and replace invalid bytes by + the replacement character [U+FFFD]. + The returned boolean is true if invalid bytes were found *) + val normalize_from : encoding:[<encoding] -> string -> utf8 * bool +end + module Make (Xml : Xml_sigs.Iterable) (I : sig val emptytags : string list end) diff --git a/setup.ml b/setup.ml index bc535f5..67af621 100644 --- a/setup.ml +++ b/setup.ml @@ -1,7 +1,7 @@ (* setup.ml generated for the first time by OASIS v0.3.0 *) (* OASIS_START *) -(* DO NOT EDIT (digest: 3aded4e69a60f79123e2280b32078d22) *) +(* DO NOT EDIT (digest: 100e0c44c6c608ca72769bcd6b2a90c8) *) (* Regenerated by OASIS v0.4.5 Visit http://oasis.forge.ocamlcore.org for more information and @@ -242,11 +242,9 @@ module OASISString = struct let replace_chars f s = - let buf = String.make (String.length s) 'X' in - for i = 0 to String.length s - 1 do - buf.[i] <- f s.[i] - done; - buf + let buf = Buffer.create (String.length s) in + String.iter (fun c -> Buffer.add_char buf (f c)) s; + Buffer.contents buf end @@ -2506,13 +2504,13 @@ module OASISFindlib = struct in let library_name_of_findlib_name = - Lazy.lazy_from_fun - (fun () -> - (* Revert findlib_name_of_library_name. *) - MapString.fold - (fun k v mp -> MapString.add v k mp) - fndlb_name_of_lib_name - MapString.empty) + lazy begin + (* Revert findlib_name_of_library_name. 
*) + MapString.fold + (fun k v mp -> MapString.add v k mp) + fndlb_name_of_lib_name + MapString.empty + end in let library_name_of_findlib_name fndlb_nm = try @@ -2882,7 +2880,7 @@ module OASISFileUtil = struct end -# 2885 "setup.ml" +# 2883 "setup.ml" module BaseEnvLight = struct (* # 22 "src/base/BaseEnvLight.ml" *) @@ -2987,7 +2985,7 @@ module BaseEnvLight = struct end -# 2990 "setup.ml" +# 2988 "setup.ml" module BaseContext = struct (* # 22 "src/base/BaseContext.ml" *) @@ -5398,7 +5396,7 @@ module BaseSetup = struct end -# 5401 "setup.ml" +# 5399 "setup.ml" module InternalConfigurePlugin = struct (* # 22 "src/plugins/internal/InternalConfigurePlugin.ml" *) @@ -6247,7 +6245,7 @@ module InternalInstallPlugin = struct end -# 6250 "setup.ml" +# 6248 "setup.ml" module OCamlbuildCommon = struct (* # 22 "src/plugins/ocamlbuild/OCamlbuildCommon.ml" *) @@ -6305,6 +6303,11 @@ module OCamlbuildCommon = struct else []; + if bool_of_string (tests ()) then + ["-tag"; "tests"] + else + []; + if bool_of_string (profile ()) then ["-tag"; "profile"] else @@ -6620,7 +6623,7 @@ module OCamlbuildDocPlugin = struct end -# 6623 "setup.ml" +# 6626 "setup.ml" open OASISTypes;; let setup_t = @@ -6655,7 +6658,7 @@ let setup_t = alpha_features = ["pure_interface"; "compiled_setup_ml"]; beta_features = []; name = "tyxml"; - version = "3.2.1"; + version = "3.3.0"; license = OASISLicense.DEP5License (OASISLicense.DEP5Unit @@ -6745,7 +6748,11 @@ let setup_t = bs_install = [(OASISExpr.EBool true, true)]; bs_path = "lib"; bs_compiled_object = Best; - bs_build_depends = [FindlibPackage ("str", None)]; + bs_build_depends = + [ + FindlibPackage ("str", None); + FindlibPackage ("uutf", None) + ]; bs_build_tools = [ExternalTool "ocamlbuild"]; bs_c_sources = []; bs_data_files = []; @@ -6782,7 +6789,7 @@ let setup_t = bs_install = [(OASISExpr.EBool true, true)]; bs_path = "lib"; bs_compiled_object = Best; - bs_build_depends = []; + bs_build_depends = [FindlibPackage ("uutf", None)]; bs_build_tools = 
[ExternalTool "ocamlbuild"]; bs_c_sources = []; bs_data_files = []; @@ -6925,8 +6932,7 @@ let setup_t = }; oasis_fn = Some "_oasis"; oasis_version = "0.4.5"; - oasis_digest = - Some "\167\206\238@\214\216S}\255\132,\132\015\012\011\027"; + oasis_digest = Some "/V\255V\170\176\b\196k\227\178\026+\158\224\254"; oasis_exec = None; oasis_setup_args = []; setup_update = false @@ -6934,6 +6940,6 @@ let setup_t = let setup () = BaseSetup.setup setup_t;; -# 6938 "setup.ml" +# 6944 "setup.ml" (* OASIS_STOP *) let () = setup ();; -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-ocaml-maint/packages/tyxml.git _______________________________________________ Pkg-ocaml-maint-commits mailing list Pkg-ocaml-maint-commits@lists.alioth.debian.org http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/pkg-ocaml-maint-commits