This is an automated email from the git hooks/post-receive script.

mehdi pushed a commit to branch master
in repository tyxml.

commit f36354ddf0c9b4f4f60feb91f9b908fbec85436b
Author: Mehdi Dogguy <me...@debian.org>
Date:   Sun Oct 18 11:16:08 2015 +0200

    Imported Upstream version 3.3.0
---
 .merlin           |  2 ++
 CHANGES           |  4 ++++
 _oasis            |  5 ++--
 _tags             |  3 ++-
 lib/META          | 13 ++++++-----
 lib/xml_print.ml  | 70 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/xml_print.mli | 32 +++++++++++++++++++++++++
 setup.ml          | 54 +++++++++++++++++++++++-------------------
 8 files changed, 150 insertions(+), 33 deletions(-)

diff --git a/.merlin b/.merlin
index 453a22c..4b95b98 100644
--- a/.merlin
+++ b/.merlin
@@ -4,3 +4,5 @@ S syntax
 B _build
 B _build/lib
 B _build/syntax
+
+PKG uutf
\ No newline at end of file
diff --git a/CHANGES b/CHANGES
index 82bc0d0..a1269e4 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,5 +1,9 @@
 ===== dev ====
 
+===== 3.3.0 ====
+
+  * Add `Xml_print.Utf8` to encode html elements to utf8 properly.
+
 ===== 3.2.1 ====
 
   * Add signature functors to ease export of module created with the functorial interface.
diff --git a/_oasis b/_oasis
index 675e323..7dedb20 100644
--- a/_oasis
+++ b/_oasis
@@ -1,6 +1,6 @@
 OASISFormat: 0.4
 Name: tyxml
-Version: 3.2.1
+Version: 3.3.0
 Homepage: http://ocsigen.org/tyxml/
 Authors:
   Thorsten Ohl,
@@ -45,7 +45,7 @@ Library tyxml
     Xml_print,
     Svg_f,
     Html5_f
-  BuildDepends: str
+  BuildDepends: str, uutf
 
 Library tyxml_f
   FindlibName: functor
@@ -64,6 +64,7 @@ Library tyxml_f
     Html5_sigs,
     Html5_types,
     Html5_f
+  BuildDepends: uutf
 
 Library pa_tyxml
   Build$: flag(syntax)
diff --git a/_tags b/_tags
index b8b2305..0f090b9 100644
--- a/_tags
+++ b/_tags
@@ -1,5 +1,5 @@
 # OASIS_START
-# DO NOT EDIT (digest: a8b2c1c0d619924a93a606114a692f33)
+# DO NOT EDIT (digest: 9b16479512c40fcf68605f52a13f8d62)
 # Ignore VCS directories, you can use the same kind of rule outside
 # OASIS_START/STOP if you want to exclude directories that contains
 # useless stuff for the build process
@@ -18,6 +18,7 @@
 <lib/*.ml{,i}>: pkg_str
 # Library tyxml_f
 "lib/tyxml_f.cmxs": use_tyxml_f
+<lib/*.ml{,i}>: pkg_uutf
 # Library pa_tyxml
 "syntax/pa_tyxml.cmxs": use_pa_tyxml
 <syntax/*.ml{,i}>: pkg_camlp4
diff --git a/lib/META b/lib/META
index 844a010..44d9fba 100644
--- a/lib/META
+++ b/lib/META
@@ -1,15 +1,15 @@
 # OASIS_START
-# DO NOT EDIT (digest: a6fd6e6521a0512852d47b4b8b60917f)
-version = "3.2.1"
+# DO NOT EDIT (digest: c97aacbecdef7d92d60ce4f8423803b4)
+version = "3.3.0"
 description = "HTML5 pages typed with polymorphic variants"
-requires = "str"
+requires = "str uutf"
 archive(byte) = "tyxml.cma"
 archive(byte, plugin) = "tyxml.cma"
 archive(native) = "tyxml.cmxa"
 archive(native, plugin) = "tyxml.cmxs"
 exists_if = "tyxml.cma"
 package "syntax" (
- version = "3.2.1"
+ version = "3.3.0"
  description = "HTML5 and SVG syntax extension"
  requires = "camlp4"
  archive(syntax, preprocessor) = "pa_tyxml.cma"
@@ -21,7 +21,7 @@ package "syntax" (
 )
 
 package "parser" (
- version = "3.2.1"
+ version = "3.3.0"
  description = "Simple XML parser"
  requires = "camlp4.lib"
  archive(byte) = "tymlx_p.cma"
@@ -32,9 +32,10 @@ package "parser" (
 )
 
 package "functor" (
- version = "3.2.1"
+ version = "3.3.0"
  description =
  "HTML5 pages typed with polymorphic variants (Functor version)"
+ requires = "uutf"
  archive(byte) = "tyxml_f.cma"
  archive(byte, plugin) = "tyxml_f.cma"
  archive(native) = "tyxml_f.cmxa"
diff --git a/lib/xml_print.ml b/lib/xml_print.ml
index 6e19134..65a0d16 100644
--- a/lib/xml_print.ml
+++ b/lib/xml_print.ml
@@ -90,6 +90,76 @@ let string_of_number v =
         then s2
         else  Printf.sprintf "%.18g" v
 
+
+module Utf8 = struct
+  type utf8 = string
+  type encoding = [ `UTF_16 | `UTF_16BE | `UTF_16LE | `UTF_8 | `US_ASCII | `ISO_8859_1]
+  let normalize_from ~encoding src =
+    let warn = ref false in
+    let rec loop d e = match Uutf.decode d with
+      | `Uchar _ as u -> ignore (Uutf.encode e u); loop d e
+      | `End -> ignore (Uutf.encode e `End)
+      | `Malformed _ -> ignore (Uutf.encode e (`Uchar Uutf.u_rep)); warn:=true;loop d e
+      | `Await -> assert false
+    in
+    let d = Uutf.decoder ~encoding (`String src) in
+    let buffer = Buffer.create (String.length src) in
+    let e = Uutf.encoder `UTF_8 (`Buffer buffer) in
+    loop d e;
+    Buffer.contents buffer, !warn
+
+  let normalize src = normalize_from ~encoding:`UTF_8 src
+
+  let normalize_html src =
+    let warn = ref false in
+    let str e s =
+      for i = 0 to String.length s - 1 do
+        ignore (Uutf.encode e (`Uchar (Char.code s.[i])))
+      done in
+    let rec loop d e = match Uutf.decode d with
+      | `Uchar 34 -> str e "&quot;"; loop d e
+      | `Uchar 38 -> str e "&amp;"; loop d e
+      | `Uchar 60 -> str e "&lt;"; loop d e
+      | `Uchar 62 -> str e "&gt;"; loop d e
+      | `Uchar code as u ->
+        let u =
+          (* Illegal characters in html
+             http://en.wikipedia.org/wiki/Character_encodings_in_HTML
+             http://www.w3.org/TR/html5/syntax.html *)
+          if (* A. control C0 *)
+            (code <= 31 && code <> 9 && code <> 10 && code <> 13)
+            (* B. DEL + control C1
+               - invalid in html
+               - discouraged in xml;
+                 exept 0x85 see http://www.w3.org/TR/newline
+                 but let's discard it anyway *)
+            || (code >= 127 && code <= 159)
+            (* C. UTF-16 surrogate halves : already discarded by uutf *)
+            (* || (code >= 0xD800 && code <= 0xDFFF) *)
+            (* D. BOOM related *)
+            || code land 0xFFFF = 0xFFFE
+            || code land 0xFFFF = 0xFFFF
+
+          then (warn:=true;`Uchar Uutf.u_rep)
+          else u in
+        ignore (Uutf.encode e u);
+        loop d e
+      | `End -> ignore (Uutf.encode e `End)
+      | `Malformed _ ->
+        ignore (Uutf.encode e (`Uchar Uutf.u_rep));
+        warn:=true;
+        loop d e
+      | `Await -> assert false
+    in
+    let d = Uutf.decoder ~encoding:`UTF_8 (`String src) in
+    let buffer = Buffer.create (String.length src) in
+    let e = Uutf.encoder `UTF_8 (`Buffer buffer) in
+    loop d e;
+    Buffer.contents buffer, !warn
+
+end
+
+
 module Make
     (Xml : Xml_sigs.Iterable)
     (F : sig val emptytags : string list end)
diff --git a/lib/xml_print.mli b/lib/xml_print.mli
index cdbf606..f254231 100644
--- a/lib/xml_print.mli
+++ b/lib/xml_print.mli
@@ -45,6 +45,38 @@ val compose_doctype : string -> string list -> string
 val string_of_number : float -> string
 (** Convert a float to a string using a compact representation compatible with Javascript norme. *)
 
+(** Utf8 normalizer and encoder for HTML.
+
+Given a module [Htmlprinter] produced by one of the functors in {!Xml_print}, this modules is used as following:
+  {[
+    let encode x = fst (Utf8.normalize_html x) in
+    Htmlprinter.print ~encode document
+  ]} *)
+module Utf8 : sig
+
+  type utf8 = string
+  (** [normalize str] take a possibly invalid utf-8 string
+      and return a valid utf-8 string
+      where invalid bytes have been replaced by
+      the replacement character [U+FFFD].
+      The returned boolean is true if invalid bytes were found *)
+  val normalize : string -> utf8 * bool
+
+  (** Same as [normalize] plus some extra work :
+      It encode '<' , '>' , '"' , '&' characters with
+      corresponding entities and replaced invalid html
+      character by [U+FFFD] *)
+  val normalize_html : string -> utf8 * bool
+
+  type encoding = [ `UTF_16 | `UTF_16BE | `UTF_16LE | `UTF_8 | `US_ASCII | `ISO_8859_1]
+
+  (** [normalize_from ~encoding str] convert the string [str] into an uft-8 string.
+      It assumes the [encoding] encoding and replace invalid bytes by
+      the replacement character [U+FFFD].
+      The returned boolean is true if invalid bytes were found *)
+  val normalize_from : encoding:[<encoding] -> string -> utf8 * bool
+end
+
 module Make
     (Xml : Xml_sigs.Iterable)
     (I : sig val emptytags : string list end)
diff --git a/setup.ml b/setup.ml
index bc535f5..67af621 100644
--- a/setup.ml
+++ b/setup.ml
@@ -1,7 +1,7 @@
 (* setup.ml generated for the first time by OASIS v0.3.0 *)
 
 (* OASIS_START *)
-(* DO NOT EDIT (digest: 3aded4e69a60f79123e2280b32078d22) *)
+(* DO NOT EDIT (digest: 100e0c44c6c608ca72769bcd6b2a90c8) *)
 (*
    Regenerated by OASIS v0.4.5
    Visit http://oasis.forge.ocamlcore.org for more information and
@@ -242,11 +242,9 @@ module OASISString = struct
 
 
   let replace_chars f s =
-    let buf = String.make (String.length s) 'X' in
-      for i = 0 to String.length s - 1 do
-        buf.[i] <- f s.[i]
-      done;
-      buf
+    let buf = Buffer.create (String.length s) in
+    String.iter (fun c -> Buffer.add_char buf (f c)) s;
+    Buffer.contents buf
 
 
 end
@@ -2506,13 +2504,13 @@ module OASISFindlib = struct
     in
 
     let library_name_of_findlib_name =
-      Lazy.lazy_from_fun
-        (fun () ->
-           (* Revert findlib_name_of_library_name. *)
-           MapString.fold
-             (fun k v mp -> MapString.add v k mp)
-             fndlb_name_of_lib_name
-             MapString.empty)
+      lazy begin
+        (* Revert findlib_name_of_library_name. *)
+        MapString.fold
+          (fun k v mp -> MapString.add v k mp)
+          fndlb_name_of_lib_name
+          MapString.empty
+      end
     in
     let library_name_of_findlib_name fndlb_nm =
       try
@@ -2882,7 +2880,7 @@ module OASISFileUtil = struct
 end
 
 
-# 2885 "setup.ml"
+# 2883 "setup.ml"
 module BaseEnvLight = struct
 (* # 22 "src/base/BaseEnvLight.ml" *)
 
@@ -2987,7 +2985,7 @@ module BaseEnvLight = struct
 end
 
 
-# 2990 "setup.ml"
+# 2988 "setup.ml"
 module BaseContext = struct
 (* # 22 "src/base/BaseContext.ml" *)
 
@@ -5398,7 +5396,7 @@ module BaseSetup = struct
 end
 
 
-# 5401 "setup.ml"
+# 5399 "setup.ml"
 module InternalConfigurePlugin = struct
 (* # 22 "src/plugins/internal/InternalConfigurePlugin.ml" *)
 
@@ -6247,7 +6245,7 @@ module InternalInstallPlugin = struct
 end
 
 
-# 6250 "setup.ml"
+# 6248 "setup.ml"
 module OCamlbuildCommon = struct
 (* # 22 "src/plugins/ocamlbuild/OCamlbuildCommon.ml" *)
 
@@ -6305,6 +6303,11 @@ module OCamlbuildCommon = struct
         else
           [];
 
+        if bool_of_string (tests ()) then
+          ["-tag"; "tests"]
+        else
+          [];
+
         if bool_of_string (profile ()) then
           ["-tag"; "profile"]
         else
@@ -6620,7 +6623,7 @@ module OCamlbuildDocPlugin = struct
 end
 
 
-# 6623 "setup.ml"
+# 6626 "setup.ml"
 open OASISTypes;;
 
 let setup_t =
@@ -6655,7 +6658,7 @@ let setup_t =
           alpha_features = ["pure_interface"; "compiled_setup_ml"];
           beta_features = [];
           name = "tyxml";
-          version = "3.2.1";
+          version = "3.3.0";
           license =
             OASISLicense.DEP5License
               (OASISLicense.DEP5Unit
@@ -6745,7 +6748,11 @@ let setup_t =
                       bs_install = [(OASISExpr.EBool true, true)];
                       bs_path = "lib";
                       bs_compiled_object = Best;
-                      bs_build_depends = [FindlibPackage ("str", None)];
+                      bs_build_depends =
+                        [
+                           FindlibPackage ("str", None);
+                           FindlibPackage ("uutf", None)
+                        ];
                       bs_build_tools = [ExternalTool "ocamlbuild"];
                       bs_c_sources = [];
                       bs_data_files = [];
@@ -6782,7 +6789,7 @@ let setup_t =
                       bs_install = [(OASISExpr.EBool true, true)];
                       bs_path = "lib";
                       bs_compiled_object = Best;
-                      bs_build_depends = [];
+                      bs_build_depends = [FindlibPackage ("uutf", None)];
                       bs_build_tools = [ExternalTool "ocamlbuild"];
                       bs_c_sources = [];
                       bs_data_files = [];
@@ -6925,8 +6932,7 @@ let setup_t =
        };
      oasis_fn = Some "_oasis";
      oasis_version = "0.4.5";
-     oasis_digest =
-       Some "\167\206\238@\214\216S}\255\132,\132\015\012\011\027";
+     oasis_digest = Some "/V\255V\170\176\b\196k\227\178\026+\158\224\254";
      oasis_exec = None;
      oasis_setup_args = [];
      setup_update = false
@@ -6934,6 +6940,6 @@ let setup_t =
 
 let setup () = BaseSetup.setup setup_t;;
 
-# 6938 "setup.ml"
+# 6944 "setup.ml"
 (* OASIS_STOP *)
 let () = setup ();;

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-ocaml-maint/packages/tyxml.git

_______________________________________________
Pkg-ocaml-maint-commits mailing list
Pkg-ocaml-maint-commits@lists.alioth.debian.org
http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/pkg-ocaml-maint-commits

Reply via email to