svn commit: r1405742 [3/4] - in /subversion/upstream/utf8proc: ./ pgsql/ ruby/ ruby/gem/

brane Mon, 05 Nov 2012 02:55:32 -0800

Added: subversion/upstream/utf8proc/pgsql/utf8proc.sql
URL: 
http://svn.apache.org/viewvc/subversion/upstream/utf8proc/pgsql/utf8proc.sql?rev=1405742&view=auto
==============================================================================
--- subversion/upstream/utf8proc/pgsql/utf8proc.sql (added)
+++ subversion/upstream/utf8proc/pgsql/utf8proc.sql Mon Nov  5 10:54:56 2012
@@ -0,0 +1,6 @@
+CREATE OR REPLACE FUNCTION unifold (text) RETURNS text  -- case-fold and normalize a string
+  LANGUAGE 'C' IMMUTABLE STRICT AS '$libdir/utf8proc_pgsql.so',
+  'utf8proc_pgsql_unifold';  -- C symbol that implements unifold
+CREATE OR REPLACE FUNCTION unistrip (text) RETURNS text  -- like unifold, but also strips marks (e.g. accents)
+  LANGUAGE 'C' IMMUTABLE STRICT AS '$libdir/utf8proc_pgsql.so',
+  'utf8proc_pgsql_unistrip';  -- C symbol that implements unistrip


Added: subversion/upstream/utf8proc/pgsql/utf8proc_pgsql.c
URL: 
http://svn.apache.org/viewvc/subversion/upstream/utf8proc/pgsql/utf8proc_pgsql.c?rev=1405742&view=auto
==============================================================================
--- subversion/upstream/utf8proc/pgsql/utf8proc_pgsql.c (added)
+++ subversion/upstream/utf8proc/pgsql/utf8proc_pgsql.c Mon Nov  5 10:54:56 2012
@@ -0,0 +1,139 @@
+/*
+ *  Copyright (c) Public Software Group e. V., Berlin, Germany
+ *
+ *  Permission is hereby granted, free of charge, to any person obtaining a
+ *  copy of this software and associated documentation files (the "Software"),
+ *  to deal in the Software without restriction, including without limitation
+ *  the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ *  and/or sell copies of the Software, and to permit persons to whom the
+ *  Software is furnished to do so, subject to the following conditions:
+ *
+ *  The above copyright notice and this permission notice shall be included in
+ *  all copies or substantial portions of the Software.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ *  DEALINGS IN THE SOFTWARE.
+ */
+
+
+/*
+ *  File name:    pgsql/utf8proc_pgsql.c
+ *
+ *  Description:
+ *  PostgreSQL extension to provide two functions 'unifold' and 'unistrip',
+ *  which can be used to case-fold and normalize index fields and
+ *  optionally strip marks (e.g. accents) from strings.
+ */
+
+
+#include "../utf8proc.c"
+
+#include <postgres.h>
+#include <utils/elog.h>
+#include <fmgr.h>
+#include <string.h>
+#include <unistd.h>
+#include <utils/builtins.h>
+
+#ifdef PG_MODULE_MAGIC
+PG_MODULE_MAGIC;
+#endif
+
+#define UTF8PROC_PGSQL_FOLD_OPTS ( UTF8PROC_REJECTNA | UTF8PROC_COMPAT | \
+  UTF8PROC_COMPOSE | UTF8PROC_STABLE | UTF8PROC_IGNORE | UTF8PROC_STRIPCC | \
+  UTF8PROC_NLF2LF | UTF8PROC_CASEFOLD | UTF8PROC_LUMP )  /* option set passed by utf8proc_pgsql_unifold */
+#define UTF8PROC_PGSQL_STRIP_OPTS ( UTF8PROC_REJECTNA | UTF8PROC_COMPAT | \
+  UTF8PROC_COMPOSE | UTF8PROC_STABLE | UTF8PROC_IGNORE | UTF8PROC_STRIPCC | \
+  UTF8PROC_NLF2LF | UTF8PROC_CASEFOLD | UTF8PROC_LUMP | UTF8PROC_STRIPMARK )  /* the fold options plus STRIPMARK; passed by utf8proc_pgsql_unistrip */
+
+ssize_t utf8proc_pgsql_utf8map(
+  text *input_string, text **output_string_ptr, int options
+) {
+  /* Maps input_string through utf8proc into a freshly palloc'ed text; returns
+   * its payload length in bytes, or a negative UTF8PROC_ERROR_* code. */
+  text *mapped;
+  ssize_t count;
+  /* first call passes no buffer; its result sizes the buffer in int32_t units */
+  count = utf8proc_decompose(VARDATA(input_string),
+    VARSIZE(input_string) - VARHDRSZ, NULL, 0, options);
+  if (count < 0) return count;
+  if (count > (SIZE_MAX-1-VARHDRSZ)/sizeof(int32_t))
+    return UTF8PROC_ERROR_OVERFLOW;
+  /* room for count int32_t values, one extra byte and the varlena header */
+  mapped = palloc(count * sizeof(int32_t) + 1 + VARHDRSZ);
+  *output_string_ptr = mapped;
+  if (mapped == NULL) return UTF8PROC_ERROR_NOMEM;
+  count = utf8proc_decompose(VARDATA(input_string),
+    VARSIZE(input_string) - VARHDRSZ,
+    (int32_t *)VARDATA(mapped), count, options);
+  if (count < 0) return count;
+  /* the re-encoded bytes end up in the same buffer the code points used */
+  count = utf8proc_reencode((int32_t *)VARDATA(mapped), count, options);
+  if (count < 0) return count;
+  SET_VARSIZE(mapped, count + VARHDRSZ);
+  return count;
+}
+
+void utf8proc_pgsql_utf8map_errchk(ssize_t result, text *output_string) {
+  /* Frees the output buffer on failure; reports every error except invalid
+   * UTF-8 and unassigned code points, which just return to the caller. */
+  int code;
+  if (result >= 0) return;
+  if (output_string != NULL) pfree(output_string);
+  if (result == UTF8PROC_ERROR_INVALIDUTF8 ||
+      result == UTF8PROC_ERROR_NOTASSIGNED) {
+    return;
+  } else if (result == UTF8PROC_ERROR_NOMEM) {
+    code = ERRCODE_OUT_OF_MEMORY;
+  } else if (result == UTF8PROC_ERROR_OVERFLOW) {
+    code = ERRCODE_PROGRAM_LIMIT_EXCEEDED;
+  } else {
+    code = ERRCODE_INTERNAL_ERROR;
+  }
+  ereport(ERROR, (
+    errcode(code),
+    errmsg("%s", utf8proc_errmsg(result))
+  ));
+}
+
+PG_FUNCTION_INFO_V1(utf8proc_pgsql_unifold);
+Datum utf8proc_pgsql_unifold(PG_FUNCTION_ARGS) {
+  /* SQL entry point: map the text argument with the FOLD option set.
+   * Yields SQL NULL when the mapping fails without raising an error. */
+  text *folded = NULL;
+  text *arg = PG_GETARG_TEXT_P(0);
+  ssize_t status = utf8proc_pgsql_utf8map(
+    arg, &folded, UTF8PROC_PGSQL_FOLD_OPTS
+  );
+  PG_FREE_IF_COPY(arg, 0);
+  /* raises for reportable errors and releases the output buffer */
+  utf8proc_pgsql_utf8map_errchk(status, folded);
+  if (status < 0) {
+    PG_RETURN_NULL();
+  }
+  PG_RETURN_TEXT_P(folded);
+}
+
+PG_FUNCTION_INFO_V1(utf8proc_pgsql_unistrip);
+Datum utf8proc_pgsql_unistrip(PG_FUNCTION_ARGS) {
+  /* SQL entry point: map the text argument with the STRIP option set.
+   * Yields SQL NULL when the mapping fails without raising an error. */
+  text *stripped = NULL;
+  text *arg = PG_GETARG_TEXT_P(0);
+  ssize_t status = utf8proc_pgsql_utf8map(
+    arg, &stripped, UTF8PROC_PGSQL_STRIP_OPTS
+  );
+  PG_FREE_IF_COPY(arg, 0);
+  /* raises for reportable errors and releases the output buffer */
+  utf8proc_pgsql_utf8map_errchk(status, stripped);
+  if (status < 0) {
+    PG_RETURN_NULL();
+  }
+  PG_RETURN_TEXT_P(stripped);
+}
+

Added: subversion/upstream/utf8proc/ruby/extconf.rb
URL: 
http://svn.apache.org/viewvc/subversion/upstream/utf8proc/ruby/extconf.rb?rev=1405742&view=auto
==============================================================================
--- subversion/upstream/utf8proc/ruby/extconf.rb (added)
+++ subversion/upstream/utf8proc/ruby/extconf.rb Mon Nov  5 10:54:56 2012
@@ -0,0 +1,2 @@
+require 'mkmf'  # provides create_makefile
+create_makefile("utf8proc_native")  # generate the Makefile for the utf8proc_native extension

Added: subversion/upstream/utf8proc/ruby/gem/LICENSE
URL: 
http://svn.apache.org/viewvc/subversion/upstream/utf8proc/ruby/gem/LICENSE?rev=1405742&view=auto
==============================================================================
--- subversion/upstream/utf8proc/ruby/gem/LICENSE (added)
+++ subversion/upstream/utf8proc/ruby/gem/LICENSE Mon Nov  5 10:54:56 2012
@@ -0,0 +1,64 @@
+
+Copyright (c) 2009 Public Software Group e. V., Berlin, Germany
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, sublicense,
+and/or sell copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+
+
+This software distribution contains derived data from a modified version of
+the Unicode data files. The following license applies to that data:
+
+COPYRIGHT AND PERMISSION NOTICE
+
+Copyright (c) 1991-2007 Unicode, Inc. All rights reserved. Distributed
+under the Terms of Use in http://www.unicode.org/copyright.html.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of the Unicode data files and any associated documentation (the "Data
+Files") or Unicode software and any associated documentation (the
+"Software") to deal in the Data Files or Software without restriction,
+including without limitation the rights to use, copy, modify, merge,
+publish, distribute, and/or sell copies of the Data Files or Software, and
+to permit persons to whom the Data Files or Software are furnished to do
+so, provided that (a) the above copyright notice(s) and this permission
+notice appear with all copies of the Data Files or Software, (b) both the
+above copyright notice(s) and this permission notice appear in associated
+documentation, and (c) there is clear notice in each modified Data File or
+in the Software as well as in the documentation associated with the Data
+File(s) or Software that the data or software has been modified.
+
+THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
+KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF
+THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS
+INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR
+CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
+USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+PERFORMANCE OF THE DATA FILES OR SOFTWARE.
+
+Except as contained in this notice, the name of a copyright holder shall
+not be used in advertising or otherwise to promote the sale, use or other
+dealings in these Data Files or Software without prior written
+authorization of the copyright holder.
+
+
+Unicode and the Unicode logo are trademarks of Unicode, Inc., and may be
+registered in some jurisdictions. All other trademarks and registered
+trademarks mentioned herein are the property of their respective owners.
+

Added: subversion/upstream/utf8proc/ruby/gem/utf8proc.gemspec
URL: 
http://svn.apache.org/viewvc/subversion/upstream/utf8proc/ruby/gem/utf8proc.gemspec?rev=1405742&view=auto
==============================================================================
--- subversion/upstream/utf8proc/ruby/gem/utf8proc.gemspec (added)
+++ subversion/upstream/utf8proc/ruby/gem/utf8proc.gemspec Mon Nov  5 10:54:56 
2012
@@ -0,0 +1,12 @@
+require 'rubygems'
+SPEC = Gem::Specification.new do |s|  # gem metadata for the utf8proc Ruby binding
+  s.name = 'utf8proc'
+  s.version = '1.1.5'  # same string that utf8proc_version() returns in utf8proc.c
+  s.author = 'Public Software Group e. V., Berlin, Germany'
+  s.homepage = 'http://www.public-software-group.org/utf8proc'
+  s.summary = 'UTF-8 Unicode string processing'
+  s.files = ['LICENSE', 'lib/utf8proc.rb', 'ext/utf8proc_native.c']  # NOTE(review): 'ext/extconf.rb' (see s.extensions) is not listed here - confirm it gets packaged
+  s.require_path = 'lib/'
+  s.extensions = ['ext/extconf.rb']  # extconf script of the native extension
+  s.has_rdoc = false
+end

Added: subversion/upstream/utf8proc/ruby/utf8proc.rb
URL: 
http://svn.apache.org/viewvc/subversion/upstream/utf8proc/ruby/utf8proc.rb?rev=1405742&view=auto
==============================================================================
--- subversion/upstream/utf8proc/ruby/utf8proc.rb (added)
+++ subversion/upstream/utf8proc/ruby/utf8proc.rb Mon Nov  5 10:54:56 2012
@@ -0,0 +1,98 @@
+#  Copyright (c) 2009 Public Software Group e. V., Berlin, Germany
+#
+#  Permission is hereby granted, free of charge, to any person obtaining a
+#  copy of this software and associated documentation files (the "Software"),
+#  to deal in the Software without restriction, including without limitation
+#  the rights to use, copy, modify, merge, publish, distribute, sublicense,
+#  and/or sell copies of the Software, and to permit persons to whom the
+#  Software is furnished to do so, subject to the following conditions:
+#
+#  The above copyright notice and this permission notice shall be included in
+#  all copies or substantial portions of the Software.
+#
+#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+#  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+#  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+#  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+#  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+#  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+#  DEALINGS IN THE SOFTWARE.
+
+
+#
+#  File name:    ruby/utf8proc.rb
+#
+#  Description:
+#  Part of the ruby wrapper for libutf8proc, which is written in ruby.
+#
+
+
+require 'utf8proc_native'
+
+
+module Utf8Proc  # Ruby-level helpers built on the utf8proc_native extension
+
+  SpecialChars = {  # named control/separator characters as raw byte strings
+    :HT => "\x09",
+    :LF => "\x0A",
+    :VT => "\x0B",
+    :FF => "\x0C",
+    :CR => "\x0D",
+    :FS => "\x1C",
+    :GS => "\x1D",
+    :RS => "\x1E",
+    :US => "\x1F",
+    :LS => "\xE2\x80\xA8",  # UTF-8 bytes of U+2028
+    :PS => "\xE2\x80\xA9",  # UTF-8 bytes of U+2029
+  }
+
+  module StringExtensions
+    def utf8map(*option_array)  # OR the flags for the given option symbols, then map self
+      options = 0
+      option_array.each do |option|
+        flag = Utf8Proc::Options[option]
+        raise ArgumentError, "Unknown argument given to String#utf8map." unless
+          flag
+        options |= flag
+      end
+      return Utf8Proc::utf8map(self, options)
+    end
+    def utf8map!(*option_array)  # in-place variant: replaces self with the mapped string
+      self.replace(self.utf8map(*option_array))
+    end
+    def utf8nfd;   utf8map( :stable, :decompose); end
+    def utf8nfd!;  utf8map!(:stable, :decompose); end
+    def utf8nfc;   utf8map( :stable, :compose); end
+    def utf8nfc!;  utf8map!(:stable, :compose); end
+    def utf8nfkd;  utf8map( :stable, :decompose, :compat); end
+    def utf8nfkd!; utf8map!(:stable, :decompose, :compat); end
+    def utf8nfkc;  utf8map( :stable, :compose, :compat); end
+    def utf8nfkc!; utf8map!(:stable, :compose, :compat); end
+    def utf8chars  # split at the "\377" (0xFF) boundary markers emitted under :charbound
+      result = self.utf8map(:charbound).split("\377")
+      result.shift if result.first == ""
+      result
+    end
+    def char_ary
+      # deprecated, use String#utf8chars instead
+      utf8chars
+    end
+  end
+
+  module IntegerExtensions
+    def utf8  # encode this integer as a code point via Utf8Proc::utf8char
+      return Utf8Proc::utf8char(self)
+    end
+  end
+
+end
+
+
+class String
+  include(Utf8Proc::StringExtensions)  # adds utf8map, utf8nfd/nfc/nfkd/nfkc, utf8chars, ... to String
+end
+
+class Integer
+  include(Utf8Proc::IntegerExtensions)  # adds Integer#utf8
+end
+

Added: subversion/upstream/utf8proc/ruby/utf8proc_native.c
URL: 
http://svn.apache.org/viewvc/subversion/upstream/utf8proc/ruby/utf8proc_native.c?rev=1405742&view=auto
==============================================================================
--- subversion/upstream/utf8proc/ruby/utf8proc_native.c (added)
+++ subversion/upstream/utf8proc/ruby/utf8proc_native.c Mon Nov  5 10:54:56 2012
@@ -0,0 +1,160 @@
+/*
+ *  Copyright (c) 2009 Public Software Group e. V., Berlin, Germany
+ *
+ *  Permission is hereby granted, free of charge, to any person obtaining a
+ *  copy of this software and associated documentation files (the "Software"),
+ *  to deal in the Software without restriction, including without limitation
+ *  the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ *  and/or sell copies of the Software, and to permit persons to whom the
+ *  Software is furnished to do so, subject to the following conditions:
+ *
+ *  The above copyright notice and this permission notice shall be included in
+ *  all copies or substantial portions of the Software.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ *  DEALINGS IN THE SOFTWARE.
+ */
+
+
+/*
+ *  File name:    ruby/utf8proc_native.c
+ *
+ *  Description:
+ *  Native part of the ruby wrapper for libutf8proc.
+ */
+
+
+#include "../utf8proc.c"
+#include "ruby.h"
+
+#ifndef RSTRING_PTR  /* fallback for a ruby.h that does not define RSTRING_PTR */
+#define RSTRING_PTR(s) (RSTRING(s)->ptr)
+#endif
+#ifndef RSTRING_LEN  /* fallback for a ruby.h that does not define RSTRING_LEN */
+#define RSTRING_LEN(s) (RSTRING(s)->len)
+#endif
+
+typedef struct utf8proc_ruby_mapenv_struct {
+  int32_t *buffer;  /* working buffer of utf8proc_ruby_map; released by utf8proc_ruby_mapenv_free */
+} utf8proc_ruby_mapenv_t;
+
+void utf8proc_ruby_mapenv_free(utf8proc_ruby_mapenv_t *env) {  /* free hook passed to Data_Make_Struct */
+  xfree(env->buffer);  /* buffer comes from ALLOC_N (Ruby allocator): xfree, not free; NULL is fine */
+  xfree(env);          /* env itself was allocated by Data_Make_Struct */
+}
+
+VALUE utf8proc_ruby_module;  /* the Utf8Proc module; set in Init_utf8proc_native */
+VALUE utf8proc_ruby_options;  /* frozen Hash of option symbol => flag, exposed as Utf8Proc::Options */
+VALUE utf8proc_ruby_eUnicodeError;  /* Utf8Proc::UnicodeError, base class of the two errors below */
+VALUE utf8proc_ruby_eInvalidUtf8Error;  /* raised for UTF8PROC_ERROR_INVALIDUTF8 */
+VALUE utf8proc_ruby_eCodeNotAssignedError;  /* raised for UTF8PROC_ERROR_NOTASSIGNED */
+
+VALUE utf8proc_ruby_map_error(ssize_t result) {
+  /* Raises the Ruby exception class that corresponds to a utf8proc error
+   * code, using utf8proc_errmsg() for the message.  Unknown codes raise
+   * RuntimeError; the trailing Qnil only satisfies the return type. */
+  VALUE klass = rb_eRuntimeError;
+  if (result == UTF8PROC_ERROR_NOMEM) {
+    klass = rb_eNoMemError;
+  } else if (result == UTF8PROC_ERROR_OVERFLOW ||
+             result == UTF8PROC_ERROR_INVALIDOPTS) {
+    klass = rb_eArgError;
+  } else if (result == UTF8PROC_ERROR_INVALIDUTF8) {
+    klass = utf8proc_ruby_eInvalidUtf8Error;
+  } else if (result == UTF8PROC_ERROR_NOTASSIGNED) {
+    klass = utf8proc_ruby_eCodeNotAssignedError;
+  }
+  rb_raise(klass, "%s", utf8proc_errmsg(result));
+  return Qnil;
+}
+
+VALUE utf8proc_ruby_map(VALUE self, VALUE str_param, VALUE options_param) {
+  VALUE str;
+  int options;
+  VALUE env_obj;
+  utf8proc_ruby_mapenv_t *env;
+  ssize_t result;
+  VALUE retval;
+  str = StringValue(str_param);
+  options = NUM2INT(options_param) & ~UTF8PROC_NULLTERM;
+  env_obj = Data_Make_Struct(rb_cObject, utf8proc_ruby_mapenv_t, NULL,
+    utf8proc_ruby_mapenv_free, env);
+  result = utf8proc_decompose(RSTRING_PTR(str), RSTRING_LEN(str),
+    NULL, 0, options);
+  if (result < 0) {
+    utf8proc_ruby_map_error(result);
+    return Qnil;  /* needed to prevent problems with optimization */
+  }
+  env->buffer = ALLOC_N(int32_t, result+1);
+  result = utf8proc_decompose(RSTRING_PTR(str), RSTRING_LEN(str),
+    env->buffer, result, options);
+  if (result < 0) {
+    free(env->buffer);
+    env->buffer = 0;
+    utf8proc_ruby_map_error(result);
+    return Qnil;  /* needed to prevent problems with optimization */
+  }
+  result = utf8proc_reencode(env->buffer, result, options);
+  if (result < 0) {
+    free(env->buffer);
+    env->buffer = 0;
+    utf8proc_ruby_map_error(result);
+    return Qnil;  /* needed to prevent problems with optimization */
+  }
+  retval = rb_str_new((char *)env->buffer, result);
+  free(env->buffer);
+  env->buffer = 0;
+  return retval;
+}
+
+static VALUE utf8proc_ruby_char(VALUE self, VALUE code_param) {
+  /* Utf8Proc.utf8char(code): UTF-8 encodes one valid code point as a String */
+  uint8_t buffer[4];  /* uint8_t to match the dst parameter of utf8proc_encode_char */
+  ssize_t result;
+  int uc = NUM2INT(code_param);
+  if (!utf8proc_codepoint_valid(uc))
+    rb_raise(rb_eArgError, "Invalid Unicode code point");
+  result = utf8proc_encode_char(uc, buffer);
+  return rb_str_new((char *)buffer, result);
+}
+
+#define register_utf8proc_option(sym, field) \
+  rb_hash_aset(utf8proc_ruby_options, ID2SYM(rb_intern(sym)), INT2FIX(field))  /* options[:sym] = field */
+
+void Init_utf8proc_native(void) {  /* extension initializer: defines Utf8Proc, its functions, errors and Options */
+  utf8proc_ruby_module = rb_define_module("Utf8Proc");
+  rb_define_module_function(utf8proc_ruby_module, "utf8map",
+    utf8proc_ruby_map, 2);
+  rb_define_module_function(utf8proc_ruby_module, "utf8char",
+    utf8proc_ruby_char, 1);
+  utf8proc_ruby_eUnicodeError = rb_define_class_under(utf8proc_ruby_module,
+    "UnicodeError", rb_eStandardError);
+  utf8proc_ruby_eInvalidUtf8Error = rb_define_class_under(
+    utf8proc_ruby_module, "InvalidUtf8Error", utf8proc_ruby_eUnicodeError);
+  utf8proc_ruby_eCodeNotAssignedError = rb_define_class_under(
+    utf8proc_ruby_module, "CodeNotAssignedError",
+    utf8proc_ruby_eUnicodeError);
+  utf8proc_ruby_options = rb_hash_new();  /* symbol => UTF8PROC_* flag, looked up by String#utf8map */
+  register_utf8proc_option("stable",    UTF8PROC_STABLE);
+  register_utf8proc_option("compat",    UTF8PROC_COMPAT);
+  register_utf8proc_option("compose",   UTF8PROC_COMPOSE);
+  register_utf8proc_option("decompose", UTF8PROC_DECOMPOSE);
+  register_utf8proc_option("ignore",    UTF8PROC_IGNORE);
+  register_utf8proc_option("rejectna",  UTF8PROC_REJECTNA);
+  register_utf8proc_option("nlf2ls",    UTF8PROC_NLF2LS);
+  register_utf8proc_option("nlf2ps",    UTF8PROC_NLF2PS);
+  register_utf8proc_option("nlf2lf",    UTF8PROC_NLF2LF);
+  register_utf8proc_option("stripcc",   UTF8PROC_STRIPCC);
+  register_utf8proc_option("casefold",  UTF8PROC_CASEFOLD);
+  register_utf8proc_option("charbound", UTF8PROC_CHARBOUND);
+  register_utf8proc_option("lump",      UTF8PROC_LUMP);
+  register_utf8proc_option("stripmark", UTF8PROC_STRIPMARK);
+  OBJ_FREEZE(utf8proc_ruby_options);  /* frozen before it is published as a constant */
+  rb_define_const(utf8proc_ruby_module, "Options", utf8proc_ruby_options);
+}
+

Added: subversion/upstream/utf8proc/utf8proc.c
URL: 
http://svn.apache.org/viewvc/subversion/upstream/utf8proc/utf8proc.c?rev=1405742&view=auto
==============================================================================
--- subversion/upstream/utf8proc/utf8proc.c (added)
+++ subversion/upstream/utf8proc/utf8proc.c Mon Nov  5 10:54:56 2012
@@ -0,0 +1,587 @@
+/*
+ *  Copyright (c) 2009 Public Software Group e. V., Berlin, Germany
+ *
+ *  Permission is hereby granted, free of charge, to any person obtaining a
+ *  copy of this software and associated documentation files (the "Software"),
+ *  to deal in the Software without restriction, including without limitation
+ *  the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ *  and/or sell copies of the Software, and to permit persons to whom the
+ *  Software is furnished to do so, subject to the following conditions:
+ *
+ *  The above copyright notice and this permission notice shall be included in
+ *  all copies or substantial portions of the Software.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ *  DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ *  This library contains derived data from a modified version of the
+ *  Unicode data files.
+ *
+ *  The original data files are available at
+ *  http://www.unicode.org/Public/UNIDATA/
+ *
+ *  Please notice the copyright statement in the file "utf8proc_data.c".
+ */
+
+
+/*
+ *  File name:    utf8proc.c
+ *
+ *  Description:
+ *  Implementation of libutf8proc.
+ */
+
+
+#include "utf8proc.h"
+#include "utf8proc_data.c"
+
+
+const int8_t utf8proc_utf8class[256] = {  /* sequence length selected by a first byte; 0 = not a valid first byte */
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0x00-0x7F: single byte */
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 0x80-0xBF: rejected as first byte */
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  /* 0xC0-0xDF: two bytes */
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,  /* 0xE0-0xEF: three bytes */
+  4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0 };  /* 0xF0-0xF7: four bytes; 0xF8-0xFF rejected */
+
+#define UTF8PROC_HANGUL_SBASE 0xAC00  /* first code point handled by the Hangul syllable arithmetic */
+#define UTF8PROC_HANGUL_LBASE 0x1100
+#define UTF8PROC_HANGUL_VBASE 0x1161
+#define UTF8PROC_HANGUL_TBASE 0x11A7  /* one below T_START: a T index of 0 means no third code point is emitted */
+#define UTF8PROC_HANGUL_LCOUNT 19
+#define UTF8PROC_HANGUL_VCOUNT 21
+#define UTF8PROC_HANGUL_TCOUNT 28
+#define UTF8PROC_HANGUL_NCOUNT 588  /* = VCOUNT * TCOUNT */
+#define UTF8PROC_HANGUL_SCOUNT 11172  /* = LCOUNT * NCOUNT */
+/* END is exclusive */
+#define UTF8PROC_HANGUL_L_START  0x1100
+#define UTF8PROC_HANGUL_L_END    0x115A
+#define UTF8PROC_HANGUL_L_FILLER 0x115F
+#define UTF8PROC_HANGUL_V_START  0x1160
+#define UTF8PROC_HANGUL_V_END    0x11A3
+#define UTF8PROC_HANGUL_T_START  0x11A8
+#define UTF8PROC_HANGUL_T_END    0x11FA
+#define UTF8PROC_HANGUL_S_START  0xAC00
+#define UTF8PROC_HANGUL_S_END    0xD7A4  /* = SBASE + SCOUNT */
+
+
+#define UTF8PROC_BOUNDCLASS_START    0  /* boundary classes compared by the UTF8PROC_CHARBOUND logic in utf8proc_decompose_char */
+#define UTF8PROC_BOUNDCLASS_OTHER    1
+#define UTF8PROC_BOUNDCLASS_CR       2
+#define UTF8PROC_BOUNDCLASS_LF       3
+#define UTF8PROC_BOUNDCLASS_CONTROL  4
+#define UTF8PROC_BOUNDCLASS_EXTEND   5
+#define UTF8PROC_BOUNDCLASS_L        6  /* Hangul classes: L, V, T and the precomposed LV / LVT syllables */
+#define UTF8PROC_BOUNDCLASS_V        7
+#define UTF8PROC_BOUNDCLASS_T        8
+#define UTF8PROC_BOUNDCLASS_LV       9
+#define UTF8PROC_BOUNDCLASS_LVT     10
+
+
+const char *utf8proc_version(void) {  /* returns the library version as a constant string */
+  return "1.1.5";
+}
+
+const char *utf8proc_errmsg(ssize_t errcode) {  /* constant English message for a UTF8PROC_ERROR_* code */
+  switch (errcode) {
+    case UTF8PROC_ERROR_NOMEM:
+    return "Memory for processing UTF-8 data could not be allocated.";
+    case UTF8PROC_ERROR_OVERFLOW:
+    return "UTF-8 string is too long to be processed.";
+    case UTF8PROC_ERROR_INVALIDUTF8:
+    return "Invalid UTF-8 string";
+    case UTF8PROC_ERROR_NOTASSIGNED:
+    return "Unassigned Unicode code point found in UTF-8 string.";
+    case UTF8PROC_ERROR_INVALIDOPTS:
+    return "Invalid options for UTF-8 processing chosen.";
+    default:  /* every value that is not one of the codes above */
+    return "An unknown error occurred while processing UTF-8 data.";
+  }
+}
+
+ssize_t utf8proc_iterate(
+  const uint8_t *str, ssize_t strlen, int32_t *dst
+) {
+  /* Decodes the first UTF-8 sequence of str into *dst and returns its byte
+   * length.  Returns 0 when strlen is 0 and UTF8PROC_ERROR_INVALIDUTF8 for
+   * malformed or rejected input; *dst is -1 in both of those cases.  A
+   * negative strlen skips the length check. */
+  int32_t cp;
+  int k, seqlen;
+  *dst = -1;
+  if (strlen == 0) return 0;
+  seqlen = utf8proc_utf8class[str[0]];
+  if (seqlen == 0) return UTF8PROC_ERROR_INVALIDUTF8;
+  if (strlen >= 0 && seqlen > strlen) return UTF8PROC_ERROR_INVALIDUTF8;
+  /* scan upwards so a terminating NUL stops the read before later bytes */
+  for (k = 1; k < seqlen; k++) {
+    if ((str[k] & 0xC0) != 0x80) return UTF8PROC_ERROR_INVALIDUTF8;
+  }
+  if (seqlen == 1) {
+    cp = str[0];
+  } else if (seqlen == 2) {
+    cp = ((str[0] & 0x1F) << 6) + (str[1] & 0x3F);
+    if (cp < 0x80) return UTF8PROC_ERROR_INVALIDUTF8;
+  } else if (seqlen == 3) {
+    cp = ((str[0] & 0x0F) << 12) + ((str[1] & 0x3F) << 6) + (str[2] & 0x3F);
+    /* too-small values, 0xD800..0xDFFF and 0xFDD0..0xFDEF are rejected */
+    if (cp < 0x800 || (cp >= 0xD800 && cp < 0xE000) ||
+      (cp >= 0xFDD0 && cp < 0xFDF0)) return UTF8PROC_ERROR_INVALIDUTF8;
+  } else if (seqlen == 4) {
+    cp = ((str[0] & 0x07) << 18) + ((str[1] & 0x3F) << 12)
+      + ((str[2] & 0x3F) << 6) + (str[3] & 0x3F);
+    if (cp < 0x10000 || cp >= 0x110000) return UTF8PROC_ERROR_INVALIDUTF8;
+  } else {
+    return UTF8PROC_ERROR_INVALIDUTF8;
+  }
+  if ((cp & 0xFFFF) >= 0xFFFE) return UTF8PROC_ERROR_INVALIDUTF8;
+  *dst = cp;
+  return seqlen;
+}
+
+bool utf8proc_codepoint_valid(int32_t uc) {
+  /* rejects out-of-range values, 0xD800..0xDFFF, 0xFDD0..0xFDEF and xxFFFE/xxFFFF */
+  if (uc < 0 || uc >= 0x110000 || (uc >= 0xD800 && uc < 0xE000)) return false;
+  if (uc >= 0xFDD0 && uc < 0xFDF0) return false;
+  return (uc & 0xFFFF) < 0xFFFE;
+}
+
+ssize_t utf8proc_encode_char(int32_t uc, uint8_t *dst) {
+  /* Writes the encoding of uc to dst and returns the byte count (0 if uc is
+   * negative or >= 0x110000); 0xFFFE/0xFFFF become the raw bytes 0xFE/0xFF. */
+  if (uc < 0x00 || uc >= 0x110000) return 0;
+  if (uc < 0x80) {
+    dst[0] = uc;
+    return 1;
+  }
+  if (uc < 0x800) {
+    dst[0] = 0xC0 + (uc >> 6);
+    dst[1] = 0x80 + (uc & 0x3F);
+    return 2;
+  }
+  if (uc == 0xFFFF || uc == 0xFFFE) {
+    dst[0] = uc & 0xFF;
+    return 1;
+  }
+  if (uc < 0x10000) {
+    dst[0] = 0xE0 + (uc >> 12);
+    dst[1] = 0x80 + ((uc >> 6) & 0x3F);
+    dst[2] = 0x80 + (uc & 0x3F);
+    return 3;
+  }
+  dst[0] = 0xF0 + (uc >> 18);
+  dst[1] = 0x80 + ((uc >> 12) & 0x3F);
+  dst[2] = 0x80 + ((uc >> 6) & 0x3F);
+  dst[3] = 0x80 + (uc & 0x3F);
+  return 4;
+}
+
+const utf8proc_property_t *utf8proc_get_property(int32_t uc) {
+  /* ASSERT: uc >= 0 && uc < 0x110000 */
+  /* Two-stage table lookup: the stage-1 entry for the high bits (uc >> 8)
+   * plus the low byte selects a stage-2 entry, which in turn is the index
+   * of the property record for uc. */
+  return &utf8proc_properties[
+    utf8proc_stage2table[utf8proc_stage1table[uc >> 8] + (uc & 0xFF)]];
+}
+
+#define utf8proc_decompose_lump(replacement_uc) \
+  return utf8proc_decompose_char((replacement_uc), dst, bufsize, \
+  options & ~UTF8PROC_LUMP, last_boundclass)  /* returns from the enclosing function; LUMP is cleared for the recursive call */
+
+ssize_t utf8proc_decompose_char(int32_t uc, int32_t *dst, ssize_t bufsize,
+    int options, int *last_boundclass) {
+  /* ASSERT: uc >= 0 && uc < 0x110000 */
+  const utf8proc_property_t *property;
+  utf8proc_propval_t category;
+  int32_t hangul_sindex;
+  property = utf8proc_get_property(uc);
+  category = property->category;
+  hangul_sindex = uc - UTF8PROC_HANGUL_SBASE;
+  if (options & (UTF8PROC_COMPOSE|UTF8PROC_DECOMPOSE)) {
+    if (hangul_sindex >= 0 && hangul_sindex < UTF8PROC_HANGUL_SCOUNT) {
+      int32_t hangul_tindex;
+      if (bufsize >= 1) {
+        dst[0] = UTF8PROC_HANGUL_LBASE +
+          hangul_sindex / UTF8PROC_HANGUL_NCOUNT;
+        if (bufsize >= 2) dst[1] = UTF8PROC_HANGUL_VBASE +
+          (hangul_sindex % UTF8PROC_HANGUL_NCOUNT) / UTF8PROC_HANGUL_TCOUNT;
+      }
+      hangul_tindex = hangul_sindex % UTF8PROC_HANGUL_TCOUNT;
+      if (!hangul_tindex) return 2;
+      if (bufsize >= 3) dst[2] = UTF8PROC_HANGUL_TBASE + hangul_tindex;
+      return 3;
+    }
+  }
+  if (options & UTF8PROC_REJECTNA) {
+    if (!category) return UTF8PROC_ERROR_NOTASSIGNED;
+  }
+  if (options & UTF8PROC_IGNORE) {
+    if (property->ignorable) return 0;
+  }
+  if (options & UTF8PROC_LUMP) {
+    if (category == UTF8PROC_CATEGORY_ZS) utf8proc_decompose_lump(0x0020);
+    if (uc == 0x2018 || uc == 0x2019 || uc == 0x02BC || uc == 0x02C8)
+      utf8proc_decompose_lump(0x0027);
+    if (category == UTF8PROC_CATEGORY_PD || uc == 0x2212)
+      utf8proc_decompose_lump(0x002D);
+    if (uc == 0x2044 || uc == 0x2215) utf8proc_decompose_lump(0x002F);
+    if (uc == 0x2236) utf8proc_decompose_lump(0x003A);
+    if (uc == 0x2039 || uc == 0x2329 || uc == 0x3008)
+      utf8proc_decompose_lump(0x003C);
+    if (uc == 0x203A || uc == 0x232A || uc == 0x3009)
+      utf8proc_decompose_lump(0x003E);
+    if (uc == 0x2216) utf8proc_decompose_lump(0x005C);
+    if (uc == 0x02C4 || uc == 0x02C6 || uc == 0x2038 || uc == 0x2303)
+      utf8proc_decompose_lump(0x005E);
+    if (category == UTF8PROC_CATEGORY_PC || uc == 0x02CD)
+      utf8proc_decompose_lump(0x005F);
+    if (uc == 0x02CB) utf8proc_decompose_lump(0x0060);
+    if (uc == 0x2223) utf8proc_decompose_lump(0x007C);
+    if (uc == 0x223C) utf8proc_decompose_lump(0x007E);
+    if ((options & UTF8PROC_NLF2LS) && (options & UTF8PROC_NLF2PS)) {
+      if (category == UTF8PROC_CATEGORY_ZL ||
+          category == UTF8PROC_CATEGORY_ZP)
+        utf8proc_decompose_lump(0x000A);
+    }
+  }
+  if (options & UTF8PROC_STRIPMARK) {
+    if (category == UTF8PROC_CATEGORY_MN ||
+      category == UTF8PROC_CATEGORY_MC ||
+      category == UTF8PROC_CATEGORY_ME) return 0;
+  }
+  if (options & UTF8PROC_CASEFOLD) {
+    if (property->casefold_mapping) {
+      const int32_t *casefold_entry;
+      ssize_t written = 0;
+      for (casefold_entry = property->casefold_mapping;
+          *casefold_entry >= 0; casefold_entry++) {
+        written += utf8proc_decompose_char(*casefold_entry, dst+written,
+          (bufsize > written) ? (bufsize - written) : 0, options,
+          last_boundclass);
+        if (written < 0) return UTF8PROC_ERROR_OVERFLOW;
+      }
+      return written;
+    }
+  }
+  if (options & (UTF8PROC_COMPOSE|UTF8PROC_DECOMPOSE)) {
+    if (property->decomp_mapping &&
+        (!property->decomp_type || (options & UTF8PROC_COMPAT))) {
+      const int32_t *decomp_entry;
+      ssize_t written = 0;
+      for (decomp_entry = property->decomp_mapping;
+          *decomp_entry >= 0; decomp_entry++) {
+        written += utf8proc_decompose_char(*decomp_entry, dst+written,
+          (bufsize > written) ? (bufsize - written) : 0, options,
+        last_boundclass);
+        if (written < 0) return UTF8PROC_ERROR_OVERFLOW;
+      }
+      return written;
+    }
+  }
+  if (options & UTF8PROC_CHARBOUND) {
+    bool boundary;
+    int tbc, lbc;
+    tbc =
+      (uc == 0x000D) ? UTF8PROC_BOUNDCLASS_CR :
+      (uc == 0x000A) ? UTF8PROC_BOUNDCLASS_LF :
+      ((category == UTF8PROC_CATEGORY_ZL ||
+        category == UTF8PROC_CATEGORY_ZP ||
+        category == UTF8PROC_CATEGORY_CC ||
+        category == UTF8PROC_CATEGORY_CF) &&
+        !(uc == 0x200C || uc == 0x200D)) ? UTF8PROC_BOUNDCLASS_CONTROL :
+      property->extend ? UTF8PROC_BOUNDCLASS_EXTEND :
+      ((uc >= UTF8PROC_HANGUL_L_START && uc < UTF8PROC_HANGUL_L_END) ||
+        uc == UTF8PROC_HANGUL_L_FILLER) ? UTF8PROC_BOUNDCLASS_L :
+      (uc >= UTF8PROC_HANGUL_V_START && uc < UTF8PROC_HANGUL_V_END) ?
+        UTF8PROC_BOUNDCLASS_V :
+      (uc >= UTF8PROC_HANGUL_T_START && uc < UTF8PROC_HANGUL_T_END) ?
+        UTF8PROC_BOUNDCLASS_T :
+      (uc >= UTF8PROC_HANGUL_S_START && uc < UTF8PROC_HANGUL_S_END) ? (
+        ((uc-UTF8PROC_HANGUL_SBASE) % UTF8PROC_HANGUL_TCOUNT == 0) ?
+          UTF8PROC_BOUNDCLASS_LV : UTF8PROC_BOUNDCLASS_LVT
+      ) :
+      UTF8PROC_BOUNDCLASS_OTHER;
+    lbc = *last_boundclass;
+    boundary =
+      (tbc == UTF8PROC_BOUNDCLASS_EXTEND) ? false :
+      (lbc == UTF8PROC_BOUNDCLASS_START) ? true :
+      (lbc == UTF8PROC_BOUNDCLASS_CR &&
+       tbc == UTF8PROC_BOUNDCLASS_LF) ? false :
+      (lbc == UTF8PROC_BOUNDCLASS_CONTROL) ? true :
+      (tbc == UTF8PROC_BOUNDCLASS_CONTROL) ? true :
+      (lbc == UTF8PROC_BOUNDCLASS_L &&
+       (tbc == UTF8PROC_BOUNDCLASS_L ||
+        tbc == UTF8PROC_BOUNDCLASS_V ||
+        tbc == UTF8PROC_BOUNDCLASS_LV ||
+        tbc == UTF8PROC_BOUNDCLASS_LVT)) ? false :
+      ((lbc == UTF8PROC_BOUNDCLASS_LV ||
+        lbc == UTF8PROC_BOUNDCLASS_V) &&
+       (tbc == UTF8PROC_BOUNDCLASS_V ||
+        tbc == UTF8PROC_BOUNDCLASS_T)) ? false :
+      ((lbc == UTF8PROC_BOUNDCLASS_LVT ||
+        lbc == UTF8PROC_BOUNDCLASS_T) &&
+       tbc == UTF8PROC_BOUNDCLASS_T) ? false :
+       true;
+    *last_boundclass = tbc;
+    if (boundary) {
+      if (bufsize >= 1) dst[0] = 0xFFFF;
+      if (bufsize >= 2) dst[1] = uc;
+      return 2;
+    }
+  }
+  if (bufsize >= 1) *dst = uc;
+  return 1;
+}
+
+ssize_t utf8proc_decompose(
+  const uint8_t *str, ssize_t strlen,
+  int32_t *buffer, ssize_t bufsize, int options
+) {
+  /* Decomposes the UTF-8 string 'str' into code points stored in 'buffer'
+     (at most 'bufsize' entries are written) and returns the number of
+     entries needed, or a negative error code.
+     'strlen' is only consulted when UTF8PROC_NULLTERM is not set. */
+  const int normalizing = options & (UTF8PROC_COMPOSE|UTF8PROC_DECOMPOSE);
+  ssize_t total = 0;   /* code points produced so far */
+  ssize_t offset = 0;  /* read position in 'str', in bytes */
+  int boundclass = UTF8PROC_BOUNDCLASS_START;
+  if ((options & UTF8PROC_COMPOSE) && (options & UTF8PROC_DECOMPOSE))
+    return UTF8PROC_ERROR_INVALIDOPTS;
+  if ((options & UTF8PROC_STRIPMARK) && !normalizing)
+    return UTF8PROC_ERROR_INVALIDOPTS;
+  for (;;) {
+    int32_t codepoint;
+    ssize_t room;
+    ssize_t produced;
+    if (options & UTF8PROC_NULLTERM) {
+      offset += utf8proc_iterate(str + offset, -1, &codepoint);
+      /* the return value needs no separate check:
+         'codepoint' is negative whenever decoding failed */
+      if (codepoint < 0) return UTF8PROC_ERROR_INVALIDUTF8;
+      if (offset < 0) return UTF8PROC_ERROR_OVERFLOW;
+      if (codepoint == 0) break;
+    } else {
+      if (offset >= strlen) break;
+      offset += utf8proc_iterate(str + offset, strlen - offset, &codepoint);
+      if (codepoint < 0) return UTF8PROC_ERROR_INVALIDUTF8;
+    }
+    /* once the buffer is exhausted, keep counting with zero room left */
+    room = (bufsize > total) ? (bufsize - total) : 0;
+    produced = utf8proc_decompose_char(
+      codepoint, buffer + total, room, options, &boundclass
+    );
+    if (produced < 0) return produced;
+    total += produced;
+    /* refuse strings long enough to overflow later size computations */
+    if (total < 0 || total > SSIZE_MAX/sizeof(int32_t)/2)
+      return UTF8PROC_ERROR_OVERFLOW;
+  }
+  if (normalizing && bufsize >= total) {
+    /* reorder adjacent code points so that non-zero combining classes
+       end up in ascending order; after a swap step back one position,
+       because the moved character may have to travel further */
+    ssize_t i = 0;
+    while (i < total-1) {
+      const int32_t first = buffer[i];
+      const int32_t second = buffer[i+1];
+      const utf8proc_property_t *first_prop = utf8proc_get_property(first);
+      const utf8proc_property_t *second_prop = utf8proc_get_property(second);
+      if (second_prop->combining_class > 0 &&
+          first_prop->combining_class > second_prop->combining_class) {
+        buffer[i] = second;
+        buffer[i+1] = first;
+        if (i > 0) i--; else i++;
+        continue;
+      }
+      i++;
+    }
+  }
+  return total;
+}
+
+ssize_t utf8proc_reencode(int32_t *buffer, ssize_t length, int options) {
+  /* UTF8PROC_NULLTERM option will be ignored, 'length' is never ignored
+     ASSERT: 'buffer' has one spare byte of free space at the end!
+     Works in place on 'buffer' in up to three passes:
+       1. newline / control character conversion, if NLF2LS, NLF2PS or
+          STRIPCC is set,
+       2. composition, if COMPOSE is set,
+       3. UTF-8 encoding of the remaining code points into the same
+          memory, followed by a terminating zero byte.
+     Returns the number of UTF-8 bytes written, not counting the
+     terminating zero byte. */
+  if (options & (UTF8PROC_NLF2LS | UTF8PROC_NLF2PS | UTF8PROC_STRIPCC)) {
+    ssize_t rpos;
+    ssize_t wpos = 0;
+    int32_t uc;
+    for (rpos = 0; rpos < length; rpos++) {
+      uc = buffer[rpos];
+      if (uc == 0x000D && rpos < length-1 && buffer[rpos+1] == 0x000A) rpos++; /* CR LF: skip the LF, 'uc' stays CR */
+      if (uc == 0x000A || uc == 0x000D || uc == 0x0085 || /* LF, CR, NEL */
+          ((options & UTF8PROC_STRIPCC) && (uc == 0x000B || uc == 0x000C))) { /* VT, FF */
+        if (options & UTF8PROC_NLF2LS) {
+          if (options & UTF8PROC_NLF2PS) {
+            buffer[wpos++] = 0x000A; /* both flags set (NLF2LF): line feed */
+          } else {
+            buffer[wpos++] = 0x2028; /* line separator */
+          }
+        } else {
+          if (options & UTF8PROC_NLF2PS) {
+            buffer[wpos++] = 0x2029; /* paragraph separator */
+          } else {
+            buffer[wpos++] = 0x0020; /* STRIPCC without NLF2xx: space */
+          }
+        }
+      } else if ((options & UTF8PROC_STRIPCC) &&
+          (uc < 0x0020 || (uc >= 0x007F && uc < 0x00A0))) { /* remaining control characters */
+        if (uc == 0x0009) buffer[wpos++] = 0x0020; /* tab becomes a space, all others are dropped */
+      } else {
+        buffer[wpos++] = uc;
+      }
+    }
+    length = wpos;
+  }
+  if (options & UTF8PROC_COMPOSE) {
+    int32_t *starter = NULL; /* last kept code point with combining class 0 */
+    int32_t current_char;
+    const utf8proc_property_t *starter_property = NULL, *current_property; /* starter_property is looked up lazily */
+    utf8proc_propval_t max_combining_class = -1; /* highest class kept since the starter, -1 if none */
+    ssize_t rpos;
+    ssize_t wpos = 0;
+    int32_t composition;
+    for (rpos = 0; rpos < length; rpos++) {
+      current_char = buffer[rpos];
+      current_property = utf8proc_get_property(current_char);
+      if (starter && current_property->combining_class > max_combining_class) { /* class exceeds everything kept since the starter */
+        /* combination perhaps possible */
+        int32_t hangul_lindex;
+        int32_t hangul_sindex;
+        hangul_lindex = *starter - UTF8PROC_HANGUL_LBASE;
+        if (hangul_lindex >= 0 && hangul_lindex < UTF8PROC_HANGUL_LCOUNT) { /* starter lies in the Hangul L range */
+          int32_t hangul_vindex;
+          hangul_vindex = current_char - UTF8PROC_HANGUL_VBASE;
+          if (hangul_vindex >= 0 && hangul_vindex < UTF8PROC_HANGUL_VCOUNT) { /* current lies in the Hangul V range */
+            *starter = UTF8PROC_HANGUL_SBASE +
+              (hangul_lindex * UTF8PROC_HANGUL_VCOUNT + hangul_vindex) *
+              UTF8PROC_HANGUL_TCOUNT;
+            starter_property = NULL; /* starter changed, cached property is stale */
+            continue; /* current_char is consumed, nothing is written */
+          }
+        }
+        hangul_sindex = *starter - UTF8PROC_HANGUL_SBASE;
+        if (hangul_sindex >= 0 && hangul_sindex < UTF8PROC_HANGUL_SCOUNT &&
+            (hangul_sindex % UTF8PROC_HANGUL_TCOUNT) == 0) { /* starter in the S range with T index 0 */
+          int32_t hangul_tindex;
+          hangul_tindex = current_char - UTF8PROC_HANGUL_TBASE;
+          if (hangul_tindex >= 0 && hangul_tindex < UTF8PROC_HANGUL_TCOUNT) {
+            *starter += hangul_tindex;
+            starter_property = NULL;
+            continue;
+          }
+        }
+        if (!starter_property) {
+          starter_property = utf8proc_get_property(*starter);
+        }
+        if (starter_property->comb1st_index >= 0 &&
+            current_property->comb2nd_index >= 0) { /* negative index: no table entry for this character */
+          composition = utf8proc_combinations[
+            starter_property->comb1st_index +
+            current_property->comb2nd_index
+          ];
+          if (composition >= 0 && (!(options & UTF8PROC_STABLE) || /* negative: the pair does not combine */
+              !(utf8proc_get_property(composition)->comp_exclusion))) { /* STABLE rejects results flagged comp_exclusion */
+            *starter = composition;
+            starter_property = NULL;
+            continue;
+          }
+        }
+      }
+      buffer[wpos] = current_char; /* not combined: keep the character */
+      if (current_property->combining_class) {
+        if (current_property->combining_class > max_combining_class) {
+          max_combining_class = current_property->combining_class;
+        }
+      } else { /* combining class 0: this character becomes the new starter */
+        starter = buffer + wpos;
+        starter_property = NULL;
+        max_combining_class = -1;
+      }
+      wpos++;
+    }
+    length = wpos;
+  }
+  {
+    ssize_t rpos, wpos = 0; /* rpos counts code points, wpos counts bytes */
+    int32_t uc;
+    for (rpos = 0; rpos < length; rpos++) {
+      uc = buffer[rpos];
+      wpos += utf8proc_encode_char(uc, ((uint8_t *)buffer) + wpos); /* NOTE(review): in-place write presumably relies on at most 4 bytes per code point - confirm */
+    }
+    ((uint8_t *)buffer)[wpos] = 0; /* this is the spare byte required above */
+    return wpos;
+  }
+}
+
+ssize_t utf8proc_map(
+  const uint8_t *str, ssize_t strlen, uint8_t **dstptr, int options
+) {
+  /* Maps 'str' to a newly allocated, zero-terminated UTF-8 string stored
+     in '*dstptr' and returns its length in bytes.
+     On failure a negative error code is returned and '*dstptr' is NULL. */
+  int32_t *codepoints;
+  int32_t *shrunk;
+  ssize_t count;
+  ssize_t nbytes;
+  *dstptr = NULL;
+  /* first pass without a buffer: only determine the number of entries */
+  count = utf8proc_decompose(str, strlen, NULL, 0, options);
+  if (count < 0) return count;
+  /* one extra byte for the terminator written by utf8proc_reencode */
+  codepoints = malloc(count * sizeof(int32_t) + 1);
+  if (!codepoints) return UTF8PROC_ERROR_NOMEM;
+  count = utf8proc_decompose(str, strlen, codepoints, count, options);
+  if (count >= 0) {
+    nbytes = utf8proc_reencode(codepoints, count, options);
+  } else {
+    nbytes = count;
+  }
+  if (nbytes < 0) {
+    free(codepoints);
+    return nbytes;
+  }
+  /* give back the unused tail; if shrinking fails the old block is kept */
+  shrunk = realloc(codepoints, (size_t)nbytes+1);
+  if (shrunk) codepoints = shrunk;
+  *dstptr = (uint8_t *)codepoints;
+  return nbytes;
+}
+
+uint8_t *utf8proc_NFD(const uint8_t *str) {
+  /* Maps the zero-terminated string 'str' with the STABLE and DECOMPOSE
+     options; the result pointer stays NULL if utf8proc_map fails. */
+  const int flags = UTF8PROC_NULLTERM | UTF8PROC_STABLE | UTF8PROC_DECOMPOSE;
+  uint8_t *normalized;
+  utf8proc_map(str, 0, &normalized, flags);
+  return normalized;
+}
+
+uint8_t *utf8proc_NFC(const uint8_t *str) {
+  /* Maps the zero-terminated string 'str' with the STABLE and COMPOSE
+     options; the result pointer stays NULL if utf8proc_map fails. */
+  const int flags = UTF8PROC_NULLTERM | UTF8PROC_STABLE | UTF8PROC_COMPOSE;
+  uint8_t *normalized;
+  utf8proc_map(str, 0, &normalized, flags);
+  return normalized;
+}
+
+uint8_t *utf8proc_NFKD(const uint8_t *str) {
+  /* Maps the zero-terminated string 'str' with the STABLE, DECOMPOSE and
+     COMPAT options; the result pointer stays NULL if utf8proc_map fails. */
+  const int flags = UTF8PROC_NULLTERM | UTF8PROC_STABLE |
+    UTF8PROC_DECOMPOSE | UTF8PROC_COMPAT;
+  uint8_t *normalized;
+  utf8proc_map(str, 0, &normalized, flags);
+  return normalized;
+}
+
+uint8_t *utf8proc_NFKC(const uint8_t *str) {
+  /* Maps the zero-terminated string 'str' with the STABLE, COMPOSE and
+     COMPAT options; the result pointer stays NULL if utf8proc_map fails. */
+  const int flags = UTF8PROC_NULLTERM | UTF8PROC_STABLE |
+    UTF8PROC_COMPOSE | UTF8PROC_COMPAT;
+  uint8_t *normalized;
+  utf8proc_map(str, 0, &normalized, flags);
+  return normalized;
+}
+

Added: subversion/upstream/utf8proc/utf8proc.h
URL: 
http://svn.apache.org/viewvc/subversion/upstream/utf8proc/utf8proc.h?rev=1405742&view=auto
==============================================================================
--- subversion/upstream/utf8proc/utf8proc.h (added)
+++ subversion/upstream/utf8proc/utf8proc.h Mon Nov  5 10:54:56 2012
@@ -0,0 +1,385 @@
+/*
+ *  Copyright (c) 2009 Public Software Group e. V., Berlin, Germany
+ *
+ *  Permission is hereby granted, free of charge, to any person obtaining a
+ *  copy of this software and associated documentation files (the "Software"),
+ *  to deal in the Software without restriction, including without limitation
+ *  the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ *  and/or sell copies of the Software, and to permit persons to whom the
+ *  Software is furnished to do so, subject to the following conditions:
+ *
+ *  The above copyright notice and this permission notice shall be included in
+ *  all copies or substantial portions of the Software.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ *  DEALINGS IN THE SOFTWARE.
+ */
+
+
+/*
+ *  File name:    utf8proc.h
+ *
+ *  Description:
+ *  Header files for libutf8proc, which is a mapping tool for UTF-8 strings
+ *  with following features:
+ *  - decomposing and composing of strings
+ *  - replacing compatibility characters with their equivalents
+ *  - stripping of "default ignorable characters"
+ *    like SOFT-HYPHEN or ZERO-WIDTH-SPACE
+ *  - folding of certain characters for string comparison
+ *    (e.g. HYPHEN U+2010 and MINUS U+2212 to ASCII "-")
+ *    (see "LUMP" option)
+ *  - optional rejection of strings containing non-assigned code points
+ *  - stripping of control characters
+ *  - stripping of character marks (accents, etc.)
+ *  - transformation of LF, CRLF, CR and NEL to line-feed (LF)
+ *    or to the unicode characters for paragraph separation (PS)
+ *    or line separation (LS).
+ *  - unicode case folding (for case insensitive string comparisons)
+ *  - rejection of illegal UTF-8 data
+ *    (i.e. UTF-8 encoded UTF-16 surrogates)
+ *  - support for korean hangul characters
+ *  Unicode Version 5.0.0 is supported.
+ */
+
+
+#ifndef UTF8PROC_H
+#define UTF8PROC_H
+
+
+#include <stdlib.h>
+#include <sys/types.h>
+#ifdef _MSC_VER
+typedef signed char int8_t;
+typedef unsigned char uint8_t;
+typedef short int16_t;
+typedef unsigned short uint16_t;
+typedef int int32_t;
+#ifdef _WIN64
+#define ssize_t __int64
+#else
+#define ssize_t int
+#endif
+typedef unsigned char bool;
+enum {false, true};
+#else
+#include <stdbool.h>
+#include <inttypes.h>
+#endif
+#include <limits.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef SSIZE_MAX
+#define SSIZE_MAX ((size_t)SIZE_MAX/2)
+#endif
+
+#define UTF8PROC_NULLTERM  (1<<0)
+#define UTF8PROC_STABLE    (1<<1)
+#define UTF8PROC_COMPAT    (1<<2)
+#define UTF8PROC_COMPOSE   (1<<3)
+#define UTF8PROC_DECOMPOSE (1<<4)
+#define UTF8PROC_IGNORE    (1<<5)
+#define UTF8PROC_REJECTNA  (1<<6)
+#define UTF8PROC_NLF2LS    (1<<7)
+#define UTF8PROC_NLF2PS    (1<<8)
+#define UTF8PROC_NLF2LF    (UTF8PROC_NLF2LS | UTF8PROC_NLF2PS)
+#define UTF8PROC_STRIPCC   (1<<9)
+#define UTF8PROC_CASEFOLD  (1<<10)
+#define UTF8PROC_CHARBOUND (1<<11)
+#define UTF8PROC_LUMP      (1<<12)
+#define UTF8PROC_STRIPMARK (1<<13)
+/*
+ *  Flags being regarded by several functions in the library:
+ *  NULLTERM:  The given UTF-8 input is NULL terminated.
+ *  STABLE:    Unicode Versioning Stability has to be respected.
+ *  COMPAT:    Compatibility decomposition
+ *             (i.e. formatting information is lost)
+ *  COMPOSE:   Return a result with composed characters.
+ *  DECOMPOSE: Return a result with decomposed characters.
+ *  IGNORE:    Strip "default ignorable characters"
+ *  REJECTNA:  Return an error, if the input contains unassigned
+ *             code points.
+ *  NLF2LS:    Indicating that NLF-sequences (LF, CRLF, CR, NEL) are
+ *             representing a line break, and should be converted to the
+ *             unicode character for line separation (LS).
+ *  NLF2PS:    Indicating that NLF-sequences are representing a paragraph
+ *             break, and should be converted to the unicode character for
+ *             paragraph separation (PS).
+ *  NLF2LF:    Indicating that the meaning of NLF-sequences is unknown.
+ *  STRIPCC:   Strips and/or converts control characters.
+ *             NLF-sequences are transformed into space, except if one of
+ *             the NLF2LS/PS/LF options is given.
+ *             VerticalTab (VT) and FormFeed (FF) are treated as a
+ *             NLF-sequence in this case, and HorizontalTab (HT) is
+ *             converted into a space.
+ *             All other control characters are simply removed.
+ *  CASEFOLD:  Performs unicode case folding, to be able to do a
+ *             case-insensitive string comparison.
+ *  CHARBOUND: Inserts 0xFF bytes at the beginning of each sequence which
+ *             is representing a single grapheme cluster (see UAX#29).
+ *  LUMP:      Lumps certain characters together
+ *             (e.g. HYPHEN U+2010 and MINUS U+2212 to ASCII "-").
+ *             (See lump.txt for details.)
+ *             If NLF2LF is set, this includes a transformation of
+ *             paragraph and line separators to ASCII line-feed (LF).
+ *  STRIPMARK: Strips all character markings
+ *             (non-spacing, spacing and enclosing) (i.e. accents)
+ *             NOTE: this option works only with COMPOSE or DECOMPOSE
+ */
+
+#define UTF8PROC_ERROR_NOMEM -1
+#define UTF8PROC_ERROR_OVERFLOW -2
+#define UTF8PROC_ERROR_INVALIDUTF8 -3
+#define UTF8PROC_ERROR_NOTASSIGNED -4
+#define UTF8PROC_ERROR_INVALIDOPTS -5
+/*
+ *  Error codes being returned by almost all functions:
+ *  ERROR_NOMEM:       Memory could not be allocated.
+ *  ERROR_OVERFLOW:    The given string is too long to be processed.
+ *  ERROR_INVALIDUTF8: The given string is not a legal UTF-8 string.
+ *  ERROR_NOTASSIGNED: The REJECTNA flag was set,
+ *                     and an unassigned code point was found.
+ *  ERROR_INVALIDOPTS: Invalid options have been used.
+ */
+
+typedef int16_t utf8proc_propval_t;
+typedef struct utf8proc_property_struct { /* per-code-point record returned by utf8proc_get_property */
+  utf8proc_propval_t category; /* presumably one of the UTF8PROC_CATEGORY_* values */
+  utf8proc_propval_t combining_class; /* 0 marks a starter; non-zero values drive reordering and composition */
+  utf8proc_propval_t bidi_class; /* presumably one of the UTF8PROC_BIDI_CLASS_* values */
+  utf8proc_propval_t decomp_type; /* if non-zero, decomp_mapping is applied only with the COMPAT option */
+  const int32_t *decomp_mapping; /* NULL, or code points terminated by a negative entry */
+  unsigned bidi_mirrored:1;
+  int32_t uppercase_mapping;
+  int32_t lowercase_mapping;
+  int32_t titlecase_mapping;
+  int32_t comb1st_index; /* negative if unused; added to comb2nd_index to index utf8proc_combinations */
+  int32_t comb2nd_index; /* negative if unused; see comb1st_index */
+  unsigned comp_exclusion:1; /* with the STABLE option, nothing is composed into a character having this flag */
+  unsigned ignorable:1; /* presumably consulted for the IGNORE option - TODO confirm */
+  unsigned control_boundary:1;
+  unsigned extend:1; /* gives the character the boundary class EXTEND (CHARBOUND option) */
+  const int32_t *casefold_mapping; /* NULL, or code points terminated by a negative entry */
+} utf8proc_property_t;
+
+#define UTF8PROC_CATEGORY_LU  1
+#define UTF8PROC_CATEGORY_LL  2
+#define UTF8PROC_CATEGORY_LT  3
+#define UTF8PROC_CATEGORY_LM  4
+#define UTF8PROC_CATEGORY_LO  5
+#define UTF8PROC_CATEGORY_MN  6
+#define UTF8PROC_CATEGORY_MC  7
+#define UTF8PROC_CATEGORY_ME  8
+#define UTF8PROC_CATEGORY_ND  9
+#define UTF8PROC_CATEGORY_NL 10
+#define UTF8PROC_CATEGORY_NO 11
+#define UTF8PROC_CATEGORY_PC 12
+#define UTF8PROC_CATEGORY_PD 13
+#define UTF8PROC_CATEGORY_PS 14
+#define UTF8PROC_CATEGORY_PE 15
+#define UTF8PROC_CATEGORY_PI 16
+#define UTF8PROC_CATEGORY_PF 17
+#define UTF8PROC_CATEGORY_PO 18
+#define UTF8PROC_CATEGORY_SM 19
+#define UTF8PROC_CATEGORY_SC 20
+#define UTF8PROC_CATEGORY_SK 21
+#define UTF8PROC_CATEGORY_SO 22
+#define UTF8PROC_CATEGORY_ZS 23
+#define UTF8PROC_CATEGORY_ZL 24
+#define UTF8PROC_CATEGORY_ZP 25
+#define UTF8PROC_CATEGORY_CC 26
+#define UTF8PROC_CATEGORY_CF 27
+#define UTF8PROC_CATEGORY_CS 28
+#define UTF8PROC_CATEGORY_CO 29
+#define UTF8PROC_CATEGORY_CN 30
+#define UTF8PROC_BIDI_CLASS_L    1
+#define UTF8PROC_BIDI_CLASS_LRE  2
+#define UTF8PROC_BIDI_CLASS_LRO  3
+#define UTF8PROC_BIDI_CLASS_R    4
+#define UTF8PROC_BIDI_CLASS_AL   5
+#define UTF8PROC_BIDI_CLASS_RLE  6
+#define UTF8PROC_BIDI_CLASS_RLO  7
+#define UTF8PROC_BIDI_CLASS_PDF  8
+#define UTF8PROC_BIDI_CLASS_EN   9
+#define UTF8PROC_BIDI_CLASS_ES  10
+#define UTF8PROC_BIDI_CLASS_ET  11
+#define UTF8PROC_BIDI_CLASS_AN  12
+#define UTF8PROC_BIDI_CLASS_CS  13
+#define UTF8PROC_BIDI_CLASS_NSM 14
+#define UTF8PROC_BIDI_CLASS_BN  15
+#define UTF8PROC_BIDI_CLASS_B   16
+#define UTF8PROC_BIDI_CLASS_S   17
+#define UTF8PROC_BIDI_CLASS_WS  18
+#define UTF8PROC_BIDI_CLASS_ON  19
+#define UTF8PROC_DECOMP_TYPE_FONT      1
+#define UTF8PROC_DECOMP_TYPE_NOBREAK   2
+#define UTF8PROC_DECOMP_TYPE_INITIAL   3
+#define UTF8PROC_DECOMP_TYPE_MEDIAL    4
+#define UTF8PROC_DECOMP_TYPE_FINAL     5
+#define UTF8PROC_DECOMP_TYPE_ISOLATED  6
+#define UTF8PROC_DECOMP_TYPE_CIRCLE    7
+#define UTF8PROC_DECOMP_TYPE_SUPER     8
+#define UTF8PROC_DECOMP_TYPE_SUB       9
+#define UTF8PROC_DECOMP_TYPE_VERTICAL 10
+#define UTF8PROC_DECOMP_TYPE_WIDE     11
+#define UTF8PROC_DECOMP_TYPE_NARROW   12
+#define UTF8PROC_DECOMP_TYPE_SMALL    13
+#define UTF8PROC_DECOMP_TYPE_SQUARE   14
+#define UTF8PROC_DECOMP_TYPE_FRACTION 15
+#define UTF8PROC_DECOMP_TYPE_COMPAT   16
+
+extern const int8_t utf8proc_utf8class[256];
+
+const char *utf8proc_version(void);
+
+const char *utf8proc_errmsg(ssize_t errcode);
+/*
+ *  Returns a static error string for the given error code.
+ */
+
+ssize_t utf8proc_iterate(const uint8_t *str, ssize_t strlen, int32_t *dst);
+/*
+ *  Reads a single char from the UTF-8 sequence being pointed to by 'str'.
+ *  The maximum number of bytes read is 'strlen', unless 'strlen' is
+ *  negative.
+ *  If a valid unicode char could be read, it is stored in the variable
+ *  being pointed to by 'dst', otherwise that variable will be set to -1.
+ *  In case of success the number of bytes read is returned, otherwise a
+ *  negative error code is returned.
+ */
+
+bool utf8proc_codepoint_valid(int32_t uc);
+/*
+ *  Returns 1, if the given unicode code-point is valid, otherwise 0.
+ */
+
+ssize_t utf8proc_encode_char(int32_t uc, uint8_t *dst);
+/*
+ *  Encodes the unicode char with the code point 'uc' as an UTF-8 string in
+ *  the byte array being pointed to by 'dst'. This array has to be at least
+ *  4 bytes long.
+ *  In case of success the number of bytes written is returned,
+ *  otherwise 0.
+ *  This function does not check if 'uc' is a valid unicode code point.
+ */
+
+const utf8proc_property_t *utf8proc_get_property(int32_t uc);
+/*
+ *  Returns a pointer to a (constant) struct containing information about
+ *  the unicode char with the given code point 'uc'.
+ *  If the character does not exist, a pointer to a special struct is
+ *  returned, whose 'category' is 0.
+ *  WARNING: The parameter 'uc' has to be in the range of 0x0000 to
+ *           0x10FFFF, otherwise the program might crash!
+ */
+
+ssize_t utf8proc_decompose_char(
+  int32_t uc, int32_t *dst, ssize_t bufsize,
+  int options, int *last_boundclass
+);
+/*
+ *  Writes a decomposition of the unicode char 'uc' into the array being
+ *  pointed to by 'dst'.
+ *  Following flags in the 'options' field are regarded:
+ *  REJECTNA:  an unassigned unicode code point leads to an error
+ *  IGNORE:    "default ignorable" chars are stripped
+ *  CASEFOLD:  unicode casefolding is applied
+ *  COMPAT:    replace certain characters with their
+ *             compatibility decomposition
+ *  CHARBOUND: Inserts 0xFF bytes before each grapheme cluster
+ *  LUMP:      lumps certain different characters together
+ *  STRIPMARK: removes all character marks
+ *  The pointer 'last_boundclass' has to point to an integer variable which
+ *  is storing the last character boundary class, if the CHARBOUND option
+ *  is used.
+ *  In case of success the number of chars written is returned,
+ *  in case of an error, a negative error code is returned.
+ *  If the number of written chars would be bigger than 'bufsize',
+ *  the buffer (up to 'bufsize') has unpredictable data, and the needed
+ *  buffer size is returned.
+ *  WARNING: The parameter 'uc' has to be in the range of 0x0000 to
+ *           0x10FFFF, otherwise the program might crash!
+ */
+
+ssize_t utf8proc_decompose(
+  const uint8_t *str, ssize_t strlen,
+  int32_t *buffer, ssize_t bufsize, int options
+);
+/*
+ *  Does the same as 'utf8proc_decompose_char', but acts on a whole UTF-8
+ *  string, and orders the decomposed sequences correctly.
+ *  If the NULLTERM flag in 'options' is set, processing will be stopped,
+ *  when a NULL byte is encountered, otherwise 'strlen' bytes are processed.
+ *  The result in form of unicode code points is written into the buffer
+ *  being pointed to by 'buffer', having the length of 'bufsize' entries.
+ *  In case of success the number of chars written is returned,
+ *  in case of an error, a negative error code is returned.
+ *  If the number of written chars would be bigger than 'bufsize',
+ *  the buffer (up to 'bufsize') has unpredictable data, and the needed
+ *  buffer size is returned.
+ */
+
+ssize_t utf8proc_reencode(int32_t *buffer, ssize_t length, int options);
+/*
+ *  Reencodes the sequence of unicode characters given by the pointer
+ *  'buffer' and 'length' as UTF-8.
+ *  The result is stored in the same memory area where the data is read.
+ *  Following flags in the 'options' field are regarded:
+ *  NLF2LS:  converts LF, CRLF, CR and NEL into LS
+ *  NLF2PS:  converts LF, CRLF, CR and NEL into PS
+ *  NLF2LF:  converts LF, CRLF, CR and NEL into LF
+ *  STRIPCC: strips or converts all non-affected control characters
+ *  COMPOSE: tries to combine decomposed characters into composite
+ *           characters
+ *  STABLE:  prohibits combining characters which would violate
+ *           the unicode versioning stability
+ *  In case of success the length of the resulting UTF-8 string is
+ *  returned, otherwise a negative error code is returned.
+ *  WARNING: The amount of free space being pointed to by 'buffer', has to
+ *           exceed the amount of the input data by one byte, and the
+ *           entries of the array pointed to by 'buffer' have to be in the
+ *           range of 0x0000 to 0x10FFFF, otherwise the program might
+ *           crash!
+ */
+
+ssize_t utf8proc_map(
+  const uint8_t *str, ssize_t strlen, uint8_t **dstptr, int options
+);
+/*
+ *  Maps the given UTF-8 string being pointed to by 'str' to a new UTF-8
+ *  string, which is allocated dynamically, and afterwards pointed to by
+ *  the pointer being pointed to by 'dstptr'.
+ *  If the NULLTERM flag in the 'options' field is set, the length is
+ *  determined by a NULL terminator, otherwise the parameter 'strlen' is
+ *  evaluated to determine the string length, but in any case the result
+ *  will be NULL terminated (though it might contain NULL characters
+ *  before). Other flags in the 'options' field are passed to the functions
+ *  defined above, and regarded as described.
+ *  In case of success the length of the new string is returned,
+ *  otherwise a negative error code is returned.
+ *  NOTICE: The memory of the new UTF-8 string will have been allocated with
+ *          'malloc', and therefore has to be freed with 'free'.
+ */
+
+uint8_t *utf8proc_NFD(const uint8_t *str);
+uint8_t *utf8proc_NFC(const uint8_t *str);
+uint8_t *utf8proc_NFKD(const uint8_t *str);
+uint8_t *utf8proc_NFKC(const uint8_t *str);
+/*
+ *  Returns a pointer to newly allocated memory of a NFD, NFC, NFKD or NFKC
+ *  normalized version of the null-terminated string 'str'.
+ */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+

svn commit: r1405742 [3/4] - in /subversion/upstream/utf8proc: ./ pgsql/ ruby/ ruby/gem/

Reply via email to