commit 5e861eff888c9557b9e40d418fa6d0839d2d886a
Author: Joey Adams <joeyadams3.14159@gmail.com>
Date:   Fri Aug 13 04:20:06 2010 -0400

    Added several utility functions/macros to the backend
    
     * getEnumLabelOids:
          streamlined conversion of enum labels to OIDs
     * FN_EXTRA, FN_EXTRA_ALLOC, FN_MCXT:
          macros to cut down on boilerplate when working with
          fcinfo->flinfo->fn_mcxt
     * getTypeInfo:
          wrapper around
          get_type_io_data / fmgr_info_cxt / get_type_category_preferred
          that stores results in a structure called TypeInfo
     * pg_substring, pg_encoding_substring:
          slicing of multibyte-encoded strings
     * server_to_utf8, utf8_to_server:
          convenience routines for converting between the database encoding
          and UTF-8
     * text_to_utf8_cstring, utf8_cstring_to_text, utf8_cstring_to_text_with_len:
          variants of text_to_cstring and company that also convert
          into and out of UTF-8

diff --git a/src/backend/utils/adt/enum.c b/src/backend/utils/adt/enum.c
index 69562db..9259c99 100644
--- a/src/backend/utils/adt/enum.c
+++ b/src/backend/utils/adt/enum.c
@@ -13,6 +13,7 @@
  */
 #include "postgres.h"
 
+#include "catalog/namespace.h"
 #include "catalog/pg_enum.h"
 #include "fmgr.h"
 #include "utils/array.h"
@@ -25,6 +26,7 @@
 
 static ArrayType *enum_range_internal(Oid enumtypoid, Oid lower, Oid upper);
 static int	enum_elem_cmp(const void *left, const void *right);
+static int	enum_label_cmp(const void *left, const void *right);
 
 
 /* Basic I/O support */
@@ -412,6 +414,84 @@ enum_range_internal(Oid enumtypoid, Oid lower, Oid upper)
 	return result;
 }
 
+/*
+ * getEnumLabelOids
+ *	  Look up the OIDs of enum labels.  Enum label OIDs are needed to
+ *	  return values of a custom enum type from a C function.
+ *
+ *	  Callers should typically cache the OIDs produced by this function
+ *	  using FN_EXTRA, as retrieving enum label OIDs is somewhat expensive.
+ *
+ *	  Every labels[i].index must be in the range 0 .. count - 1, and oid_out
+ *	  must be allocated to hold count items.  Note that getEnumLabelOids
+ *	  sorts the labels[] array passed to it.
+ *
+ *	  Any labels not found in the enum will have their corresponding oid_out
+ *	  entries set to InvalidOid.  An error is raised if typname is unknown.
+ *
+ *	  Sample usage:
+ *
+ *	  -- SQL --
+ *	  CREATE TYPE colors AS ENUM ('red', 'green', 'blue');
+ *
+ *	  -- C --
+ *	  enum Colors {RED, GREEN, BLUE, COLOR_COUNT};
+ *
+ *	  static EnumLabel enum_labels[COLOR_COUNT] =
+ *	  {
+ *		  {RED,   "red"},
+ *		  {GREEN, "green"},
+ *		  {BLUE,  "blue"}
+ *	  };
+ *
+ *	  Oid *label_oids = palloc(COLOR_COUNT * sizeof(Oid));
+ *	  getEnumLabelOids("colors", enum_labels, label_oids, COLOR_COUNT);
+ *
+ *	  PG_RETURN_OID(label_oids[GREEN]);
+ */
+void
+getEnumLabelOids(const char *typname, EnumLabel labels[], Oid oid_out[], int count)
+{
+	CatCList   *list;
+	Oid			enumtypoid;
+	int			i;
+	EnumLabel	key;
+	EnumLabel  *found;
+
+	/* Must be a real error: an Assert is not checked in production builds. */
+	enumtypoid = TypenameGetTypid(typname);
+	if (!OidIsValid(enumtypoid))
+		elog(ERROR, "type \"%s\" does not exist", typname);
+
+	qsort(labels, count, sizeof(EnumLabel), enum_label_cmp);
+
+	for (i = 0; i < count; i++)
+	{
+		/* Labels that are never matched below keep InvalidOid. */
+		oid_out[i] = InvalidOid;
+
+		/* Make sure EnumLabel indices are in range. */
+		Assert(labels[i].index >= 0 && labels[i].index < count);
+	}
+
+	list = SearchSysCacheList1(ENUMTYPOIDNAME,
+							   ObjectIdGetDatum(enumtypoid));
+
+	for (i = 0; i < list->n_members; i++)
+	{
+		HeapTuple	tup = &list->members[i]->tuple;
+		Oid			oid = HeapTupleGetOid(tup);
+		Form_pg_enum en = (Form_pg_enum) GETSTRUCT(tup);
+
+		key.label = NameStr(en->enumlabel);
+		found = bsearch(&key, labels, count, sizeof(EnumLabel), enum_label_cmp);
+		if (found != NULL)
+			oid_out[found->index] = oid;
+	}
+
+	ReleaseCatCacheList(list);
+}
+
 /* qsort comparison function for Datums that are OIDs */
 static int
 enum_elem_cmp(const void *left, const void *right)
@@ -425,3 +505,13 @@ enum_elem_cmp(const void *left, const void *right)
 		return 1;
 	return 0;
 }
+
+/* Order two EnumLabel entries by label text (qsort/bsearch callback). */
+static int
+enum_label_cmp(const void *left, const void *right)
+{
+	const EnumLabel *lhs = (const EnumLabel *) left;
+	const EnumLabel *rhs = (const EnumLabel *) right;
+
+	return strcmp(lhs->label, rhs->label);
+}
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 94766cd..c8f23d4 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -177,6 +177,75 @@ text_to_cstring_buffer(const text *src, char *dst, size_t dst_len)
 		pfree(srcunpacked);
 }
 
+/*
+ * text_to_utf8_cstring
+ *
+ * Counterpart of text_to_cstring that returns the string
+ * encoded in UTF-8 instead of the server encoding.
+ */
+char *
+text_to_utf8_cstring(const text *t)
+{
+	/* detoasting takes a non-const pointer, as in text_to_cstring */
+	text	   *unpacked = pg_detoast_datum_packed((struct varlena *) t);
+	int			nbytes = VARSIZE_ANY_EXHDR(unpacked);
+	const char *bytes = VARDATA_ANY(unpacked);
+	char	   *utf8 = server_to_utf8(bytes, nbytes);
+
+	/* same pointer back means no conversion was done: make our own copy */
+	if (utf8 == bytes)
+		utf8 = pnstrdup(bytes, nbytes);
+
+	if (unpacked != t)
+		pfree(unpacked);
+
+	return utf8;
+}
+
+/*
+ * utf8_cstring_to_text
+ *
+ * Just like cstring_to_text, but takes a C string
+ * encoded in UTF-8 instead of the server encoding.
+ */
+text *
+utf8_cstring_to_text(const char *s)
+{
+	return utf8_cstring_to_text_with_len(s, strlen(s));
+}
+
+/*
+ * utf8_cstring_to_text_with_len
+ *
+ * Just like cstring_to_text_with_len, but takes a C string
+ * encoded in UTF-8 instead of the server encoding.
+ *
+ * The input string should not contain null characters.
+ */
+text *
+utf8_cstring_to_text_with_len(const char *s, int len)
+{
+	char	   *cstring;
+	int			cstring_len;
+	text	   *result;
+
+	cstring = utf8_to_server(s, len);
+	if (cstring == s)
+		cstring_len = len;
+	else
+		cstring_len = strlen(cstring);
+
+	/* Size by the converted length; it need not equal the UTF-8 len. */
+	result = (text *) palloc(cstring_len + VARHDRSZ);
+	SET_VARSIZE(result, cstring_len + VARHDRSZ);
+	memcpy(VARDATA(result), cstring, cstring_len);
+
+	if (cstring != s)
+		pfree(cstring);
+
+	return result;
+}
+
 
 /*****************************************************************************
  *	 USER I/O ROUTINES														 *
diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c
index 19a4a45..5b6b823 100644
--- a/src/backend/utils/cache/lsyscache.c
+++ b/src/backend/utils/cache/lsyscache.c
@@ -1844,6 +1844,37 @@ get_type_io_data(Oid typid,
 	ReleaseSysCache(typeTuple);
 }
 
+/*
+ * getTypeInfo
+ *	  Fill in *d with information about a type, including a call-ready
+ *	  FmgrInfo for its input, output, binary receive, or binary send procedure.
+ *
+ *	  which_func selects that procedure and should be one of:
+ *		  IOFunc_input
+ *		  IOFunc_output
+ *		  IOFunc_receive
+ *		  IOFunc_send
+ *
+ *	  mcxt is the memory context that the IO function will use to
+ *	  store subsidiary data.  It should live at least as long as
+ *	  the TypeInfo structure.
+ */
+void
+getTypeInfo(TypeInfo *d, Oid type, IOFuncSelector which_func, MemoryContext mcxt)
+{
+	/* Record the request itself next to the data looked up for it. */
+	d->mcxt = mcxt;
+	d->which_func = which_func;
+	d->type = type;
+
+	get_type_io_data(d->type, d->which_func,
+					 &d->typlen, &d->typbyval, &d->typalign,
+					 &d->typdelim, &d->typioparam, &d->typiofunc);
+	fmgr_info_cxt(d->typiofunc, &d->proc, mcxt);
+
+	get_type_category_preferred(d->type, &d->typcategory, &d->typispreferred);
+}
+
 #ifdef NOT_USED
 char
 get_typalign(Oid typid)
diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c
index 0995a75..dfd4136 100644
--- a/src/backend/utils/mb/mbutils.c
+++ b/src/backend/utils/mb/mbutils.c
@@ -568,6 +568,36 @@ pg_server_to_client(const char *s, int len)
 }
 
 /*
+ * server_to_utf8, utf8_to_server
+ *	  Convenience wrappers around pg_do_encoding_conversion for UTF-8.
+ *
+ *	  Sometimes, it makes more sense to operate primarily in UTF-8 rather than
+ *	  the server encoding.  For instance, the JSON data type operates in UTF-8
+ *	  because it needs to encode/decode individual characters when dealing with
+ *	  Unicode escapes, but there is no simple and efficient way to do that
+ *	  in the server encoding.
+ *
+ *	  As with pg_do_encoding_conversion, the pointer passed in is returned
+ *	  unchanged when no conversion is done.
+ *
+ *	  These functions are no-ops when the server encoding is UTF-8.
+ */
+
+char *
+server_to_utf8(const char *s, int len)
+{
+	return (char *) pg_do_encoding_conversion((unsigned char *) s, len,
+											  GetDatabaseEncoding(), PG_UTF8);
+}
+
+char *
+utf8_to_server(const char *s, int len)
+{
+	return (char *) pg_do_encoding_conversion((unsigned char *) s, len,
+											  PG_UTF8, GetDatabaseEncoding());
+}
+
+/*
  *	Perform default encoding conversion using cached FmgrInfo. Since
  *	this function does not access database at all, it is safe to call
  *	outside transactions.  If the conversion has not been set up by
diff --git a/src/backend/utils/mb/wchar.c b/src/backend/utils/mb/wchar.c
index 4b98c8b..8c56592 100644
--- a/src/backend/utils/mb/wchar.c
+++ b/src/backend/utils/mb/wchar.c
@@ -1390,18 +1390,24 @@ pg_mic_mblen(const unsigned char *mbstr)
 	return pg_mule_mblen(mbstr);
 }
 
+/* Return the mblen callback for encoding, or SQL_ASCII's if out of range. */
+static mblen_converter
+encoding_mblen_converter(int encoding)
+{
+	Assert(PG_VALID_ENCODING(encoding));
+	if (encoding < 0 ||
+		encoding >= sizeof(pg_wchar_table) / sizeof(pg_wchar_tbl))
+		encoding = PG_SQL_ASCII;
+	return pg_wchar_table[encoding].mblen;
+}
+
 /*
  * Returns the byte length of a multibyte character.
  */
 int
 pg_encoding_mblen(int encoding, const char *mbstr)
 {
-	Assert(PG_VALID_ENCODING(encoding));
-
-	return ((encoding >= 0 &&
-			 encoding < sizeof(pg_wchar_table) / sizeof(pg_wchar_tbl)) ?
-		((*pg_wchar_table[encoding].mblen) ((const unsigned char *) mbstr)) :
-	((*pg_wchar_table[PG_SQL_ASCII].mblen) ((const unsigned char *) mbstr)));
+	return (*encoding_mblen_converter(encoding)) ((const unsigned char *) mbstr);
 }
 
 /*
@@ -1641,4 +1647,120 @@ report_untranslatable_char(int src_encoding, int dest_encoding,
 			 pg_enc2name_tbl[dest_encoding].name)));
 }
 
+/*
+ * pg_substring
+ *	  Find substring bounds in a string in the database encoding.
+ *
+ *	  The requested start and length are clipped to fit the string.
+ *
+ *	  src and srcbytes:			input string slice
+ *	  start and length:			start and number of characters requested
+ *	  out_start and out_bytes:	substring slice
+ *	  out_length:				number of characters in substring
+ *
+ *	  Unlike the SQL substring function, the start argument
+ *	  of this function is zero-based.
+ *
+ *	  Example (assume UTF-8 all around):
+ *		 const char *in = "⁰ ¹ ² ³"; // "\342\201\260 \302\271 \302\262 \302\263"
+ *		 const char *out_start;
+ *		 int		 out_bytes;
+ *		 int		 out_chars;
+ *
+ *		 pg_substring(in, strlen(in),
+ *					  2, 100,
+ *					  &out_start, &out_bytes, &out_chars);
+ *
+ *	  out_start will point to the "¹", or "\302\271".
+ *	  out_bytes will be 8.
+ *	  out_chars will be 5.
+ */
+void
+pg_substring(const char *src, int srcbytes,
+			 int start, int length,
+			 const char **out_start, int *out_bytes, int *out_length)
+{
+	pg_encoding_substring(GetDatabaseEncoding(),
+						  src, srcbytes,
+						  start, length,
+						  out_start, out_bytes, out_length);
+}
+
+/*
+ * pg_encoding_substring
+ *	  Find substring bounds in a string of a given encoding.  A negative
+ *	  start shortens length by the same amount; start and length are then
+ *	  clipped to the string, so a start past the end gives an empty result.
+ */
+void
+pg_encoding_substring(int encoding,
+					  const char *src, int srcbytes,
+					  int start, int length,
+					  const char **out_start, int *out_bytes, int *out_length)
+{
+	const char *e = src + srcbytes;
+	const char *sub_start;
+	const char *sub_end;
+	int			sub_length;
+	mblen_converter mblen;
+	int			len;
+
+	if (start < 0)
+	{
+		length += start;
+		start = 0;
+	}
+	if (length < 0)
+		length = 0;
+
+	/* optimization for single-byte encoding */
+	if (pg_encoding_max_length(encoding) == 1)
+	{
+		start = Min(start, srcbytes);	/* clip, as the loops below do */
+		*out_start = src + start;
+		*out_bytes = *out_length = Min(length, srcbytes - start);
+		return;
+	}
+
+	/* Look up the length callback once instead of once per character. */
+	mblen = encoding_mblen_converter(encoding);
+
+	/* Find the beginning of the substring. */
+	sub_start = src;
+	while (start > 0 && sub_start < e)
+	{
+		len = (*mblen) ((const unsigned char *) sub_start);
+
+		if (sub_start + len > e)
+		{
+			Assert(false);		/* Clipped multibyte character */
+			break;
+		}
+
+		sub_start += len;
+		start--;
+	}
+
+	/* Find the end and length of the substring. */
+	sub_end = sub_start;
+	sub_length = 0;
+	while (sub_length < length && sub_end < e)
+	{
+		len = (*mblen) ((const unsigned char *) sub_end);
+
+		if (sub_end + len > e)
+		{
+			Assert(false);		/* Clipped multibyte character */
+			break;
+		}
+
+		sub_end += len;
+		sub_length++;
+	}
+
+	*out_start = sub_start;
+	*out_bytes = sub_end - sub_start;
+	*out_length = sub_length;
+}
+
 #endif
diff --git a/src/include/fmgr.h b/src/include/fmgr.h
index c502b96..3d6d0c1 100644
--- a/src/include/fmgr.h
+++ b/src/include/fmgr.h
@@ -544,6 +544,40 @@ extern void **find_rendezvous_variable(const char *varName);
 extern int AggCheckCallContext(FunctionCallInfo fcinfo,
 					MemoryContext *aggcontext);
 
+/*
+ * FN_EXTRA, FN_EXTRA_ALLOC, FN_MCXT
+ *	   Macros for manipulating context preserved across function calls.
+ *	   Each one expands to an expression on fcinfo, which must be in scope.
+ *
+ *	   FN_EXTRA is typically used for caching lookups and other nontrivial
+ *	   operations across multiple calls of a user-defined function.  Do not
+ *	   use FN_EXTRA in a set-returning function; use user_fctx instead.
+ *
+ *	   Typical usage looks like:
+ *
+ *	   my_extra = FN_EXTRA();
+ *	   if (my_extra == NULL)
+ *	   {
+ *		   my_extra = FN_EXTRA_ALLOC(sizeof(MyExtra));
+ *		   my_extra->type_name = NULL;
+ *	   }
+ *
+ *	   if (my_extra->type_name == NULL ||
+ *		   strcmp(my_extra->type_name, type_name) != 0)
+ *	   {
+ *		   my_extra->type_name = MemoryContextStrdup(FN_MCXT(), type_name);
+ *		   my_extra->type_id   = TypenameGetTypid(my_extra->type_name);
+ *	   }
+ */
+#define FN_EXTRA() (fcinfo->flinfo->fn_extra)
+#define FN_EXTRA_ALLOC(size) \
+	(fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, size))
+
+/*
+ * Anything that FN_EXTRA() points to should itself be allocated in FN_MCXT()
+ * so that it is preserved across calls.
+ */
+#define FN_MCXT() (fcinfo->flinfo->fn_mcxt)
 
 /*
  * !!! OLD INTERFACE !!!
diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h
index 389be5c..2500b81 100644
--- a/src/include/mb/pg_wchar.h
+++ b/src/include/mb/pg_wchar.h
@@ -420,6 +420,9 @@ extern unsigned char *pg_do_encoding_conversion(unsigned char *src, int len,
 extern char *pg_client_to_server(const char *s, int len);
 extern char *pg_server_to_client(const char *s, int len);
 
+extern char *server_to_utf8(const char *s, int len);
+extern char *utf8_to_server(const char *s, int len);
+
 extern unsigned short BIG5toCNS(unsigned short big5, unsigned char *lc);
 extern unsigned short CNStoBIG5(unsigned short cns, unsigned char lc);
 
@@ -466,4 +469,12 @@ extern bool pg_utf8_islegal(const unsigned char *source, int length);
 extern WCHAR *pgwin32_toUTF16(const char *str, int len, int *utf16len);
 #endif
 
+extern void pg_substring(const char *src, int srcbytes,
+			 int start, int length,
+			 const char **out_start, int *out_bytes, int *out_length);
+extern void pg_encoding_substring(int encoding,
+					  const char *src, int srcbytes,
+					  int start, int length,
+					  const char **out_start, int *out_bytes, int *out_length);
+
 #endif   /* PG_WCHAR_H */
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
index a4c6180..427fbb3 100644
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -17,6 +17,12 @@
 #include "fmgr.h"
 #include "nodes/parsenodes.h"
 
+typedef struct
+{
+	int			index;			/* oid_out[] slot getEnumLabelOids fills for this label */
+	const char *label;			/* enum label text to look up */
+}	EnumLabel;
+
 /*
  *		Defined in adt/
  */
@@ -163,6 +169,7 @@ extern Datum enum_first(PG_FUNCTION_ARGS);
 extern Datum enum_last(PG_FUNCTION_ARGS);
 extern Datum enum_range_bounds(PG_FUNCTION_ARGS);
 extern Datum enum_range_all(PG_FUNCTION_ARGS);
+extern void getEnumLabelOids(const char *typname, EnumLabel labels[], Oid oid_out[], int count);
 
 /* int.c */
 extern Datum int2in(PG_FUNCTION_ARGS);
@@ -675,6 +682,9 @@ extern text *cstring_to_text(const char *s);
 extern text *cstring_to_text_with_len(const char *s, int len);
 extern char *text_to_cstring(const text *t);
 extern void text_to_cstring_buffer(const text *src, char *dst, size_t dst_len);
+extern char *text_to_utf8_cstring(const text *t);
+extern text *utf8_cstring_to_text(const char *s);
+extern text *utf8_cstring_to_text_with_len(const char *s, int len);
 
 #define CStringGetTextDatum(s) PointerGetDatum(cstring_to_text(s))
 #define TextDatumGetCString(d) text_to_cstring((text *) DatumGetPointer(d))
diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h
index 066ad76..7ec3e35 100644
--- a/src/include/utils/lsyscache.h
+++ b/src/include/utils/lsyscache.h
@@ -15,6 +15,7 @@
 
 #include "access/attnum.h"
 #include "access/htup.h"
+#include "fmgr.h"
 #include "nodes/pg_list.h"
 
 /* I/O function selector for get_type_io_data */
@@ -26,6 +27,24 @@ typedef enum IOFuncSelector
 	IOFunc_send
 } IOFuncSelector;
 
+typedef struct TypeInfo
+{
+	Oid			type;			/* arguments passed to getTypeInfo */
+	IOFuncSelector which_func;
+	MemoryContext mcxt;
+
+	int16		typlen;			/* results of get_type_io_data */
+	bool		typbyval;
+	char		typalign;
+	char		typdelim;
+	Oid			typioparam;
+	Oid			typiofunc;
+	FmgrInfo	proc;			/* typiofunc, set up by fmgr_info_cxt in mcxt */
+
+	char		typcategory;	/* results of get_type_category_preferred */
+	bool		typispreferred;
+} TypeInfo;
+
 /* Hook for plugins to get control in get_attavgwidth() */
 typedef int32 (*get_attavgwidth_hook_type) (Oid relid, AttrNumber attnum);
 extern PGDLLIMPORT get_attavgwidth_hook_type get_attavgwidth_hook;
@@ -106,6 +125,8 @@ extern void get_type_io_data(Oid typid,
 				 char *typdelim,
 				 Oid *typioparam,
 				 Oid *func);
+extern void getTypeInfo(TypeInfo *d, Oid type, IOFuncSelector which_func,
+			MemoryContext mcxt);
 extern char get_typstorage(Oid typid);
 extern Node *get_typdefault(Oid typid);
 extern char get_typtype(Oid typid);