http://git-wip-us.apache.org/repos/asf/couchdb/blob/e62a4fc1/apps/couch_collate/platform/osx/icu/unicode/umsg.h ---------------------------------------------------------------------- diff --git a/apps/couch_collate/platform/osx/icu/unicode/umsg.h b/apps/couch_collate/platform/osx/icu/unicode/umsg.h new file mode 100644 index 0000000..32ed063 --- /dev/null +++ b/apps/couch_collate/platform/osx/icu/unicode/umsg.h @@ -0,0 +1,647 @@ +/* +******************************************************************************* +* Copyright (C) 1996-2006, International Business Machines Corporation +* and others. All Rights Reserved. +******************************************************************************* +* +* file name: umsg.h +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* Change history: +* +* 08/5/2001 Ram Added C wrappers for C++ API. +* +* +*/ + +#ifndef UMSG_H +#define UMSG_H + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/uloc.h" +#include "unicode/parseerr.h" +#include <stdarg.h> +/** + * \file + * \brief C API: MessageFormat + * + * <h2>Message Format C API </h2> + * + * Provides means to produce concatenated messages in language-neutral way. + * Use this for all concatenations that show up to end users. + * <P> + * Takes a set of objects, formats them, then inserts the formatted + * strings into the pattern at the appropriate places. + * <P> + * Here are some examples of usage: + * Example 1: + * <pre> + * \code + * UChar *result, *tzID, *str; + * UChar pattern[100]; + * int32_t resultLengthOut, resultlength; + * UCalendar *cal; + * UDate d1; + * UDateFormat *def1; + * UErrorCode status = U_ZERO_ERROR; + * + * str=(UChar*)malloc(sizeof(UChar) * (strlen("disturbance in force") +1)); + * u_uastrcpy(str, "disturbance in force"); + * tzID=(UChar*)malloc(sizeof(UChar) * 4); + * u_uastrcpy(tzID, "PST"); + * cal=ucal_open(tzID, u_strlen(tzID), "en_US", UCAL_TRADITIONAL, &status); + * ucal_setDateTime(cal, 1999, UCAL_MARCH, 18, 0, 0, 0, &status); + * d1=ucal_getMillis(cal, &status); + * u_uastrcpy(pattern, "On {0, date, long}, there was a {1} on planet {2,number,integer}"); + * resultlength=0; + * resultLengthOut=u_formatMessage( "en_US", pattern, u_strlen(pattern), NULL, resultlength, &status, d1, str, 7); + * if(status==U_BUFFER_OVERFLOW_ERROR){ + * status=U_ZERO_ERROR; + * resultlength=resultLengthOut+1; + * result=(UChar*)realloc(result, sizeof(UChar) * resultlength); + * u_formatMessage( "en_US", pattern, u_strlen(pattern), result, resultlength, &status, d1, str, 7); + * } + * printf("%s\n", austrdup(result) );//austrdup( a function used to convert UChar* to char*) + * //output>: "On March 18, 1999, there was a disturbance in force on planet 7 + * \endcode + * </pre> + * Typically, the message format will come from resources, and the + * arguments will be dynamically set at runtime. + * <P> + * Example 2: + * <pre> + * \code + * UChar* str; + * UErrorCode status = U_ZERO_ERROR; + * UChar *result; + * UChar pattern[100]; + * int32_t resultlength, resultLengthOut, i; + * double testArgs= { 100.0, 1.0, 0.0}; + * + * str=(UChar*)malloc(sizeof(UChar) * 10); + * u_uastrcpy(str, "MyDisk"); + * u_uastrcpy(pattern, "The disk {1} contains {0,choice,0#no files|1#one file|1<{0,number,integer} files}"); + * for(i=0; i<3; i++){ + * resultlength=0; + * resultLengthOut=u_formatMessage( "en_US", pattern, u_strlen(pattern), NULL, resultlength, &status, testArgs[i], str); + * if(status==U_BUFFER_OVERFLOW_ERROR){ + * status=U_ZERO_ERROR; + * resultlength=resultLengthOut+1; + * result=(UChar*)malloc(sizeof(UChar) * resultlength); + * u_formatMessage( "en_US", pattern, u_strlen(pattern), result, resultlength, &status, testArgs[i], str); + * } + * printf("%s\n", austrdup(result) ); //austrdup( a function used to convert UChar* to char*) + * free(result); + * } + * // output, with different testArgs: + * // output: The disk "MyDisk" contains 100 files. + * // output: The disk "MyDisk" contains one file. + * // output: The disk "MyDisk" contains no files. + * \endcode + * </pre> + * + * The pattern is of the following form. Legend: + * <pre> + * \code + * {optional item} + * (group that may be repeated)* + * \endcode + * </pre> + * Do not confuse optional items with items inside quotes braces, such + * as this: "{". Quoted braces are literals. + * <pre> + * \code + * messageFormatPattern := string ( "{" messageFormatElement "}" string )* + * + * messageFormatElement := argument { "," elementFormat } + * + * elementFormat := "time" { "," datetimeStyle } + * | "date" { "," datetimeStyle } + * | "number" { "," numberStyle } + * | "choice" "," choiceStyle + * + * datetimeStyle := "short" + * | "medium" + * | "long" + * | "full" + * | dateFormatPattern + * + * numberStyle := "currency" + * | "percent" + * | "integer" + * | numberFormatPattern + * + * choiceStyle := choiceFormatPattern + * \endcode + * </pre> + * If there is no elementFormat, then the argument must be a string, + * which is substituted. If there is no dateTimeStyle or numberStyle, + * then the default format is used (e.g. NumberFormat.getInstance(), + * DateFormat.getDefaultTime() or DateFormat.getDefaultDate(). For + * a ChoiceFormat, the pattern must always be specified, since there + * is no default. + * <P> + * In strings, single quotes can be used to quote the "{" sign if + * necessary. A real single quote is represented by ''. Inside a + * messageFormatElement, quotes are [not] removed. For example, + * {1,number,$'#',##} will produce a number format with the pound-sign + * quoted, with a result such as: "$#31,45". + * <P> + * If a pattern is used, then unquoted braces in the pattern, if any, + * must match: that is, "ab {0} de" and "ab '}' de" are ok, but "ab + * {0'}' de" and "ab } de" are not. + * <p> + * <dl><dt><b>Warning:</b><dd>The rules for using quotes within message + * format patterns unfortunately have shown to be somewhat confusing. + * In particular, it isn't always obvious to localizers whether single + * quotes need to be doubled or not. Make sure to inform localizers about + * the rules, and tell them (for example, by using comments in resource + * bundle source files) which strings will be processed by MessageFormat. + * Note that localizers may need to use single quotes in translated + * strings where the original version doesn't have them. + * <br>Note also that the simplest way to avoid the problem is to + * use the real apostrophe (single quote) character U+2019 (') for + * human-readable text, and to use the ASCII apostrophe (U+0027 ' ) + * only in program syntax, like quoting in MessageFormat. + * See the annotations for U+0027 Apostrophe in The Unicode Standard.</p> + * </dl> + * <P> + * The argument is a number from 0 to 9, which corresponds to the + * arguments presented in an array to be formatted. + * <P> + * It is ok to have unused arguments in the array. With missing + * arguments or arguments that are not of the right class for the + * specified format, a failing UErrorCode result is set. + * <P> + + * <P> + * [Note:] As we see above, the string produced by a choice Format in + * MessageFormat is treated specially; occurances of '{' are used to + * indicated subformats. + * <P> + * [Note:] Formats are numbered by order of variable in the string. + * This is [not] the same as the argument numbering! + * <pre> + * \code + * For example: with "abc{2}def{3}ghi{0}...", + * + * format0 affects the first variable {2} + * format1 affects the second variable {3} + * format2 affects the second variable {0} + * \endcode + * </pre> + * and so on. + */ + +/** + * Format a message for a locale. + * This function may perform re-ordering of the arguments depending on the + * locale. For all numeric arguments, double is assumed unless the type is + * explicitly integer. All choice format arguments must be of type double. + * @param locale The locale for which the message will be formatted + * @param pattern The pattern specifying the message's format + * @param patternLength The length of pattern + * @param result A pointer to a buffer to receive the formatted message. + * @param resultLength The maximum size of result. + * @param status A pointer to an UErrorCode to receive any errors + * @param ... A variable-length argument list containing the arguments specified + * in pattern. + * @return The total buffer size needed; if greater than resultLength, the + * output was truncated. + * @see u_parseMessage + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_formatMessage(const char *locale, + const UChar *pattern, + int32_t patternLength, + UChar *result, + int32_t resultLength, + UErrorCode *status, + ...); + +/** + * Format a message for a locale. + * This function may perform re-ordering of the arguments depending on the + * locale. For all numeric arguments, double is assumed unless the type is + * explicitly integer. All choice format arguments must be of type double. + * @param locale The locale for which the message will be formatted + * @param pattern The pattern specifying the message's format + * @param patternLength The length of pattern + * @param result A pointer to a buffer to receive the formatted message. + * @param resultLength The maximum size of result. + * @param ap A variable-length argument list containing the arguments specified + * @param status A pointer to an UErrorCode to receive any errors + * in pattern. + * @return The total buffer size needed; if greater than resultLength, the + * output was truncated. + * @see u_parseMessage + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_vformatMessage( const char *locale, + const UChar *pattern, + int32_t patternLength, + UChar *result, + int32_t resultLength, + va_list ap, + UErrorCode *status); + +/** + * Parse a message. + * For numeric arguments, this function will always use doubles. Integer types + * should not be passed. + * This function is not able to parse all output from {@link #u_formatMessage }. + * @param locale The locale for which the message is formatted + * @param pattern The pattern specifying the message's format + * @param patternLength The length of pattern + * @param source The text to parse. + * @param sourceLength The length of source, or -1 if null-terminated. + * @param status A pointer to an UErrorCode to receive any errors + * @param ... A variable-length argument list containing the arguments + * specified in pattern. + * @see u_formatMessage + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +u_parseMessage( const char *locale, + const UChar *pattern, + int32_t patternLength, + const UChar *source, + int32_t sourceLength, + UErrorCode *status, + ...); + +/** + * Parse a message. + * For numeric arguments, this function will always use doubles. Integer types + * should not be passed. + * This function is not able to parse all output from {@link #u_formatMessage }. + * @param locale The locale for which the message is formatted + * @param pattern The pattern specifying the message's format + * @param patternLength The length of pattern + * @param source The text to parse. + * @param sourceLength The length of source, or -1 if null-terminated. + * @param ap A variable-length argument list containing the arguments + * @param status A pointer to an UErrorCode to receive any errors + * specified in pattern. + * @see u_formatMessage + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +u_vparseMessage(const char *locale, + const UChar *pattern, + int32_t patternLength, + const UChar *source, + int32_t sourceLength, + va_list ap, + UErrorCode *status); + +/** + * Format a message for a locale. + * This function may perform re-ordering of the arguments depending on the + * locale. For all numeric arguments, double is assumed unless the type is + * explicitly integer. All choice format arguments must be of type double. + * @param locale The locale for which the message will be formatted + * @param pattern The pattern specifying the message's format + * @param patternLength The length of pattern + * @param result A pointer to a buffer to receive the formatted message. + * @param resultLength The maximum size of result. + * @param status A pointer to an UErrorCode to receive any errors + * @param ... A variable-length argument list containing the arguments specified + * in pattern. + * @param parseError A pointer to UParseError to receive information about errors + * occurred during parsing. + * @return The total buffer size needed; if greater than resultLength, the + * output was truncated. + * @see u_parseMessage + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_formatMessageWithError( const char *locale, + const UChar *pattern, + int32_t patternLength, + UChar *result, + int32_t resultLength, + UParseError *parseError, + UErrorCode *status, + ...); + +/** + * Format a message for a locale. + * This function may perform re-ordering of the arguments depending on the + * locale. For all numeric arguments, double is assumed unless the type is + * explicitly integer. All choice format arguments must be of type double. + * @param locale The locale for which the message will be formatted + * @param pattern The pattern specifying the message's format + * @param patternLength The length of pattern + * @param result A pointer to a buffer to receive the formatted message. + * @param resultLength The maximum size of result. + * @param parseError A pointer to UParseError to receive information about errors + * occurred during parsing. + * @param ap A variable-length argument list containing the arguments specified + * @param status A pointer to an UErrorCode to receive any errors + * in pattern. + * @return The total buffer size needed; if greater than resultLength, the + * output was truncated. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_vformatMessageWithError( const char *locale, + const UChar *pattern, + int32_t patternLength, + UChar *result, + int32_t resultLength, + UParseError* parseError, + va_list ap, + UErrorCode *status); + +/** + * Parse a message. + * For numeric arguments, this function will always use doubles. Integer types + * should not be passed. + * This function is not able to parse all output from {@link #u_formatMessage }. + * @param locale The locale for which the message is formatted + * @param pattern The pattern specifying the message's format + * @param patternLength The length of pattern + * @param source The text to parse. + * @param sourceLength The length of source, or -1 if null-terminated. + * @param parseError A pointer to UParseError to receive information about errors + * occurred during parsing. + * @param status A pointer to an UErrorCode to receive any errors + * @param ... A variable-length argument list containing the arguments + * specified in pattern. + * @see u_formatMessage + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +u_parseMessageWithError(const char *locale, + const UChar *pattern, + int32_t patternLength, + const UChar *source, + int32_t sourceLength, + UParseError *parseError, + UErrorCode *status, + ...); + +/** + * Parse a message. + * For numeric arguments, this function will always use doubles. Integer types + * should not be passed. + * This function is not able to parse all output from {@link #u_formatMessage }. + * @param locale The locale for which the message is formatted + * @param pattern The pattern specifying the message's format + * @param patternLength The length of pattern + * @param source The text to parse. + * @param sourceLength The length of source, or -1 if null-terminated. + * @param ap A variable-length argument list containing the arguments + * @param parseError A pointer to UParseError to receive information about errors + * occurred during parsing. + * @param status A pointer to an UErrorCode to receive any errors + * specified in pattern. + * @see u_formatMessage + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +u_vparseMessageWithError(const char *locale, + const UChar *pattern, + int32_t patternLength, + const UChar *source, + int32_t sourceLength, + va_list ap, + UParseError *parseError, + UErrorCode* status); + +/*----------------------- New experimental API --------------------------- */ +/** + * The message format object + * @stable ICU 2.0 + */ +typedef void* UMessageFormat; + + +/** + * Open a message formatter with given pattern and for the given locale. + * @param pattern A pattern specifying the format to use. + * @param patternLength Length of the pattern to use + * @param locale The locale for which the messages are formatted. + * @param parseError A pointer to UParseError struct to receive any errors + * occured during parsing. Can be NULL. + * @param status A pointer to an UErrorCode to receive any errors. + * @return A pointer to a UMessageFormat to use for formatting + * messages, or 0 if an error occurred. + * @stable ICU 2.0 + */ +U_STABLE UMessageFormat* U_EXPORT2 +umsg_open( const UChar *pattern, + int32_t patternLength, + const char *locale, + UParseError *parseError, + UErrorCode *status); + +/** + * Close a UMessageFormat. + * Once closed, a UMessageFormat may no longer be used. + * @param format The formatter to close. + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +umsg_close(UMessageFormat* format); + +/** + * Open a copy of a UMessageFormat. + * This function performs a deep copy. + * @param fmt The formatter to copy + * @param status A pointer to an UErrorCode to receive any errors. + * @return A pointer to a UDateFormat identical to fmt. + * @stable ICU 2.0 + */ +U_STABLE UMessageFormat U_EXPORT2 +umsg_clone(const UMessageFormat *fmt, + UErrorCode *status); + +/** + * Sets the locale. This locale is used for fetching default number or date + * format information. + * @param fmt The formatter to set + * @param locale The locale the formatter should use. + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +umsg_setLocale(UMessageFormat *fmt, + const char* locale); + +/** + * Gets the locale. This locale is used for fetching default number or date + * format information. + * @param fmt The formatter to querry + * @return the locale. + * @stable ICU 2.0 + */ +U_STABLE const char* U_EXPORT2 +umsg_getLocale(const UMessageFormat *fmt); + +/** + * Sets the pattern. + * @param fmt The formatter to use + * @param pattern The pattern to be applied. + * @param patternLength Length of the pattern to use + * @param parseError Struct to receive information on position + * of error if an error is encountered.Can be NULL. + * @param status Output param set to success/failure code on + * exit. If the pattern is invalid, this will be + * set to a failure result. + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +umsg_applyPattern( UMessageFormat *fmt, + const UChar* pattern, + int32_t patternLength, + UParseError* parseError, + UErrorCode* status); + +/** + * Gets the pattern. + * @param fmt The formatter to use + * @param result A pointer to a buffer to receive the pattern. + * @param resultLength The maximum size of result. + * @param status Output param set to success/failure code on + * exit. If the pattern is invalid, this will be + * set to a failure result. + * @return the pattern of the format + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +umsg_toPattern(const UMessageFormat *fmt, + UChar* result, + int32_t resultLength, + UErrorCode* status); + +/** + * Format a message for a locale. + * This function may perform re-ordering of the arguments depending on the + * locale. For all numeric arguments, double is assumed unless the type is + * explicitly integer. All choice format arguments must be of type double. + * @param fmt The formatter to use + * @param result A pointer to a buffer to receive the formatted message. + * @param resultLength The maximum size of result. + * @param status A pointer to an UErrorCode to receive any errors + * @param ... A variable-length argument list containing the arguments + * specified in pattern. + * @return The total buffer size needed; if greater than resultLength, + * the output was truncated. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +umsg_format( const UMessageFormat *fmt, + UChar *result, + int32_t resultLength, + UErrorCode *status, + ...); + +/** + * Format a message for a locale. + * This function may perform re-ordering of the arguments depending on the + * locale. For all numeric arguments, double is assumed unless the type is + * explicitly integer. All choice format arguments must be of type double. + * @param fmt The formatter to use + * @param result A pointer to a buffer to receive the formatted message. + * @param resultLength The maximum size of result. + * @param ap A variable-length argument list containing the arguments + * @param status A pointer to an UErrorCode to receive any errors + * specified in pattern. + * @return The total buffer size needed; if greater than resultLength, + * the output was truncated. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +umsg_vformat( const UMessageFormat *fmt, + UChar *result, + int32_t resultLength, + va_list ap, + UErrorCode *status); + +/** + * Parse a message. + * For numeric arguments, this function will always use doubles. Integer types + * should not be passed. + * This function is not able to parse all output from {@link #umsg_format }. + * @param fmt The formatter to use + * @param source The text to parse. + * @param sourceLength The length of source, or -1 if null-terminated. + * @param count Output param to receive number of elements returned. + * @param status A pointer to an UErrorCode to receive any errors + * @param ... A variable-length argument list containing the arguments + * specified in pattern. + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +umsg_parse( const UMessageFormat *fmt, + const UChar *source, + int32_t sourceLength, + int32_t *count, + UErrorCode *status, + ...); + +/** + * Parse a message. + * For numeric arguments, this function will always use doubles. Integer types + * should not be passed. + * This function is not able to parse all output from {@link #umsg_format }. + * @param fmt The formatter to use + * @param source The text to parse. + * @param sourceLength The length of source, or -1 if null-terminated. + * @param count Output param to receive number of elements returned. + * @param ap A variable-length argument list containing the arguments + * @param status A pointer to an UErrorCode to receive any errors + * specified in pattern. + * @see u_formatMessage + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +umsg_vparse(const UMessageFormat *fmt, + const UChar *source, + int32_t sourceLength, + int32_t *count, + va_list ap, + UErrorCode *status); + + +/** + * Convert an 'apostrophe-friendly' pattern into a standard + * pattern. Standard patterns treat all apostrophes as + * quotes, which is problematic in some languages, e.g. + * French, where apostrophe is commonly used. This utility + * assumes that only an unpaired apostrophe immediately before + * a brace is a true quote. Other unpaired apostrophes are paired, + * and the resulting standard pattern string is returned. + * + * <p><b>Note</b> it is not guaranteed that the returned pattern + * is indeed a valid pattern. The only effect is to convert + * between patterns having different quoting semantics. + * + * @param pattern the 'apostrophe-friendly' patttern to convert + * @param patternLength the length of pattern, or -1 if unknown and pattern is null-terminated + * @param dest the buffer for the result, or NULL if preflight only + * @param destCapacity the length of the buffer, or 0 if preflighting + * @param ec the error code + * @return the length of the resulting text, not including trailing null + * if buffer has room for the trailing null, it is provided, otherwise + * not + * @stable ICU 3.4 + */ +U_STABLE int32_t U_EXPORT2 +umsg_autoQuoteApostrophe(const UChar* pattern, + int32_t patternLength, + UChar* dest, + int32_t destCapacity, + UErrorCode* ec); + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif
http://git-wip-us.apache.org/repos/asf/couchdb/blob/e62a4fc1/apps/couch_collate/platform/osx/icu/unicode/unifilt.h ---------------------------------------------------------------------- diff --git a/apps/couch_collate/platform/osx/icu/unicode/unifilt.h b/apps/couch_collate/platform/osx/icu/unicode/unifilt.h new file mode 100644 index 0000000..5bf1ba4 --- /dev/null +++ b/apps/couch_collate/platform/osx/icu/unicode/unifilt.h @@ -0,0 +1,127 @@ +/* +********************************************************************** +* Copyright (C) 1999-2006, International Business Machines Corporation and others. +* All Rights Reserved. +********************************************************************** +* Date Name Description +* 11/17/99 aliu Creation. +********************************************************************** +*/ +#ifndef UNIFILT_H +#define UNIFILT_H + +#include "unicode/unifunct.h" +#include "unicode/unimatch.h" + +/** + * \file + * \brief C++ API: Unicode Filter + */ + +U_NAMESPACE_BEGIN + +/** + * U_ETHER is used to represent character values for positions outside + * a range. For example, transliterator uses this to represent + * characters outside the range contextStart..contextLimit-1. This + * allows explicit matching by rules and UnicodeSets of text outside a + * defined range. + * @stable ICU 3.0 + */ +#define U_ETHER ((UChar)0xFFFF) + +/** + * + * <code>UnicodeFilter</code> defines a protocol for selecting a + * subset of the full range (U+0000 to U+10FFFF) of Unicode characters. + * Currently, filters are used in conjunction with classes like {@link + * Transliterator} to only process selected characters through a + * transformation. + * + * <p>Note: UnicodeFilter currently stubs out two pure virtual methods + * of its base class, UnicodeMatcher. These methods are toPattern() + * and matchesIndexValue(). This is done so that filter classes that + * are not actually used as matchers -- specifically, those in the + * UnicodeFilterLogic component, and those in tests -- can continue to + * work without defining these methods. As long as a filter is not + * used in an RBT during real transliteration, these methods will not + * be called. However, this breaks the UnicodeMatcher base class + * protocol, and it is not a correct solution. + * + * <p>In the future we may revisit the UnicodeMatcher / UnicodeFilter + * hierarchy and either redesign it, or simply remove the stubs in + * UnicodeFilter and force subclasses to implement the full + * UnicodeMatcher protocol. + * + * @see UnicodeFilterLogic + * @stable ICU 2.0 + */ +class U_COMMON_API UnicodeFilter : public UnicodeFunctor, public UnicodeMatcher { + +public: + /** + * Destructor + * @stable ICU 2.0 + */ + virtual ~UnicodeFilter(); + + /** + * Returns <tt>true</tt> for characters that are in the selected + * subset. In other words, if a character is <b>to be + * filtered</b>, then <tt>contains()</tt> returns + * <b><tt>false</tt></b>. + * @stable ICU 2.0 + */ + virtual UBool contains(UChar32 c) const = 0; + + /** + * UnicodeFunctor API. Cast 'this' to a UnicodeMatcher* pointer + * and return the pointer. + * @stable ICU 2.4 + */ + virtual UnicodeMatcher* toMatcher() const; + + /** + * Implement UnicodeMatcher API. + * @stable ICU 2.4 + */ + virtual UMatchDegree matches(const Replaceable& text, + int32_t& offset, + int32_t limit, + UBool incremental); + + /** + * UnicodeFunctor API. Nothing to do. + * @stable ICU 2.4 + */ + virtual void setData(const TransliterationRuleData*); + + /** + * ICU "poor man's RTTI", returns a UClassID for the actual class. + * + * @stable ICU 2.2 + */ + virtual UClassID getDynamicClassID() const = 0; + + /** + * ICU "poor man's RTTI", returns a UClassID for this class. + * + * @stable ICU 2.2 + */ + static UClassID U_EXPORT2 getStaticClassID(); + +protected: + + /* + * Since this class has pure virtual functions, + * a constructor can't be used. + * @stable ICU 2.0 + */ +/* UnicodeFilter();*/ +}; + +/*inline UnicodeFilter::UnicodeFilter() {}*/ + +U_NAMESPACE_END + +#endif http://git-wip-us.apache.org/repos/asf/couchdb/blob/e62a4fc1/apps/couch_collate/platform/osx/icu/unicode/unifunct.h ---------------------------------------------------------------------- diff --git a/apps/couch_collate/platform/osx/icu/unicode/unifunct.h b/apps/couch_collate/platform/osx/icu/unicode/unifunct.h new file mode 100644 index 0000000..3aa7b03 --- /dev/null +++ b/apps/couch_collate/platform/osx/icu/unicode/unifunct.h @@ -0,0 +1,125 @@ +/* +********************************************************************** +* Copyright (c) 2002-2005, International Business Machines Corporation +* and others. All Rights Reserved. +********************************************************************** +* Date Name Description +* 01/14/2002 aliu Creation. +********************************************************************** +*/ +#ifndef UNIFUNCT_H +#define UNIFUNCT_H + +#include "unicode/utypes.h" +#include "unicode/uobject.h" + +/** + * \file + * \brief C++ API: Unicode Functor + */ + +U_NAMESPACE_BEGIN + +class UnicodeMatcher; +class UnicodeReplacer; +class TransliterationRuleData; + +/** + * <code>UnicodeFunctor</code> is an abstract base class for objects + * that perform match and/or replace operations on Unicode strings. + * @author Alan Liu + * @stable ICU 2.4 + */ +class U_COMMON_API UnicodeFunctor : public UObject { + +public: + + /** + * Destructor + * @stable ICU 2.4 + */ + virtual ~UnicodeFunctor(); + + /** + * Return a copy of this object. All UnicodeFunctor objects + * have to support cloning in order to allow classes using + * UnicodeFunctor to implement cloning. + * @stable ICU 2.4 + */ + virtual UnicodeFunctor* clone() const = 0; + + /** + * Cast 'this' to a UnicodeMatcher* pointer and return the + * pointer, or null if this is not a UnicodeMatcher*. Subclasses + * that mix in UnicodeMatcher as a base class must override this. + * This protocol is required because a pointer to a UnicodeFunctor + * cannot be cast to a pointer to a UnicodeMatcher, since + * UnicodeMatcher is a mixin that does not derive from + * UnicodeFunctor. + * @stable ICU 2.4 + */ + virtual UnicodeMatcher* toMatcher() const; + + /** + * Cast 'this' to a UnicodeReplacer* pointer and return the + * pointer, or null if this is not a UnicodeReplacer*. Subclasses + * that mix in UnicodeReplacer as a base class must override this. + * This protocol is required because a pointer to a UnicodeFunctor + * cannot be cast to a pointer to a UnicodeReplacer, since + * UnicodeReplacer is a mixin that does not derive from + * UnicodeFunctor. + * @stable ICU 2.4 + */ + virtual UnicodeReplacer* toReplacer() const; + + /** + * Return the class ID for this class. This is useful only for + * comparing to a return value from getDynamicClassID(). + * @return The class ID for all objects of this class. + * @stable ICU 2.0 + */ + static UClassID U_EXPORT2 getStaticClassID(void); + + /** + * Returns a unique class ID <b>polymorphically</b>. This method + * is to implement a simple version of RTTI, since not all C++ + * compilers support genuine RTTI. Polymorphic operator==() and + * clone() methods call this method. + * + * <p>Concrete subclasses of UnicodeFunctor should use the macro + * UOBJECT_DEFINE_RTTI_IMPLEMENTATION from uobject.h to + * provide definitios getStaticClassID and getDynamicClassID. + * + * @return The class ID for this object. All objects of a given + * class have the same class ID. Objects of other classes have + * different class IDs. + * @stable ICU 2.4 + */ + virtual UClassID getDynamicClassID(void) const = 0; + + /** + * Set the data object associated with this functor. The data + * object provides context for functor-to-standin mapping. This + * method is required when assigning a functor to a different data + * object. This function MAY GO AWAY later if the architecture is + * changed to pass data object pointers through the API. + * @internal ICU 2.1 + */ + virtual void setData(const TransliterationRuleData*) = 0; + +protected: + + /** + * Since this class has pure virtual functions, + * a constructor can't be used. + * @stable ICU 2.0 + */ + /*UnicodeFunctor();*/ + +}; + +/*inline UnicodeFunctor::UnicodeFunctor() {}*/ + +U_NAMESPACE_END + +#endif http://git-wip-us.apache.org/repos/asf/couchdb/blob/e62a4fc1/apps/couch_collate/platform/osx/icu/unicode/unimatch.h ---------------------------------------------------------------------- diff --git a/apps/couch_collate/platform/osx/icu/unicode/unimatch.h b/apps/couch_collate/platform/osx/icu/unicode/unimatch.h new file mode 100644 index 0000000..0dbb14e --- /dev/null +++ b/apps/couch_collate/platform/osx/icu/unicode/unimatch.h @@ -0,0 +1,163 @@ +/* +* Copyright (C) 2001-2005, International Business Machines Corporation and others. All Rights Reserved. +********************************************************************** +* Date Name Description +* 07/18/01 aliu Creation. +********************************************************************** +*/ +#ifndef UNIMATCH_H +#define UNIMATCH_H + +#include "unicode/utypes.h" + +/** + * \file + * \brief C++ API: Unicode Matcher + */ + + +U_NAMESPACE_BEGIN + +class Replaceable; +class UnicodeString; +class UnicodeSet; + +/** + * Constants returned by <code>UnicodeMatcher::matches()</code> + * indicating the degree of match. + * @stable ICU 2.4 + */ +enum UMatchDegree { + /** + * Constant returned by <code>matches()</code> indicating a + * mismatch between the text and this matcher. The text contains + * a character which does not match, or the text does not contain + * all desired characters for a non-incremental match. + * @stable ICU 2.4 + */ + U_MISMATCH, + + /** + * Constant returned by <code>matches()</code> indicating a + * partial match between the text and this matcher. This value is + * only returned for incremental match operations. All characters + * of the text match, but more characters are required for a + * complete match. Alternatively, for variable-length matchers, + * all characters of the text match, and if more characters were + * supplied at limit, they might also match. + * @stable ICU 2.4 + */ + U_PARTIAL_MATCH, + + /** + * Constant returned by <code>matches()</code> indicating a + * complete match between the text and this matcher. For an + * incremental variable-length match, this value is returned if + * the given text matches, and it is known that additional + * characters would not alter the extent of the match. + * @stable ICU 2.4 + */ + U_MATCH +}; + +/** + * <code>UnicodeMatcher</code> defines a protocol for objects that can + * match a range of characters in a Replaceable string. + * @stable ICU 2.4 + */ +class U_COMMON_API UnicodeMatcher /* not : public UObject because this is an interface/mixin class */ { + +public: + /** + * Destructor. + * @stable ICU 2.4 + */ + virtual ~UnicodeMatcher(); + + /** + * Return a UMatchDegree value indicating the degree of match for + * the given text at the given offset. Zero, one, or more + * characters may be matched. + * + * Matching in the forward direction is indicated by limit > + * offset. Characters from offset forwards to limit-1 will be + * considered for matching. + * + * Matching in the reverse direction is indicated by limit < + * offset. Characters from offset backwards to limit+1 will be + * considered for matching. + * + * If limit == offset then the only match possible is a zero + * character match (which subclasses may implement if desired). + * + * As a side effect, advance the offset parameter to the limit of + * the matched substring. In the forward direction, this will be + * the index of the last matched character plus one. In the + * reverse direction, this will be the index of the last matched + * character minus one. + * + * <p>Note: This method is not const because some classes may + * modify their state as the result of a match. + * + * @param text the text to be matched + * @param offset on input, the index into text at which to begin + * matching. On output, the limit of the matched text. The + * number of matched characters is the output value of offset + * minus the input value. Offset should always point to the + * HIGH SURROGATE (leading code unit) of a pair of surrogates, + * both on entry and upon return. + * @param limit the limit index of text to be matched. Greater + * than offset for a forward direction match, less than offset for + * a backward direction match. The last character to be + * considered for matching will be text.charAt(limit-1) in the + * forward direction or text.charAt(limit+1) in the backward + * direction. + * @param incremental if TRUE, then assume further characters may + * be inserted at limit and check for partial matching. Otherwise + * assume the text as given is complete. + * @return a match degree value indicating a full match, a partial + * match, or a mismatch. If incremental is FALSE then + * U_PARTIAL_MATCH should never be returned. + * @stable ICU 2.4 + */ + virtual UMatchDegree matches(const Replaceable& text, + int32_t& offset, + int32_t limit, + UBool incremental) = 0; + + /** + * Returns a string representation of this matcher. If the result of + * calling this function is passed to the appropriate parser, it + * will produce another matcher that is equal to this one. + * @param result the string to receive the pattern. Previous + * contents will be deleted. + * @param escapeUnprintable if TRUE then convert unprintable + * character to their hex escape representations, \\uxxxx or + * \\Uxxxxxxxx. Unprintable characters are those other than + * U+000A, U+0020..U+007E. + * @stable ICU 2.4 + */ + virtual UnicodeString& toPattern(UnicodeString& result, + UBool escapeUnprintable = FALSE) const = 0; + + /** + * Returns TRUE if this matcher will match a character c, where c + * & 0xFF == v, at offset, in the forward direction (with limit > + * offset). This is used by <tt>RuleBasedTransliterator</tt> for + * indexing. + * @stable ICU 2.4 + */ + virtual UBool matchesIndexValue(uint8_t v) const = 0; + + /** + * Union the set of all characters that may be matched by this object + * into the given set. + * @param toUnionTo the set into which to union the source characters + * @stable ICU 2.4 + */ + virtual void addMatchSetTo(UnicodeSet& toUnionTo) const = 0; +}; + +U_NAMESPACE_END + +#endif http://git-wip-us.apache.org/repos/asf/couchdb/blob/e62a4fc1/apps/couch_collate/platform/osx/icu/unicode/unirepl.h ---------------------------------------------------------------------- diff --git a/apps/couch_collate/platform/osx/icu/unicode/unirepl.h b/apps/couch_collate/platform/osx/icu/unicode/unirepl.h new file mode 100644 index 0000000..6b7746b --- /dev/null +++ b/apps/couch_collate/platform/osx/icu/unicode/unirepl.h @@ -0,0 +1,97 @@ +/* +********************************************************************** +* Copyright (c) 2002-2005, International Business Machines Corporation +* and others. All Rights Reserved. +********************************************************************** +* Date Name Description +* 01/14/2002 aliu Creation. +********************************************************************** +*/ +#ifndef UNIREPL_H +#define UNIREPL_H + +#include "unicode/utypes.h" + +/** + * \file + * \brief C++ API: UnicodeReplacer + */ + +U_NAMESPACE_BEGIN + +class Replaceable; +class UnicodeString; +class UnicodeSet; + +/** + * <code>UnicodeReplacer</code> defines a protocol for objects that + * replace a range of characters in a Replaceable string with output + * text. The replacement is done via the Replaceable API so as to + * preserve out-of-band data. + * + * <p>This is a mixin class. + * @author Alan Liu + * @stable ICU 2.4 + */ +class U_I18N_API UnicodeReplacer /* not : public UObject because this is an interface/mixin class */ { + + public: + + /** + * Destructor. + * @stable ICU 2.4 + */ + virtual ~UnicodeReplacer(); + + /** + * Replace characters in 'text' from 'start' to 'limit' with the + * output text of this object. Update the 'cursor' parameter to + * give the cursor position and return the length of the + * replacement text. + * + * @param text the text to be matched + * @param start inclusive start index of text to be replaced + * @param limit exclusive end index of text to be replaced; + * must be greater than or equal to start + * @param cursor output parameter for the cursor position. + * Not all replacer objects will update this, but in a complete + * tree of replacer objects, representing the entire output side + * of a transliteration rule, at least one must update it. + * @return the number of 16-bit code units in the text replacing + * the characters at offsets start..(limit-1) in text + * @stable ICU 2.4 + */ + virtual int32_t replace(Replaceable& text, + int32_t start, + int32_t limit, + int32_t& cursor) = 0; + + /** + * Returns a string representation of this replacer. If the + * result of calling this function is passed to the appropriate + * parser, typically TransliteratorParser, it will produce another + * replacer that is equal to this one. + * @param result the string to receive the pattern. Previous + * contents will be deleted. + * @param escapeUnprintable if TRUE then convert unprintable + * character to their hex escape representations, \\uxxxx or + * \\Uxxxxxxxx. Unprintable characters are defined by + * Utility.isUnprintable(). + * @return a reference to 'result'. + * @stable ICU 2.4 + */ + virtual UnicodeString& toReplacerPattern(UnicodeString& result, + UBool escapeUnprintable) const = 0; + + /** + * Union the set of all characters that may output by this object + * into the given set. + * @param toUnionTo the set into which to union the output characters + * @stable ICU 2.4 + */ + virtual void addReplacementSetTo(UnicodeSet& toUnionTo) const = 0; +}; + +U_NAMESPACE_END + +#endif
