This is an automated email from the git hooks/post-receive script. plessy pushed a commit to branch debian/unstable in repository libgtextutils.
commit 92c38b649ec15120deaff77ee6f39b17f2e28d7b Author: A. Gordon <[email protected]> Date: Tue Mar 24 20:34:46 2009 -0400 Added Natural-Sort predicates. --- src/gtextutils/Makefile.am | 16 ++-- src/gtextutils/natsort.h | 110 ++++++++++++++++++++++++++++ src/gtextutils/strnatcmp.c | 178 +++++++++++++++++++++++++++++++++++++++++++++ src/gtextutils/strnatcmp.h | 31 ++++++++ 4 files changed, 329 insertions(+), 6 deletions(-) diff --git a/src/gtextutils/Makefile.am b/src/gtextutils/Makefile.am index f99ba43..c7ac53b 100644 --- a/src/gtextutils/Makefile.am +++ b/src/gtextutils/Makefile.am @@ -9,14 +9,18 @@ # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -lib_LIBRARIES = libgtextutils-0.1.a +lib_LIBRARIES = libgtextutils-0.2.a -libgtextutils_0_1_a_SOURCES = stream_wrapper.cpp stream_wrapper.h \ +libgtextutils_0_2_a_SOURCES = stream_wrapper.cpp stream_wrapper.h \ text_line_reader.cpp text_line_reader.h \ - print_utils.h + container_join.h \ + natsort.h \ + strnatcmp.c strnatcmp.h -libgtextutils_0_1_a_includedir = $(includedir)/gtextutils-0.1/gtextutils +libgtextutils_0_2_a_includedir = $(includedir)/gtextutils-0.2/gtextutils -libgtextutils_0_1_a_include_HEADERS = print_utils.h \ +libgtextutils_0_2_a_include_HEADERS = container_join.h \ text_line_reader.h \ - stream_wrapper.h + stream_wrapper.h \ + natsort.h \ + strnatcmp.h diff --git a/src/gtextutils/natsort.h b/src/gtextutils/natsort.h new file mode 100644 index 0000000..a3520ef --- /dev/null +++ b/src/gtextutils/natsort.h @@ -0,0 +1,110 @@ +/* + Gordon's Text-Utilities Library + Copyright (C) 2009 Assaf Gordon ([email protected]) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/> +*/ +#ifndef __NATURAL_SORT_STL_H__ +#define __NATURAL_SORT_STL_H__ + +/* + * natsort.h - + * STL-compatible interface to Martin Pool's Natural-Order sorting routines. + * + * see http://sourcefrog.net/projects/natsort/ for more details + * + * Note 1: + * There's a boost equivalent version of 'composable natural sort' + * at http://www.boostcookbook.com/Recipe:/1235053 + * But it requires the boost library (including the regex engine), + * and I prefer not to use it for now. + * + * Note 2: + * As of FSF GNU Coreutils version 7.1, the 'sort' program has a similar sorting order + * called 'version' (with the -V command argument). + * Coreutils's implementation is found in <coreutils-7.1>/src/filevercmp.{ch}. + * The results are similar to Martin Pool's NatSort, but not identical + * if the sorted strings are more complex than <prefixNUM>. + */ + +/* +Usage: + // sort an array in natural order + vector<string> v; + v.push_back("chr20"); + v.push_back("chr10"); + v.push_back("chr5"); + v.push_back("chr1"); + v.push_back("chr2"); + + // "regular" sort + sort(v.begin(), v.end() ); + // order will be: + // chr1 + // chr10 + // chr2 + // chr20 + // chr5 + + // "natural order" sort + sort(v.begin(), v.end(), natural_sort_predicate() ); + // order will be: + // chr1 + // chr2 + // chr5 + // chr10 + // chr20 +*/ + + +extern "C" { +/* + strnatcmp.{h,c} -- Perform 'natural order' comparisons of strings in C. 
+ Copyright (C) 2000, 2004 by Martin Pool <mbp sourcefrog net> +*/ +#include "strnatcmp.h" +} + +#include <string> + +struct natural_sort_predicate : public std::binary_function<std::string, std::string, bool> +{ + bool operator() ( const std::string& s1, const std::string& s2 ) + { + return strnatcmp(s1.c_str(), s2.c_str()) < 0 ; + } +}; + +struct natural_sort_ignore_case_predicate : public std::binary_function<std::string, std::string, bool> +{ + bool operator() ( const std::string& s1, const std::string& s2 ) + { + return strnatcasecmp(s1.c_str(), s2.c_str()) < 0 ; + } +}; + +/* +inline bool natural_sort_predicate(const std::string& s1, const std::string& s2) +{ + return strnatcmp(s1.c_str(), s2.c_str()) < 0 ; +} + +inline bool natural_sort_ignore_case_predicate(const std::string& s1, const std::string& s2) +{ + return strnatcasecmp(s1.c_str(), s2.c_str()) < 0 ; +}*/ + + +#endif + diff --git a/src/gtextutils/strnatcmp.c b/src/gtextutils/strnatcmp.c new file mode 100644 index 0000000..74cbb61 --- /dev/null +++ b/src/gtextutils/strnatcmp.c @@ -0,0 +1,178 @@ +/* -*- mode: c; c-file-style: "k&r" -*- + + strnatcmp.c -- Perform 'natural order' comparisons of strings in C. + Copyright (C) 2000, 2004 by Martin Pool <mbp sourcefrog net> + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. 
Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. +*/ + + +/* partial change history: + * + * 2004-10-10 mbp: Lift out character type dependencies into macros. + * + * Eric Sosman pointed out that ctype functions take a parameter whose + * value must be that of an unsigned int, even on platforms that have + * negative chars in their default char type. + */ + +#include <ctype.h> +#include <string.h> +#include <assert.h> +#include <stdio.h> + +#include "strnatcmp.h" + + +/* These are defined as macros to make it easier to adapt this code to + * different characters types or comparison functions. */ +static inline int +nat_isdigit(nat_char a) +{ + return isdigit((unsigned char) a); +} + + +static inline int +nat_isspace(nat_char a) +{ + return isspace((unsigned char) a); +} + + +static inline nat_char +nat_toupper(nat_char a) +{ + return toupper((unsigned char) a); +} + + + +static int +compare_right(nat_char const *a, nat_char const *b) +{ + int bias = 0; + + /* The longest run of digits wins. That aside, the greatest + value wins, but we can't know that it will until we've scanned + both numbers to know that they have the same magnitude, so we + remember it in BIAS. */ + for (;; a++, b++) { + if (!nat_isdigit(*a) && !nat_isdigit(*b)) + return bias; + else if (!nat_isdigit(*a)) + return -1; + else if (!nat_isdigit(*b)) + return +1; + else if (*a < *b) { + if (!bias) + bias = -1; + } else if (*a > *b) { + if (!bias) + bias = +1; + } else if (!*a && !*b) + return bias; + } + + return 0; +} + + +static int +compare_left(nat_char const *a, nat_char const *b) +{ + /* Compare two left-aligned numbers: the first to have a + different value wins. 
*/ + for (;; a++, b++) { + if (!nat_isdigit(*a) && !nat_isdigit(*b)) + return 0; + else if (!nat_isdigit(*a)) + return -1; + else if (!nat_isdigit(*b)) + return +1; + else if (*a < *b) + return -1; + else if (*a > *b) + return +1; + } + + return 0; +} + + +static int strnatcmp0(nat_char const *a, nat_char const *b, int fold_case) +{ + int ai, bi; + nat_char ca, cb; + int fractional, result; + + assert(a && b); + ai = bi = 0; + while (1) { + ca = a[ai]; cb = b[bi]; + + /* skip over leading spaces or zeros */ + while (nat_isspace(ca)) + ca = a[++ai]; + + while (nat_isspace(cb)) + cb = b[++bi]; + + /* process run of digits */ + if (nat_isdigit(ca) && nat_isdigit(cb)) { + fractional = (ca == '0' || cb == '0'); + + if (fractional) { + if ((result = compare_left(a+ai, b+bi)) != 0) + return result; + } else { + if ((result = compare_right(a+ai, b+bi)) != 0) + return result; + } + } + + if (!ca && !cb) { + /* The strings compare the same. Perhaps the caller + will want to call strcmp to break the tie. */ + return 0; + } + + if (fold_case) { + ca = nat_toupper(ca); + cb = nat_toupper(cb); + } + + if (ca < cb) + return -1; + else if (ca > cb) + return +1; + + ++ai; ++bi; + } +} + + + +int strnatcmp(nat_char const *a, nat_char const *b) { + return strnatcmp0(a, b, 0); +} + + +/* Compare, recognizing numeric string and ignoring case. */ +int strnatcasecmp(nat_char const *a, nat_char const *b) { + return strnatcmp0(a, b, 1); +} diff --git a/src/gtextutils/strnatcmp.h b/src/gtextutils/strnatcmp.h new file mode 100644 index 0000000..51a3c4e --- /dev/null +++ b/src/gtextutils/strnatcmp.h @@ -0,0 +1,31 @@ +/* -*- mode: c; c-file-style: "k&r" -*- + + strnatcmp.c -- Perform 'natural order' comparisons of strings in C. + Copyright (C) 2000, 2004 by Martin Pool <mbp sourcefrog net> + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. 
+ + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. +*/ + + +/* CUSTOMIZATION SECTION + * + * You can change this typedef, but must then also change the inline + * functions in strnatcmp.c */ +typedef char nat_char; + +int strnatcmp(nat_char const *a, nat_char const *b); +int strnatcasecmp(nat_char const *a, nat_char const *b); -- Alioth's /git/debian-med/git-commit-notice on /srv/git.debian.org/git/debian-med/libgtextutils.git _______________________________________________ debian-med-commit mailing list [email protected] http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/debian-med-commit
