Date: Friday, May 18, 2012 @ 05:15:06 Author: andyrtr Revision: 159210
upgpkg: icu 49.1.1-2 fix broken regex; FS#29700 Added: icu/trunk/fix_broken_regex.diff Modified: icu/trunk/PKGBUILD -----------------------+ PKGBUILD | 10 +++++++--- fix_broken_regex.diff | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 3 deletions(-) Modified: PKGBUILD =================================================================== --- PKGBUILD 2012-05-18 01:02:05 UTC (rev 159209) +++ PKGBUILD 2012-05-18 09:15:06 UTC (rev 159210) @@ -4,7 +4,7 @@ pkgname=icu pkgver=49.1.1 -pkgrel=1 +pkgrel=2 pkgdesc="International Components for Unicode library" arch=(i686 x86_64) url="http://www.icu-project.org/" @@ -12,14 +12,18 @@ depends=('gcc-libs' 'sh') source=(#http://download.icu-project.org/files/${pkgname}4c/${pkgver}/${pkgname}4c-${pkgver/./_}-src.tgz http://download.icu-project.org/files/${pkgname}4c/${pkgver}/${pkgname}4c-${pkgver//./_}-src.tgz - icu.8198.revert.icu5431.patch) + icu.8198.revert.icu5431.patch + fix_broken_regex.diff) md5sums=('7c53f83e0327343f4060c0eb83842daf' - 'ebd5470fc969c75e52baf4af94a9ee82') + 'ebd5470fc969c75e52baf4af94a9ee82' + '5bbcd600fdf9b35cbd89a06cab522f3f') build() { cd ${srcdir}/icu/source # fix Malayalam encoding https://bugzilla.redhat.com/show_bug.cgi?id=654200 patch -Rp3 -i ${srcdir}/icu.8198.revert.icu5431.patch + # patch broken regex - https://bugs.archlinux.org/task/29700 / http://bugs.icu-project.org/trac/ticket/9276 + patch -Np0 -i ${srcdir}/fix_broken_regex.diff ./configure --prefix=/usr \ --sysconfdir=/etc \ --mandir=/usr/share/man Added: fix_broken_regex.diff =================================================================== --- fix_broken_regex.diff (rev 0) +++ fix_broken_regex.diff 2012-05-18 09:15:06 UTC (rev 159210) @@ -0,0 +1,35 @@ +--- i18n/regexcmp.cpp ++++ i18n/regexcmp.cpp +@@ -3307,8 +3307,29 @@ + + case URX_STRING_I: +- // TODO: Is the case-folded string the longest? +- // If so we can optimize this the same as URX_STRING. +- loc++; +- currentLen = INT32_MAX; ++ // TODO: This code assumes that any user string that matches will be no longer ++ // than our compiled string, with case insensitive matching. ++ // Our compiled string has been case-folded already. ++ // ++ // Any matching user string will have no more code points than our ++ // compiled (folded) string. Folding may add code points, but ++ // not remove them. ++ // ++ // There is a potential problem if a supplemental code point ++ // case-folds to a BMP code point. In this case our compiled string ++ // could be shorter (in code units) than a matching user string. ++ // ++ // At this time (Unicode 6.1) there are no such characters, and this case ++ // is not being handled. A test, intltest regex/Bug9283, will fail if ++ // any problematic characters are added to Unicode. ++ // ++ // If this happens, we can make a set of the BMP chars that the ++ // troublesome supplementals fold to, scan our string, and bump the ++ // currentLen one extra for each that is found. ++ // ++ { ++ loc++; ++ int32_t stringLenOp = (int32_t)fRXPat->fCompiledPat->elementAti(loc); ++ currentLen = safeIncrement(currentLen, URX_VAL(stringLenOp)); ++ } + break;
