Script 'mail_helper' called by obssrc
Hello community,
here is the log from the commit of package ghc-utf8-string for openSUSE:Factory
checked in at 2021-01-20 18:25:02
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/ghc-utf8-string (Old)
and /work/SRC/openSUSE:Factory/.ghc-utf8-string.new.28504 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "ghc-utf8-string"
Wed Jan 20 18:25:02 2021 rev:15 rq:862335 version:1.0.2
Changes:
--------
--- /work/SRC/openSUSE:Factory/ghc-utf8-string/ghc-utf8-string.changes
2020-12-22 11:48:30.825961957 +0100
+++
/work/SRC/openSUSE:Factory/.ghc-utf8-string.new.28504/ghc-utf8-string.changes
2021-01-20 18:25:36.199413823 +0100
@@ -1,0 +2,10 @@
+Mon Jan 4 11:06:07 UTC 2021 - [email protected]
+
+- Update utf8-string to version 1.0.2.
+ 1.0.2
+ -----
+ * Add fromChar
+ * Add IsString UTF8 instance
+ * Fixup documentation and tests
+
+-------------------------------------------------------------------
Old:
----
utf8-string-1.0.1.1.tar.gz
utf8-string.cabal
New:
----
utf8-string-1.0.2.tar.gz
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Other differences:
------------------
++++++ ghc-utf8-string.spec ++++++
--- /var/tmp/diff_new_pack.IlLqsQ/_old 2021-01-20 18:25:37.319414889 +0100
+++ /var/tmp/diff_new_pack.IlLqsQ/_new 2021-01-20 18:25:37.319414889 +0100
@@ -1,7 +1,7 @@
#
# spec file for package ghc-utf8-string
#
-# Copyright (c) 2020 SUSE LLC
+# Copyright (c) 2021 SUSE LLC
#
# All modifications and additions to the file contributed by third parties
# remain the property of their copyright owners, unless otherwise agreed
@@ -17,18 +17,21 @@
%global pkg_name utf8-string
+%bcond_with tests
Name: ghc-%{pkg_name}
-Version: 1.0.1.1
+Version: 1.0.2
Release: 0
Summary: Support for reading and writing UTF8 Strings
License: BSD-3-Clause
URL: https://hackage.haskell.org/package/%{pkg_name}
Source0:
https://hackage.haskell.org/package/%{pkg_name}-%{version}/%{pkg_name}-%{version}.tar.gz
-Source1:
https://hackage.haskell.org/package/%{pkg_name}-%{version}/revision/3.cabal#/%{pkg_name}.cabal
BuildRequires: ghc-Cabal-devel
BuildRequires: ghc-bytestring-devel
BuildRequires: ghc-rpm-macros
ExcludeArch: %{ix86}
+%if %{with tests}
+BuildRequires: ghc-HUnit-devel
+%endif
%description
A UTF8 layer for Strings. The utf8-string package provides operations for
@@ -47,7 +50,6 @@
%prep
%autosetup -n %{pkg_name}-%{version}
-cp -p %{SOURCE1} %{pkg_name}.cabal
%build
%ghc_lib_build
@@ -55,6 +57,9 @@
%install
%ghc_lib_install
+%check
+%cabal_test
+
%post devel
%ghc_pkg_recache
++++++ utf8-string-1.0.1.1.tar.gz -> utf8-string-1.0.2.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/utf8-string-1.0.1.1/CHANGELOG.markdown
new/utf8-string-1.0.2/CHANGELOG.markdown
--- old/utf8-string-1.0.1.1/CHANGELOG.markdown 2015-08-23 18:19:40.000000000
+0200
+++ new/utf8-string-1.0.2/CHANGELOG.markdown 2001-09-09 03:46:40.000000000
+0200
@@ -1,3 +1,9 @@
+1.0.2
+-----
+* Add fromChar
+* Add IsString UTF8 instance
+* Fixup documentation and tests
+
1.0.1.1
-----
* Build correctly on GHC-7.0 (#14)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/utf8-string-1.0.1.1/Codec/Binary/UTF8/Generic.hs
new/utf8-string-1.0.2/Codec/Binary/UTF8/Generic.hs
--- old/utf8-string-1.0.1.1/Codec/Binary/UTF8/Generic.hs 2015-08-23
18:19:40.000000000 +0200
+++ new/utf8-string-1.0.2/Codec/Binary/UTF8/Generic.hs 2001-09-09
03:46:40.000000000 +0200
@@ -94,7 +94,7 @@
fromString xs = pack (encode xs)
-- | Convert a UTF8 encoded bytestring into a Haskell string.
--- Invalid characters are replaced with '\xFFFD'.
+-- Invalid characters are replaced with @\'\\0xFFFD\'@.
{-# SPECIALIZE toString :: B.ByteString -> String #-}
{-# SPECIALIZE toString :: L.ByteString -> String #-}
{-# SPECIALIZE toString :: [Word8] -> String #-}
@@ -109,7 +109,7 @@
-- Returns 'Nothing' if there are no more bytes in the byte string.
-- Otherwise, it returns a decoded character and the number of
-- bytes used in its representation.
--- Errors are replaced by character '\0xFFFD'.
+-- Errors are replaced by character @\'\\0xFFFD\'@.
-- XXX: Should we combine sequences of errors into a single replacement
-- character?
@@ -195,7 +195,7 @@
-- | Split a string into two parts: the first is the longest prefix
-- that contains only characters that satisfy the predicate; the second
-- part is the rest of the string.
--- Invalid characters are passed as '\0xFFFD' to the predicate.
+-- Invalid characters are passed as @\'\\0xFFFD\'@ to the predicate.
{-# SPECIALIZE span :: (Char -> Bool) -> B.ByteString ->
(B.ByteString,B.ByteString) #-}
{-# SPECIALIZE span :: (Char -> Bool) -> L.ByteString ->
(L.ByteString,L.ByteString) #-}
{-# SPECIALIZE span :: (Char -> Bool) -> [Word8] -> ([Word8],[Word8]) #-}
@@ -208,13 +208,13 @@
-- | Split a string into two parts: the first is the longest prefix
-- that contains only characters that do not satisfy the predicate; the second
-- part is the rest of the string.
--- Invalid characters are passed as '\0xFFFD' to the predicate.
+-- Invalid characters are passed as @\'\\0xFFFD\'@ to the predicate.
{-# INLINE break #-}
break :: UTF8Bytes b s => (Char -> Bool) -> b -> (b,b)
break p bs = span (not . p) bs
-- | Get the first character of a byte string, if any.
--- Malformed characters are replaced by '\0xFFFD'.
+-- Malformed characters are replaced by @\'\\0xFFFD\'@.
{-# INLINE uncons #-}
uncons :: UTF8Bytes b s => b -> Maybe (Char,b)
uncons bs = do (c,n) <- decode bs
@@ -252,9 +252,9 @@
Nothing -> n
-- | Split a string into a list of lines.
--- Lines are terminated by '\n' or the end of the string.
+-- Lines are terminated by @\'\\n\'@ or the end of the string.
-- Empty lines may not be terminated by the end of the string.
--- See also 'lines\''.
+-- See also 'lines''.
{-# SPECIALIZE lines :: B.ByteString -> [B.ByteString] #-}
{-# SPECIALIZE lines :: L.ByteString -> [L.ByteString] #-}
{-# SPECIALIZE lines :: [Word8] -> [[Word8]] #-}
@@ -266,7 +266,7 @@
Nothing -> [bs]
-- | Split a string into a list of lines.
--- Lines are terminated by '\n' or the end of the string.
+-- Lines are terminated by @\'\\n\'@ or the end of the string.
-- Empty lines may not be terminated by the end of the string.
-- This function preserves the terminators.
-- See also 'lines'.
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/utf8-string-1.0.1.1/Codec/Binary/UTF8/String.hs
new/utf8-string-1.0.2/Codec/Binary/UTF8/String.hs
--- old/utf8-string-1.0.1.1/Codec/Binary/UTF8/String.hs 2015-08-23
18:19:40.000000000 +0200
+++ new/utf8-string-1.0.2/Codec/Binary/UTF8/String.hs 2001-09-09
03:46:40.000000000 +0200
@@ -12,7 +12,7 @@
-- Stability : experimental
-- Portability : portable
--
--- Support for encoding UTF8 Strings to and from @[Word8]@
+-- Support for encoding UTF8 Strings to and from @['Word8']@
--
module Codec.Binary.UTF8.String (
@@ -37,15 +37,15 @@
encodeString xs = map (toEnum . fromEnum) (encode xs)
-- | Decode a string using 'decode' using a 'String' as input.
--- | This is not safe but it is necessary if UTF-8 encoded text
--- | has been loaded into a 'String' prior to being decoded.
+-- This is not safe but it is necessary if UTF-8 encoded text
+-- has been loaded into a 'String' prior to being decoded.
decodeString :: String -> String
decodeString xs = decode (map (toEnum . fromEnum) xs)
replacement_character :: Char
replacement_character = '\xfffd'
--- | Encode a single Haskell Char to a list of Word8 values, in UTF8 format.
+-- | Encode a single Haskell 'Char' to a list of 'Word8' values, in UTF8
format.
encodeChar :: Char -> [Word8]
encodeChar = map fromIntegral . go . ord
where
@@ -67,12 +67,12 @@
]
--- | Encode a Haskell String to a list of Word8 values, in UTF8 format.
+-- | Encode a Haskell 'String' to a list of 'Word8' values, in UTF8 format.
encode :: String -> [Word8]
encode = concatMap encodeChar
--
--- | Decode a UTF8 string packed into a list of Word8 values, directly to
String
+-- | Decode a UTF8 string packed into a list of 'Word8' values, directly to
'String'
--
decode :: [Word8] -> String
decode [ ] = ""
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/utf8-string-1.0.1.1/Data/ByteString/Lazy/UTF8.hs
new/utf8-string-1.0.2/Data/ByteString/Lazy/UTF8.hs
--- old/utf8-string-1.0.1.1/Data/ByteString/Lazy/UTF8.hs 2015-08-23
18:19:40.000000000 +0200
+++ new/utf8-string-1.0.2/Data/ByteString/Lazy/UTF8.hs 2001-09-09
03:46:40.000000000 +0200
@@ -101,7 +101,7 @@
-- DECODING
-- | Convert a UTF8 encoded bytestring into a Haskell string.
--- Invalid characters are replaced with '\xFFFD'.
+-- Invalid characters are replaced with @\'\\0xFFFD\'@.
toString :: B.ByteString -> String
toString bs = foldr (:) [] bs
@@ -113,7 +113,7 @@
-- Returns 'Nothing' if there are no more bytes in the byte string.
-- Otherwise, it returns a decoded character and the number of
-- bytes used in its representation.
--- Errors are replaced by character '\0xFFFD'.
+-- Errors are replaced by character @\'\\0xFFFD\'@.
-- XXX: Should we combine sequences of errors into a single replacement
-- character?
@@ -208,7 +208,7 @@
-- | Split a string into two parts: the first is the longest prefix
-- that contains only characters that satisfy the predicate; the second
-- part is the rest of the string.
--- Invalid characters are passed as '\0xFFFD' to the predicate.
+-- Invalid characters are passed as @\'\\0xFFFD\'@ to the predicate.
span :: (Char -> Bool) -> B.ByteString -> (B.ByteString, B.ByteString)
span p bs = loop 0 bs
where loop a cs = case decode cs of
@@ -218,12 +218,12 @@
-- | Split a string into two parts: the first is the longest prefix
-- that contains only characters that do not satisfy the predicate; the second
-- part is the rest of the string.
--- Invalid characters are passed as '\0xFFFD' to the predicate.
+-- Invalid characters are passed as @\'\\0xFFFD\'@ to the predicate.
break :: (Char -> Bool) -> B.ByteString -> (B.ByteString, B.ByteString)
break p bs = span (not . p) bs
-- | Get the first character of a byte string, if any.
--- Malformed characters are replaced by '\0xFFFD'.
+-- Malformed characters are replaced by @\'\\0xFFFD\'@.
uncons :: B.ByteString -> Maybe (Char,B.ByteString)
uncons bs = do (c,n) <- decode bs
return (c, B.drop n bs)
@@ -251,9 +251,9 @@
Nothing -> n
-- | Split a string into a list of lines.
--- Lines are terminated by '\n' or the end of the string.
+-- Lines are terminated by @\'\\n\'@ or the end of the string.
-- Empty lines may not be terminated by the end of the string.
--- See also 'lines\''.
+-- See also 'lines''.
lines :: B.ByteString -> [B.ByteString]
lines bs | B.null bs = []
lines bs = case B.elemIndex 10 bs of
@@ -262,7 +262,7 @@
Nothing -> [bs]
-- | Split a string into a list of lines.
--- Lines are terminated by '\n' or the end of the string.
+-- Lines are terminated by @\'\\n\'@ or the end of the string.
-- Empty lines may not be terminated by the end of the string.
-- This function preserves the terminators.
-- See also 'lines'.
@@ -288,5 +288,10 @@
createUptoN' l f = do
fp <- S.mallocByteString l
(l', res) <- withForeignPtr fp $ \p -> f p
- assert (l' <= l) $ return (S.PS fp 0 l', res)
+#if MIN_VERSION_bytestring(0,11,0)
+ let bs = S.BS fp l'
+#else
+ let bs = S.PS fp 0 l'
+#endif
+ assert (l' <= l) $ return (bs, res)
{-# INLINE createUptoN' #-}
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/utf8-string-1.0.1.1/Data/ByteString/UTF8.hs
new/utf8-string-1.0.2/Data/ByteString/UTF8.hs
--- old/utf8-string-1.0.1.1/Data/ByteString/UTF8.hs 2015-08-23
18:19:40.000000000 +0200
+++ new/utf8-string-1.0.2/Data/ByteString/UTF8.hs 2001-09-09
03:46:40.000000000 +0200
@@ -26,6 +26,7 @@
, drop
, span
, break
+ , fromChar
, fromString
, toString
, foldl
@@ -43,12 +44,16 @@
import Codec.Binary.UTF8.String(encode)
import Codec.Binary.UTF8.Generic (buncons)
+-- | Converts a Haskell char into a UTF8 encoded bytestring.
+fromChar :: Char -> B.ByteString
+fromChar x = fromString [x]
+
-- | Converts a Haskell string into a UTF8 encoded bytestring.
fromString :: String -> B.ByteString
fromString xs = B.pack (encode xs)
-- | Convert a UTF8 encoded bytestring into a Haskell string.
--- Invalid characters are replaced with '\xFFFD'.
+-- Invalid characters are replaced with @\'\\0xFFFD\'@.
toString :: B.ByteString -> String
toString bs = foldr (:) [] bs
@@ -60,7 +65,7 @@
-- Returns 'Nothing' if there are no more bytes in the byte string.
-- Otherwise, it returns a decoded character and the number of
-- bytes used in its representation.
--- Errors are replaced by character '\0xFFFD'.
+-- Errors are replaced by character @\'\\0xFFFD\'@.
-- XXX: Should we combine sequences of errors into a single replacement
-- character?
@@ -146,7 +151,7 @@
-- | Split a string into two parts: the first is the longest prefix
-- that contains only characters that satisfy the predicate; the second
-- part is the rest of the string.
--- Invalid characters are passed as '\0xFFFD' to the predicate.
+-- Invalid characters are passed as @\'\\0xFFFD\'@ to the predicate.
span :: (Char -> Bool) -> B.ByteString -> (B.ByteString, B.ByteString)
span p bs = loop 0 bs
where loop a cs = case decode cs of
@@ -156,12 +161,12 @@
-- | Split a string into two parts: the first is the longest prefix
-- that contains only characters that do not satisfy the predicate; the second
-- part is the rest of the string.
--- Invalid characters are passed as '\0xFFFD' to the predicate.
+-- Invalid characters are passed as @\'\\0xFFFD\'@ to the predicate.
break :: (Char -> Bool) -> B.ByteString -> (B.ByteString, B.ByteString)
break p bs = span (not . p) bs
-- | Get the first character of a byte string, if any.
--- Malformed characters are replaced by '\0xFFFD'.
+-- Malformed characters are replaced by @\'\\0xFFFD\'@.
uncons :: B.ByteString -> Maybe (Char,B.ByteString)
uncons bs = do (c,n) <- decode bs
return (c, B.drop n bs)
@@ -189,9 +194,9 @@
Nothing -> n
-- | Split a string into a list of lines.
--- Lines are terminated by '\n' or the end of the string.
+-- Lines are terminated by @\'\\n\'@ or the end of the string.
-- Empty lines may not be terminated by the end of the string.
--- See also 'lines\''.
+-- See also 'lines''.
lines :: B.ByteString -> [B.ByteString]
lines bs | B.null bs = []
lines bs = case B.elemIndex 10 bs of
@@ -200,7 +205,7 @@
Nothing -> [bs]
-- | Split a string into a list of lines.
--- Lines are terminated by '\n' or the end of the string.
+-- Lines are terminated by @\'\\n\'@ or the end of the string.
-- Empty lines may not be terminated by the end of the string.
-- This function preserves the terminators.
-- See also 'lines'.
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/utf8-string-1.0.1.1/Data/String/UTF8.hs
new/utf8-string-1.0.2/Data/String/UTF8.hs
--- old/utf8-string-1.0.1.1/Data/String/UTF8.hs 2015-08-23 18:19:40.000000000
+0200
+++ new/utf8-string-1.0.2/Data/String/UTF8.hs 2001-09-09 03:46:40.000000000
+0200
@@ -48,6 +48,7 @@
,foldl,foldr,length,lines,splitAt)
import qualified Codec.Binary.UTF8.Generic as G
import Codec.Binary.UTF8.Generic (UTF8Bytes)
+import qualified Data.String as S
-- | The type of strings that are represented using the UTF8 encoding.
-- The parameter is the type of the container for the representation.
@@ -56,6 +57,9 @@
instance UTF8Bytes string index => Show (UTF8 string) where
show x = show (toString x)
+instance UTF8Bytes string index => S.IsString (UTF8 string) where
+ fromString = fromString
+
fromRep :: string -> UTF8 string
fromRep = Str
@@ -68,7 +72,7 @@
fromString xs = Str (G.fromString xs)
-- | Convert a UTF8 encoded string into a Haskell string.
--- Invalid characters are replaced by 'replacement_char'.
+-- Invalid characters are replaced by 'G.replacement_char'.
-- Complexity: linear.
toString :: UTF8Bytes string index => UTF8 string -> String
toString (Str xs) = G.toString xs
@@ -79,7 +83,6 @@
-- | Split after a given number of characters.
-- Negative values are treated as if they are 0.
--- See also 'bytesSplitAt'.
splitAt :: UTF8Bytes string index
=> index -> UTF8 string -> (UTF8 string, UTF8 string)
splitAt x (Str bs) = case G.splitAt x bs of
@@ -116,7 +119,7 @@
-- | Split a string into two parts: the first is the longest prefix
-- that contains only characters that satisfy the predicate; the second
-- part is the rest of the string.
--- Invalid characters are passed as '\0xFFFD' to the predicate.
+-- Invalid characters are passed as @\'\\0xFFFD\'@ to the predicate.
span :: UTF8Bytes string index
=> (Char -> Bool) -> UTF8 string -> (UTF8 string, UTF8 string)
span p (Str bs) = case G.span p bs of
@@ -125,14 +128,14 @@
-- | Split a string into two parts: the first is the longest prefix
-- that contains only characters that do not satisfy the predicate; the second
-- part is the rest of the string.
--- Invalid characters are passed as 'replacement_char' to the predicate.
+-- Invalid characters are passed as 'G.replacement_char' to the predicate.
break :: UTF8Bytes string index
=> (Char -> Bool) -> UTF8 string -> (UTF8 string, UTF8 string)
break p (Str bs) = case G.break p bs of
(s1,s2) -> (Str s1, Str s2)
-- | Get the first character of a byte string, if any.
--- Invalid characters are replaced by 'replacement_char'.
+-- Invalid characters are replaced by 'G.replacement_char'.
uncons :: UTF8Bytes string index
=> UTF8 string -> Maybe (Char, UTF8 string)
uncons (Str x) = do (c,y) <- G.uncons x
@@ -141,7 +144,7 @@
-- | Extract the first character for the underlying representation,
-- if one is available. It also returns the number of bytes used
-- in the representation of the character.
--- See also 'uncons', 'dropBytes'.
+-- See also 'uncons'.
decode :: UTF8Bytes string index => UTF8 string -> Maybe (Char, index)
decode (Str x) = G.decode x
@@ -161,14 +164,14 @@
length (Str b) = G.length b
-- | Split a string into a list of lines.
--- Lines are terminated by '\n' or the end of the string.
+-- Lines are terminated by @\'\\n\'@ or the end of the string.
-- Empty lines may not be terminated by the end of the string.
--- See also 'lines\''.
+-- See also 'lines''.
lines :: UTF8Bytes string index => UTF8 string -> [UTF8 string]
lines (Str b) = map Str (G.lines b) -- XXX: unnecessary map
-- | Split a string into a list of lines.
--- Lines are terminated by '\n' or the end of the string.
+-- Lines are terminated by @\'\\n\'@ or the end of the string.
-- Empty lines may not be terminated by the end of the string.
-- This function preserves the terminators.
-- See also 'lines'.
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/utf8-string-1.0.1.1/tests/Tests.hs
new/utf8-string-1.0.2/tests/Tests.hs
--- old/utf8-string-1.0.1.1/tests/Tests.hs 1970-01-01 01:00:00.000000000
+0100
+++ new/utf8-string-1.0.2/tests/Tests.hs 2001-09-09 03:46:40.000000000
+0200
@@ -0,0 +1,208 @@
+import Codec.Binary.UTF8.String
+import Test.HUnit (Test (TestCase, TestList, TestLabel), assertEqual, errors,
failures, runTestTT)
+import System.Exit (exitFailure)
+import Control.Monad (when)
+
+main :: IO ()
+main = do counts <- runTestTT tests
+ when (errors counts > 0 || failures counts > 0) exitFailure
+
+tests :: Test
+tests = TestList [test_1, test_2, test_3, test_4, test_5, test_6]
+
+test_1 :: Test
+test_1 = TestLabel "1 Some correct UTF-8 text" $
+ TestCase $ assertEqual "kosme, " "\x03ba\x1f79\x03c3\x03bc\x03b5 "
+ (decode [0xce,0xba,0xe1,0xbd,0xb9,0xcf,0x83,0xce,0xbc,0xce,0xb5,0x20])
+
+test_2 :: Test
+test_2 = TestLabel "2 Boundary condition test cases" $
+ TestList [test_2_1, test_2_2, test_2_3]
+
+test_2_1 :: Test
+test_2_1 = TestLabel "2.1 First possible sequence of a certain length" $
+ TestList $ map TestCase $
+ [ assertEqual "2.1.1, " "\0\0" (decode [0, 0])
+ , assertEqual "2.1.2, " "\x80\0" (decode [0xc2, 0x80, 0])
+ , assertEqual "2.1.3, " "\x800\0" (decode [0xe0, 0xa0, 0x80, 0])
+ , assertEqual "2.1.4, " "\x10000\0" (decode [0xf0, 0x90, 0x80, 0x80, 0])
+ , assertEqual "2.1.5, " "\xfffd\0" (decode [0xf8, 0x88, 0x80, 0x80, 0x80, 0])
+ , assertEqual "2.1.6, " "\xfffd\0" (decode [0xfc,0x84,0x80,0x80,0x80,0x80,0])
+ ]
+
+test_2_2 :: Test
+test_2_2 = TestLabel "2.2 Last possible sequence of a certain length" $
+ TestList $ map TestCase $
+ [ assertEqual "2.2.1, " "\x7f\0" (decode [0x7f, 0])
+ , assertEqual "2.2.2, " "\x7ff\0" (decode [0xdf, 0xbf, 0])
+ , assertEqual "2.2.3, " "\xfffd\0" (decode [0xef, 0xbf, 0xbf, 0])
+ , assertEqual "2.2.4, " "\xfffd\0" (decode [0xf7, 0xbf, 0xbf, 0xbf, 0])
+ , assertEqual "2.2.5, " "\xfffd\0" (decode [0xfb, 0xbf, 0xbf, 0xbf, 0xbf, 0])
+ , assertEqual "2.2.6, " "\xfffd\0" (decode [0xfd,0xbf,0xbf,0xbf,0xbf,0xbf,0])
+ ]
+
+test_2_3 :: Test
+test_2_3 = TestLabel "2.3 Other boundary conditions" $
+ TestList $ map TestCase $
+ [ assertEqual "2.3.1, " "\xd7ff\0" (decode [0xed, 0x9f, 0xbf, 0])
+ , assertEqual "2.3.2, " "\xe000\0" (decode [0xee, 0x80, 0x80, 0])
+ , assertEqual "2.3.3, " "\xfffd\0" (decode [0xef, 0xbf, 0xbd, 0])
+ , assertEqual "2.3.4, " "\x10ffff\0" (decode [0xf4, 0x8f, 0xbf, 0xbf, 0])
+ , assertEqual "2.3.5, " "\xfffd\0" (decode [0xf4, 0x90, 0x80, 0x80, 0])
+ ]
+
+test_3 :: Test
+test_3 = TestLabel "3 Malformed sequences" $
+ TestList [test_3_1, test_3_2, test_3_3, test_3_4, test_3_5]
+
+test_3_1 :: Test
+test_3_1 = TestLabel "3.1 Unexpected continuation bytes" $
+ TestList $ map TestCase $
+ [ assertEqual "3.1.1, " "\xfffd\0" (decode [0x80, 0])
+ , assertEqual "3.1.2, " "\xfffd\0" (decode [0xbf, 0])
+ , assertEqual "3.1.3, " "\xfffd\xfffd\0" (decode [0x80, 0xbf, 0])
+ , assertEqual "3.1.4, " "\xfffd\xfffd\xfffd\0" (decode [0x80, 0xbf, 0x80, 0])
+ , assertEqual "3.1.5, " "\xfffd\xfffd\xfffd\xfffd\0"
+ (decode [0x80, 0xbf, 0x80, 0xbf, 0])
+ , assertEqual "3.1.6, " "\xfffd\xfffd\xfffd\xfffd\xfffd\0"
+ (decode [0x80, 0xbf, 0x80, 0xbf, 0x80, 0])
+ , assertEqual "3.1.7, " "\xfffd\xfffd\xfffd\xfffd\xfffd\xfffd\0"
+ (decode [0x80, 0xbf, 0x80, 0xbf, 0x80, 0xbf, 0])
+ , assertEqual "3.1.8, " "\xfffd\xfffd\xfffd\xfffd\xfffd\xfffd\xfffd\0"
+ (decode [0x80, 0xbf, 0x80, 0xbf, 0x80, 0xbf, 0x80,
0])
+ , assertEqual "3.1.9, " (replicate 64 '\xfffd') (decode [0x80..0xbf])
+ ]
+
+test_3_2 :: Test
+test_3_2 = TestLabel "3.2 Lonely start characters" $
+ TestList $ map TestCase $
+ [ assertEqual "3.2.1, " (concat (replicate 32 "\xfffd "))
+ (decode (concat [[x,0x20] | x <- [0xc0..0xdf]]))
+ , assertEqual "3.2.2, " (concat (replicate 16 "\xfffd "))
+ (decode (concat [[x,0x20] | x <- [0xe0..0xef]]))
+ , assertEqual "3.2.3, " (concat (replicate 8 "\xfffd "))
+ (decode (concat [[x,0x20] | x <- [0xf0..0xf7]]))
+ , assertEqual "3.2.4, " "\xfffd \xfffd \xfffd \xfffd "
+ (decode (concat [[x,0x20] | x <- [0xf8..0xfb]]))
+ , assertEqual "3.2.5, " "\xfffd \xfffd " (decode [0xfc, 0x20, 0xfd, 0x20])
+ ]
+
+test_3_3 :: Test
+test_3_3 = TestLabel "3.3 Sequences with last continuation byte missing" $
+ TestList $ map TestCase $
+ [ assertEqual "3.3.1, " "\xfffd " (decode [0xc0, 0x20])
+ , assertEqual "3.3.2, " "\xfffd " (decode [0xe0, 0x80, 0x20])
+ , assertEqual "3.3.3, " "\xfffd " (decode [0xf0, 0x80, 0x80, 0x20])
+ , assertEqual "3.3.4, " "\xfffd " (decode [0xf8, 0x80, 0x80, 0x80, 0x20])
+ , assertEqual "3.3.5, " "\xfffd " (decode [0xfc, 0x80, 0x80, 0x80,0x80,0x20])
+ , assertEqual "3.3.6, " "\xfffd " (decode [0xdf, 0x20])
+ , assertEqual "3.3.7, " "\xfffd " (decode [0xef, 0xbf, 0x20])
+ , assertEqual "3.3.8, " "\xfffd " (decode [0xf7, 0xbf, 0xbf, 0x20])
+ , assertEqual "3.3.9, " "\xfffd " (decode [0xfb, 0xbf, 0xbf, 0xbf, 0x20])
+ , assertEqual "3.3.10, " "\xfffd " (decode [0xfd, 0xbf, 0xbf,
0xbf,0xbf,0x20])
+ ]
+
+test_3_4 :: Test
+test_3_4 = TestLabel "3.4 Concatenation of incomplete sequences" $
+ TestCase $ assertEqual "3.4, "
+ (replicate 10 '\xfffd')
+ (decode [0xc0, 0xe0, 0x80, 0xf0, 0x80, 0x80, 0xf8, 0x80, 0x80, 0x80,
+ 0xfc, 0x80, 0x80, 0x80,0x80, 0xdf, 0xef, 0xbf, 0xf7, 0xbf, 0xbf,
+ 0xfb, 0xbf, 0xbf, 0xbf, 0xfd, 0xbf, 0xbf, 0xbf,0xbf])
+
+test_3_5 :: Test
+test_3_5 = TestLabel "3.5 Impossible bytes" $
+ TestList $ map TestCase $
+ [ assertEqual "3.5.1, " "\xfffd " (decode [0xfe, 0x20])
+ , assertEqual "3.5.2, " "\xfffd " (decode [0xff, 0x20])
+ , assertEqual "3.5.3, " "\xfffd\xfffd\xfffd\xfffd "
+ (decode [0xfe, 0xfe, 0xff, 0xff, 0x20])
+ ]
+
+test_4 :: Test
+test_4 = TestLabel "4 Overlong sequences" $
+ TestList [test_4_1, test_4_2, test_4_3]
+
+test_4_1 :: Test
+test_4_1 = TestLabel "4.1" $ TestList $ map TestCase $
+ [ assertEqual "4.1.1, " "\xfffd " (decode [0xc0, 0xaf, 0x20])
+ , assertEqual "4.1.2, " "\xfffd " (decode [0xe0, 0x80, 0xaf, 0x20])
+ , assertEqual "4.1.3, " "\xfffd " (decode [0xf0, 0x80, 0x80, 0xaf, 0x20])
+ , assertEqual "4.1.4, " "\xfffd " (decode [0xf8, 0x80, 0x80,0x80,0xaf, 0x20])
+ , assertEqual "4.1.5, " "\xfffd "
(decode[0xfc,0x80,0x80,0x80,0x80,0xaf,0x20])
+ ]
+
+test_4_2 :: Test
+test_4_2 = TestLabel "4.2 Maximum overlong sequences" $
+ TestList $ map TestCase $
+ [ assertEqual "4.2.1, " "\xfffd " (decode [0xc1, 0xbf, 0x20])
+ , assertEqual "4.2.2, " "\xfffd " (decode [0xe0, 0x9f, 0xbf, 0x20])
+ , assertEqual "4.2.3, " "\xfffd " (decode [0xf0, 0x8f, 0xbf, 0xbf, 0x20])
+ , assertEqual "4.2.4, " "\xfffd " (decode [0xf8, 0x87, 0xbf, 0xbf,0xbf,0x20])
+ , assertEqual "4.2.5, " "\xfffd "(decode[0xfc,0x83,0xbf,0xbf,0xbf,0xbf,0x20])
+ ]
+
+test_4_3 :: Test
+test_4_3 = TestLabel "4.2 Overlong NUL" $
+ TestList $ map TestCase $
+ [ assertEqual "4.3.1, " "\xfffd " (decode [0xc0, 0x80, 0x20])
+ , assertEqual "4.3.2, " "\xfffd " (decode [0xe0, 0x80, 0x80, 0x20])
+ , assertEqual "4.3.3, " "\xfffd " (decode [0xf0, 0x80, 0x80, 0x80, 0x20])
+ , assertEqual "4.3.4, " "\xfffd " (decode [0xf8, 0x80, 0x80, 0x80,0x80,0x20])
+ , assertEqual "4.3.5, " "\xfffd "(decode[0xfc,0x80,0x80,0x80,0x80,0x80,0x20])
+ ]
+
+test_5 :: Test
+test_5 = TestLabel "Illegal code positions" $
+ TestList [test_5_1, test_5_2, test_5_3]
+
+test_5_1 :: Test
+test_5_1 = TestLabel "5.1 Single UTF-16 surrogates" $
+ TestList $ map TestCase $
+ [ assertEqual "5.1.1, " "\xfffd " (decode [0xed,0xa0,0x80,0x20])
+ , assertEqual "5.1.2, " "\xfffd " (decode [0xed,0xad,0xbf,0x20])
+ , assertEqual "5.1.3, " "\xfffd " (decode [0xed,0xae,0x80,0x20])
+ , assertEqual "5.1.4, " "\xfffd " (decode [0xed,0xaf,0xbf,0x20])
+ , assertEqual "5.1.5, " "\xfffd " (decode [0xed,0xb0,0x80,0x20])
+ , assertEqual "5.1.6, " "\xfffd " (decode [0xed,0xbe,0x80,0x20])
+ , assertEqual "5.1.7, " "\xfffd " (decode [0xed,0xbf,0xbf,0x20])
+ ]
+
+test_5_2 :: Test
+test_5_2 = TestLabel "5.2 Paired UTF-16 surrogates" $
+ TestList $ map TestCase $
+ [ assertEqual "5.2.1, " res (decode [0xed,0xa0,0x80,0xed,0xb0,0x80,0x20])
+ , assertEqual "5.2.2, " res (decode [0xed,0xa0,0x80,0xed,0xbf,0xbf,0x20])
+ , assertEqual "5.2.3, " res (decode [0xed,0xad,0xbf,0xed,0xb0,0x80,0x20])
+ , assertEqual "5.2.4, " res (decode [0xed,0xad,0xbf,0xed,0xbf,0xbf,0x20])
+ , assertEqual "5.2.5, " res (decode [0xed,0xae,0x80,0xed,0xb0,0x80,0x20])
+ , assertEqual "5.2.6, " res (decode [0xed,0xae,0x80,0xed,0xbf,0xbf,0x20])
+ , assertEqual "5.2.7, " res (decode [0xed,0xaf,0xbf,0xed,0xb0,0x80,0x20])
+ , assertEqual "5.2.8, " res (decode [0xed,0xaf,0xbf,0xed,0xbf,0xbf,0x20])
+ ]
+ where res = "\xfffd\xfffd "
+
+test_5_3 :: Test
+test_5_3 = TestLabel "5.3 Other illegal code positions" $
+ TestList $ map TestCase $
+ [ assertEqual "5.3.1, " "\xfffd " (decode [0xef, 0xbf, 0xbe, 0x20])
+ , assertEqual "5.3.2, " "\xfffd " (decode [0xef, 0xbf, 0xbf, 0x20])
+ ]
+
+test_6 :: Test
+test_6 = TestLabel "Encode then decode" $
+ TestList $ map TestCase $
+ [ assertEqual "6.1" encodeDecodeTest []
+ ]
+
+--
+-- test decode . encode == id for the class of chars we know that to be true of
+--
+encodeDecodeTest :: [Char]
+encodeDecodeTest = filter (\x -> [x] /= decode (encode [x])) legal_codepoints
++
+ filter (\x -> ['\xfffd'] /= decode (encode [x]))
illegal_codepoints
+ where
+ legal_codepoints = ['\0'..'\xd7ff'] ++ ['\xe000'..'\xfffd'] ++
['\x10000'..'\x10ffff']
+ illegal_codepoints = '\xffff' : '\xfffe' : ['\xd800'..'\xdfff']
+
+
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/utf8-string-1.0.1.1/utf8-string.cabal
new/utf8-string-1.0.2/utf8-string.cabal
--- old/utf8-string-1.0.1.1/utf8-string.cabal 2015-08-23 18:19:40.000000000
+0200
+++ new/utf8-string-1.0.2/utf8-string.cabal 2001-09-09 03:46:40.000000000
+0200
@@ -1,10 +1,11 @@
Name: utf8-string
-Version: 1.0.1.1
+Version: 1.0.2
Author: Eric Mertens
Maintainer: [email protected]
License: BSD3
License-file: LICENSE
-Homepage: http://github.com/glguy/utf8-string/
+Homepage: https://github.com/glguy/utf8-string/
+Bug-Reports: https://github.com/glguy/utf8-string/issues
Synopsis: Support for reading and writing UTF8 Strings
Description: A UTF8 layer for Strings. The utf8-string
package provides operations for encoding UTF8
@@ -12,18 +13,30 @@
writing UTF8 without truncation.
Category: Codec
Build-type: Simple
-cabal-version: >= 1.2
+cabal-version: >= 1.10
Extra-Source-Files: CHANGELOG.markdown
-Tested-With: GHC==7.0.4, GHC==7.4.2, GHC==7.6.3, GHC==7.8.4, GHC==7.10.2
+Tested-With: GHC==7.0.4, GHC==7.4.2, GHC==7.6.3, GHC==7.8.4,
GHC==7.10.3, GHC==8.0.2, GHC==8.2.1
+
+source-repository head
+ type: git
+ location: https://github.com/glguy/utf8-string
library
Ghc-options: -W -O2
- build-depends: base >= 4.3 && < 4.9, bytestring >= 0.9
+ build-depends: base >= 4.3 && < 5, bytestring >= 0.9
- Extensions: CPP
Exposed-modules: Codec.Binary.UTF8.String
Codec.Binary.UTF8.Generic
Data.String.UTF8
Data.ByteString.UTF8
Data.ByteString.Lazy.UTF8
+
+ default-language: Haskell2010
+
+test-suite unit-tests
+ type: exitcode-stdio-1.0
+ hs-source-dirs: tests
+ main-is: Tests.hs
+ build-depends: base, HUnit >= 1.3 && < 1.7, utf8-string
+ default-language: Haskell2010