commit ghc-doclayout for openSUSE:Factory

Source-Sync Thu, 11 Nov 2021 12:37:13 -0800

Script 'mail_helper' called by obssrc
Hello community,

here is the log from the commit of package ghc-doclayout for openSUSE:Factory 
checked in at 2021-11-11 21:36:26
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/ghc-doclayout (Old)
 and      /work/SRC/openSUSE:Factory/.ghc-doclayout.new.1890 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


Package is "ghc-doclayout"

Thu Nov 11 21:36:26 2021 rev:7 rq:930324 version:0.3.1.1

Changes:
--------
--- /work/SRC/openSUSE:Factory/ghc-doclayout/ghc-doclayout.changes      
2021-03-24 16:15:58.704129520 +0100
+++ /work/SRC/openSUSE:Factory/.ghc-doclayout.new.1890/ghc-doclayout.changes    
2021-11-11 21:36:39.528899014 +0100
@@ -1,0 +2,21 @@
+Tue Oct 12 10:51:12 UTC 2021 - [email protected]
+
+- Update doclayout to version 0.3.1.1.
+  ## 0.3.1.1
+
+    * Fix the end of the block of zero width characters which contains
+      the zero-width joiners and directional markings (Stephen Morgan, #5).
+      This fixes a regression introduced in 0.3.1, affecting code
+      points 0x2010 to 0x2030.
+
+  ## 0.3.1
+
+    * Improved handling of emojis.  Emojis are double-wide, but
+      previously this library did not treat them as such.  We now
+      have comprehensive support of emojis, including variation
+      modifiers and zero-width joiners, verified by a test suite.
+      Performance has been confirmed to be no worse for text without emojis.
+      (Stephen Morgan, #1).  API changes: export `realLengthNoShortcut`,
+      `isEmojiModifier`, `isEmojiVariation`, `isEmojiJoiner`.
+
+-------------------------------------------------------------------

Old:
----
  doclayout-0.3.0.2.tar.gz

New:
----
  doclayout-0.3.1.1.tar.gz

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ ghc-doclayout.spec ++++++
--- /var/tmp/diff_new_pack.pyhXqp/_old  2021-11-11 21:36:39.984899347 +0100
+++ /var/tmp/diff_new_pack.pyhXqp/_new  2021-11-11 21:36:39.988899350 +0100
@@ -19,13 +19,15 @@
 %global pkg_name doclayout
 %bcond_with tests
 Name:           ghc-%{pkg_name}
-Version:        0.3.0.2
+Version:        0.3.1.1
 Release:        0
 Summary:        A prettyprinting library for laying out text documents
 License:        BSD-3-Clause
 URL:            https://hackage.haskell.org/package/%{pkg_name}
 Source0:        
https://hackage.haskell.org/package/%{pkg_name}-%{version}/%{pkg_name}-%{version}.tar.gz
 BuildRequires:  ghc-Cabal-devel
+BuildRequires:  ghc-containers-devel
+BuildRequires:  ghc-emojis-devel
 BuildRequires:  ghc-mtl-devel
 BuildRequires:  ghc-rpm-macros
 BuildRequires:  ghc-safe-devel
@@ -35,6 +37,7 @@
 BuildRequires:  ghc-tasty-devel
 BuildRequires:  ghc-tasty-golden-devel
 BuildRequires:  ghc-tasty-hunit-devel
+BuildRequires:  ghc-tasty-quickcheck-devel
 %endif
 
 %description

++++++ doclayout-0.3.0.2.tar.gz -> doclayout-0.3.1.1.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/doclayout-0.3.0.2/changelog.md 
new/doclayout-0.3.1.1/changelog.md
--- old/doclayout-0.3.0.2/changelog.md  2021-03-15 22:20:36.000000000 +0100
+++ new/doclayout-0.3.1.1/changelog.md  2021-10-12 05:54:17.000000000 +0200
@@ -1,5 +1,22 @@
 # doclayout
 
+## 0.3.1.1
+
+  * Fix the end of the block of zero width characters which contains
+    the zero-width joiners and directional markings (Stephen Morgan, #5).
+    This fixes a regression introduced in 0.3.1, affecting code
+    points 0x2010 to 0x2030.
+
+## 0.3.1
+
+  * Improved handling of emojis.  Emojis are double-wide, but
+    previously this library did not treat them as such.  We now
+    have comprehensive support of emojis, including variation
+    modifiers and zero-width joiners, verified by a test suite.
+    Performance has been confirmed to be no worse for text without emojis.
+    (Stephen Morgan, #1).  API changes: export `realLengthNoShortcut`,
+    `isEmojiModifier`, `isEmojiVariation`, `isEmojiJoiner`.
+
 ## 0.3.0.2
 
  * NOINLINE `literal` instead of `fromString` (#2, sjakobi).
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/doclayout-0.3.0.2/doclayout.cabal 
new/doclayout-0.3.1.1/doclayout.cabal
--- old/doclayout-0.3.0.2/doclayout.cabal       2021-03-15 22:19:41.000000000 
+0100
+++ new/doclayout-0.3.1.1/doclayout.cabal       2021-10-12 05:51:56.000000000 
+0200
@@ -1,5 +1,5 @@
 name:                doclayout
-version:             0.3.0.2
+version:             0.3.1.1
 synopsis:            A prettyprinting library for laying out text documents.
 description:         doclayout is a prettyprinting library for laying out
                      text documents, with several features not present
@@ -23,6 +23,8 @@
   exposed-modules:     Text.DocLayout
   build-depends:       base >= 4.9 && < 5,
                        text,
+                       containers,
+                       emojis >=0.1.2,
                        mtl,
                        safe
   if !impl(ghc >= 8.0)
@@ -40,7 +42,9 @@
                        tasty,
                        tasty-golden,
                        tasty-hunit,
-                       text
+                       tasty-quickcheck,
+                       text,
+                       emojis >=0.1.2
   ghc-options:         -threaded -rtsopts -with-rtsopts=-N
   default-language:    Haskell2010
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/doclayout-0.3.0.2/src/Text/DocLayout.hs 
new/doclayout-0.3.1.1/src/Text/DocLayout.hs
--- old/doclayout-0.3.0.2/src/Text/DocLayout.hs 2021-03-15 22:18:46.000000000 
+0100
+++ new/doclayout-0.3.1.1/src/Text/DocLayout.hs 2021-10-12 05:49:36.000000000 
+0200
@@ -69,6 +69,10 @@
      , height
      , charWidth
      , realLength
+     , realLengthNoShortcut
+     , isEmojiModifier
+     , isEmojiVariation
+     , isEmojiJoiner
      -- * Types
      , Doc(..)
      , HasChars(..)
@@ -76,14 +80,16 @@
 
 where
 import Prelude
-import Data.List (foldl')
 import Data.Maybe (fromMaybe)
+import Data.Monoid (Sum(..))
 import Safe (lastMay, initSafe)
 import Control.Monad
 import Control.Monad.State.Strict
 import GHC.Generics
-import Data.Char (isSpace)
-import Data.List (intersperse)
+import Data.Char (isDigit, isSpace, ord)
+import Data.List (foldl', intersperse)
+import Data.List.NonEmpty (NonEmpty(..))
+import qualified Data.IntMap.Strict as IM
 import Data.Data (Data, Typeable)
 import Data.String
 import qualified Data.Text as T
@@ -93,6 +99,7 @@
 #else
 import Data.Semigroup
 #endif
+import Text.Emoji (baseEmojis)
 
 -- | Class abstracting over various string types that
 -- can fold over characters.  Minimal definition is 'foldrChar'
@@ -676,58 +683,212 @@
 -- | Returns width of a character in a monospace font:  0 for a combining
 -- character, 1 for a regular character, 2 for an East Asian wide character.
 charWidth :: Char -> Int
-charWidth c =
-  case c of
-      _ | c <  '\x0300'                    -> 1
-        | c >= '\x0300' && c <= '\x036F'   -> 0  -- combining
-        | c >= '\x0370' && c <= '\x10FC'   -> 1
-        | c >= '\x1100' && c <= '\x115F'   -> 2
-        | c >= '\x1160' && c <= '\x11A2'   -> 1
-        | c >= '\x11A3' && c <= '\x11A7'   -> 2
-        | c >= '\x11A8' && c <= '\x11F9'   -> 1
-        | c >= '\x11FA' && c <= '\x11FF'   -> 2
-        | c >= '\x1200' && c <= '\x2328'   -> 1
-        | c >= '\x2329' && c <= '\x232A'   -> 2
-        | c >= '\x232B' && c <= '\x2E31'   -> 1
-        | c >= '\x2E80' && c <= '\x303E'   -> 2
-        | c == '\x303F'                    -> 1
-        | c >= '\x3041' && c <= '\x3247'   -> 2
-        | c >= '\x3248' && c <= '\x324F'   -> 1 -- ambiguous
-        | c >= '\x3250' && c <= '\x4DBF'   -> 2
-        | c >= '\x4DC0' && c <= '\x4DFF'   -> 1
-        | c >= '\x4E00' && c <= '\xA4C6'   -> 2
-        | c >= '\xA4D0' && c <= '\xA95F'   -> 1
-        | c >= '\xA960' && c <= '\xA97C'   -> 2
-        | c >= '\xA980' && c <= '\xABF9'   -> 1
-        | c >= '\xAC00' && c <= '\xD7FB'   -> 2
-        | c >= '\xD800' && c <= '\xDFFF'   -> 1
-        | c >= '\xE000' && c <= '\xF8FF'   -> 1 -- ambiguous
-        | c >= '\xF900' && c <= '\xFAFF'   -> 2
-        | c >= '\xFB00' && c <= '\xFDFD'   -> 1
-        | c >= '\xFE00' && c <= '\xFE0F'   -> 1 -- ambiguous
-        | c >= '\xFE10' && c <= '\xFE19'   -> 2
-        | c >= '\xFE20' && c <= '\xFE26'   -> 1
-        | c >= '\xFE30' && c <= '\xFE6B'   -> 2
-        | c >= '\xFE70' && c <= '\xFEFF'   -> 1
-        | c >= '\xFF01' && c <= '\xFF60'   -> 2
-        | c >= '\xFF61' && c <= '\x16A38'  -> 1
-        | c >= '\x1B000' && c <= '\x1B001' -> 2
-        | c >= '\x1D000' && c <= '\x1F1FF' -> 1
-        | c >= '\x1F200' && c <= '\x1F251' -> 2
-        | c >= '\x1F300' && c <= '\x1F773' -> 1
-        | c >= '\x20000' && c <= '\x3FFFD' -> 2
-        | otherwise                        -> 1
+charWidth c = maybe 1 (specificWidth . snd) $ IM.lookupLE (ord c) 
unicodeWidthMap
 
 -- | Get real length of string, taking into account combining and double-wide
 -- characters.
 realLength :: HasChars a => a -> Int
-realLength s = fromMaybe 0 $ foldlChar go Nothing s
+realLength = realLengthWith updateMatchState
+
+-- | Get real length of string, taking into account combining and double-wide
+-- characters, without taking any shortcuts. This should give the same answer
+-- as 'realLength', but will be slower. It is here to test that the
+-- shortcuts are implemented correctly.
+realLengthNoShortcut :: HasChars a => a -> Int
+realLengthNoShortcut = realLengthWith updateMatchStateNoShortcut
+
+-- | Get real length of string, taking into account combining and double-wide
+-- characters, using the given accumulator.
+realLengthWith :: HasChars a => (MatchState -> Char -> MatchState) -> a -> Int
+realLengthWith f = extractLength . foldlChar f (MatchState True 0 0 mempty)
+  where
+    extractLength (MatchState _ tot w _) = tot + w
+
+-- | Update a 'MatchState' by processing a character.
+updateMatchState :: MatchState -> Char -> MatchState
+updateMatchState (MatchState first tot _ Nothing) !c
+    -- For efficiency, we isolate commonly used portions of the basic
+    -- multilingual plane that do not have emoji in them.
+    -- Maximum contiguous range containing ASCII alphabetic characters and no 
emoji
+    | c <= '\x00A8'                                   = MatchState False (tot 
+ 1) 0 Nothing
+    -- Combining characters have width 0
+    | c >= '\x0300' && c <= '\x036F'                  = MatchState False (if 
first then tot + 1 else tot) 0 Nothing
+    -- A block of width 1
+    | c >= '\x0370' && c <= '\x10FC'                  = MatchState False (tot 
+ 1) 0 Nothing
+    -- Hexagrams are width 1
+    | c >= '\x4DC0' && c <= '\x4DFF'                  = MatchState False (tot 
+ 1) 0 Nothing
+    -- Maximum contiguous range of width 2 with no emoji containing CJK
+    | c >= '\x329a' && c <= '\xA4C6'                  = MatchState False (tot 
+ 2) 0 Nothing
+    -- An ambiguous block; TODO: should be width 2 if surrounded by wide, 1 
otherwise
+    | c >= '\x3248' && c <= '\x324F'                  = MatchState False (tot 
+ 1) 0 Nothing
+    -- A width 1 straggler
+    | c == '\x303F'                                   = MatchState False (tot 
+ 1) 0 Nothing
+updateMatchState s c = updateMatchStateNoShortcut s c
+
+-- | Update a 'MatchState' by processing a character, without taking any
+-- shortcuts. This should give the same answer as 'updateMatchState', but will
+-- be slower. It is here to test that the shortcuts are implemented correctly.
+updateMatchStateNoShortcut :: MatchState -> Char -> MatchState
+updateMatchStateNoShortcut (MatchState first tot _ Nothing) !c =
+    case IM.lookupLE oc unicodeWidthMap of
+        -- If there is a specific match, record the tentative width, the map of
+        -- continuations, and move to the next character
+        Just (!oc', SpecificMatch r w m) | oc == oc' -> MatchState False tot 
(fromMaybe r w) (Just m)
+        -- If there is only a range match, record the total width and move to
+        -- the next character
+        Just (!_, !match) -> let r = rangeWidth match
+                                 -- If the string starts with a combining 
character.  Since there is no
+                                 -- preceding character, we count 0 width as 1 
in this one case:
+                                 r' = if first && r == 0 then 1 else r
+                              in MatchState False (tot + r') 0 Nothing
+        -- IM.lookupLE should not fail, since the map has an entry at code point 0
+        Nothing -> MatchState False (tot + 1) 0 Nothing
+  where
+    oc = ord c
+updateMatchStateNoShortcut (MatchState _ tot w (Just !m)) !c
+    -- Skin tone modifiers and variation modifiers modify the emoji up to this
+    -- point, so can be discarded. However, they always make it width 2, so we
+    -- set the tentative width to 2.
+    | isEmojiModifier c || isEmojiVariation c = MatchState False tot 2 (Just m)
+    -- Zero width joiners will join two emoji together, so let's discard the 
state and parse the next emoji
+    | isEmojiJoiner c = MatchState False tot 2 Nothing
+    -- Otherwise, lookup the emoji continuations
+    | otherwise = case IM.lookup (ord c) m of
+        -- Continuations match, move to the next step with new continuations
+        Just (Emoji ew m') -> MatchState False tot ew (Just m')
+        -- No continuations match, use the tentative width and process c 
without continuations
+        -- I guess we use shortcuts here; that's probably fine.
+        Nothing -> updateMatchState (MatchState False (tot + w) 0 Nothing) c
+
+-- | Keeps track of state in length calculations, determining whether we're at
+-- the first character, the width so far, the tentative width for this group,
+-- and the Map for possible emoji continuations.
+data MatchState = MatchState !Bool !Int !Int !(Maybe EmojiMap)
+
+-- | A possible match for unicode characters; either within a range block, or a
+-- specific match with a block range width, possibly a specific width, and a 
map of
+-- continuations.
+data UnicodeWidthMatch
+    = RangeSeparator !Int                         -- This code point marks the 
boundary of a range
+    | SpecificMatch !Int !(Maybe Int) !EmojiMap   -- This code point has a 
specific emoji with continuations
+  deriving (Show)
+
+instance Semigroup UnicodeWidthMatch where
+    (SpecificMatch r w1 m1) <> (SpecificMatch _ w2 m2) = SpecificMatch r w $ 
concatEmojiMap m1 m2
+      where
+        w = getSum <$> (Sum <$> w1) <> (Sum <$> w2)
+    s <> _ = s
+
+-- | The width of the block in which the character lies, ignoring specific
+-- matches.
+rangeWidth :: UnicodeWidthMatch -> Int
+rangeWidth (RangeSeparator !r)    = r
+rangeWidth (SpecificMatch !r !_ !_) = r
+
+-- | The specific width of a character.
+specificWidth :: UnicodeWidthMatch -> Int
+specificWidth (RangeSeparator r)    = r
+specificWidth (SpecificMatch r w _) = fromMaybe r w
+
+-- | Checks whether a character is a skin tone modifier
+isEmojiModifier :: Char -> Bool
+isEmojiModifier c = c >= '\x1F3FB' && c <= '\x1F3FF'
+
+-- | Checks whether a character is an emoji variation modifier.
+isEmojiVariation :: Char -> Bool
+isEmojiVariation c = c == '\xFE0F'
+
+-- | Checks whether a character is an emoji joiner.
+isEmojiJoiner :: Char -> Bool
+isEmojiJoiner c = c == '\x200D'
+
+-- | A map for looking up the width of Unicode text.
+unicodeWidthMap :: IM.IntMap UnicodeWidthMatch
+unicodeWidthMap =
+    foldr addEmoji unicodeRangeMap
+    . filter (maybe True (not . isKeypad . fst) . T.uncons)  -- Keypad emoji 
can be handled by base rules
+    $ filter (not . T.any isEmojiModifier)                   -- Emoji 
modifiers are inferred from the base emoji
+    baseEmojis
+  where
+    isKeypad c = isDigit c || c == '*' || c == '#'
+
+-- | Denotes the contiguous ranges of Unicode characters which have a given
+-- width: 1 for a regular character, 2 for an East Asian wide character. Emoji
+-- have different widths and lie within some of these blocks. And the emoji
+-- will be added later.
+unicodeRangeMap :: IM.IntMap UnicodeWidthMatch
+unicodeRangeMap = IM.fromList $ map (\(c, x) -> (ord c, x))
+    [ ('\x0000', RangeSeparator 1)
+    , ('\x0300', RangeSeparator 0)  -- combining
+    , ('\x0370', RangeSeparator 1)
+    , ('\x1100', RangeSeparator 2)
+    , ('\x1160', RangeSeparator 1)
+    , ('\x11A3', RangeSeparator 2)
+    , ('\x11A8', RangeSeparator 1)
+    , ('\x11FA', RangeSeparator 2)
+    , ('\x1200', RangeSeparator 1)
+    , ('\x1AB0', RangeSeparator 0)  -- combining
+    , ('\x1B00', RangeSeparator 1)
+    , ('\x1DC0', RangeSeparator 0)  -- combining
+    , ('\x1E00', RangeSeparator 1)
+    , ('\x200B', RangeSeparator 0)  -- zero-width characters and directional 
overrides
+    , ('\x2010', RangeSeparator 1)
+    , ('\x20D0', RangeSeparator 0)  -- combining
+    , ('\x2100', RangeSeparator 1)
+    , ('\x2329', RangeSeparator 2)
+    , ('\x232B', RangeSeparator 1)
+    , ('\x2E80', RangeSeparator 2)
+    , ('\x303F', RangeSeparator 1)
+    , ('\x3041', RangeSeparator 2)
+    , ('\x3248', RangeSeparator 1)  -- ambiguous
+    , ('\x3250', RangeSeparator 2)
+    , ('\x4DC0', RangeSeparator 1)
+    , ('\x4E00', RangeSeparator 2)
+    , ('\xA4D0', RangeSeparator 1)
+    , ('\xA960', RangeSeparator 2)
+    , ('\xA980', RangeSeparator 1)
+    , ('\xAC00', RangeSeparator 2)
+    , ('\xD800', RangeSeparator 1)
+    , ('\xE000', RangeSeparator 1)  -- ambiguous
+    , ('\xF900', RangeSeparator 2)
+    , ('\xFB00', RangeSeparator 1)
+    , ('\xFE00', RangeSeparator 1)  -- ambiguous
+    , ('\xFE10', RangeSeparator 2)
+    , ('\xFE20', RangeSeparator 0)  -- combining
+    , ('\xFE30', RangeSeparator 2)
+    , ('\xFE70', RangeSeparator 1)
+    , ('\xFF01', RangeSeparator 2)
+    , ('\xFF61', RangeSeparator 1)
+    , ('\x1B000', RangeSeparator 2)
+    , ('\x1D000', RangeSeparator 1)
+    , ('\x1F200', RangeSeparator 2)
+    , ('\x1F300', RangeSeparator 1)
+    , ('\x1F3FB', RangeSeparator 2)  -- skin tone modifiers
+    , ('\x1F400', RangeSeparator 1)
+    , ('\x20000', RangeSeparator 2)
+    , ('\x3FFFD', RangeSeparator 1)
+    ]
+
+type EmojiMap = IM.IntMap Emoji
+data Emoji = Emoji !Int !EmojiMap
+  deriving (Show)
+
+concatEmojiMap :: EmojiMap -> EmojiMap -> EmojiMap
+concatEmojiMap = IM.unionWith (\(Emoji w e1) (Emoji _ e2) -> Emoji w $ 
concatEmojiMap e1 e2)
+
+emojiToMatch :: IM.IntMap UnicodeWidthMatch -> NonEmpty Char -> 
UnicodeWidthMatch
+emojiToMatch m (x:|xs) = SpecificMatch r w . emojiToMap $ filter (not . 
isEmojiVariation) xs
   where
-   -- Using a Maybe allows us to handle the case where the string
-   -- starts with a combining character.  Since there is no preceding
-   -- character, we count 0 width as 1 in this one case:
-   go Nothing !c =
-       case charWidth c of
-         0  -> Just 1
-         !n -> Just n
-   go (Just !tot) !c = Just (tot + charWidth c)
+    r = maybe 1 (rangeWidth . snd) $ IM.lookupLT (ord x) m
+    -- If it is a single code point emoji, it is of width 2. Otherwise, don't
+    -- overwrite the range width.
+    w = if null xs then Just 2 else Nothing
+
+addEmoji :: Text -> IM.IntMap UnicodeWidthMatch -> IM.IntMap UnicodeWidthMatch
+addEmoji !emoji !m = case T.unpack emoji of
+    []   -> m
+    x:xs -> IM.insertWith (<>) (ord x) (emojiToMatch m (x:|xs)) m
+
+emojiToMap :: String -> EmojiMap
+emojiToMap []     = mempty
+emojiToMap (x:xs) = IM.singleton (ord x) . Emoji 2 $ emojiToMap xs
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/doclayout-0.3.0.2/test/test.hs 
new/doclayout-0.3.1.1/test/test.hs
--- old/doclayout-0.3.0.2/test/test.hs  2019-10-31 03:08:30.000000000 +0100
+++ new/doclayout-0.3.1.1/test/test.hs  2021-10-10 22:42:49.000000000 +0200
@@ -3,9 +3,13 @@
 {-# LANGUAGE ScopedTypeVariables #-}
 
 import Text.DocLayout
+import Text.Emoji
 import Test.Tasty
 import Test.Tasty.HUnit
+import Test.Tasty.QuickCheck
+import Data.Functor ((<&>))
 import Data.Text (Text)
+import qualified Data.Text as T
 #if MIN_VERSION_base(4,11,0)
 #else
 import Data.Semigroup
@@ -268,4 +272,40 @@
       Nothing
       (text "\870" <> space <> text "a")
       "\870 a"
+
+  , testCase "length of normal text" $
+      realLength ("This is going to be too long anyway" :: String) @?= 35
+
+  , testCase "length of normal character, which could be continued to an 
emoji, but isn't" $
+      realLength ("*a" :: String) @?= 2
+
+  , testCase "length of normal character, which could be continued to an 
emoji, and is" $
+      realLength ("*\xFE0F\x20E3\&a" :: String) @?= 3
+
+  , testCase "length emoji consisting of one code point" $
+      realLength ("\x231A" :: String) @?= 2
+
+  , testCase "length of an emoji constructed using the variating modifier" $
+      realLength ("\x00A9\xFE0F" :: String) @?= 2
+
+  , testCase "length of a non-emoji which would be an emoji with a variation 
modifier" $
+      realLength ("\x00A9" :: String) @?= 1
+
+  , testCase "length of two emoji in a row" $
+      realLength ("\x1F170\xFE0F\x1F1E6\x1F1E8" :: String) @?= 4
+
+  , testCase "length of an emoji with skin tone modifier, where stripping 
results in a non-emoji" $
+      realLength ("\x1F590\x1F3FF" :: String) @?= 2
+
+  , testCase "a digit with a skin tone modifier is invalid but might appear, 
and shouldn't be mistaken for a variation modifier" $
+      realLength ("1\x1F3FF" :: String) @?= 3
+
+  , testGroup "all base emoji have width 2" $
+      baseEmojis <&> \emoji -> testCase (T.unpack emoji) $ realLength emoji 
@?= 2
+
+  , testGroup "all zero-width joiner emoji sequences have width 2" $
+      zwjEmojis <&> \emoji -> testCase (T.unpack emoji) $ realLength emoji @?= 
2
+
+  , testProperty "shortcut provides same answer for string length" . 
withMaxSuccess 1000000 $
+      \(x :: String) -> realLength x === realLengthNoShortcut x
   ]

commit ghc-doclayout for openSUSE:Factory

Reply via email to