commit ghc-basement for openSUSE:Factory

root Tue, 24 Jul 2018 08:13:39 -0700

Hello community,

here is the log from the commit of package ghc-basement for openSUSE:Factory 
checked in at 2018-07-24 17:13:31
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/ghc-basement (Old)
 and      /work/SRC/openSUSE:Factory/.ghc-basement.new (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


Package is "ghc-basement"

Tue Jul 24 17:13:31 2018 rev:2 rq:623726 version:0.0.8

Changes:
--------
--- /work/SRC/openSUSE:Factory/ghc-basement/ghc-basement.changes        
2018-05-30 13:10:20.874385774 +0200
+++ /work/SRC/openSUSE:Factory/.ghc-basement.new/ghc-basement.changes   
2018-07-24 17:13:34.218618469 +0200
@@ -1,0 +2,6 @@
+Fri Jul 13 14:31:46 UTC 2018 - psim...@suse.com
+
+- Update basement to version 0.0.8.
+  Upstream does not provide a change log file.
+
+-------------------------------------------------------------------

Old:
----
  basement-0.0.7.tar.gz

New:
----
  basement-0.0.8.tar.gz

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ ghc-basement.spec ++++++
--- /var/tmp/diff_new_pack.1Y6fTA/_old  2018-07-24 17:13:35.022619513 +0200
+++ /var/tmp/diff_new_pack.1Y6fTA/_new  2018-07-24 17:13:35.022619513 +0200
@@ -18,7 +18,7 @@
 
 %global pkg_name basement
 Name:           ghc-%{pkg_name}
-Version:        0.0.7
+Version:        0.0.8
 Release:        0
 Summary:        Foundation scrap box of array & string
 License:        BSD-3-Clause

++++++ basement-0.0.7.tar.gz -> basement-0.0.8.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/basement-0.0.7/Basement/Alg/String.hs 
new/basement-0.0.8/Basement/Alg/String.hs
--- old/basement-0.0.7/Basement/Alg/String.hs   2018-02-04 11:47:59.000000000 
+0100
+++ new/basement-0.0.8/Basement/Alg/String.hs   2018-03-12 23:27:49.000000000 
+0100
@@ -39,9 +39,9 @@
     loop !d !s
         | s == end  = pure (offsetAsSize d)
         | otherwise =
-            let !h = index src s
+            let !h = nextAscii src s
              in case headerIsAscii h of
-                    True | predicate (toChar1 h) -> primMbaWrite dst d h >> 
loop (d + Offset 1) (s + Offset 1)
+                    True | predicate (toChar1 h) -> primMbaWrite dst d 
(stepAsciiRawValue h) >> loop (d + Offset 1) (s + Offset 1)
                          | otherwise             -> loop d (s + Offset 1)
                     False ->
                         case next src s of
@@ -58,10 +58,10 @@
   where
     loop4 !ofs
         | ofs4 < end =
-            let h1 = index ba ofs
-                h2 = index ba (ofs+1)
-                h3 = index ba (ofs+2)
-                h4 = index ba (ofs+3)
+            let h1 = nextAscii ba ofs
+                h2 = nextAscii ba (ofs+1)
+                h3 = nextAscii ba (ofs+2)
+                h4 = nextAscii ba (ofs+3)
              in if headerIsAscii h1 && headerIsAscii h2 && headerIsAscii h3 && 
headerIsAscii h4
                     then loop4 ofs4
                     else loop ofs
@@ -73,7 +73,7 @@
         | headerIsAscii h = loop (ofs + Offset 1)
         | otherwise       = multi (CountOf $ getNbBytes h) ofs
       where
-        h = index ba ofs
+        h = nextAscii ba ofs
 
     multi (CountOf 0xff) pos = (pos, Just InvalidHeader)
     multi nbConts pos
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/basement-0.0.7/Basement/Alg/UTF8.hs 
new/basement-0.0.8/Basement/Alg/UTF8.hs
--- old/basement-0.0.7/Basement/Alg/UTF8.hs     2018-02-04 12:56:52.000000000 
+0100
+++ new/basement-0.0.8/Basement/Alg/UTF8.hs     2018-03-12 23:27:49.000000000 
+0100
@@ -9,8 +9,10 @@
     , expectAscii
     , next
     , nextSkip
+    , nextWith
     , prev
     , prevSkip
+    , writeASCII
     , writeUTF8
     , toList
     , all
@@ -32,6 +34,7 @@
 import           Basement.Numerical.Additive
 import           Basement.Numerical.Subtractive
 import           Basement.Types.OffsetSize
+import           Basement.Types.Char7 (Char7(..))
 import           Basement.PrimType
 import           Basement.UTF8.Helper
 import           Basement.UTF8.Table
@@ -62,15 +65,31 @@
         3 -> Step (toChar4 h (index ba (n + Offset 1))
                              (index ba (n + Offset 2))
                              (index ba (n + Offset 3))) (n + Offset 4)
-        r -> error ("next: internal error: invalid input: offset=" <> show n 
<> " table=" <> show r <> " h=" <> show h)
+        r -> error ("next: internal error: invalid input: offset=" <> show n 
<> " table=" <> show r <> " h=" <> show (stepAsciiRawValue h))
   where
-    !h = index ba n
+    !h = nextAscii ba n
 {-# INLINE next #-}
 
 nextSkip :: Indexable container Word8 => container -> Offset Word8 -> Offset 
Word8
-nextSkip ba n = n + 1 + Offset (getNbBytes (index ba n))
+nextSkip ba n = n + 1 + Offset (getNbBytes (nextAscii ba n))
 {-# INLINE nextSkip #-}
 
+-- | special case for only non ascii next'er function
+nextWith :: Indexable container Word8
+         => StepASCII
+         -> container
+         -> Offset8
+         -> Step
+nextWith h ba n =
+    case getNbBytes h of
+        1 -> Step (toChar2 h (index ba n)) (n + Offset 1)
+        2 -> Step (toChar3 h (index ba n) (index ba (n + Offset 1))) (n + 
Offset 2)
+        3 -> Step (toChar4 h (index ba n)
+                             (index ba (n + Offset 1))
+                             (index ba (n + Offset 2))) (n + Offset 3)
+        r -> error ("nextWith: internal error: invalid input: offset=" <> show 
n <> " table=" <> show r <> " h=" <> show (stepAsciiRawValue h))
+{-# INLINE nextWith #-}
+
 -- Given a non null offset, give the previous character and the offset of this 
character
 -- will fail bad if apply at the beginning of string or an empty string.
 prev :: Indexable container Word8 => container -> Offset Word8 -> StepBack
@@ -104,7 +123,12 @@
         | isContinuation (index ba o) = loop (o `offsetMinusE` sz1)
         | otherwise                       = o
 
-writeUTF8 :: (PrimMonad prim, RandomAccess container prim Word8) 
+writeASCII :: (PrimMonad prim, RandomAccess container prim Word8)
+           => container -> Offset8 -> Char7 -> prim ()
+writeASCII mba !i (Char7 c) = write mba i c
+{-# INLINE writeASCII #-}
+
+writeUTF8 :: (PrimMonad prim, RandomAccess container prim Word8)
           => container -> Offset8 -> Char -> prim Offset8
 writeUTF8 mba !i !c
     | bool# (ltWord# x 0x80##   ) = encode1
@@ -245,24 +269,24 @@
   where
     loop !d !s
         | s == end        = pure ()
-        | headerIsAscii h = primMbaWrite dst d h >> loop (d `offsetSub` 1) (s 
+ 1)
+        | headerIsAscii h = primMbaWrite dst d (stepAsciiRawValue h) >> loop 
(d `offsetSub` 1) (s + 1)
         | otherwise       = do
             case getNbBytes h of
                 1 -> do
-                    primMbaWrite dst (d `offsetSub` 1) h
+                    primMbaWrite dst (d `offsetSub` 1) (stepAsciiRawValue h)
                     primMbaWrite dst d                 (index src (s + 1))
                     loop (d `offsetSub` 2) (s + 2)
                 2 -> do
-                    primMbaWrite dst (d `offsetSub` 2) h
+                    primMbaWrite dst (d `offsetSub` 2) (stepAsciiRawValue h)
                     primMbaWrite dst (d `offsetSub` 1) (index src (s + 1))
                     primMbaWrite dst d                 (index src (s + 2))
                     loop (d `offsetSub` 3) (s + 3)
                 3 -> do
-                    primMbaWrite dst (d `offsetSub` 3) h
+                    primMbaWrite dst (d `offsetSub` 3) (stepAsciiRawValue h)
                     primMbaWrite dst (d `offsetSub` 2) (index src (s + 1))
                     primMbaWrite dst (d `offsetSub` 1) (index src (s + 2))
                     primMbaWrite dst d                 (index src (s + 3))
                     loop (d `offsetSub` 4) (s + 4)
                 _ -> error "impossible"
-      where h = index src s
+      where h = nextAscii src s
 {-# INLINE reverse #-}
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/basement-0.0.7/Basement/Nat.hs 
new/basement-0.0.8/Basement/Nat.hs
--- old/basement-0.0.7/Basement/Nat.hs  2017-11-11 10:52:31.000000000 +0100
+++ new/basement-0.0.8/Basement/Nat.hs  2018-07-08 10:40:19.000000000 +0200
@@ -8,6 +8,9 @@
 {-# LANGUAGE ScopedTypeVariables       #-}
 {-# LANGUAGE UndecidableInstances      #-}
 {-# LANGUAGE ConstraintKinds           #-}
+#if __GLASGOW_HASKELL__ >= 806
+{-# LANGUAGE NoStarIsType              #-}
+#endif
 module Basement.Nat
     ( Nat
     , KnownNat
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/basement-0.0.7/Basement/Sized/Block.hs 
new/basement-0.0.8/Basement/Sized/Block.hs
--- old/basement-0.0.7/Basement/Sized/Block.hs  2018-02-12 15:24:12.000000000 
+0100
+++ new/basement-0.0.8/Basement/Sized/Block.hs  2018-07-08 10:40:19.000000000 
+0200
@@ -5,12 +5,16 @@
 --
 -- A Nat-sized version of Block
 {-# LANGUAGE AllowAmbiguousTypes        #-}
+{-# LANGUAGE CPP                        #-}
+{-# LANGUAGE ConstraintKinds            #-}
 {-# LANGUAGE DataKinds                  #-}
-{-# LANGUAGE TypeOperators              #-}
-{-# LANGUAGE TypeApplications           #-}
-{-# LANGUAGE ScopedTypeVariables        #-}
 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
-{-# LANGUAGE ConstraintKinds            #-}
+{-# LANGUAGE ScopedTypeVariables        #-}
+{-# LANGUAGE TypeApplications           #-}
+{-# LANGUAGE TypeOperators              #-}
+#if __GLASGOW_HASKELL__ >= 806
+{-# LANGUAGE NoStarIsType               #-}
+#endif
 
 module Basement.Sized.Block
     ( BlockN
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/basement-0.0.7/Basement/String.hs 
new/basement-0.0.8/Basement/String.hs
--- old/basement-0.0.7/Basement/String.hs       2018-02-23 20:34:34.000000000 
+0100
+++ new/basement-0.0.8/Basement/String.hs       2018-03-12 23:27:49.000000000 
+0100
@@ -127,6 +127,8 @@
 import           Basement.Alg.Class (Indexable)
 import qualified Basement.Alg.UTF8 as UTF8
 import qualified Basement.Alg.String as Alg
+import           Basement.Types.Char7 (Char7(..), c7Upper, c7Lower)
+import qualified Basement.Types.Char7 as Char7
 import           GHC.Prim
 import           GHC.ST
 import           GHC.Types
@@ -194,7 +196,7 @@
                 (pos, Just failure) -> return (pos, Just failure)
 
     one pos = do
-        h <- Vec.unsafeRead mba pos
+        h <- StepASCII <$> Vec.unsafeRead mba pos
         let nbConts = getNbBytes h
         if nbConts == 0xff
             then return (pos, Just InvalidHeader)
@@ -1321,8 +1323,8 @@
 {-# SPECIALIZE decimalDigitsPtr :: Word -> Ptr Word8 -> Offset Word8 -> Offset 
Word8 -> (# Word, Bool, Offset Word8 #) #-}
 
 -- | Convert a 'String' 'Char' by 'Char' using a case mapping function.
-caseConvert :: (Char -> CM) -> String -> String
-caseConvert op s@(String arr) = runST $ do
+caseConvert :: (Char7 -> Char7) -> (Char -> CM) -> String -> String
+caseConvert opASCII op s@(String arr) = runST $ do
   mba <- MBLK.new iLen
   nL <- C.onBackendPrim
         (\blk  -> go mba blk (Offset 0) start)
@@ -1342,11 +1344,14 @@
       where
         eSize !e = if e == '\0' then 0 else charToBytes (fromEnum e)
         loop !dst !allocLen !nLen !dstIdx !srcIdx
-          | srcIdx == end = return nLen
+          | srcIdx == end    = return nLen
           | nLen == allocLen = realloc
+          | headerIsAscii h  = do
+                UTF8.writeASCII dst dstIdx (opASCII $ Char7 $ 
stepAsciiRawValue h)
+                loop dst allocLen (nLen + 1) (dstIdx+Offset 1) (srcIdx+Offset 
1)
           | otherwise = do
               let !(CM c1 c2 c3) = op c
-                  !(Step c nextSrcIdx) = UTF8.next src srcIdx
+                  !(Step c nextSrcIdx) = UTF8.nextWith h src (srcIdx+Offset 1)
               nextDstIdx <- UTF8.writeUTF8 dst dstIdx c1
               if c2 == '\0' -- We keep the most common case loop as short as 
possible.
                 then loop dst allocLen (nLen + charToBytes (fromEnum c1)) 
nextDstIdx nextSrcIdx
@@ -1362,20 +1367,21 @@
               nDst <- MBLK.new nAll
               MBLK.unsafeCopyElements nDst 0 dst 0 nLen
               loop nDst nAll nLen dstIdx srcIdx
+            h = UTF8.nextAscii src srcIdx
 
 -- | Convert a 'String' to the upper-case equivalent.
 upper :: String -> String
-upper = caseConvert upperMapping
+upper = caseConvert c7Upper upperMapping
 
 -- | Convert a 'String' to the upper-case equivalent.
 lower :: String -> String
-lower = caseConvert lowerMapping
+lower = caseConvert c7Lower lowerMapping
 
 -- | Convert a 'String' to the unicode case fold equivalent.
 --
 -- Case folding is mostly used for caseless comparison of strings.
 caseFold :: String -> String
-caseFold = caseConvert foldMapping
+caseFold = caseConvert c7Upper foldMapping
 
 -- | Check whether the first string is a prefix of the second string.
 isPrefixOf :: String -> String -> Bool
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/basement-0.0.7/Basement/Types/Char7.hs 
new/basement-0.0.8/Basement/Types/Char7.hs
--- old/basement-0.0.7/Basement/Types/Char7.hs  2017-08-26 15:34:20.000000000 
+0200
+++ new/basement-0.0.8/Basement/Types/Char7.hs  2018-03-12 23:27:49.000000000 
+0100
@@ -24,6 +24,9 @@
     , c7_7
     , c7_8
     , c7_9
+    -- * Upper / Lower With ASCII
+    , c7Upper
+    , c7Lower
     ) where
 
 import GHC.Prim
@@ -98,3 +101,15 @@
 c7_7 = Char7 0x37
 c7_8 = Char7 0x38
 c7_9 = Char7 0x39
+
+c7Lower :: Char7 -> Char7
+c7Lower c@(Char7 w)
+    | c <  c7_A = c
+    | c <= c7_Z = Char7 (w .|. 0x20)
+    | otherwise = c
+
+c7Upper :: Char7 -> Char7
+c7Upper c@(Char7 w)
+    | c <  c7_a = c
+    | c <= c7_z = Char7 (w .&. 0xdf)
+    | otherwise = c
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/basement-0.0.7/Basement/Types/CharUTF8.hs 
new/basement-0.0.8/Basement/Types/CharUTF8.hs
--- old/basement-0.0.7/Basement/Types/CharUTF8.hs       1970-01-01 
01:00:00.000000000 +0100
+++ new/basement-0.0.8/Basement/Types/CharUTF8.hs       2018-03-12 
23:27:49.000000000 +0100
@@ -0,0 +1,8 @@
+module Basement.Types.CharUTF8
+    ( CharUTF8(..)
+    , encodeCharUTF8
+    , decodeCharUTF8
+    ) where
+
+import Basement.UTF8.Types
+import Basement.UTF8.Helper
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/basement-0.0.7/Basement/Types/Word128.hs 
new/basement-0.0.8/Basement/Types/Word128.hs
--- old/basement-0.0.7/Basement/Types/Word128.hs        2018-02-23 
20:15:54.000000000 +0100
+++ new/basement-0.0.8/Basement/Types/Word128.hs        2018-07-08 
10:40:19.000000000 +0200
@@ -144,7 +144,7 @@
 (-) :: Word128 -> Word128 -> Word128
 (-) a b
     | a >= b    = applyBiWordOnNatural (Prelude.-) a b
-    | otherwise = complement $ applyBiWordOnNatural (Prelude.-) b a
+    | otherwise = complement (applyBiWordOnNatural (Prelude.-) b a) + 1
 
 -- | Multiplication
 (*) :: Word128 -> Word128 -> Word128
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/basement-0.0.7/Basement/Types/Word256.hs 
new/basement-0.0.8/Basement/Types/Word256.hs
--- old/basement-0.0.7/Basement/Types/Word256.hs        2018-02-23 
20:15:54.000000000 +0100
+++ new/basement-0.0.8/Basement/Types/Word256.hs        2018-07-08 
10:40:19.000000000 +0200
@@ -179,7 +179,7 @@
 (-) :: Word256 -> Word256 -> Word256
 (-) a b
     | a >= b    = applyBiWordOnNatural (Prelude.-) a b
-    | otherwise = complement $ applyBiWordOnNatural (Prelude.-) b a
+    | otherwise = complement (applyBiWordOnNatural (Prelude.-) b a) + 1
 
 -- | Multiplication
 (*) :: Word256 -> Word256 -> Word256
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/basement-0.0.7/Basement/UTF8/Helper.hs 
new/basement-0.0.8/Basement/UTF8/Helper.hs
--- old/basement-0.0.7/Basement/UTF8/Helper.hs  2017-08-05 12:49:52.000000000 
+0200
+++ new/basement-0.0.8/Basement/UTF8/Helper.hs  2018-03-12 23:27:49.000000000 
+0100
@@ -19,6 +19,7 @@
 import           Basement.Compat.Base
 import           Basement.Compat.Primitive
 import           Basement.Types.OffsetSize
+import           Basement.UTF8.Types
 import           GHC.Prim
 import           GHC.Types
 import           GHC.Word
@@ -38,7 +39,7 @@
 maskHeader3# h = and# h 0xf##
 {-# INLINE maskHeader3# #-}
 
--- mask a UTF8 header for 3 bytes encoding (11110xxx and 3 valid bits)
+-- mask a UTF8 header for 4 bytes encoding (11110xxx and 3 valid bits)
 maskHeader4# :: Word# -> Word#
 maskHeader4# h = and# h 0x7##
 {-# INLINE maskHeader4# #-}
@@ -55,22 +56,22 @@
 toChar# w = C# (chr# (word2Int# w))
 {-# INLINE toChar# #-}
 
-toChar1 :: Word8 -> Char
-toChar1 (W8# w) = toChar# w
+toChar1 :: StepASCII -> Char
+toChar1 (StepASCII (W8# w)) = toChar# w
 
-toChar2 :: Word8 -> Word8 -> Char
-toChar2 (W8# w1) (W8# w2)=
+toChar2 :: StepASCII -> Word8 -> Char
+toChar2 (StepASCII (W8# w1)) (W8# w2) =
     toChar# (or# (uncheckedShiftL# (maskHeader2# w1) 6#) (maskContinuation# 
w2))
 
-toChar3 :: Word8 -> Word8 -> Word8 -> Char
-toChar3 (W8# w1) (W8# w2) (W8# w3) =
+toChar3 :: StepASCII -> Word8 -> Word8 -> Char
+toChar3 (StepASCII (W8# w1)) (W8# w2) (W8# w3) =
     toChar# (or3# (uncheckedShiftL# (maskHeader3# w1) 12#)
                   (uncheckedShiftL# (maskContinuation# w2) 6#)
                   (maskContinuation# w3)
             )
 
-toChar4 :: Word8 -> Word8 -> Word8 -> Word8 -> Char
-toChar4 (W8# w1) (W8# w2) (W8# w3) (W8# w4) =
+toChar4 :: StepASCII -> Word8 -> Word8 -> Word8 -> Char
+toChar4 (StepASCII (W8# w1)) (W8# w2) (W8# w3) (W8# w4) =
     toChar# (or4# (uncheckedShiftL# (maskHeader4# w1) 18#)
                   (uncheckedShiftL# (maskContinuation# w2) 12#)
                   (uncheckedShiftL# (maskContinuation# w3) 6#)
@@ -89,14 +90,13 @@
 -- note that we expect here a valid unicode code point in the *allowed* range.
 -- bits will be lost if going above 0x10ffff
 asUTF8Char :: Char -> UTF8Char
-asUTF8Char !c
+asUTF8Char !(C# c)
   | bool# (ltWord# x 0x80##   ) = encode1
   | bool# (ltWord# x 0x800##  ) = encode2
   | bool# (ltWord# x 0x10000##) = encode3
   | otherwise                   = encode4
     where
-      !(I# xi) = fromEnum c
-      !x       = int2Word# xi
+      !x = int2Word# (ord# c)
 
       encode1 = UTF8_1 (W8# x)
       encode2 =
@@ -135,8 +135,8 @@
     | otherwise = CountOf 4
 {-# INLINE skipNextHeaderValue #-}
 
-headerIsAscii :: Word8 -> Bool
-headerIsAscii x = x < 0x80
+headerIsAscii :: StepASCII -> Bool
+headerIsAscii (StepASCII x) = x < 0x80
 
 charToBytes :: Int -> CountOf Word8
 charToBytes c
@@ -145,3 +145,88 @@
     | c < 0x10000  = CountOf 3
     | c < 0x110000 = CountOf 4
     | otherwise    = error ("invalid code point: " `mappend` show c)
+
+-- | Encode a Char into a CharUTF8
+encodeCharUTF8 :: Char -> CharUTF8
+encodeCharUTF8 !(C# c)
+    | bool# (ltWord# x 0x80##   ) = CharUTF8 (W32# x)
+    | bool# (ltWord# x 0x800##  ) = CharUTF8 encode2
+    | bool# (ltWord# x 0x10000##) = CharUTF8 encode3
+    | otherwise                   = CharUTF8 encode4
+  where
+    !x = int2Word# (ord# c)
+
+    -- clearing mask, clearing all the bits that need to be clear as per the 
UTF8 encoding
+    mask2 = 0x0000bfdf## -- 1 continuation , 5 bits header
+    mask3 = 0x00bfbfef## -- 2 continuations, 4 bits header
+    mask4 = 0xbfbfbff7## -- 3 continuations, 3 bits header
+
+    -- setting mask, settings all the bits that need to be set per the UTF8 
encoding
+    set2  = 0x000080c0## -- 10xxxxxx     110xxxxx
+    set3  = 0x008080e0## -- 10xxxxxx * 2 1110xxxx
+    set4  = 0x808080f0## -- 10xxxxxx * 3 11111xxx
+
+    encode2 = W32# (and# mask2 (or3# set2
+                                     (uncheckedShiftRL# x 6#) -- 5 bits to 1st 
byte
+                                     (uncheckedShiftL# x 8# ) -- move lowest 
bits to the 2nd byte
+                               ))
+    encode3 = W32# (and# mask3 (or4# set3
+                                     (uncheckedShiftRL# x 12#) -- 4 bits to 
1st byte
+                                     (and# 0x3f00## (uncheckedShiftL# x 2#)) 
-- 6 bits to the 2nd byte
+                                     (uncheckedShiftL# x 16# ) -- move lowest 
bits to the 3rd byte
+                               ))
+    encode4 = W32# (and# mask4 (or4# set4
+                                     (uncheckedShiftRL# x 18#) -- 3 bits to 
1st byte
+                                     (or# (and# 0x3f00## (uncheckedShiftRL# x 
4#))   -- 6 bits to the 2nd byte
+                                          (and# 0x3f0000## (uncheckedShiftL# x 
10#)) -- 6 bits to the 3nd byte
+                                     )
+                                     (uncheckedShiftL# x 24# ) -- move lowest 
bits to the 4rd byte
+                               ))
+
+-- | decode a CharUTF8 into a Char
+--
+-- If the value inside a CharUTF8 is not properly encoded, this will result in 
violation
+-- of the Char invariants
+decodeCharUTF8 :: CharUTF8 -> Char
+decodeCharUTF8 c@(CharUTF8 !(W32# w))
+    | isCharUTF8Case1 c = toChar# w
+    | isCharUTF8Case2 c = encode2
+    | isCharUTF8Case3 c = encode3
+    | otherwise         = encode4
+  where
+    encode2 =
+        toChar# (or# (uncheckedShiftL# (maskHeader2# w) 6#)
+                     (maskContinuation# (uncheckedShiftRL# w 8#))
+                )
+    encode3 =
+        toChar# (or3# (uncheckedShiftL# (maskHeader3# w) 12#)
+                      (uncheckedShiftRL# (and# 0x3f00## w) 8#)
+                      (maskContinuation# (uncheckedShiftRL# w 16#))
+                )
+    encode4 =
+        toChar# (or4# (uncheckedShiftL#  (maskHeader4# w) 18#)
+                      (uncheckedShiftRL# (and# 0x3f00## w) 10#)
+                      (uncheckedShiftL#  (and# 0x3f0000## w) 4#)
+                      (maskContinuation# (uncheckedShiftRL# w 24#))
+                )
+
+    -- clearing mask, removing all UTF8 metadata and keeping only signal 
(content)
+    --maskContent2 = 0x00003f1f## -- 1 continuation , 5 bits header
+    --maskContent3 = 0x003f3f0f## -- 2 continuations, 4 bits header
+    --maskContent4 = 0x3f3f3f07## -- 3 continuations, 3 bits header
+
+isCharUTF8Case1 :: CharUTF8 -> Bool
+isCharUTF8Case1 (CharUTF8 !(W32# w)) = bool# (eqWord# (and# w 0x80##) 0##)
+{-# INLINE isCharUTF8Case1 #-}
+
+isCharUTF8Case2 :: CharUTF8 -> Bool
+isCharUTF8Case2 (CharUTF8 !(W32# w)) = bool# (eqWord# (and# w 0x20##) 0##)
+{-# INLINE isCharUTF8Case2 #-}
+
+isCharUTF8Case3 :: CharUTF8 -> Bool
+isCharUTF8Case3 (CharUTF8 !(W32# w)) = bool# (eqWord# (and# w 0x10##) 0##)
+{-# INLINE isCharUTF8Case3 #-}
+
+isCharUTF8Case4 :: CharUTF8 -> Bool
+isCharUTF8Case4 (CharUTF8 !(W32# w)) = bool# (eqWord# (and# w 0x08##) 0##)
+{-# INLINE isCharUTF8Case4 #-}
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/basement-0.0.7/Basement/UTF8/Table.hs 
new/basement-0.0.8/Basement/UTF8/Table.hs
--- old/basement-0.0.7/Basement/UTF8/Table.hs   2017-11-01 05:51:18.000000000 
+0100
+++ new/basement-0.0.8/Basement/UTF8/Table.hs   2018-03-12 23:27:49.000000000 
+0100
@@ -21,6 +21,7 @@
 import           GHC.Word
 import           Basement.Compat.Base
 import           Basement.Compat.Primitive
+import           Basement.UTF8.Types (StepASCII(..))
 
 -- | Check if the byte is a continuation byte
 isContinuation :: Word8 -> Bool
@@ -52,8 +53,8 @@
 data NbBytesCont_ = NbBytesCont0_ | NbBytesCont1_ | NbBytesCont2_ | 
NbBytesCont3_
 
 -- | Get the number of following bytes given the first byte of a UTF8 sequence.
-getNbBytes :: Word8 -> Int
-getNbBytes (W8# w) = I# (getNbBytes# w)
+getNbBytes :: StepASCII -> Int
+getNbBytes (StepASCII (W8# w)) = I# (getNbBytes# w)
 {-# INLINE getNbBytes #-}
 
 -- | Check if the byte is a continuation byte
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/basement-0.0.7/Basement/UTF8/Types.hs 
new/basement-0.0.8/Basement/UTF8/Types.hs
--- old/basement-0.0.7/Basement/UTF8/Types.hs   2018-01-14 13:55:26.000000000 
+0100
+++ new/basement-0.0.8/Basement/UTF8/Types.hs   2018-03-12 23:27:49.000000000 
+0100
@@ -9,6 +9,8 @@
     , isValidStepDigit
     -- * Unicode Errors
     , ValidationFailure(..)
+    -- * UTF8 Encoded 'Char'
+    , CharUTF8(..)
     -- * Case Conversion
     , CM (..)
     ) where
@@ -34,11 +36,22 @@
 newtype StepDigit = StepDigit Word8
 
 -- | Step when processing ASCII character
-newtype StepASCII = StepASCII Word8
+newtype StepASCII = StepASCII { stepAsciiRawValue :: Word8 }
 
 -- | Specialized tuple used for case mapping.
 data CM = CM {-# UNPACK #-} !Char {-# UNPACK #-} !Char {-# UNPACK #-} !Char 
deriving (Eq)
 
+-- | Represent an already encoded UTF8 Char where the the lowest 8 bits is the 
start of the
+-- sequence. If this contains a multi bytes sequence then each higher 8 bits 
are filled with
+-- the remaining sequence 8 bits per 8 bits.
+--
+-- For example:
+-- 'A' => U+0041  => 41          => 0x00000041
+-- '€  => U+20AC  => E2 82 AC    => 0x00AC82E2
+-- '𐍈' => U+10348 => F0 90 8D 88 => 0x888D90F0
+--
+newtype CharUTF8 = CharUTF8 Word32
+
 isValidStepASCII :: StepASCII -> Bool
 isValidStepASCII (StepASCII w) = w < 0x80
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/basement-0.0.7/basement.cabal 
new/basement-0.0.8/basement.cabal
--- old/basement-0.0.7/basement.cabal   2018-02-23 20:55:50.000000000 +0100
+++ new/basement-0.0.8/basement.cabal   2018-07-08 11:08:56.000000000 +0200
@@ -1,25 +1,23 @@
 name:                basement
-version:             0.0.7
+version:             0.0.8
 synopsis:            Foundation scrap box of array & string
 description:         Foundation most basic primitives without any dependencies
-homepage:            https://github.com/haskell-foundation/foundation#readme
 license:             BSD3
 license-file:        LICENSE
 copyright:           2015-2017 Vincent Hanquez <vinc...@snarc.org>
-                     2017-2018 Foundation Maintainers
+                   , 2017-2018 Foundation Maintainers
 maintainer:          vinc...@snarc.org
-copyright:           Vincent Hanquez
 category:            Web
 build-type:          Simple
-homepage:            https://github.com/haskell-foundation/foundation
+homepage:            https://github.com/haskell-foundation/foundation#readme
 bug-reports:         https://github.com/haskell-foundation/foundation/issues
 cabal-version:       >=1.10
-tested-with:         GHC==8.2.1, GHC==8.0.2, GHC==7.10.3
 extra-source-files:  cbits/*.h
 
 source-repository head
   type: git
-  location: https://github.com/haskell-foundation/foundation.git
+  location: https://github.com/haskell-foundation/foundation
+  subdir: basement
 
 library
   hs-source-dirs:    .
@@ -37,6 +35,7 @@
                      Basement.From
 
                      Basement.Types.Char7
+                     Basement.Types.CharUTF8
                      Basement.Types.OffsetSize
                      Basement.Types.Ptr
                      Basement.Types.AsciiString
@@ -59,6 +58,13 @@
                      Basement.String.Builder
                      Basement.NonEmpty
 
+                     -- Extended Types with explicit type level size
+                     Basement.Sized.Block
+                     Basement.Sized.UVect
+                     Basement.Sized.Vect
+                     Basement.Sized.List
+                     Basement.BlockN
+
                      -- Utils
                      Basement.NormalForm
                      Basement.These
@@ -95,15 +101,8 @@
                      Basement.Compat.Natural
                      Basement.Compat.NumLiteral
                      Basement.Compat.Typeable
-  if impl(ghc >= 8.0)
-    exposed-modules: Basement.BlockN
-                   , Basement.Sized.Block
-                   , Basement.Sized.UVect
-                   , Basement.Sized.Vect
-                   , Basement.Bits
-  if impl(ghc >= 7.10)
-    exposed-modules:
-                     Basement.Sized.List
+
+                     Basement.Bits
 
   other-modules:
                      Basement.Error
@@ -137,12 +136,14 @@
 
                      Basement.Terminal.Size
 
-
-  build-depends:       base >= 4.7 && < 5
+  -- support and dependencies
+  if impl(ghc < 8.0)
+    buildable: False
+  else
+    build-depends:     base
                      , ghc-prim
-
-  if os(windows)
-    build-depends:     Win32
+    if os(windows)
+      build-depends:   Win32
 
   default-language:    Haskell2010
   default-extensions: NoImplicitPrelude

commit ghc-basement for openSUSE:Factory

Reply via email to