Hello community, here is the log from the commit of package ghc-basement for openSUSE:Factory checked in at 2018-07-24 17:13:31 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/ghc-basement (Old) and /work/SRC/openSUSE:Factory/.ghc-basement.new (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "ghc-basement" Tue Jul 24 17:13:31 2018 rev:2 rq:623726 version:0.0.8 Changes: -------- --- /work/SRC/openSUSE:Factory/ghc-basement/ghc-basement.changes 2018-05-30 13:10:20.874385774 +0200 +++ /work/SRC/openSUSE:Factory/.ghc-basement.new/ghc-basement.changes 2018-07-24 17:13:34.218618469 +0200 @@ -1,0 +2,6 @@ +Fri Jul 13 14:31:46 UTC 2018 - psim...@suse.com + +- Update basement to version 0.0.8. + Upstream does not provide a change log file. + +------------------------------------------------------------------- Old: ---- basement-0.0.7.tar.gz New: ---- basement-0.0.8.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ ghc-basement.spec ++++++ --- /var/tmp/diff_new_pack.1Y6fTA/_old 2018-07-24 17:13:35.022619513 +0200 +++ /var/tmp/diff_new_pack.1Y6fTA/_new 2018-07-24 17:13:35.022619513 +0200 @@ -18,7 +18,7 @@ %global pkg_name basement Name: ghc-%{pkg_name} -Version: 0.0.7 +Version: 0.0.8 Release: 0 Summary: Foundation scrap box of array & string License: BSD-3-Clause ++++++ basement-0.0.7.tar.gz -> basement-0.0.8.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/basement-0.0.7/Basement/Alg/String.hs new/basement-0.0.8/Basement/Alg/String.hs --- old/basement-0.0.7/Basement/Alg/String.hs 2018-02-04 11:47:59.000000000 +0100 +++ new/basement-0.0.8/Basement/Alg/String.hs 2018-03-12 23:27:49.000000000 +0100 @@ -39,9 +39,9 @@ loop !d !s | s == end = pure (offsetAsSize d) | otherwise = - let !h = index src s + let !h = nextAscii src s in case headerIsAscii h of - True | predicate (toChar1 h) -> primMbaWrite dst d h >> loop (d + Offset 1) (s + Offset 1) + True | predicate (toChar1 h) -> primMbaWrite dst d (stepAsciiRawValue h) >> loop (d + Offset 1) (s + Offset 1) | otherwise -> loop d (s + Offset 1) False -> case next src s of @@ -58,10 +58,10 @@ where loop4 !ofs | ofs4 < end = - let h1 = index ba ofs - h2 = index ba (ofs+1) - h3 = index ba (ofs+2) - h4 = index ba (ofs+3) + let h1 = nextAscii ba ofs + h2 = nextAscii ba (ofs+1) + h3 = nextAscii ba (ofs+2) + h4 = nextAscii ba (ofs+3) in if headerIsAscii h1 && headerIsAscii h2 && headerIsAscii h3 && headerIsAscii h4 then loop4 ofs4 else loop ofs @@ -73,7 +73,7 @@ | headerIsAscii h = loop (ofs + Offset 1) | otherwise = multi (CountOf $ getNbBytes h) ofs where - h = index ba ofs + h = nextAscii ba ofs multi (CountOf 0xff) pos = (pos, Just InvalidHeader) multi nbConts pos diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/basement-0.0.7/Basement/Alg/UTF8.hs new/basement-0.0.8/Basement/Alg/UTF8.hs --- old/basement-0.0.7/Basement/Alg/UTF8.hs 2018-02-04 12:56:52.000000000 +0100 +++ new/basement-0.0.8/Basement/Alg/UTF8.hs 2018-03-12 23:27:49.000000000 +0100 @@ -9,8 +9,10 @@ , expectAscii , next , nextSkip + , nextWith , prev , prevSkip + , writeASCII , writeUTF8 , toList , all @@ -32,6 +34,7 @@ import Basement.Numerical.Additive import Basement.Numerical.Subtractive import Basement.Types.OffsetSize +import Basement.Types.Char7 (Char7(..)) import Basement.PrimType import Basement.UTF8.Helper import Basement.UTF8.Table @@ -62,15 +65,31 @@ 3 -> Step (toChar4 h (index ba (n + Offset 1)) (index ba (n + Offset 2)) (index ba (n + Offset 3))) (n + Offset 4) - r -> error ("next: internal error: invalid input: offset=" <> show n <> " table=" <> show r <> " h=" <> show h) + r -> error ("next: internal error: invalid input: offset=" <> show n <> " table=" <> show r <> " h=" <> show (stepAsciiRawValue h)) where - !h = index ba n + !h = nextAscii ba n {-# INLINE next #-} nextSkip :: Indexable container Word8 => container -> Offset Word8 -> Offset Word8 -nextSkip ba n = n + 1 + Offset (getNbBytes (index ba n)) +nextSkip ba n = n + 1 + Offset (getNbBytes (nextAscii ba n)) {-# INLINE nextSkip #-} +-- | special case for only non ascii next'er function +nextWith :: Indexable container Word8 + => StepASCII + -> container + -> Offset8 + -> Step +nextWith h ba n = + case getNbBytes h of + 1 -> Step (toChar2 h (index ba n)) (n + Offset 1) + 2 -> Step (toChar3 h (index ba n) (index ba (n + Offset 1))) (n + Offset 2) + 3 -> Step (toChar4 h (index ba n) + (index ba (n + Offset 1)) + (index ba (n + Offset 2))) (n + Offset 3) + r -> error ("nextWith: internal error: invalid input: offset=" <> show n <> " table=" <> show r <> " h=" <> show (stepAsciiRawValue h)) +{-# INLINE nextWith #-} + -- Given a non null offset, give the previous character and the offset of this character -- will fail bad if apply at the beginning of string or an empty string. prev :: Indexable container Word8 => container -> Offset Word8 -> StepBack @@ -104,7 +123,12 @@ | isContinuation (index ba o) = loop (o `offsetMinusE` sz1) | otherwise = o -writeUTF8 :: (PrimMonad prim, RandomAccess container prim Word8) +writeASCII :: (PrimMonad prim, RandomAccess container prim Word8) + => container -> Offset8 -> Char7 -> prim () +writeASCII mba !i (Char7 c) = write mba i c +{-# INLINE writeASCII #-} + +writeUTF8 :: (PrimMonad prim, RandomAccess container prim Word8) => container -> Offset8 -> Char -> prim Offset8 writeUTF8 mba !i !c | bool# (ltWord# x 0x80## ) = encode1 @@ -245,24 +269,24 @@ where loop !d !s | s == end = pure () - | headerIsAscii h = primMbaWrite dst d h >> loop (d `offsetSub` 1) (s + 1) + | headerIsAscii h = primMbaWrite dst d (stepAsciiRawValue h) >> loop (d `offsetSub` 1) (s + 1) | otherwise = do case getNbBytes h of 1 -> do - primMbaWrite dst (d `offsetSub` 1) h + primMbaWrite dst (d `offsetSub` 1) (stepAsciiRawValue h) primMbaWrite dst d (index src (s + 1)) loop (d `offsetSub` 2) (s + 2) 2 -> do - primMbaWrite dst (d `offsetSub` 2) h + primMbaWrite dst (d `offsetSub` 2) (stepAsciiRawValue h) primMbaWrite dst (d `offsetSub` 1) (index src (s + 1)) primMbaWrite dst d (index src (s + 2)) loop (d `offsetSub` 3) (s + 3) 3 -> do - primMbaWrite dst (d `offsetSub` 3) h + primMbaWrite dst (d `offsetSub` 3) (stepAsciiRawValue h) primMbaWrite dst (d `offsetSub` 2) (index src (s + 1)) primMbaWrite dst (d `offsetSub` 1) (index src (s + 2)) primMbaWrite dst d (index src (s + 3)) loop (d `offsetSub` 4) (s + 4) _ -> error "impossible" - where h = index src s + where h = nextAscii src s {-# INLINE reverse #-} diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/basement-0.0.7/Basement/Nat.hs new/basement-0.0.8/Basement/Nat.hs --- old/basement-0.0.7/Basement/Nat.hs 2017-11-11 10:52:31.000000000 +0100 +++ new/basement-0.0.8/Basement/Nat.hs 2018-07-08 10:40:19.000000000 +0200 @@ -8,6 +8,9 @@ {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE UndecidableInstances #-} {-# LANGUAGE ConstraintKinds #-} +#if __GLASGOW_HASKELL__ >= 806 +{-# LANGUAGE NoStarIsType #-} +#endif module Basement.Nat ( Nat , KnownNat diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/basement-0.0.7/Basement/Sized/Block.hs new/basement-0.0.8/Basement/Sized/Block.hs --- old/basement-0.0.7/Basement/Sized/Block.hs 2018-02-12 15:24:12.000000000 +0100 +++ new/basement-0.0.8/Basement/Sized/Block.hs 2018-07-08 10:40:19.000000000 +0200 @@ -5,12 +5,16 @@ -- -- A Nat-sized version of Block {-# LANGUAGE AllowAmbiguousTypes #-} +{-# LANGUAGE CPP #-} +{-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE DataKinds #-} -{-# LANGUAGE TypeOperators #-} -{-# LANGUAGE TypeApplications #-} -{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} -{-# LANGUAGE ConstraintKinds #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE TypeOperators #-} +#if __GLASGOW_HASKELL__ >= 806 +{-# LANGUAGE NoStarIsType #-} +#endif module Basement.Sized.Block ( BlockN diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/basement-0.0.7/Basement/String.hs new/basement-0.0.8/Basement/String.hs --- old/basement-0.0.7/Basement/String.hs 2018-02-23 20:34:34.000000000 +0100 +++ new/basement-0.0.8/Basement/String.hs 2018-03-12 23:27:49.000000000 +0100 @@ -127,6 +127,8 @@ import Basement.Alg.Class (Indexable) import qualified Basement.Alg.UTF8 as UTF8 import qualified Basement.Alg.String as Alg +import Basement.Types.Char7 (Char7(..), c7Upper, c7Lower) +import qualified Basement.Types.Char7 as Char7 import GHC.Prim import GHC.ST import GHC.Types @@ -194,7 +196,7 @@ (pos, Just failure) -> return (pos, Just failure) one pos = do - h <- Vec.unsafeRead mba pos + h <- StepASCII <$> Vec.unsafeRead mba pos let nbConts = getNbBytes h if nbConts == 0xff then return (pos, Just InvalidHeader) @@ -1321,8 +1323,8 @@ {-# SPECIALIZE decimalDigitsPtr :: Word -> Ptr Word8 -> Offset Word8 -> Offset Word8 -> (# Word, Bool, Offset Word8 #) #-} -- | Convert a 'String' 'Char' by 'Char' using a case mapping function. -caseConvert :: (Char -> CM) -> String -> String -caseConvert op s@(String arr) = runST $ do +caseConvert :: (Char7 -> Char7) -> (Char -> CM) -> String -> String +caseConvert opASCII op s@(String arr) = runST $ do mba <- MBLK.new iLen nL <- C.onBackendPrim (\blk -> go mba blk (Offset 0) start) @@ -1342,11 +1344,14 @@ where eSize !e = if e == '\0' then 0 else charToBytes (fromEnum e) loop !dst !allocLen !nLen !dstIdx !srcIdx - | srcIdx == end = return nLen + | srcIdx == end = return nLen | nLen == allocLen = realloc + | headerIsAscii h = do + UTF8.writeASCII dst dstIdx (opASCII $ Char7 $ stepAsciiRawValue h) + loop dst allocLen (nLen + 1) (dstIdx+Offset 1) (srcIdx+Offset 1) | otherwise = do let !(CM c1 c2 c3) = op c - !(Step c nextSrcIdx) = UTF8.next src srcIdx + !(Step c nextSrcIdx) = UTF8.nextWith h src (srcIdx+Offset 1) nextDstIdx <- UTF8.writeUTF8 dst dstIdx c1 if c2 == '\0' -- We keep the most common case loop as short as possible. then loop dst allocLen (nLen + charToBytes (fromEnum c1)) nextDstIdx nextSrcIdx @@ -1362,20 +1367,21 @@ nDst <- MBLK.new nAll MBLK.unsafeCopyElements nDst 0 dst 0 nLen loop nDst nAll nLen dstIdx srcIdx + h = UTF8.nextAscii src srcIdx -- | Convert a 'String' to the upper-case equivalent. upper :: String -> String -upper = caseConvert upperMapping +upper = caseConvert c7Upper upperMapping -- | Convert a 'String' to the upper-case equivalent. lower :: String -> String -lower = caseConvert lowerMapping +lower = caseConvert c7Lower lowerMapping -- | Convert a 'String' to the unicode case fold equivalent. -- -- Case folding is mostly used for caseless comparison of strings. caseFold :: String -> String -caseFold = caseConvert foldMapping +caseFold = caseConvert c7Upper foldMapping -- | Check whether the first string is a prefix of the second string. isPrefixOf :: String -> String -> Bool diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/basement-0.0.7/Basement/Types/Char7.hs new/basement-0.0.8/Basement/Types/Char7.hs --- old/basement-0.0.7/Basement/Types/Char7.hs 2017-08-26 15:34:20.000000000 +0200 +++ new/basement-0.0.8/Basement/Types/Char7.hs 2018-03-12 23:27:49.000000000 +0100 @@ -24,6 +24,9 @@ , c7_7 , c7_8 , c7_9 + -- * Upper / Lower With ASCII + , c7Upper + , c7Lower ) where import GHC.Prim @@ -98,3 +101,15 @@ c7_7 = Char7 0x37 c7_8 = Char7 0x38 c7_9 = Char7 0x39 + +c7Lower :: Char7 -> Char7 +c7Lower c@(Char7 w) + | c < c7_A = c + | c <= c7_Z = Char7 (w .|. 0x20) + | otherwise = c + +c7Upper :: Char7 -> Char7 +c7Upper c@(Char7 w) + | c < c7_a = c + | c <= c7_z = Char7 (w .&. 0xdf) + | otherwise = c diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/basement-0.0.7/Basement/Types/CharUTF8.hs new/basement-0.0.8/Basement/Types/CharUTF8.hs --- old/basement-0.0.7/Basement/Types/CharUTF8.hs 1970-01-01 01:00:00.000000000 +0100 +++ new/basement-0.0.8/Basement/Types/CharUTF8.hs 2018-03-12 23:27:49.000000000 +0100 @@ -0,0 +1,8 @@ +module Basement.Types.CharUTF8 + ( CharUTF8(..) + , encodeCharUTF8 + , decodeCharUTF8 + ) where + +import Basement.UTF8.Types +import Basement.UTF8.Helper diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/basement-0.0.7/Basement/Types/Word128.hs new/basement-0.0.8/Basement/Types/Word128.hs --- old/basement-0.0.7/Basement/Types/Word128.hs 2018-02-23 20:15:54.000000000 +0100 +++ new/basement-0.0.8/Basement/Types/Word128.hs 2018-07-08 10:40:19.000000000 +0200 @@ -144,7 +144,7 @@ (-) :: Word128 -> Word128 -> Word128 (-) a b | a >= b = applyBiWordOnNatural (Prelude.-) a b - | otherwise = complement $ applyBiWordOnNatural (Prelude.-) b a + | otherwise = complement (applyBiWordOnNatural (Prelude.-) b a) + 1 -- | Multiplication (*) :: Word128 -> Word128 -> Word128 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/basement-0.0.7/Basement/Types/Word256.hs new/basement-0.0.8/Basement/Types/Word256.hs --- old/basement-0.0.7/Basement/Types/Word256.hs 2018-02-23 20:15:54.000000000 +0100 +++ new/basement-0.0.8/Basement/Types/Word256.hs 2018-07-08 10:40:19.000000000 +0200 @@ -179,7 +179,7 @@ (-) :: Word256 -> Word256 -> Word256 (-) a b | a >= b = applyBiWordOnNatural (Prelude.-) a b - | otherwise = complement $ applyBiWordOnNatural (Prelude.-) b a + | otherwise = complement (applyBiWordOnNatural (Prelude.-) b a) + 1 -- | Multiplication (*) :: Word256 -> Word256 -> Word256 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/basement-0.0.7/Basement/UTF8/Helper.hs new/basement-0.0.8/Basement/UTF8/Helper.hs --- old/basement-0.0.7/Basement/UTF8/Helper.hs 2017-08-05 12:49:52.000000000 +0200 +++ new/basement-0.0.8/Basement/UTF8/Helper.hs 2018-03-12 23:27:49.000000000 +0100 @@ -19,6 +19,7 @@ import Basement.Compat.Base import Basement.Compat.Primitive import Basement.Types.OffsetSize +import Basement.UTF8.Types import GHC.Prim import GHC.Types import GHC.Word @@ -38,7 +39,7 @@ maskHeader3# h = and# h 0xf## {-# INLINE maskHeader3# #-} --- mask a UTF8 header for 3 bytes encoding (11110xxx and 3 valid bits) +-- mask a UTF8 header for 4 bytes encoding (11110xxx and 3 valid bits) maskHeader4# :: Word# -> Word# maskHeader4# h = and# h 0x7## {-# INLINE maskHeader4# #-} @@ -55,22 +56,22 @@ toChar# w = C# (chr# (word2Int# w)) {-# INLINE toChar# #-} -toChar1 :: Word8 -> Char -toChar1 (W8# w) = toChar# w +toChar1 :: StepASCII -> Char +toChar1 (StepASCII (W8# w)) = toChar# w -toChar2 :: Word8 -> Word8 -> Char -toChar2 (W8# w1) (W8# w2)= +toChar2 :: StepASCII -> Word8 -> Char +toChar2 (StepASCII (W8# w1)) (W8# w2) = toChar# (or# (uncheckedShiftL# (maskHeader2# w1) 6#) (maskContinuation# w2)) -toChar3 :: Word8 -> Word8 -> Word8 -> Char -toChar3 (W8# w1) (W8# w2) (W8# w3) = +toChar3 :: StepASCII -> Word8 -> Word8 -> Char +toChar3 (StepASCII (W8# w1)) (W8# w2) (W8# w3) = toChar# (or3# (uncheckedShiftL# (maskHeader3# w1) 12#) (uncheckedShiftL# (maskContinuation# w2) 6#) (maskContinuation# w3) ) -toChar4 :: Word8 -> Word8 -> Word8 -> Word8 -> Char -toChar4 (W8# w1) (W8# w2) (W8# w3) (W8# w4) = +toChar4 :: StepASCII -> Word8 -> Word8 -> Word8 -> Char +toChar4 (StepASCII (W8# w1)) (W8# w2) (W8# w3) (W8# w4) = toChar# (or4# (uncheckedShiftL# (maskHeader4# w1) 18#) (uncheckedShiftL# (maskContinuation# w2) 12#) (uncheckedShiftL# (maskContinuation# w3) 6#) @@ -89,14 +90,13 @@ -- note that we expect here a valid unicode code point in the *allowed* range. -- bits will be lost if going above 0x10ffff asUTF8Char :: Char -> UTF8Char -asUTF8Char !c +asUTF8Char !(C# c) | bool# (ltWord# x 0x80## ) = encode1 | bool# (ltWord# x 0x800## ) = encode2 | bool# (ltWord# x 0x10000##) = encode3 | otherwise = encode4 where - !(I# xi) = fromEnum c - !x = int2Word# xi + !x = int2Word# (ord# c) encode1 = UTF8_1 (W8# x) encode2 = @@ -135,8 +135,8 @@ | otherwise = CountOf 4 {-# INLINE skipNextHeaderValue #-} -headerIsAscii :: Word8 -> Bool -headerIsAscii x = x < 0x80 +headerIsAscii :: StepASCII -> Bool +headerIsAscii (StepASCII x) = x < 0x80 charToBytes :: Int -> CountOf Word8 charToBytes c @@ -145,3 +145,88 @@ | c < 0x10000 = CountOf 3 | c < 0x110000 = CountOf 4 | otherwise = error ("invalid code point: " `mappend` show c) + +-- | Encode a Char into a CharUTF8 +encodeCharUTF8 :: Char -> CharUTF8 +encodeCharUTF8 !(C# c) + | bool# (ltWord# x 0x80## ) = CharUTF8 (W32# x) + | bool# (ltWord# x 0x800## ) = CharUTF8 encode2 + | bool# (ltWord# x 0x10000##) = CharUTF8 encode3 + | otherwise = CharUTF8 encode4 + where + !x = int2Word# (ord# c) + + -- clearing mask, clearing all the bits that need to be clear as per the UTF8 encoding + mask2 = 0x0000bfdf## -- 1 continuation , 5 bits header + mask3 = 0x00bfbfef## -- 2 continuations, 4 bits header + mask4 = 0xbfbfbff7## -- 3 continuations, 3 bits header + + -- setting mask, settings all the bits that need to be set per the UTF8 encoding + set2 = 0x000080c0## -- 10xxxxxx 110xxxxx + set3 = 0x008080e0## -- 10xxxxxx * 2 1110xxxx + set4 = 0x808080f0## -- 10xxxxxx * 3 11111xxx + + encode2 = W32# (and# mask2 (or3# set2 + (uncheckedShiftRL# x 6#) -- 5 bits to 1st byte + (uncheckedShiftL# x 8# ) -- move lowest bits to the 2nd byte + )) + encode3 = W32# (and# mask3 (or4# set3 + (uncheckedShiftRL# x 12#) -- 4 bits to 1st byte + (and# 0x3f00## (uncheckedShiftL# x 2#)) -- 6 bits to the 2nd byte + (uncheckedShiftL# x 16# ) -- move lowest bits to the 3rd byte + )) + encode4 = W32# (and# mask4 (or4# set4 + (uncheckedShiftRL# x 18#) -- 3 bits to 1st byte + (or# (and# 0x3f00## (uncheckedShiftRL# x 4#)) -- 6 bits to the 2nd byte + (and# 0x3f0000## (uncheckedShiftL# x 10#)) -- 6 bits to the 3nd byte + ) + (uncheckedShiftL# x 24# ) -- move lowest bits to the 4rd byte + )) + +-- | decode a CharUTF8 into a Char +-- +-- If the value inside a CharUTF8 is not properly encoded, this will result in violation +-- of the Char invariants +decodeCharUTF8 :: CharUTF8 -> Char +decodeCharUTF8 c@(CharUTF8 !(W32# w)) + | isCharUTF8Case1 c = toChar# w + | isCharUTF8Case2 c = encode2 + | isCharUTF8Case3 c = encode3 + | otherwise = encode4 + where + encode2 = + toChar# (or# (uncheckedShiftL# (maskHeader2# w) 6#) + (maskContinuation# (uncheckedShiftRL# w 8#)) + ) + encode3 = + toChar# (or3# (uncheckedShiftL# (maskHeader3# w) 12#) + (uncheckedShiftRL# (and# 0x3f00## w) 8#) + (maskContinuation# (uncheckedShiftRL# w 16#)) + ) + encode4 = + toChar# (or4# (uncheckedShiftL# (maskHeader4# w) 18#) + (uncheckedShiftRL# (and# 0x3f00## w) 10#) + (uncheckedShiftL# (and# 0x3f0000## w) 4#) + (maskContinuation# (uncheckedShiftRL# w 24#)) + ) + + -- clearing mask, removing all UTF8 metadata and keeping only signal (content) + --maskContent2 = 0x00003f1f## -- 1 continuation , 5 bits header + --maskContent3 = 0x003f3f0f## -- 2 continuations, 4 bits header + --maskContent4 = 0x3f3f3f07## -- 3 continuations, 3 bits header + +isCharUTF8Case1 :: CharUTF8 -> Bool +isCharUTF8Case1 (CharUTF8 !(W32# w)) = bool# (eqWord# (and# w 0x80##) 0##) +{-# INLINE isCharUTF8Case1 #-} + +isCharUTF8Case2 :: CharUTF8 -> Bool +isCharUTF8Case2 (CharUTF8 !(W32# w)) = bool# (eqWord# (and# w 0x20##) 0##) +{-# INLINE isCharUTF8Case2 #-} + +isCharUTF8Case3 :: CharUTF8 -> Bool +isCharUTF8Case3 (CharUTF8 !(W32# w)) = bool# (eqWord# (and# w 0x10##) 0##) +{-# INLINE isCharUTF8Case3 #-} + +isCharUTF8Case4 :: CharUTF8 -> Bool +isCharUTF8Case4 (CharUTF8 !(W32# w)) = bool# (eqWord# (and# w 0x08##) 0##) +{-# INLINE isCharUTF8Case4 #-} diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/basement-0.0.7/Basement/UTF8/Table.hs new/basement-0.0.8/Basement/UTF8/Table.hs --- old/basement-0.0.7/Basement/UTF8/Table.hs 2017-11-01 05:51:18.000000000 +0100 +++ new/basement-0.0.8/Basement/UTF8/Table.hs 2018-03-12 23:27:49.000000000 +0100 @@ -21,6 +21,7 @@ import GHC.Word import Basement.Compat.Base import Basement.Compat.Primitive +import Basement.UTF8.Types (StepASCII(..)) -- | Check if the byte is a continuation byte isContinuation :: Word8 -> Bool @@ -52,8 +53,8 @@ data NbBytesCont_ = NbBytesCont0_ | NbBytesCont1_ | NbBytesCont2_ | NbBytesCont3_ -- | Get the number of following bytes given the first byte of a UTF8 sequence. -getNbBytes :: Word8 -> Int -getNbBytes (W8# w) = I# (getNbBytes# w) +getNbBytes :: StepASCII -> Int +getNbBytes (StepASCII (W8# w)) = I# (getNbBytes# w) {-# INLINE getNbBytes #-} -- | Check if the byte is a continuation byte diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/basement-0.0.7/Basement/UTF8/Types.hs new/basement-0.0.8/Basement/UTF8/Types.hs --- old/basement-0.0.7/Basement/UTF8/Types.hs 2018-01-14 13:55:26.000000000 +0100 +++ new/basement-0.0.8/Basement/UTF8/Types.hs 2018-03-12 23:27:49.000000000 +0100 @@ -9,6 +9,8 @@ , isValidStepDigit -- * Unicode Errors , ValidationFailure(..) + -- * UTF8 Encoded 'Char' + , CharUTF8(..) -- * Case Conversion , CM (..) ) where @@ -34,11 +36,22 @@ newtype StepDigit = StepDigit Word8 -- | Step when processing ASCII character -newtype StepASCII = StepASCII Word8 +newtype StepASCII = StepASCII { stepAsciiRawValue :: Word8 } -- | Specialized tuple used for case mapping. data CM = CM {-# UNPACK #-} !Char {-# UNPACK #-} !Char {-# UNPACK #-} !Char deriving (Eq) +-- | Represent an already encoded UTF8 Char where the the lowest 8 bits is the start of the +-- sequence. If this contains a multi bytes sequence then each higher 8 bits are filled with +-- the remaining sequence 8 bits per 8 bits. +-- +-- For example: +-- 'A' => U+0041 => 41 => 0x00000041 +-- '€ => U+20AC => E2 82 AC => 0x00AC82E2 +-- '𐍈' => U+10348 => F0 90 8D 88 => 0x888D90F0 +-- +newtype CharUTF8 = CharUTF8 Word32 + isValidStepASCII :: StepASCII -> Bool isValidStepASCII (StepASCII w) = w < 0x80 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/basement-0.0.7/basement.cabal new/basement-0.0.8/basement.cabal --- old/basement-0.0.7/basement.cabal 2018-02-23 20:55:50.000000000 +0100 +++ new/basement-0.0.8/basement.cabal 2018-07-08 11:08:56.000000000 +0200 @@ -1,25 +1,23 @@ name: basement -version: 0.0.7 +version: 0.0.8 synopsis: Foundation scrap box of array & string description: Foundation most basic primitives without any dependencies -homepage: https://github.com/haskell-foundation/foundation#readme license: BSD3 license-file: LICENSE copyright: 2015-2017 Vincent Hanquez <vinc...@snarc.org> - 2017-2018 Foundation Maintainers + , 2017-2018 Foundation Maintainers maintainer: vinc...@snarc.org -copyright: Vincent Hanquez category: Web build-type: Simple -homepage: https://github.com/haskell-foundation/foundation +homepage: https://github.com/haskell-foundation/foundation#readme bug-reports: https://github.com/haskell-foundation/foundation/issues cabal-version: >=1.10 -tested-with: GHC==8.2.1, GHC==8.0.2, GHC==7.10.3 extra-source-files: cbits/*.h source-repository head type: git - location: https://github.com/haskell-foundation/foundation.git + location: https://github.com/haskell-foundation/foundation + subdir: basement library hs-source-dirs: . @@ -37,6 +35,7 @@ Basement.From Basement.Types.Char7 + Basement.Types.CharUTF8 Basement.Types.OffsetSize Basement.Types.Ptr Basement.Types.AsciiString @@ -59,6 +58,13 @@ Basement.String.Builder Basement.NonEmpty + -- Extended Types with explicit type level size + Basement.Sized.Block + Basement.Sized.UVect + Basement.Sized.Vect + Basement.Sized.List + Basement.BlockN + -- Utils Basement.NormalForm Basement.These @@ -95,15 +101,8 @@ Basement.Compat.Natural Basement.Compat.NumLiteral Basement.Compat.Typeable - if impl(ghc >= 8.0) - exposed-modules: Basement.BlockN - , Basement.Sized.Block - , Basement.Sized.UVect - , Basement.Sized.Vect - , Basement.Bits - if impl(ghc >= 7.10) - exposed-modules: - Basement.Sized.List + + Basement.Bits other-modules: Basement.Error @@ -137,12 +136,14 @@ Basement.Terminal.Size - - build-depends: base >= 4.7 && < 5 + -- support and dependencies + if impl(ghc < 8.0) + buildable: False + else + build-depends: base , ghc-prim - - if os(windows) - build-depends: Win32 + if os(windows) + build-depends: Win32 default-language: Haskell2010 default-extensions: NoImplicitPrelude