Hello community, here is the log from the commit of package ghc-xml-conduit for openSUSE:Factory checked in at 2015-10-06 13:27:08 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/ghc-xml-conduit (Old) and /work/SRC/openSUSE:Factory/.ghc-xml-conduit.new (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "ghc-xml-conduit" Changes: -------- --- /work/SRC/openSUSE:Factory/ghc-xml-conduit/ghc-xml-conduit.changes 2015-08-15 11:39:41.000000000 +0200 +++ /work/SRC/openSUSE:Factory/.ghc-xml-conduit.new/ghc-xml-conduit.changes 2015-10-06 13:27:33.000000000 +0200 @@ -1,0 +2,6 @@ +Sun Oct 4 17:59:18 UTC 2015 - [email protected] + +- update to 1.3.2 +* Support for iso-8859-1 + +------------------------------------------------------------------- Old: ---- xml-conduit-1.3.1.tar.gz New: ---- xml-conduit-1.3.2.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ ghc-xml-conduit.spec ++++++ --- /var/tmp/diff_new_pack.KKdRD8/_old 2015-10-06 13:27:34.000000000 +0200 +++ /var/tmp/diff_new_pack.KKdRD8/_new 2015-10-06 13:27:34.000000000 +0200 @@ -15,17 +15,18 @@ # Please submit bugfixes or comments via http://bugs.opensuse.org/ # + %global pkg_name xml-conduit %bcond_with tests Name: ghc-xml-conduit -Version: 1.3.1 +Version: 1.3.2 Release: 0 Summary: Pure-Haskell utilities for dealing with XML with the conduit package +License: BSD-2-Clause Group: System/Libraries -License: BSD-2-Clause Url: https://hackage.haskell.org/package/%{pkg_name} Source0: https://hackage.haskell.org/package/%{pkg_name}-%{version}/%{pkg_name}-%{version}.tar.gz BuildRoot: %{_tmppath}/%{name}-%{version}-build @@ -74,37 +75,29 @@ %prep %setup -q -n %{pkg_name}-%{version} - %build %ghc_lib_build - %install %ghc_lib_install - %check %if %{with tests} %cabal test %endif - %post devel %ghc_pkg_recache - %postun devel %ghc_pkg_recache - %files -f %{name}.files %defattr(-,root,root,-) %doc LICENSE - %files devel -f %{name}-devel.files %defattr(-,root,root,-) %doc README.md - %changelog ++++++ xml-conduit-1.3.1.tar.gz -> xml-conduit-1.3.2.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xml-conduit-1.3.1/ChangeLog.md new/xml-conduit-1.3.2/ChangeLog.md --- old/xml-conduit-1.3.1/ChangeLog.md 2015-07-17 02:35:37.000000000 +0200 +++ new/xml-conduit-1.3.2/ChangeLog.md 2015-10-02 08:55:36.000000000 +0200 @@ -1,3 +1,7 @@ +## 1.3.2 + +* Support for iso-8859-1 [#63](https://github.com/snoyberg/xml/issues/63) + ## 1.3.1 * Add functions to ignore subtrees & result-streaming (yield) parsers [#58](https://github.com/snoyberg/xml/pull/58) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xml-conduit-1.3.1/Text/XML/Stream/Parse.hs new/xml-conduit-1.3.2/Text/XML/Stream/Parse.hs --- old/xml-conduit-1.3.1/Text/XML/Stream/Parse.hs 2015-07-17 02:35:37.000000000 +0200 +++ new/xml-conduit-1.3.2/Text/XML/Stream/Parse.hs 2015-10-02 08:55:36.000000000 +0200 @@ -145,6 +145,7 @@ import Data.Attoparsec.Text (Parser, anyChar, char, manyTill, skipWhile, string, takeWhile, takeWhile1, try) +import qualified Data.Attoparsec.Text as AT import Data.Conduit.Attoparsec (PositionRange, conduitParser) import Data.XML.Types (Content (..), Event (..), ExternalID (..), @@ -172,8 +173,8 @@ import Data.Text (Text, pack) import qualified Data.Text as T import qualified Data.Text as TS -import Data.Text.Encoding (decodeUtf32BEWith) -import Data.Text.Encoding.Error (ignore) +import Data.Text.Encoding (decodeUtf32BEWith, decodeUtf8With) +import Data.Text.Encoding.Error (ignore, lenientDecode) import Data.Text.Read (Reader, decimal, hexadecimal) import Data.Typeable (Typeable) import Data.Word (Word32) @@ -259,30 +260,58 @@ conduit front = await >>= maybe (return ()) (push front) push front bss = - either conduit (\(bss', continue) -> leftover bss' >> continue) + either conduit + (uncurry checkXMLDecl) (getEncoding front bss) getEncoding front bs' | S.length bs < 4 = Left (bs `S.append`) | otherwise = - Right (bsOut, CT.decode codec) + Right (bsOut, mcodec) where bs = front bs' bsOut = S.append (S.drop toDrop x) y (x, y) = S.splitAt 4 bs - (toDrop, codec) = + (toDrop, mcodec) = case S.unpack x of - [0x00, 0x00, 0xFE, 0xFF] -> (4, CT.utf32_be) - [0xFF, 0xFE, 0x00, 0x00] -> (4, CT.utf32_le) - 0xFE : 0xFF: _ -> (2, CT.utf16_be) - 0xFF : 0xFE: _ -> (2, CT.utf16_le) - 0xEF : 0xBB: 0xBF : _ -> (3, CT.utf8) - [0x00, 0x00, 0x00, 0x3C] -> (0, CT.utf32_be) - [0x3C, 0x00, 0x00, 0x00] -> (0, CT.utf32_le) - [0x00, 0x3C, 0x00, 0x3F] -> (0, CT.utf16_be) - [0x3C, 0x00, 0x3F, 0x00] -> (0, CT.utf16_le) - _ -> (0, CT.utf8) -- Assuming UTF-8 + [0x00, 0x00, 0xFE, 0xFF] -> (4, Just $ CT.utf32_be) + [0xFF, 0xFE, 0x00, 0x00] -> (4, Just $ CT.utf32_le) + 0xFE : 0xFF: _ -> (2, Just $ CT.utf16_be) + 0xFF : 0xFE: _ -> (2, Just $ CT.utf16_le) + 0xEF : 0xBB: 0xBF : _ -> (3, Just $ CT.utf8) + [0x00, 0x00, 0x00, 0x3C] -> (0, Just $ CT.utf32_be) + [0x3C, 0x00, 0x00, 0x00] -> (0, Just $ CT.utf32_le) + [0x00, 0x3C, 0x00, 0x3F] -> (0, Just $ CT.utf16_be) + [0x3C, 0x00, 0x3F, 0x00] -> (0, Just $ CT.utf16_le) + _ -> (0, Nothing) -- Assuming UTF-8 + +checkXMLDecl :: MonadThrow m + => S.ByteString + -> Maybe CT.Codec + -> Conduit S.ByteString m TS.Text +checkXMLDecl bs (Just codec) = leftover bs >> CT.decode codec +checkXMLDecl bs0 Nothing = + loop [] (AT.parse (parseToken decodeXmlEntities)) bs0 + where + loop chunks0 parser nextChunk = + case parser $ decodeUtf8With lenientDecode nextChunk of + AT.Fail _ _ _ -> fallback + AT.Partial f -> await >>= maybe fallback (loop chunks f) + AT.Done _ (TokenBeginDocument attrs) -> findEncoding attrs + AT.Done _ _ -> fallback + where + chunks = nextChunk : chunks0 + fallback = complete CT.utf8 + complete codec = mapM_ leftover chunks >> CT.decode codec + + findEncoding [] = fallback + findEncoding ((TName _ "encoding", [ContentText enc]):_) = + case enc of + "iso-8859-1" -> complete CT.iso8859_1 + "utf-8" -> complete CT.utf8 + _ -> complete CT.utf8 + findEncoding (_:xs) = findEncoding xs type EventPos = (Maybe PositionRange, Event) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xml-conduit-1.3.1/test/main.hs new/xml-conduit-1.3.2/test/main.hs --- old/xml-conduit-1.3.1/test/main.hs 2015-07-17 02:35:37.000000000 +0200 +++ new/xml-conduit-1.3.2/test/main.hs 2015-10-02 08:55:36.000000000 +0200 @@ -93,6 +93,7 @@ it "works" caseOrderAttrs it "parsing CDATA" caseParseCdata it "retains namespaces when asked" caseRetainNamespaces + it "handles iso-8859-1" caseIso8859_1 documentParseRender :: IO () documentParseRender = @@ -605,3 +606,15 @@ [] ]) [] + +caseIso8859_1 :: Assertion +caseIso8859_1 = do + let lbs = "<?xml version=\"1.0\" encoding=\"iso-8859-1\"?><foo>\232</foo>" + doc = Res.parseLBS_ def lbs + doc `shouldBe` Res.Document + (Res.Prologue [] Nothing []) + (Res.Element + "foo" + Map.empty + [Res.NodeContent "\232"]) + [] diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xml-conduit-1.3.1/xml-conduit.cabal new/xml-conduit-1.3.2/xml-conduit.cabal --- old/xml-conduit-1.3.1/xml-conduit.cabal 2015-07-17 02:35:37.000000000 +0200 +++ new/xml-conduit-1.3.2/xml-conduit.cabal 2015-10-02 08:55:36.000000000 +0200 @@ -1,5 +1,5 @@ name: xml-conduit -version: 1.3.1 +version: 1.3.2 license: MIT license-file: LICENSE author: Michael Snoyman <[email protected]>, Aristid Breitkreuz <[email protected]>
