Hello community, here is the log from the commit of package ghc-xmlhtml for openSUSE:Factory checked in at 2017-06-04 01:59:19 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/ghc-xmlhtml (Old) and /work/SRC/openSUSE:Factory/.ghc-xmlhtml.new (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "ghc-xmlhtml" Sun Jun 4 01:59:19 2017 rev:4 rq:499741 version:0.2.4 Changes: -------- --- /work/SRC/openSUSE:Factory/ghc-xmlhtml/ghc-xmlhtml.changes 2017-05-16 14:47:02.459231877 +0200 +++ /work/SRC/openSUSE:Factory/.ghc-xmlhtml.new/ghc-xmlhtml.changes 2017-06-04 01:59:25.131677247 +0200 @@ -1,0 +2,5 @@ +Mon May 22 08:08:55 UTC 2017 - [email protected] + +- Update to version 0.2.4 with cabal2obs. + +------------------------------------------------------------------- Old: ---- xmlhtml-0.2.3.5.tar.gz xmlhtml.cabal New: ---- xmlhtml-0.2.4.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ ghc-xmlhtml.spec ++++++ --- /var/tmp/diff_new_pack.sXF7yR/_old 2017-06-04 01:59:25.887570460 +0200 +++ /var/tmp/diff_new_pack.sXF7yR/_new 2017-06-04 01:59:25.887570460 +0200 @@ -19,14 +19,13 @@ %global pkg_name xmlhtml %bcond_with tests Name: ghc-%{pkg_name} -Version: 0.2.3.5 +Version: 0.2.4 Release: 0 Summary: XML parser and renderer with HTML 5 quirks mode License: BSD-3-Clause Group: Development/Languages/Other Url: https://hackage.haskell.org/package/%{pkg_name} Source0: https://hackage.haskell.org/package/%{pkg_name}-%{version}/%{pkg_name}-%{version}.tar.gz -Source1: https://hackage.haskell.org/package/%{pkg_name}-%{version}/revision/4.cabal#/%{pkg_name}.cabal BuildRequires: ghc-Cabal-devel BuildRequires: ghc-blaze-builder-devel BuildRequires: ghc-blaze-html-devel @@ -40,11 +39,9 @@ BuildRoot: %{_tmppath}/%{name}-%{version}-build %if %{with tests} BuildRequires: ghc-HUnit-devel -BuildRequires: ghc-QuickCheck-devel BuildRequires: ghc-directory-devel BuildRequires: ghc-test-framework-devel BuildRequires: ghc-test-framework-hunit-devel -BuildRequires: ghc-test-framework-quickcheck2-devel %endif %description @@ -76,7 +73,6 @@ %prep %setup -q -n %{pkg_name}-%{version} -cp -p %{SOURCE1} %{pkg_name}.cabal %build %ghc_lib_build ++++++ xmlhtml-0.2.3.5.tar.gz -> xmlhtml-0.2.4.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xmlhtml-0.2.3.5/src/Text/XmlHtml/Common.hs new/xmlhtml-0.2.4/src/Text/XmlHtml/Common.hs --- old/xmlhtml-0.2.3.5/src/Text/XmlHtml/Common.hs 2016-08-06 00:58:57.000000000 +0200 +++ new/xmlhtml-0.2.4/src/Text/XmlHtml/Common.hs 2017-05-19 12:38:07.000000000 +0200 @@ -5,6 +5,8 @@ module Text.XmlHtml.Common where import Blaze.ByteString.Builder +import Data.Char (isAscii, isLatin1) +import qualified Data.HashMap.Strict as M import Data.Maybe import Data.Text (Text) @@ -13,6 +15,8 @@ import qualified Data.Text.Encoding.Error as TE import Data.ByteString (ByteString) +import qualified Data.ByteString.Char8 as BS +import Text.XmlHtml.HTML.Meta (reversePredefinedRefs) ------------------------------------------------------------------------------ @@ -186,32 +190,66 @@ ------------------------------------------------------------------------------ -- | The character encoding of a document. Currently only the required -- character encodings are implemented. -data Encoding = UTF8 | UTF16BE | UTF16LE deriving (Eq, Show) +data Encoding = UTF8 | UTF16BE | UTF16LE | ISO_8859_1 deriving (Eq, Show) ------------------------------------------------------------------------------ -- | Retrieves the preferred name of a character encoding for embedding in -- a document. encodingName :: Encoding -> Text -encodingName UTF8 = "UTF-8" -encodingName UTF16BE = "UTF-16" -encodingName UTF16LE = "UTF-16" +encodingName UTF8 = "UTF-8" +encodingName UTF16BE = "UTF-16" +encodingName UTF16LE = "UTF-16" +encodingName ISO_8859_1 = "ISO-8859-1" ------------------------------------------------------------------------------ -- | Gets the encoding function from 'Text' to 'ByteString' for an encoding. encoder :: Encoding -> Text -> ByteString -encoder UTF8 = T.encodeUtf8 -encoder UTF16BE = T.encodeUtf16BE -encoder UTF16LE = T.encodeUtf16LE +encoder UTF8 = T.encodeUtf8 +encoder UTF16BE = T.encodeUtf16BE +encoder UTF16LE = T.encodeUtf16LE +encoder ISO_8859_1 = encodeIso_8859_1 + + +------------------------------------------------------------------------------ +-- | Encodes UTF-8 Text into bytestring with only latin1 characters +-- UTF-8 characters found in the input and present in the +-- 'Text.XmlHtml.Meta.references' map are mapped to their escape sequences, +-- and any other UTF-8 characters are replaced with ascii "?" +encodeIso_8859_1 :: Text -> ByteString +encodeIso_8859_1 t = T.encodeUtf8 . T.concat . map toAsciiChunk $ + T.groupBy asciiSplits t + where + + -- Identify long strings of all-acceptable or all-unacceptable characters + -- Acceptable strings are passed through + -- Unacceptable strings are mapped to ASCII character by character + toAsciiChunk sub = + if T.any isAscii sub + then sub + else T.concat . map toAsciiChar $ T.unpack sub + asciiSplits x y = isAscii x == isAscii y + + -- A character's mapping to ascii goes through html entity escaping + -- if that character is in the references table + -- Otherwise its unicode index is printed to decimal and "&#" is appended + toAsciiChar c = maybe + (uniEscape c) (\esc -> T.concat ["&", esc, ";"]) + (M.lookup (T.singleton c) reversePredefinedRefs) + + uniEscape = T.append "&#" . flip T.snoc ';' . T.pack . + (show :: Int -> String) . fromEnum ------------------------------------------------------------------------------ -- | Gets the decoding function from 'ByteString' to 'Text' for an encoding. decoder :: Encoding -> ByteString -> Text -decoder UTF8 = T.decodeUtf8With (TE.replace '\xFFFF') -decoder UTF16BE = T.decodeUtf16BEWith (TE.replace '\xFFFF') -decoder UTF16LE = T.decodeUtf16LEWith (TE.replace '\xFFFF') +decoder UTF8 = T.decodeUtf8With (TE.replace '\xFFFF') +decoder UTF16BE = T.decodeUtf16BEWith (TE.replace '\xFFFF') +decoder UTF16LE = T.decodeUtf16LEWith (TE.replace '\xFFFF') +decoder ISO_8859_1 = T.decodeLatin1 . + BS.map (\c -> if isLatin1 c then c else '?') ------------------------------------------------------------------------------ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xmlhtml-0.2.3.5/src/Text/XmlHtml/HTML/Meta.hs new/xmlhtml-0.2.4/src/Text/XmlHtml/HTML/Meta.hs --- old/xmlhtml-0.2.3.5/src/Text/XmlHtml/HTML/Meta.hs 2016-08-06 00:58:57.000000000 +0200 +++ new/xmlhtml-0.2.4/src/Text/XmlHtml/HTML/Meta.hs 2017-05-19 12:38:07.000000000 +0200 @@ -8,7 +8,9 @@ , isRawText , endOmittableLast , endOmittableNext + , explicitAttributes , predefinedRefs + , reversePredefinedRefs ) where #if !MIN_VERSION_base(4,8,0) @@ -113,6 +115,16 @@ ] ------------------------------------------------------------------------------ +-- | Tags and attributes which should always be rendered with an explicit +-- value, even when the value is empty. This is required by some web browsers +-- for tags that are typically non-empty. +{-# NOINLINE explicitAttributes #-} +explicitAttributes :: HashMap Text (HashSet Text) +explicitAttributes = M.fromList [ + ("a", S.fromList ["href"]) + ] + +------------------------------------------------------------------------------ -- | Predefined character entity references as defined by the HTML5 spec. {-# NOINLINE predefinedRefs #-} predefinedRefs :: HashMap Text Text @@ -2534,3 +2546,10 @@ ("zwj", "\x0200D"), ("zwnj", "\x0200C") ] + +------------------------------------------------------------------------------ +-- Reverse lookup of Html entities +reversePredefinedRefs :: HashMap Text Text +reversePredefinedRefs = M.fromList . map (\(x,y) -> (y,x)) $ + M.toList predefinedRefs + diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xmlhtml-0.2.3.5/src/Text/XmlHtml/HTML/Render.hs new/xmlhtml-0.2.4/src/Text/XmlHtml/HTML/Render.hs --- old/xmlhtml-0.2.3.5/src/Text/XmlHtml/HTML/Render.hs 2016-08-06 00:58:57.000000000 +0200 +++ new/xmlhtml-0.2.4/src/Text/XmlHtml/HTML/Render.hs 2017-05-19 12:38:07.000000000 +0200 @@ -18,6 +18,7 @@ import qualified Data.Text as T import qualified Data.HashSet as S +import qualified Data.HashMap.Strict as M #if !MIN_VERSION_base(4,8,0) import Data.Monoid @@ -98,7 +99,7 @@ | tb `S.member` voidTags && null c = fromText e "<" `mappend` fromText e t - `mappend` (mconcat $ map (attribute e) a) + `mappend` (mconcat $ map (attribute e tb) a) `mappend` fromText e " />" | tb `S.member` voidTags = error $ T.unpack t ++ " must be empty" @@ -108,7 +109,7 @@ not ("</" `T.append` t `T.isInfixOf` s) = fromText e "<" `mappend` fromText e t - `mappend` (mconcat $ map (attribute e) a) + `mappend` (mconcat $ map (attribute e tb) a) `mappend` fromText e ">" `mappend` fromText e s `mappend` fromText e "</" @@ -122,7 +123,7 @@ | otherwise = fromText e "<" `mappend` fromText e t - `mappend` (mconcat $ map (attribute e) a) + `mappend` (mconcat $ map (attribute e tb) a) `mappend` fromText e ">" `mappend` (mconcat $ map (node e) c) `mappend` fromText e "</" @@ -131,21 +132,24 @@ ------------------------------------------------------------------------------ -attribute :: Encoding -> (Text, Text) -> Builder -attribute e (n,v) - | v == "" = +attribute :: Encoding -> Text -> (Text, Text) -> Builder +attribute e tb (n,v) + | v == "" && not explicit = fromText e " " `mappend` fromText e n - | not ("\'" `T.isInfixOf` v) = + | v /= "" && not ("\'" `T.isInfixOf` v) = fromText e " " `mappend` fromText e n `mappend` fromText e "=\'" `mappend` escaped "&" e v `mappend` fromText e "\'" - | otherwise = + | otherwise = fromText e " " `mappend` fromText e n `mappend` fromText e "=\"" `mappend` escaped "&\"" e v `mappend` fromText e "\"" - + where nbase = T.toLower $ snd $ T.breakOnEnd ":" n + explicit = case M.lookup tb explicitAttributes of + Nothing -> False + Just ns -> nbase `S.member` ns diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xmlhtml-0.2.3.5/test/src/Text/XmlHtml/Tests.hs new/xmlhtml-0.2.4/test/src/Text/XmlHtml/Tests.hs --- old/xmlhtml-0.2.3.5/test/src/Text/XmlHtml/Tests.hs 2016-08-06 00:58:57.000000000 +0200 +++ new/xmlhtml-0.2.4/test/src/Text/XmlHtml/Tests.hs 2017-05-19 12:38:07.000000000 +0200 @@ -561,7 +561,10 @@ testIt "renderEmptyText " renderEmptyText, testIt "singleQuoteInAttr " singleQuoteInAttr, testIt "doubleQuoteInAttr " doubleQuoteInAttr, - testIt "bothQuotesInAttr " bothQuotesInAttr + testIt "bothQuotesInAttr " bothQuotesInAttr, + testIt "ndashEscapesInLatin " ndashEscapesInLatin, + testIt "smileyEscapesInLatin " smileyEscapesInLatin, + testIt "numericalEscapes " numericalEscapes ] renderByteOrderMark :: Bool @@ -644,6 +647,26 @@ ])) == utf8Decl `B.append` "<foo bar=\"test\'"ing\"/>" +ndashEscapesInLatin :: Bool +ndashEscapesInLatin = + toByteString (renderXmlFragment ISO_8859_1 ([ + TextNode "Hello–world" + ])) + == "Hello–world" + +smileyEscapesInLatin :: Bool +smileyEscapesInLatin = + toByteString (renderXmlFragment ISO_8859_1 ([ + TextNode "Hello ☺" + ])) + == "Hello ☺" + +numericalEscapes :: Bool +numericalEscapes = + fmap (toByteString . renderXmlFragment ISO_8859_1 . docContent) + (parseXML "test" "Hello ®") + == Right "Hello ®" + ------------------------------------------------------------------------------ -- HTML Repeats of XML Rendering Tests --------------------------------------- @@ -749,6 +772,8 @@ testIt "renderHTMLRaw2 " renderHTMLRaw2, testIt "renderHTMLRaw3 " renderHTMLRaw3, testIt "renderHTMLRaw4 " renderHTMLRaw4, + testIt "renderHTMLEmptyAttr " renderHTMLEmptyAttr, + testIt "renderHTMLEmptyAttr2 " renderHTMLEmptyAttr2, testIt "renderHTMLAmpAttr1 " renderHTMLAmpAttr1, testIt "renderHTMLAmpAttr2 " renderHTMLAmpAttr2, testIt "renderHTMLAmpAttr3 " renderHTMLAmpAttr3, @@ -818,6 +843,20 @@ ] ])) +renderHTMLEmptyAttr :: Bool +renderHTMLEmptyAttr = + toByteString (render (HtmlDocument UTF8 Nothing [ + Element "input" [("checked", "")] [] + ])) + == "<input checked />" + +renderHTMLEmptyAttr2 :: Bool +renderHTMLEmptyAttr2 = + toByteString (render (HtmlDocument UTF8 Nothing [ + Element "a" [("href", "")] [] + ])) + == "<a href=\"\"></a>" + renderHTMLAmpAttr1 :: Bool renderHTMLAmpAttr1 = toByteString (render (HtmlDocument UTF8 Nothing [ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/xmlhtml-0.2.3.5/xmlhtml.cabal new/xmlhtml-0.2.4/xmlhtml.cabal --- old/xmlhtml-0.2.3.5/xmlhtml.cabal 2016-08-06 00:58:57.000000000 +0200 +++ new/xmlhtml-0.2.4/xmlhtml.cabal 2017-05-19 12:38:07.000000000 +0200 @@ -1,5 +1,5 @@ Name: xmlhtml -Version: 0.2.3.5 +Version: 0.2.4 Synopsis: XML parser and renderer with HTML 5 quirks mode Description: Contains renderers and parsers for both XML and HTML 5 document fragments, which share data structures so that @@ -841,26 +841,22 @@ ghc-options: -Wall -fwarn-tabs -fno-warn-orphans Test-suite testsuite - hs-source-dirs: src test/src + hs-source-dirs: test/src type: exitcode-stdio-1.0 main-is: TestSuite.hs build-depends: - HUnit >= 1.2 && <1.4, - QuickCheck >= 2.3.0.2, + HUnit >= 1.2 && <1.7, base, blaze-builder, blaze-html, blaze-markup, bytestring, - containers, - directory >= 1.0 && <1.3, - parsec, + directory >= 1.0 && <1.4, test-framework >= 0.8.0.3 && <0.9, test-framework-hunit >= 0.3 && <0.4, - test-framework-quickcheck2 >= 0.3 && <0.4, text, - unordered-containers + xmlhtml ghc-options: -Wall -fwarn-tabs -funbox-strict-fields -threaded -fno-warn-unused-do-bind
