Hi Petr, Could you follow up on this one?
Cheers, On Mon, Mar 09, 2009 at 22:29:17 +0100, Reinier Lamers wrote: > This fixes, compared to the previous bundle: > * absolute executable path to darcs in tests/emailformat.sh > * commented-out code in src/unit.lhs > * magic number 10 for newline in src/Darcs/External.hs > * q_encode_if_needed renamed to q_encode_char > > Thu Mar 5 02:52:47 CET 2009 Reinier Lamers <[email protected]> > * Kill unused imports in External.hs > > Mon Mar 9 21:37:39 CET 2009 Reinier Lamers <[email protected]> > * Add tests for email header formatting > > Mon Mar 9 22:27:04 CET 2009 Reinier Lamers <[email protected]> > * resolve issue1358: encode non-ASCII characters in mail headers > Kill unused imports in External.hs ---------------------------------- > Reinier Lamers <[email protected]>**20090305015247 > Ignore-this: 6eab59cf3c0c1a04712bd0282caba72f > ] hunk ./src/Darcs/External.hs 72 > > import Darcs.Lock ( withTemp, withOpenTemp, tempdir_loc, > removeFileMayNotExist ) > import CommandLine ( parseCmd, addUrlencoded ) > -import ThisVersion ( darcs_version ) > #if defined(HAVE_LIBWWW) || defined(HAVE_LIBCURL) || defined(HAVE_HTTP) > import URL ( copyUrl, copyUrlFirst, waitUrl ) > #endif > hunk ./src/Darcs/External.hs 80 > import Exec ( exec, Redirect(..), withoutNonBlock ) > import Darcs.URL ( is_file, is_url, is_ssh ) > import Darcs.Utils ( catchall ) > -import Printer ( Doc, Printers, putDocLnWith, hPutDoc, hPutDocLn, > hPutDocWith, ($$), (<+>), renderPS, > +import Printer ( Doc, Printers, putDocLnWith, hPutDoc, hPutDocLn, > hPutDocWith, ($$), renderPS, > simplePrinters, > text, empty, packedString, vcat, renderString ) > #include "impossible.h" Add tests for email header formatting ------------------------------------- > Reinier Lamers <[email protected]>**20090309203739 > Ignore-this: 1f0357f84f64446d4f4d8ea98b05b383 > ] hunk ./src/unit.lhs 53 > import System.IO.Unsafe ( unsafePerformIO ) > import ByteStringUtils > import qualified Data.ByteString.Char8 as BC ( unpack, pack ) > -import qualified Data.ByteString as B ( empty, concat ) > +import qualified Data.ByteString as B ( empty, concat, length, unpack, foldr, > + cons, ByteString, null, filter, head > ) > +import Data.Char ( isPrint ) > import Darcs.Patch > import Darcs.Patch.Test > import Darcs.Patch.Unit ( run_patch_unit_tests ) > hunk ./src/unit.lhs 71 > import Control.Monad.ST > import Darcs.Ordered > import Darcs.Sealed ( Sealed(Sealed), unsafeUnseal ) > +import Darcs.Email ( make_email, read_email, formatHeader ) > > hunk ./src/unit.lhs 73 > -import Darcs.Email ( make_email, read_email ) > #include "impossible.h" > \end{code} > > hunk ./src/unit.lhs 103 > BC.unpack (read_email (renderPS > $ make_email "reponame" [] (Just (text "contents\n")) > (text $ unlines s) (Just "filename"))) > + putStr "Checking email header line length... " > + quickCheck email_header_no_long_lines > + putStr "Checking email for illegal characters... " > + quickCheck email_header_ascii_chars > + putStr "Checking for spaces at beginning of folded email header lines... " > + quickCheck email_header_lines_start > + putStr "Checking that there are no empty lines in email headers... " > + quickCheck email_header_no_empty_lines > --putStr $ test_patch > --exitWith ExitSuccess > case run_tests returnval of > hunk ./src/unit.lhs 238 > = (thetest p1 p2)++(pair_unit_tester thetest ps) > \end{code} > > +\chapter{Email format tests} > + > +These tests check whether the emails generated by darcs meet a few criteria. > +We check for line length and non-ASCII characters. We apparently do not have > to > +check for CR-LF newlines because that's handled by sendmail. > + > +\begin{code} > + > +-- Check that formatHeader never creates lines longer than 78 characters > +-- (excluding the carriage return and line feed) > +email_header_no_long_lines :: String -> String -> Bool > +email_header_no_long_lines field value = > + not $ any (>78) $ map B.length $ bs_lines $ formatHeader cleanField > value > + where cleanField = clean_field_string field > + > +bs_lines :: B.ByteString -> [B.ByteString] > +bs_lines = finalizeFold . B.foldr splitAtLines (B.empty, []) > + where splitAtLines 10 (thisLine, prevLines) = (B.empty, thisLine:prevLines) > + splitAtLines c (thisLine, prevLines) = (B.cons c thisLine, > prevLines) > + finalizeFold (lastLine, otherLines) = lastLine : otherLines > + > +-- Check that an email header does not contain non-ASCII characters > +-- formatHeader doesn't escape field names, there is no such thing as > non-ascii > +-- field names afaik > +email_header_ascii_chars :: String -> String -> Bool > +email_header_ascii_chars field value > + = not (any (>127) (B.unpack (formatHeader cleanField value))) > + where cleanField = clean_field_string field > + > +-- Check that header the second and later lines of a header start with a > space > +email_header_lines_start :: String -> String -> Bool > +email_header_lines_start field value = > + all (\l -> B.null l || B.head l == 32) (tail headerLines) > + where headerLines = bs_lines (formatHeader cleanField value) > + cleanField = clean_field_string field > + > +-- Checks that there are no lines in email headers with only whitespace > +email_header_no_empty_lines :: String -> String -> Bool > +email_header_no_empty_lines field value = > + all (not . B.null . B.filter (not . (`elem` [10, 32, 9]))) headerLines > + where headerLines = bs_lines (formatHeader cleanField value) > + cleanField = clean_field_string field > + > +clean_field_string :: String -> String > +clean_field_string = filter (\c -> isPrint c && c < '\x80' && c /= ':') > + > +\end{code} > + > \chapter{LCS} > > Here are a few quick tests of the shiftBoundaries function. > addfile ./tests/emailformat.sh > hunk ./tests/emailformat.sh 1 > +#!/usr/bin/env bash > + > +set -ev > +# TODO: is this really enough to make all commands interpret the given > strings > +# as latin1? > +export LANG="en_US.ISO-8859-1" > + > +rm -rf temp1 > +rm -rf temp2 > +mkdir temp1 > +mkdir temp2 > +cd temp1 > + > +seventysevenaddy="<aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa...@bbbbbbbbbb.cccccccccc.abrasoft.com>" > + > +darcs init > + > +echo "Have you seen the sm?rrebr?d of Ren? ?av?sant?" > non_ascii_file > +darcs add non_ascii_file > +darcs record -am "non-ascii file add" -A test > + > +cd ../temp2 > +darcs init > +cd ../temp1 > + > +# long email adress: check that email adresses of <= 77 chars don't get > split up > +darcs send --from="Kj?lt ?berstr?m $seventysevenaddy" \ > + --subject "Un patch pour le r?positoire" \ > + --to="Un gar?on fran?ais <[email protected]>" \ > + --sendmail-command='cp /dev/stdin mail_as_file %<' \ > + -a ../temp2 > + > +cat mail_as_file > +# The long mail address should be in there as a whole > +grep $seventysevenaddy mail_as_file > + > +# Check that there are no non-ASCII characters in the mail > +ghc -e 'getContents >>= return . not . any (> Data.Char.chr 127)' < > mail_as_file | grep '^True$' > + > + > +cd .. > +rm -rf temp1 > +rm -rf temp2 > + resolve issue1358: encode non-ASCII characters in mail headers -------------------------------------------------------------- > Reinier Lamers <[email protected]>**20090309212704 > Ignore-this: 1181370f4896d4b626c466f9402441c1 > ] hunk ./src/Darcs/Email.hs 3 > {-# OPTIONS_GHC -cpp #-} > {-# LANGUAGE CPP #-} > -module Darcs.Email ( make_email, read_email ) where > +module Darcs.Email ( make_email, read_email, formatHeader ) where > > hunk ./src/Darcs/Email.hs 5 > -import Data.Char ( digitToInt, isHexDigit ) > +import Data.Char ( digitToInt, isHexDigit, ord, intToDigit, isPrint, toUpper > ) > +import Data.List ( isInfixOf ) > +import qualified UTF8 ( encode ) > import Printer ( Doc, ($$), (<+>), (<>), text, empty, packedString, renderPS) > > import ByteStringUtils (dropSpace, linesPS, betweenLinesPS ) > hunk ./src/Darcs/Email.hs 11 > -import qualified Data.ByteString as B (ByteString, length, null, > tail, drop, head) > +import qualified Data.ByteString as B (ByteString, length, null, > tail > + ,drop, head, concat, > singleton > + ,pack, append, empty > + ) > import qualified Data.ByteString.Char8 as BC (index, head, pack) > #if __GLASGOW_HASKELL__ > 606 > import Data.ByteString.Internal as B (c2w, createAndTrim) > hunk ./src/Darcs/Email.hs 26 > import Foreign.Storable ( poke ) > import Data.Word ( Word8 ) > > -line_max :: Int > -line_max = 75 > +-- line_max is maximum number of characters in an e-mail line excluding the > CRLF > +-- at the end. qline_max is the number of characters in a q-encoded or > +-- quoted-printable-encoded line. > +line_max, qline_max :: Int > +line_max = 78 > +qline_max = 75 > + > +-- | Formats an e-mail header by encoding any non-ascii characters using > UTF-8 > +-- and Q-encoding, and folding lines at appropriate points. It doesn't do > +-- more than that, so the header name and header value should be > +-- well-formatted give or take line length and encoding. So no non-ASCII > +-- characters within quoted-string, quoted-pair, or atom; no semantically > +-- meaningful signs in names; no non-ASCII characters in the header name; > +-- etcetera. > +formatHeader :: String -> String -> B.ByteString > +formatHeader headerName headerValue = > + B.append nameColon encodedValue > + where nameColon = B.pack (map B.c2w (headerName ++ ":")) -- space for > folding > + encodedValue = fold_and_encode (' ':headerValue) > + (B.length nameColon) False False > + > +-- run through a string and encode non-ascii words and fold where > appropriate. > +-- the integer argument is the current position in the current line. > +-- the string in the first argument must begin with whitespace, or be empty. > +fold_and_encode :: String -> Int -> Bool -> Bool -> B.ByteString > +fold_and_encode [] _ _ _ = B.empty > +fold_and_encode s p lastWordEncoded inMidWord = > + let newline = B.singleton 10 > + space = B.singleton 32 > + s2bs = B.pack . map B.c2w > + -- the twelve there is the max number of ASCII chars to encode a single > + -- character: 4 * 3, 4 UTF-8 bytes times 3 ASCII chars per byte > + safeEncChunkLength = (qline_max - B.length encoded_word_start > + - B.length encoded_word_end) `div` 12 > + (curSpace, afterCurSpace) = break (not . (== ' ')) s > + (curWord, afterCurWord) = break (== ' ') afterCurSpace > + qEncWord | lastWordEncoded = qEncode (curSpace ++ curWord) > + | otherwise = qEncode curWord > + mustEncode = inMidWord > + || any (\c -> not (isPrint c) || (ord c) > 127) curWord > + || length curWord > line_max - 1 > + || isInfixOf "=?" curWord > + mustFold > + | mustEncode && lastWordEncoded > + = p + 1 + B.length qEncWord > line_max > + | mustEncode > + = p + length curSpace + B.length qEncWord > line_max > + | otherwise > + = p + length curSpace + length curWord > line_max > + mustSplit = (B.length qEncWord > qline_max && mustEncode) > + || length curWord > line_max - 1 > + spaceToInsert | mustEncode && lastWordEncoded = space > + | otherwise = s2bs curSpace > + wordToInsert > + | mustEncode && mustSplit = qEncode (take safeEncChunkLength curWord) > + | mustEncode = qEncWord > + | otherwise = s2bs curWord > + doneChunk | mustFold = B.concat [newline, spaceToInsert, wordToInsert] > + | otherwise = B.concat [spaceToInsert, wordToInsert] > + (rest, nextP) > + | mustSplit > + = (drop safeEncChunkLength curWord ++ afterCurWord, qline_max + > 1) > + | mustEncode && mustFold > + = (afterCurWord, B.length spaceToInsert + B.length wordToInsert) > + | otherwise > + = (afterCurWord, p + B.length doneChunk) > + in B.append doneChunk (fold_and_encode rest nextP mustEncode mustSplit) > + > +-- | Turns a piece of string into a q-encoded block > +-- Applies q-encoding, for use in e-mail header values, as defined in RFC > 2047. > +-- It just takes a string and builds an encoded-word from it, it does not > check > +-- length or necessity. > +qEncode :: String -> B.ByteString > +qEncode s = B.concat [encoded_word_start, > + encodedString, > + encoded_word_end] > + where encodedString = B.concat (map q_encode_char s) > + > +encoded_word_start, encoded_word_end :: B.ByteString > +encoded_word_start = B.pack (map B.c2w "=?UTF-8?Q?") > +encoded_word_end = B.pack (map B.c2w "?=") > + > +-- turns a character into its q-encoded bytestring value. For most printable > +-- ASCII characters, that's just the singleton bytestring with that char. > +q_encode_char :: Char -> B.ByteString > +q_encode_char c > + | c == ' ' = c2bs '_' > + | isPrint c > + && not (c `elem` ['?', '=', '_']) > + && ord c < 128 = c2bs c > + | otherwise = B.concat (map qbyte (UTF8.encode > [c])) > + where c2bs = B.singleton . B.c2w > + -- qbyte turns a byte into its q-encoded "=hh" representation > + qbyte b = B.pack (map B.c2w ['=' > + ,word8ToUDigit (b `div` 16) > + ,word8ToUDigit (b `mod` 16) > + ]) > + word8ToUDigit :: Word8 -> Char > + word8ToUDigit = toUpper . intToDigit . fromIntegral > > -- TODO is this doing mime encoding?? > qpencode :: B.ByteString -> B.ByteString > hunk ./src/Darcs/Email.hs 130 > qpencode s = unsafePerformIO > -- Really only (3 + 2/75) * length or something in the worst case > - $ B.createAndTrim (4 * B.length s) (\buf -> encode s line_max buf > 0) > + $ B.createAndTrim (4 * B.length s) (\buf -> encode s qline_max > buf 0) > > encode :: B.ByteString -> Int -> Ptr Word8 -> Int -> IO Int > encode ps _ _ bufi | B.null ps = return bufi > hunk ./src/Darcs/Email.hs 137 > encode ps n buf bufi = case B.head ps of > c | c == newline -> > do poke (buf `plusPtr` bufi) newline > - encode ps' line_max buf (bufi+1) > + encode ps' qline_max buf (bufi+1) > | n == 0 && B.length ps > 1 -> > do poke (buf `plusPtr` bufi) equals > poke (buf `plusPtr` (bufi+1)) newline > hunk ./src/Darcs/Email.hs 141 > - encode ps line_max buf (bufi + 2) > + encode ps qline_max buf (bufi + 2) > | (c == tab || c == space) -> > if B.null ps' || B.head ps' == newline > then do poke (buf `plusPtr` bufi) c > hunk ./src/Darcs/Email.hs 147 > poke (buf `plusPtr` (bufi+1)) equals > poke (buf `plusPtr` (bufi+2)) newline > - encode ps' line_max buf (bufi + 3) > + encode ps' qline_max buf (bufi + 3) > else do poke (buf `plusPtr` bufi) c > encode ps' (n - 1) buf (bufi + 1) > | (c >= bang && c /= equals && c <= tilde) -> > hunk ./src/Darcs/External.hs 67 > import ByteStringUtils (gzReadFilePS, linesPS, unlinesPS) > import qualified Data.ByteString as B (ByteString, empty, null, readFile -- > ratify readFile: Just an import from ByteString > ,hGetContents, writeFile, hPut, length -- ratify hGetContents: > importing from ByteString > - ,take, concat, drop, isPrefixOf) > + ,take, concat, drop, isPrefixOf, singleton, append) > import qualified Data.ByteString.Char8 as BC (unpack, pack) > > import Darcs.Lock ( withTemp, withOpenTemp, tempdir_loc, > removeFileMayNotExist ) > hunk ./src/Darcs/External.hs 83 > import Printer ( Doc, Printers, putDocLnWith, hPutDoc, hPutDocLn, > hPutDocWith, ($$), renderPS, > simplePrinters, > text, empty, packedString, vcat, renderString ) > +import Darcs.Email ( formatHeader ) > #include "impossible.h" > > sendmail_path :: IO String > hunk ./src/Darcs/External.hs 415 > -> Doc -- ^ body > -> IO () > generateEmail h f t s cc body = do > - hPutDocLn h $ > - text "To:" <+> text t > - $$ text "From:" <+> text f > - $$ text "Subject:" <+> text s > - $$ formated_cc > - $$ text "X-Mail-Originator: Darcs Version Control System" > - $$ text ("X-Darcs-Version: " ++ darcs_version) > - $$ body > - where formated_cc = if cc == "" > - then empty > - else text "Cc:" <+> text cc > + putHeader "To" t > + putHeader "From" f > + putHeader "Subject" s > + when (not (null cc)) (putHeader "Cc" cc) > + putHeader "X-Mail-Originator" "Darcs Version Control System" > + hPutDocLn h body > + where putHeader field value > + = B.hPut h (B.append (formatHeader field value) newline) > + newline = B.singleton 10 > > have_sendmail :: IO Bool > have_sendmail = (sendmail_path >> return True) `catch` (\_ -> return False) > -- Eric Kow <http://www.nltg.brighton.ac.uk/home/Eric.Kow> PGP Key ID: 08AC04F9
signature.asc
Description: Digital signature
_______________________________________________ darcs-users mailing list [email protected] http://lists.osuosl.org/mailman/listinfo/darcs-users
