Hi, Add --http flag for optimize ----------------------------
> hunk ./src/Darcs/Arguments.lhs 89 > networkOptions, noCache, > allowUnrelatedRepos, > checkOrRepair, justThisRepo, optimizePristine, > - getOutput > + optimizeHTTP, getOutput > ) where > import System.Console.GetOpt > import System.Directory ( doesDirectoryExist ) > hunk ./src/Darcs/Arguments.lhs 320 > getContent Repair = NoContent > getContent JustThisRepo = NoContent > getContent OptimizePristine = NoContent > +getContent OptimizeHTTP = NoContent > > getContentString :: DarcsFlag -> Maybe String > getContentString f = > hunk ./src/Darcs/Arguments.lhs 1611 > optimizePristine :: DarcsOption > optimizePristine = DarcsNoArgOption [] ["pristine"] OptimizePristine > "optimize hashed pristine layout" > + > +optimizeHTTP :: DarcsOption > +optimizeHTTP = DarcsNoArgOption [] ["http"] OptimizeHTTP > + "optimize repository for getting over network" > \end{code} > \begin{options} > --umask > hunk ./src/Darcs/Flags.hs 92 > | UseFormat2 > | PristinePlain | PristineNone | NoUpdateWorking > | Sibling AbsolutePath | Relink | RelinkPristine | NoLinks > - | OptimizePristine > + | OptimizePristine | OptimizeHTTP > | UpgradeFormat > | Files | NoFiles | Directories | NoDirectories > | Pending | NoPending Ok. Refactor Darcs.Repository.copyInventory (consistent naming) ----------------------------------------------------------- > hunk ./src/Darcs/Repository.hs 100 > import Darcs.Witnesses.Ordered ( FL(..), RL(..), bunchFL, mapFL, mapRL > , reverseRL ,lengthRL, (+>+) ) > import Darcs.Patch.Info ( PatchInfo ) > -import Darcs.Repository.Format ( RepoProperty ( HashedInventory ), > +import Darcs.Repository.Format ( RepoProperty ( HashedInventory ), > RepoFormat, > createRepoFormat, formatHas, > writeRepoFormat ) > import Darcs.Repository.Prefs ( writeDefaultPrefs ) > import Darcs.Repository.Pristine ( createPristine, flagsToPristine, > createPristineFromWorking ) > hunk ./src/Darcs/Repository.hs 158 > > data RepoSort = Hashed | Old > > +repoSort :: RepoFormat -> RepoSort > +repoSort f > + | formatHas HashedInventory f = Hashed > + | otherwise = Old > + > copyInventory :: forall p C(r u t). RepoPatch p => Repository p C(r u t) -> > IO () > hunk ./src/Darcs/Repository.hs 164 > -copyInventory fromrepo@(Repo fromdir opts rf (DarcsRepository _ cremote)) = > do > - repo@(Repo todir xx rf2 (DarcsRepository yy c)) <- identifyRepositoryFor > fromrepo "." > - newcache <- unionRemoteCaches c cremote fromdir > - let newrepo :: Repository p C(r u t) > - newrepo = Repo todir xx rf2 (DarcsRepository yy newcache) > - copyHashedHashed = HashedRepo.copyRepo newrepo opts fromdir > - copyAnythingToOld r = withCurrentDirectory todir $ readRepo r >>= > +copyInventory fromRepo@(Repo fromDir opts fromFormat (DarcsRepository _ > fromCache)) = do > + toRepo@(Repo toDir opts' toFormat (DarcsRepository toPristine toCache)) <- > + identifyRepositoryFor fromRepo "." > + toCache2 <- unionRemoteCaches toCache fromCache fromDir > + let toRepo2 :: Repository p C(r u t) > + toRepo2 = Repo toDir opts' toFormat $ DarcsRepository toPristine > toCache2 It's more common to use ' (prime) as a suffix in Haskell than 2 (the latter usually means 2-argument, like liftM2...) > + copyHashedHashed = HashedRepo.copyRepo toRepo2 opts fromDir > + copyAnythingToOld r = withCurrentDirectory toDir $ readRepo r >>= > DarcsRepo.writeInventoryAndPatches opts I'd say copyAnyToOld is a better name than copyAnythingToOld. (But now I see this is not a name you introduced -- you can still rename it if you are amending or extending this, though...) > hunk ./src/Darcs/Repository.hs 173 > - repoSort rfx | formatHas HashedInventory rfx = Hashed > - | otherwise = Old > - case repoSort rf2 of > - Hashed -> > - if formatHas HashedInventory rf > - then copyHashedHashed > - else withCurrentDirectory todir $ > - do HashedRepo.revertTentativeChanges > - patches <- readRepo fromrepo > + case repoSort fromFormat of > + Hashed -> case repoSort toFormat of > + Hashed -> copyHashedHashed > + Old -> copyAnythingToOld fromRepo > + Old -> case repoSort toFormat of > + Hashed -> withCurrentDirectory toDir $ do > + HashedRepo.revertTentativeChanges > + patches <- readRepo fromRepo > let k = "Copying patch" > beginTedious k > tediousSize k (lengthRL $ newset2RL patches) > hunk ./src/Darcs/Repository.hs 185 > let patches' = progressPatchSet k patches > - HashedRepo.writeTentativeInventory c (compression opts) > patches' > + HashedRepo.writeTentativeInventory toCache {- toCache2? -} > (compression opts) patches' I think toCache is OK, since it's what the original code did. > endTedious k > hunk ./src/Darcs/Repository.hs 187 > - HashedRepo.finalizeTentativeChanges repo (compression opts) > - Old -> case repoSort rf of > - Hashed -> copyAnythingToOld fromrepo > - _ -> copyOldrepoPatches opts fromrepo todir > + HashedRepo.finalizeTentativeChanges toRepo {- toRepo2? -} > (compression opts) > + Old -> copyOldrepoPatches opts fromRepo toDir Again, toRepo should be OK. Create a function for lazy fetching files ----------------------------------------- (maybe fix the patch title here to say "fetching of files"?) > hunk ./src/Darcs/External.hs 7 > backupByRenaming, backupByCopying, > copyFileOrUrl, speculateFileOrUrl, copyFilesOrUrls, copyLocal, cloneFile, > cloneTree, cloneTreeExcept, clonePartialsTree, clonePaths, > - fetchFilePS, gzFetchFilePS, > + fetchFilePS, fetchFileLazyPS, gzFetchFilePS, > sendEmail, generateEmail, sendEmailDoc, resendEmail, > signString, verifyPS, > execDocPipe, execPipeIgnoreError, > hunk ./src/Darcs/External.hs 64 > ,hGetContents, writeFile, hPut, length > ,take, concat, drop, isPrefixOf, singleton, append) > import qualified Data.ByteString.Char8 as BC (unpack, pack) > +import qualified Data.ByteString.Lazy as BL > > import Darcs.Lock ( withTemp, withOpenTemp, tempdirLoc, > removeFileMayNotExist ) > import CommandLine ( parseCmd, addUrlencoded ) > hunk ./src/Darcs/External.hs 138 > copyFileOrUrl opts fou t cache > B.readFile t > > +fetchFileLazyPS :: String -> Cachable -> IO BL.ByteString > +fetchFileLazyPS fou _ | isFile fou = BL.readFile fou > +fetchFileLazyPS fou cache = withTemp $ \t -> do let opts = [] -- FIXME: no > network flags > + copyFileOrUrl opts fou t > cache > + BL.readFile t > + > gzFetchFilePS :: String -> Cachable -> IO B.ByteString > gzFetchFilePS fou _ | isFile fou = gzReadFilePS fou > gzFetchFilePS fou cache = withTemp $ \t-> do let opts = [] -- FIXME: no > network flags Ok, although it should be noted that the lazy readFile may constitute a resource (fd) leak -- a haddock explaining that would be certainly appropriate. (I.e. this behaves the same as Prelude.readFile -- see contrib/darcs-errors.hlint in your darcs source tree for explanation.) Implement darcs optimize --http ------------------------------- Ok, the main patch... > hunk ./src/Darcs/Commands/Optimize.lhs 24 > {-# LANGUAGE CPP #-} > > module Darcs.Commands.Optimize ( optimize ) where > +import Control.Applicative ( (<$>) ) : - ) > import Control.Monad ( when, unless ) > import Data.Maybe ( isJust ) > import System.Directory ( getDirectoryContents, doesDirectoryExist, > doesFileExist ) > hunk ./src/Darcs/Commands/Optimize.lhs 29 > import qualified Data.ByteString.Char8 as BS > +import qualified Data.ByteString.Lazy as BL > > import Storage.Hashed.Darcs( decodeDarcsSize ) > > hunk ./src/Darcs/Commands/Optimize.lhs 38 > import Darcs.Arguments ( DarcsFlag( UpgradeFormat, UseHashedInventory, > Compress, UnCompress, > NoCompress, Reorder, > - Relink, RelinkPristine, OptimizePristine > ), > + Relink, RelinkPristine, OptimizePristine, > + OptimizeHTTP ), > reorderPatches, > uncompressNocompress, > relink, relinkPristine, sibling, > hunk ./src/Darcs/Commands/Optimize.lhs 45 > flagsToSiblings, > upgradeFormat, > - workingRepoDir, umaskOption, optimizePristine > + workingRepoDir, umaskOption, optimizePristine, > + optimizeHTTP > ) > import Darcs.Repository.Prefs ( getPreflist ) > import Darcs.Repository ( Repository, > hunk ./src/Darcs/Commands/Optimize.lhs 91 > import Storage.Hashed.Plain( readPlainTree ) > import Storage.Hashed.Darcs( writeDarcsHashed ) > > +import Codec.Archive.Tar ( write ) > +import Codec.Archive.Tar.Entry ( fileEntry, toTarPath ) > +import Codec.Compression.GZip ( compress ) > + > #include "gadts.h" > > optimizeDescription :: String > hunk ./src/Darcs/Commands/Optimize.lhs 138 > sibling, relink, > relinkPristine, > upgradeFormat, > - optimizePristine]} > + optimizePristine, > + optimizeHTTP]} > > optimizeCmd :: [DarcsFlag] -> [String] -> IO () > optimizeCmd origopts _ = do > hunk ./src/Darcs/Commands/Optimize.lhs 145 > when (UpgradeFormat `elem` origopts) optimizeUpgradeFormat > withRepoLock opts $- \repository -> do > + when (OptimizeHTTP `elem` origopts) doOptimizeHTTP > if (OptimizePristine `elem` opts) > then doOptimizePristine repository > else do cleanRepository repository So far so good. > hunk ./src/Darcs/Commands/Optimize.lhs 368 > withCurrentDirectory dir $ do > gzs <- filter ((== ".gz") . takeExtension) `fmap` getDirectoryContents > "." > mapM_ removeFile gzs > + > +doOptimizeHTTP :: IO () > +doOptimizeHTTP = do > + rf <- either fail return =<< identifyRepoFormat "." > + unless (formatHas HashedInventory rf) $ fail > + "Unsupported repository format" The error message should explicitly say what was expected: "Only hashed repositories can be optimized for HTTP" or something in that vein. > + createDirectoryIfMissing False packsDir > + i <- fileEntry' $ darcsdir </> "hashed_inventory" > + is <- tarDarcsDir "inventories" > + pr <- tarDarcsDir "pristine.hashed" > + BL.writeFile (packsDir </> "basic.tar.gz") . compress $ write (i : (is ++ > pr)) > + ps <- tarDarcsDir' "patches" $ \x -> all (x /=) ["unrevert", "pending", > + "pending.tentative"] > + BL.writeFile (packsDir </> "patches.tar.gz") . compress $ write ps > + where > + packsDir = darcsdir </> "packs" > + fileEntry' x = do > + content <- BL.fromChunks . return <$> gzReadFilePS x > + tp <- either fail return $ toTarPath False x > + return $ fileEntry tp content > + dirContents d f = map (d </>) . filter (\x -> head x /= '.' && f x) <$> > + getDirectoryContents d > + tarDarcsDir d = tarDarcsDir' d $ const True > + tarDarcsDir' d f = mapM fileEntry' =<< dirContents (darcsdir </> d) f > \end{code} Looks OK, although I would like to hear from you about memory behaviour of the code, as discussed before (IIRC). :) > hunk ./src/Darcs/Repository.hs 48 [SNIP pile of import wibbling] > hunk ./src/Darcs/Repository.hs 131 > +import qualified Data.ByteString.Lazy as BL > > #include "impossible.h" > > hunk ./src/Darcs/Repository.hs 235 > return IsPartial > > copyFullRepository :: forall p C(r u t). RepoPatch p => Repository p C(r u > t) -> IO () > -copyFullRepository fromrepository@(Repo fromdir opts rffrom _) = do > - copyInventory fromrepository > +copyFullRepository fromRepo@(Repo fromDir opts _ _) = do > debugMessage "Copying prefs" > hunk ./src/Darcs/Repository.hs 237 > - copyFileOrUrl opts (fromdir++"/"++darcsdir++"/prefs/prefs") > (darcsdir++"/prefs/prefs") (MaxAge 600) > - `catchall` return () > + copyFileOrUrl opts (fromDir ++ "/" ++ darcsdir ++ "/prefs/prefs") > + (darcsdir ++ "/prefs/prefs") (MaxAge 600) `catchall` return () (about reformatting: I am not complaining about how it looks now, but it helps review to do formatting changes in separate patch that says it's just formatting) > + b <- (Just <$> fetchFileLazyPS (fromDir ++ "/" ++ darcsdir ++ > + "/packs/basic.tar.gz") Uncachable) `catchall` return Nothing > + case b of > + Nothing -> copyNotPackedRepository fromRepo > + Just b' -> copyPackedRepository fromRepo b' > + > +copyNotPackedRepository :: forall p C(r u t). RepoPatch p => Repository p > C(r u t) -> IO () > +copyNotPackedRepository fromrepository@(Repo _ opts rffrom _) = do The "NotPacked" in the name is a bit edgy, but I can't think of anything better that's also clear enough, so keep it as it is. > + copyInventory fromrepository > debugMessage "Grabbing lock in new repository..." > hunk ./src/Darcs/Repository.hs 249 > - withRepoLock opts $- \torepository@(Repo _ _ rfto (DarcsRepository _ c)) -> > + withRepoLock opts $- \torepository@(Repo _ _ rfto _) -> > if formatHas HashedInventory rffrom && formatHas HashedInventory rfto > then do debugMessage "Writing working directory contents..." > createPristineDirectoryTree torepository "." Is this just a warning fix? > hunk ./src/Darcs/Repository.hs 268 > debugMessage "Writing the pristine" > pristineFromWorking torepository > +copyPackedRepository :: forall p C(r u t). RepoPatch p => > + Repository p C(r u t) -> BL.ByteString -> IO () > +copyPackedRepository fromRepo@(Repo fromDir opts _ (DarcsRepository _ > fromCache)) b = do > + Repo toDir _ toFormat (DarcsRepository toPristine toCache) <- > + identifyRepositoryFor fromRepo "." > + toCache2 <- unionRemoteCaches toCache fromCache fromDir > + let toRepo :: Repository p C(r u t) > + toRepo = Repo toDir opts toFormat $ DarcsRepository toPristine toCache2 > + fromPacksDir = fromDir ++ "/" ++ darcsdir ++ "/packs/" > + createDirectoryIfMissing False $ toDir </> darcsdir </> "inventories" > + createDirectoryIfMissing False $ toDir </> darcsdir </> "pristine.hashed" > + createDirectoryIfMissing False $ toDir </> darcsdir </> "patches" > + copySources toRepo fromDir > + -- unpack inventory & pristine cache > + writeCompressed . Tar.read $ decompress b > + createPristineDirectoryTree toRepo "." For all I can tell, this function is a complete misnomer: what this does is copy the existing pristine into the working copy. (!) It is out of scope for this patch, but I am noting down that it needs to be audited and renamed. > + -- pull new patches > + us <- readRepo toRepo > + them <- readRepo fromRepo > + comm :\/: unc <- return $ findCommonAndUncommon us them Hm, this is my sin, but the findCommonAndUncommon function actually does not return any "common" patches. I will rename it later... You probably want to rename "comm" and "unc" to "us'" and "them'". > + revertTentativeChanges This might be redundant, but let's keep it in for a good measure. > + Sealed pw <- tentativelyMergePatches toRepo "get" opts comm unc us' them' (due to above) > + invalidateIndex toRepo > + withGutsOf toRepo $ do > + finalizeRepositoryChanges toRepo > + applyToWorking toRepo opts pw > + return () Ok. > + -- get old patches > + writeCompressed . Tar.read . decompress =<< fetchFileLazyPS (fromPacksDir > ++ > + "patches.tar.gz") Uncachable Great. We should also make this interruptible later, like normal "get" is, with the result of getting a lazy repository. You can do this in a followup patch and I won't hold up pushing this just for that. > + where > + writeCompressed Tar.Done = return () > + writeCompressed (Tar.Next x xs) = case Tar.entryContent x of > + Tar.NormalFile x' _ -> do > + let p = Tar.entryPath x > + BL.writeFile p $ if "hashed_inventory" `isSuffixOf` p > + then x' > + else compress x' > + writeCompressed xs > + _ -> fail "Unexpected non-file tar entry" > + writeCompressed (Tar.Fail e) = fail e OK. > -- | writePatchSet is like patchSetToRepository, except that it doesn't > -- touch the working directory or pristine cache. > writePatchSet :: RepoPatch p => PatchSet p C(Origin x) -> [DarcsFlag] -> IO > (Repository p C(r u t)) > hunk ./src/Darcs/Repository.hs 411 > withCurrentDirectory dir $ readWorking >>= replacePristine repo > pristineFromWorking (Repo dir _ _ (DarcsRepository p _)) = > withCurrentDirectory dir $ createPristineFromWorking p > + > hunk ./src/Darcs/Repository/HashedRepo.hs 29 > addToTentativeInventory, > removeFromTentativeInventory, > readRepo, readTentativeRepo, > writeAndReadPatch, > writeTentativeInventory, copyRepo, > - readHashedPristineRoot, pris2inv > + readHashedPristineRoot, pris2inv, > copySources > ) where > > import System.Directory ( createDirectoryIfMissing ) > hunk ./src/Darcs/Repository/HashedRepo.hs 293 > createDirectoryIfMissing False (outr++"/"++darcsdir++"/inventories") > copyFileOrUrl opts (inr++"/"++darcsdir++"/hashed_inventory") > (outr++"/"++darcsdir++"/hashed_inventory") > Uncachable -- no need to copy anything but > hashed_inventory! > + copySources repo inr > + debugMessage "Done copying hashed inventory." > + > +copySources :: RepoPatch p => Repository p C(r u t) -> String -> IO () > +copySources repo@(Repo outr _ _ _) inr = do > let repoCache = extractCache $ modifyCache repo dropGlobalCaches > appendBinFile (outr++"/"++darcsdir++"/prefs/sources") (show $ repo2cache > inr `unionCaches` repoCache ) > hunk ./src/Darcs/Repository/HashedRepo.hs 300 > - debugMessage "Done copying hashed inventory." > where > dropGlobalCaches (Ca cache) = Ca $ filter notGlobalCache cache > notGlobalCache xs = case xs of Split off copySources from copyRepo. Makes sense. Does not change copyRepo semantics. So, there's some minor wibbling to do still, but other than that, awesome. I guess nothing of the mentioned issues warrants amending -- if you run into dependencies, just record new patch(es) on top. I will hold off pushing this till Thursday evening -- at that point, unless I run into bugs, I can push. Please try to address my comments by then -- if not, I will take care of the most pressing ones and will expect you to post followup patches to fix the rest. Thanks! Yours, Petr. _______________________________________________ darcs-users mailing list darcs-users@darcs.net http://lists.osuosl.org/mailman/listinfo/darcs-users