I have completed initial work on libwww pipelining. The output of darcs whatsnew
is attached (sorry for that, I will try to make a proper patch tomorrow).
What is done:
- The functionality of the libcurl binding is now implemented using libwww,
  so pipelining works.
- The new Libwww module provides 3 functions:
  * copyUrl - same as copyUrl from Curl.hs. It uses copyUrls and waitNextUrl.
  * copyUrls - takes a list of (url, filename) pairs, creates requests and
    adds them to libwww. It does not download anything by itself.
  * waitNextUrl - starts the libwww event loop and blocks until the first URL
    loads (or an error happens). After it returns it should be possible to
    add more URLs to the queue using copyUrls again. waitNextUrl should be
    called as many times as there are URLs in the queue.
At the moment the only place where copyUrls is used is the get command,
but I hope this interface is enough for Darcs. If not, we need to think of
something more complex; a sketch of how I imagine the interface being
driven is included below. I am waiting for comments here.
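
To illustrate the intended calling pattern, here is a minimal sketch (it is
not part of the patch; fetchAll is a made-up helper name):

  import Libwww ( copyUrls, waitNextUrl, Cachable(..) )

  -- Queue several downloads, then wait for each one in turn.  copyUrls
  -- only queues the requests; every queued URL needs one matching
  -- waitNextUrl call before it is guaranteed to be on disk.
  fetchAll :: [(String, FilePath)] -> Cachable -> IO ()
  fetchAll pairs cache = do
      copyUrls pairs cache
      mapM_ (const waitNextUrl) pairs
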
What is missing:
- DARCS_PROXYUSERPWD is not used (but http_proxy works).
- Proper error handling.
- Not tested.
- ???
Performance:
- libwww with pipelining disabled:
> time ~/projects/darcs/darcs get http://darcs.net/repos/unstable
Copying patch 4847 of 4847... done.
Applying patch 4847 of 4847... done.
Finished getting.
~/projects/darcs/darcs get http://darcs.net/repos/unstable  23,95s user 7,54s system 1% cpu 27:15,17 total
- libwww with pipelining enabled:
> time ~/projects/darcs/darcs get http://darcs.net/repos/unstable
Copying patch 4847 of 4847... done.
Applying patch 4847 of 4847... done.
Finished getting.
~/projects/darcs/darcs get http://darcs.net/repos/unstable  71,41s user 97,55s system 45% cpu 6:14,02 total
Note that I have a poor-quality line with a 35Kb maximum, so I believe that
with pipelining enabled the results are limited by the connection speed.
The only thing I really do not like about libwww is that it is not
maintained upstream, and it looks like it will be removed from Debian
(see bug #440436); the suggestion there is to move to libcurl. So I did
some more experiments and came to the conclusion that doing the same with
libcurl is possible but would require much more work. I think adding
proper pipelining support to the Haskell HTTP library is the way to go,
and I plan to do some research on this topic.
Regards,
Dmitry
{
hunk ./GNUmakefile 48
- Curl.hs DateMatcher.lhs \
+ Libwww.hs DateMatcher.lhs \
hunk ./GNUmakefile 148
-ifeq ($(HAVE_LIBCURL),True)
-C_OBJS += src/hscurl.o
-GHCFLAGS += -DHAVE_CURL
-endif
+#ifeq ($(HAVE_LIBCURL),True)
+#C_OBJS += src/hscurl.o
+#GHCFLAGS += -DHAVE_CURL
+#endif
+
+#ifeq ($(HAVE_LIBWWW),True)
+C_OBJS += src/hslibwww.o
+GHCFLAGS += -DHAVE_LIBWWW
+CPPFLAGS += -I/usr/include/w3c-libwww -optc-DHAVE_CONFIG_H
+OPTLLDFLAGS += `libwww-config --libs`
+#endif
hunk ./src/Darcs/External.hs 54
-import Curl ( copyUrl )
-import Curl ( Cachable(..) )
+import Libwww ( copyUrl, copyUrls, waitNextUrl )
+import Libwww ( Cachable(..) )
hunk ./src/Darcs/External.hs 196
- then Curl.copyUrl u v cache
+ then Libwww.copyUrl u v cache
hunk ./src/Darcs/External.hs 260
- then doWithPatches opts (\n -> copyRemote (u++"/"++n) (d++"/"++n) cache) ns
+ then do copyUrls (map (\n -> ((u++"/"++n), (d++"/"++n))) ns) cache
+ doWithPatches opts (const Libwww.waitNextUrl) ns
addfile ./src/Libwww.hs
hunk ./src/Libwww.hs 1
+{-# OPTIONS -fffi #-}
+module Libwww ( copyUrl, copyUrls, waitNextUrl,
+ Cachable(Cachable, Uncachable, MaxAge) )
+where
+
+import System.IO
+import Foreign.C.Types ( CInt )
+#ifdef HAVE_LIBWWW
+import Foreign.C.String ( withCString, newCString, CString )
+import Foreign.Marshal.Array ( withArrayLen, withArray )
+import Foreign.Marshal.Alloc ( free )
+import Foreign.Ptr ( Ptr )
+import Control.Monad ( when )
+import System.Environment ( getEnv )
+import Autoconf ( darcs_version )
+#endif
+
+data Cachable = Cachable | Uncachable | MaxAge !CInt
+
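+-- copyUrl downloads a single URL to a file: it queues the request with
+-- copyUrls and immediately waits for it with waitNextUrl.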
+copyUrl :: String -> String -> Cachable -> IO ()
+#ifdef HAVE_LIBWWW
+copyUrl u f cache = do copyUrls [(u, f)] cache
+ libwww_wait_next_url
+ return ()
+#else
+copyUrl _ _ _ = fail "There is no libwww!"
+#endif
+
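+-- copyUrls queues a list of (url, filename) pairs with libwww.  It only
+-- creates the requests; nothing is downloaded until waitNextUrl runs the
+-- event loop.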
+copyUrls :: [ (String, String) ] -> Cachable -> IO ()
+#ifdef HAVE_LIBWWW
+copyUrls u cache =
+ withCString darcs_version $ \vstr ->
+ withCStringList (map fst u) $ \urls ->
+ withArrayLen urls $ \cnt uptr ->
+ withCStringList (map snd u) $ \filenames ->
+ withArray filenames $ \fptr -> do
+ pwd <- getProxyUserPwd
+ withCString pwd $ \pstr -> do
+ err <- libwww_request_urls vstr pstr fptr uptr (intToCInt cnt) (cachableToInt cache)
+ when (err /= 0) $ fail $ "Failed to download URL"
+#else
+copyUrls _ _ = fail "There is no libwww!"
+#endif
+
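+-- waitNextUrl runs the libwww event loop until the next queued request
+-- finishes (successfully or not).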
+waitNextUrl :: IO ()
+#ifdef HAVE_LIBWWW
+waitNextUrl = do libwww_wait_next_url
+ return ()
+#else
+waitNextUrl = fail "There is no libwww!"
+#endif
+
+#ifdef HAVE_LIBWWW
+cachableToInt :: Cachable -> CInt
+cachableToInt Cachable = -1
+cachableToInt Uncachable = 0
+cachableToInt (MaxAge n) = n
+
+intToCInt :: Int -> CInt
+intToCInt = fromInteger . toInteger
+
+foreign import ccall "hslibwww.h libwww_request_urls"
+ libwww_request_urls :: CString -> CString -> Ptr CString -> Ptr CString -> CInt -> CInt -> IO CInt
+
+foreign import ccall "hslibwww.h libwww_wait_next_url"
+ libwww_wait_next_url :: IO CInt
+
+getProxyUserPwd :: IO String
+getProxyUserPwd = do
+ getEnv "DARCS_PROXYUSERPWD" `catch` (\_ -> return "")
+
+withCStringList :: [String] -> ([CString] -> IO a) -> IO a
+withCStringList ss f = do css <- mapM newCString ss
+ r <- f css
+ mapM_ free css
+ return r
+#endif
addfile ./src/hslibwww.c
hunk ./src/hslibwww.c 1
+#include "hscurl.h"
+
+#include <stdio.h>
+#include <WWWLib.h>
+#include <WWWInit.h>
+
+static BOOL init_done = NO;
+static char* user_agent = "darcs";
+
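+/* After-filter called by libwww when a request terminates: on HTTP 200 the
+   accumulated chunk is written to the file named by param, then the chunk,
+   the request and the filename copy are freed and the event loop is stopped
+   so that libwww_wait_next_url can return. */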
+int terminate_handler(HTRequest *request,
+ HTResponse *response,
+ void *param,
+ int status)
+{
+ HTChunk *const chunk = HTRequest_context(request);
+ if (status == 200)
+ {
+ FILE *const f = fopen(param, "wb");
+ if (f != NULL)
+ {
+ if (HTChunk_size(chunk) > 0)
+ {
+ if (fwrite(HTChunk_data(chunk), HTChunk_size(chunk), 1, f) != 1)
+ perror("terminate_handler fwrite");
+ }
+ fclose(f);
+ }
+ else
+ perror("terminate_handler fopen");
+ }
+
+ HTChunk_delete(chunk);
+ HTRequest_delete(request);
+ free(param);
+
+ HTEventList_stopLoop();
+
+ return HT_OK;
+}
+
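+/* Initialise libwww on first use (non-interactive, no local cache, proxy
+   settings taken from the environment), then create one request per URL.
+   terminate_handler is registered as the after-filter with a copy of the
+   target filename as its parameter; cache_time < 0 means cachable, 0 adds
+   no-cache headers and > 0 adds a max-age header.  Requests are only
+   queued here, the event loop is not run. */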
+int libwww_request_urls(const char *darcs_version,
+ const char *proxypass,
+ const char **filenames,
+ const char **urls,
+ int count,
+ int cache_time)
+{
+ if (init_done == NO)
+ {
+ HTProfile_newNoCacheClient(user_agent, darcs_version);
+ HTProxy_getEnvVar();
+ HTAlert_setInteractive(NO);
+ init_done = YES;
+ }
+
+ int i;
+ for (i = 0; i < count; ++i)
+ {
+ HTRequest *const request = HTRequest_new();
+ HTRequest_addAfter(request,
+ terminate_handler,
+ NULL,
+ strdup(filenames[i]),
+ HT_ALL,
+ HT_FILTER_LAST,
+ YES);
+ HTRequest_setOutputFormat(request, WWW_SOURCE);
+ if (cache_time == 0)
+ {
+ HTRequest_addGnHd(request, HT_G_PRAGMA_NO_CACHE);
+ HTRequest_addCacheControl(request, "no-cache", "");
+ }
+ else if (cache_time > 0)
+ {
+ char buf[8];
+ snprintf(buf, sizeof(buf), "%d", cache_time);
+ buf[sizeof(buf) - 1] = '\0';
+ HTRequest_addCacheControl(request, "max-age", buf);
+ }
+
+ HTChunk *const chunk = HTLoadToChunk(urls[i], request);
+ if (chunk == NULL)
+ printf("\n\nNOT ACCEPTED\n\n");
+ else
+ HTRequest_setContext(request, chunk);
+ }
+
+ return 0;
+}
+
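+/* Run the event loop until the next pending request terminates.  Returns
+   non-zero when no requests are left in the queue. */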
+int libwww_wait_next_url()
+{
+ if (HTNet_isEmpty() == NO)
+ HTEventList_newLoop();
+
+ return (HTNet_isEmpty() == YES);
+}
addfile ./src/hslibwww.h
hunk ./src/hslibwww.h 1
+int libwww_request_urls(const char *darcs_version,
+ const char *proxyuserpass,
+ const char **filenames,
+ const char **urls,
+ int count,
+ int cache_time);
+
+int libwww_wait_next_url();
}