On Tue, Dec 04, 2001 at 09:01:15PM +0300, Alexander V. Lukyanov wrote:
> BTW, what do you think about caching in LsCache result of CHANGE_DIR?

I've implemented this.

I originally played around with making LsCache an inheritable (FsCache),
and putting the variant-type cached data in a derived class.  This was
more code than it was worth, and it got tricky: you had to explicitly
cast the cache entry down to the derived class to get at the data (using a
GetType() to find out if it was the type you wanted); and this made testing
for equality difficult (multidispatch in C++? yuck.)

I reverted that, and just made it another mode.

I've also added a single FileSet cache and a Find() interface for it.
This helps the most common case of repeatedly parsing the same
directory; for example, "glob cls -ld *".  It's freed whenever a cache
entry for the same location is freed, to make sure it never sticks around
indefinitely.  I only used this in IsDirectory so far; I kept a copy of
that function from before this change in case you don't want it.  Strip
one or the other.  (The API for this might need to be adjusted to make it
more useful elsewhere, but I'll deal with that if it becomes needed.)

I started working on Chmod with textual modes.  (Some servers do this
locally; since I assume many may not, and we can handle this ourselves, I
won't depend on that.)  This makes Chmod need more information than
TreatFileJob gives.  I began trying to extend TreatFileJob--letting the
derived say what information it needs, and passing a FileInfo instead of
a path.

Except that's basically what FindJob does (among other things.)

I don't think FindJob needs a whole lot of extending to let it take over
Chmod.  The major things: 
1: ability to not be recursive (done; set_maxdepth(0).)
2: ability to deal with files as argument
3: ability to not verify files at all, optionally.  That is, when the
class and the derived class don't need other information about files
(including file type for recursion) and the derived class says not to
verify arguments, don't do a LIST at all.  (This is to make sure plain
commands don't do unnecessary LISTs.)

2 is the tricky one, and is useful in other places (du file, find file).

One more thing: if I cls in a large directory (to slow things down), I
see "connection idle" or "connection limit reached" (when out of cache)
flicker past.  Odd ...

So, changes:

LsCache.cc, LsCache.h, ftpclass.cc: Add explicit filetype caching.
LsCache.cc, LsCache.h: Cache a single fileset.
GetFileInfo.cc, GetFileInfo.h: Fix up cache lookups; Close() connection as
needed.

-- 
Glenn Maynard
Index: LsCache.cc
===================================================================
RCS file: /home/lav/cvsroot/lftp/src/LsCache.cc,v
retrieving revision 1.14
diff -u -r1.14 LsCache.cc
--- LsCache.cc  2001/12/05 17:05:57     1.14
+++ LsCache.cc  2001/12/08 21:27:25
@@ -21,6 +21,7 @@
 /* $Id: LsCache.cc,v 1.14 2001/12/05 17:05:57 lav Exp $ */
 
 #include <config.h>
+#include <assert.h>
 #include "LsCache.h"
 #include "xmalloc.h"
 #include "plural.h"
@@ -32,6 +33,11 @@
 TimeInterval LsCache::ttl("60m");  // time to live = 60 minutes
 LsCache::ExpireHelper LsCache::expire_helper;
 
+FileSet *LsCache::fset=0;
+FileAccess *LsCache::fset_loc=0;
+int LsCache::fset_m=0;
+char *LsCache::fset_a=0;
+           
 void LsCache::CheckSize()
 {
    if(sizelimit<0)
@@ -130,10 +136,46 @@
    return 0;
 }
 
+FileSet *LsCache::Find(FileAccess *p_loc,const char *a,int m)
+{
+   /* Parsed-list lookup backed by one cached FileSet.  Reuse it only when
+    * mode, argument and location all match the entry we already hold. */
+   if(fset && m==fset_m && !strcmp(fset_a,a) && p_loc->SameLocationAs(fset_loc))
+      return fset;
+
+   const char *buf_c;
+   int bufsiz;
+   if(!Find(p_loc, a, m, &buf_c, &bufsiz))
+      return 0;
+   FileSet *new_fset=p_loc->ParseLongList(buf_c, bufsiz);
+   assert(new_fset); /* should not have unparsable lists cached */
+   /* Swap in the new entry.  LsCache keeps ownership (free_fset deletes
+    * it), so callers must not delete the returned set. */
+   free_fset();
+   fset=new_fset;
+   fset_a=xstrdup(a);
+   fset_m=m;
+   fset_loc=p_loc->Clone();
+   return fset;
+}
+
+void LsCache::free_fset()
+{
+   SMTask::Delete(fset_loc);
+   xfree(fset_a);
+   delete fset;
+   /* Null everything so the next Find() sees an empty cache. */
+   fset_loc=0;
+   fset_a=0;
+   fset=0;
+}
+
 LsCache::~LsCache()
 {
    if(expire_helper.expiring==this)
       expire_helper.expiring=0;
+   if(fset_loc && loc->SameLocationAs(fset_loc))
+      free_fset();
    SMTask::Delete(loc);
    xfree(data);
    xfree(arg);
@@ -244,21 +286,45 @@
    }
 }
 
-/* This is a hint only function. If file type is really needed, use ListInfo */
-/* Returns -1 if type is not known for sure */
+/* Mark a path as a directory or file. (We have other ways of knowing this;
+ * this is the most explicit and least expensive.) */
+void LsCache::SetDirectory(FileAccess *p_loc, const char *path, bool dir)
+{
+   char *origdir = alloca_strdup(p_loc->GetCwd());
+
+   p_loc->Chdir(path,false);
+   const char *entry = dir? "1":"0";
+   LsCache::Add(p_loc,"",FileAccess::CHANGE_DIR, entry, strlen(entry));
+   p_loc->Chdir(origdir,false);
+}
+
+/* This is a hint function. If file type is really needed, use GetFileInfo
+ * with showdir set to true. (GetFileInfo uses this function.)
+ * Returns -1 if type is not known, 1 if a directory, 0 if a file. */
+
 int LsCache::IsDirectory(FileAccess *p_loc,const char *dir_c)
 {
    if(*dir_c && dir_c[strlen(dir_c)-1] == '/')
       return 1;
 
    char *origdir = alloca_strdup(p_loc->GetCwd());
-
-   /* (This is cheap, so do this first.)  We know the path is a directory
-    * if we have a cache entry for it.  This is true regardless of the list
-    * type. */
-   /* TODO: or if we have a subdirectory cached; ie /foo is a dir if we
-    * know about /foo/bar */
    p_loc->Chdir(dir_c, false);
+
+   /* Cheap tests first:
+    *
+    * First, we know the path is a directory or not if we have an explicit
+    * CHANGE_DIR entry for it. */
+   const char *buf_c;
+   int bufsiz;
+   if(Find(p_loc, "", FileAccess::CHANGE_DIR, &buf_c,&bufsiz))
+   {
+      p_loc->SetCwd(origdir);
+      return buf_c[0] == '1';
+   }
+
+   /* We know the path is a directory if we have a cache entry for it.  This is
+    * true regardless of the list type.  (Unless it's a CHANGE_DIR entry; do this
+    * test after the CHANGE_DIR check.) */
    int ret = Find(p_loc, "", -1, 0,0);
    p_loc->SetCwd(origdir);
    if(ret)
@@ -275,8 +341,59 @@
    }
 
    ret = -1; /* don't know */
+   FileSet *fs=Find(p_loc, "", FA::LONG_LIST);
+   if(fs)
+   {
+      FileInfo *fi=fs->FindByName(basename_ptr(dir_c));
+      if(fi && (fi->defined&fi->TYPE))
+         ret = (fi->filetype == fi->DIRECTORY);
+      /* fs is LsCache's own cached set (released by free_fset); don't delete it. */
+   }
+
+   p_loc->SetCwd(origdir);
+   return ret;
+}
+
+#if 0
+int LsCache::IsDirectory(FileAccess *p_loc,const char *dir_c)
+{
+   if(*dir_c && dir_c[strlen(dir_c)-1] == '/')
+      return 1;
+
+   char *origdir = alloca_strdup(p_loc->GetCwd());
+   p_loc->Chdir(dir_c, false);
+
+   /* Cheap tests first: 
+    *
+    * First, we know the path is a directory or not if we have an expicit
+    * CHANGE_DIR entry for it. */
    const char *buf_c;
    int bufsiz;
+   if(Find(p_loc, "", FileAccess::CHANGE_DIR, &buf_c,&bufsiz))
+   {
+      p_loc->SetCwd(origdir);
+      return buf_c[0] == '1';
+   }
+
+   /* We know the path is a directory if we have a cache entry for it.  This is
+    * true regardless of the list type.  (Unless it's a CHANGE_DIR entry; do this
+    * test after the CHANGE_DIR check.) */
+   int ret = Find(p_loc, "", -1, 0,0);
+   p_loc->SetCwd(origdir);
+   if(ret)
+      return 1;
+
+   /* We know this is a file or a directory if the dirname is cached and
+    * contains the basename. */
+   char *dir = alloca_strdup(dir_c);
+   char *sl = strrchr(dir, '/');
+   if(sl)
+   {
+      *sl = 0;
+      p_loc->Chdir(dir, false);
+   }
+
+   ret = -1; /* don't know */
    if(Find(p_loc, "", FA::LONG_LIST, &buf_c, &bufsiz))
    {
       FileSet *fs = p_loc->ParseLongList(buf_c, bufsiz);
@@ -292,3 +409,4 @@
    p_loc->SetCwd(origdir);
    return ret;
 }
+#endif
Index: LsCache.h
===================================================================
RCS file: /home/lav/cvsroot/lftp/src/LsCache.h,v
retrieving revision 1.12
diff -u -r1.12 LsCache.h
--- LsCache.h   2001/12/05 16:56:44     1.12
+++ LsCache.h   2001/12/08 21:27:25
@@ -43,6 +43,12 @@
    static bool use;
    static long sizelimit;
    static TimeInterval ttl;
+   /* single cached fileset */
+   static FileSet *fset;
+   static FileAccess *fset_loc;
+   static int fset_m;
+   static char *fset_a;
+   static void free_fset();
 
    class ExpireHelper;
    friend class LsCache::ExpireHelper;
@@ -64,8 +70,10 @@
    static void Add(FileAccess *p_loc,const char *a,int m,const char *d,int l);
    static void Add(FileAccess *p_loc,const char *a,int m,const Buffer *ubuf);
    static int Find(FileAccess *p_loc,const char *a,int m,const char **d, int *l);
+   static FileSet *Find(FileAccess *p_loc,const char *a,int m);
 
    static int IsDirectory(FileAccess *p_loc,const char *dir);
+   static void SetDirectory(FileAccess *p_loc, const char *path, bool dir);
 
    enum change_mode { FILE_CHANGED, DIR_CHANGED, TREE_CHANGED };
    static void Changed(change_mode m,FileAccess *f,const char *what);
Index: ftpclass.cc
===================================================================
RCS file: /home/lav/cvsroot/lftp/src/ftpclass.cc,v
retrieving revision 1.222
diff -u -r1.222 ftpclass.cc
--- ftpclass.cc 2001/12/05 15:03:44     1.222
+++ ftpclass.cc 2001/12/08 21:27:27
@@ -40,6 +40,7 @@
 #include "FtpDirList.h"
 #include "log.h"
 #include "FileCopyFtp.h"
+#include "LsCache.h"
 
 #include "ascii_ctype.h"
 #include "misc.h"
@@ -3327,11 +3328,13 @@
            cwd=xstrdup(RespQueue[RQ_head].path);
         }
         set_real_cwd(cwd);
+        LsCache::SetDirectory(this, RespQueue[RQ_head].path, true);
         break;
       }
       if(is5XX(act))
       {
         SetError(NO_FILE,line);
+        LsCache::SetDirectory(this, RespQueue[RQ_head].path, false);
         break;
       }
       Disconnect();
@@ -3434,6 +3437,7 @@
         }
         if(!home)
            set_home(home_auto);
+        LsCache::SetDirectory(this, home, true);
         break;
       }
       break;
Index: GetFileInfo.cc
===================================================================
RCS file: /home/lav/cvsroot/lftp/src/GetFileInfo.cc,v
retrieving revision 1.6
diff -u -r1.6 GetFileInfo.cc
--- GetFileInfo.cc      2001/12/05 16:56:44     1.6
+++ GetFileInfo.cc      2001/12/08 21:27:28
@@ -6,38 +6,48 @@
 #include "misc.h"
 #include "LsCache.h"
 
+/* Get information about a path (dir or file).  If _dir is a file, get
+ * information about that file only.  If it's a directory, get information
+ * about files in it.  If _showdir is true, act like ls -d: get information
+ * about the directory itself.
+ *
+ * To find out if a path is a directory, attempt to chdir into it.  If it
+ * succeeds it's a directory, otherwise it's a file (or there was an error).
+ * Do this by setting the verify argument to Chdir() to true.
+ * 
+ * If the cache knows the file type of _dir, avoid changing directories if
+ * possible, so cached listings don't touch the connection at all.  
+ *
+ * We still need to Chdir() if we're operating out of cache (that's how you
+ * look up cache entries).  However, we don't really want to change the
+ * directory of the session (ie. send a CWD if we're FTP), so set verify to
+ * false if we're operating out of cache.
+ *
+ * Note: it's possible to know the type of a path (ie. its parent is cached)
+ * but not its contents.  This can lead to some inconsistencies, but only in
+ * fairly contrived situations (ie. "cls dir/", then change a directory in
+ * dir/ to a file).  It's not possible to fix completely, and a partial fix
+ * would cause other problems, so it's not worth bothering with.
+ */ 
 GetFileInfo::GetFileInfo(FileAccess *a, const char *_dir, bool _showdir)
    : ListInfo(a,0)
 {
    dir=xstrdup(_dir? _dir:"");
    showdir=_showdir;
-   state=CHANGE_DIR;
+   state=INITIAL;
    tried_dir=tried_file=false;
    result=0;
    realdir=0;
    li=0;
+   from_cache=0;
+   saved_error_text=0;
 
    origdir=xstrdup(session->GetCwd());
-
-   if(_showdir) tried_dir = true;
-
-   /* if we're not showing directories, try to skip tests we don't need */
-   if(!_showdir) switch(LsCache::IsDirectory(a,dir))
-   {
-   case 0:
-      tried_dir = true; /* it's a file */
-      break;
-   case 1:
-      tried_file = true; /* it's a dir */
-      break;
-   }
-
-   assert(!tried_dir || !tried_file); /* always do at least one */
-   saved_error_text=0;
 }
 
 GetFileInfo::~GetFileInfo()
 {
+   session->Close();
    Delete(li);
    xfree(saved_error_text);
    xfree(dir);
@@ -48,12 +58,31 @@
 int GetFileInfo::Do()
 {
    int res;
+   int m=STALL;
 
    if(Done())
-      return STALL;
+      return m;
 
    switch(state)
    {
+   case INITIAL:
+      state=CHANGE_DIR;
+
+      /* if we're not showing directories, try to skip tests we don't need */
+      if(use_cache && !showdir) switch(LsCache::IsDirectory(session,dir))
+      {
+        case 0:
+           tried_dir = true; /* it's a file */
+           from_cache = true;
+           break;
+        case 1:
+           tried_file = true; /* it's a dir */
+           from_cache = true;
+           break;
+      }
+   
+      assert(!tried_dir || !tried_file); /* always do at least one */
+
    case CHANGE_DIR:
       if(tried_dir && tried_file) {
         /* We tried both; no luck.  Fail. */
@@ -86,14 +115,17 @@
            *slash=0;
       }
 
-      session->Chdir(realdir, true);
+      /* See top comments for logic here: */
+      session->Chdir(realdir, !from_cache);
       state=CHANGING_DIR;
-      return MOVED;
+      m=MOVED;
 
    case CHANGING_DIR:
       res=session->Done();
       if(res==FA::IN_PROGRESS)
-        return STALL;
+        return m;
+      session->Close();
+
       if(res<0)
       {
         /* Failed.  Save the error, then go back and try to CD again.
@@ -113,7 +145,7 @@
       li->Need(need);
       SetExclude(exclude_prefix, rxc_exclude, rxc_include);
       state=GETTING_LIST;
-      return MOVED;
+      m=MOVED;
 
    case GETTING_LIST:
       if(li->Error()) {
@@ -122,9 +154,10 @@
       }
 
       if(!li->Done())
-        return STALL;
+        return m;
 
       state=DONE;
+      m=MOVED;
 
       /* Got the list.  Steal it from the listinfo: */
       result=li->GetResult();
@@ -146,7 +179,7 @@
            SetError(buf);
            xfree(buf);
            delete result; result=0;
-           return MOVED;
+           goto done;
         }
 
         /* If we're not listing directories as files, and the file is a
@@ -157,7 +190,7 @@
            SetError(buf);
            xfree(buf);
            delete result; result=0;
-           return MOVED;
+           goto done;
         }
 
         FileSet *newresult=new FileSet();
@@ -167,16 +200,15 @@
       }
 
       result->PrependPath(realdir);
-
-      return MOVED;
 
+done:
    case DONE:
-      if(done)
-        return STALL;
-
-      done=true;
-      session->Chdir(origdir, false);
-      return MOVED;
+      if(!done)
+      {
+        done=true;
+        session->Chdir(origdir, false);
+      }
+      return m;
    }
 
    abort();
Index: GetFileInfo.h
===================================================================
RCS file: /home/lav/cvsroot/lftp/src/GetFileInfo.h,v
retrieving revision 1.2
diff -u -r1.2 GetFileInfo.h
--- GetFileInfo.h       2001/12/05 15:03:43     1.2
+++ GetFileInfo.h       2001/12/08 21:27:28
@@ -40,11 +40,13 @@
 
    bool showdir;
 
-   enum state_t { CHANGE_DIR, CHANGING_DIR, GETTING_LIST, DONE } state;
+   enum state_t { INITIAL, CHANGE_DIR, CHANGING_DIR, GETTING_LIST, DONE } state;
    /* whether we've tried to cd to the whole dir (treating it as a dir): */
    bool tried_dir;
    /* and whether we've tried to cd to the basename (treating it as a file): */
    bool tried_file;
+   /* whether we found out the file type from cache */
+   bool from_cache;
 
    char *saved_error_text;
 

Reply via email to