Hi Pete,
Attached patch fixes the size computation bug (I hope!). Also attached the
one liner change to accomodate encode related print statements appear in
the logs.

However, RobL pointed out to me that pvfs2-cp still has data corruption
issues, which unfortunately don't get triggered thru the VFS as easily.
So I am going to dig that a little bit and see  if I can spot something
obvious.
Thanks for your comments/fixes on the encoding issues!
Murali

On Thu, 5 Jan 2006, Pete Wyckoff wrote:

> [EMAIL PROTECTED] wrote on Wed, 04 Jan 2006 21:26 -0600:
> > Could you try the attached patch and let me know if it fixes the crash?
> > (There are still some other bugs like the size of the file reported is
> > off-by-one, but I haven't dug deep into the SMALL_IO code just yet..)
>
> Yes, that fixes the crash, thanks!  But it is indeed off-by-one:
>
>     piv001$ tar tvfz tarball.tgz
>     drwxr-xr-x pw/oscsys         0 2005-12-23 17:15:49 ./
>     -rw-r--r-- pw/oscsys    180224 2005-12-23 17:15:40 ./data
>     piv001$ tar xfzvC tarball.tgz /pvfs
>     ./
>     ./data
>     piv001$ pvfs2-ls -la /pvfs
>     drwxrwxrwx    1 pw       oscsys          4096 2006-01-05 10:05 .
>     drwxrwxrwx    1 pw       oscsys          4096 2006-01-05 10:05 .. (faked)
>     -rw-r--r--    1 pw       oscsys        180223 2005-12-23 17:15 data
>     drwxrwxrwx    1 pw       oscsys          4096 2006-01-05 10:04 lost+found
>
> But I don't see your debugging statements in the logs when I turn on
> verbose on the client; weird.  Server logs attached too just in case those
> are interesting.
>
> > Also, could you look over and see if the encode_skip4 that I added will
> > fix those pesky alignment warnings on 64 bit machines or if they are
> > unnecessary?
>
> Yours looks fine, but a bit more direct would be to remove the existing
> skip4 and let the fs_id and enum nestle together in a single 64-bit
> word.  It didn't break anything on ia64 (modified patch attached.)
> There are other unrelated unaligned problems that have cropped up in
> the past few months that I'll take a look at, though.
>
> > Sam wrote the SMALL_IO protocol and he knows it best. I am pretty sure
> > this is an ugly fix if at all it works :)
> > I don't know how/if Rob/Sam want SMALL_IO protocol to be disabled since
> > we need people to use the CVS head version and find the last remaining
> > bugs (hopefully!). We could make an environment variable/mount-time option
> > that would disable small i/o changes temporarily but I think that decision 
> > is upto
> > them..
>
> Agree, it should be fixed.  I was just under pressure here to get
> past some old bugs and was hoping to do so by moving up to the CVS
> head where I know at least one has been addressed (append bug).
>
>               -- Pete
>
Index: src/common/misc/pvfs2-debug.c
===================================================================
RCS file: /anoncvs/pvfs2/src/common/misc/pvfs2-debug.c,v
retrieving revision 1.35
diff -u -r1.35 pvfs2-debug.c
--- src/common/misc/pvfs2-debug.c       20 Dec 2005 00:08:27 -0000      1.35
+++ src/common/misc/pvfs2-debug.c       5 Jan 2006 21:21:55 -0000
@@ -50,6 +50,7 @@
     { "reqsched", GOSSIP_REQ_SCHED_DEBUG },
     { "flowproto", GOSSIP_FLOW_PROTO_DEBUG },
     { "flow", GOSSIP_FLOW_DEBUG },
+    { "decode", GOSSIP_ENDECODE_DEBUG},
     { "ncache", GOSSIP_NCACHE_DEBUG },
     { "mmaprcache", GOSSIP_MMAP_RCACHE_DEBUG },
     { "acache", GOSSIP_ACACHE_DEBUG },
Index: src/io/description/dist-simple-stripe.c
===================================================================
RCS file: /anoncvs/pvfs2/src/io/description/dist-simple-stripe.c,v
retrieving revision 1.11
diff -u -r1.11 dist-simple-stripe.c
--- src/io/description/dist-simple-stripe.c     22 Dec 2005 05:48:37 -0000      
1.11
+++ src/io/description/dist-simple-stripe.c     5 Jan 2006 21:21:55 -0000
@@ -48,10 +48,12 @@
     PVFS_simple_stripe_params * dparam = (PVFS_simple_stripe_params *)params;
     uint32_t server_nr = fd->server_nr;
     uint32_t server_ct = fd->server_ct;
-    PVFS_size strips_div = physical_size / (dparam->strip_size + 1);
-    PVFS_size strips_mod = physical_size % (dparam->strip_size + 1);
+    PVFS_size strips_div = physical_size / dparam->strip_size;
+    PVFS_size strips_mod = physical_size % dparam->strip_size;
 
-    return (strips_div * dparam->strip_size * server_ct) +
+    if (strips_mod != 0)
+        strips_div++;
+    return ((strips_div - 1) * dparam->strip_size * server_ct) +
            (dparam->strip_size * server_nr) +
            strips_mod;
 }
@@ -66,7 +68,9 @@
     PVFS_size strips_div = physical_offset / dparam->strip_size;
     PVFS_size strips_mod = physical_offset % dparam->strip_size;
 
-    return (strips_div * dparam->strip_size * server_ct) +
+    if (strips_mod != 0)
+        strips_div++;
+    return ((strips_div - 1) * dparam->strip_size * server_ct) +
            (dparam->strip_size * server_nr) +
            strips_mod;
 }
Index: src/proto/pvfs2-req-proto.h
===================================================================
RCS file: /anoncvs/pvfs2/src/proto/pvfs2-req-proto.h,v
retrieving revision 1.132
diff -u -r1.132 pvfs2-req-proto.h
--- src/proto/pvfs2-req-proto.h 14 Dec 2005 21:50:30 -0000      1.132
+++ src/proto/pvfs2-req-proto.h 5 Jan 2006 21:21:56 -0000
@@ -17,6 +17,7 @@
 #include "pvfs2-request.h"
 #include "pint-request.h"
 #include "pvfs2-mgmt.h"
+#include "gossip.h"
 
 /* update PVFS2_PROTO_MAJOR on wire protocol changes that break backwards
  * compatibility (such as changing the semantics or protocol fields for an
@@ -945,11 +946,11 @@
     struct PINT_Request * file_req;
     PVFS_offset file_req_offset;
     PVFS_size aggregate_size;
-    int segments;
 
     /* these are used for writes to map the regions of the memory buffer
      * to the contiguous encoded message.  They don't get encoded.
      */
+    int segments;
     PVFS_offset offsets[SMALL_IO_MAX_SEGMENTS];
     PVFS_size sizes[SMALL_IO_MAX_SEGMENTS];
 
@@ -958,36 +959,51 @@
 
 #ifdef __PINT_REQPROTO_ENCODE_FUNCS_C
 #define encode_PVFS_servreq_small_io(pptr,x) do { \
+    void *oldptr = (*pptr);\
     encode_PVFS_handle(pptr, &(x)->handle); \
     encode_PVFS_fs_id(pptr, &(x)->fs_id); \
-    encode_skip4(pptr,); \
     encode_enum(pptr, &(x)->io_type); \
     encode_uint32_t(pptr, &(x)->server_nr); \
     encode_uint32_t(pptr, &(x)->server_ct); \
+    encode_skip4(pptr,); \
     encode_PINT_dist(pptr, &(x)->dist); \
     encode_PINT_Request(pptr, &(x)->file_req); \
     encode_PVFS_offset(pptr, &(x)->file_req_offset); \
-    encode_PVFS_size(pptr, &(x)->aggregate_size); \
     if ((x)->io_type == PVFS_IO_WRITE) \
     { \
         int i = 0; \
-        for(; i < (x)->segments; ++i) \
+        PVFS_size _aggregate_size = 0;\
+        for (i = 0; i < (x)->segments; ++i) \
         { \
+            _aggregate_size += (x)->sizes[i];\
+        } \
+        (x)->aggregate_size = _aggregate_size;\
+        encode_PVFS_size(pptr, &(x)->aggregate_size); \
+        for (i = 0; i < (x)->segments; ++i) \
+        { \
+            gossip_debug(GOSSIP_ENDECODE_DEBUG, "offsets[%d] = %llu, sizes[%d] 
= %llu\n", \
+                    i, llu((x)->offsets[i]), i, llu((x)->sizes[i]));\
             memcpy((*pptr), \
                    (char *)(x)->buffer + ((x)->offsets[i]), \
                    (x)->sizes[i]); \
             (*pptr) += (x)->sizes[i]; \
         } \
     } \
+    else {\
+        encode_PVFS_size(pptr, &(x)->aggregate_size); \
+    }\
+    gossip_debug(GOSSIP_ENDECODE_DEBUG, "%p -> %p %lu\n", oldptr, (*pptr),\
+            (unsigned long)(*pptr)- (unsigned long)oldptr);\
 } while (0)
 
 #define decode_PVFS_servreq_small_io(pptr,x) do { \
+    void *oldptr = (*pptr);\
     decode_PVFS_handle(pptr, &(x)->handle); \
     decode_PVFS_fs_id(pptr, &(x)->fs_id); \
-    decode_skip4(pptr,); \
     decode_enum(pptr, &(x)->io_type); \
     decode_uint32_t(pptr, &(x)->server_nr); \
     decode_uint32_t(pptr, &(x)->server_ct); \
+    decode_skip4(pptr,); \
     decode_PINT_dist(pptr, &(x)->dist); \
     decode_PINT_Request(pptr, &(x)->file_req); \
     PINT_request_decode((x)->file_req); /* unpacks the pointers */ \
@@ -1002,6 +1018,8 @@
         (x)->buffer = (*pptr); \
         (*pptr) += (x)->aggregate_size; \
     } \
+    gossip_debug(GOSSIP_ENDECODE_DEBUG, "%p -> %p %lu\n", oldptr, (*pptr),\
+            (unsigned long)(*pptr) - (unsigned long)oldptr);\
 } while (0)
 #endif
 
_______________________________________________
PVFS2-developers mailing list
[email protected]
http://www.beowulf-underground.org/mailman/listinfo/pvfs2-developers

Reply via email to