Re: [PATCH] don't expect inode name to be NUL-terminated (avoid read overrun)

2010-09-14 Thread Jeff Garzik

On 09/10/2010 08:55 AM, Jim Meyering wrote:


* server/msg.c (msg_get): Copy only name_len bytes, then NUL-terminate,
rather than using snprintf to copy up to and including nonexistent NUL.
---

valgrind exposed this.  The use of snprintf would have been
correct if the inode name buffer (following the struct raw_inode)
were NUL-terminated, but it is not.


applied -- good catch

out of curiosity, what is your patch base?

We combined cld and chunkd into a single 'hail' pkg, and from the 
pathname, your patch was generated from the older cld pkg.  We'd like to 
find the source and replace cld/chunkd with 'hail'.


F12?  F13?  rawhide?

Thanks,

Jeff




--
To unsubscribe from this list: send the line unsubscribe hail-devel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[hail patch 0/3] chunkd: on-disk checksumming and get-partial operation

2010-09-14 Thread Jeff Garzik

This patchset is just about ready to go upstream.  Just need to write a
couple tests (familiar refrain eh?:)).

These changes add a new Get-Partial-Object (GET_PART) chunkd operation.

GET_PART permits partial retrieval of an object, by adding an
(offset,length) pair to the standard Get-Object (GET) operation.
length==0 is special-cased as meaning retrieve until end of object.

The maximum number of bytes that may be requested in a single GET_PART
request is 4 x 64k blocks (256k).  Larger lengths will be truncated
down to the maximum.

Because we currently only store whole-object SHA1 checksums, we are left
without an ability to verify on-disk data is valid, when retrieving a
subset of an object.  Thus, a necessary pre-req of GET_PART is changing
the checksum scheme, which is done as follows:

* objects are defined as runs of 64k logical blocks
* checksums are stored on-disk for each 64k in an object
* Rather than returning the stored SHA1 checksum, which serves
  to verify both on-disk and network integrity, we break this
  into two steps,
* verify per-64k checksums at GET_PART time
* generate on-the-fly SHA1 checksum for GET_PART
  returned data

The chunkd network protocol supports any offset/length, including
not-64k-aligned values.  However, failure to align GET_PART requests on
64k boundaries will result in reduced performance, due to additional
work chunkd must perform [and then throw away], because chunkd now works
in 64k chunks internally.

This is a major protocol milestone, and should immediately enable sane
usage by nfs4d and itd (see wiki if unfamiliar), as well as hopefully
providing useful benefits to tabled as well.

--
To unsubscribe from this list: send the line unsubscribe hail-devel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[hail patch 1/3] chunkd: Add checksum table to on-disk format

2010-09-14 Thread Jeff Garzik

commit f1de17a6e2b3afdbfbfa581228280b65a4a17e5f
Author: Jeff Garzik j...@garzik.org
Date:   Thu Aug 5 17:47:03 2010 -0400

chunkd: Add checksum table to on-disk format, one sum per 64k of data

Signed-off-by: Jeff Garzik jgar...@redhat.com

 chunkd/be-fs.c |  162 -
 1 file changed, 137 insertions(+), 25 deletions(-)

diff --git a/chunkd/be-fs.c b/chunkd/be-fs.c
index 4b851a7..d714e7c 100644
--- a/chunkd/be-fs.c
+++ b/chunkd/be-fs.c
@@ -53,14 +53,23 @@ struct fs_obj {
int in_fd;
char*in_fn;
off_t   sendfile_ofs;
+
+   size_t  checked_bytes;
+   SHA_CTX checksum;
+   unsigned intcsum_idx;
+   void*csum_tbl;
+   size_t  csum_tbl_sz;
+
+   unsigned intn_blk;
 };
 
 struct be_fs_obj_hdr {
charmagic[4];
uint32_tkey_len;
uint64_tvalue_len;
+   uint32_tn_blk;
 
-   charreserved[16];
+   charreserved[12];
 
unsigned char   hash[CHD_CSUM_SZ];
charowner[128];
@@ -208,6 +217,8 @@ static struct fs_obj *fs_obj_alloc(void)
obj-out_fd = -1;
obj-in_fd = -1;
 
+   SHA1_Init(obj-checksum);
+
return obj;
 }
 
@@ -318,6 +329,17 @@ static bool key_valid(const void *key, size_t key_len)
return true;
 }
 
+static unsigned int fs_blk_count(uint64_t data_len)
+{
+   uint64_t n_blk;
+
+   n_blk = data_len  CHUNK_BLK_ORDER;
+   if (data_len  (CHUNK_BLK_SZ - 1))
+   n_blk++;
+
+   return (unsigned int) n_blk;
+}
+
 struct backend_obj *fs_obj_new(uint32_t table_id,
   const void *key, size_t key_len,
   uint64_t data_len,
@@ -325,6 +347,7 @@ struct backend_obj *fs_obj_new(uint32_t table_id,
 {
struct fs_obj *obj;
char *fn = NULL;
+   size_t csum_bytes;
enum chunk_errcode erc = che_InternalError;
off_t skip_len;
 
@@ -339,6 +362,13 @@ struct backend_obj *fs_obj_new(uint32_t table_id,
return NULL;
}
 
+   obj-n_blk = fs_blk_count(data_len);
+   csum_bytes = obj-n_blk * CHD_CSUM_SZ;
+   obj-csum_tbl = malloc(csum_bytes);
+   if (!obj-csum_tbl)
+   goto err_out;
+   obj-csum_tbl_sz = csum_bytes;
+
/* build local fs pathname */
fn = fs_obj_pathname(table_id, key, key_len);
if (!fn)
@@ -359,7 +389,7 @@ struct backend_obj *fs_obj_new(uint32_t table_id,
obj-out_fn = fn;
 
/* calculate size of front-of-file metadata area */
-   skip_len = sizeof(struct be_fs_obj_hdr) + key_len;
+   skip_len = sizeof(struct be_fs_obj_hdr) + key_len + csum_bytes;
 
/* position file pointer where object data (as in, not metadata)
 * will begin
@@ -397,7 +427,10 @@ struct backend_obj *fs_obj_open(uint32_t table_id, const 
char *user,
struct be_fs_obj_hdr hdr;
ssize_t rrc;
uint64_t value_len, tmp64;
+   size_t csum_bytes;
enum chunk_errcode erc = che_InternalError;
+   struct iovec iov[2];
+   size_t total_rd_len;
 
if (!key_valid(key, key_len)) {
*err_code = che_InvalidKey;
@@ -457,23 +490,45 @@ struct backend_obj *fs_obj_open(uint32_t table_id, const 
char *user,
goto err_out;
 
value_len = GUINT64_FROM_LE(hdr.value_len);
+   obj-n_blk = GUINT32_FROM_LE(hdr.n_blk);
+   csum_bytes = obj-n_blk * CHD_CSUM_SZ;
 
/* verify file size large enough to contain value */
-   tmp64 = value_len + sizeof(hdr) + key_len;
+   tmp64 = value_len + sizeof(hdr) + key_len + csum_bytes;
if (G_UNLIKELY(st.st_size  tmp64)) {
applog(LOG_ERR, obj(%s) size error, too small, obj-in_fn);
goto err_out;
}
 
+   /* verify expected size of checksum table */
+   if (G_UNLIKELY(fs_blk_count(value_len) != obj-n_blk)) {
+   applog(LOG_ERR, obj(%s) unexpected blk count 
+  (%u from val sz, %u from hdr),
+  obj-in_fn, fs_blk_count(value_len), obj-n_blk);
+   goto err_out;
+   }
+
+   obj-csum_tbl = malloc(csum_bytes);
+   if (!obj-csum_tbl)
+   goto err_out;
+   obj-csum_tbl_sz = csum_bytes;
+
obj-bo.key = malloc(key_len);
obj-bo.key_len = key_len;
if (!obj-bo.key)
goto err_out;
 
-   /* read object variable-length header */
-   rrc = read(obj-in_fd, obj-bo.key, key_len);
-   if ((rrc != key_len) || (memcmp(key, obj-bo.key, key_len))) {
-   applog(LOG_ERR, read hdr key obj(%s) failed: %s,
+   /* init additional header segment list */
+   iov[0].iov_base =