[PATCH 0/3] update chunkd checksum verification scheme

2010-07-15 Thread Jeff Garzik

This patchset is part of the work necessary to get ranged-GET (aka
partial GET) working.  As explained in
http://marc.info/?l=hail-devel&m=127871407125539&w=2 the current chunkd
checksum scheme does not work at all for partial retrievals, and must be
revamped.

These patches present step 1 of 4, adding a table of checksums to
chunkd's local on-disk format.

There are no protocol or API changes in this patchset; existing clients
should work fine without any changes.

Nevertheless, this will not be committed to the main branch until
partial retrieval is actually implemented.  I don't commit changes
unless they are actually needed.  This checksum table and sendfile
removal work is not required until partial-GET actually exists.

Jeff




--
To unsubscribe from this list: send the line unsubscribe hail-devel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/3] chunkd: remove sendfile(2) support

2010-07-15 Thread Jeff Garzik
commit d663521ba7e6a808be02633e57dbeb7a95973c0f
Author: Jeff Garzik j...@garzik.org
Date:   Thu Jul 15 13:50:10 2010 -0400

chunkd: remove sendfile(2) zero-copy support

chunkd will soon be checksumming data in main memory.  That removes
the utility of a zero-copy interface which bypasses the on-heap
data requirement.

Signed-off-by: Jeff Garzik jgar...@redhat.com

 chunkd/be-fs.c  |   60 
 chunkd/chunkd.h |   14 -
 chunkd/object.c |   31 
 chunkd/server.c |   28 --
 configure.ac|3 --
 5 files changed, 15 insertions(+), 121 deletions(-)

diff --git a/chunkd/be-fs.c b/chunkd/be-fs.c
index f72ed48..5c97388 100644
--- a/chunkd/be-fs.c
+++ b/chunkd/be-fs.c
@@ -25,9 +25,6 @@
 #include sys/stat.h
 #include sys/socket.h
 #include sys/uio.h
-#if defined(HAVE_SYS_SENDFILE_H)
-#include sys/sendfile.h
-#endif
 #include stdlib.h
 #include unistd.h
 #include stdio.h
@@ -52,7 +49,6 @@ struct fs_obj {
 
int in_fd;
char*in_fn;
-   off_t   sendfile_ofs;
 };
 
 struct be_fs_obj_hdr {
@@ -542,62 +538,6 @@ ssize_t fs_obj_write(struct backend_obj *bo, const void 
*ptr, size_t len)
return rc;
 }
 
-#if defined(HAVE_SENDFILE)  defined(__linux__)
-
-ssize_t fs_obj_sendfile(struct backend_obj *bo, int out_fd, size_t len)
-{
-   struct fs_obj *obj = bo-private;
-   ssize_t rc;
-
-   if (obj-sendfile_ofs == 0) {
-   obj-sendfile_ofs += sizeof(struct be_fs_obj_hdr);
-   obj-sendfile_ofs += bo-key_len;
-   }
-
-   rc = sendfile(out_fd, obj-in_fd, obj-sendfile_ofs, len);
-   if (rc  0)
-   applog(LOG_ERR, obj sendfile(%s) failed: %s,
-  obj-in_fn, strerror(errno));
-
-   return rc;
-}
-
-#elif defined(HAVE_SENDFILE)  defined(__FreeBSD__)
-
-ssize_t fs_obj_sendfile(struct backend_obj *bo, int out_fd, size_t len)
-{
-   struct fs_obj *obj = bo-private;
-   ssize_t rc;
-   off_t sbytes = 0;
-
-   if (obj-sendfile_ofs == 0) {
-   obj-sendfile_ofs += sizeof(struct be_fs_obj_hdr);
-   obj-sendfile_ofs += bo-key_len;
-   }
-
-   rc = sendfile(obj-in_fd, out_fd, obj-sendfile_ofs, len,
- NULL, sbytes, 0);
-   if (rc  0) {
-   applog(LOG_ERR, obj sendfile(%s) failed: %s,
-  obj-in_fn, strerror(errno));
-   return rc;
-   }
-
-   obj-sendfile_ofs += sbytes;
-
-   return sbytes;
-}
-
-#else
-
-ssize_t fs_obj_sendfile(struct backend_obj *bo, int out_fd, size_t len)
-{
-   applog(LOG_ERR, BUG: sendfile used but not supported);
-   return -EOPNOTSUPP;
-}
-
-#endif /* HAVE_SENDFILE  HAVE_SYS_SENDFILE_H */
-
 bool fs_obj_write_commit(struct backend_obj *bo, const char *user,
 unsigned char *md, bool sync_data)
 {
diff --git a/chunkd/chunkd.h b/chunkd/chunkd.h
index 1e1b1d3..1e3741a 100644
--- a/chunkd/chunkd.h
+++ b/chunkd/chunkd.h
@@ -48,8 +48,6 @@ enum {
STD_COOKIE_MIN  = 7,
 
STD_TRASH_MAX   = 1000,
-
-   CLI_MAX_SENDFILE_SZ = 512 * 1024,
 };
 
 struct client;
@@ -63,7 +61,6 @@ struct client_write {
uint64_tlen;/* write buffer length */
cli_write_func  cb; /* callback */
void*cb_data;   /* data passed to cb */
-   boolsendfile;   /* using sendfile? */
 
struct list_headnode;
 };
@@ -275,7 +272,6 @@ extern bool fs_obj_delete(uint32_t table_id, const char 
*user,
  const void *kbuf, size_t klen,
  enum chunk_errcode *err_code);
 extern int fs_obj_disable(const char *fn);
-extern ssize_t fs_obj_sendfile(struct backend_obj *bo, int out_fd, size_t len);
 extern int fs_list_objs_open(struct fs_obj_lister *t,
 const char *root_path, uint32_t table_id);
 extern int fs_list_objs_next(struct fs_obj_lister *t, char **fnp);
@@ -330,7 +326,6 @@ extern void applog(int prio, const char *fmt, ...);
 extern bool cli_err(struct client *cli, enum chunk_errcode code, bool 
recycle_ok);
 extern int cli_writeq(struct client *cli, const void *buf, unsigned int buflen,
 cli_write_func cb, void *cb_data);
-extern bool cli_wr_sendfile(struct client *, cli_write_func);
 extern bool cli_rd_set_poll(struct client *cli, bool readable);
 extern void cli_wr_set_poll(struct client *cli, bool writable);
 extern bool cli_cb_free(struct client *cli, struct client_write *wr,
@@ -349,15 +344,6 @@ extern void read_config(void);
 /* selfcheck.c */
 extern int chk_spawn(TCHDB *hdb);
 
-static inline bool use_sendfile(struct client *cli)
-{
-#if defined(HAVE_SENDFILE)  defined(HAVE_SYS_SENDFILE_H)
-   return cli-ssl ? false : true;
-#else
- 

[PATCH 3/3] chunkd: on-disk format stores per-64k checksums

2010-07-15 Thread Jeff Garzik
commit e6fcc02bea062af291148771a59ee2028ae98834
Author: Jeff Garzik j...@garzik.org
Date:   Thu Jul 15 13:57:17 2010 -0400

chunkd: Add checksum table to on-disk format, one sum per 64k of data

Signed-off-by: Jeff Garzik jgar...@redhat.com

 chunkd/be-fs.c |  145 +
 1 file changed, 127 insertions(+), 18 deletions(-)

diff --git a/chunkd/be-fs.c b/chunkd/be-fs.c
index 671c8fd..1bd85ea 100644
--- a/chunkd/be-fs.c
+++ b/chunkd/be-fs.c
@@ -40,6 +40,11 @@
 
 #define BE_FS_OBJ_MAGICCHU1
 
+enum {
+   CHUNK_BLK_ORDER = 16,   /* 64k blocks */
+   CHUNK_BLK_SZ= 1  CHUNK_BLK_ORDER,
+};
+
 struct fs_obj {
struct backend_obj  bo;
 
@@ -49,14 +54,23 @@ struct fs_obj {
 
int in_fd;
char*in_fn;
+
+   size_t  checked_bytes;
+   SHA_CTX checksum;
+   unsigned intcsum_idx;
+   void*csum_tbl;
+   size_t  csum_tbl_sz;
+
+   unsigned intn_blk;
 };
 
 struct be_fs_obj_hdr {
charmagic[4];
uint32_tkey_len;
uint64_tvalue_len;
+   uint32_tn_blk;
 
-   charreserved[16];
+   charreserved[12];
 
unsigned char   hash[CHD_CSUM_SZ];
charowner[128];
@@ -204,6 +218,8 @@ static struct fs_obj *fs_obj_alloc(void)
obj-out_fd = -1;
obj-in_fd = -1;
 
+   SHA1_Init(obj-checksum);
+
return obj;
 }
 
@@ -314,6 +330,17 @@ static bool key_valid(const void *key, size_t key_len)
return true;
 }
 
+static unsigned int fs_blk_count(uint64_t data_len)
+{
+   uint64_t n_blk;
+
+   n_blk = data_len  CHUNK_BLK_ORDER;
+   if (data_len  (CHUNK_BLK_SZ - 1))
+   n_blk++;
+
+   return (unsigned int) n_blk;
+}
+
 struct backend_obj *fs_obj_new(uint32_t table_id,
   const void *key, size_t key_len,
   uint64_t data_len,
@@ -321,6 +348,7 @@ struct backend_obj *fs_obj_new(uint32_t table_id,
 {
struct fs_obj *obj;
char *fn = NULL;
+   size_t csum_bytes;
enum chunk_errcode erc = che_InternalError;
off_t skip_len;
 
@@ -335,6 +363,13 @@ struct backend_obj *fs_obj_new(uint32_t table_id,
return NULL;
}
 
+   obj-n_blk = fs_blk_count(data_len);
+   csum_bytes = obj-n_blk * CHD_CSUM_SZ;
+   obj-csum_tbl = malloc(csum_bytes);
+   if (!obj-csum_tbl)
+   goto err_out;
+   obj-csum_tbl_sz = csum_bytes;
+
/* build local fs pathname */
fn = fs_obj_pathname(table_id, key, key_len);
if (!fn)
@@ -355,7 +390,7 @@ struct backend_obj *fs_obj_new(uint32_t table_id,
obj-out_fn = fn;
 
/* calculate size of front-of-file metadata area */
-   skip_len = sizeof(struct be_fs_obj_hdr) + key_len;
+   skip_len = sizeof(struct be_fs_obj_hdr) + key_len + csum_bytes;
 
/* position file pointer where object data (as in, not metadata)
 * will begin
@@ -391,8 +426,11 @@ struct backend_obj *fs_obj_open(uint32_t table_id, const 
char *user,
struct stat st;
struct be_fs_obj_hdr hdr;
ssize_t rrc;
-   uint64_t value_len;
+   uint64_t value_len, tmp64;
+   size_t csum_bytes;
enum chunk_errcode erc = che_InternalError;
+   struct iovec iov[2];
+   size_t total_rd_len;
 
if (!key_valid(key, key_len)) {
*err_code = che_InvalidKey;
@@ -447,25 +485,49 @@ struct backend_obj *fs_obj_open(uint32_t table_id, const 
char *user,
}
 
/* verify object key length matches input key length */
-   if (GUINT32_FROM_LE(hdr.key_len) != key_len)
+   if (G_UNLIKELY(GUINT32_FROM_LE(hdr.key_len) != key_len))
goto err_out;
 
-   /* verify file size large enough to contain value */
value_len = GUINT64_FROM_LE(hdr.value_len);
-   if ((st.st_size - sizeof(hdr) - key_len)  value_len) {
+   obj-n_blk = GUINT32_FROM_LE(hdr.n_blk);
+   csum_bytes = obj-n_blk * CHD_CSUM_SZ;
+
+   /* verify file size large enough to contain value */
+   tmp64 = value_len + sizeof(hdr) + key_len + csum_bytes;
+   if (G_UNLIKELY(st.st_size  tmp64)) {
applog(LOG_ERR, obj(%s) unexpected size change, obj-in_fn);
goto err_out;
}
 
+   /* verify expected size of checksum table */
+   if (G_UNLIKELY(fs_blk_count(value_len) != obj-n_blk)) {
+   applog(LOG_ERR, obj(%s) unexpected blk count 
+  (%u from val sz, %u from hdr),
+  obj-in_fn, fs_blk_count(value_len), obj-n_blk);
+   goto err_out;
+   }
+
+   obj-csum_tbl = malloc(csum_bytes);