On Tue, Aug 05, 2014 at 08:33:46PM +0800, Bingpeng Zhu wrote: > From: NankaiZBP <nku...@foxmail.com> > > In current implementation, When we create a bucket, we decide > the bnode's location in account VDI using sd_hash(bucket_name) > as key. We handle hash conflict by linear probing hash table. > Here is the bug: > When we delete a bucket, we can't discard its bnode. Because > bnode_lookup() need it to find if some bucket exists or not > by checking adjacent bnodes. Therefore, we just zero its > bnode.name when client want to delete a bucket. When we create > a bucket later, we can reuse the deleted bnode if they hash to > the same location in account VDI. > > Signed-off-by: Yu Fang <bingpeng....@alibaba-inc.com> > --- > sheep/http/kv.c | 37 +++++++++++++++++++++++++++++-------- > 1 files changed, 29 insertions(+), 8 deletions(-) > > diff --git a/sheep/http/kv.c b/sheep/http/kv.c > index 5c4b386..2060bb1 100644 > --- a/sheep/http/kv.c > +++ b/sheep/http/kv.c > @@ -247,18 +247,21 @@ int kv_delete_account(struct http_request *req, const > char *account) > */ > > static int bnode_do_create(struct kv_bnode *bnode, struct sd_inode *inode, > - uint32_t idx) > + uint32_t idx, bool create) > { > uint32_t vid = inode->vdi_id; > uint64_t oid = vid_to_data_oid(vid, idx); > int ret; > > bnode->oid = oid; > - ret = sd_write_object(oid, (char *)bnode, sizeof(*bnode), 0, true); > + ret = sd_write_object(oid, (char *)bnode, sizeof(*bnode), 0, create); > if (ret != SD_RES_SUCCESS) { > sd_err("failed to create object, %" PRIx64, oid); > goto out; > } > + if (!create) > + goto out; > + > sd_inode_set_vid(inode, idx, vid); > ret = sd_inode_write_vid(inode, idx, vid, vid, 0, false, false); > if (ret != SD_RES_SUCCESS) { > @@ -276,6 +279,7 @@ static int bnode_create(struct kv_bnode *bnode, uint32_t > account_vid) > uint32_t tmp_vid, idx; > uint64_t hval, i; > int ret; > + bool create = true; > > ret = sd_read_object(vid_to_vdi_oid(account_vid), (char *)inode, > sizeof(*inode), 0); > @@ -289,16 +293,27 @@ static int bnode_create(struct kv_bnode *bnode, > uint32_t account_vid) > for (i = 0; i < MAX_DATA_OBJS; i++) { > idx = (hval + i) % MAX_DATA_OBJS; > tmp_vid = sd_inode_get_vid(inode, idx); > - if (tmp_vid) > - continue; > - else > + if (tmp_vid) { > + uint64_t oid = vid_to_data_oid(account_vid, idx); > + char name[SD_MAX_BUCKET_NAME] = { }; > + > + ret = sd_read_object(oid, name, sizeof(name), 0); > + if (ret != SD_RES_SUCCESS) > + goto out; > + if (name[0] == 0) { > + create = false; > + goto create; > + } > + } else > break; > } > if (i == MAX_DATA_OBJS) { > ret = SD_RES_NO_SPACE; > goto out; > } > - ret = bnode_do_create(bnode, inode, idx); > + > +create: > + ret = bnode_do_create(bnode, inode, idx, create); > out: > free(inode); > return ret; > @@ -423,6 +438,7 @@ static int bucket_delete(const char *account, uint32_t > avid, const char *bucket) > struct kv_bnode bnode; > char onode_name[SD_MAX_VDI_LEN]; > char alloc_name[SD_MAX_VDI_LEN]; > + char name[SD_MAX_BUCKET_NAME] = {}; > int ret; > > snprintf(onode_name, SD_MAX_VDI_LEN, "%s/%s", account, bucket); > @@ -436,9 +452,14 @@ static int bucket_delete(const char *account, uint32_t > avid, const char *bucket) > if (bnode.object_count > 0) > return SD_RES_VDI_NOT_EMPTY; > > - ret = sd_discard_object(bnode.oid); > + /* > + * We can't discard bnode because bnode_lookup() need it to find > + * if some bucket exists or not by checking adjacent bnodes. > + * So we just zero bnode.name to indicate a deleted bucket. > + */ > + ret = sd_write_object(bnode.oid, name, sizeof(name), 0, false); > if (ret != SD_RES_SUCCESS) { > - sd_err("failed to discard bnode for %s", bucket); > + sd_err("failed to zero bnode for %s", bucket); > return ret; > } > sd_delete_vdi(onode_name);
If we try to mimic the onode lookup algorithm as your patch did, we should also modify the bnode_lookup() too. See onode_lookup_nolock(). Thanks Yuan -- sheepdog mailing list sheepdog@lists.wpkg.org http://lists.wpkg.org/mailman/listinfo/sheepdog