Re: [3/4] DST: Network state machine.
On 14:47 Mon 10 Dec , Evgeniy Polyakov wrote: Network state machine. Includes network async processing state machine and related tasks. Hi, I've tried to play a little bit with DST and discovered a huge memory leak. Every read request from a remote node results in a leak of the bio and the bio's pages. Data flow: -kst_export_ready ## prepare and submit bio -generic_make_request(bio) ## submit it -kst_export_read_end_io ## block layer calls bio_end_io callback -kst_thread_process_state ## process ready requests -kst_data_callback -kst_data_process_bio ## submit pages to network layer -kst_complete_req -kst_bio_endio -kst_export_read_end_io ## wow, we are calling the same bio_end_io ## callback twice -dst_free_request(req); ## request will be destroyed but its bio ## and all the bio's pages weren't released. We may release the bio's pages after they have been sent to the network; this is safe because sendpage() has already called get_page(). I've attached a simple patch which does this. Signed-off-by: Evgeniy Polyakov [EMAIL PROTECTED] diff --git a/drivers/block/dst/kst.c b/drivers/block/dst/kst.c new file mode 100644 index 000..8fa3387 --- /dev/null +++ b/drivers/block/dst/kst.c @@ -0,0 +1,1513 @@ +/* + * 2007+ Copyright (c) Evgeniy Polyakov [EMAIL PROTECTED] + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include linux/kernel.h +#include linux/module.h +#include linux/list.h +#include linux/slab.h +#include linux/socket.h +#include linux/kthread.h +#include linux/net.h +#include linux/in.h +#include linux/poll.h +#include linux/bio.h +#include linux/dst.h + +#include net/sock.h + +struct kst_poll_helper +{ + poll_table pt; + struct kst_state*st; +}; + +static LIST_HEAD(kst_worker_list); +static DEFINE_MUTEX(kst_worker_mutex); + +/* + * This function creates bound socket for local export node. + */ +static int kst_sock_create(struct kst_state *st, struct saddr *addr, + int type, int proto, int backlog) +{ + int err; + + err = sock_create(addr-sa_family, type, proto, st-socket); + if (err) + goto err_out_exit; + + err = st-socket-ops-bind(st-socket, (struct sockaddr *)addr, + addr-sa_data_len); + + err = st-socket-ops-listen(st-socket, backlog); + if (err) + goto err_out_release; + + st-socket-sk-sk_allocation = GFP_NOIO; + + return 0; + +err_out_release: + sock_release(st-socket); +err_out_exit: + return err; +} + +static void kst_sock_release(struct kst_state *st) +{ + if (st-socket) { + sock_release(st-socket); + st-socket = NULL; + } +} + +void kst_wake(struct kst_state *st) +{ + if (st) { + struct kst_worker *w = st-node-w; + unsigned long flags; + + spin_lock_irqsave(w-ready_lock, flags); + if (list_empty(st-ready_entry)) + list_add_tail(st-ready_entry, w-ready_list); + spin_unlock_irqrestore(w-ready_lock, flags); + + wake_up(w-wait); + } +} +EXPORT_SYMBOL_GPL(kst_wake); + +/* + * Polling machinery. 
+ */ +static int kst_state_wake_callback(wait_queue_t *wait, unsigned mode, + int sync, void *key) +{ + struct kst_state *st = container_of(wait, struct kst_state, wait); + kst_wake(st); + return 1; +} + +static void kst_queue_func(struct file *file, wait_queue_head_t *whead, + poll_table *pt) +{ + struct kst_state *st = container_of(pt, struct kst_poll_helper, pt)-st; + + st-whead = whead; + init_waitqueue_func_entry(st-wait, kst_state_wake_callback); + add_wait_queue(whead, st-wait); +} + +static void kst_poll_exit(struct kst_state *st) +{ + if (st-whead) { + remove_wait_queue(st-whead, st-wait); + st-whead = NULL; + } +} + +/* + * This function removes request from state tree and ordering list. + */ +void kst_del_req(struct dst_request *req) +{ + list_del_init(req-request_list_entry); +} +EXPORT_SYMBOL_GPL(kst_del_req); + +static struct dst_request *kst_req_first(struct kst_state *st) +{ + struct dst_request *req = NULL; + + if (!list_empty(st-request_list)) + req = list_entry(st-request_list.next,
Re: [4/4] DST: Algorithms used in distributed storage.
On 14:47 Mon 10 Dec , Evgeniy Polyakov wrote: Algorithms used in distributed storage. Mirror and linear mapping code. Hi, I've finally taken a look at your DST solution. It seems that your current implementation will not work on nonstandard devices, for example software raid0. Other comments follow: Signed-off-by: Evgeniy Polyakov [EMAIL PROTECTED] diff --git a/drivers/block/dst/alg_linear.c b/drivers/block/dst/alg_linear.c new file mode 100644 index 000..9dc0976 --- /dev/null +++ b/drivers/block/dst/alg_linear.c @@ -0,0 +1,105 @@ +/* + * 2007+ Copyright (c) Evgeniy Polyakov [EMAIL PROTECTED] + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include linux/module.h +#include linux/kernel.h +#include linux/init.h +#include linux/dst.h + +static struct dst_alg *alg_linear; + +/* + * This callback is invoked when node is removed from storage. + */ +static void dst_linear_del_node(struct dst_node *n) +{ +} + +/* + * This callback is invoked when node is added to storage. 
+ */ +static int dst_linear_add_node(struct dst_node *n) +{ + struct dst_storage *st = n-st; + + dprintk(%s: disk_size: %llu, node_size: %llu.\n, + __func__, st-disk_size, n-size); + + mutex_lock(st-tree_lock); + n-start = st-disk_size; + st-disk_size += n-size; + set_capacity(st-disk, st-disk_size); + mutex_unlock(st-tree_lock); + + return 0; +} + +static int dst_linear_remap(struct dst_request *req) +{ + int err; + + if (req-node-bdev) { + generic_make_request(req-bio); + return 0; + } + + err = kst_check_permissions(req-state, req-bio); + if (err) + return err; + + return req-state-ops-push(req); +} + +/* + * Failover callback - it is invoked each time error happens during + * request processing. + */ +static int dst_linear_error(struct kst_state *st, int err) +{ + if (err) + set_bit(DST_NODE_FROZEN, st-node-flags); + else + clear_bit(DST_NODE_FROZEN, st-node-flags); + return 0; +} + +static struct dst_alg_ops alg_linear_ops = { + .remap = dst_linear_remap, + .add_node = dst_linear_add_node, + .del_node = dst_linear_del_node, + .error = dst_linear_error, + .owner = THIS_MODULE, +}; + +static int __devinit alg_linear_init(void) +{ + alg_linear = dst_alloc_alg(alg_linear, alg_linear_ops); + if (!alg_linear) + return -ENOMEM; + + return 0; +} + +static void __devexit alg_linear_exit(void) +{ + dst_remove_alg(alg_linear); +} + +module_init(alg_linear_init); +module_exit(alg_linear_exit); + +MODULE_LICENSE(GPL); +MODULE_AUTHOR(Evgeniy Polyakov [EMAIL PROTECTED]); +MODULE_DESCRIPTION(Linear distributed algorithm.); diff --git a/drivers/block/dst/alg_mirror.c b/drivers/block/dst/alg_mirror.c new file mode 100644 index 000..3c457ff --- /dev/null +++ b/drivers/block/dst/alg_mirror.c @@ -0,0 +1,1128 @@ +/* + * 2007+ Copyright (c) Evgeniy Polyakov [EMAIL PROTECTED] + * All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include linux/module.h +#include linux/kernel.h +#include linux/init.h +#include linux/poll.h +#include linux/dst.h + +struct dst_mirror_node_data +{ + u64 age; +}; + +struct dst_mirror_priv +{ + unsigned intchunk_num; + + u64 last_start; + + spinlock_t backlog_lock; + struct list_headbacklog_list; + + struct dst_mirror_node_data old_data, new_data; + + unsigned long *chunk; +}; + +static struct dst_alg *alg_mirror; +static struct bio_set *dst_mirror_bio_set; + +static int dst_mirror_resync(struct dst_node *n, int ndp); + +static void dst_mirror_mark_sync(struct dst_node *n) +{ + if (test_bit(DST_NODE_NOTSYNC, n-flags)) { +