pvfs2-aio-cancel.patch
----------------------
This patch fixes a bug in the I/O cleanup path on the server side. In
cases where a flow needed to cancel pending I/O operations, the trove
cancel function was calling aio_cancel() directly. This doesn't work
correctly if the alt-aio implementation is used.
pvfs2-root-squash-address.patch
-------------------------------
This fixes a bug in the root squash checking on the server side. The
routine that compares a client address against the root squash list was
using getsockname() rather than getpeername(). The former retrieves the
server's address rather than the client's.
pvfs2-ls-rm.patch
-----------------
This is an interim fix for the concurrent "rm -rf" and "ls" problem that
was recently discussed on the mailing list. It sounds like the long
term direction is to switch to using entry names as dirent tokens, but
this patch fixes the majority of cases in the meantime without a
protocol change. The problem in the case I was seeing was a cache
conflict between the two clients (the ls was caching tokens in the
pcache that caused rm to get the wrong position). The token is 64 bits
wide, but only the first 32 bits are used (the START and END values are
near the top of the 32 bit range). This patch takes advantage of the
extra top 32 bits on the server side to set a unique identifier in the
token for each "readdir session" so that their cache entries do not
collide. The client is not aware of this change because it treats the
token as an opaque value. A readdir session begins when a client
requests the START position.
pvfs2-client-buffer-logging.patch
---------------------------------
I don't know if there is any interest in this, but this adds some
debugging to the buffers used in the kernel module. On startup,
pvfs2-client will print the buffer pointers (whether debugging is
enabled or not). There are also new debugging messages that will show
the first byte of each memory buffer passing through the kernel if
enabled. These logging messages were added to help track down what
ended up being a server side problem (see pvfs2-aio-cancel.patch), but
we kept it in case it is useful in the future.
pvfs2-concurrent-dirent-ops.patch
---------------------------------
I don't know that this is useful for anyone, but we are posting it just
in case. This patch will disable the request scheduler optimization
that allows concurrent rmdirent and crdirent operations on a given
directory. When this optimization was originally introduced, we found
some problems with consistency if two clients attempted to create and
delete the same file name at the same time (sorry, I can't find the
mailing list posting on this right now, but I remember discussing it).
We don't know if this is still a problem or not, but we have still been
running with this optimization disabled as a safety precaution.
-Phil
Index: pvfs2_src/src/io/trove/trove-dbpf/dbpf-dspace.c
===================================================================
--- pvfs2_src/src/io/trove/trove-dbpf/dbpf-dspace.c (revision 3997)
+++ pvfs2_src/src/io/trove/trove-dbpf/dbpf-dspace.c (revision 3998)
@@ -1391,8 +1391,13 @@
if ((cur_op->op.type == BSTREAM_READ_LIST) ||
(cur_op->op.type == BSTREAM_WRITE_LIST))
{
+#if 0
ret = aio_cancel(cur_op->op.u.b_rw_list.fd,
cur_op->op.u.b_rw_list.aiocb_array);
+#endif
+ ret = cur_op->op.u.b_rw_list.aio_ops->aio_cancel(
+ cur_op->op.u.b_rw_list.fd,
+ cur_op->op.u.b_rw_list.aiocb_array);
gossip_debug(
GOSSIP_TROVE_DEBUG, "aio_cancel returned %s\n",
((ret == AIO_CANCELED) ? "CANCELED" :
diff -Naur pvfs2-with-aio/src/io/dev/pint-dev.c pvfs2/src/io/dev/pint-dev.c
--- pvfs2-with-aio/src/io/dev/pint-dev.c 2007-07-20 10:02:32.000000000 -0400
+++ pvfs2/src/io/dev/pint-dev.c 2007-10-02 10:25:52.000000000 -0400
@@ -165,6 +165,8 @@
uint64_t page_size = sysconf(_SC_PAGE_SIZE), total_size;
void *ptr = NULL;
int ioctl_cmd[2] = {PVFS_DEV_MAP, 0};
+ int debug_on = 0;
+ uint64_t debug_mask = 0;
for (i = 0; i < ndesc; i++)
{
@@ -207,6 +209,12 @@
desc[i].size = params[i].dev_buffer_size;
desc[i].count = params[i].dev_buffer_count;
+ gossip_get_debug_mask(&debug_on, &debug_mask);
+ gossip_set_debug_mask(1, GOSSIP_DEV_DEBUG);
+ gossip_debug(GOSSIP_DEV_DEBUG,
+ "Mapping pointer %p for I/O.\n", ptr);
+ gossip_set_debug_mask(debug_on, debug_mask);
+
/* ioctl to ask driver to map pages if needed */
if (ioctl_cmd[i] != 0)
{
diff -Naur pvfs2-with-aio/src/kernel/linux-2.6/pvfs2-bufmap.c pvfs2/src/kernel/linux-2.6/pvfs2-bufmap.c
--- pvfs2-with-aio/src/kernel/linux-2.6/pvfs2-bufmap.c 2007-08-19 14:20:27.000000000 -0400
+++ pvfs2/src/kernel/linux-2.6/pvfs2-bufmap.c 2007-10-02 10:25:52.000000000 -0400
@@ -530,6 +530,8 @@
void __user *offset = from;
void *to_kaddr = NULL;
struct pvfs_bufmap_desc *to = &desc_array[buffer_index];
+ char* tmp_printer = NULL;
+ int tmp_int = 0;
gossip_debug(GOSSIP_BUFMAP_DEBUG, "pvfs_bufmap_copy_from_user: from %p, index %d, "
"size %zd\n", from, buffer_index, size);
@@ -550,6 +552,13 @@
to_kaddr = pvfs2_kmap(to->page_array[index]);
ret = copy_from_user(to_kaddr, offset, cur_copy_size);
+ if(!tmp_printer)
+ {
+ tmp_printer = (char*)(to_kaddr);
+ tmp_int += tmp_printer[0];
+ gossip_debug(GOSSIP_BUFMAP_DEBUG, "First character (integer value) in pvfs_bufmap_copy_from_user: %d\n", tmp_int);
+ }
+
pvfs2_kunmap(to->page_array[index]);
if (ret)
@@ -726,6 +735,8 @@
struct iovec *copied_iovec = NULL;
struct pvfs_bufmap_desc *to = &desc_array[buffer_index];
unsigned int seg, page_offset = 0;
+ char* tmp_printer = NULL;
+ int tmp_int = 0;
gossip_debug(GOSSIP_BUFMAP_DEBUG, "pvfs_bufmap_copy_iovec_from_user: index %d, "
"size %zd\n", buffer_index, size);
@@ -799,6 +810,14 @@
}
to_kaddr = pvfs2_kmap(to->page_array[index]);
ret = copy_from_user(to_kaddr + page_offset, from_addr, cur_copy_size);
+ if(!tmp_printer)
+ {
+ tmp_printer = (char*)(to_kaddr+page_offset);
+ tmp_int += tmp_printer[0];
+ gossip_debug(GOSSIP_BUFMAP_DEBUG, "First character (integer value) in pvfs_bufmap_copy_iovec_from_user: %d\n", tmp_int);
+ }
+
+
pvfs2_kunmap(to->page_array[index]);
#if 0
gossip_debug(GOSSIP_BUFMAP_DEBUG, "pvfs2_bufmap_copy_iovec_from_user: copying from user %p to kernel %p %zd bytes (to_kddr: %p,page_offset: %d)\n",
@@ -961,6 +980,8 @@
struct iovec *copied_iovec = NULL;
struct pvfs_bufmap_desc *from = &desc_array[buffer_index];
unsigned int seg, page_offset = 0;
+ char* tmp_printer = NULL;
+ int tmp_int = 0;
gossip_debug(GOSSIP_BUFMAP_DEBUG, "pvfs_bufmap_copy_to_user_iovec: index %d, "
"size %zd\n", buffer_index, size);
@@ -1034,6 +1055,12 @@
inc_index = 1;
}
from_kaddr = pvfs2_kmap(from->page_array[index]);
+ if(!tmp_printer)
+ {
+ tmp_printer = (char*)(from_kaddr + page_offset);
+ tmp_int += tmp_printer[0];
+ gossip_debug(GOSSIP_BUFMAP_DEBUG, "First character (integer value) in pvfs_bufmap_copy_to_user_iovec: %d\n", tmp_int);
+ }
ret = copy_to_user(to_addr, from_kaddr + page_offset, cur_copy_size);
pvfs2_kunmap(from->page_array[index]);
#if 0
Index: pvfs2_src/src/server/request-scheduler/request-scheduler.c
===================================================================
--- pvfs2_src/src/server/request-scheduler/request-scheduler.c (revision 2537)
+++ pvfs2_src/src/server/request-scheduler/request-scheduler.c (revision 2538)
@@ -619,6 +619,7 @@
ret = 0;
}
}
+#if 0
else if((in_request->op == PVFS_SERV_CRDIRENT ||
in_request->op == PVFS_SERV_RMDIRENT) &&
next_element->state == REQ_SCHEDULED &&
@@ -656,6 +657,7 @@
ret = 0;
}
}
+#endif
else
{
tmp_element->state = REQ_QUEUED;
Index: pvfs2_src/src/io/trove/trove-dbpf/dbpf-keyval.c
===================================================================
--- pvfs2_src/src/io/trove/trove-dbpf/dbpf-keyval.c (revision 4195)
+++ pvfs2_src/src/io/trove/trove-dbpf/dbpf-keyval.c (revision 4196)
@@ -38,6 +38,8 @@
#include "pvfs2-internal.h"
#include "pint-perf-counter.h"
+static uint32_t readdir_session = 0;
+
extern int synccount;
/**
@@ -670,6 +672,7 @@
static int dbpf_keyval_iterate_op_svc(struct dbpf_op *op_p)
{
int count, ret;
+ uint64_t tmp_pos = 0;
assert(*op_p->u.k_iterate.count_p > 0);
@@ -713,6 +716,10 @@
if(*op_p->u.k_iterate.position_p == TROVE_ITERATE_START)
{
*op_p->u.k_iterate.position_p = count;
+ /* store a session identifier in the top 32 bits */
+ tmp_pos += readdir_session;
+ *op_p->u.k_iterate.position_p += (tmp_pos << 32);
+ readdir_session++;
}
else
{
@@ -1454,7 +1461,10 @@
* we fall back to stepping through all the entries to get
* to the position
*/
-
+ /* strip the session out of the position; we need to use a true
+ * integer offset if we get past the cache
+ */
+ pos = pos & 0xffffffff;
return dbpf_keyval_iterate_step_to_position(handle, pos, dbc_p);
}
Index: pvfs2_src/src/io/bmi/bmi_tcp/bmi-tcp.c
===================================================================
--- pvfs2_src/src/io/bmi/bmi_tcp/bmi-tcp.c (revision 4234)
+++ pvfs2_src/src/io/bmi/bmi_tcp/bmi-tcp.c (revision 4235)
@@ -1708,9 +1708,15 @@
struct sockaddr_in map_addr;
socklen_t map_addr_len = sizeof(map_addr);
const char *tcp_wildcard = wildcard_string + 6 /* strlen("tcp://") */;
+ int ret = -1;
memset(&map_addr, 0, sizeof(map_addr));
- getsockname(tcp_addr_data->socket, (struct sockaddr *) &map_addr, &map_addr_len);
+ if(getpeername(tcp_addr_data->socket, (struct sockaddr *) &map_addr, &map_addr_len) < 0)
+ {
+ ret = bmi_tcp_errno_to_pvfs(-EINVAL);
+ gossip_err("Error: failed to retrieve peer name for client.\n");
+ return(ret);
+ }
/* Wildcard specification */
if (netmask == -1)
{
_______________________________________________
Pvfs2-developers mailing list
[email protected]
http://www.beowulf-underground.org/mailman/listinfo/pvfs2-developers