pvfs2-aio-cancel.patch
----------------------
This patch fixes a bug in the I/O cleanup path on the server side.
In cases where a flow needed to cancel pending I/O operations, the
trove cancel function was calling aio_cancel() directly. This
doesn't work correctly if the alt-aio implementation is used.
pvfs2-root-squash-address.patch
-------------------------------
This fixes a bug in the root squash checking on the server side.
The routine that compares a client address against the root squash
list was using getsockname() rather than getpeername(). The former
retrieves the server's address rather than the client's.
pvfs2-ls-rm.patch
-----------------
This is an interim fix for the concurrent "rm -rf" and "ls" problem
that was recently discussed on the mailing list. It sounds like
the long term direction is to switch to using entry names as dirent
tokens, but this patch fixes the majority of cases in the meantime
without a protocol change. The problem in the case I was seeing
was a cache conflict between the two clients (the ls was caching
tokens in the pcache that caused rm to get the wrong position).
The token is 64 bits wide, but only the low-order 32 bits are used (the
START and END values are near the top of the 32 bit range). This
patch takes advantage of the extra top 32 bits on the server side
to set a unique identifier in the token for each "readdir session"
so that their cache entries do not collide. The client is not
aware of this change because it treats the token as an opaque
value. A readdir session begins when a client requests the START
position.
pvfs2-client-buffer-logging.patch
---------------------------------
I don't know if there is any interest in this, but this adds some
debugging to the buffers used in the kernel module. On startup,
pvfs2-client will print the buffer pointers (whether debugging is
enabled or not). There are also new debugging messages that will
show the first byte of each memory buffer passing through the
kernel if enabled. These logging messages were added to help track
down what ended up being a server side problem (see pvfs2-aio-
cancel.patch), but we kept it in case it is useful in the future.
pvfs2-concurrent-dirent-ops.patch
---------------------------------
I don't know that this is useful for anyone, but we are posting it
just in case. This patch will disable the request scheduler
optimization that allows concurrent rmdirent and crdirent
operations on a given directory. When this optimization was
originally introduced, we found some problems with consistency if
two clients attempted to create and delete the same file name at
the same time (sorry, I can't find the mailing list posting on this
right now, but I remember discussing it). We don't know if this is
still a problem or not, but we have still been running with this
optimization disabled as a safety precaution.
-Phil
Index: pvfs2_src/src/io/trove/trove-dbpf/dbpf-dspace.c
===================================================================
--- pvfs2_src/src/io/trove/trove-dbpf/dbpf-dspace.c (revision 3997)
+++ pvfs2_src/src/io/trove/trove-dbpf/dbpf-dspace.c (revision 3998)
@@ -1391,8 +1391,13 @@
if ((cur_op->op.type == BSTREAM_READ_LIST) ||
(cur_op->op.type == BSTREAM_WRITE_LIST))
{
+#if 0
ret = aio_cancel(cur_op->op.u.b_rw_list.fd,
cur_op->op.u.b_rw_list.aiocb_array);
+#endif
+ ret = cur_op->op.u.b_rw_list.aio_ops->aio_cancel(
+ cur_op->op.u.b_rw_list.fd,
+ cur_op->op.u.b_rw_list.aiocb_array);
gossip_debug(
GOSSIP_TROVE_DEBUG, "aio_cancel returned %s\n",
((ret == AIO_CANCELED) ? "CANCELED" :
diff -Naur pvfs2-with-aio/src/io/dev/pint-dev.c pvfs2/src/io/dev/
pint-dev.c
--- pvfs2-with-aio/src/io/dev/pint-dev.c 2007-07-20
10:02:32.000000000 -0400
+++ pvfs2/src/io/dev/pint-dev.c 2007-10-02 10:25:52.000000000 -0400
@@ -165,6 +165,8 @@
uint64_t page_size = sysconf(_SC_PAGE_SIZE), total_size;
void *ptr = NULL;
int ioctl_cmd[2] = {PVFS_DEV_MAP, 0};
+ int debug_on = 0;
+ uint64_t debug_mask = 0;
for (i = 0; i < ndesc; i++)
{
@@ -207,6 +209,12 @@
desc[i].size = params[i].dev_buffer_size;
desc[i].count = params[i].dev_buffer_count;
+ gossip_get_debug_mask(&debug_on, &debug_mask);
+ gossip_set_debug_mask(1, GOSSIP_DEV_DEBUG);
+ gossip_debug(GOSSIP_DEV_DEBUG,
+ "Mapping pointer %p for I/O.\n", ptr);
+ gossip_set_debug_mask(debug_on, debug_mask);
+
/* ioctl to ask driver to map pages if needed */
if (ioctl_cmd[i] != 0)
{
diff -Naur pvfs2-with-aio/src/kernel/linux-2.6/pvfs2-bufmap.c pvfs2/
src/kernel/linux-2.6/pvfs2-bufmap.c
--- pvfs2-with-aio/src/kernel/linux-2.6/pvfs2-bufmap.c 2007-08-19
14:20:27.000000000 -0400
+++ pvfs2/src/kernel/linux-2.6/pvfs2-bufmap.c 2007-10-02
10:25:52.000000000 -0400
@@ -530,6 +530,8 @@
void __user *offset = from;
void *to_kaddr = NULL;
struct pvfs_bufmap_desc *to = &desc_array[buffer_index];
+ char* tmp_printer = NULL;
+ int tmp_int = 0;
gossip_debug(GOSSIP_BUFMAP_DEBUG, "pvfs_bufmap_copy_from_user:
from %p, index %d, "
"size %zd\n", from, buffer_index, size);
@@ -550,6 +552,13 @@
to_kaddr = pvfs2_kmap(to->page_array[index]);
ret = copy_from_user(to_kaddr, offset, cur_copy_size);
+ if(!tmp_printer)
+ {
+ tmp_printer = (char*)(to_kaddr);
+ tmp_int += tmp_printer[0];
+ gossip_debug(GOSSIP_BUFMAP_DEBUG, "First character
(integer value) in pvfs_bufmap_copy_from_user: %d\n", tmp_int);
+ }
+
pvfs2_kunmap(to->page_array[index]);
if (ret)
@@ -726,6 +735,8 @@
struct iovec *copied_iovec = NULL;
struct pvfs_bufmap_desc *to = &desc_array[buffer_index];
unsigned int seg, page_offset = 0;
+ char* tmp_printer = NULL;
+ int tmp_int = 0;
gossip_debug(GOSSIP_BUFMAP_DEBUG,
"pvfs_bufmap_copy_iovec_from_user: index %d, "
"size %zd\n", buffer_index, size);
@@ -799,6 +810,14 @@
}
to_kaddr = pvfs2_kmap(to->page_array[index]);
ret = copy_from_user(to_kaddr + page_offset, from_addr,
cur_copy_size);
+ if(!tmp_printer)
+ {
+ tmp_printer = (char*)(to_kaddr+page_offset);
+ tmp_int += tmp_printer[0];
+ gossip_debug(GOSSIP_BUFMAP_DEBUG, "First character
(integer value) in pvfs_bufmap_copy_from_user: %d\n", tmp_int);
+ }
+
+
pvfs2_kunmap(to->page_array[index]);
#if 0
gossip_debug(GOSSIP_BUFMAP_DEBUG,
"pvfs2_bufmap_copy_iovec_from_user: copying from user %p to kernel %
p %zd bytes (to_kddr: %p,page_offset: %d)\n",
@@ -961,6 +980,8 @@
struct iovec *copied_iovec = NULL;
struct pvfs_bufmap_desc *from = &desc_array[buffer_index];
unsigned int seg, page_offset = 0;
+ char* tmp_printer = NULL;
+ int tmp_int = 0;
gossip_debug(GOSSIP_BUFMAP_DEBUG,
"pvfs_bufmap_copy_to_user_iovec: index %d, "
"size %zd\n", buffer_index, size);
@@ -1034,6 +1055,12 @@
inc_index = 1;
}
from_kaddr = pvfs2_kmap(from->page_array[index]);
+ if(!tmp_printer)
+ {
+ tmp_printer = (char*)(from_kaddr + page_offset);
+ tmp_int += tmp_printer[0];
+ gossip_debug(GOSSIP_BUFMAP_DEBUG, "First character
(integer value) in pvfs_bufmap_copy_to_user_iovec: %d\n", tmp_int);
+ }
ret = copy_to_user(to_addr, from_kaddr + page_offset,
cur_copy_size);
pvfs2_kunmap(from->page_array[index]);
#if 0
Index: pvfs2_src/src/server/request-scheduler/request-scheduler.c
===================================================================
--- pvfs2_src/src/server/request-scheduler/request-scheduler.c
(revision 2537)
+++ pvfs2_src/src/server/request-scheduler/request-scheduler.c
(revision 2538)
@@ -619,6 +619,7 @@
ret = 0;
}
}
+#if 0
else if((in_request->op == PVFS_SERV_CRDIRENT ||
in_request->op == PVFS_SERV_RMDIRENT) &&
next_element->state == REQ_SCHEDULED &&
@@ -656,6 +657,7 @@
ret = 0;
}
}
+#endif
else
{
tmp_element->state = REQ_QUEUED;
Index: pvfs2_src/src/io/trove/trove-dbpf/dbpf-keyval.c
===================================================================
--- pvfs2_src/src/io/trove/trove-dbpf/dbpf-keyval.c (revision 4195)
+++ pvfs2_src/src/io/trove/trove-dbpf/dbpf-keyval.c (revision 4196)
@@ -38,6 +38,8 @@
#include "pvfs2-internal.h"
#include "pint-perf-counter.h"
+static uint32_t readdir_session = 0;
+
extern int synccount;
/**
@@ -670,6 +672,7 @@
static int dbpf_keyval_iterate_op_svc(struct dbpf_op *op_p)
{
int count, ret;
+ uint64_t tmp_pos = 0;
assert(*op_p->u.k_iterate.count_p > 0);
@@ -713,6 +716,10 @@
if(*op_p->u.k_iterate.position_p == TROVE_ITERATE_START)
{
*op_p->u.k_iterate.position_p = count;
+ /* store a session identifier in the top 32 bits */
+ tmp_pos += readdir_session;
+ *op_p->u.k_iterate.position_p += (tmp_pos << 32);
+ readdir_session++;
}
else
{
@@ -1454,7 +1461,10 @@
* we fall back to stepping through all the entries to get
* to the position
*/
-
+    /* strip the session out of the position; we need to use a true
+     * integer offset if we get past the cache
+     */
+    pos = pos & 0xffffffff;
return dbpf_keyval_iterate_step_to_position(handle, pos,
dbc_p);
}
Index: pvfs2_src/src/io/bmi/bmi_tcp/bmi-tcp.c
===================================================================
--- pvfs2_src/src/io/bmi/bmi_tcp/bmi-tcp.c (revision 4234)
+++ pvfs2_src/src/io/bmi/bmi_tcp/bmi-tcp.c (revision 4235)
@@ -1708,9 +1708,15 @@
struct sockaddr_in map_addr;
socklen_t map_addr_len = sizeof(map_addr);
const char *tcp_wildcard = wildcard_string + 6 /* strlen
("tcp://") */;
+ int ret = -1;
memset(&map_addr, 0, sizeof(map_addr));
- getsockname(tcp_addr_data->socket, (struct sockaddr *)
&map_addr, &map_addr_len);
+ if(getpeername(tcp_addr_data->socket, (struct sockaddr *)
&map_addr, &map_addr_len) < 0)
+ {
+ ret = bmi_tcp_errno_to_pvfs(-EINVAL);
+ gossip_err("Error: failed to retrieve peer name for client.
\n");
+ return(ret);
+ }
/* Wildcard specification */
if (netmask == -1)
{
_______________________________________________
Pvfs2-developers mailing list
[email protected]
http://www.beowulf-underground.org/mailman/listinfo/pvfs2-developers