Hi, guys!

When I read that I am not alone and that the issue depends on the kernel
version, I decided to continue the investigation.
And I found out why our threads lock up on read/write operations.
In Linux kernel 3.14+, the file read and write syscalls changed a bit:
the fdget() call was replaced by fdget_pos(), which is fdget() plus an
additional position mutex lock for files with FMODE_ATOMIC_POS
(files whose inodes have the S_IFREG flag set, i.e. regular files). Since
I thought our xen files are not regular and are non-seekable, I hoped this
flag would not be set. But it is set. That is because our file system is
created by the function simple_fill_super(), which hard-codes this flag:
inode->i_mode = S_IFREG | files->mode;
So, as a quick hack, I made a patch: I just made a copy of this function
for xen which does not set this flag. It works for me. Could you please
check whether it works for you?
Best regards.

Iurii Konovalenko | Senior Software Engineer
GlobalLogic
P +3.8044.492.9695 M +38.099.932.2909
S yufuntik
www.globallogic.com
http://www.globallogic.com/email_disclaimer.txt


On Thu, Mar 19, 2015 at 12:38 PM, Ian Campbell <ian.campb...@citrix.com> wrote:
> On Thu, 2015-03-19 at 02:19 +0100, Marek Marczykowski-Górecki wrote:
>> Hi,
>>
>> I've hit some deadlock in kernel xenstore client exposed via
>> /proc/xen/xenbus.
>
> Sounds similar to what Iurii also reported last night in "Userspace PV
> backend hangs".
>
> Iurii's case was all 3.14 kernels, which is in your range too.
>
>>  Steps to reproduce are simple:
>> int main() {
>>       struct xs_handle *xs;
>>       xs = xs_open(0);
>>       xs_watch(xs, "domid", "token");
>>       xs_read(xs, 0, "name", NULL);
>>       return 0;
>> }
>>
>> xs_watch internally creates a new thread, which uses read to wait for the
>> watch. At the same time, the program tries to read some value,
>> but actually it hangs at sending the command (before even sending a path to
>> be read). Strace gives this (simplified for readability):
>> [pid  2494] write(3, "\4\0\0\0\0\0\0\0\0\0\0\0\f\0\0\0", 16) = 16
>> [pid  2494] write(3, "domid\0", 6)      = 6
>> [pid  2494] write(3, "token\0", 6)      = 6
>> [pid  2495] read(3,  <unfinished ...>
>> [pid  2494] futex(0x71c0d4, FUTEX_WAIT_PRIVATE, 1, NULL <unfinished ...>
>> [pid  2495] <... read resumed>
>> "\17\0\0\0\377\377\377\377\220~\255\27\f\0\0\0", 16) = 16
>> [pid  2495] read(3, "domid\0token\0", 12) = 12
>> [pid  2495] read(3, "\4\0\0\0\0\0\0\0\0\0\0\0\3\0\0\0", 16) = 16
>> [pid  2495] read(3, "OK\0", 3)          = 3
>> [pid  2495] futex(0x71c0d4, FUTEX_WAKE_OP_PRIVATE, 1, 1, 0x71c0d0,
>> {FUTEX_OP_SET, 0, FUTEX_OP_CMP_GT, 1} <unfinished ...>
>> [pid  2494] <... futex resumed> )       = 0
>> [pid  2495] <... futex resumed> )       = 1
>> [pid  2494] futex(0x71c0a8, FUTEX_WAIT_PRIVATE, 2, NULL <unfinished ...>
>> [pid  2495] futex(0x71c0a8, FUTEX_WAKE_PRIVATE, 1 <unfinished ...>
>> [pid  2494] <... futex resumed> )       = -1 EAGAIN (Resource
>> temporarily unavailable)
>> [pid  2495] <... futex resumed> )       = 0
>> [pid  2494] futex(0x71c0a8, FUTEX_WAKE_PRIVATE, 1 <unfinished ...>
>> [pid  2495] read(3,  <unfinished ...>
>> [pid  2494] <... futex resumed> )       = 0
>> [pid  2494] rt_sigaction(SIGPIPE, {SIG_DFL, [], SA_RESTORER,
>> 0x7fc78c1488f0}, NULL, 8) = 0
>> [pid  2494] rt_sigaction(SIGPIPE, {SIG_IGN, [], SA_RESTORER,
>> 0x7fc78c1488f0}, {SIG_DFL, [], SA_RESTORER, 0x7fc78c1488f0}, 8) = 0
>> [pid  2494] write(3, "\2\0\0\0\0\0\0\0\0\0\0\0\5\0\0\0", 16
>>
>> And that's all - 2494 is waiting on write, 2495 is waiting on read.
>>
>> On 3.12.x it is working. On 3.17.0 and 3.18.7 it is broken. I haven't
>> checked versions in the middle.
>>
>> Any ideas?
>>
>> _______________________________________________
>> Xen-devel mailing list
>> Xen-devel@lists.xen.org
>> http://lists.xen.org/xen-devel
>
>
From ea5642c0ea537ef3c9da946d4f26a8f597b68622 Mon Sep 17 00:00:00 2001
From: Iurii Konovalenko <iurii.konovale...@globallogic.com>
Date: Thu, 19 Mar 2015 13:55:44 +0200
Subject: [PATCH] arm: xen: hack to make xen files unregular

Change-Id: I0bc32867ca12dad78aa5f532a9c606fab9a3d1db
Signed-off-by: Iurii Konovalenko <iurii.konovale...@globallogic.com>
---
 drivers/xen/xenfs/super.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 70 insertions(+), 1 deletion(-)

diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c
index 06092e0..1f7e74c 100644
--- a/drivers/xen/xenfs/super.c
+++ b/drivers/xen/xenfs/super.c
@@ -14,6 +14,7 @@
 #include <linux/module.h>
 #include <linux/fs.h>
 #include <linux/magic.h>
+#include <linux/pagemap.h>
 
 #include <xen/xen.h>
 
@@ -42,6 +43,74 @@ static const struct file_operations capabilities_file_ops = {
 	.llseek = default_llseek,
 };
 
+static const struct super_operations xen_simple_super_operations = {
+	.statfs		= simple_statfs,
+};
+
+static int xen_simple_fill_super(struct super_block *s, unsigned long magic,
+	      struct tree_descr *files)
+{
+struct inode *inode;
+struct dentry *root;
+struct dentry *dentry;
+int i;
+
+s->s_blocksize = PAGE_CACHE_SIZE;
+s->s_blocksize_bits = PAGE_CACHE_SHIFT;
+s->s_magic = magic;
+s->s_op = &xen_simple_super_operations;
+s->s_time_gran = 1;
+
+inode = new_inode(s);
+if (!inode)
+	return -ENOMEM;
+/*
+* because the root inode is 1, the files array must not contain an
+* entry at index 1
+*/
+inode->i_ino = 1;
+inode->i_mode = S_IFDIR | 0755;
+inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+inode->i_op = &simple_dir_inode_operations;
+inode->i_fop = &simple_dir_operations;
+set_nlink(inode, 2);
+root = d_make_root(inode);
+if (!root)
+	return -ENOMEM;
+for (i = 0; !files->name || files->name[0]; i++, files++) {
+	if (!files->name)
+		continue;
+
+	/* warn if it tries to conflict with the root inode */
+	if (unlikely(i == 1))
+		printk(KERN_WARNING "%s: %s passed in a files array"
+			"with an index of 1!\n", __func__,
+			s->s_type->name);
+
+	dentry = d_alloc_name(root, files->name);
+	if (!dentry)
+		goto out;
+	inode = new_inode(s);
+	if (!inode) {
+		dput(dentry);
+		goto out;
+	}
+	inode->i_mode = files->mode;
+	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+	inode->i_fop = files->ops;
+	inode->i_ino = i;
+	d_add(dentry, inode);
+}
+s->s_root = root;
+return 0;
+out:
+d_genocide(root);
+shrink_dcache_parent(root);
+dput(root);
+return -ENOMEM;
+}
+
+
 static int xenfs_fill_super(struct super_block *sb, void *data, int silent)
 {
 	static struct tree_descr xenfs_files[] = {
@@ -60,7 +129,7 @@ static int xenfs_fill_super(struct super_block *sb, void *data, int silent)
 		{""},
 	};
 
-	return simple_fill_super(sb, XENFS_SUPER_MAGIC,
+	return xen_simple_fill_super(sb, XENFS_SUPER_MAGIC,
 			xen_initial_domain() ? xenfs_init_files : xenfs_files);
 }
 
-- 
1.9.1

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Reply via email to