from:"Amos Kong"

[PATCH v5 REPOST 3/6] hw_random: use reference counts on each struct hwrng.

2014-12-08 Thread Amos Kong

From: Rusty Russell ru...@rustcorp.com.au

current_rng holds one reference, and we bump it every time we want
to do a read from it.

This means we only hold the rng_mutex to grab or drop a reference,
so accessing /sys/devices/virtual/misc/hw_random/rng_current doesn't
block on read of /dev/hwrng.

Using a kref is overkill (we're always under the rng_mutex), but
a standard pattern.

This also solves the problem that the hwrng_fillfn thread was
accessing current_rng without a lock, which could change (eg. to NULL)
underneath it.

v5: drop redundant kref_init()
v4: decrease last reference for triggering the cleanup
v3: initialize kref (thanks Amos Kong)
v2: fix missing put_rng() on exit path (thanks Amos Kong)
Signed-off-by: Rusty Russell ru...@rustcorp.com.au
Signed-off-by: Amos Kong ak...@redhat.com
---
 drivers/char/hw_random/core.c | 135 --
 include/linux/hw_random.h |   2 +
 2 files changed, 94 insertions(+), 43 deletions(-)

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index a0905c8..83516cb 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -42,6 +42,7 @@
 #include linux/delay.h
 #include linux/slab.h
 #include linux/random.h
+#include linux/err.h
 #include asm/uaccess.h
 
 
@@ -91,6 +92,60 @@ static void add_early_randomness(struct hwrng *rng)
add_device_randomness(bytes, bytes_read);
 }
 
+static inline void cleanup_rng(struct kref *kref)
+{
+   struct hwrng *rng = container_of(kref, struct hwrng, ref);
+
+   if (rng-cleanup)
+   rng-cleanup(rng);
+}
+
+static void set_current_rng(struct hwrng *rng)
+{
+   BUG_ON(!mutex_is_locked(rng_mutex));
+   kref_get(rng-ref);
+   current_rng = rng;
+}
+
+static void drop_current_rng(void)
+{
+   BUG_ON(!mutex_is_locked(rng_mutex));
+   if (!current_rng)
+   return;
+
+   /* decrease last reference for triggering the cleanup */
+   kref_put(current_rng-ref, cleanup_rng);
+   current_rng = NULL;
+}
+
+/* Returns ERR_PTR(), NULL or refcounted hwrng */
+static struct hwrng *get_current_rng(void)
+{
+   struct hwrng *rng;
+
+   if (mutex_lock_interruptible(rng_mutex))
+   return ERR_PTR(-ERESTARTSYS);
+
+   rng = current_rng;
+   if (rng)
+   kref_get(rng-ref);
+
+   mutex_unlock(rng_mutex);
+   return rng;
+}
+
+static void put_rng(struct hwrng *rng)
+{
+   /*
+* Hold rng_mutex here so we serialize in case they set_current_rng
+* on rng again immediately.
+*/
+   mutex_lock(rng_mutex);
+   if (rng)
+   kref_put(rng-ref, cleanup_rng);
+   mutex_unlock(rng_mutex);
+}
+
 static inline int hwrng_init(struct hwrng *rng)
 {
if (rng-init) {
@@ -113,12 +168,6 @@ static inline int hwrng_init(struct hwrng *rng)
return 0;
 }
 
-static inline void hwrng_cleanup(struct hwrng *rng)
-{
-   if (rng  rng-cleanup)
-   rng-cleanup(rng);
-}
-
 static int rng_dev_open(struct inode *inode, struct file *filp)
 {
/* enforce read-only access to this chrdev */
@@ -154,21 +203,22 @@ static ssize_t rng_dev_read(struct file *filp, char 
__user *buf,
ssize_t ret = 0;
int err = 0;
int bytes_read, len;
+   struct hwrng *rng;
 
while (size) {
-   if (mutex_lock_interruptible(rng_mutex)) {
-   err = -ERESTARTSYS;
+   rng = get_current_rng();
+   if (IS_ERR(rng)) {
+   err = PTR_ERR(rng);
goto out;
}
-
-   if (!current_rng) {
+   if (!rng) {
err = -ENODEV;
-   goto out_unlock;
+   goto out;
}
 
mutex_lock(reading_mutex);
if (!data_avail) {
-   bytes_read = rng_get_data(current_rng, rng_buffer,
+   bytes_read = rng_get_data(rng, rng_buffer,
rng_buffer_size(),
!(filp-f_flags  O_NONBLOCK));
if (bytes_read  0) {
@@ -200,8 +250,8 @@ static ssize_t rng_dev_read(struct file *filp, char __user 
*buf,
ret += len;
}
 
-   mutex_unlock(rng_mutex);
mutex_unlock(reading_mutex);
+   put_rng(rng);
 
if (need_resched())
schedule_timeout_interruptible(1);
@@ -213,12 +263,11 @@ static ssize_t rng_dev_read(struct file *filp, char 
__user *buf,
}
 out:
return ret ? : err;
-out_unlock:
-   mutex_unlock(rng_mutex);
-   goto out;
+
 out_unlock_reading:
mutex_unlock(reading_mutex);
-   goto out_unlock;
+   put_rng(rng);
+   goto out;
 }
 
 
@@ -257,8 +306,8 @@ static ssize_t hwrng_attr_current_store(struct device *dev

[PATCH v5 REPOST 5/6] hw_random: don't double-check old_rng.

2014-12-08 Thread Amos Kong

From: Rusty Russell ru...@rustcorp.com.au

Interesting anti-pattern.

Signed-off-by: Rusty Russell ru...@rustcorp.com.au
---
 drivers/char/hw_random/core.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index 067270b..a9286bf 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -476,14 +476,13 @@ int hwrng_register(struct hwrng *rng)
}
 
old_rng = current_rng;
+   err = 0;
if (!old_rng) {
err = hwrng_init(rng);
if (err)
goto out_unlock;
set_current_rng(rng);
-   }
-   err = 0;
-   if (!old_rng) {
+
err = register_miscdev();
if (err) {
drop_current_rng();
-- 
1.9.3

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v5 REPOST 0/6] fix hw_random stuck

2014-12-08 Thread Amos Kong

When I hotunplug a busy virtio-rng device or try to access
hwrng attributes in non-smp guest, it gets stuck.

My hotplug tests:

| test 0:
|   hotunplug rng device from qemu monitor
|
| test 1:
|   guest) # dd if=/dev/hwrng of=/dev/null 
|   hotunplug rng device from qemu monitor
|
| test 2:
|   guest) # dd if=/dev/random of=/dev/null 
|   hotunplug rng device from qemu monitor
|
| test 4:
|   guest) # dd if=/dev/hwrng of=/dev/null 
|   cat /sys/devices/virtual/misc/hw_random/rng_*
|
| test 5:
|   guest) # dd if=/dev/hwrng of=/dev/null
|   cancel dd process after 10 seconds
|   guest) # dd if=/dev/hwrng of=/dev/null 
|   hotunplug rng device from qemu monitor
|
| test 6:
|   use a fifo as rng backend, execute test 0 ~ 5 with no input of fifo

V5: reset cleanup_done flag, drop redundant init of reference count, use
compiler barrier to prevent recording.
V4: update patch 4 to fix corrupt, decrease last reference for triggering
the cleanup, fix unregister race pointed by Herbert
V3: initialize kref to 1
V2: added patch 2 to fix a deadlock, update current patch 3 to fix reference
counting issue

Amos Kong (1):
  hw_random: move some code out mutex_lock for avoiding underlying
deadlock

Rusty Russell (5):
  hw_random: place mutex around read functions and buffers.
  hw_random: use reference counts on each struct hwrng.
  hw_random: fix unregister race.
  hw_random: don't double-check old_rng.
  hw_random: don't init list element we're about to add to list.

 drivers/char/hw_random/core.c | 173 ++
 include/linux/hw_random.h |   3 +
 2 files changed, 126 insertions(+), 50 deletions(-)

-- 
1.9.3

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v5 REPOST 1/6] hw_random: place mutex around read functions and buffers.

2014-12-08 Thread Amos Kong

From: Rusty Russell ru...@rustcorp.com.au

There's currently a big lock around everything, and it means that we
can't query sysfs (eg /sys/devices/virtual/misc/hw_random/rng_current)
while the rng is reading.  This is a real problem when the rng is slow,
or blocked (eg. virtio_rng with qemu's default /dev/random backend)

This doesn't help (it leaves the current lock untouched), just adds a
lock to protect the read function and the static buffers, in preparation
for transition.

Signed-off-by: Rusty Russell ru...@rustcorp.com.au
---
 drivers/char/hw_random/core.c | 20 +---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index aa30a25..b1b6042 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -53,7 +53,10 @@
 static struct hwrng *current_rng;
 static struct task_struct *hwrng_fill;
 static LIST_HEAD(rng_list);
+/* Protects rng_list and current_rng */
 static DEFINE_MUTEX(rng_mutex);
+/* Protects rng read functions, data_avail, rng_buffer and rng_fillbuf */
+static DEFINE_MUTEX(reading_mutex);
 static int data_avail;
 static u8 *rng_buffer, *rng_fillbuf;
 static unsigned short current_quality;
@@ -81,7 +84,9 @@ static void add_early_randomness(struct hwrng *rng)
unsigned char bytes[16];
int bytes_read;
 
+   mutex_lock(reading_mutex);
bytes_read = rng_get_data(rng, bytes, sizeof(bytes), 1);
+   mutex_unlock(reading_mutex);
if (bytes_read  0)
add_device_randomness(bytes, bytes_read);
 }
@@ -128,6 +133,7 @@ static inline int rng_get_data(struct hwrng *rng, u8 
*buffer, size_t size,
int wait) {
int present;
 
+   BUG_ON(!mutex_is_locked(reading_mutex));
if (rng-read)
return rng-read(rng, (void *)buffer, size, wait);
 
@@ -160,13 +166,14 @@ static ssize_t rng_dev_read(struct file *filp, char 
__user *buf,
goto out_unlock;
}
 
+   mutex_lock(reading_mutex);
if (!data_avail) {
bytes_read = rng_get_data(current_rng, rng_buffer,
rng_buffer_size(),
!(filp-f_flags  O_NONBLOCK));
if (bytes_read  0) {
err = bytes_read;
-   goto out_unlock;
+   goto out_unlock_reading;
}
data_avail = bytes_read;
}
@@ -174,7 +181,7 @@ static ssize_t rng_dev_read(struct file *filp, char __user 
*buf,
if (!data_avail) {
if (filp-f_flags  O_NONBLOCK) {
err = -EAGAIN;
-   goto out_unlock;
+   goto out_unlock_reading;
}
} else {
len = data_avail;
@@ -186,7 +193,7 @@ static ssize_t rng_dev_read(struct file *filp, char __user 
*buf,
if (copy_to_user(buf + ret, rng_buffer + data_avail,
len)) {
err = -EFAULT;
-   goto out_unlock;
+   goto out_unlock_reading;
}
 
size -= len;
@@ -194,6 +201,7 @@ static ssize_t rng_dev_read(struct file *filp, char __user 
*buf,
}
 
mutex_unlock(rng_mutex);
+   mutex_unlock(reading_mutex);
 
if (need_resched())
schedule_timeout_interruptible(1);
@@ -208,6 +216,9 @@ out:
 out_unlock:
mutex_unlock(rng_mutex);
goto out;
+out_unlock_reading:
+   mutex_unlock(reading_mutex);
+   goto out_unlock;
 }
 
 
@@ -348,13 +359,16 @@ static int hwrng_fillfn(void *unused)
while (!kthread_should_stop()) {
if (!current_rng)
break;
+   mutex_lock(reading_mutex);
rc = rng_get_data(current_rng, rng_fillbuf,
  rng_buffer_size(), 1);
+   mutex_unlock(reading_mutex);
if (rc = 0) {
pr_warn(hwrng: no data available\n);
msleep_interruptible(1);
continue;
}
+   /* Outside lock, sure, but y'know: randomness. */
add_hwgenerator_randomness((void *)rng_fillbuf, rc,
   rc * current_quality * 8  10);
}
-- 
1.9.3

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v5 REPOST 2/6] hw_random: move some code out mutex_lock for avoiding underlying deadlock

2014-12-08 Thread Amos Kong

In next patch, we use reference counting for each struct hwrng,
changing reference count also needs to take mutex_lock. Before
releasing the lock, if we try to stop a kthread that waits to
take the lock to reduce the referencing count, deadlock will
occur.

Signed-off-by: Amos Kong ak...@redhat.com
---
 drivers/char/hw_random/core.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index b1b6042..a0905c8 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -474,12 +474,12 @@ void hwrng_unregister(struct hwrng *rng)
}
}
if (list_empty(rng_list)) {
+   mutex_unlock(rng_mutex);
unregister_miscdev();
if (hwrng_fill)
kthread_stop(hwrng_fill);
-   }
-
-   mutex_unlock(rng_mutex);
+   } else
+   mutex_unlock(rng_mutex);
 }
 EXPORT_SYMBOL_GPL(hwrng_unregister);
 
-- 
1.9.3

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v5 REPOST 4/6] hw_random: fix unregister race.

2014-12-08 Thread Amos Kong

From: Rusty Russell ru...@rustcorp.com.au

The previous patch added one potential problem: we can still be
reading from a hwrng when it's unregistered.  Add a wait for zero
in the hwrng_unregister path.

v5: reset cleanup_done flag, use compiler barrier to prevent recording.
v4: add cleanup_done flag to insure that cleanup is done

Signed-off-by: Rusty Russell ru...@rustcorp.com.au
Signed-off-by: Amos Kong ak...@redhat.com
---
 drivers/char/hw_random/core.c | 12 
 include/linux/hw_random.h |  1 +
 2 files changed, 13 insertions(+)

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index 83516cb..067270b 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -60,6 +60,7 @@ static DEFINE_MUTEX(rng_mutex);
 static DEFINE_MUTEX(reading_mutex);
 static int data_avail;
 static u8 *rng_buffer, *rng_fillbuf;
+static DECLARE_WAIT_QUEUE_HEAD(rng_done);
 static unsigned short current_quality;
 static unsigned short default_quality; /* = 0; default to off */
 
@@ -98,6 +99,11 @@ static inline void cleanup_rng(struct kref *kref)
 
if (rng-cleanup)
rng-cleanup(rng);
+
+   /* cleanup_done should be updated after cleanup finishes */
+   smp_wmb();
+   rng-cleanup_done = true;
+   wake_up_all(rng_done);
 }
 
 static void set_current_rng(struct hwrng *rng)
@@ -498,6 +504,8 @@ int hwrng_register(struct hwrng *rng)
add_early_randomness(rng);
}
 
+   rng-cleanup_done = false;
+
 out_unlock:
mutex_unlock(rng_mutex);
 out:
@@ -529,6 +537,10 @@ void hwrng_unregister(struct hwrng *rng)
kthread_stop(hwrng_fill);
} else
mutex_unlock(rng_mutex);
+
+   /* Just in case rng is reading right now, wait. */
+   wait_event(rng_done, rng-cleanup_done 
+  atomic_read(rng-ref.refcount) == 0);
 }
 EXPORT_SYMBOL_GPL(hwrng_unregister);
 
diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h
index c212e71..7832e50 100644
--- a/include/linux/hw_random.h
+++ b/include/linux/hw_random.h
@@ -46,6 +46,7 @@ struct hwrng {
/* internal. */
struct list_head list;
struct kref ref;
+   bool cleanup_done;
 };
 
 /** Register a new Hardware Random Number Generator driver. */
-- 
1.9.3

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v5 REPOST 6/6] hw_random: don't init list element we're about to add to list.

2014-12-08 Thread Amos Kong

From: Rusty Russell ru...@rustcorp.com.au

Another interesting anti-pattern.

Signed-off-by: Rusty Russell ru...@rustcorp.com.au
---
 drivers/char/hw_random/core.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index a9286bf..4d13ac5 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -489,7 +489,6 @@ int hwrng_register(struct hwrng *rng)
goto out_unlock;
}
}
-   INIT_LIST_HEAD(rng-list);
list_add_tail(rng-list, rng_list);
 
if (old_rng  !rng-init) {
-- 
1.9.3

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH v4 4/6] hw_random: fix unregister race.

2014-12-05 Thread Amos Kong

On Wed, Nov 12, 2014 at 02:33:00PM +1030, Rusty Russell wrote:
 Amos Kong ak...@redhat.com writes:
  From: Rusty Russell ru...@rustcorp.com.au
 
  The previous patch added one potential problem: we can still be
  reading from a hwrng when it's unregistered.  Add a wait for zero
  in the hwrng_unregister path.
 
  v4: add cleanup_done flag to insure that cleanup is done
 
 That's a bit weird.  The usual pattern would be to hold a reference
 until we're actually finished, but this reference is a bit weird.
 
 We hold the mutex across cleanup, so we could grab that but we have to
 take care sleeping inside wait_event, otherwise Peter will have to fix
 my code again :)
 
 AFAICT the wake_woken() stuff isn't merged yet, so your patch will
 have to do for now.
 
  @@ -98,6 +99,8 @@ static inline void cleanup_rng(struct kref *kref)
   
  if (rng-cleanup)
  rng-cleanup(rng);
  +   rng-cleanup_done = true;
  +   wake_up_all(rng_done);
   }
   
   static void set_current_rng(struct hwrng *rng)
  @@ -536,6 +539,11 @@ void hwrng_unregister(struct hwrng *rng)
  kthread_stop(hwrng_fill);
  } else
  mutex_unlock(rng_mutex);
  +
  +   /* Just in case rng is reading right now, wait. */
  +   wait_event(rng_done, rng-cleanup_done 
  +  atomic_read(rng-ref.refcount) == 0);
  +
 
 The atomic_read() isn't necessary here.

At least, we need it to convert refcount from atomic_t to int.
Otherwise, we will touch this error:

  error: invalid operands to binary == (have 'atomic_t' and 'int')
 
 However, you should probably init cleanup_done in hwrng_register().
 (Probably noone does unregister then register, but let's be clear).
 
 Thanks,
 Rusty.

-- 
Amos.


signature.asc
Description: Digital signature
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v5 2/6] hw_random: move some code out mutex_lock for avoiding underlying deadlock

2014-12-05 Thread Amos Kong

In next patch, we use reference counting for each struct hwrng,
changing reference count also needs to take mutex_lock. Before
releasing the lock, if we try to stop a kthread that waits to
take the lock to reduce the referencing count, deadlock will
occur.

Signed-off-by: Amos Kong ak...@redhat.com
---
 drivers/char/hw_random/core.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index b1b6042..a0905c8 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -474,12 +474,12 @@ void hwrng_unregister(struct hwrng *rng)
}
}
if (list_empty(rng_list)) {
+   mutex_unlock(rng_mutex);
unregister_miscdev();
if (hwrng_fill)
kthread_stop(hwrng_fill);
-   }
-
-   mutex_unlock(rng_mutex);
+   } else
+   mutex_unlock(rng_mutex);
 }
 EXPORT_SYMBOL_GPL(hwrng_unregister);
 
-- 
1.9.3

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v5 1/6] hw_random: place mutex around read functions and buffers.

2014-12-05 Thread Amos Kong

From: Rusty Russell ru...@rustcorp.com.au

There's currently a big lock around everything, and it means that we
can't query sysfs (eg /sys/devices/virtual/misc/hw_random/rng_current)
while the rng is reading.  This is a real problem when the rng is slow,
or blocked (eg. virtio_rng with qemu's default /dev/random backend)

This doesn't help (it leaves the current lock untouched), just adds a
lock to protect the read function and the static buffers, in preparation
for transition.

Signed-off-by: Rusty Russell ru...@rustcorp.com.au
---
 drivers/char/hw_random/core.c | 20 +---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index aa30a25..b1b6042 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -53,7 +53,10 @@
 static struct hwrng *current_rng;
 static struct task_struct *hwrng_fill;
 static LIST_HEAD(rng_list);
+/* Protects rng_list and current_rng */
 static DEFINE_MUTEX(rng_mutex);
+/* Protects rng read functions, data_avail, rng_buffer and rng_fillbuf */
+static DEFINE_MUTEX(reading_mutex);
 static int data_avail;
 static u8 *rng_buffer, *rng_fillbuf;
 static unsigned short current_quality;
@@ -81,7 +84,9 @@ static void add_early_randomness(struct hwrng *rng)
unsigned char bytes[16];
int bytes_read;
 
+   mutex_lock(reading_mutex);
bytes_read = rng_get_data(rng, bytes, sizeof(bytes), 1);
+   mutex_unlock(reading_mutex);
if (bytes_read  0)
add_device_randomness(bytes, bytes_read);
 }
@@ -128,6 +133,7 @@ static inline int rng_get_data(struct hwrng *rng, u8 
*buffer, size_t size,
int wait) {
int present;
 
+   BUG_ON(!mutex_is_locked(reading_mutex));
if (rng-read)
return rng-read(rng, (void *)buffer, size, wait);
 
@@ -160,13 +166,14 @@ static ssize_t rng_dev_read(struct file *filp, char 
__user *buf,
goto out_unlock;
}
 
+   mutex_lock(reading_mutex);
if (!data_avail) {
bytes_read = rng_get_data(current_rng, rng_buffer,
rng_buffer_size(),
!(filp-f_flags  O_NONBLOCK));
if (bytes_read  0) {
err = bytes_read;
-   goto out_unlock;
+   goto out_unlock_reading;
}
data_avail = bytes_read;
}
@@ -174,7 +181,7 @@ static ssize_t rng_dev_read(struct file *filp, char __user 
*buf,
if (!data_avail) {
if (filp-f_flags  O_NONBLOCK) {
err = -EAGAIN;
-   goto out_unlock;
+   goto out_unlock_reading;
}
} else {
len = data_avail;
@@ -186,7 +193,7 @@ static ssize_t rng_dev_read(struct file *filp, char __user 
*buf,
if (copy_to_user(buf + ret, rng_buffer + data_avail,
len)) {
err = -EFAULT;
-   goto out_unlock;
+   goto out_unlock_reading;
}
 
size -= len;
@@ -194,6 +201,7 @@ static ssize_t rng_dev_read(struct file *filp, char __user 
*buf,
}
 
mutex_unlock(rng_mutex);
+   mutex_unlock(reading_mutex);
 
if (need_resched())
schedule_timeout_interruptible(1);
@@ -208,6 +216,9 @@ out:
 out_unlock:
mutex_unlock(rng_mutex);
goto out;
+out_unlock_reading:
+   mutex_unlock(reading_mutex);
+   goto out_unlock;
 }
 
 
@@ -348,13 +359,16 @@ static int hwrng_fillfn(void *unused)
while (!kthread_should_stop()) {
if (!current_rng)
break;
+   mutex_lock(reading_mutex);
rc = rng_get_data(current_rng, rng_fillbuf,
  rng_buffer_size(), 1);
+   mutex_unlock(reading_mutex);
if (rc = 0) {
pr_warn(hwrng: no data available\n);
msleep_interruptible(1);
continue;
}
+   /* Outside lock, sure, but y'know: randomness. */
add_hwgenerator_randomness((void *)rng_fillbuf, rc,
   rc * current_quality * 8  10);
}
-- 
1.9.3

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v5 3/6] hw_random: use reference counts on each struct hwrng.

2014-12-05 Thread Amos Kong

From: Rusty Russell ru...@rustcorp.com.au

current_rng holds one reference, and we bump it every time we want
to do a read from it.

This means we only hold the rng_mutex to grab or drop a reference,
so accessing /sys/devices/virtual/misc/hw_random/rng_current doesn't
block on read of /dev/hwrng.

Using a kref is overkill (we're always under the rng_mutex), but
a standard pattern.

This also solves the problem that the hwrng_fillfn thread was
accessing current_rng without a lock, which could change (eg. to NULL)
underneath it.

v5: drop redundant kref_init()
v4: decrease last reference for triggering the cleanup
v3: initialize kref (thanks Amos Kong)
v2: fix missing put_rng() on exit path (thanks Amos Kong)
Signed-off-by: Rusty Russell ru...@rustcorp.com.au
Signed-off-by: Amos Kong ak...@redhat.com
---
 drivers/char/hw_random/core.c | 135 --
 include/linux/hw_random.h |   2 +
 2 files changed, 94 insertions(+), 43 deletions(-)

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index a0905c8..83516cb 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -42,6 +42,7 @@
 #include linux/delay.h
 #include linux/slab.h
 #include linux/random.h
+#include linux/err.h
 #include asm/uaccess.h
 
 
@@ -91,6 +92,60 @@ static void add_early_randomness(struct hwrng *rng)
add_device_randomness(bytes, bytes_read);
 }
 
+static inline void cleanup_rng(struct kref *kref)
+{
+   struct hwrng *rng = container_of(kref, struct hwrng, ref);
+
+   if (rng-cleanup)
+   rng-cleanup(rng);
+}
+
+static void set_current_rng(struct hwrng *rng)
+{
+   BUG_ON(!mutex_is_locked(rng_mutex));
+   kref_get(rng-ref);
+   current_rng = rng;
+}
+
+static void drop_current_rng(void)
+{
+   BUG_ON(!mutex_is_locked(rng_mutex));
+   if (!current_rng)
+   return;
+
+   /* decrease last reference for triggering the cleanup */
+   kref_put(current_rng-ref, cleanup_rng);
+   current_rng = NULL;
+}
+
+/* Returns ERR_PTR(), NULL or refcounted hwrng */
+static struct hwrng *get_current_rng(void)
+{
+   struct hwrng *rng;
+
+   if (mutex_lock_interruptible(rng_mutex))
+   return ERR_PTR(-ERESTARTSYS);
+
+   rng = current_rng;
+   if (rng)
+   kref_get(rng-ref);
+
+   mutex_unlock(rng_mutex);
+   return rng;
+}
+
+static void put_rng(struct hwrng *rng)
+{
+   /*
+* Hold rng_mutex here so we serialize in case they set_current_rng
+* on rng again immediately.
+*/
+   mutex_lock(rng_mutex);
+   if (rng)
+   kref_put(rng-ref, cleanup_rng);
+   mutex_unlock(rng_mutex);
+}
+
 static inline int hwrng_init(struct hwrng *rng)
 {
if (rng-init) {
@@ -113,12 +168,6 @@ static inline int hwrng_init(struct hwrng *rng)
return 0;
 }
 
-static inline void hwrng_cleanup(struct hwrng *rng)
-{
-   if (rng  rng-cleanup)
-   rng-cleanup(rng);
-}
-
 static int rng_dev_open(struct inode *inode, struct file *filp)
 {
/* enforce read-only access to this chrdev */
@@ -154,21 +203,22 @@ static ssize_t rng_dev_read(struct file *filp, char 
__user *buf,
ssize_t ret = 0;
int err = 0;
int bytes_read, len;
+   struct hwrng *rng;
 
while (size) {
-   if (mutex_lock_interruptible(rng_mutex)) {
-   err = -ERESTARTSYS;
+   rng = get_current_rng();
+   if (IS_ERR(rng)) {
+   err = PTR_ERR(rng);
goto out;
}
-
-   if (!current_rng) {
+   if (!rng) {
err = -ENODEV;
-   goto out_unlock;
+   goto out;
}
 
mutex_lock(reading_mutex);
if (!data_avail) {
-   bytes_read = rng_get_data(current_rng, rng_buffer,
+   bytes_read = rng_get_data(rng, rng_buffer,
rng_buffer_size(),
!(filp-f_flags  O_NONBLOCK));
if (bytes_read  0) {
@@ -200,8 +250,8 @@ static ssize_t rng_dev_read(struct file *filp, char __user 
*buf,
ret += len;
}
 
-   mutex_unlock(rng_mutex);
mutex_unlock(reading_mutex);
+   put_rng(rng);
 
if (need_resched())
schedule_timeout_interruptible(1);
@@ -213,12 +263,11 @@ static ssize_t rng_dev_read(struct file *filp, char 
__user *buf,
}
 out:
return ret ? : err;
-out_unlock:
-   mutex_unlock(rng_mutex);
-   goto out;
+
 out_unlock_reading:
mutex_unlock(reading_mutex);
-   goto out_unlock;
+   put_rng(rng);
+   goto out;
 }
 
 
@@ -257,8 +306,8 @@ static ssize_t hwrng_attr_current_store(struct device *dev

[PATCH v5 0/6] fix hw_random stuck

2014-12-05 Thread Amos Kong

When I hotunplug a busy virtio-rng device or try to access
hwrng attributes in non-smp guest, it gets stuck.

My hotplug tests:

| test 0:
|   hotunplug rng device from qemu monitor
|
| test 1:
|   guest) # dd if=/dev/hwrng of=/dev/null 
|   hotunplug rng device from qemu monitor
|
| test 2:
|   guest) # dd if=/dev/random of=/dev/null 
|   hotunplug rng device from qemu monitor
|
| test 4:
|   guest) # dd if=/dev/hwrng of=/dev/null 
|   cat /sys/devices/virtual/misc/hw_random/rng_*
|
| test 5:
|   guest) # dd if=/dev/hwrng of=/dev/null
|   cancel dd process after 10 seconds
|   guest) # dd if=/dev/hwrng of=/dev/null 
|   hotunplug rng device from qemu monitor
|
| test 6:
|   use a fifo as rng backend, execute test 0 ~ 5 with no input of fifo

V5: reset cleanup_done flag, drop redundant init of reference count, use
compiler barrier to prevent recording.
V4: update patch 4 to fix corrupt, decrease last reference for triggering
the cleanup, fix unregister race pointed by Herbert
V3: initialize kref to 1
V2: added patch 2 to fix a deadlock, update current patch 3 to fix reference
counting issue

Amos Kong (1):
  hw_random: move some code out mutex_lock for avoiding underlying
deadlock

Rusty Russell (5):
  hw_random: place mutex around read functions and buffers.
  hw_random: use reference counts on each struct hwrng.
  hw_random: fix unregister race.
  hw_random: don't double-check old_rng.
  hw_random: don't init list element we're about to add to list.

 drivers/char/hw_random/core.c | 173 ++
 include/linux/hw_random.h |   3 +
 2 files changed, 126 insertions(+), 50 deletions(-)

-- 
1.9.3

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v5 4/6] hw_random: fix unregister race.

2014-12-05 Thread Amos Kong

From: Rusty Russell ru...@rustcorp.com.au

The previous patch added one potential problem: we can still be
reading from a hwrng when it's unregistered.  Add a wait for zero
in the hwrng_unregister path.

v5: reset cleanup_done flag, use compiler barrier to prevent recording.
v4: add cleanup_done flag to insure that cleanup is done

Signed-off-by: Rusty Russell ru...@rustcorp.com.au
Signed-off-by: Amos Kong ak...@redhat.com
---
 drivers/char/hw_random/core.c | 12 
 include/linux/hw_random.h |  1 +
 2 files changed, 13 insertions(+)

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index 83516cb..067270b 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -60,6 +60,7 @@ static DEFINE_MUTEX(rng_mutex);
 static DEFINE_MUTEX(reading_mutex);
 static int data_avail;
 static u8 *rng_buffer, *rng_fillbuf;
+static DECLARE_WAIT_QUEUE_HEAD(rng_done);
 static unsigned short current_quality;
 static unsigned short default_quality; /* = 0; default to off */
 
@@ -98,6 +99,11 @@ static inline void cleanup_rng(struct kref *kref)
 
if (rng-cleanup)
rng-cleanup(rng);
+
+   /* cleanup_done should be updated after cleanup finishes */
+   smp_wmb();
+   rng-cleanup_done = true;
+   wake_up_all(rng_done);
 }
 
 static void set_current_rng(struct hwrng *rng)
@@ -498,6 +504,8 @@ int hwrng_register(struct hwrng *rng)
add_early_randomness(rng);
}
 
+   rng-cleanup_done = false;
+
 out_unlock:
mutex_unlock(rng_mutex);
 out:
@@ -529,6 +537,10 @@ void hwrng_unregister(struct hwrng *rng)
kthread_stop(hwrng_fill);
} else
mutex_unlock(rng_mutex);
+
+   /* Just in case rng is reading right now, wait. */
+   wait_event(rng_done, rng-cleanup_done 
+  atomic_read(rng-ref.refcount) == 0);
 }
 EXPORT_SYMBOL_GPL(hwrng_unregister);
 
diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h
index c212e71..7832e50 100644
--- a/include/linux/hw_random.h
+++ b/include/linux/hw_random.h
@@ -46,6 +46,7 @@ struct hwrng {
/* internal. */
struct list_head list;
struct kref ref;
+   bool cleanup_done;
 };
 
 /** Register a new Hardware Random Number Generator driver. */
-- 
1.9.3

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v5 6/6] hw_random: don't init list element we're about to add to list.

2014-12-05 Thread Amos Kong

From: Rusty Russell ru...@rustcorp.com.au

Another interesting anti-pattern.

Signed-off-by: Rusty Russell ru...@rustcorp.com.au
---
 drivers/char/hw_random/core.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index a9286bf..4d13ac5 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -489,7 +489,6 @@ int hwrng_register(struct hwrng *rng)
goto out_unlock;
}
}
-   INIT_LIST_HEAD(rng-list);
list_add_tail(rng-list, rng_list);
 
if (old_rng  !rng-init) {
-- 
1.9.3

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v5 5/6] hw_random: don't double-check old_rng.

2014-12-05 Thread Amos Kong

From: Rusty Russell ru...@rustcorp.com.au

Interesting anti-pattern.

Signed-off-by: Rusty Russell ru...@rustcorp.com.au
---
 drivers/char/hw_random/core.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index 067270b..a9286bf 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -476,14 +476,13 @@ int hwrng_register(struct hwrng *rng)
}
 
old_rng = current_rng;
+   err = 0;
if (!old_rng) {
err = hwrng_init(rng);
if (err)
goto out_unlock;
set_current_rng(rng);
-   }
-   err = 0;
-   if (!old_rng) {
+
err = register_miscdev();
if (err) {
drop_current_rng();
-- 
1.9.3

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH v4 4/6] hw_random: fix unregister race.

2014-11-24 Thread Amos Kong

On Wed, Nov 12, 2014 at 02:33:00PM +1030, Rusty Russell wrote:
 Amos Kong ak...@redhat.com writes:
  From: Rusty Russell ru...@rustcorp.com.au
 
  The previous patch added one potential problem: we can still be
  reading from a hwrng when it's unregistered.  Add a wait for zero
  in the hwrng_unregister path.
 
  v4: add cleanup_done flag to insure that cleanup is done
 
 That's a bit weird.  The usual pattern would be to hold a reference
 until we're actually finished, but this reference is a bit weird.

The cleanup function is a callback function of kref_put(), we can't
use the same reference count inside cleanup function.
 
 We hold the mutex across cleanup, so we could grab that but we have to
 take care sleeping inside wait_event, otherwise Peter will have to fix
 my code again :)

We didn't hold rng_mutex inside cleanup_rng(), am I missing something?
 
 AFAICT the wake_woken() stuff isn't merged yet, so your patch will
 have to do for now.

Can you provide some patches/mail link here? I searched nothing about 
wake_woken.
 
  @@ -98,6 +99,8 @@ static inline void cleanup_rng(struct kref *kref)
   
  if (rng-cleanup)
  rng-cleanup(rng);
  +   rng-cleanup_done = true;
  +   wake_up_all(rng_done);
   }
   
   static void set_current_rng(struct hwrng *rng)
  @@ -536,6 +539,11 @@ void hwrng_unregister(struct hwrng *rng)
  kthread_stop(hwrng_fill);
  } else
  mutex_unlock(rng_mutex);
  +
  +   /* Just in case rng is reading right now, wait. */
  +   wait_event(rng_done, rng-cleanup_done 
  +  atomic_read(rng-ref.refcount) == 0);
  +
 
 The atomic_read() isn't necessary here.
 
 However, you should probably init cleanup_done in hwrng_register().
 (Probably noone does unregister then register, but let's be clear).

Got it.
 
 Thanks,
 Rusty.
 
   }
   EXPORT_SYMBOL_GPL(hwrng_unregister);
   
  diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h
  index c212e71..7832e50 100644
  --- a/include/linux/hw_random.h
  +++ b/include/linux/hw_random.h
  @@ -46,6 +46,7 @@ struct hwrng {
  /* internal. */
  struct list_head list;
  struct kref ref;
  +   bool cleanup_done;
   };
   
   /** Register a new Hardware Random Number Generator driver. */
  -- 
  1.9.3

-- 
Amos.


signature.asc
Description: Digital signature
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH v4 3/6] hw_random: use reference counts on each struct hwrng.

2014-11-17 Thread Amos Kong

On Wed, Nov 12, 2014 at 02:11:23PM +1030, Rusty Russell wrote:
 Amos Kong ak...@redhat.com writes:
  From: Rusty Russell ru...@rustcorp.com.au
 
  current_rng holds one reference, and we bump it every time we want
  to do a read from it.
 
  This means we only hold the rng_mutex to grab or drop a reference,
  so accessing /sys/devices/virtual/misc/hw_random/rng_current doesn't
  block on read of /dev/hwrng.
 
  Using a kref is overkill (we're always under the rng_mutex), but
  a standard pattern.
 
  This also solves the problem that the hwrng_fillfn thread was
  accessing current_rng without a lock, which could change (eg. to NULL)
  underneath it.
 
  v4: decrease last reference for triggering the cleanup
 
 This doesn't make any sense:
 
  +static void drop_current_rng(void)
  +{
  +   struct hwrng *rng = current_rng;
  +
  +   BUG_ON(!mutex_is_locked(rng_mutex));
  +   if (!current_rng)
  +   return;
  +
  +   /* release current_rng reference */
  +   kref_put(current_rng-ref, cleanup_rng);
  +   current_rng = NULL;
  +
  +   /* decrease last reference for triggering the cleanup */
  +   kref_put(rng-ref, cleanup_rng);
  +}
 
 Why would it drop the refcount twice?  This doesn't make sense.
 
 Hmm, because you added kref_init, which initializes the reference count
 to 1, you created this bug.

I saw some kernel code uses kref_* helper functions, the reference
conter is initialized to 1. Some code didn't use the helper functions
to increase/decrease the reference counter. So I will drop kref_init()
and the second kref_put().
 
 Leave out the kref_init, and let it naturally be 0 (until, and if, it
 becomes current_rng).  Add a comment if you want.

OK, thanks.
 
 Thanks,
 Rusty.
 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html

-- 
Amos.


signature.asc
Description: Digital signature
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [3.16 stable PATCH 0/2] virtio-rng: two backports to fix stuck

2014-11-05 Thread Amos Kong

On Wed, Nov 05, 2014 at 04:02:49PM +, Luis Henriques wrote:
 Hi Amos
 
 On Tue, Nov 04, 2014 at 12:32:27PM +0800, Amos Kong wrote:
  On Sat, Oct 11, 2014 at 06:51:47AM +0800, Amos Kong wrote:
   I received two mails about faile to apply patches to 3.16-stable tree:
   
   FAILED: patch [PATCH] virtio-rng: skip reading when we start to remove 
   the device failed to apply to 3.16-stable tree
   FAILED: patch [PATCH] virtio-rng: fix stuck of hot-unplugging busy 
   device failed to apply to 3.16-stable tree
   
   Amit already backported two patches for 3.16-stable, then cherry-pick
   of my two patches works.
   
   Thanks.
  
  Ping Greg, thanks.
   
 
 I'm now doing the extended stable maintenance of the 3.16 kernel, as
 Greg has EOL'ed it.
 
 Anyway, I will be queuing these 2 patches for the extended 3.16
 kernel.  Thank you!

Thanks a lot!
 
 Cheers,
 --
 Luís
 
   Amos Kong (2):
 virtio-rng: fix stuck of hot-unplugging busy device
 virtio-rng: skip reading when we start to remove the device
   
drivers/char/hw_random/virtio-rng.c | 7 +++
1 file changed, 7 insertions(+)

-- 
Amos.


signature.asc
Description: Digital signature
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH 3/5] hw_random: fix unregister race.

2014-11-03 Thread Amos Kong

On Tue, Oct 21, 2014 at 10:15:23PM +0800, Herbert Xu wrote:
 On Thu, Sep 18, 2014 at 12:18:24PM +0930, Rusty Russell wrote:
  The previous patch added one potential problem: we can still be
  reading from a hwrng when it's unregistered.  Add a wait for zero
  in the hwrng_unregister path.
  
  Signed-off-by: Rusty Russell ru...@rustcorp.com.au
  ---
   drivers/char/hw_random/core.c | 5 +
   1 file changed, 5 insertions(+)
  
  diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
  index dc9092a1075d..b4a21e9521cf 100644
  --- a/drivers/char/hw_random/core.c
  +++ b/drivers/char/hw_random/core.c
  @@ -60,6 +60,7 @@ static DEFINE_MUTEX(rng_mutex);
   static DEFINE_MUTEX(reading_mutex);
   static int data_avail;
   static u8 *rng_buffer, *rng_fillbuf;
  +static DECLARE_WAIT_QUEUE_HEAD(rng_done);
   static unsigned short current_quality;
   static unsigned short default_quality; /* = 0; default to off */
   
  @@ -98,6 +99,7 @@ static inline void cleanup_rng(struct kref *kref)
   
  if (rng-cleanup)
  rng-cleanup(rng);

rng-cleanup_done = true;

  +   wake_up_all(rng_done);
   }
   
   static void set_current_rng(struct hwrng *rng)
  @@ -529,6 +531,9 @@ void hwrng_unregister(struct hwrng *rng)
  }
   
  mutex_unlock(rng_mutex);
  +
  +   /* Just in case rng is reading right now, wait. */
  +   wait_event(rng_done, atomic_read(rng-ref.refcount) == 0);

Hi Rusty,
 
After initializing (kref_init()), the refcount is 1, so we need one
more kref_put() after each drop_current_rng() to release last
reference count, then cleanup function will be called.


 While it's obviously better than what we have now, I don't believe
 this is 100% safe as the cleanup function might still be running
 even after the ref count hits zero.  Once we return from this function
 the module may be unloaded so we need to ensure that nothing is
 running at this point.

I found wait_event() can still pass and finish unregister even cleanup
function isn't called (wake_up_all() isn't called). So I added a flag
cleanup_done to indicate that the rng device is cleaned up.


+   /* Just in case rng is reading right now, wait. */
+   wait_event(rng_done, rng-cleanup_done 
+  atomic_read(rng-ref.refcount) == 0);

I will post the new v4 later.
 
 Cheers,
 -- 
 Email: Herbert Xu herb...@gondor.apana.org.au
 Home Page: http://gondor.apana.org.au/~herbert/
 PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

-- 
Amos.
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v4 2/6] hw_random: move some code out mutex_lock for avoiding underlying deadlock

2014-11-03 Thread Amos Kong

In next patch, we use reference counting for each struct hwrng,
changing reference count also needs to take mutex_lock. Before
releasing the lock, if we try to stop a kthread that waits to
take the lock to reduce the referencing count, deadlock will
occur.

Signed-off-by: Amos Kong ak...@redhat.com
---
 drivers/char/hw_random/core.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index b1b6042..a0905c8 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -474,12 +474,12 @@ void hwrng_unregister(struct hwrng *rng)
}
}
if (list_empty(rng_list)) {
+   mutex_unlock(rng_mutex);
unregister_miscdev();
if (hwrng_fill)
kthread_stop(hwrng_fill);
-   }
-
-   mutex_unlock(rng_mutex);
+   } else
+   mutex_unlock(rng_mutex);
 }
 EXPORT_SYMBOL_GPL(hwrng_unregister);
 
-- 
1.9.3

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v4 0/6] fix hw_random stuck

2014-11-03 Thread Amos Kong

When I hotunplug a busy virtio-rng device or try to access
hwrng attributes in non-smp guest, it gets stuck.

My hotplug tests:

| test 0:
|   hotunplug rng device from qemu monitor
|
| test 1:
|   guest) # dd if=/dev/hwrng of=/dev/null 
|   hotunplug rng device from qemu monitor
|
| test 2:
|   guest) # dd if=/dev/random of=/dev/null 
|   hotunplug rng device from qemu monitor
|
| test 4:
|   guest) # dd if=/dev/hwrng of=/dev/null 
|   cat /sys/devices/virtual/misc/hw_random/rng_*
|
| test 5:
|   guest) # dd if=/dev/hwrng of=/dev/null
|   cancel dd process after 10 seconds
|   guest) # dd if=/dev/hwrng of=/dev/null 
|   hotunplug rng device from qemu monitor
|
| test 6:
|   use a fifo as rng backend, execute test 0 ~ 5 with no input of fifo


V4: update patch 4 to fix corrupt, decrease last reference for triggering
the cleanup, fix unregister race pointed by Herbert
V3: initialize kref to 1
V2: added patch 2 to fix a deadlock, update current patch 3 to fix reference
counting issue

Amos Kong (1):
  hw_random: move some code out mutex_lock for avoiding underlying
deadlock

Rusty Russell (5):
  hw_random: place mutex around read functions and buffers.
  hw_random: use reference counts on each struct hwrng.
  hw_random: fix unregister race.
  hw_random: don't double-check old_rng.
  hw_random: don't init list element we're about to add to list.

 drivers/char/hw_random/core.c | 176 ++
 include/linux/hw_random.h |   3 +
 2 files changed, 129 insertions(+), 50 deletions(-)

-- 
1.9.3

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v4 4/6] hw_random: fix unregister race.

2014-11-03 Thread Amos Kong

From: Rusty Russell ru...@rustcorp.com.au

The previous patch added one potential problem: we can still be
reading from a hwrng when it's unregistered.  Add a wait for zero
in the hwrng_unregister path.

v4: add cleanup_done flag to insure that cleanup is done

Signed-off-by: Rusty Russell ru...@rustcorp.com.au
Signed-off-by: Amos Kong ak...@redhat.com
---
 drivers/char/hw_random/core.c | 8 
 include/linux/hw_random.h | 1 +
 2 files changed, 9 insertions(+)

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index 27ad6b4..c31bf91 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -60,6 +60,7 @@ static DEFINE_MUTEX(rng_mutex);
 static DEFINE_MUTEX(reading_mutex);
 static int data_avail;
 static u8 *rng_buffer, *rng_fillbuf;
+static DECLARE_WAIT_QUEUE_HEAD(rng_done);
 static unsigned short current_quality;
 static unsigned short default_quality; /* = 0; default to off */
 
@@ -98,6 +99,8 @@ static inline void cleanup_rng(struct kref *kref)
 
if (rng-cleanup)
rng-cleanup(rng);
+   rng-cleanup_done = true;
+   wake_up_all(rng_done);
 }
 
 static void set_current_rng(struct hwrng *rng)
@@ -536,6 +539,11 @@ void hwrng_unregister(struct hwrng *rng)
kthread_stop(hwrng_fill);
} else
mutex_unlock(rng_mutex);
+
+   /* Just in case rng is reading right now, wait. */
+   wait_event(rng_done, rng-cleanup_done 
+  atomic_read(rng-ref.refcount) == 0);
+
 }
 EXPORT_SYMBOL_GPL(hwrng_unregister);
 
diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h
index c212e71..7832e50 100644
--- a/include/linux/hw_random.h
+++ b/include/linux/hw_random.h
@@ -46,6 +46,7 @@ struct hwrng {
/* internal. */
struct list_head list;
struct kref ref;
+   bool cleanup_done;
 };
 
 /** Register a new Hardware Random Number Generator driver. */
-- 
1.9.3

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v4 3/6] hw_random: use reference counts on each struct hwrng.

2014-11-03 Thread Amos Kong

From: Rusty Russell ru...@rustcorp.com.au

current_rng holds one reference, and we bump it every time we want
to do a read from it.

This means we only hold the rng_mutex to grab or drop a reference,
so accessing /sys/devices/virtual/misc/hw_random/rng_current doesn't
block on read of /dev/hwrng.

Using a kref is overkill (we're always under the rng_mutex), but
a standard pattern.

This also solves the problem that the hwrng_fillfn thread was
accessing current_rng without a lock, which could change (eg. to NULL)
underneath it.

v4: decrease last reference for triggering the cleanup
v3: initialize kref (thanks Amos Kong)
v2: fix missing put_rng() on exit path (thanks Amos Kong)
Signed-off-by: Rusty Russell ru...@rustcorp.com.au
Signed-off-by: Amos Kong ak...@redhat.com
---
 drivers/char/hw_random/core.c | 142 +-
 include/linux/hw_random.h |   2 +
 2 files changed, 101 insertions(+), 43 deletions(-)

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index a0905c8..27ad6b4 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -42,6 +42,7 @@
 #include linux/delay.h
 #include linux/slab.h
 #include linux/random.h
+#include linux/err.h
 #include asm/uaccess.h
 
 
@@ -91,6 +92,65 @@ static void add_early_randomness(struct hwrng *rng)
add_device_randomness(bytes, bytes_read);
 }
 
+static inline void cleanup_rng(struct kref *kref)
+{
+   struct hwrng *rng = container_of(kref, struct hwrng, ref);
+
+   if (rng-cleanup)
+   rng-cleanup(rng);
+}
+
+static void set_current_rng(struct hwrng *rng)
+{
+   BUG_ON(!mutex_is_locked(rng_mutex));
+   kref_get(rng-ref);
+   current_rng = rng;
+}
+
+static void drop_current_rng(void)
+{
+   struct hwrng *rng = current_rng;
+
+   BUG_ON(!mutex_is_locked(rng_mutex));
+   if (!current_rng)
+   return;
+
+   /* release current_rng reference */
+   kref_put(current_rng-ref, cleanup_rng);
+   current_rng = NULL;
+
+   /* decrease last reference for triggering the cleanup */
+   kref_put(rng-ref, cleanup_rng);
+}
+
+/* Returns ERR_PTR(), NULL or refcounted hwrng */
+static struct hwrng *get_current_rng(void)
+{
+   struct hwrng *rng;
+
+   if (mutex_lock_interruptible(rng_mutex))
+   return ERR_PTR(-ERESTARTSYS);
+
+   rng = current_rng;
+   if (rng)
+   kref_get(rng-ref);
+
+   mutex_unlock(rng_mutex);
+   return rng;
+}
+
+static void put_rng(struct hwrng *rng)
+{
+   /*
+* Hold rng_mutex here so we serialize in case they set_current_rng
+* on rng again immediately.
+*/
+   mutex_lock(rng_mutex);
+   if (rng)
+   kref_put(rng-ref, cleanup_rng);
+   mutex_unlock(rng_mutex);
+}
+
 static inline int hwrng_init(struct hwrng *rng)
 {
if (rng-init) {
@@ -110,13 +170,9 @@ static inline int hwrng_init(struct hwrng *rng)
if (current_quality  0  !hwrng_fill)
start_khwrngd();
 
-   return 0;
-}
+   kref_init(rng-ref);
 
-static inline void hwrng_cleanup(struct hwrng *rng)
-{
-   if (rng  rng-cleanup)
-   rng-cleanup(rng);
+   return 0;
 }
 
 static int rng_dev_open(struct inode *inode, struct file *filp)
@@ -154,21 +210,22 @@ static ssize_t rng_dev_read(struct file *filp, char 
__user *buf,
ssize_t ret = 0;
int err = 0;
int bytes_read, len;
+   struct hwrng *rng;
 
while (size) {
-   if (mutex_lock_interruptible(rng_mutex)) {
-   err = -ERESTARTSYS;
+   rng = get_current_rng();
+   if (IS_ERR(rng)) {
+   err = PTR_ERR(rng);
goto out;
}
-
-   if (!current_rng) {
+   if (!rng) {
err = -ENODEV;
-   goto out_unlock;
+   goto out;
}
 
mutex_lock(reading_mutex);
if (!data_avail) {
-   bytes_read = rng_get_data(current_rng, rng_buffer,
+   bytes_read = rng_get_data(rng, rng_buffer,
rng_buffer_size(),
!(filp-f_flags  O_NONBLOCK));
if (bytes_read  0) {
@@ -200,8 +257,8 @@ static ssize_t rng_dev_read(struct file *filp, char __user 
*buf,
ret += len;
}
 
-   mutex_unlock(rng_mutex);
mutex_unlock(reading_mutex);
+   put_rng(rng);
 
if (need_resched())
schedule_timeout_interruptible(1);
@@ -213,12 +270,11 @@ static ssize_t rng_dev_read(struct file *filp, char 
__user *buf,
}
 out:
return ret ? : err;
-out_unlock:
-   mutex_unlock(rng_mutex);
-   goto out;
+
 out_unlock_reading:
mutex_unlock(reading_mutex

[PATCH v4 5/6] hw_random: don't double-check old_rng.

2014-11-03 Thread Amos Kong

From: Rusty Russell ru...@rustcorp.com.au

Interesting anti-pattern.

Signed-off-by: Rusty Russell ru...@rustcorp.com.au
---
 drivers/char/hw_random/core.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index c31bf91..fc5de7d 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -480,14 +480,13 @@ int hwrng_register(struct hwrng *rng)
}
 
old_rng = current_rng;
+   err = 0;
if (!old_rng) {
err = hwrng_init(rng);
if (err)
goto out_unlock;
set_current_rng(rng);
-   }
-   err = 0;
-   if (!old_rng) {
+
err = register_miscdev();
if (err) {
drop_current_rng();
-- 
1.9.3

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v4 6/6] hw_random: don't init list element we're about to add to list.

2014-11-03 Thread Amos Kong

From: Rusty Russell ru...@rustcorp.com.au

Another interesting anti-pattern.

Signed-off-by: Rusty Russell ru...@rustcorp.com.au
---
 drivers/char/hw_random/core.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index fc5de7d..b2cc8a1 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -493,7 +493,6 @@ int hwrng_register(struct hwrng *rng)
goto out_unlock;
}
}
-   INIT_LIST_HEAD(rng-list);
list_add_tail(rng-list, rng_list);
 
if (old_rng  !rng-init) {
-- 
1.9.3

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [3.16 stable PATCH 0/2] virtio-rng: two backports to fix stuck

2014-11-03 Thread Amos Kong

On Sat, Oct 11, 2014 at 06:51:47AM +0800, Amos Kong wrote:
 I received two mails about faile to apply patches to 3.16-stable tree:
 
 FAILED: patch [PATCH] virtio-rng: skip reading when we start to remove the 
 device failed to apply to 3.16-stable tree
 FAILED: patch [PATCH] virtio-rng: fix stuck of hot-unplugging busy device 
 failed to apply to 3.16-stable tree
 
 Amit already backported two patches for 3.16-stable, then cherry-pick
 of my two patches works.
 
 Thanks.

Ping Greg, thanks.
 
 Amos Kong (2):
   virtio-rng: fix stuck of hot-unplugging busy device
   virtio-rng: skip reading when we start to remove the device
 
  drivers/char/hw_random/virtio-rng.c | 7 +++
  1 file changed, 7 insertions(+)
 
 -- 
 1.9.3
 
 --
 To unsubscribe from this list: send the line unsubscribe linux-kernel in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 Please read the FAQ at  http://www.tux.org/lkml/

-- 
Amos.
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH v2 4/6] hw_random: fix unregister race.

2014-11-02 Thread Amos Kong

On Fri, Oct 31, 2014 at 03:23:21PM +0800, Herbert Xu wrote:
 On Fri, Oct 31, 2014 at 10:28:00AM +1030, Rusty Russell wrote:
  Herbert Xu herb...@gondor.apana.org.au writes:
   On Thu, Sep 18, 2014 at 08:37:45PM +0800, Amos Kong wrote:
   From: Rusty Russell ru...@rustcorp.com.au
   
   The previous patch added one potential problem: we can still be
   reading from a hwrng when it's unregistered.  Add a wait for zero
   in the hwrng_unregister path.
   
   Signed-off-by: Rusty Russell ru...@rustcorp.com.au
  
   You totally corrupted Rusty's patch.  If you're going to repost
   his series you better make sure that you've got the right patches.
  
   Just as well though as it made me think a little more about this
   patch :)
  
  OK Amos, can you please repost the complete series?
 
 Please fix the unregister race I identified first.

Ok, I redownload the patches from 
https://patchwork.kernel.org/project/LKML/list/?submitter=5
and rebase fixes of me and rusty on it. I will post a V4 later. Thanks.

-- 
Amos.
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH v2 4/6] hw_random: fix unregister race.

2014-11-02 Thread Amos Kong

On Sun, Nov 02, 2014 at 11:08:09PM +0800, Herbert Xu wrote:
 On Sun, Nov 02, 2014 at 11:06:13PM +0800, Amos Kong wrote:
  On Fri, Oct 31, 2014 at 03:23:21PM +0800, Herbert Xu wrote:
   On Fri, Oct 31, 2014 at 10:28:00AM +1030, Rusty Russell wrote:
Herbert Xu herb...@gondor.apana.org.au writes:
 On Thu, Sep 18, 2014 at 08:37:45PM +0800, Amos Kong wrote:
 From: Rusty Russell ru...@rustcorp.com.au
 
 The previous patch added one potential problem: we can still be
 reading from a hwrng when it's unregistered.  Add a wait for zero
 in the hwrng_unregister path.
 
 Signed-off-by: Rusty Russell ru...@rustcorp.com.au

 You totally corrupted Rusty's patch.  If you're going to repost
 his series you better make sure that you've got the right patches.

 Just as well though as it made me think a little more about this
 patch :)

OK Amos, can you please repost the complete series?
   
   Please fix the unregister race I identified first.
  
  Ok, I redownload the patches from 
  https://patchwork.kernel.org/project/LKML/list/?submitter=5
  and rebase fixes of me and rusty on it. I will post a V4 later. Thanks.
 
 Please fix the unregister race I pointed out before doing a v4.

Thanks for the remind, I got it right now  :-)

-- 
Amos.
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH v2 3/6] hw_random: use reference counts on each struct hwrng.

2014-10-19 Thread Amos Kong

On Mon, Oct 20, 2014 at 10:42:11AM +1030, Rusty Russell wrote:
 Amos Kong ak...@redhat.com writes:
  We got a warning in boot stage when above set_current_rng() is executed,
  it can be fixed by init rng-ref in hwrng_init().
 
 
  @@ -166,6 +169,8 @@ static inline int hwrng_init(struct hwrng *rng)
  if (current_quality  0  !hwrng_fill)
  start_khwrngd();
   
  +   kref_init(rng-ref);
  +
  return 0;
   }
 
 OK, I folded this fix on.

Thanks.

Reviewed-by: Amos Kong ak...@redhat.com
 
 Thanks,
 Rusty.
 
 hw_random: use reference counts on each struct hwrng.
 
 current_rng holds one reference, and we bump it every time we want
 to do a read from it.
 
 This means we only hold the rng_mutex to grab or drop a reference,
 so accessing /sys/devices/virtual/misc/hw_random/rng_current doesn't
 block on read of /dev/hwrng.
 
 Using a kref is overkill (we're always under the rng_mutex), but
 a standard pattern.
 
 This also solves the problem that the hwrng_fillfn thread was
 accessing current_rng without a lock, which could change (eg. to NULL)
 underneath it.
 
 v3: initialize kref (thanks Amos Kong)
 v2: fix missing put_rng() on exit path (thanks Amos Kong)
 Signed-off-by: Rusty Russell ru...@rustcorp.com.au
 
 diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
 index a0905c818e13..0ecac38da954 100644
 --- a/drivers/char/hw_random/core.c
 +++ b/drivers/char/hw_random/core.c

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[3.16 stable PATCH 2/2] virtio-rng: skip reading when we start to remove the device

2014-10-10 Thread Amos Kong

Before we really unregister the hwrng device, reading will get stuck if
the virtio device is reset. We should return error for reading when we
start to remove the device.

Signed-off-by: Amos Kong ak...@redhat.com
Reviewed-by: Amit Shah amit.s...@redhat.com
Cc: sta...@vger.kernel.org
Signed-off-by: Rusty Russell ru...@rustcorp.com.au
(cherry picked from commit f49819560f53b7f3a596a8ea2e6764dc86695b62)
Signed-off-by: Amos Kong ak...@redhat.com
---
 drivers/char/hw_random/virtio-rng.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/char/hw_random/virtio-rng.c 
b/drivers/char/hw_random/virtio-rng.c
index b50252c..cb1688a 100644
--- a/drivers/char/hw_random/virtio-rng.c
+++ b/drivers/char/hw_random/virtio-rng.c
@@ -37,6 +37,7 @@ struct virtrng_info {
char name[25];
int index;
bool hwrng_register_done;
+   bool hwrng_removed;
 };
 
 
@@ -69,6 +70,9 @@ static int virtio_read(struct hwrng *rng, void *buf, size_t 
size, bool wait)
int ret;
struct virtrng_info *vi = (struct virtrng_info *)rng-priv;
 
+   if (vi-hwrng_removed)
+   return -ENODEV;
+
if (!vi-busy) {
vi-busy = true;
init_completion(vi-have_data);
@@ -137,6 +141,7 @@ static void remove_common(struct virtio_device *vdev)
 {
struct virtrng_info *vi = vdev-priv;
 
+   vi-hwrng_removed = true;
vi-data_avail = 0;
complete(vi-have_data);
vdev-config-reset(vdev);
-- 
1.9.3

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[3.16 stable PATCH 1/2] virtio-rng: fix stuck of hot-unplugging busy device

2014-10-10 Thread Amos Kong

When we try to hot-remove a busy virtio-rng device from QEMU monitor,
the device can't be hot-removed. Because virtio-rng driver hangs at
wait_for_completion_killable().

This patch exits the waiting by completing have_data completion before
unregistering, resets data_avail to avoid the hwrng core use wrong
buffer bytes.

Signed-off-by: Amos Kong ak...@redhat.com
Reviewed-by: Amit Shah amit.s...@redhat.com
Cc: sta...@vger.kernel.org
Signed-off-by: Rusty Russell ru...@rustcorp.com.au
(cherry picked from commit 3856e548372513665670ca5db60d9a74b970fe0d)
Signed-off-by: Amos Kong ak...@redhat.com
---
 drivers/char/hw_random/virtio-rng.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/char/hw_random/virtio-rng.c 
b/drivers/char/hw_random/virtio-rng.c
index f1aa13b..b50252c 100644
--- a/drivers/char/hw_random/virtio-rng.c
+++ b/drivers/char/hw_random/virtio-rng.c
@@ -137,6 +137,8 @@ static void remove_common(struct virtio_device *vdev)
 {
struct virtrng_info *vi = vdev-priv;
 
+   vi-data_avail = 0;
+   complete(vi-have_data);
vdev-config-reset(vdev);
vi-busy = false;
if (vi-hwrng_register_done)
-- 
1.9.3

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[3.16 stable PATCH 0/2] virtio-rng: two backports to fix stuck

2014-10-10 Thread Amos Kong

I received two mails about faile to apply patches to 3.16-stable tree:

FAILED: patch [PATCH] virtio-rng: skip reading when we start to remove the 
device failed to apply to 3.16-stable tree
FAILED: patch [PATCH] virtio-rng: fix stuck of hot-unplugging busy device 
failed to apply to 3.16-stable tree

Amit already backported two patches for 3.16-stable, then cherry-pick
of my two patches works.

Thanks.

Amos Kong (2):
  virtio-rng: fix stuck of hot-unplugging busy device
  virtio-rng: skip reading when we start to remove the device

 drivers/char/hw_random/virtio-rng.c | 7 +++
 1 file changed, 7 insertions(+)

-- 
1.9.3

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH 2/5] hw_random: use reference counts on each struct hwrng.

2014-09-18 Thread Amos Kong

On Thu, Sep 18, 2014 at 12:18:23PM +0930, Rusty Russell wrote:
 current_rng holds one reference, and we bump it every time we want
 to do a read from it.
 
 This means we only hold the rng_mutex to grab or drop a reference,
 so accessing /sys/devices/virtual/misc/hw_random/rng_current doesn't
 block on read of /dev/hwrng.
 
 Using a kref is overkill (we're always under the rng_mutex), but
 a standard pattern.
 
 This also solves the problem that the hwrng_fillfn thread was
 accessing current_rng without a lock, which could change (eg. to NULL)
 underneath it.

Hi Rusty,
 
 Signed-off-by: Rusty Russell ru...@rustcorp.com.au
 ---
  drivers/char/hw_random/core.c | 135 
 --
  include/linux/hw_random.h |   2 +
  2 files changed, 94 insertions(+), 43 deletions(-)

...

  static int rng_dev_open(struct inode *inode, struct file *filp)
  {
   /* enforce read-only access to this chrdev */
 @@ -154,21 +202,22 @@ static ssize_t rng_dev_read(struct file *filp, char 
 __user *buf,
   ssize_t ret = 0;
   int err = 0;
   int bytes_read, len;
 + struct hwrng *rng;
  
   while (size) {
 - if (mutex_lock_interruptible(rng_mutex)) {
 - err = -ERESTARTSYS;
 + rng = get_current_rng();
 + if (IS_ERR(rng)) {
 + err = PTR_ERR(rng);
   goto out;
   }
 -
 - if (!current_rng) {
 + if (!rng) {
   err = -ENODEV;
 - goto out_unlock;
 + goto out;
   }
  
   mutex_lock(reading_mutex);
   if (!data_avail) {
 - bytes_read = rng_get_data(current_rng, rng_buffer,
 + bytes_read = rng_get_data(rng, rng_buffer,
   rng_buffer_size(),
   !(filp-f_flags  O_NONBLOCK));
   if (bytes_read  0) {
 @@ -200,7 +249,6 @@ static ssize_t rng_dev_read(struct file *filp, char 
 __user *buf,
   ret += len;
   }
  
 - mutex_unlock(rng_mutex);
   mutex_unlock(reading_mutex);
  
   if (need_resched())
 @@ -210,15 +258,16 @@ static ssize_t rng_dev_read(struct file *filp, char 
 __user *buf,
   err = -ERESTARTSYS;

We need put_rng() in this error path. Otherwise, unhotplug will hang
in the end of hwrng_unregister()

|/* Just in case rng is reading right now, wait. */
|wait_event(rng_done, atomic_read(rng-ref.refcount) == 0);

Steps to reproduce the hang:
  guest) # dd if=/dev/hwrng of=/dev/null 
  cancel dd process after 10 seconds
  guest) # dd if=/dev/hwrng of=/dev/null 
  hotunplug rng device from qemu monitor
  result: device can't be removed (still can find in QEMU monitor)


diff --git a/drivers/char/hw_random/core.c
b/drivers/char/hw_random/core.c
index 96fa067..4e22d70 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -258,6 +258,7 @@ static ssize_t rng_dev_read(struct file *filp,
char __user *buf,
 
if (signal_pending(current)) {
err = -ERESTARTSYS;
+   put_rng(rng);
goto out;
}

   goto out;
   }
 +
 + put_rng(rng);
   }
  out:
   return ret ? : err;
 -out_unlock:
 - mutex_unlock(rng_mutex);
 - goto out;
 +
  out_unlock_reading:
   mutex_unlock(reading_mutex);
 - goto out_unlock;
 + put_rng(rng);
 + goto out;
  }
  
  
 @@ -257,8 +306,8 @@ static ssize_t hwrng_attr_current_store(struct device 
 *dev,
   err = hwrng_init(rng);
   if (err)
   break;
 - hwrng_cleanup(current_rng);
 - current_rng = rng;
 + drop_current_rng();
 + set_current_rng(rng);
   err = 0;
   break;
   }
 @@ -272,17 +321,15 @@ static ssize_t hwrng_attr_current_show(struct device 
 *dev,
  struct device_attribute *attr,
  char *buf)
  {
 - int err;
   ssize_t ret;
 - const char *name = none;
 + struct hwrng *rng;
  
 - err = mutex_lock_interruptible(rng_mutex);
 - if (err)
 - return -ERESTARTSYS;
 - if (current_rng)
 - name = current_rng-name;
 - ret = snprintf(buf, PAGE_SIZE, %s\n, name);
 - mutex_unlock(rng_mutex);
 + rng = get_current_rng();
 + if (IS_ERR(rng))
 + return PTR_ERR(rng);
 +
 + ret = snprintf(buf, PAGE_SIZE, %s\n, rng ? rng-name : none);
 + put_rng(rng);
  
   return ret;
  }
 @@ -357,12 +404,16 @@ static int hwrng_fillfn(void *unused)
   long rc;
  
   while (!kthread_should_stop()) {
 -

[PATCH v2 1/6] hw_random: place mutex around read functions and buffers.

2014-09-18 Thread Amos Kong

From: Rusty Russell ru...@rustcorp.com.au

There's currently a big lock around everything, and it means that we
can't query sysfs (eg /sys/devices/virtual/misc/hw_random/rng_current)
while the rng is reading.  This is a real problem when the rng is slow,
or blocked (eg. virtio_rng with qemu's default /dev/random backend)

This doesn't help (it leaves the current lock untouched), just adds a
lock to protect the read function and the static buffers, in preparation
for transition.

Signed-off-by: Rusty Russell ru...@rustcorp.com.au
---
 drivers/char/hw_random/core.c | 20 +---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index aa30a25..b1b6042 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -53,7 +53,10 @@
 static struct hwrng *current_rng;
 static struct task_struct *hwrng_fill;
 static LIST_HEAD(rng_list);
+/* Protects rng_list and current_rng */
 static DEFINE_MUTEX(rng_mutex);
+/* Protects rng read functions, data_avail, rng_buffer and rng_fillbuf */
+static DEFINE_MUTEX(reading_mutex);
 static int data_avail;
 static u8 *rng_buffer, *rng_fillbuf;
 static unsigned short current_quality;
@@ -81,7 +84,9 @@ static void add_early_randomness(struct hwrng *rng)
unsigned char bytes[16];
int bytes_read;
 
+   mutex_lock(reading_mutex);
bytes_read = rng_get_data(rng, bytes, sizeof(bytes), 1);
+   mutex_unlock(reading_mutex);
if (bytes_read  0)
add_device_randomness(bytes, bytes_read);
 }
@@ -128,6 +133,7 @@ static inline int rng_get_data(struct hwrng *rng, u8 
*buffer, size_t size,
int wait) {
int present;
 
+   BUG_ON(!mutex_is_locked(reading_mutex));
if (rng-read)
return rng-read(rng, (void *)buffer, size, wait);
 
@@ -160,13 +166,14 @@ static ssize_t rng_dev_read(struct file *filp, char 
__user *buf,
goto out_unlock;
}
 
+   mutex_lock(reading_mutex);
if (!data_avail) {
bytes_read = rng_get_data(current_rng, rng_buffer,
rng_buffer_size(),
!(filp-f_flags  O_NONBLOCK));
if (bytes_read  0) {
err = bytes_read;
-   goto out_unlock;
+   goto out_unlock_reading;
}
data_avail = bytes_read;
}
@@ -174,7 +181,7 @@ static ssize_t rng_dev_read(struct file *filp, char __user 
*buf,
if (!data_avail) {
if (filp-f_flags  O_NONBLOCK) {
err = -EAGAIN;
-   goto out_unlock;
+   goto out_unlock_reading;
}
} else {
len = data_avail;
@@ -186,7 +193,7 @@ static ssize_t rng_dev_read(struct file *filp, char __user 
*buf,
if (copy_to_user(buf + ret, rng_buffer + data_avail,
len)) {
err = -EFAULT;
-   goto out_unlock;
+   goto out_unlock_reading;
}
 
size -= len;
@@ -194,6 +201,7 @@ static ssize_t rng_dev_read(struct file *filp, char __user 
*buf,
}
 
mutex_unlock(rng_mutex);
+   mutex_unlock(reading_mutex);
 
if (need_resched())
schedule_timeout_interruptible(1);
@@ -208,6 +216,9 @@ out:
 out_unlock:
mutex_unlock(rng_mutex);
goto out;
+out_unlock_reading:
+   mutex_unlock(reading_mutex);
+   goto out_unlock;
 }
 
 
@@ -348,13 +359,16 @@ static int hwrng_fillfn(void *unused)
while (!kthread_should_stop()) {
if (!current_rng)
break;
+   mutex_lock(reading_mutex);
rc = rng_get_data(current_rng, rng_fillbuf,
  rng_buffer_size(), 1);
+   mutex_unlock(reading_mutex);
if (rc = 0) {
pr_warn(hwrng: no data available\n);
msleep_interruptible(1);
continue;
}
+   /* Outside lock, sure, but y'know: randomness. */
add_hwgenerator_randomness((void *)rng_fillbuf, rc,
   rc * current_quality * 8  10);
}
-- 
1.9.3

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v2 0/6] fix hw_random stuck

2014-09-18 Thread Amos Kong

When I hotunplug a busy virtio-rng device or try to access
hwrng attributes in non-smp guest, it gets stuck.

My original was pain, Rusty posted a real fix. This patchset
fixed two issue in v1, and tested by my 6+ cases.


| test 0:
|   hotunplug rng device from qemu monitor
|
| test 1:
|   guest) # dd if=/dev/hwrng of=/dev/null 
|   hotunplug rng device from qemu monitor
|
| test 2:
|   guest) # dd if=/dev/random of=/dev/null 
|   hotunplug rng device from qemu monitor
|
| test 4:
|   guest) # dd if=/dev/hwrng of=/dev/null 
|   cat /sys/devices/virtual/misc/hw_random/rng_*
|
| test 5:
|   guest) # dd if=/dev/hwrng of=/dev/null
|   cancel dd process after 10 seconds
|   guest) # dd if=/dev/hwrng of=/dev/null 
|   hotunplug rng device from qemu monitor
|
| test 6:
|   use a fifo as rng backend, execute test 0 ~ 5 with no input of fifo

V2: added patch 2 to fix a deadlock, update current patch 3 to fix reference
counting issue

Amos Kong (1):
  hw_random: move some code out mutex_lock for avoiding underlying
deadlock

Rusty Russell (5):
  hw_random: place mutex around read functions and buffers.
  hw_random: use reference counts on each struct hwrng.
  hw_random: fix unregister race.
  hw_random: don't double-check old_rng.
  hw_random: don't init list element we're about to add to list.

 drivers/char/hw_random/core.c | 166 +-
 include/linux/hw_random.h |   2 +
 2 files changed, 117 insertions(+), 51 deletions(-)

-- 
1.9.3

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v2 5/6] hw_random: don't double-check old_rng.

2014-09-18 Thread Amos Kong

From: Rusty Russell ru...@rustcorp.com.au

Interesting anti-pattern.

Signed-off-by: Rusty Russell ru...@rustcorp.com.au
---
 drivers/char/hw_random/core.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index 9f6f5bb..6420409 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -473,14 +473,13 @@ int hwrng_register(struct hwrng *rng)
}
 
old_rng = current_rng;
+   err = 0;
if (!old_rng) {
err = hwrng_init(rng);
if (err)
goto out_unlock;
set_current_rng(rng);
-   }
-   err = 0;
-   if (!old_rng) {
+
err = register_miscdev();
if (err) {
drop_current_rng();
-- 
1.9.3

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v2 3/6] hw_random: use reference counts on each struct hwrng.

2014-09-18 Thread Amos Kong

From: Rusty Russell ru...@rustcorp.com.au

current_rng holds one reference, and we bump it every time we want
to do a read from it.

This means we only hold the rng_mutex to grab or drop a reference,
so accessing /sys/devices/virtual/misc/hw_random/rng_current doesn't
block on read of /dev/hwrng.

Using a kref is overkill (we're always under the rng_mutex), but
a standard pattern.

This also solves the problem that the hwrng_fillfn thread was
accessing current_rng without a lock, which could change (eg. to NULL)
underneath it.

V2: reduce reference count in signal_pending path

Signed-off-by: Rusty Russell ru...@rustcorp.com.au
Signed-off-by: Amos Kong ak...@redhat.com
---
 drivers/char/hw_random/core.c | 136 +-
 include/linux/hw_random.h |   2 +
 2 files changed, 95 insertions(+), 43 deletions(-)

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index a0905c8..ee9e504 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -42,6 +42,7 @@
 #include linux/delay.h
 #include linux/slab.h
 #include linux/random.h
+#include linux/err.h
 #include asm/uaccess.h
 
 
@@ -91,6 +92,59 @@ static void add_early_randomness(struct hwrng *rng)
add_device_randomness(bytes, bytes_read);
 }
 
+static inline void cleanup_rng(struct kref *kref)
+{
+   struct hwrng *rng = container_of(kref, struct hwrng, ref);
+
+   if (rng-cleanup)
+   rng-cleanup(rng);
+}
+
+static void set_current_rng(struct hwrng *rng)
+{
+   BUG_ON(!mutex_is_locked(rng_mutex));
+   kref_get(rng-ref);
+   current_rng = rng;
+}
+
+static void drop_current_rng(void)
+{
+   BUG_ON(!mutex_is_locked(rng_mutex));
+   if (!current_rng)
+   return;
+
+   kref_put(current_rng-ref, cleanup_rng);
+   current_rng = NULL;
+}
+
+/* Returns ERR_PTR(), NULL or refcounted hwrng */
+static struct hwrng *get_current_rng(void)
+{
+   struct hwrng *rng;
+
+   if (mutex_lock_interruptible(rng_mutex))
+   return ERR_PTR(-ERESTARTSYS);
+
+   rng = current_rng;
+   if (rng)
+   kref_get(rng-ref);
+
+   mutex_unlock(rng_mutex);
+   return rng;
+}
+
+static void put_rng(struct hwrng *rng)
+{
+   /*
+* Hold rng_mutex here so we serialize in case they set_current_rng
+* on rng again immediately.
+*/
+   mutex_lock(rng_mutex);
+   if (rng)
+   kref_put(rng-ref, cleanup_rng);
+   mutex_unlock(rng_mutex);
+}
+
 static inline int hwrng_init(struct hwrng *rng)
 {
if (rng-init) {
@@ -113,12 +167,6 @@ static inline int hwrng_init(struct hwrng *rng)
return 0;
 }
 
-static inline void hwrng_cleanup(struct hwrng *rng)
-{
-   if (rng  rng-cleanup)
-   rng-cleanup(rng);
-}
-
 static int rng_dev_open(struct inode *inode, struct file *filp)
 {
/* enforce read-only access to this chrdev */
@@ -154,21 +202,22 @@ static ssize_t rng_dev_read(struct file *filp, char 
__user *buf,
ssize_t ret = 0;
int err = 0;
int bytes_read, len;
+   struct hwrng *rng;
 
while (size) {
-   if (mutex_lock_interruptible(rng_mutex)) {
-   err = -ERESTARTSYS;
+   rng = get_current_rng();
+   if (IS_ERR(rng)) {
+   err = PTR_ERR(rng);
goto out;
}
-
-   if (!current_rng) {
+   if (!rng) {
err = -ENODEV;
-   goto out_unlock;
+   goto out;
}
 
mutex_lock(reading_mutex);
if (!data_avail) {
-   bytes_read = rng_get_data(current_rng, rng_buffer,
+   bytes_read = rng_get_data(rng, rng_buffer,
rng_buffer_size(),
!(filp-f_flags  O_NONBLOCK));
if (bytes_read  0) {
@@ -200,7 +249,6 @@ static ssize_t rng_dev_read(struct file *filp, char __user 
*buf,
ret += len;
}
 
-   mutex_unlock(rng_mutex);
mutex_unlock(reading_mutex);
 
if (need_resched())
@@ -208,17 +256,19 @@ static ssize_t rng_dev_read(struct file *filp, char 
__user *buf,
 
if (signal_pending(current)) {
err = -ERESTARTSYS;
+   put_rng(rng);
goto out;
}
+
+   put_rng(rng);
}
 out:
return ret ? : err;
-out_unlock:
-   mutex_unlock(rng_mutex);
-   goto out;
+
 out_unlock_reading:
mutex_unlock(reading_mutex);
-   goto out_unlock;
+   put_rng(rng);
+   goto out;
 }
 
 
@@ -257,8 +307,8 @@ static ssize_t hwrng_attr_current_store(struct device *dev,
err = hwrng_init(rng);
if (err

[PATCH v2 4/6] hw_random: fix unregister race.

2014-09-18 Thread Amos Kong

From: Rusty Russell ru...@rustcorp.com.au

The previous patch added one potential problem: we can still be
reading from a hwrng when it's unregistered.  Add a wait for zero
in the hwrng_unregister path.

Signed-off-by: Rusty Russell ru...@rustcorp.com.au
---
 drivers/char/hw_random/core.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index ee9e504..9f6f5bb 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -60,6 +60,7 @@ static DEFINE_MUTEX(rng_mutex);
 static DEFINE_MUTEX(reading_mutex);
 static int data_avail;
 static u8 *rng_buffer, *rng_fillbuf;
+static DECLARE_WAIT_QUEUE_HEAD(rng_done);
 static unsigned short current_quality;
 static unsigned short default_quality; /* = 0; default to off */
 
@@ -98,6 +99,7 @@ static inline void cleanup_rng(struct kref *kref)
 
if (rng-cleanup)
rng-cleanup(rng);
+   wake_up_all(rng_done);
 }
 
 static void set_current_rng(struct hwrng *rng)
-- 
1.9.3

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v2 2/6] hw_random: move some code out mutex_lock for avoiding underlying deadlock

2014-09-18 Thread Amos Kong

In next patch, we use reference counting for each struct hwrng,
changing reference count also needs to take mutex_lock. Before
releasing the lock, if we try to stop a kthread that waits to
take the lock to reduce the referencing count, deadlock will
occur.

Signed-off-by: Amos Kong ak...@redhat.com
---
 drivers/char/hw_random/core.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index b1b6042..a0905c8 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -474,12 +474,12 @@ void hwrng_unregister(struct hwrng *rng)
}
}
if (list_empty(rng_list)) {
+   mutex_unlock(rng_mutex);
unregister_miscdev();
if (hwrng_fill)
kthread_stop(hwrng_fill);
-   }
-
-   mutex_unlock(rng_mutex);
+   } else
+   mutex_unlock(rng_mutex);
 }
 EXPORT_SYMBOL_GPL(hwrng_unregister);
 
-- 
1.9.3

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v2 6/6] hw_random: don't init list element we're about to add to list.

2014-09-18 Thread Amos Kong

From: Rusty Russell ru...@rustcorp.com.au

Another interesting anti-pattern.

Signed-off-by: Rusty Russell ru...@rustcorp.com.au
---
 drivers/char/hw_random/core.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index 6420409..12187dd 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -486,7 +486,6 @@ int hwrng_register(struct hwrng *rng)
goto out_unlock;
}
}
-   INIT_LIST_HEAD(rng-list);
list_add_tail(rng-list, rng_list);
 
if (old_rng  !rng-init) {
-- 
1.9.3

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH v2 3/6] hw_random: use reference counts on each struct hwrng.

2014-09-18 Thread Amos Kong

On Thu, Sep 18, 2014 at 08:37:44PM +0800, Amos Kong wrote:
 From: Rusty Russell ru...@rustcorp.com.au
 
 current_rng holds one reference, and we bump it every time we want
 to do a read from it.
 
 This means we only hold the rng_mutex to grab or drop a reference,
 so accessing /sys/devices/virtual/misc/hw_random/rng_current doesn't
 block on read of /dev/hwrng.
 
 Using a kref is overkill (we're always under the rng_mutex), but
 a standard pattern.
 
 This also solves the problem that the hwrng_fillfn thread was
 accessing current_rng without a lock, which could change (eg. to NULL)
 underneath it.
 
 V2: reduce reference count in signal_pending path
 
 Signed-off-by: Rusty Russell ru...@rustcorp.com.au
 Signed-off-by: Amos Kong ak...@redhat.com

Reply myself.

 ---
  drivers/char/hw_random/core.c | 136 
 +-
  include/linux/hw_random.h |   2 +
  2 files changed, 95 insertions(+), 43 deletions(-)
 
 diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
 index a0905c8..ee9e504 100644
 --- a/drivers/char/hw_random/core.c
 +++ b/drivers/char/hw_random/core.c
 @@ -42,6 +42,7 @@
  #include linux/delay.h
  #include linux/slab.h
  #include linux/random.h
 +#include linux/err.h
  #include asm/uaccess.h
  
  
 @@ -91,6 +92,59 @@ static void add_early_randomness(struct hwrng *rng)
   add_device_randomness(bytes, bytes_read);
  }
  
 +static inline void cleanup_rng(struct kref *kref)
 +{
 + struct hwrng *rng = container_of(kref, struct hwrng, ref);
 +
 + if (rng-cleanup)
 + rng-cleanup(rng);
 +}
 +
 +static void set_current_rng(struct hwrng *rng)
 +{
 + BUG_ON(!mutex_is_locked(rng_mutex));
 + kref_get(rng-ref);
 + current_rng = rng;
 +}
 +
 +static void drop_current_rng(void)
 +{
 + BUG_ON(!mutex_is_locked(rng_mutex));
 + if (!current_rng)
 + return;
 +
 + kref_put(current_rng-ref, cleanup_rng);
 + current_rng = NULL;
 +}
 +
 +/* Returns ERR_PTR(), NULL or refcounted hwrng */
 +static struct hwrng *get_current_rng(void)
 +{
 + struct hwrng *rng;
 +
 + if (mutex_lock_interruptible(rng_mutex))
 + return ERR_PTR(-ERESTARTSYS);
 +
 + rng = current_rng;
 + if (rng)
 + kref_get(rng-ref);
 +
 + mutex_unlock(rng_mutex);
 + return rng;
 +}
 +
 +static void put_rng(struct hwrng *rng)
 +{
 + /*
 +  * Hold rng_mutex here so we serialize in case they set_current_rng
 +  * on rng again immediately.
 +  */
 + mutex_lock(rng_mutex);
 + if (rng)
 + kref_put(rng-ref, cleanup_rng);
 + mutex_unlock(rng_mutex);
 +}
 +
  static inline int hwrng_init(struct hwrng *rng)
  {
   if (rng-init) {
 @@ -113,12 +167,6 @@ static inline int hwrng_init(struct hwrng *rng)
   return 0;
  }
  
 -static inline void hwrng_cleanup(struct hwrng *rng)
 -{
 - if (rng  rng-cleanup)
 - rng-cleanup(rng);
 -}
 -
  static int rng_dev_open(struct inode *inode, struct file *filp)
  {
   /* enforce read-only access to this chrdev */
 @@ -154,21 +202,22 @@ static ssize_t rng_dev_read(struct file *filp, char 
 __user *buf,
   ssize_t ret = 0;
   int err = 0;
   int bytes_read, len;
 + struct hwrng *rng;
  
   while (size) {
 - if (mutex_lock_interruptible(rng_mutex)) {
 - err = -ERESTARTSYS;
 + rng = get_current_rng();
 + if (IS_ERR(rng)) {
 + err = PTR_ERR(rng);
   goto out;
   }
 -
 - if (!current_rng) {
 + if (!rng) {
   err = -ENODEV;
 - goto out_unlock;
 + goto out;
   }
  
   mutex_lock(reading_mutex);
   if (!data_avail) {
 - bytes_read = rng_get_data(current_rng, rng_buffer,
 + bytes_read = rng_get_data(rng, rng_buffer,
   rng_buffer_size(),
   !(filp-f_flags  O_NONBLOCK));
   if (bytes_read  0) {
 @@ -200,7 +249,6 @@ static ssize_t rng_dev_read(struct file *filp, char 
 __user *buf,
   ret += len;
   }
  
 - mutex_unlock(rng_mutex);
   mutex_unlock(reading_mutex);
  
   if (need_resched())
 @@ -208,17 +256,19 @@ static ssize_t rng_dev_read(struct file *filp, char 
 __user *buf,
  
   if (signal_pending(current)) {
   err = -ERESTARTSYS;
 + put_rng(rng);
   goto out;
   }
 +
 + put_rng(rng);
   }
  out:
   return ret ? : err;
 -out_unlock:
 - mutex_unlock(rng_mutex);
 - goto out;
 +
  out_unlock_reading:
   mutex_unlock(reading_mutex);
 - goto out_unlock;
 + put_rng(rng);
 + goto out;
  }
  
  
 @@ -257,8 +307,8 @@ static ssize_t hwrng_attr_current_store(struct

[PATCH v2 1/3] virtio-rng cleanup: move some code out of mutex protection

2014-09-15 Thread Amos Kong

It doesn't save too much cpu time as expected, just a cleanup.

Signed-off-by: Amos Kong ak...@redhat.com
---
 drivers/char/hw_random/core.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index aa30a25..c591d7e 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -270,8 +270,8 @@ static ssize_t hwrng_attr_current_show(struct device *dev,
return -ERESTARTSYS;
if (current_rng)
name = current_rng-name;
-   ret = snprintf(buf, PAGE_SIZE, %s\n, name);
mutex_unlock(rng_mutex);
+   ret = snprintf(buf, PAGE_SIZE, %s\n, name);
 
return ret;
 }
@@ -284,19 +284,19 @@ static ssize_t hwrng_attr_available_show(struct device 
*dev,
ssize_t ret = 0;
struct hwrng *rng;
 
+   buf[0] = '\0';
err = mutex_lock_interruptible(rng_mutex);
if (err)
return -ERESTARTSYS;
-   buf[0] = '\0';
list_for_each_entry(rng, rng_list, list) {
strncat(buf, rng-name, PAGE_SIZE - ret - 1);
ret += strlen(rng-name);
strncat(buf,  , PAGE_SIZE - ret - 1);
ret++;
}
+   mutex_unlock(rng_mutex);
strncat(buf, \n, PAGE_SIZE - ret - 1);
ret++;
-   mutex_unlock(rng_mutex);
 
return ret;
 }
-- 
1.9.3

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v2 0/3] fix stuck in accessing hwrng attributes

2014-09-15 Thread Amos Kong

If we read hwrng by long-running dd process, it takes too much cpu
time and almost hold the mutex lock. When we check hwrng attributes
from sysfs by cat, it gets stuck in waiting the lock releaseing.
The problem can only be reproduced with non-smp guest with slow backend.

This patchset resolves the issue by changing rng_dev_read() to always
schedule 10 jiffies after release mutex lock, then cat process can
have chance to get the lock and execute protected code without stuck.

Thanks.

V2: update commitlog to describe PATCH 2, split second patch.

Amos Kong (3):
  virtio-rng cleanup: move some code out of mutex protection
  hw_random: fix stuck in catting hwrng attributes
  hw_random: increase schedule timeout in rng_dev_read()

 drivers/char/hw_random/core.c | 9 -
 1 file changed, 4 insertions(+), 5 deletions(-)

-- 
1.9.3

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v2 2/3] hw_random: fix stuck in catting hwrng attributes

2014-09-15 Thread Amos Kong

I started a QEMU (non-smp) guest with one virtio-rng device, and read
random data from /dev/hwrng by dd:

 # dd if=/dev/hwrng of=/dev/null 

In the same time, if I check hwrng attributes from sysfs by cat:

 # cat /sys/class/misc/hw_random/rng_*

The cat process always gets stuck with slow backend (5 k/s), if we
use a quick backend (1.2 M/s), the cat process will cost 1 to 2
minutes. The stuck doesn't exist for smp guest.

Reading syscall enters kernel and call rng_dev_read(), it's user
context. We used need_resched() to check if other tasks need to
be run, but it almost always return false, and re-hold the mutex
lock. The attributes accessing process always fails to hold the
lock, so the cat gets stuck.

User context doesn't allow other user contexts run on that CPU,
unless the kernel code sleeps for some reason. This is why the
need_reshed() always return false here.

This patch removed need_resched() and always schedule other tasks
then other tasks can have chance to hold the lock and execute
protected code.

Signed-off-by: Amos Kong ak...@redhat.com
---
 drivers/char/hw_random/core.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index c591d7e..263a370 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -195,8 +195,7 @@ static ssize_t rng_dev_read(struct file *filp, char __user 
*buf,
 
mutex_unlock(rng_mutex);
 
-   if (need_resched())
-   schedule_timeout_interruptible(1);
+   schedule_timeout_interruptible(1);
 
if (signal_pending(current)) {
err = -ERESTARTSYS;
-- 
1.9.3

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v2 3/3] hw_random: increase schedule timeout in rng_dev_read()

2014-09-15 Thread Amos Kong

This patch increases the schedule timeout to 10 jiffies, it's more
appropriate, then other takes can easy to hold the mutex lock.

Signed-off-by: Amos Kong ak...@redhat.com
---
 drivers/char/hw_random/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index 263a370..b5d1b6f 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -195,7 +195,7 @@ static ssize_t rng_dev_read(struct file *filp, char __user 
*buf,
 
mutex_unlock(rng_mutex);
 
-   schedule_timeout_interruptible(1);
+   schedule_timeout_interruptible(10);
 
if (signal_pending(current)) {
err = -ERESTARTSYS;
-- 
1.9.3

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH v2 3/3] hw_random: increase schedule timeout in rng_dev_read()

2014-09-15 Thread Amos Kong

On Mon, Sep 15, 2014 at 06:13:31PM +0200, Michael Büsch wrote:
 On Tue, 16 Sep 2014 00:02:29 +0800
 Amos Kong ak...@redhat.com wrote:
 
  This patch increases the schedule timeout to 10 jiffies, it's more
  appropriate, then other takes can easy to hold the mutex lock.
  
  Signed-off-by: Amos Kong ak...@redhat.com
  ---
   drivers/char/hw_random/core.c | 2 +-
   1 file changed, 1 insertion(+), 1 deletion(-)
  
  diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
  index 263a370..b5d1b6f 100644
  --- a/drivers/char/hw_random/core.c
  +++ b/drivers/char/hw_random/core.c
  @@ -195,7 +195,7 @@ static ssize_t rng_dev_read(struct file *filp, char 
  __user *buf,
   
  mutex_unlock(rng_mutex);
   
  -   schedule_timeout_interruptible(1);
  +   schedule_timeout_interruptible(10);
   
  if (signal_pending(current)) {
  err = -ERESTARTSYS;
 
 Does a schedule of 1 ms or 10 ms decrease the throughput?

In my test environment, 1 jiffe always works (100%), as suggested by
Amit 10 jiffes is more appropriate.

After applied current 3 patches, there is a throughput regression.

  1.2 M/s - 6 K/s

We can only schedule in the end of loop (size == 0), and only for
non-smp guest. So smp guest won't be effected.

|   if (!size  num_online_cpus() == 1)
|   schedule_timeout_interruptible(timeout);


Set timeout to 1:
  non-smp guest with quick backend (1.2M/s) - about 49K/s)

Set timeout to 10:
  non-smp guest with quick backend (1.2M/s) - about 490K/s)

We might need other benchmark to test the performance, but we can
see the bug clearly caused a regression.

As we discussed in other thread, need_resched() should work in this
case, so those patches might be wrong fixing.

 I think we need some benchmarks.
 
 -- 
 Michael



-- 
Amos.


pgpEioLyquXyk.pgp
Description: PGP signature
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH v2 1/3] virtio-rng cleanup: move some code out of mutex protection

2014-09-15 Thread Amos Kong

On Mon, Sep 15, 2014 at 06:13:20PM +0200, Michael Büsch wrote:
 On Tue, 16 Sep 2014 00:02:27 +0800
 Amos Kong ak...@redhat.com wrote:
 
  It doesn't save too much cpu time as expected, just a cleanup.
  
  Signed-off-by: Amos Kong ak...@redhat.com
  ---
   drivers/char/hw_random/core.c | 6 +++---
   1 file changed, 3 insertions(+), 3 deletions(-)
  
  diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
  index aa30a25..c591d7e 100644
  --- a/drivers/char/hw_random/core.c
  +++ b/drivers/char/hw_random/core.c
  @@ -270,8 +270,8 @@ static ssize_t hwrng_attr_current_show(struct device 
  *dev,
  return -ERESTARTSYS;
  if (current_rng)
  name = current_rng-name;
  -   ret = snprintf(buf, PAGE_SIZE, %s\n, name);
  mutex_unlock(rng_mutex);
  +   ret = snprintf(buf, PAGE_SIZE, %s\n, name);
 
 I'm not sure this is safe.
 Name is just a pointer.
 What if the hwrng gets unregistered after unlock and just before the snprintf?

Oh, it points to protected current_rng-name, I will drop this
cleanup. Thanks.
 
  return ret;
   }
  @@ -284,19 +284,19 @@ static ssize_t hwrng_attr_available_show(struct 
  device *dev,
  ssize_t ret = 0;
  struct hwrng *rng;
   
  +   buf[0] = '\0';
  err = mutex_lock_interruptible(rng_mutex);
  if (err)
  return -ERESTARTSYS;
  -   buf[0] = '\0';
  list_for_each_entry(rng, rng_list, list) {
  strncat(buf, rng-name, PAGE_SIZE - ret - 1);
  ret += strlen(rng-name);
  strncat(buf,  , PAGE_SIZE - ret - 1);
  ret++;
  }
  +   mutex_unlock(rng_mutex);
  strncat(buf, \n, PAGE_SIZE - ret - 1);
  ret++;
  -   mutex_unlock(rng_mutex);
   
  return ret;
   }
 
 This looks ok.
 
 -- 
 Michael

-- 
Amos.


pgpLebQhrkTiH.pgp
Description: PGP signature
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH 2/2] virtio-rng: fix stuck in catting hwrng attributes

2014-09-15 Thread Amos Kong

CC linux-kernel

Original thread: 
http://comments.gmane.org/gmane.linux.kernel.virtualization/22775

On Mon, Sep 15, 2014 at 06:48:46PM +0200, Radim Krčmář wrote:
 2014-09-14 10:25+0800, Amos Kong:
  On Sun, Sep 14, 2014 at 09:12:08AM +0800, Amos Kong wrote:
  
  ...
  diff --git a/drivers/char/hw_random/core.c 
  b/drivers/char/hw_random/core.c
  index c591d7e..b5d1b6f 100644
  --- a/drivers/char/hw_random/core.c
  +++ b/drivers/char/hw_random/core.c
  @@ -195,8 +195,7 @@ static ssize_t rng_dev_read(struct file *filp, 
  char __user *buf,
   
  mutex_unlock(rng_mutex);
   
  -   if (need_resched())
  -   schedule_timeout_interruptible(1);
  +   schedule_timeout_interruptible(10);
 
 If cond_resched() does not work, it is a bug elsewehere.

Thanks for your reply, Jason also told me the TIF_NEED_RESCHED should
be set in this case, then need_resched() returns true.

I will investigate the issue and reply you later.
 
  Problem only occurred in non-smp guest, we can improve it to:
  
  if(!is_smp())
  schedule_timeout_interruptible(10);
  
  is_smp() is only available for arm arch, we need a general one.
 
 (It is num_online_cpus()  1.)

-- 
Amos.
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH 2/2] virtio-rng: fix stuck in catting hwrng attributes

2014-09-13 Thread Amos Kong

On Thu, Sep 11, 2014 at 09:08:03PM +0930, Rusty Russell wrote:
 Amos Kong ak...@redhat.com writes:
  When I check hwrng attributes in sysfs, cat process always gets
  stuck if guest has only 1 vcpu and uses a slow rng backend.
 
  Currently we check if there is any tasks waiting to be run on
  current cpu in rng_dev_read() by need_resched(). But need_resched()
  doesn't work because rng_dev_read() is executing in user context.
 
 I don't understand this explanation?  I'd expect the sysfs process to be
 woken by the mutex_unlock().

But actually sysfs process's not woken always, this is they the
process gets stuck.
 
 If we're really high priority (vs. the sysfs process) then I can see why
 we'd need schedule_timeout_interruptible() instead of just schedule(),
 and in that case, need_resched() would be false too.
 
 You could argue that's intended behaviour, but I can't see how it
 happens in the normal case anyway.
 
 What am I missing?
 
 Thanks,
 Rusty.
 
  This patch removed need_resched() and increase delay to 10 jiffies,
  then other tasks can have chance to execute protected code.
  Delaying 1 jiffy also works, but 10 jiffies is safer.
 
  Signed-off-by: Amos Kong ak...@redhat.com
  ---
   drivers/char/hw_random/core.c | 3 +--
   1 file changed, 1 insertion(+), 2 deletions(-)
 
  diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
  index c591d7e..b5d1b6f 100644
  --- a/drivers/char/hw_random/core.c
  +++ b/drivers/char/hw_random/core.c
  @@ -195,8 +195,7 @@ static ssize_t rng_dev_read(struct file *filp, char 
  __user *buf,
   
  mutex_unlock(rng_mutex);
   
  -   if (need_resched())
  -   schedule_timeout_interruptible(1);
  +   schedule_timeout_interruptible(10);
   
  if (signal_pending(current)) {
  err = -ERESTARTSYS;
  -- 
  1.9.3

-- 
Amos.
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH 2/2] virtio-rng: fix stuck in catting hwrng attributes

2014-09-13 Thread Amos Kong

On Sun, Sep 14, 2014 at 01:12:58AM +0800, Amos Kong wrote:
 On Thu, Sep 11, 2014 at 09:08:03PM +0930, Rusty Russell wrote:
  Amos Kong ak...@redhat.com writes:
   When I check hwrng attributes in sysfs, cat process always gets
   stuck if guest has only 1 vcpu and uses a slow rng backend.
  
   Currently we check if there is any tasks waiting to be run on
   current cpu in rng_dev_read() by need_resched(). But need_resched()
   doesn't work because rng_dev_read() is executing in user context.
  
  I don't understand this explanation?  I'd expect the sysfs process to be
  woken by the mutex_unlock().
 
 But actually sysfs process's not woken always, this is they the
 process gets stuck.

%s/they/why/

Hi Rusty,


Reference:
http://www.linuxgrill.com/anonymous/fire/netfilter/kernel-hacking-HOWTO-2.html

read() syscall of /dev/hwrng will enter into kernel, the read operation is
rng_dev_read(), it's userspace context (not interrupt context).

Userspace context doesn't allow other user contexts run on that CPU,
unless the kernel code sleeps for some reason.


In this case, the need_resched() doesn't work.

My solution is removing need_resched() and use an appropriate delay by 
schedule_timeout_interruptible(10).

Thanks, Amos
  
  If we're really high priority (vs. the sysfs process) then I can see why
  we'd need schedule_timeout_interruptible() instead of just schedule(),
  and in that case, need_resched() would be false too.
  
  You could argue that's intended behaviour, but I can't see how it
  happens in the normal case anyway.
  
  What am I missing?

  Thanks,
  Rusty.
  
   This patch removed need_resched() and increase delay to 10 jiffies,
   then other tasks can have chance to execute protected code.
   Delaying 1 jiffy also works, but 10 jiffies is safer.
  
   Signed-off-by: Amos Kong ak...@redhat.com
   ---
drivers/char/hw_random/core.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
  
   diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
   index c591d7e..b5d1b6f 100644
   --- a/drivers/char/hw_random/core.c
   +++ b/drivers/char/hw_random/core.c
   @@ -195,8 +195,7 @@ static ssize_t rng_dev_read(struct file *filp, char 
   __user *buf,

 mutex_unlock(rng_mutex);

   - if (need_resched())
   - schedule_timeout_interruptible(1);
   + schedule_timeout_interruptible(10);

 if (signal_pending(current)) {
 err = -ERESTARTSYS;
   -- 
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH 2/2] virtio-rng: fix stuck in catting hwrng attributes

2014-09-13 Thread Amos Kong

On Thu, Sep 11, 2014 at 11:38:38AM +0530, Amit Shah wrote:
 On (Wed) 10 Sep 2014 [17:07:07], Amos Kong wrote:
  When I check hwrng attributes in sysfs, cat process always gets
  stuck if guest has only 1 vcpu and uses a slow rng backend.
  
  Currently we check if there is any tasks waiting to be run on
  current cpu in rng_dev_read() by need_resched(). But need_resched()
  doesn't work because rng_dev_read() is executing in user context.
  
  This patch removed need_resched() and increase delay to 10 jiffies,
  then other tasks can have chance to execute protected code.
  Delaying 1 jiffy also works, but 10 jiffies is safer.

Hi Amit,
 
 I'd prefer two patches for this one: one to remove the need_resched()
 check, and the other to increase the timeout.

If Rusty agrees with this fix, I will respin to update the commitlog
with clear description and split the patches to 3.

Thanks for the review.
 
 Anyway,
 
 Reviewed-by: Amit Shah amit.s...@redhat.com

--
Amos.
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH 2/2] virtio-rng: fix stuck in catting hwrng attributes

2014-09-13 Thread Amos Kong

On Sun, Sep 14, 2014 at 09:12:08AM +0800, Amos Kong wrote:

...
diff --git a/drivers/char/hw_random/core.c 
b/drivers/char/hw_random/core.c
index c591d7e..b5d1b6f 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -195,8 +195,7 @@ static ssize_t rng_dev_read(struct file *filp, char 
__user *buf,
 
mutex_unlock(rng_mutex);
 
-   if (need_resched())
-   schedule_timeout_interruptible(1);
+   schedule_timeout_interruptible(10);

Problem only occurred in non-smp guest, we can improve it to:

if(!is_smp())
schedule_timeout_interruptible(10);

is_smp() is only available for arm arch, we need a general one.

 
if (signal_pending(current)) {
err = -ERESTARTSYS;
-- 

-- 
Amos.
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v3 0/2] virtio-rng: fix hotunplug

2014-09-10 Thread Amos Kong

RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1121540

When we try to hot-unplugging a busy virtio-rng device, the device
can't be removed. And the reading process in guest gets stuck.

Those two patches fixed this issue by completing have_data completion
and preventing invalid reading.

Thanks for the help of Amit.

Cc: sta...@vger.kernel.org

V2: reset data_avail (Amit)
adjust unregister order
V3: split patch, update commitlog

Amos Kong (2):
  virtio-rng: fix stuck of hot-unplugging busy device
  virtio-rng: skip reading when we start to remove the device

 drivers/char/hw_random/virtio-rng.c | 7 +++
 1 file changed, 7 insertions(+)

-- 
1.9.3

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v3 2/2] virtio-rng: skip reading when we start to remove the device

2014-09-10 Thread Amos Kong

Before we really unregister the hwrng device, reading will get stuck if
the virtio device is reset. We should return error for reading when we
start to remove the device.

Signed-off-by: Amos Kong ak...@redhat.com
Cc: sta...@vger.kernel.org
---
 drivers/char/hw_random/virtio-rng.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/char/hw_random/virtio-rng.c 
b/drivers/char/hw_random/virtio-rng.c
index 849b228..132c9cc 100644
--- a/drivers/char/hw_random/virtio-rng.c
+++ b/drivers/char/hw_random/virtio-rng.c
@@ -36,6 +36,7 @@ struct virtrng_info {
int index;
bool busy;
bool hwrng_register_done;
+   bool hwrng_removed;
 };
 
 
@@ -68,6 +69,9 @@ static int virtio_read(struct hwrng *rng, void *buf, size_t 
size, bool wait)
int ret;
struct virtrng_info *vi = (struct virtrng_info *)rng-priv;
 
+   if (vi-hwrng_removed)
+   return -ENODEV;
+
if (!vi-busy) {
vi-busy = true;
init_completion(vi-have_data);
@@ -137,6 +141,7 @@ static void remove_common(struct virtio_device *vdev)
 {
struct virtrng_info *vi = vdev-priv;
 
+   vi-hwrng_removed = true;
vi-data_avail = 0;
complete(vi-have_data);
vdev-config-reset(vdev);
-- 
1.9.3

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v3 1/2] virtio-rng: fix stuck of hot-unplugging busy device

2014-09-10 Thread Amos Kong

When we try to hot-remove a busy virtio-rng device from QEMU monitor,
the device can't be hot-removed. Because virtio-rng driver hangs at
wait_for_completion_killable().

This patch exits the waiting by completing have_data completion before
unregistering, resets data_avail to avoid the hwrng core use wrong
buffer bytes.

Signed-off-by: Amos Kong ak...@redhat.com
Cc: sta...@vger.kernel.org
---
 drivers/char/hw_random/virtio-rng.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/char/hw_random/virtio-rng.c 
b/drivers/char/hw_random/virtio-rng.c
index 2e3139e..849b228 100644
--- a/drivers/char/hw_random/virtio-rng.c
+++ b/drivers/char/hw_random/virtio-rng.c
@@ -137,6 +137,8 @@ static void remove_common(struct virtio_device *vdev)
 {
struct virtrng_info *vi = vdev-priv;
 
+   vi-data_avail = 0;
+   complete(vi-have_data);
vdev-config-reset(vdev);
vi-busy = false;
if (vi-hwrng_register_done)
-- 
1.9.3

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH v2] virtio-rng: fix stuck of hot-unplugging busy device

2014-09-10 Thread Amos Kong

On Wed, Sep 10, 2014 at 11:04:54AM +0530, Amit Shah wrote:
 Hi Amos,
 
 On (Tue) 09 Sep 2014 [19:14:02], Amos Kong wrote:
  When we try to hot-remove a busy virtio-rng device from QEMU monitor,
  the device can't be hot-removed. Because virtio-rng driver hangs at
  wait_for_completion_killable().
  
  This patch exits the waiting by completing have_data completion before
  unregistering, resets data_avail to avoid the hwrng core use wrong
  buffer bytes. Before real unregistering we should return -ENODEV for
  reading.
  
  When we hot-unplug the device, dd process in guest will exit.
$ dd if=/dev/hwrng of=/dev/null
dd: error reading ‘/dev/hwrng’: No such device
  
  Signed-off-by: Amos Kong ak...@redhat.com
  Cc: sta...@vger.kernel.org
  ---
  V2: reset data_avail (Amit)
  adjust unregister order
 
 Thanks, this looks good.
 
 Can you please split into two parts, the complete() in one, and the
 hwrng_register_done stuff into another?

I just posted the V3, split to two patches, and updated the commitlog
to describe why we need to return ENODEV for reading.
 
 Thanks,
 
   Amit

-- 
Amos.
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: RFC virtio-rng: fail to read sysfs of a busy device

2014-09-10 Thread Amos Kong

On Wed, Sep 10, 2014 at 11:22:12AM +0530, Amit Shah wrote:
 On (Tue) 09 Sep 2014 [23:23:07], Amos Kong wrote:
  (Resend to fix the subject)
  
  Hi Amit, Rusty
  
  RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1127062
  steps:
  - Read random data by 'dd if=/dev/hwrng of=/dev/null' in guest
  - check sysfs files in the same time, 'cat /sys/class/misc/hw_random/rng_*'
  
  Result: cat process will get stuck, it will return if we kill dd process.
 
 How common is it going to be to have a long-running 'dd' process on
 /dev/hwrng?

Not a common usage, but we have this strict testing.
 
 Also, with the new khwrng thread, reading from /dev/hwrng isn't
 required -- just use /dev/random?

Yes.
 
 (This doesn't mean we shouldn't fix the issue here...)

Completely agree :-)
 
  We have some static variables (eg, current_rng, data_avail, etc) in 
  hw_random/core.c,
  they are protected by rng_mutex. I try to workaround this issue by 
  undelay(100)
  after mutex_unlock() in rng_dev_read(). This gives chance for 
  hwrng_attr_*_show()
  to get mutex.
  
  This patch also contains some cleanup, moving some code out of mutex
  protection.
  
  Do you have some suggestion? Thanks.
  
  
  diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
  index aa30a25..fa69020 100644
  --- a/drivers/char/hw_random/core.c
  +++ b/drivers/char/hw_random/core.c
  @@ -194,6 +194,7 @@ static ssize_t rng_dev_read(struct file *filp, char 
  __user *buf,
  }
   
  mutex_unlock(rng_mutex);
  +   udelay(100);
 
 We have a need_resched() right below.  Why doesn't that work?

need_resched() is giving chance for userspace to 
 
  if (need_resched())

It never success in my debugging.

If we remove this check and always call schedule_timeout_interruptible(1),
problem also disappears.

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index aa30a25..263a370 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -195,8 +195,7 @@ static ssize_t rng_dev_read(struct file *filp,
char __user *buf,
 
mutex_unlock(rng_mutex);
 
-   if (need_resched())
-   schedule_timeout_interruptible(1);
+   schedule_timeout_interruptible(1);
 
if (signal_pending(current)) {
err = -ERESTARTSYS;

  schedule_timeout_interruptible(1);
  @@ -233,10 +234,10 @@ static ssize_t hwrng_attr_current_store(struct device 
  *dev,
  int err;
  struct hwrng *rng;
 
 The following hunk doesn't work:
 
  +   err = -ENODEV;
  err = mutex_lock_interruptible(rng_mutex);
 
 err is being set to another value in the next line!
 
  if (err)
  return -ERESTARTSYS;
  -   err = -ENODEV;
 
 And all usage of err below now won't have -ENODEV but some other value.

Oops!
 
  list_for_each_entry(rng, rng_list, list) {
  if (strcmp(rng-name, buf) == 0) {
  if (rng == current_rng) {
  @@ -270,8 +271,8 @@ static ssize_t hwrng_attr_current_show(struct device 
  *dev,
  return -ERESTARTSYS;
  if (current_rng)
  name = current_rng-name;
  -   ret = snprintf(buf, PAGE_SIZE, %s\n, name);
  mutex_unlock(rng_mutex);
  +   ret = snprintf(buf, PAGE_SIZE, %s\n, name);
 
 This looks OK...
 
   
  return ret;
   }
  @@ -284,19 +285,19 @@ static ssize_t hwrng_attr_available_show(struct 
  device *dev,
  ssize_t ret = 0;
  struct hwrng *rng;
   
  +   buf[0] = '\0';
  err = mutex_lock_interruptible(rng_mutex);
  if (err)
  return -ERESTARTSYS;
 
  -   buf[0] = '\0';
  list_for_each_entry(rng, rng_list, list) {
  strncat(buf, rng-name, PAGE_SIZE - ret - 1);
  ret += strlen(rng-name);
  strncat(buf,  , PAGE_SIZE - ret - 1);
  ret++;
  }
  +   mutex_unlock(rng_mutex);
  strncat(buf, \n, PAGE_SIZE - ret - 1);
  ret++;
  -   mutex_unlock(rng_mutex);
 
 But this isn't resulting in savings; the majority of the time is being
 spent in the for loop, and that writes to the buffer.

Right
 
 BTW I don't expect strcat'ing to the buf in each of these scenarios is
 a long operation, so this reworking doesn't strike to me as something
 we should pursue.
 
   Amit

-- 
Amos.
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: RFC virtio-rng: fail to read sysfs of a busy device

2014-09-10 Thread Amos Kong

On Wed, Sep 10, 2014 at 02:49:38PM +0800, Amos Kong wrote:
 On Wed, Sep 10, 2014 at 11:22:12AM +0530, Amit Shah wrote:
  On (Tue) 09 Sep 2014 [23:23:07], Amos Kong wrote:
   (Resend to fix the subject)
   
   Hi Amit, Rusty
   
   RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1127062
   steps:
   - Read random data by 'dd if=/dev/hwrng of=/dev/null' in guest
   - check sysfs files in the same time, 'cat 
   /sys/class/misc/hw_random/rng_*'
   
   Result: cat process will get stuck, it will return if we kill dd process.
  
  How common is it going to be to have a long-running 'dd' process on
  /dev/hwrng?
 
 Not a common usage, but we have this strict testing.

For -smp 1:
 It's easy to reproduce with slow backend (/dev/random). cat can return
 most of time with some delay if we use quick backend (/dev/urandom).

But for -smp 2:
 I didn't touch this problem even with slow backend.
  
  Also, with the new khwrng thread, reading from /dev/hwrng isn't
  required -- just use /dev/random?
 
 Yes.
  
  (This doesn't mean we shouldn't fix the issue here...)
 
 Completely agree :-)
  
   We have some static variables (eg, current_rng, data_avail, etc) in 
   hw_random/core.c,
   they are protected by rng_mutex. I try to workaround this issue by 
   undelay(100)
   after mutex_unlock() in rng_dev_read(). This gives chance for 
   hwrng_attr_*_show()
   to get mutex.
   
   This patch also contains some cleanup, moving some code out of mutex
   protection.
   
   Do you have some suggestion? Thanks.
   
   
   diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
   index aa30a25..fa69020 100644
   --- a/drivers/char/hw_random/core.c
   +++ b/drivers/char/hw_random/core.c
   @@ -194,6 +194,7 @@ static ssize_t rng_dev_read(struct file *filp, char 
   __user *buf,
 }

 mutex_unlock(rng_mutex);
   + udelay(100);
  
  We have a need_resched() right below.  Why doesn't that work?

[smp 1]
Why need_resched() always return zero? what's the original purpose of it ?

  
 if (need_resched())
 
 It never success in my debugging.
 
 If we remove this check and always call schedule_timeout_interruptible(1),
 problem also disappears.
 
 diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
 index aa30a25..263a370 100644
 --- a/drivers/char/hw_random/core.c
 +++ b/drivers/char/hw_random/core.c
 @@ -195,8 +195,7 @@ static ssize_t rng_dev_read(struct file *filp,
 char __user *buf,
  
 mutex_unlock(rng_mutex);
  
 -   if (need_resched())
 -   schedule_timeout_interruptible(1);
 +   schedule_timeout_interruptible(1);
  
 if (signal_pending(current)) {
 err = -ERESTARTSYS;
 
 schedule_timeout_interruptible(1);
   @@ -233,10 +234,10 @@ static ssize_t hwrng_attr_current_store(struct 
   device *dev,
 int err;
 struct hwrng *rng;
  
  The following hunk doesn't work:
  
   + err = -ENODEV;
 err = mutex_lock_interruptible(rng_mutex);
  
  err is being set to another value in the next line!
  
 if (err)
 return -ERESTARTSYS;
   - err = -ENODEV;
  
  And all usage of err below now won't have -ENODEV but some other value.
 
 Oops!
  
 list_for_each_entry(rng, rng_list, list) {
 if (strcmp(rng-name, buf) == 0) {
 if (rng == current_rng) {
   @@ -270,8 +271,8 @@ static ssize_t hwrng_attr_current_show(struct device 
   *dev,
 return -ERESTARTSYS;
 if (current_rng)
 name = current_rng-name;
   - ret = snprintf(buf, PAGE_SIZE, %s\n, name);
 mutex_unlock(rng_mutex);
   + ret = snprintf(buf, PAGE_SIZE, %s\n, name);
  
  This looks OK...
  

 return ret;
}
   @@ -284,19 +285,19 @@ static ssize_t hwrng_attr_available_show(struct 
   device *dev,
 ssize_t ret = 0;
 struct hwrng *rng;

   + buf[0] = '\0';
 err = mutex_lock_interruptible(rng_mutex);
 if (err)
 return -ERESTARTSYS;
  
   - buf[0] = '\0';
 list_for_each_entry(rng, rng_list, list) {
 strncat(buf, rng-name, PAGE_SIZE - ret - 1);
 ret += strlen(rng-name);
 strncat(buf,  , PAGE_SIZE - ret - 1);
 ret++;
 }
   + mutex_unlock(rng_mutex);
 strncat(buf, \n, PAGE_SIZE - ret - 1);
 ret++;
   - mutex_unlock(rng_mutex);
  
  But this isn't resulting in savings; the majority of the time is being
  spent in the for loop, and that writes to the buffer.
 
 Right
  
  BTW I don't expect strcat'ing to the buf in each of these scenarios is
  a long operation, so this reworking doesn't strike to me as something
  we should pursue.
  
  Amit
 
 -- 
   Amos.

-- 
Amos.
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo

[PATCH 1/2] virtio-rng cleanup: move some code out of mutex protection

2014-09-10 Thread Amos Kong

It doesn't save too much cpu time as expected, just a cleanup.

Signed-off-by: Amos Kong ak...@redhat.com
---
 drivers/char/hw_random/core.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index aa30a25..c591d7e 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -270,8 +270,8 @@ static ssize_t hwrng_attr_current_show(struct device *dev,
return -ERESTARTSYS;
if (current_rng)
name = current_rng-name;
-   ret = snprintf(buf, PAGE_SIZE, %s\n, name);
mutex_unlock(rng_mutex);
+   ret = snprintf(buf, PAGE_SIZE, %s\n, name);
 
return ret;
 }
@@ -284,19 +284,19 @@ static ssize_t hwrng_attr_available_show(struct device 
*dev,
ssize_t ret = 0;
struct hwrng *rng;
 
+   buf[0] = '\0';
err = mutex_lock_interruptible(rng_mutex);
if (err)
return -ERESTARTSYS;
-   buf[0] = '\0';
list_for_each_entry(rng, rng_list, list) {
strncat(buf, rng-name, PAGE_SIZE - ret - 1);
ret += strlen(rng-name);
strncat(buf,  , PAGE_SIZE - ret - 1);
ret++;
}
+   mutex_unlock(rng_mutex);
strncat(buf, \n, PAGE_SIZE - ret - 1);
ret++;
-   mutex_unlock(rng_mutex);
 
return ret;
 }
-- 
1.9.3

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH 2/2] virtio-rng: fix stuck in catting hwrng attributes

2014-09-10 Thread Amos Kong

When I check hwrng attributes in sysfs, cat process always gets
stuck if guest has only 1 vcpu and uses a slow rng backend.

Currently we check if there is any tasks waiting to be run on
current cpu in rng_dev_read() by need_resched(). But need_resched()
doesn't work because rng_dev_read() is executing in user context.

This patch removed need_resched() and increase delay to 10 jiffies,
then other tasks can have chance to execute protected code.
Delaying 1 jiffy also works, but 10 jiffies is safer.

Signed-off-by: Amos Kong ak...@redhat.com
---
 drivers/char/hw_random/core.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index c591d7e..b5d1b6f 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -195,8 +195,7 @@ static ssize_t rng_dev_read(struct file *filp, char __user 
*buf,
 
mutex_unlock(rng_mutex);
 
-   if (need_resched())
-   schedule_timeout_interruptible(1);
+   schedule_timeout_interruptible(10);
 
if (signal_pending(current)) {
err = -ERESTARTSYS;
-- 
1.9.3

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH 0/2] fix stuck in catting hwrng attributes

2014-09-10 Thread Amos Kong

If we read hwrng by long-running dd process, it takes too much cpu time.
When we check hwrng attributes from sysfs by cat, it gets stuck.
The problem can only be reproduced with non-smp guest with slow backend.

This patchset changed hwrng core to always delay 10 jiffies, cat process
have chance to execute protected code, the problem is resolved.

Thanks.

Amos Kong (2):
  virtio-rng cleanup: move some code out of mutex protection
  virtio-rng: fix stuck in catting hwrng attributes

 drivers/char/hw_random/core.c | 9 -
 1 file changed, 4 insertions(+), 5 deletions(-)

-- 
1.9.3

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH] virtio-rng: complete have_data completion in removing device

2014-09-09 Thread Amos Kong

On Mon, Sep 08, 2014 at 11:29:51PM +0800, Amos Kong wrote:
 On Wed, Aug 06, 2014 at 01:55:29PM +0530, Amit Shah wrote:
  On (Wed) 06 Aug 2014 [16:05:41], Amos Kong wrote:
   On Wed, Aug 06, 2014 at 01:35:15AM +0800, Amos Kong wrote:
When we try to hot-remove a busy virtio-rng device from QEMU monitor,
the device can't be hot-removed. Because virtio-rng driver hangs at
wait_for_completion_killable().

This patch fixed the hang by completing have_data completion before
unregistering a virtio-rng device.
   
   Hi Amit,
   
   Before applying this patch, it's blocking insider 
   wait_for_completion_killable() 
   Applied this patch, wait_for_completion_killable() returns 0,
   and vi-data_avail becomes 0, then rng_get_date() will return 0.
  
  Thanks for checking this.
  
   Is it expected result?
  

Hi Amit

  I think what will happen is vi-data_avail will be set to whatever it
  was set last.  In case of a previous successful read request, the
  data_avail will be set to whatever number of bytes the host gave.  On
  doing a hot-unplug on the succeeding wait, the value in data_avail
  will be re-used, and the hwrng core will wrongly take some bytes in
  the buffer as input from the host.
  
  So, I think we need to set vi-data_avail = 0; before calling
  wait_event_completion_killable().

I finally understand content, we need to set vi-data_avail to 0
before returning virtio_read(), it might enter wait_event_completion_killable()
when we try to remove the device.

We call complete() in remove_common(), then wait_event_completion_killable()
will exit, but virtio_read() will be re-entered if the device still
isn't unregistered, then re-stuck inside wait_event_completion_killable().

I tested some complex condition (both quick/slow backend), I found
some problem in below two patches.

I will post another fix later. The test result is expected.

1. Hotplug remove virtio-rng0, dd process will exit with an error:
dd: error reading ‘/dev/hwrng’: Operation not permitted
   virtio-rng0 disappear from 'info pci'
 
2. Re-read by dd, hotplug virtio-rng1, dd process exit with same
   error, virtio-rng1 disappear


Thanks, Amos

  
  Amit
 
 In my latest debugging, I found the hang is caused by unexpected reading
 when we started to remove the device.
 
 I have two draft fix, 1) is skip unexpected reading by checking a
 remove flag. 2) is unregistering device at the beginning of
 remove_common(). I think second patch is better if it won't cause
 new problem.
 
 The original patch (complete in remove_common()) is still necessary.
 
 Test results: hotplug issue disappeared (dd process will quit).
 
 
 diff --git a/drivers/char/hw_random/virtio-rng.c
 b/drivers/char/hw_random/virtio-rng.c
 index 2e3139e..028797c 100644
 --- a/drivers/char/hw_random/virtio-rng.c
 +++ b/drivers/char/hw_random/virtio-rng.c
 @@ -35,6 +35,7 @@ struct virtrng_info {
 unsigned int data_avail;
 int index;
 bool busy;
 +bool remove;
 bool hwrng_register_done;
  };
  
 @@ -68,6 +69,9 @@ static int virtio_read(struct hwrng *rng, void *buf,
 size_t size, bool wait)
 int ret;
 struct virtrng_info *vi = (struct virtrng_info *)rng-priv;
  
 +if (vi-remove)
 +   return 0;
 +
 if (!vi-busy) {
 vi-busy = true;
 init_completion(vi-have_data);
 @@ -137,6 +141,8 @@ static void remove_common(struct virtio_device
 *vdev)
  {
 struct virtrng_info *vi = vdev-priv;
  
 +   vi-remove = true;
 +   complete(vi-have_data);
 vdev-config-reset(vdev);
 vi-busy = false;
 if (vi-hwrng_register_done)
 
 
 diff --git a/drivers/char/hw_random/virtio-rng.c
 b/drivers/char/hw_random/virtio-rng.c
 index 2e3139e..9b8c2ce 100644
 --- a/drivers/char/hw_random/virtio-rng.c
 +++ b/drivers/char/hw_random/virtio-rng.c
 @@ -137,10 +137,11 @@ static void remove_common(struct virtio_device
 *vdev)
  {
 struct virtrng_info *vi = vdev-priv;
  
 -   vdev-config-reset(vdev);
 -   vi-busy = false;
 if (vi-hwrng_register_done)
 hwrng_unregister(vi-hwrng);
 +   complete(vi-have_data);
 +   vdev-config-reset(vdev);
 +   vi-busy = false;
 vdev-config-del_vqs(vdev);
 ida_simple_remove(rng_index_ida, vi-index);
 kfree(vi);
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v2] virtio-rng: fix stuck of hot-unplugging busy device

2014-09-09 Thread Amos Kong

When we try to hot-remove a busy virtio-rng device from QEMU monitor,
the device can't be hot-removed. Because virtio-rng driver hangs at
wait_for_completion_killable().

This patch exits the waiting by completing have_data completion before
unregistering, resets data_avail to avoid the hwrng core use wrong
buffer bytes. Before real unregistering we should return -ENODEV for
reading.

When we hot-unplug the device, dd process in guest will exit.
  $ dd if=/dev/hwrng of=/dev/null
  dd: error reading ‘/dev/hwrng’: No such device

Signed-off-by: Amos Kong ak...@redhat.com
Cc: sta...@vger.kernel.org
---
V2: reset data_avail (Amit)
adjust unregister order
---
 drivers/char/hw_random/virtio-rng.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/char/hw_random/virtio-rng.c 
b/drivers/char/hw_random/virtio-rng.c
index 2e3139e..e76433b 100644
--- a/drivers/char/hw_random/virtio-rng.c
+++ b/drivers/char/hw_random/virtio-rng.c
@@ -68,6 +68,10 @@ static int virtio_read(struct hwrng *rng, void *buf, size_t 
size, bool wait)
int ret;
struct virtrng_info *vi = (struct virtrng_info *)rng-priv;
 
+   if (!vi-hwrng_register_done) {
+   return -ENODEV;
+   }
+
if (!vi-busy) {
vi-busy = true;
init_completion(vi-have_data);
@@ -137,10 +141,14 @@ static void remove_common(struct virtio_device *vdev)
 {
struct virtrng_info *vi = vdev-priv;
 
+   vi-data_avail = 0;
+   complete(vi-have_data);
vdev-config-reset(vdev);
vi-busy = false;
-   if (vi-hwrng_register_done)
+   if (vi-hwrng_register_done) {
+   vi-hwrng_register_done = false;
hwrng_unregister(vi-hwrng);
+   }
vdev-config-del_vqs(vdev);
ida_simple_remove(rng_index_ida, vi-index);
kfree(vi);
-- 
1.9.3

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

mutex

2014-09-09 Thread Amos Kong

Hi Amit, Rusty

RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1127062
steps:
- Read random data by 'dd if=/dev/hwrng of=/dev/null' in guest
- check sysfs files in the same time, 'cat /sys/class/misc/hw_random/rng_*'

Result: cat process will get stuck, it will return if we kill dd process.

We have some static variables (eg, current_rng, data_avail, etc) in 
hw_random/core.c,
they are protected by rng_mutex. I try to workaround this issue by undelay(100)
after mutex_unlock() in rng_dev_read(). This gives chance for 
hwrng_attr_*_show()
to get mutex.

This patch also contains some cleanup, moving some code out of mutex
protection.

Do you have some suggestion? Thanks.


diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index aa30a25..fa69020 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -194,6 +194,7 @@ static ssize_t rng_dev_read(struct file *filp, char __user 
*buf,
}
 
mutex_unlock(rng_mutex);
+   udelay(100);
 
if (need_resched())
schedule_timeout_interruptible(1);
@@ -233,10 +234,10 @@ static ssize_t hwrng_attr_current_store(struct device 
*dev,
int err;
struct hwrng *rng;
 
+   err = -ENODEV;
err = mutex_lock_interruptible(rng_mutex);
if (err)
return -ERESTARTSYS;
-   err = -ENODEV;
list_for_each_entry(rng, rng_list, list) {
if (strcmp(rng-name, buf) == 0) {
if (rng == current_rng) {
@@ -270,8 +271,8 @@ static ssize_t hwrng_attr_current_show(struct device *dev,
return -ERESTARTSYS;
if (current_rng)
name = current_rng-name;
-   ret = snprintf(buf, PAGE_SIZE, %s\n, name);
mutex_unlock(rng_mutex);
+   ret = snprintf(buf, PAGE_SIZE, %s\n, name);
 
return ret;
 }
@@ -284,19 +285,19 @@ static ssize_t hwrng_attr_available_show(struct device 
*dev,
ssize_t ret = 0;
struct hwrng *rng;
 
+   buf[0] = '\0';
err = mutex_lock_interruptible(rng_mutex);
if (err)
return -ERESTARTSYS;
-   buf[0] = '\0';
list_for_each_entry(rng, rng_list, list) {
strncat(buf, rng-name, PAGE_SIZE - ret - 1);
ret += strlen(rng-name);
strncat(buf,  , PAGE_SIZE - ret - 1);
ret++;
}
+   mutex_unlock(rng_mutex);
strncat(buf, \n, PAGE_SIZE - ret - 1);
ret++;
-   mutex_unlock(rng_mutex);
 
return ret;
 }
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

RFC virtio-rng: fail to read sysfs of a busy device

2014-09-09 Thread Amos Kong

(Resend to fix the subject)

Hi Amit, Rusty

RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1127062
steps:
- Read random data by 'dd if=/dev/hwrng of=/dev/null' in guest
- check sysfs files in the same time, 'cat /sys/class/misc/hw_random/rng_*'

Result: cat process will get stuck, it will return if we kill dd process.

We have some static variables (eg, current_rng, data_avail, etc) in 
hw_random/core.c,
they are protected by rng_mutex. I try to workaround this issue by undelay(100)
after mutex_unlock() in rng_dev_read(). This gives chance for 
hwrng_attr_*_show()
to get mutex.

This patch also contains some cleanup, moving some code out of mutex
protection.

Do you have some suggestion? Thanks.


diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index aa30a25..fa69020 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -194,6 +194,7 @@ static ssize_t rng_dev_read(struct file *filp, char __user 
*buf,
}
 
mutex_unlock(rng_mutex);
+   udelay(100);
 
if (need_resched())
schedule_timeout_interruptible(1);
@@ -233,10 +234,10 @@ static ssize_t hwrng_attr_current_store(struct device 
*dev,
int err;
struct hwrng *rng;
 
+   err = -ENODEV;
err = mutex_lock_interruptible(rng_mutex);
if (err)
return -ERESTARTSYS;
-   err = -ENODEV;
list_for_each_entry(rng, rng_list, list) {
if (strcmp(rng-name, buf) == 0) {
if (rng == current_rng) {
@@ -270,8 +271,8 @@ static ssize_t hwrng_attr_current_show(struct device *dev,
return -ERESTARTSYS;
if (current_rng)
name = current_rng-name;
-   ret = snprintf(buf, PAGE_SIZE, %s\n, name);
mutex_unlock(rng_mutex);
+   ret = snprintf(buf, PAGE_SIZE, %s\n, name);
 
return ret;
 }
@@ -284,19 +285,19 @@ static ssize_t hwrng_attr_available_show(struct device 
*dev,
ssize_t ret = 0;
struct hwrng *rng;
 
+   buf[0] = '\0';
err = mutex_lock_interruptible(rng_mutex);
if (err)
return -ERESTARTSYS;
-   buf[0] = '\0';
list_for_each_entry(rng, rng_list, list) {
strncat(buf, rng-name, PAGE_SIZE - ret - 1);
ret += strlen(rng-name);
strncat(buf,  , PAGE_SIZE - ret - 1);
ret++;
}
+   mutex_unlock(rng_mutex);
strncat(buf, \n, PAGE_SIZE - ret - 1);
ret++;
-   mutex_unlock(rng_mutex);
 
return ret;
 }

-- 
Amos.
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH] virtio-rng: complete have_data completion in removing device

2014-09-08 Thread Amos Kong

On Wed, Aug 06, 2014 at 01:55:29PM +0530, Amit Shah wrote:
 On (Wed) 06 Aug 2014 [16:05:41], Amos Kong wrote:
  On Wed, Aug 06, 2014 at 01:35:15AM +0800, Amos Kong wrote:
   When we try to hot-remove a busy virtio-rng device from QEMU monitor,
   the device can't be hot-removed. Because virtio-rng driver hangs at
   wait_for_completion_killable().
   
   This patch fixed the hang by completing have_data completion before
   unregistering a virtio-rng device.
  
  Hi Amit,
  
  Before applying this patch, it's blocking insider 
  wait_for_completion_killable() 
  Applied this patch, wait_for_completion_killable() returns 0,
  and vi-data_avail becomes 0, then rng_get_date() will return 0.
 
 Thanks for checking this.
 
  Is it expected result?
 
 I think what will happen is vi-data_avail will be set to whatever it
 was set last.  In case of a previous successful read request, the
 data_avail will be set to whatever number of bytes the host gave.  On
 doing a hot-unplug on the succeeding wait, the value in data_avail
 will be re-used, and the hwrng core will wrongly take some bytes in
 the buffer as input from the host.
 
 So, I think we need to set vi-data_avail = 0; before calling
 wait_event_completion_killable().
 
   Amit

In my latest debugging, I found the hang is caused by unexpected reading
when we started to remove the device.

I have two draft fix, 1) is skip unexpected reading by checking a
remove flag. 2) is unregistering device at the beginning of
remove_common(). I think second patch is better if it won't cause
new problem.

The original patch (complete in remove_common()) is still necessary.

Test results: hotplug issue disappeared (dd process will quit).


diff --git a/drivers/char/hw_random/virtio-rng.c
b/drivers/char/hw_random/virtio-rng.c
index 2e3139e..028797c 100644
--- a/drivers/char/hw_random/virtio-rng.c
+++ b/drivers/char/hw_random/virtio-rng.c
@@ -35,6 +35,7 @@ struct virtrng_info {
unsigned int data_avail;
int index;
bool busy;
+bool remove;
bool hwrng_register_done;
 };
 
@@ -68,6 +69,9 @@ static int virtio_read(struct hwrng *rng, void *buf,
size_t size, bool wait)
int ret;
struct virtrng_info *vi = (struct virtrng_info *)rng-priv;
 
+if (vi-remove)
+   return 0;
+
if (!vi-busy) {
vi-busy = true;
init_completion(vi-have_data);
@@ -137,6 +141,8 @@ static void remove_common(struct virtio_device
*vdev)
 {
struct virtrng_info *vi = vdev-priv;
 
+   vi-remove = true;
+   complete(vi-have_data);
vdev-config-reset(vdev);
vi-busy = false;
if (vi-hwrng_register_done)


diff --git a/drivers/char/hw_random/virtio-rng.c
b/drivers/char/hw_random/virtio-rng.c
index 2e3139e..9b8c2ce 100644
--- a/drivers/char/hw_random/virtio-rng.c
+++ b/drivers/char/hw_random/virtio-rng.c
@@ -137,10 +137,11 @@ static void remove_common(struct virtio_device
*vdev)
 {
struct virtrng_info *vi = vdev-priv;
 
-   vdev-config-reset(vdev);
-   vi-busy = false;
if (vi-hwrng_register_done)
hwrng_unregister(vi-hwrng);
+   complete(vi-have_data);
+   vdev-config-reset(vdev);
+   vi-busy = false;
vdev-config-del_vqs(vdev);
ida_simple_remove(rng_index_ida, vi-index);
kfree(vi);

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH RFC] virtio-pci: share config interrupt between virtio devices

2014-09-01 Thread Amos Kong

On Mon, Sep 01, 2014 at 09:37:30AM +0300, Michael S. Tsirkin wrote:


Hi Michael,

 On Mon, Sep 01, 2014 at 01:41:54PM +0800, Amos Kong wrote:
  One VM only has 128 msix interrupt, virtio-config interrupt
  has less workload. This patch shares one normal interrupt

Thanks for your quick reply.

 normal == INT#x? Please don't call it normal. The proper name is
 legacy INT#x.
 
OK

 So you are trying to use legacy INT#x at the same time
 with MSI-X?  This does not work: the PCI spec says:
   While enabled for MSI or MSI-X
   operation, a function is prohibited from using its INTx# pin (if
   implemented) to request
   service (MSI, MSI-X, and INTx# are mutually exclusive).

It means we can't use interrupt and triggered-interrupt together for
one PCI device. I will study this problem.
 
 does the patch work for you? If it does it might be a (minor) spec
 violation in kvm.

I did some basic testing (multiple nics, scp, ping, etc), it works.
 
 Besides, INT#x really leads to terrible performance because
 sharing is forced even if there aren't many devices.
 
 Why do we need INT#x?
 How about setting IRQF_SHARED for the config interrupt
 while using MSI-X?
 You'd have to read ISR to check that the interrupt was
 intended for your device.
 
I have a draft patch to share one MSI-X for all virtio-config, it has
some problem in hotplugging devices. I will continue this way.
 
  for configuration between virtio devices.
  
  Signed-off-by: Amos Kong ak...@redhat.com
  ---
   drivers/virtio/virtio_pci.c | 41 -
   1 file changed, 16 insertions(+), 25 deletions(-)
  
  diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
  index 3d1463c..b1263b3 100644
  --- a/drivers/virtio/virtio_pci.c
  +++ b/drivers/virtio/virtio_pci.c
  @@ -52,6 +52,7 @@ struct virtio_pci_device
  /* Name strings for interrupts. This size should be enough,
   * and I'm too lazy to allocate each name separately. */
  char (*msix_names)[256];
  +   char config_msix_name[256];
  /* Number of available vectors */
  unsigned msix_vectors;
  /* Vectors allocated, excluding per-vq vectors if any */
  @@ -282,12 +283,6 @@ static void vp_free_vectors(struct virtio_device *vdev)
  free_cpumask_var(vp_dev-msix_affinity_masks[i]);
   
  if (vp_dev-msix_enabled) {
  -   /* Disable the vector used for configuration */
  -   iowrite16(VIRTIO_MSI_NO_VECTOR,
  - vp_dev-ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
  -   /* Flush the write out to device */
  -   ioread16(vp_dev-ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
  -
  pci_disable_msix(vp_dev-pci_dev);
  vp_dev-msix_enabled = 0;
  }
  @@ -339,24 +334,18 @@ static int vp_request_msix_vectors(struct 
  virtio_device *vdev, int nvectors,
  goto error;
  vp_dev-msix_enabled = 1;
   
  -   /* Set the vector used for configuration */
  -   v = vp_dev-msix_used_vectors;
  -   snprintf(vp_dev-msix_names[v], sizeof *vp_dev-msix_names,
  +   /* Set shared IRQ for configuration */
  +   snprintf(vp_dev-config_msix_name, sizeof(*vp_dev-msix_names),
   %s-config, name);
  -   err = request_irq(vp_dev-msix_entries[v].vector,
  - vp_config_changed, 0, vp_dev-msix_names[v],
  +   err = request_irq(vp_dev-pci_dev-irq,
  + vp_config_changed,
  + IRQF_SHARED,
  + vp_dev-config_msix_name,
vp_dev);
  -   if (err)
  -   goto error;
  -   ++vp_dev-msix_used_vectors;
  -
  -   iowrite16(v, vp_dev-ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
  -   /* Verify we had enough resources to assign the vector */
  -   v = ioread16(vp_dev-ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
  -   if (v == VIRTIO_MSI_NO_VECTOR) {
  -   err = -EBUSY;
  +   if (!err)
  +   vp_dev-intx_enabled = 1;
  +   else
  goto error;
  -   }
   
  if (!per_vq_vectors) {
  /* Shared vector for all VQs */
  @@ -535,14 +524,16 @@ static int vp_try_to_find_vqs(struct virtio_device 
  *vdev, unsigned nvqs,
  goto error_request;
  } else {
  if (per_vq_vectors) {
  -   /* Best option: one for change interrupt, one per vq. */
  -   nvectors = 1;
  +   /* Best option: one normal interrupt for change,
  +  one msix per vq. */
  +   nvectors = 0;
  for (i = 0; i  nvqs; ++i)
  if (callbacks[i])
  ++nvectors;
  } else {
  -   /* Second best: one for change, shared for all vqs. */
  -   nvectors = 2;
  +   /* Second best: one normal interrupt for
  +  change, share one msix for all vqs. */
  +   nvectors = 1

[PATCH RFC] virtio-pci: share config interrupt between virtio devices

2014-08-31 Thread Amos Kong

One VM only has 128 msix interrupt, virtio-config interrupt
has less workload. This patch shares one normal interrupt
for configuration between virtio devices.

Signed-off-by: Amos Kong ak...@redhat.com
---
 drivers/virtio/virtio_pci.c | 41 -
 1 file changed, 16 insertions(+), 25 deletions(-)

diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index 3d1463c..b1263b3 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -52,6 +52,7 @@ struct virtio_pci_device
/* Name strings for interrupts. This size should be enough,
 * and I'm too lazy to allocate each name separately. */
char (*msix_names)[256];
+   char config_msix_name[256];
/* Number of available vectors */
unsigned msix_vectors;
/* Vectors allocated, excluding per-vq vectors if any */
@@ -282,12 +283,6 @@ static void vp_free_vectors(struct virtio_device *vdev)
free_cpumask_var(vp_dev-msix_affinity_masks[i]);
 
if (vp_dev-msix_enabled) {
-   /* Disable the vector used for configuration */
-   iowrite16(VIRTIO_MSI_NO_VECTOR,
- vp_dev-ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
-   /* Flush the write out to device */
-   ioread16(vp_dev-ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
-
pci_disable_msix(vp_dev-pci_dev);
vp_dev-msix_enabled = 0;
}
@@ -339,24 +334,18 @@ static int vp_request_msix_vectors(struct virtio_device 
*vdev, int nvectors,
goto error;
vp_dev-msix_enabled = 1;
 
-   /* Set the vector used for configuration */
-   v = vp_dev-msix_used_vectors;
-   snprintf(vp_dev-msix_names[v], sizeof *vp_dev-msix_names,
+   /* Set shared IRQ for configuration */
+   snprintf(vp_dev-config_msix_name, sizeof(*vp_dev-msix_names),
 %s-config, name);
-   err = request_irq(vp_dev-msix_entries[v].vector,
- vp_config_changed, 0, vp_dev-msix_names[v],
+   err = request_irq(vp_dev-pci_dev-irq,
+ vp_config_changed,
+ IRQF_SHARED,
+ vp_dev-config_msix_name,
  vp_dev);
-   if (err)
-   goto error;
-   ++vp_dev-msix_used_vectors;
-
-   iowrite16(v, vp_dev-ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
-   /* Verify we had enough resources to assign the vector */
-   v = ioread16(vp_dev-ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
-   if (v == VIRTIO_MSI_NO_VECTOR) {
-   err = -EBUSY;
+   if (!err)
+   vp_dev-intx_enabled = 1;
+   else
goto error;
-   }
 
if (!per_vq_vectors) {
/* Shared vector for all VQs */
@@ -535,14 +524,16 @@ static int vp_try_to_find_vqs(struct virtio_device *vdev, 
unsigned nvqs,
goto error_request;
} else {
if (per_vq_vectors) {
-   /* Best option: one for change interrupt, one per vq. */
-   nvectors = 1;
+   /* Best option: one normal interrupt for change,
+  one msix per vq. */
+   nvectors = 0;
for (i = 0; i  nvqs; ++i)
if (callbacks[i])
++nvectors;
} else {
-   /* Second best: one for change, shared for all vqs. */
-   nvectors = 2;
+   /* Second best: one normal interrupt for
+  change, share one msix for all vqs. */
+   nvectors = 1;
}
 
err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors);
-- 
1.9.3

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH] virtio-rng: complete have_data completion in removing device

2014-08-06 Thread Amos Kong

On Wed, Aug 06, 2014 at 01:35:15AM +0800, Amos Kong wrote:
 When we try to hot-remove a busy virtio-rng device from QEMU monitor,
 the device can't be hot-removed. Because virtio-rng driver hangs at
 wait_for_completion_killable().
 
 This patch fixed the hang by completing have_data completion before
 unregistering a virtio-rng device.

Hi Amit,

Before applying this patch, it's blocking insider 
wait_for_completion_killable() 
Applied this patch, wait_for_completion_killable() returns 0,
and vi-data_avail becomes 0, then rng_get_date() will return 0.

Is it expected result?

 
 Signed-off-by: Amos Kong ak...@redhat.com
 Cc: sta...@vger.kernel.org
 ---
  drivers/char/hw_random/virtio-rng.c | 1 +
  1 file changed, 1 insertion(+)
 
 diff --git a/drivers/char/hw_random/virtio-rng.c 
 b/drivers/char/hw_random/virtio-rng.c
 index 0027137..416b15c 100644
 --- a/drivers/char/hw_random/virtio-rng.c
 +++ b/drivers/char/hw_random/virtio-rng.c
 @@ -137,6 +137,7 @@ static void remove_common(struct virtio_device *vdev)
   struct virtrng_info *vi = vdev-priv;
  
   vdev-config-reset(vdev);
 + complete(vi-have_data);
   vi-busy = false;
   if (vi-hwrng_register_done)
   hwrng_unregister(vi-hwrng);
 -- 
 1.9.3
 
 ___
 Virtualization mailing list
 Virtualization@lists.linux-foundation.org
 https://lists.linuxfoundation.org/mailman/listinfo/virtualization

-- 
Amos.
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: When I boot two virtio-rng devices, guest will hang

2014-08-05 Thread Amos Kong

3.16 (guest hangs with two rng devices)
3.16 + quick fix (can startup with two rng devices) (hotplug issue 1 + hotplug 
issue 2 exist)
lates torvalds/linux.git + amit 4 patches (can startup with two rng devices) 
(only hotplug issue 2 exists)

However, the 4 patches also fixed the hang issue, the hotplug issue was fixed a 
little.
The hotplug issue is effected by the backend, or maybe it's not a real issue, 
because
the rng device can be hot-removed after dd process is killed.


Hotplug issue 1:
  1. boot up guest with two rng device (rng0 uses /dev/urandom, rng1 uses 
/dev/random)
  2. read data by dd in guest
  3 (option 1). hot-remove rng0, then hot-remove rng1 - result: _only rng1_ 
can't be removed until dd process is killed
  3 (option 2). hot-remove rng1, then hot-remove rng0 - result: two devices 
can be removed successfully, dd process will exit automatically.

  If we use /dev/urandom for rng0 and rng1, _rng0  rng1_ can be removed, dd 
process will exit automatically.

Hotplug issue 2:
  If we use /dev/random for rng0 and rng1, _rng0  rng1_ can't be removed until 
dd process is killed.

Hotplug issue 3:
  If we use /dev/random for rng0 and rng1, _only rng1_ can't be removed until 
dd process is killed.


(The difference between /dev/random and /dev/urandom is the speed.)

Thanks, Amos
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH] virtio-rng: complete have_data completion in removing device

2014-08-05 Thread Amos Kong

When we try to hot-remove a busy virtio-rng device from QEMU monitor,
the device can't be hot-removed. Because virtio-rng driver hangs at
wait_for_completion_killable().

This patch fixed the hang by completing have_data completion before
unregistering a virtio-rng device.

Signed-off-by: Amos Kong ak...@redhat.com
Cc: sta...@vger.kernel.org
---
 drivers/char/hw_random/virtio-rng.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/char/hw_random/virtio-rng.c 
b/drivers/char/hw_random/virtio-rng.c
index 0027137..416b15c 100644
--- a/drivers/char/hw_random/virtio-rng.c
+++ b/drivers/char/hw_random/virtio-rng.c
@@ -137,6 +137,7 @@ static void remove_common(struct virtio_device *vdev)
struct virtrng_info *vi = vdev-priv;
 
vdev-config-reset(vdev);
+   complete(vi-have_data);
vi-busy = false;
if (vi-hwrng_register_done)
hwrng_unregister(vi-hwrng);
-- 
1.9.3

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: When I boot two virtio-rng devices, guest will hang

2014-08-05 Thread Amos Kong

On Tue, Aug 05, 2014 at 06:28:54PM +0800, Amos Kong wrote:
 3.16 (guest hangs with two rng devices)
 3.16 + quick fix (can startup with two rng devices) (hotplug issue 1 + 
 hotplug issue 2 exist)
 lates torvalds/linux.git + amit 4 patches (can startup with two rng devices) 
 (only hotplug issue 2 exists)
 
 However, the 4 patches also fixed the hang issue, the hotplug issue was fixed 
 a little.
 The hotplug issue is effected by the backend, or maybe it's not a real issue, 
 because
 the rng device can be hot-removed after dd process is killed.
 
 
 Hotplug issue 1:
   1. boot up guest with two rng device (rng0 uses /dev/urandom, rng1 uses 
 /dev/random)
   2. read data by dd in guest
   3 (option 1). hot-remove rng0, then hot-remove rng1 - result: _only rng1_ 
 can't be removed until dd process is killed
   3 (option 2). hot-remove rng1, then hot-remove rng0 - result: two devices 
 can be removed successfully, dd process will exit automatically.
 
   If we use /dev/urandom for rng0 and rng1, _rng0  rng1_ can be removed, dd 
 process will exit automatically.
 
 Hotplug issue 2:
   If we use /dev/random for rng0 and rng1, _rng0  rng1_ can't be removed 
 until dd process is killed.
 
 Hotplug issue 3:
   If we use /dev/random for rng0 and rng1, _only rng1_ can't be removed until 
 dd process is killed.

Hi Amit,

I finally found the root problem and posted a fix to upstream:
  
http://lists.linuxfoundation.org/pipermail/virtualization/2014-August/027049.html

It can help to fix the hotplug issues on 3.16  latest kernel, so
stable kernel is CCed.

 (The difference between /dev/random and /dev/urandom is the speed.)
 
 Thanks, Amos

-- 
Amos.
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: When I boot two virtio-rng devices, guest will hang

2014-07-28 Thread Amos Kong

On Mon, Jul 28, 2014 at 01:25:14PM +0530, Amit Shah wrote:
 On (Mon) 28 Jul 2014 [15:32:42], Amos Kong wrote:
  QEMU commandline:
  
  ./x86_64-softmmu/qemu-system-x86_64 --enable-kvm   -m 2000 -drive 
  file=/images/nolvm.qcow2 --kernel /home/devel/linux/arch/x86/boot/bzImage 
  -append ro root=/dev/sda1 console=ttyS0,115200  -monitor 
  unix:/tmp/m,nowait,server -device 
  virtio-net-pci,netdev=h0,vectors=17,mq=on,id=n0 -netdev tap,id=h0,queues=8 
  -device virtio-net-pci,netdev=h1,vectors=0,mq=on,id=n1 -netdev 
  tap,id=h1,queues=8  -vnc :0 -mon chardev=qmp,mode=control,pretty=on 
  -chardev socket,id=qmp,host=localhost,port=1234,server,nowait -serial stdio 
  -object rng-random,filename=/dev/urandom,id=rng0   -device 
  virtio-rng-pci,rng=rng0,id=h0 -object 
  rng-random,filename=/dev/urandom,id=rng1   -device 
  virtio-rng-pci,rng=rng1,id=h1
  
  It works when I only add one virtio-rng device. Did you touch this
  problem?
  
  I'm using latest net-next/master (ac3d2e5a9ef2f4d8f57c50070c4883ecb7cec29f)
 
Hi Amit,

 snip
  [0.223503] Non-volatile memory driver v1.3
  [1.172293] tsc: Refined TSC clocksource calibration: 2893.436 MHz
  qemu: terminating on signal 2   -- (I have to cancel QEMU 
  process by Ctrl + C)
 
 This looks similar to what I saw when driver asks for randomness from the host
 before probe is completed.
 
 Does the following patch help?

This patch was already inclued in latest net-next/master
patch commit: e052dbf554610e2104c5a7518c4d8374bed701bb

 While the driver is setup (DRIVER_OK is set), the vqs aren't marked
 usable before the probe for the other device finishes as well.  That's
 not a scenario I considered.  We can try to put this patch in 3.16,
 but it won't be applicable for 3.17, where this is handled the proper
 way.  Also, 3.15 isn't affected, since that doesn't have multi-device
 support.
 
 Also attaching a patch that enables traces in qemu so it's easier to
 see what's going on.

 diff --git a/drivers/char/hw_random/virtio-rng.c
 b/drivers/char/hw_random/virtio-rng.c
 index e9b15bc..124cac5 100644
 --- a/drivers/char/hw_random/virtio-rng.c
 +++ b/drivers/char/hw_random/virtio-rng.c
 @@ -34,12 +34,11 @@ struct virtrng_info {
 unsigned int data_avail;
 struct completion have_data;
 bool busy;
 +   bool probe_done;
 char name[25];
 int index;
  };
  
 -static bool probe_done;
 -
  static void random_recv_done(struct virtqueue *vq)
  {
 struct virtrng_info *vi = vq-vdev-priv;
 @@ -73,7 +72,7 @@ static int virtio_read(struct hwrng *rng, void *buf,
 -size_t size, bool wait)
  * Don't ask host for data till we're setup.  This call can
  * happen during hwrng_register(), after commit d9e7972619.
  */
 -   if (unlikely(!probe_done))
 +   if (unlikely(!vi-probe_done))
 return 0;
  
 if (!vi-busy) {
 @@ -146,7 +145,7 @@ static int probe_common(struct virtio_device
 *vdev)
 return err;
 }
  
 -   probe_done = true;
 +   vi-probe_done = true;
 return 0;
  }
  
 
   Amit

 From ec1aa555b67628beefa0ac6902baa2cc2e156f58 Mon Sep 17 00:00:00 2001
 Message-Id: 
 ec1aa555b67628beefa0ac6902baa2cc2e156f58.1406533638.git.amit.s...@redhat.com
 From: Amit Shah amit.s...@redhat.com
 Date: Mon, 21 Jul 2014 14:46:28 +0530
 Subject: [PATCH 1/1] virtio-rng: add some trace events
 
 Add some trace events to virtio-rng for easier debugging
 
 Signed-off-by: Amit Shah amit.s...@redhat.com
 ---
  hw/virtio/virtio-rng.c | 7 +++
  trace-events   | 5 +
  2 files changed, 12 insertions(+)
 
 diff --git a/hw/virtio/virtio-rng.c b/hw/virtio/virtio-rng.c
 index 7c5a675..4a6472b 100644
 --- a/hw/virtio/virtio-rng.c
 +++ b/hw/virtio/virtio-rng.c
 @@ -16,6 +16,7 @@
  #include hw/virtio/virtio-rng.h
  #include sysemu/rng.h
  #include qom/object_interfaces.h
 +#include trace.h
  
  static bool is_guest_ready(VirtIORNG *vrng)
  {
 @@ -24,6 +25,7 @@ static bool is_guest_ready(VirtIORNG *vrng)
   (vdev-status  VIRTIO_CONFIG_S_DRIVER_OK)) {
  return true;
  }
 +trace_virtio_rng_guest_not_ready(vrng);
  return false;
  }
  
 @@ -62,6 +64,7 @@ static void chr_read(void *opaque, const void *buf, size_t 
 size)
  offset += len;
  
  virtqueue_push(vrng-vq, elem, len);
 +trace_virtio_rng_pushed(vrng, len);
  }
  virtio_notify(vdev, vrng-vq);
  }
 @@ -81,7 +84,11 @@ static void virtio_rng_process(VirtIORNG *vrng)
  quota = MIN((uint64_t)vrng-quota_remaining, (uint64_t)UINT32_MAX);
  }
  size = get_request_size(vrng-vq, quota);
 +
 +trace_virtio_rng_request(vrng, size, quota);
 +
  size = MIN(vrng-quota_remaining, size);
 +
  if (size) {
  rng_backend_request_entropy(vrng-rng, size, chr_read, vrng);
  }
 diff --git a/trace-events b/trace-events
 index 11a17a8..99f39ac 100644
 --- a/trace-events
 +++ b/trace-events
 @@ -41,6

Re: When I boot two virtio-rng devices, guest will hang

2014-07-28 Thread Amos Kong

On Mon, Jul 28, 2014 at 02:42:13PM +0530, Amit Shah wrote:
 On (Mon) 28 Jul 2014 [16:49:20], Amos Kong wrote:
  On Mon, Jul 28, 2014 at 01:25:14PM +0530, Amit Shah wrote:
   On (Mon) 28 Jul 2014 [15:32:42], Amos Kong wrote:
QEMU commandline:

./x86_64-softmmu/qemu-system-x86_64 --enable-kvm   -m 2000 -drive 
file=/images/nolvm.qcow2 --kernel 
/home/devel/linux/arch/x86/boot/bzImage -append ro root=/dev/sda1 
console=ttyS0,115200  -monitor unix:/tmp/m,nowait,server -device 
virtio-net-pci,netdev=h0,vectors=17,mq=on,id=n0 -netdev 
tap,id=h0,queues=8 -device 
virtio-net-pci,netdev=h1,vectors=0,mq=on,id=n1 -netdev 
tap,id=h1,queues=8  -vnc :0 -mon chardev=qmp,mode=control,pretty=on 
-chardev socket,id=qmp,host=localhost,port=1234,server,nowait -serial 
stdio -object rng-random,filename=/dev/urandom,id=rng0   -device 
virtio-rng-pci,rng=rng0,id=h0 -object 
rng-random,filename=/dev/urandom,id=rng1   -device 
virtio-rng-pci,rng=rng1,id=h1

It works when I only add one virtio-rng device. Did you touch this
problem?

I'm using latest net-next/master 
(ac3d2e5a9ef2f4d8f57c50070c4883ecb7cec29f)
   
  Hi Amit,
  
   snip
[0.223503] Non-volatile memory driver v1.3
[1.172293] tsc: Refined TSC clocksource calibration: 2893.436 MHz
qemu: terminating on signal 2   -- (I have to cancel 
QEMU process by Ctrl + C)
   
   This looks similar to what I saw when driver asks for randomness from the 
   host
   before probe is completed.
   
   Does the following patch help?
  
  This patch was already inclued in latest net-next/master
  patch commit: e052dbf554610e2104c5a7518c4d8374bed701bb
 
 No, it's a different one, goes on top of the commit you referenced.
 
Thanks. This patch fixed the problem.

-- 
Amos.
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH] virtio-rng: support multiple virtio-rng devices

2014-04-25 Thread Amos Kong

Current hwrng core supports to register multiple hwrng devices,
and there is only one device really works in the same time.
QEMU alsu supports to have multiple virtio-rng backends.

This patch changes virtio-rng driver to support multiple
virtio-rng devices.

]# cat /sys/class/misc/hw_random/rng_available
virtio_rng.0 virtio_rng.1
]# cat /sys/class/misc/hw_random/rng_current
virtio_rng.0
]# echo -n virtio_rng.1  /sys/class/misc/hw_random/rng_current
]# dd if=/dev/hwrng of=/dev/null

Signed-off-by: Amos Kong ak...@redhat.com
---
 drivers/char/hw_random/virtio-rng.c | 102 ++--
 include/linux/hw_random.h   |   2 +-
 2 files changed, 64 insertions(+), 40 deletions(-)

diff --git a/drivers/char/hw_random/virtio-rng.c 
b/drivers/char/hw_random/virtio-rng.c
index 2ce0e22..12e242b 100644
--- a/drivers/char/hw_random/virtio-rng.c
+++ b/drivers/char/hw_random/virtio-rng.c
@@ -25,88 +25,108 @@
 #include linux/virtio_rng.h
 #include linux/module.h
 
-static struct virtqueue *vq;
-static unsigned int data_avail;
-static DECLARE_COMPLETION(have_data);
-static bool busy;
+
+struct virtrng_info {
+   struct virtio_device *vdev;
+   struct hwrng hwrng;
+   struct virtqueue *vq;
+   unsigned int data_avail;
+   struct completion have_data;
+   bool busy;
+};
 
 static void random_recv_done(struct virtqueue *vq)
 {
+   struct virtrng_info *vi = vq-vdev-priv;
+
/* We can get spurious callbacks, e.g. shared IRQs + virtio_pci. */
-   if (!virtqueue_get_buf(vq, data_avail))
+   if (!virtqueue_get_buf(vi-vq, vi-data_avail))
return;
 
-   complete(have_data);
+   complete(vi-have_data);
 }
 
 /* The host will fill any buffer we give it with sweet, sweet randomness. */
-static void register_buffer(u8 *buf, size_t size)
+static void register_buffer(struct virtrng_info *vi, u8 *buf, size_t size)
 {
struct scatterlist sg;
 
sg_init_one(sg, buf, size);
 
/* There should always be room for one buffer. */
-   virtqueue_add_inbuf(vq, sg, 1, buf, GFP_KERNEL);
+   virtqueue_add_inbuf(vi-vq, sg, 1, buf, GFP_KERNEL);
 
-   virtqueue_kick(vq);
+   virtqueue_kick(vi-vq);
 }
 
 static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait)
 {
int ret;
+   struct virtrng_info *vi = (struct virtrng_info *)rng-priv;
 
-   if (!busy) {
-   busy = true;
-   init_completion(have_data);
-   register_buffer(buf, size);
+   if (!vi-busy) {
+   vi-busy = true;
+   init_completion(vi-have_data);
+   register_buffer(vi, buf, size);
}
 
if (!wait)
return 0;
 
-   ret = wait_for_completion_killable(have_data);
+   ret = wait_for_completion_killable(vi-have_data);
if (ret  0)
return ret;
 
-   busy = false;
+   vi-busy = false;
 
-   return data_avail;
+   return vi-data_avail;
 }
 
 static void virtio_cleanup(struct hwrng *rng)
 {
-   if (busy)
-   wait_for_completion(have_data);
-}
-
+   struct virtrng_info *vi = (struct virtrng_info *)rng-priv;
 
-static struct hwrng virtio_hwrng = {
-   .name   = virtio,
-   .cleanup= virtio_cleanup,
-   .read   = virtio_read,
-};
+   if (vi-busy)
+   wait_for_completion(vi-have_data);
+}
 
 static int probe_common(struct virtio_device *vdev)
 {
-   int err;
+   int err, i;
+   struct virtrng_info *vi = NULL;
+
+   vi = kmalloc(sizeof(struct virtrng_info), GFP_KERNEL);
+   vi-hwrng.name = kmalloc(40, GFP_KERNEL);
+   init_completion(vi-have_data);
+
+   vi-hwrng.read = virtio_read;
+   vi-hwrng.cleanup = virtio_cleanup;
+   vi-hwrng.priv = (unsigned long)vi;
+   vdev-priv = vi;
 
-   if (vq) {
-   /* We only support one device for now */
-   return -EBUSY;
-   }
/* We expect a single virtqueue. */
-   vq = virtio_find_single_vq(vdev, random_recv_done, input);
-   if (IS_ERR(vq)) {
-   err = PTR_ERR(vq);
-   vq = NULL;
+   vi-vq = virtio_find_single_vq(vdev, random_recv_done, input);
+   if (IS_ERR(vi-vq)) {
+   err = PTR_ERR(vi-vq);
+   kfree(vi-hwrng.name);
+   vi-vq = NULL;
+   kfree(vi);
+   vi = NULL;
return err;
}
 
-   err = hwrng_register(virtio_hwrng);
+   i = 0;
+   do {
+   sprintf(vi-hwrng.name, virtio_rng.%d, i++);
+   err = hwrng_register(vi-hwrng);
+   } while (err == -EEXIST);
+
if (err) {
vdev-config-del_vqs(vdev);
-   vq = NULL;
+   kfree(vi-hwrng.name);
+   vi-vq = NULL;
+   kfree(vi);
+   vi = NULL;
return err;
}
 
@@ -115,11 +135,15 @@ static int probe_common

Re: RFC: sharing config interrupt between virtio devices for saving MSI

2014-04-22 Thread Amos Kong

On Sat, Apr 19, 2014 at 12:08:15PM +0800, Amos Kong wrote:
 
 Hi all,
 
 I'm working on this item in Upstream Networking todolist:
 
 | - Sharing config interrupts
 |Support more devices by sharing a single msi vector
 |between multiple virtio devices.
 |(Applies to virtio-blk too).
 
 I have this solution here, and only have draft patches of Solution
 1  2, let's discuss if solution 3 is feasible.
 
 * We should not introduce perf regression in this change
 * little effect is acceptable because we are _sharing_ interrupt
 
 Solution 1:
 ==
 share one LSI interrupt for configuration change of all virtio devices.
 Draft patch: share_lsi_for_all_config.patch
 
 Solution 2:
 ==
 share one MSI interrupt for configuration change of all virtio devices.
 Draft patch: share_msix_for_all_config.patch
 
 Solution 3:
 ==
 dynamic adjust interrupt policy when device is added or removed
 Current:
 ---
 - try to allocate a MSI to device's config
 - try to allocate a MSI for each vq
 - share one MSI for all vqs of one device
 - share one LSI for config  vqs of one device
 
 additional dynamic policies:
 ---
 - if there is no enough MSI, adjust interrupt allocation for returning
   some MSI
- try to find a device that allocated one MSI for each vq, change
  it to share one MSI for saving the MSI
- try to share one MSI for config of all virtio_pci devices
- try to share one LSI for config of all devices
 
 - if device is removed, try to re-allocate freed MSI to existed
   devices, if they aren't in best status (one MSI for config, one
   MSI for each vq)
- if config of all devices is sharing one LSI, try to upgrade it to MSI
- if config of all devices is sharing one MSI, try to allocate one
  MSI for configuration change of each device
- try to find a device that is sharing one MSI for all vqs, try to
  allocate one MSI for each vq
 
 
 
 BTW, I saw we still notify all vqs even VIRTIO_PCI_ISR_CONFIG bit of
 isr is set, is it necessary?

[Reply myself]

Quote from Virtio-spec:

| 2.4.3 Dealing With Configuration Changes
| Some virtio PCI devices can change the device configuration state, as 
reflected
| in the virtio header in the PCI configuration space. In this case:
| 
| 1. If MSI-X capability is disabled: an interrupt is delivered and the sec-
| ond highest bit is set in the ISR Status field to indicate that the driver
| should re-examine the configuration space.

| Note that a single interrupt can
| indicate both that one or more virtqueue has been used and that the con-
| figuration space has changed: even if the config bit is set, virtqueues must
| be scanned. 

It seems current code is fine.

 
 diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
 index 101db3f..176aabc 100644
 --- a/drivers/virtio/virtio_pci.c
 +++ b/drivers/virtio/virtio_pci.c
 @@ -259,9 +259,9 @@ static irqreturn_t vp_interrupt(int irq, void
 *opaque)
  
 /* Configuration change?  Tell driver if it wants to know. */
 if (isr  VIRTIO_PCI_ISR_CONFIG)
 -   vp_config_changed(irq, opaque);
 -
 -   return vp_vring_interrupt(irq, opaque);
 +   return vp_config_changed(irq, opaque);
 +   else
 +   return vp_vring_interrupt(irq, opaque);
  }
  
  static void vp_free_vectors(struct virtio_device *vdev)
 
 -- 
   Amos.

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH] virtio_net: return error when there is no virtqueue or MQ isn't support

2014-04-21 Thread Amos Kong

Currently ethtool returns zero if there is no virtqueue or MQ isn't
support, we should return -ENOTSUPP to notice user.

Signed-off-by: Amos Kong ak...@redhat.com
---
 drivers/net/virtio_net.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 8a852b5..eaf8266 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1053,7 +1053,7 @@ static int virtnet_set_queues(struct virtnet_info *vi, 
u16 queue_pairs)
struct net_device *dev = vi-dev;
 
if (!vi-has_cvq || !virtio_has_feature(vi-vdev, VIRTIO_NET_F_MQ))
-   return 0;
+   return -ENOTSUPP;
 
s.virtqueue_pairs = queue_pairs;
sg_init_one(sg, s, sizeof(s));
-- 
1.9.0

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH] virtio_net: return error when there is no virtqueue or MQ isn't support

2014-04-21 Thread Amos Kong

On Tue, Apr 22, 2014 at 11:11:50AM +0800, Jason Wang wrote:
 On 04/21/2014 11:11 PM, Amos Kong wrote:
  Currently ethtool returns zero if there is no virtqueue or MQ isn't
  support, we should return -ENOTSUPP to notice user.
 
  Signed-off-by: Amos Kong ak...@redhat.com
  ---
   drivers/net/virtio_net.c | 2 +-
   1 file changed, 1 insertion(+), 1 deletion(-)
 
  diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
  index 8a852b5..eaf8266 100644
  --- a/drivers/net/virtio_net.c
  +++ b/drivers/net/virtio_net.c
  @@ -1053,7 +1053,7 @@ static int virtnet_set_queues(struct virtnet_info 
  *vi, u16 queue_pairs)
  struct net_device *dev = vi-dev;
   
  if (!vi-has_cvq || !virtio_has_feature(vi-vdev, VIRTIO_NET_F_MQ))
  -   return 0;
  +   return -ENOTSUPP;
   
  s.virtqueue_pairs = queue_pairs;
  sg_init_one(sg, s, sizeof(s));
 
 How about check the return value of virtnet_set_queues() in
 virtnet_restore() also?

If we migrate a guest from MQ supported side to MQ non-supported side,
virtnet_restore() will not return error right now.

If we return error when no vq or no mq support, and check return value
of virtnet_set_queues() in virtnet_restore(), migration will fail. 
Is it expected?


-- 
Amos.
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH] virtio_net: return error when there is no virtqueue or MQ isn't support

2014-04-21 Thread Amos Kong

On Tue, Apr 22, 2014 at 11:53:34AM +0800, Jason Wang wrote:
 On 04/22/2014 11:23 AM, Amos Kong wrote:
  On Tue, Apr 22, 2014 at 11:11:50AM +0800, Jason Wang wrote:
  On 04/21/2014 11:11 PM, Amos Kong wrote:
  Currently ethtool returns zero if there is no virtqueue or MQ isn't
  support, we should return -ENOTSUPP to notice user.
 
  Signed-off-by: Amos Kong ak...@redhat.com

NAK this patch.

  ---
   drivers/net/virtio_net.c | 2 +-
   1 file changed, 1 insertion(+), 1 deletion(-)
 
  diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
  index 8a852b5..eaf8266 100644
  --- a/drivers/net/virtio_net.c
  +++ b/drivers/net/virtio_net.c
  @@ -1053,7 +1053,7 @@ static int virtnet_set_queues(struct virtnet_info 
  *vi, u16 queue_pairs)
struct net_device *dev = vi-dev;
   
if (!vi-has_cvq || !virtio_has_feature(vi-vdev, VIRTIO_NET_F_MQ))
  - return 0;
  + return -ENOTSUPP;
   
s.virtqueue_pairs = queue_pairs;
sg_init_one(sg, s, sizeof(s));
  How about check the return value of virtnet_set_queues() in
  virtnet_restore() also?
  If we migrate a guest from MQ supported side to MQ non-supported side,
  virtnet_restore() will not return error right now.
 
  If we return error when no vq or no mq support, and check return value
  of virtnet_set_queues() in virtnet_restore(), migration will fail. 
  Is it expected?
 
 
 
 I think what you mean is restore not migration here. The point is not to
 execute VIRTIO_NET_CTRL_MQ when multiqueue is not supported, so it's the
 caller responsibility to pass a valid queue_pairs to
 virtnet_set_queues(). This patch is not necessary, your last patch is
 fine enough to solve the issue. Do you agree?
 
Yes.

If mq is supported,
  - change queue to 0: virtnet_send_command() will return error
  - change queue to = 1: success

If mq isn't support, current queue is 1.
  - change queue to 1: return 0 at mq checking point, nothing is
   changed, expected.
  * change queue to 0:   we should return -INVAL
(my first patch checked this in virtnet_set_channels())
  * change queue to  1: we should return -INVAL
(we already checked 'queue_pairs  vi-max_queue_pairs' in 
virtnet_set_channels())

The * items are lack in current virtnet_send_command(), but can
already checked it in virtnet_set_channels(), it's enough.

So we can just apply my first patch [1].

[1] http://www.spinics.net/lists/netdev/msg279631.html

 Thanks

Thanks.

BTW, -ENOTSUPP isn't an invalid ERROR for virtio-net.

/* Defined for the NFSv3 protocol */
#define ENOTSUPP524 /* Operation is not supported */

-- 
Amos.
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH] virtio_net: zero is an invald queue_pairs number

2014-04-17 Thread Amos Kong

Execute ethtool -L eth0 combined 0 in guest, if multiqueue
is enabled, virtnet_send_command() will return -EINVAL error,
there is a validation in QEMU.

But if multiqueue is disabled, virtnet_set_queues() will just
return zero (success). We should return error for this situation.

Signed-off-by: Amos Kong ak...@redhat.com
---
 drivers/net/virtio_net.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 7b68746..8a852b5 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1285,7 +1285,7 @@ static int virtnet_set_channels(struct net_device *dev,
if (channels-rx_count || channels-tx_count || channels-other_count)
return -EINVAL;
 
-   if (queue_pairs  vi-max_queue_pairs)
+   if (queue_pairs  vi-max_queue_pairs || queue_pairs == 0)
return -EINVAL;
 
get_online_cpus();
-- 
1.9.0

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [Qemu-devel] [RFC qemu PATCH] only writing out the last byte of MAC makes it have effect

2013-04-08 Thread Amos Kong

On Mon, Mar 25, 2013 at 08:58:49AM +0200, Michael S. Tsirkin wrote:
 On Mon, Mar 25, 2013 at 10:23:57AM +0800, Amos Kong wrote:
  On Fri, Mar 22, 2013 at 10:45:09AM +1030, Rusty Russell wrote:
   Michael S. Tsirkin m...@redhat.com writes:
On Thu, Mar 21, 2013 at 02:44:50PM +0800, Amos Kong wrote:
The lengcy guests don't have mac programming command, we don't know 
when
it's safe to use MAC. This patch changed qemu to makes MAC change 
effect
when the last byte of MAC is written to config space.

MAC address takes first 6 bytes of config space of virtio-net, the addr
is 5 when the last byte is written in virtio_config_writeb().

MAC change will effect when n-mac is updated in 
virtio_net_set_config().

Signed-off-by: Amos Kong ak...@redhat.com
   
Let's see what Rusty says about the spec change.
   
   Implementation notes like this belong as a footnote, eg:
   
   For older systems, it is recommended and typical that the device
   write byte 5 of the mac address last, so devices can use that as
   a trigger to commit the mac address change.
   
   Now, is this a real, or theoretical issue?  Have we seen this problem in
   practice, or should we continue to ignore it?
  
  Hi Rusty, Michael
  
  I didn't touch any problem. MST, and you?
  
  In Linux guest, we should disable the interface before changing mac address.
   ifconfig eth0 down
   ifconfig eth0 hw ether 10:12:13:14:15:16
   ifconfig eth0 up
   
  In Windows 7 guest, after changing mac address in register table,
  re-enabling interface to make it effect.
   reg add HKLM 
  SYSTEM\CurrentControlSet\Control\Class\{4D36E972-E325-11CE-BFC1-08002BE10318}\0001]
   /v NetworkAddress /d 0123456789AB
   netsh interface set interface Local Area Connection DISABLED
   netsh interface set interface Local Area Connection ENABLED
  
  
  So when we change the mac address, guest os always disable interface
  to receive all packages. It seems the theoretical issue doesn't exist?
  
   Cheers,
   Rusty.
   
  
  -- 
  Amos.
 
 Nope. Looks like no spec change is necessary. We already say it's
 not atomic and it looks like guests expect exactly that
 and disable link to prevent strange issues.

I just found Jiri has a commit to allow changing mac when iface
is running. It seems only the virt nics are allowned, right?

But the leagcy guests don't allow to change mac when interface is
running, so no spec change is necessary.

[upstream kernel]
commit d4fc6918f4b3cc844185f59fc518351525950449
Author: Jiri Pirko jpi...@redhat.com
Date:   Wed Jun 27 05:27:46 2012 +

virtio_net: allow to change mac when iface is running

Signed-off-by: Jiri Pirko jpi...@redhat.com
Signed-off-by: David S. Miller da...@davemloft.net

RH-BZ: https://bugzilla.redhat.com/show_bug.cgi?id=882868

-- 
Amos.
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [Qemu-devel] [RFC qemu PATCH] only writing out the last byte of MAC makes it have effect

2013-03-24 Thread Amos Kong

On Fri, Mar 22, 2013 at 10:45:09AM +1030, Rusty Russell wrote:
 Michael S. Tsirkin m...@redhat.com writes:
  On Thu, Mar 21, 2013 at 02:44:50PM +0800, Amos Kong wrote:
  The lengcy guests don't have mac programming command, we don't know when
  it's safe to use MAC. This patch changed qemu to makes MAC change effect
  when the last byte of MAC is written to config space.
  
  MAC address takes first 6 bytes of config space of virtio-net, the addr
  is 5 when the last byte is written in virtio_config_writeb().
  
  MAC change will effect when n-mac is updated in virtio_net_set_config().
  
  Signed-off-by: Amos Kong ak...@redhat.com
 
  Let's see what Rusty says about the spec change.
 
 Implementation notes like this belong as a footnote, eg:
 
 For older systems, it is recommended and typical that the device
 write byte 5 of the mac address last, so devices can use that as
 a trigger to commit the mac address change.
 
 Now, is this a real, or theoretical issue?  Have we seen this problem in
 practice, or should we continue to ignore it?

Hi Rusty, Michael

I didn't touch any problem. MST, and you?

In Linux guest, we should disable the interface before changing mac address.
 ifconfig eth0 down
 ifconfig eth0 hw ether 10:12:13:14:15:16
 ifconfig eth0 up
 
In Windows 7 guest, after changing mac address in register table,
re-enabling interface to make it effect.
 reg add HKLM 
SYSTEM\CurrentControlSet\Control\Class\{4D36E972-E325-11CE-BFC1-08002BE10318}\0001]
 /v NetworkAddress /d 0123456789AB
 netsh interface set interface Local Area Connection DISABLED
 netsh interface set interface Local Area Connection ENABLED


So when we change the mac address, guest os always disable interface
to receive all packages. It seems the theoretical issue doesn't exist?

 Cheers,
 Rusty.
 

-- 
Amos.
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[RFC qemu PATCH] only writing out the last byte of MAC makes it have effect

2013-03-21 Thread Amos Kong

The lengcy guests don't have mac programming command, we don't know when
it's safe to use MAC. This patch changed qemu to makes MAC change effect
when the last byte of MAC is written to config space.

MAC address takes first 6 bytes of config space of virtio-net, the addr
is 5 when the last byte is written in virtio_config_writeb().

MAC change will effect when n-mac is updated in virtio_net_set_config().

Signed-off-by: Amos Kong ak...@redhat.com
---
 hw/virtio.c |3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/hw/virtio.c b/hw/virtio.c
index 26fbc79..2e08302 100644
--- a/hw/virtio.c
+++ b/hw/virtio.c
@@ -605,8 +605,9 @@ void virtio_config_writeb(VirtIODevice *vdev, uint32_t 
addr, uint32_t data)
 
 stb_p(vdev-config + addr, val);
 
-if (vdev-set_config)
+if (vdev-set_config  (addr == 5 || strcmp(vdev-name, virtio-net))) {
 vdev-set_config(vdev, vdev-config);
+}
 }
 
 void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
-- 
1.7.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[RFC virt-spec PATCH] only writing out the last byte of MAC makes it have effect

2013-03-20 Thread Amos Kong

The lengcy guests don't have mac programming command, we don't know when
it's safe to use MAC. We can change QEMU to make MAC change effect when
the last byte of MAC is written to config space.

Signed-off-by: Amos Kong ak...@redhat.com
---
 virtio-spec.lyx | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/virtio-spec.lyx b/virtio-spec.lyx
index dbc4ef0..bb289fb 100644
--- a/virtio-spec.lyx
+++ b/virtio-spec.lyx
@@ -5430,7 +5430,7 @@ T_CTRL_MAC_TABLE_SET.
 
 \begin_layout Standard
 
-\change_inserted -1930653948 1358506710
+\change_inserted -1930653948 1363832689
 The config space 
 \begin_inset Quotes eld
 \end_inset
@@ -5464,6 +5464,15 @@ mac
  Therefore, VIRTIO_NET_CTRL_MAC_ADDR_SET is preferred, especially while
  the NIC is up.
  The command-specific-data is a 6-byte MAC address.
+\end_layout
+
+\begin_layout Standard
+
+\change_inserted -1930653948 1363833477
+The legacy guests don't support the new command, they still change MAC address
+ in original way, that's not atomic.
+ For more robust, QEMU only makes the MAC change effect when the last byte
+ of MAC address is written to config space.
 \change_unchanged
 
 \end_layout
-- 
1.8.1.4

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [QEMU PATCH v4 2/3] virtio-net: introduce a new macaddr control

2013-01-22 Thread Amos Kong

On Mon, Jan 21, 2013 at 05:08:26PM +0100, Stefan Hajnoczi wrote:
 On Sat, Jan 19, 2013 at 09:54:27AM +0800, ak...@redhat.com wrote:
  @@ -350,6 +351,18 @@ static int virtio_net_handle_mac(VirtIONet *n, uint8_t 
  cmd,
   struct virtio_net_ctrl_mac mac_data;
   size_t s;
   
  +if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
  +if (iov_size(iov, iov_cnt) != ETH_ALEN) {
  +return VIRTIO_NET_ERR;
  +}
  +s = iov_to_buf(iov, iov_cnt, 0, n-mac, sizeof(n-mac));
  +if (s != sizeof(n-mac)) {
  +return VIRTIO_NET_ERR;
  +}

 
 Since iov_size() was checked before iov_to_buf(), we never hit this
 error.  And if we did n-mac would be trashed (i.e. error handling is
 not complete).

You are right.
iov_size() computes the size by accounting iov[].iov_lens, the first
check is enough.
 
 I think assert(s == sizeof(n-mac)) is more appropriate appropriate.
 Also, please change ETH_ALEN to sizeof(n-mac) to make the relationship
 between the check and the copy clear.
 

Will update this patch.

 if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
 if (iov_size(iov, iov_cnt) != sizeof(n-mac)) {
 return VIRTIO_NET_ERR;
 }
 s = iov_to_buf(iov, iov_cnt, 0, n-mac, sizeof(n-mac));
 assert(s == sizeof(n-mac));
 qemu_format_nic_info_str(n-nic-nc, n-mac);
 return VIRTIO_NET_OK;
 }

 Stefan
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [QEMU PATCH v4 1/3] virtio-net: remove layout assumptions for ctrl vq

2013-01-22 Thread Amos Kong

On Mon, Jan 21, 2013 at 05:03:30PM +0100, Stefan Hajnoczi wrote:
 On Sat, Jan 19, 2013 at 09:54:26AM +0800, ak...@redhat.com wrote:
  From: Michael S. Tsirkin m...@redhat.com
  
  Virtio-net code makes assumption about virtqueue descriptor layout
  (e.g. sg[0] is the header, sg[1] is the data buffer).
  
  This patch makes code not rely on the layout of descriptors.
  
  Signed-off-by: Michael S. Tsirkin m...@redhat.com
  Signed-off-by: Amos Kong ak...@redhat.com
  ---
   hw/virtio-net.c | 128 
  
   1 file changed, 74 insertions(+), 54 deletions(-)
  
  diff --git a/hw/virtio-net.c b/hw/virtio-net.c
  index 3bb01b1..113e194 100644
  --- a/hw/virtio-net.c
  +++ b/hw/virtio-net.c
  @@ -315,44 +315,44 @@ static void virtio_net_set_features(VirtIODevice 
  *vdev, uint32_t features)
   }
   
   static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
  - VirtQueueElement *elem)
  + struct iovec *iov, unsigned int 
  iov_cnt)
   {
   uint8_t on;
  +size_t s;
   
  -if (elem-out_num != 2 || elem-out_sg[1].iov_len != sizeof(on)) {
  -error_report(virtio-net ctrl invalid rx mode command);
  -exit(1);
  +s = iov_to_buf(iov, iov_cnt, 0, on, sizeof(on));
  +if (s != sizeof(on)) {
  +return VIRTIO_NET_ERR;
   }
   
  -on = ldub_p(elem-out_sg[1].iov_base);
  -
  -if (cmd == VIRTIO_NET_CTRL_RX_MODE_PROMISC)
  +if (cmd == VIRTIO_NET_CTRL_RX_MODE_PROMISC) {
   n-promisc = on;
  -else if (cmd == VIRTIO_NET_CTRL_RX_MODE_ALLMULTI)
  +} else if (cmd == VIRTIO_NET_CTRL_RX_MODE_ALLMULTI) {
   n-allmulti = on;
  -else if (cmd == VIRTIO_NET_CTRL_RX_MODE_ALLUNI)
  +} else if (cmd == VIRTIO_NET_CTRL_RX_MODE_ALLUNI) {
   n-alluni = on;
  -else if (cmd == VIRTIO_NET_CTRL_RX_MODE_NOMULTI)
  +} else if (cmd == VIRTIO_NET_CTRL_RX_MODE_NOMULTI) {
   n-nomulti = on;
  -else if (cmd == VIRTIO_NET_CTRL_RX_MODE_NOUNI)
  +} else if (cmd == VIRTIO_NET_CTRL_RX_MODE_NOUNI) {
   n-nouni = on;
  -else if (cmd == VIRTIO_NET_CTRL_RX_MODE_NOBCAST)
  +} else if (cmd == VIRTIO_NET_CTRL_RX_MODE_NOBCAST) {
   n-nobcast = on;
  -else
  +} else {
   return VIRTIO_NET_ERR;
  +}
   
   return VIRTIO_NET_OK;
   }
   
   static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
  - VirtQueueElement *elem)
  + struct iovec *iov, unsigned int iov_cnt)
   {
   struct virtio_net_ctrl_mac mac_data;
  +size_t s;
   
  -if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET || elem-out_num != 3 ||
  -elem-out_sg[1].iov_len  sizeof(mac_data) ||
  -elem-out_sg[2].iov_len  sizeof(mac_data))
  +if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
   return VIRTIO_NET_ERR;
  +}
   
   n-mac_table.in_use = 0;
   n-mac_table.first_multi = 0;
  @@ -360,54 +360,71 @@ static int virtio_net_handle_mac(VirtIONet *n, 
  uint8_t cmd,
   n-mac_table.multi_overflow = 0;
   memset(n-mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
   
  -mac_data.entries = ldl_p(elem-out_sg[1].iov_base);
  +s = iov_to_buf(iov, iov_cnt, 0, mac_data.entries,
  +   sizeof(mac_data.entries));

Hi Stefan, can we adjust the endianness after each iov_to_buf() copy?


diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index 72d7857..0088d6c 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -321,6 +321,7 @@ static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t 
cmd,
 size_t s;
 
 s = iov_to_buf(iov, iov_cnt, 0, on, sizeof(on));
+on = ldub_p(on);
 if (s != sizeof(on)) {
 return VIRTIO_NET_ERR;
 }
@@ -362,7 +363,7 @@ static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
 
 s = iov_to_buf(iov, iov_cnt, 0, mac_data.entries,
sizeof(mac_data.entries));
-
+mac_data.entries = ldl_p(mac_data.entries);
 if (s != sizeof(mac_data.entries)) {
 return VIRTIO_NET_ERR;
 }
@@ -389,7 +390,7 @@ static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
 
 s = iov_to_buf(iov, iov_cnt, 0, mac_data.entries,
sizeof(mac_data.entries));
-
+mac_data.entries = ldl_p(mac_data.entries);
 if (s != sizeof(mac_data.entries)) {
 return VIRTIO_NET_ERR;
 }
@@ -421,6 +422,7 @@ static int virtio_net_handle_vlan_table(VirtIONet *n, 
uint8_t cmd,
 size_t s;
 
 s = iov_to_buf(iov, iov_cnt, 0, vid, sizeof(vid));
+vid = lduw_p(vid);
 if (s != sizeof(vid)) {
 return VIRTIO_NET_ERR;
 }
@@ -458,6 +460,8 @@ static void virtio_net_handle_ctrl(VirtIODevice *vdev, 
VirtQueue *vq)
 iov = elem.out_sg;
 iov_cnt = elem.out_num;
 s = iov_to_buf(iov, iov_cnt, 0, ctrl, sizeof(ctrl));
+ctrl.class = ldub_p(ctrl.class);
+ctrl.cmd = ldub_p

[QEMU PATCH v5 0/3] virtio-net: fix of ctrl commands

2013-01-22 Thread Amos Kong

Currently virtio-net code relys on the layout of descriptor,
this patchset removed the assumptions and introduced a control
command to set mac address. Last patch is a trivial renaming.

V2: check guest's iov_len
V3: fix of migration compatibility
make mac field in config space read-only when new feature is acked
V4: add fix of descriptor layout assumptions, trivial rename
V5: fix endianness after iov_to_buf copy

Amos Kong (2):
  virtio-net: introduce a new macaddr control
  virtio-net: rename ctrl rx commands

Michael S. Tsirkin (1):
  virtio-net: remove layout assumptions for ctrl vq

 hw/pc_piix.c|4 ++
 hw/virtio-net.c |  142 +-
 hw/virtio-net.h |   26 +++
 3 files changed, 108 insertions(+), 64 deletions(-)

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[QEMU PATCH v5 1/3] virtio-net: remove layout assumptions for ctrl vq

2013-01-22 Thread Amos Kong

From: Michael S. Tsirkin m...@redhat.com

Virtio-net code makes assumption about virtqueue descriptor layout
(e.g. sg[0] is the header, sg[1] is the data buffer).

This patch makes code not rely on the layout of descriptors.

Signed-off-by: Michael S. Tsirkin m...@redhat.com
Signed-off-by: Amos Kong ak...@redhat.com
---
 hw/virtio-net.c |  129 ---
 1 files changed, 75 insertions(+), 54 deletions(-)

diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index 3bb01b1..af1f3a1 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -315,44 +315,44 @@ static void virtio_net_set_features(VirtIODevice *vdev, 
uint32_t features)
 }
 
 static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
- VirtQueueElement *elem)
+ struct iovec *iov, unsigned int iov_cnt)
 {
 uint8_t on;
+size_t s;
 
-if (elem-out_num != 2 || elem-out_sg[1].iov_len != sizeof(on)) {
-error_report(virtio-net ctrl invalid rx mode command);
-exit(1);
+s = iov_to_buf(iov, iov_cnt, 0, on, sizeof(on));
+if (s != sizeof(on)) {
+return VIRTIO_NET_ERR;
 }
 
-on = ldub_p(elem-out_sg[1].iov_base);
-
-if (cmd == VIRTIO_NET_CTRL_RX_MODE_PROMISC)
+if (cmd == VIRTIO_NET_CTRL_RX_MODE_PROMISC) {
 n-promisc = on;
-else if (cmd == VIRTIO_NET_CTRL_RX_MODE_ALLMULTI)
+} else if (cmd == VIRTIO_NET_CTRL_RX_MODE_ALLMULTI) {
 n-allmulti = on;
-else if (cmd == VIRTIO_NET_CTRL_RX_MODE_ALLUNI)
+} else if (cmd == VIRTIO_NET_CTRL_RX_MODE_ALLUNI) {
 n-alluni = on;
-else if (cmd == VIRTIO_NET_CTRL_RX_MODE_NOMULTI)
+} else if (cmd == VIRTIO_NET_CTRL_RX_MODE_NOMULTI) {
 n-nomulti = on;
-else if (cmd == VIRTIO_NET_CTRL_RX_MODE_NOUNI)
+} else if (cmd == VIRTIO_NET_CTRL_RX_MODE_NOUNI) {
 n-nouni = on;
-else if (cmd == VIRTIO_NET_CTRL_RX_MODE_NOBCAST)
+} else if (cmd == VIRTIO_NET_CTRL_RX_MODE_NOBCAST) {
 n-nobcast = on;
-else
+} else {
 return VIRTIO_NET_ERR;
+}
 
 return VIRTIO_NET_OK;
 }
 
 static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
- VirtQueueElement *elem)
+ struct iovec *iov, unsigned int iov_cnt)
 {
 struct virtio_net_ctrl_mac mac_data;
+size_t s;
 
-if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET || elem-out_num != 3 ||
-elem-out_sg[1].iov_len  sizeof(mac_data) ||
-elem-out_sg[2].iov_len  sizeof(mac_data))
+if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
 return VIRTIO_NET_ERR;
+}
 
 n-mac_table.in_use = 0;
 n-mac_table.first_multi = 0;
@@ -360,54 +360,72 @@ static int virtio_net_handle_mac(VirtIONet *n, uint8_t 
cmd,
 n-mac_table.multi_overflow = 0;
 memset(n-mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
 
-mac_data.entries = ldl_p(elem-out_sg[1].iov_base);
+s = iov_to_buf(iov, iov_cnt, 0, mac_data.entries,
+   sizeof(mac_data.entries));
+mac_data.entries = ldl_p(mac_data.entries);
+if (s != sizeof(mac_data.entries)) {
+return VIRTIO_NET_ERR;
+}
+iov_discard_front(iov, iov_cnt, s);
 
-if (sizeof(mac_data.entries) +
-(mac_data.entries * ETH_ALEN)  elem-out_sg[1].iov_len)
+if (mac_data.entries * ETH_ALEN  iov_size(iov, iov_cnt)) {
 return VIRTIO_NET_ERR;
+}
 
 if (mac_data.entries = MAC_TABLE_ENTRIES) {
-memcpy(n-mac_table.macs, elem-out_sg[1].iov_base + sizeof(mac_data),
-   mac_data.entries * ETH_ALEN);
+s = iov_to_buf(iov, iov_cnt, 0, n-mac_table.macs,
+   mac_data.entries * ETH_ALEN);
+if (s != mac_data.entries * ETH_ALEN) {
+return VIRTIO_NET_ERR;
+}
 n-mac_table.in_use += mac_data.entries;
 } else {
 n-mac_table.uni_overflow = 1;
 }
 
+iov_discard_front(iov, iov_cnt, mac_data.entries * ETH_ALEN);
+
 n-mac_table.first_multi = n-mac_table.in_use;
 
-mac_data.entries = ldl_p(elem-out_sg[2].iov_base);
+s = iov_to_buf(iov, iov_cnt, 0, mac_data.entries,
+   sizeof(mac_data.entries));
+mac_data.entries = ldl_p(mac_data.entries);
+if (s != sizeof(mac_data.entries)) {
+return VIRTIO_NET_ERR;
+}
+
+iov_discard_front(iov, iov_cnt, s);
 
-if (sizeof(mac_data.entries) +
-(mac_data.entries * ETH_ALEN)  elem-out_sg[2].iov_len)
+if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
 return VIRTIO_NET_ERR;
+}
 
-if (mac_data.entries) {
-if (n-mac_table.in_use + mac_data.entries = MAC_TABLE_ENTRIES) {
-memcpy(n-mac_table.macs + (n-mac_table.in_use * ETH_ALEN),
-   elem-out_sg[2].iov_base + sizeof(mac_data),
-   mac_data.entries * ETH_ALEN);
-n-mac_table.in_use += mac_data.entries;
-} else {
-n

[QEMU PATCH v5 2/3] virtio-net: introduce a new macaddr control

2013-01-22 Thread Amos Kong

In virtio-net guest driver, currently we write MAC address to
pci config space byte by byte, this means that we have an
intermediate step where mac is wrong. This patch introduced
a new control command to set MAC address, it's atomic.

VIRTIO_NET_F_CTRL_MAC_ADDR is a new feature bit for compatibility.

mac field will be set to read-only when VIRTIO_NET_F_CTRL_MAC_ADDR
is acked.

Signed-off-by: Amos Kong ak...@redhat.com
---
 hw/pc_piix.c|4 
 hw/virtio-net.c |   13 -
 hw/virtio-net.h |   12 ++--
 3 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/hw/pc_piix.c b/hw/pc_piix.c
index 0a6923d..6218350 100644
--- a/hw/pc_piix.c
+++ b/hw/pc_piix.c
@@ -297,6 +297,10 @@ static QEMUMachine pc_i440fx_machine_v1_4 = {
 .driver   = usb-tablet,\
 .property = usb_version,\
 .value= stringify(1),\
+},{\
+.driver   = virtio-net-pci,\
+.property = ctrl_mac_addr,\
+.value= off,  \
 }
 
 static QEMUMachine pc_machine_v1_3 = {
diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index af1f3a1..acef5a5 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -93,7 +93,8 @@ static void virtio_net_set_config(VirtIODevice *vdev, const 
uint8_t *config)
 
 memcpy(netcfg, config, sizeof(netcfg));
 
-if (memcmp(netcfg.mac, n-mac, ETH_ALEN)) {
+if (!(n-vdev.guest_features  VIRTIO_NET_F_CTRL_MAC_ADDR  1) 
+memcmp(netcfg.mac, n-mac, ETH_ALEN)) {
 memcpy(n-mac, netcfg.mac, ETH_ALEN);
 qemu_format_nic_info_str(n-nic-nc, n-mac);
 }
@@ -350,6 +351,16 @@ static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
 struct virtio_net_ctrl_mac mac_data;
 size_t s;
 
+if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
+if (iov_size(iov, iov_cnt) != sizeof(n-mac)) {
+return VIRTIO_NET_ERR;
+}
+s = iov_to_buf(iov, iov_cnt, 0, n-mac, sizeof(n-mac));
+assert(s == sizeof(n-mac));
+qemu_format_nic_info_str(n-nic-nc, n-mac);
+return VIRTIO_NET_OK;
+}
+
 if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
 return VIRTIO_NET_ERR;
 }
diff --git a/hw/virtio-net.h b/hw/virtio-net.h
index d46fb98..1ec632f 100644
--- a/hw/virtio-net.h
+++ b/hw/virtio-net.h
@@ -44,6 +44,8 @@
 #define VIRTIO_NET_F_CTRL_VLAN  19  /* Control channel VLAN filtering */
 #define VIRTIO_NET_F_CTRL_RX_EXTRA 20   /* Extra RX mode control support */
 
+#define VIRTIO_NET_F_CTRL_MAC_ADDR   23 /* Set MAC address */
+
 #define VIRTIO_NET_S_LINK_UP1   /* Link is up */
 
 #define TX_TIMER_INTERVAL 15 /* 150 us */
@@ -106,7 +108,7 @@ typedef uint8_t virtio_net_ctrl_ack;
  #define VIRTIO_NET_CTRL_RX_MODE_NOBCAST  5
 
 /*
- * Control the MAC filter table.
+ * Control the MAC
  *
  * The MAC filter table is managed by the hypervisor, the guest should
  * assume the size is infinite.  Filtering should be considered
@@ -119,6 +121,10 @@ typedef uint8_t virtio_net_ctrl_ack;
  * first sg list contains unicast addresses, the second is for multicast.
  * This functionality is present if the VIRTIO_NET_F_CTRL_RX feature
  * is available.
+ *
+ * The ADDR_SET command requests one out scatterlist, it contains a
+ * 6 bytes MAC address. This functionality is present if the
+ * VIRTIO_NET_F_CTRL_MAC_ADDR feature is available.
  */
 struct virtio_net_ctrl_mac {
 uint32_t entries;
@@ -126,6 +132,7 @@ struct virtio_net_ctrl_mac {
 };
 #define VIRTIO_NET_CTRL_MAC1
  #define VIRTIO_NET_CTRL_MAC_TABLE_SET0
+ #define VIRTIO_NET_CTRL_MAC_ADDR_SET 1
 
 /*
  * Control VLAN filtering
@@ -158,5 +165,6 @@ struct virtio_net_ctrl_mac {
 DEFINE_PROP_BIT(ctrl_vq, _state, _field, VIRTIO_NET_F_CTRL_VQ, 
true), \
 DEFINE_PROP_BIT(ctrl_rx, _state, _field, VIRTIO_NET_F_CTRL_RX, 
true), \
 DEFINE_PROP_BIT(ctrl_vlan, _state, _field, VIRTIO_NET_F_CTRL_VLAN, 
true), \
-DEFINE_PROP_BIT(ctrl_rx_extra, _state, _field, 
VIRTIO_NET_F_CTRL_RX_EXTRA, true)
+DEFINE_PROP_BIT(ctrl_rx_extra, _state, _field, 
VIRTIO_NET_F_CTRL_RX_EXTRA, true), \
+DEFINE_PROP_BIT(ctrl_mac_addr, _state, _field, 
VIRTIO_NET_F_CTRL_MAC_ADDR, true)
 #endif
-- 
1.7.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[QEMU PATCH v5 3/3] virtio-net: rename ctrl rx commands

2013-01-22 Thread Amos Kong

This patch makes rx commands consistent with specification.

Signed-off-by: Amos Kong ak...@redhat.com
---
 hw/virtio-net.c |   14 +++---
 hw/virtio-net.h |   14 +++---
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index acef5a5..ac4434e 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -326,17 +326,17 @@ static int virtio_net_handle_rx_mode(VirtIONet *n, 
uint8_t cmd,
 return VIRTIO_NET_ERR;
 }
 
-if (cmd == VIRTIO_NET_CTRL_RX_MODE_PROMISC) {
+if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
 n-promisc = on;
-} else if (cmd == VIRTIO_NET_CTRL_RX_MODE_ALLMULTI) {
+} else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
 n-allmulti = on;
-} else if (cmd == VIRTIO_NET_CTRL_RX_MODE_ALLUNI) {
+} else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
 n-alluni = on;
-} else if (cmd == VIRTIO_NET_CTRL_RX_MODE_NOMULTI) {
+} else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
 n-nomulti = on;
-} else if (cmd == VIRTIO_NET_CTRL_RX_MODE_NOUNI) {
+} else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
 n-nouni = on;
-} else if (cmd == VIRTIO_NET_CTRL_RX_MODE_NOBCAST) {
+} else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
 n-nobcast = on;
 } else {
 return VIRTIO_NET_ERR;
@@ -473,7 +473,7 @@ static void virtio_net_handle_ctrl(VirtIODevice *vdev, 
VirtQueue *vq)
 iov_discard_front(iov, iov_cnt, sizeof(ctrl));
 if (s != sizeof(ctrl)) {
 status = VIRTIO_NET_ERR;
-} else if (ctrl.class == VIRTIO_NET_CTRL_RX_MODE) {
+} else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
 status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
 } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
 status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
diff --git a/hw/virtio-net.h b/hw/virtio-net.h
index 1ec632f..c0bb284 100644
--- a/hw/virtio-net.h
+++ b/hw/virtio-net.h
@@ -99,13 +99,13 @@ typedef uint8_t virtio_net_ctrl_ack;
  * 0 and 1 are supported with the VIRTIO_NET_F_CTRL_RX feature.
  * Commands 2-5 are added with VIRTIO_NET_F_CTRL_RX_EXTRA.
  */
-#define VIRTIO_NET_CTRL_RX_MODE0
- #define VIRTIO_NET_CTRL_RX_MODE_PROMISC  0
- #define VIRTIO_NET_CTRL_RX_MODE_ALLMULTI 1
- #define VIRTIO_NET_CTRL_RX_MODE_ALLUNI   2
- #define VIRTIO_NET_CTRL_RX_MODE_NOMULTI  3
- #define VIRTIO_NET_CTRL_RX_MODE_NOUNI4
- #define VIRTIO_NET_CTRL_RX_MODE_NOBCAST  5
+#define VIRTIO_NET_CTRL_RX0
+ #define VIRTIO_NET_CTRL_RX_PROMISC  0
+ #define VIRTIO_NET_CTRL_RX_ALLMULTI 1
+ #define VIRTIO_NET_CTRL_RX_ALLUNI   2
+ #define VIRTIO_NET_CTRL_RX_NOMULTI  3
+ #define VIRTIO_NET_CTRL_RX_NOUNI4
+ #define VIRTIO_NET_CTRL_RX_NOBCAST  5
 
 /*
  * Control the MAC
-- 
1.7.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v5 0/3] make mac programming for virtio net more robust

2013-01-21 Thread Amos Kong

Currenly mac is programmed byte by byte. This means that we
have an intermediate step where mac is wrong. 

Third patch introduced a new vq control command to set mac
address, it's atomic.

V2: check return of sending command, delay eth_mac_addr()
V3: restore software address when fail to set hardware address
V4: split eth_mac_addr, fix error handle
V5: rebase patches to net-next tree

Amos Kong (2):
  move virtnet_send_command() above virtnet_set_mac_address()
  virtio-net: introduce a new control to set macaddr

Stefan Hajnoczi (1):
  net: split eth_mac_addr for better error handling

 drivers/net/virtio_net.c|  110 ++-
 include/linux/etherdevice.h |2 +
 include/uapi/linux/virtio_net.h |8 +++-
 net/ethernet/eth.c  |   41 --
 4 files changed, 106 insertions(+), 55 deletions(-)

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v5 1/3] move virtnet_send_command() above virtnet_set_mac_address()

2013-01-21 Thread Amos Kong

We want to send vq command to set mac address in
virtnet_set_mac_address(), so do this function moving.
Fixed a little issue of coding style.

Signed-off-by: Amos Kong ak...@redhat.com
---
 drivers/net/virtio_net.c |   89 ++---
 1 files changed, 44 insertions(+), 45 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index a6fcf15..395ab4f 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -753,6 +753,50 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct 
net_device *dev)
return NETDEV_TX_OK;
 }
 
+/*
+ * Send command via the control virtqueue and check status.  Commands
+ * supported by the hypervisor, as indicated by feature bits, should
+ * never fail unless improperly formated.
+ */
+static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
+struct scatterlist *data, int out, int in)
+{
+   struct scatterlist *s, sg[VIRTNET_SEND_COMMAND_SG_MAX + 2];
+   struct virtio_net_ctrl_hdr ctrl;
+   virtio_net_ctrl_ack status = ~0;
+   unsigned int tmp;
+   int i;
+
+   /* Caller should know better */
+   BUG_ON(!virtio_has_feature(vi-vdev, VIRTIO_NET_F_CTRL_VQ) ||
+   (out + in  VIRTNET_SEND_COMMAND_SG_MAX));
+
+   out++; /* Add header */
+   in++; /* Add return status */
+
+   ctrl.class = class;
+   ctrl.cmd = cmd;
+
+   sg_init_table(sg, out + in);
+
+   sg_set_buf(sg[0], ctrl, sizeof(ctrl));
+   for_each_sg(data, s, out + in - 2, i)
+   sg_set_buf(sg[i + 1], sg_virt(s), s-length);
+   sg_set_buf(sg[out + in - 1], status, sizeof(status));
+
+   BUG_ON(virtqueue_add_buf(vi-cvq, sg, out, in, vi, GFP_ATOMIC)  0);
+
+   virtqueue_kick(vi-cvq);
+
+   /* Spin for a response, the kick causes an ioport write, trapping
+* into the hypervisor, so the request should be handled immediately.
+*/
+   while (!virtqueue_get_buf(vi-cvq, tmp))
+   cpu_relax();
+
+   return status == VIRTIO_NET_OK;
+}
+
 static int virtnet_set_mac_address(struct net_device *dev, void *p)
 {
struct virtnet_info *vi = netdev_priv(dev);
@@ -819,51 +863,6 @@ static void virtnet_netpoll(struct net_device *dev)
 }
 #endif
 
-/*
- * Send command via the control virtqueue and check status.  Commands
- * supported by the hypervisor, as indicated by feature bits, should
- * never fail unless improperly formated.
- */
-static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
-struct scatterlist *data, int out, int in)
-{
-   struct scatterlist *s, sg[VIRTNET_SEND_COMMAND_SG_MAX + 2];
-   struct virtio_net_ctrl_hdr ctrl;
-   virtio_net_ctrl_ack status = ~0;
-   unsigned int tmp;
-   int i;
-
-   /* Caller should know better */
-   BUG_ON(!virtio_has_feature(vi-vdev, VIRTIO_NET_F_CTRL_VQ) ||
-   (out + in  VIRTNET_SEND_COMMAND_SG_MAX));
-
-   out++; /* Add header */
-   in++; /* Add return status */
-
-   ctrl.class = class;
-   ctrl.cmd = cmd;
-
-   sg_init_table(sg, out + in);
-
-   sg_set_buf(sg[0], ctrl, sizeof(ctrl));
-   for_each_sg(data, s, out + in - 2, i)
-   sg_set_buf(sg[i + 1], sg_virt(s), s-length);
-   sg_set_buf(sg[out + in - 1], status, sizeof(status));
-
-   BUG_ON(virtqueue_add_buf(vi-cvq, sg, out, in, vi, GFP_ATOMIC)  0);
-
-   virtqueue_kick(vi-cvq);
-
-   /*
-* Spin for a response, the kick causes an ioport write, trapping
-* into the hypervisor, so the request should be handled immediately.
-*/
-   while (!virtqueue_get_buf(vi-cvq, tmp))
-   cpu_relax();
-
-   return status == VIRTIO_NET_OK;
-}
-
 static void virtnet_ack_link_announce(struct virtnet_info *vi)
 {
rtnl_lock();
-- 
1.7.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v5 2/3] net: split eth_mac_addr for better error handling

2013-01-21 Thread Amos Kong

From: Stefan Hajnoczi stefa...@gmail.com

When we set mac address, software mac address in system and hardware mac
address all need to be updated. Current eth_mac_addr() doesn't allow
callers to implement error handling nicely.

This patch split eth_mac_addr() to prepare part and real commit part,
then we can prepare first, and try to change hardware address, then do
the real commit if hardware address is set successfully.

Signed-off-by: Stefan Hajnoczi stefa...@gmail.com
Signed-off-by: Amos Kong ak...@redhat.com
---
 include/linux/etherdevice.h |2 ++
 net/ethernet/eth.c  |   41 +++--
 2 files changed, 37 insertions(+), 6 deletions(-)

diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 1a43e1b..c623861 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -40,6 +40,8 @@ extern int eth_header_cache(const struct neighbour *neigh, 
struct hh_cache *hh,
 extern void eth_header_cache_update(struct hh_cache *hh,
const struct net_device *dev,
const unsigned char *haddr);
+extern int eth_prepare_mac_addr_change(struct net_device *dev, void *p);
+extern void eth_commit_mac_addr_change(struct net_device *dev, void *p);
 extern int eth_mac_addr(struct net_device *dev, void *p);
 extern int eth_change_mtu(struct net_device *dev, int new_mtu);
 extern int eth_validate_addr(struct net_device *dev);
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index bc39c8c..a36c85e 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -272,6 +272,36 @@ void eth_header_cache_update(struct hh_cache *hh,
 EXPORT_SYMBOL(eth_header_cache_update);
 
 /**
+ * eth_prepare_mac_addr_change - prepare for mac change
+ * @dev: network device
+ * @p: socket address
+ */
+int eth_prepare_mac_addr_change(struct net_device *dev, void *p)
+{
+   struct sockaddr *addr = p;
+
+   if (!(dev-priv_flags  IFF_LIVE_ADDR_CHANGE)  netif_running(dev))
+   return -EBUSY;
+   if (!is_valid_ether_addr(addr-sa_data))
+   return -EADDRNOTAVAIL;
+   return 0;
+}
+EXPORT_SYMBOL(eth_prepare_mac_addr_change);
+
+/**
+ * eth_commit_mac_addr_change - commit mac change
+ * @dev: network device
+ * @p: socket address
+ */
+void eth_commit_mac_addr_change(struct net_device *dev, void *p)
+{
+   struct sockaddr *addr = p;
+
+   memcpy(dev-dev_addr, addr-sa_data, ETH_ALEN);
+}
+EXPORT_SYMBOL(eth_commit_mac_addr_change);
+
+/**
  * eth_mac_addr - set new Ethernet hardware address
  * @dev: network device
  * @p: socket address
@@ -283,13 +313,12 @@ EXPORT_SYMBOL(eth_header_cache_update);
  */
 int eth_mac_addr(struct net_device *dev, void *p)
 {
-   struct sockaddr *addr = p;
+   int ret;
 
-   if (!(dev-priv_flags  IFF_LIVE_ADDR_CHANGE)  netif_running(dev))
-   return -EBUSY;
-   if (!is_valid_ether_addr(addr-sa_data))
-   return -EADDRNOTAVAIL;
-   memcpy(dev-dev_addr, addr-sa_data, ETH_ALEN);
+   ret = eth_prepare_mac_addr_change(dev, p);
+   if (ret  0)
+   return ret;
+   eth_commit_mac_addr_change(dev, p);
return 0;
 }
 EXPORT_SYMBOL(eth_mac_addr);
-- 
1.7.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v5 3/3] virtio-net: introduce a new control to set macaddr

2013-01-21 Thread Amos Kong

Currently we write MAC address to pci config space byte by byte,
this means that we have an intermediate step where mac is wrong.
This patch introduced a new control command to set MAC address,
it's atomic.

VIRTIO_NET_F_CTRL_MAC_ADDR is a new feature bit for compatibility.

Signed-off-by: Amos Kong ak...@redhat.com
---
 drivers/net/virtio_net.c|   21 ++---
 include/uapi/linux/virtio_net.h |8 +++-
 2 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 395ab4f..701408a 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -802,14 +802,28 @@ static int virtnet_set_mac_address(struct net_device 
*dev, void *p)
struct virtnet_info *vi = netdev_priv(dev);
struct virtio_device *vdev = vi-vdev;
int ret;
+   struct sockaddr *addr = p;
+   struct scatterlist sg;
 
-   ret = eth_mac_addr(dev, p);
+   ret = eth_prepare_mac_addr_change(dev, p);
if (ret)
return ret;
 
-   if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC))
+   if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
+   sg_init_one(sg, addr-sa_data, dev-addr_len);
+   if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
+ VIRTIO_NET_CTRL_MAC_ADDR_SET,
+ sg, 1, 0)) {
+   dev_warn(vdev-dev,
+Failed to set mac address by vq command.\n);
+   return -EINVAL;
+   }
+   } else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) {
vdev-config-set(vdev, offsetof(struct virtio_net_config, mac),
- dev-dev_addr, dev-addr_len);
+ addr-sa_data, dev-addr_len);
+   }
+
+   eth_commit_mac_addr_change(dev, p);
 
return 0;
 }
@@ -1627,6 +1641,7 @@ static unsigned int features[] = {
VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ,
VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN,
VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ,
+   VIRTIO_NET_F_CTRL_MAC_ADDR,
 };
 
 static struct virtio_driver virtio_net_driver = {
diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h
index 848e358..a5a8c88 100644
--- a/include/uapi/linux/virtio_net.h
+++ b/include/uapi/linux/virtio_net.h
@@ -53,6 +53,7 @@
 * network */
 #define VIRTIO_NET_F_MQ22  /* Device supports Receive Flow
 * Steering */
+#define VIRTIO_NET_F_CTRL_MAC_ADDR 23  /* Set MAC address */
 
 #define VIRTIO_NET_S_LINK_UP   1   /* Link is up */
 #define VIRTIO_NET_S_ANNOUNCE  2   /* Announcement is needed */
@@ -127,7 +128,7 @@ typedef __u8 virtio_net_ctrl_ack;
  #define VIRTIO_NET_CTRL_RX_NOBCAST  5
 
 /*
- * Control the MAC filter table.
+ * Control the MAC
  *
  * The MAC filter table is managed by the hypervisor, the guest should
  * assume the size is infinite.  Filtering should be considered
@@ -140,6 +141,10 @@ typedef __u8 virtio_net_ctrl_ack;
  * first sg list contains unicast addresses, the second is for multicast.
  * This functionality is present if the VIRTIO_NET_F_CTRL_RX feature
  * is available.
+ *
+ * The ADDR_SET command requests one out scatterlist, it contains a
+ * 6 bytes MAC address. This functionality is present if the
+ * VIRTIO_NET_F_CTRL_MAC_ADDR feature is available.
  */
 struct virtio_net_ctrl_mac {
__u32 entries;
@@ -148,6 +153,7 @@ struct virtio_net_ctrl_mac {
 
 #define VIRTIO_NET_CTRL_MAC1
  #define VIRTIO_NET_CTRL_MAC_TABLE_SET0
+ #define VIRTIO_NET_CTRL_MAC_ADDR_SET 1
 
 /*
  * Control VLAN filtering
-- 
1.7.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH v3 2/2] virtio-net: introduce a new control to set macaddr

2013-01-19 Thread Amos Kong

On Fri, Jan 18, 2013 at 12:00:42PM +0100, Stefan Hajnoczi wrote:
 On Thu, Jan 17, 2013 at 06:40:12PM +0800, ak...@redhat.com wrote:
  diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
  index 395ab4f..837c978 100644
  --- a/drivers/net/virtio_net.c
  +++ b/drivers/net/virtio_net.c
  @@ -802,14 +802,32 @@ static int virtnet_set_mac_address(struct net_device 
  *dev, void *p)
  struct virtnet_info *vi = netdev_priv(dev);
  struct virtio_device *vdev = vi-vdev;
  int ret;
  +   struct scatterlist sg;
  +   char save_addr[ETH_ALEN];
  +   unsigned char save_aatype;
  +
  +   memcpy(save_addr, dev-dev_addr, ETH_ALEN);
  +   save_aatype = dev-addr_assign_type;
   
  ret = eth_mac_addr(dev, p);
  if (ret)
  return ret;
   
  -   if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC))
  +   if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
  +   sg_init_one(sg, dev-dev_addr, dev-addr_len);
  +   if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
  + VIRTIO_NET_CTRL_MAC_ADDR_SET,
  + sg, 1, 0)) {
  +   dev_warn(vdev-dev,
  +Failed to set mac address by vq command.\n);
  +   memcpy(dev-dev_addr, save_addr, ETH_ALEN);
  +   dev-addr_assign_type = save_aatype;
  +   return -EINVAL;
  +   }
 
 eth_mac_addr() doesn't allow callers to implement error handling nicely.
 Although you didn't duplicate it's code directly, this patch still leaks
 internals of eth_mac_addr().
 
 How about splitting eth_mac_addr() in a separate patch:

Agree, then we can have nice error handling.
I will send a V4 and include this patch.
Thanks.
 
 int eth_prepare_mac_addr_change(struct net_device *dev, void *p)
 {
 struct sockaddr *addr = p;
 if (!(dev-priv_flags  IFF_LIVE_ADDR_CHANGE)  netif_running(dev))
 return -EBUSY;
 if (!is_valid_ether_addr(addr-sa_data))
 return -EADDRNOTAVAIL;
   return 0;
 }

...

 Now virtio_net.c does:
 
 ret = eth_prepare_mac_addr_change(dev, p);
 if (ret  0)
   return ret;
 
 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
   sg_init_one(sg, dev-dev_addr, dev-addr_len);

 trivial issue, %s/dev-dev_addr/addr-sa_data

   if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
 VIRTIO_NET_CTRL_MAC_ADDR_SET,
 sg, 1, 0)) {
   dev_warn(vdev-dev,
Failed to set mac address by vq command.\n);
   return -EINVAL;
   }
 } ...
 
 eth_commit_mac_addr_change(dev, p);
 return 0;
 
 Stefan
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [Qemu-devel] [QEMU PATCH v2] virtio-net: introduce a new macaddr control

2013-01-17 Thread Amos Kong

On Thu, Jan 17, 2013 at 01:45:11PM +0800, Amos Kong wrote:
 On Thu, Jan 17, 2013 at 11:49:20AM +1030, Rusty Russell wrote:
  ak...@redhat.com writes:
   @@ -349,6 +351,14 @@ static int virtio_net_handle_mac(VirtIONet *n, 
   uint8_t cmd,
{
struct virtio_net_ctrl_mac mac_data;

   +if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET  elem-out_num == 2 
   +elem-out_sg[1].iov_len == ETH_ALEN) {
   +/* Set MAC address */
   +memcpy(n-mac, elem-out_sg[1].iov_base, 
   elem-out_sg[1].iov_len);
   +qemu_format_nic_info_str(n-nic-nc, n-mac);
   +return VIRTIO_NET_OK;
   +}
  
  Does the rest of the net device still rely on the layout of descriptors?
 
 No, only info string of net client relies on n-mac

I misunderstood. There is no clear limitation of how much descriptor are
used for each vq command, but many commands rely on the layout of
descriptiors. eg:

virtio-net:
   VIRTIO_NET_CTRL_RX_PROMISC
   VIRTIO_NET_CTRL_RX_ALLMULTI
   VIRTIO_NET_CTRL_MAC_TABLE_SET
   etc
 
  If so, OK, we'll fix them all together.  If not, this introduces a new
  one.
  
  Cheers,
  Rusty.
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [QEMU PATCH v2] virtio-net: introduce a new macaddr control

2013-01-17 Thread Amos Kong

On Thu, Jan 17, 2013 at 02:13:08PM +0200, Michael S. Tsirkin wrote:
 On Thu, Jan 17, 2013 at 11:49:20AM +1030, Rusty Russell wrote:
  ak...@redhat.com writes:
   @@ -349,6 +351,14 @@ static int virtio_net_handle_mac(VirtIONet *n, 
   uint8_t cmd,
{
struct virtio_net_ctrl_mac mac_data;

   +if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET  elem-out_num == 2 
   +elem-out_sg[1].iov_len == ETH_ALEN) {
   +/* Set MAC address */
   +memcpy(n-mac, elem-out_sg[1].iov_base, 
   elem-out_sg[1].iov_len);
   +qemu_format_nic_info_str(n-nic-nc, n-mac);
   +return VIRTIO_NET_OK;
   +}
  
  Does the rest of the net device still rely on the layout of descriptors?
  If so, OK, we'll fix them all together.  If not, this introduces a new
  one.
  
  Cheers,
  Rusty.
 
 The following fixes all existing users.
 Got to deal with some urgent stuff so did not test yet -
 Amos, would you like to include this in your patchset
 and build on it, test it all together?

No problem.

 If not I'll get to it next week.
 
 ---
 
 virtio-net: remove layout assumptions for ctrl vq
 
 Signed-off-by: Michael S. Tsirkin m...@redhat.com

...
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH v2 2/2] virtio-net: introduce a new control to set macaddr

2013-01-16 Thread Amos Kong

On Wed, Jan 16, 2013 at 02:20:39PM +0800, Jason Wang wrote:
 On Wednesday, January 16, 2013 01:57:01 PM ak...@redhat.com wrote:
  From: Amos Kong ak...@redhat.com
  
  Currently we write MAC address to pci config space byte by byte,
  this means that we have an intermediate step where mac is wrong.
  This patch introduced a new control command to set MAC address
  in one time.
  
  VIRTIO_NET_F_CTRL_MAC_ADDR is a new feature bit for compatibility.
  
  Signed-off-by: Amos Kong ak...@redhat.com
  ---
   drivers/net/virtio_net.c| 24 +---
   include/uapi/linux/virtio_net.h |  8 +++-
   2 files changed, 24 insertions(+), 8 deletions(-)
  
  diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
  index 395ab4f..c8901b6 100644
  --- a/drivers/net/virtio_net.c
  +++ b/drivers/net/virtio_net.c
  @@ -802,16 +802,25 @@ static int virtnet_set_mac_address(struct net_device
  *dev, void *p) struct virtnet_info *vi = netdev_priv(dev);
  struct virtio_device *vdev = vi-vdev;
  int ret;
  +   struct sockaddr *addr = p;
  +   struct scatterlist sg;
  
  -   ret = eth_mac_addr(dev, p);
  -   if (ret)
  -   return ret;
  -
  -   if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC))
  +   if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
  +   sg_init_one(sg, addr-sa_data, dev-addr_len);
  +   if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
  + VIRTIO_NET_CTRL_MAC_ADDR_SET,
  + sg, 1, 0)) {
  +   dev_warn(vdev-dev,
  +Failed to set mac address by vq command.\n);
  +   return -EINVAL;
  +   }
  +   } else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) {
  vdev-config-set(vdev, offsetof(struct virtio_net_config, mac),
  - dev-dev_addr, dev-addr_len);
  + addr-sa_data, dev-addr_len);
  +   }
  +   ret = eth_mac_addr(dev, p);
  
 
 The you will the validity check in eth_mac_addr which may result a wrong mac 
 address to be set in the hardware (or is there any check in qemu) and a 
 inconsistency bettween what kernel assumes and qemu has.
 
 You can take a look at netvsc driver that calls eth_mac_addr() first and 
 restore the software mac address when fail to enforce it to hardware.

Thanks for the catching, I will move eth_mac_addr() back to above,
just restore addr if fail to send command.

I will also use DEFINE_PROP_BIT to fix migration issue, thanks.
 
 Thanks
  -   return 0;
  +   return ret;
   }


___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [Qemu-devel] [QEMU PATCH v2] virtio-net: introduce a new macaddr control

2013-01-16 Thread Amos Kong

On Thu, Jan 17, 2013 at 11:49:20AM +1030, Rusty Russell wrote:
 ak...@redhat.com writes:
  @@ -349,6 +351,14 @@ static int virtio_net_handle_mac(VirtIONet *n, uint8_t 
  cmd,
   {
   struct virtio_net_ctrl_mac mac_data;
   
  +if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET  elem-out_num == 2 
  +elem-out_sg[1].iov_len == ETH_ALEN) {
  +/* Set MAC address */
  +memcpy(n-mac, elem-out_sg[1].iov_base, elem-out_sg[1].iov_len);
  +qemu_format_nic_info_str(n-nic-nc, n-mac);
  +return VIRTIO_NET_OK;
  +}
 
 Does the rest of the net device still rely on the layout of descriptors?

No, only info string of net client relies on n-mac

 If so, OK, we'll fix them all together.  If not, this introduces a new
 one.
 
 Cheers,
 Rusty.
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH] virtio-spec: set mac address by a new vq command

2013-01-16 Thread Amos Kong

On Wed, Jan 16, 2013 at 11:22:23AM +0200, Michael S. Tsirkin wrote:
 On Wed, Jan 16, 2013 at 10:13:28AM +0100, Stefan Hajnoczi wrote:
  On Wed, Jan 16, 2013 at 03:33:24PM +0800, ak...@redhat.com wrote:
   +\change_inserted -1930653948 1358320004
   +The command VIRTIO_NET_CTRL_MAC_ADDR_SET is used to set 
   +\begin_inset Quotes eld
   +\end_inset
   +
   +physical
   +\begin_inset Quotes erd
   +\end_inset
   +
   + address of the network card.
  
  The physical address of the network card?  That term is not defined
  anywhere in the specification.

I will replace it with 'the default MAC address'

  Perhaps it's best to explain that the config space mac field and
  VIRTIO_NET_CTRL_MAC_ADDR_SET both set the default MAC address which rx
  filtering accepts.  (The MAC table is an additional set of MAC addresses
  which rx filtering accepts.)
  
  It would also be worth explaining that VIRTIO_NET_CTRL_MAC_ADDR_SET is
  atomic whereas the config space mac field is not.  Therefore,
  VIRTIO_NET_CTRL_MAC_ADDR_SET is preferred, especially while the NIC is
  up.

Ok, will send a v2, thanks.

  Stefan
 
 It's probably best to simply make the config space field
 read-only if the feature bit is acked.

It's reasonable.

diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index c18e276..54c5eae 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -93,7 +93,8 @@ static void virtio_net_set_config(VirtIODevice
*vdev, const uint8_t *config)
 
 memcpy(netcfg, config, sizeof(netcfg));
 
-if (memcmp(netcfg.mac, n-mac, ETH_ALEN)) {
+if (!(n-vdev.guest_features  VIRTIO_NET_F_CTRL_MAC_ADDR  1) 
+memcmp(netcfg.mac, n-mac, ETH_ALEN)) {
 memcpy(n-mac, netcfg.mac, ETH_ALEN);
 qemu_format_nic_info_str(n-nic-nc, n-mac);
 }

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

1 2 >

1 - 100 of 111 matches

Mail list logo