Re: [libvirt] [PATCHv4 0/2] lxc keep/drop capabilities

2014-07-23 Thread Gao feng
On 07/18/2014 04:02 PM, Cédric Bosdonnat wrote:
 Hi all,
 
 Even though the v3 has been ACKed (but not pushed), I rebased it on top of
 master before pushing it and did a few changes that are worth checking before.
 
  * Patches 1 (feature) and 3 (doc) of the previous version were merged
    together, as suggested for another similar series.
 
  * virCgroupAllowDevice() has been changed so that negative major / minor
    device values are output as '*'. I have never seen either of them
    negative, but I don't have good knowledge of that area.
 


ACK & Pushed, thanks!

 --
 Cedric
 
 
 Cédric Bosdonnat (2):
   lxc: allow to keep or drop capabilities
   lxc domain from xml: convert lxc.cap.drop
 
  docs/drvlxc.html.in|  47 
  docs/schemas/domaincommon.rng  | 207 ++
  src/conf/domain_conf.c | 126 ++-
  src/conf/domain_conf.h |  56 +
  src/libvirt_private.syms   |   3 +
  src/lxc/lxc_cgroup.c   |   8 +
  src/lxc/lxc_container.c| 241 
 +++--
  src/lxc/lxc_native.c   |  25 +++
  src/util/vircgroup.c   |  57 -
  src/util/vircgroup.h   |   2 +
  tests/domainschemadata/domain-caps-features.xml|  28 +++
  tests/lxcconf2xmldata/lxcconf2xml-blkiotune.xml|   2 +
  tests/lxcconf2xmldata/lxcconf2xml-cpusettune.xml   |   2 +
  tests/lxcconf2xmldata/lxcconf2xml-cputune.xml  |   2 +
  tests/lxcconf2xmldata/lxcconf2xml-idmap.xml|   2 +
  .../lxcconf2xmldata/lxcconf2xml-macvlannetwork.xml |   4 +
  tests/lxcconf2xmldata/lxcconf2xml-memtune.xml  |   2 +
  tests/lxcconf2xmldata/lxcconf2xml-nonenetwork.xml  |   4 +
  tests/lxcconf2xmldata/lxcconf2xml-nonetwork.xml|   2 +
  tests/lxcconf2xmldata/lxcconf2xml-physnetwork.xml  |   4 +
  tests/lxcconf2xmldata/lxcconf2xml-simple.xml   |   8 +
  tests/lxcconf2xmldata/lxcconf2xml-vlannetwork.xml  |   4 +
  22 files changed, 816 insertions(+), 20 deletions(-)
  create mode 100644 tests/domainschemadata/domain-caps-features.xml
 


--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list

Re: [libvirt] [PATCH] LXC: create a bind mount for sysfs when enable userns but disable netns

2014-07-23 Thread Gao feng
On 07/14/2014 06:01 PM, Chen Hanxiao wrote:
 Kernel commit 7dc5dbc879bd0779924b5132a48b731a0bc04a1e
 forbids doing a fresh mount of sysfs
 when userns is enabled but netns is disabled.
 This patch creates a bind mount in this scenario.
 
 Signed-off-by: Chen Hanxiao chenhanx...@cn.fujitsu.com
 ---
  src/lxc/lxc_container.c | 44 +---
  1 file changed, 33 insertions(+), 11 deletions(-)
 

Pushed, thanks!

 diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c
 index 4d89677..8a27215 100644
 --- a/src/lxc/lxc_container.c
 +++ b/src/lxc/lxc_container.c
 @@ -815,10 +815,13 @@ static int lxcContainerSetReadOnly(void)
  }
  
  
 -static int lxcContainerMountBasicFS(bool userns_enabled)
 +static int lxcContainerMountBasicFS(bool userns_enabled,
 +bool netns_disabled)
  {
  size_t i;
  int rc = -1;
 +char* mnt_src = NULL;
 +int mnt_mflags;
  
  VIR_DEBUG(Mounting basic filesystems);
  
 @@ -826,8 +829,25 @@ static int lxcContainerMountBasicFS(bool userns_enabled)
  bool bindOverReadonly;
  virLXCBasicMountInfo const *mnt = lxcBasicMounts[i];
  
 +/* When enable userns but disable netns, kernel will
 + * forbid us doing a new fresh mount for sysfs.
 + * So we had to do a bind mount for sysfs instead.
 + */
 +if (userns_enabled  netns_disabled 
 +STREQ(mnt-src, sysfs)) {
 +if (VIR_STRDUP(mnt_src, /sys)  0) {
 +goto cleanup;
 +}
 +mnt_mflags = MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY|MS_BIND;
 +} else {
 +if (VIR_STRDUP(mnt_src, mnt-src)  0) {
 +goto cleanup;
 +}
 +mnt_mflags = mnt-mflags;
 +}
 +
  VIR_DEBUG(Processing %s - %s,
 -  mnt-src, mnt-dst);
 +  mnt_src, mnt-dst);
  
  if (mnt-skipUnmounted) {
  char *hostdir;
 @@ -856,7 +876,7 @@ static int lxcContainerMountBasicFS(bool userns_enabled)
  if (virFileMakePath(mnt-dst)  0) {
  virReportSystemError(errno,
   _(Failed to mkdir %s),
 - mnt-src);
 + mnt_src);
  goto cleanup;
  }
  
 @@ -867,24 +887,24 @@ static int lxcContainerMountBasicFS(bool userns_enabled)
   * we mount the filesystem in read-write mode initially, and then do 
 a
   * separate read-only bind mount on top of that.
   */
 -bindOverReadonly = !!(mnt-mflags  MS_RDONLY);
 +bindOverReadonly = !!(mnt_mflags  MS_RDONLY);
  
  VIR_DEBUG(Mount %s on %s type=%s flags=%x,
 -  mnt-src, mnt-dst, mnt-type, mnt-mflags  ~MS_RDONLY);
 -if (mount(mnt-src, mnt-dst, mnt-type, mnt-mflags  ~MS_RDONLY, 
 NULL)  0) {
 +  mnt_src, mnt-dst, mnt-type, mnt_mflags  ~MS_RDONLY);
 +if (mount(mnt_src, mnt-dst, mnt-type, mnt_mflags  ~MS_RDONLY, 
 NULL)  0) {
  virReportSystemError(errno,
   _(Failed to mount %s on %s type %s 
 flags=%x),
 - mnt-src, mnt-dst, NULLSTR(mnt-type),
 - mnt-mflags  ~MS_RDONLY);
 + mnt_src, mnt-dst, NULLSTR(mnt-type),
 + mnt_mflags  ~MS_RDONLY);
  goto cleanup;
  }
  
  if (bindOverReadonly 
 -mount(mnt-src, mnt-dst, NULL,
 +mount(mnt_src, mnt-dst, NULL,
MS_BIND|MS_REMOUNT|MS_RDONLY, NULL)  0) {
  virReportSystemError(errno,
   _(Failed to re-mount %s on %s flags=%x),
 - mnt-src, mnt-dst,
 + mnt_src, mnt-dst,
   MS_BIND|MS_REMOUNT|MS_RDONLY);
  goto cleanup;
  }
 @@ -893,6 +913,7 @@ static int lxcContainerMountBasicFS(bool userns_enabled)
  rc = 0;
  
   cleanup:
 +VIR_FREE(mnt_src);
  VIR_DEBUG(rc=%d, rc);
  return rc;
  }
 @@ -1643,7 +1664,8 @@ static int lxcContainerSetupPivotRoot(virDomainDefPtr 
 vmDef,
  goto cleanup;
  
  /* Mounts the core /proc, /sys, etc filesystems */
 -if (lxcContainerMountBasicFS(vmDef-idmap.nuidmap)  0)
 +if (lxcContainerMountBasicFS(vmDef-idmap.nuidmap,
 + !vmDef-nnets)  0)
  goto cleanup;
  
  /* Ensure entire root filesystem (except /.oldroot) is readonly */
 

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH] LXC: show used memory as 0 when domain is not active

2014-07-23 Thread Gao feng
On 07/17/2014 05:28 PM, Chen Hanxiao wrote:
 Before:
 virsh # dominfo chx3
 State:  shut off
 Max memory: 92160 KiB
 Used memory:92160 KiB
 
 After:
 virsh # dominfo container1
 State:  shut off
 Max memory: 92160 KiB
 Used memory:0 KiB
 
 Similar to qemu cases.
 
 Signed-off-by: Chen Hanxiao chenhanx...@cn.fujitsu.com
 ---
  src/lxc/lxc_driver.c   | 2 +-
  src/qemu/qemu_driver.c | 2 +-
  2 files changed, 2 insertions(+), 2 deletions(-)
 


ACK & Pushed, thanks!

 diff --git a/src/lxc/lxc_driver.c b/src/lxc/lxc_driver.c
 index b7b4b02..f094f86 100644
 --- a/src/lxc/lxc_driver.c
 +++ b/src/lxc/lxc_driver.c
 @@ -584,7 +584,7 @@ static int lxcDomainGetInfo(virDomainPtr dom,
  
  if (!virDomainObjIsActive(vm)) {
  info-cpuTime = 0;
 -info-memory = vm-def-mem.cur_balloon;
 +info-memory = 0;
  } else {
  if (virCgroupGetCpuacctUsage(priv-cgroup, (info-cpuTime))  0) {
  virReportError(VIR_ERR_OPERATION_FAILED,
 diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
 index 33541d3..01107cf 100644
 --- a/src/qemu/qemu_driver.c
 +++ b/src/qemu/qemu_driver.c
 @@ -2534,7 +2534,7 @@ static int qemuDomainGetInfo(virDomainPtr dom,
  info-memory = vm-def-mem.cur_balloon;
  }
  } else {
 -info-memory = vm-def-mem.cur_balloon;
 +info-memory = 0;
  }
  
  info-nrVirtCpu = vm-def-vcpus;
 

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCHv4 0/2] lxc keep/drop capabilities

2014-07-23 Thread Gao feng
On 07/23/2014 04:18 PM, Peter Krempa wrote:
 On 07/23/14 09:17, Gao feng wrote:
 On 07/18/2014 04:02 PM, Cédric Bosdonnat wrote:
 Hi all,

 Even though the v3 has been ACKed (but not pushed), I rebased it on top of
 master before pushing it and did a few changes that are worth checking 
 before.

  * Patches 1 (feature) and 3 (doc) of the previous version were merged
    together, as suggested for another similar series.

  * virCgroupAllowDevice() has been changed so that negative major / minor
    device values are output as '*'. I have never seen either of them
    negative, but I don't have good knowledge of that area.



 ACK & Pushed, thanks!
 
 Please compile the code before pushing! This broke the build:
 

Oops! My fault, I will take more care next time.

 
  CC   lxc/libvirt_driver_lxc_impl_la-lxc_cgroup.lo
 In file included from /usr/include/cap-ng.h:27:0,
  from lxc/lxc_container.c:48:
 lxc/lxc_container.c: In function 'lxcContainerDropCapabilities':
 lxc/lxc_container.c:1951:35: error: comparison of unsigned expression >= 0 is
 always true [-Werror=type-limits]
  if (!cap_valid(capsMapping[i]))
^
 cc1: all warnings being treated as errors
 make[3]: *** [lxc/libvirt_driver_lxc_impl_la-lxc_container.lo] Error 1
 make[3]: *** Waiting for unfinished jobs
 lxc/lxc_cgroup.c: In function 'virLXCCgroupSetupDeviceACL':
 lxc/lxc_cgroup.c:354:9: error: jump skips variable initialization 
 [-Werror=jump-misses-init]
  goto cleanup;
  ^
 lxc/lxc_cgroup.c:456:2: note: label 'cleanup' defined here
   cleanup:
   ^
 lxc/lxc_cgroup.c:357:9: note: 'capMknod' declared here
  int capMknod = def-caps_features[VIR_DOMAIN_CAPS_FEATURE_MKNOD];
  ^
 cc1: all warnings being treated as errors
 
 
 Fixup patch coming soon.


Thanks!


--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list

Re: [libvirt] [PATCH] LXC: create a bind mount for sysfs when enable userns but disable netns

2014-07-20 Thread Gao feng
On 07/14/2014 06:01 PM, Chen Hanxiao wrote:
 Kernel commit 7dc5dbc879bd0779924b5132a48b731a0bc04a1e
 forbids doing a fresh mount of sysfs
 when userns is enabled but netns is disabled.
 This patch creates a bind mount in this scenario.
 
 Signed-off-by: Chen Hanxiao chenhanx...@cn.fujitsu.com
 ---

Looks good to me, ACK

  src/lxc/lxc_container.c | 44 +---
  1 file changed, 33 insertions(+), 11 deletions(-)
 
 diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c
 index 4d89677..8a27215 100644
 --- a/src/lxc/lxc_container.c
 +++ b/src/lxc/lxc_container.c
 @@ -815,10 +815,13 @@ static int lxcContainerSetReadOnly(void)
  }
  
  
 -static int lxcContainerMountBasicFS(bool userns_enabled)
 +static int lxcContainerMountBasicFS(bool userns_enabled,
 +bool netns_disabled)
  {
  size_t i;
  int rc = -1;
 +char* mnt_src = NULL;
 +int mnt_mflags;
  
  VIR_DEBUG(Mounting basic filesystems);
  
 @@ -826,8 +829,25 @@ static int lxcContainerMountBasicFS(bool userns_enabled)
  bool bindOverReadonly;
  virLXCBasicMountInfo const *mnt = lxcBasicMounts[i];
  
 +/* When enable userns but disable netns, kernel will
 + * forbid us doing a new fresh mount for sysfs.
 + * So we had to do a bind mount for sysfs instead.
 + */
 +if (userns_enabled  netns_disabled 
 +STREQ(mnt-src, sysfs)) {
 +if (VIR_STRDUP(mnt_src, /sys)  0) {
 +goto cleanup;
 +}
 +mnt_mflags = MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY|MS_BIND;
 +} else {
 +if (VIR_STRDUP(mnt_src, mnt-src)  0) {
 +goto cleanup;
 +}
 +mnt_mflags = mnt-mflags;
 +}
 +
  VIR_DEBUG(Processing %s - %s,
 -  mnt-src, mnt-dst);
 +  mnt_src, mnt-dst);
  
  if (mnt-skipUnmounted) {
  char *hostdir;
 @@ -856,7 +876,7 @@ static int lxcContainerMountBasicFS(bool userns_enabled)
  if (virFileMakePath(mnt-dst)  0) {
  virReportSystemError(errno,
   _(Failed to mkdir %s),
 - mnt-src);
 + mnt_src);
  goto cleanup;
  }
  
 @@ -867,24 +887,24 @@ static int lxcContainerMountBasicFS(bool userns_enabled)
   * we mount the filesystem in read-write mode initially, and then do 
 a
   * separate read-only bind mount on top of that.
   */
 -bindOverReadonly = !!(mnt-mflags  MS_RDONLY);
 +bindOverReadonly = !!(mnt_mflags  MS_RDONLY);
  
  VIR_DEBUG(Mount %s on %s type=%s flags=%x,
 -  mnt-src, mnt-dst, mnt-type, mnt-mflags  ~MS_RDONLY);
 -if (mount(mnt-src, mnt-dst, mnt-type, mnt-mflags  ~MS_RDONLY, 
 NULL)  0) {
 +  mnt_src, mnt-dst, mnt-type, mnt_mflags  ~MS_RDONLY);
 +if (mount(mnt_src, mnt-dst, mnt-type, mnt_mflags  ~MS_RDONLY, 
 NULL)  0) {
  virReportSystemError(errno,
   _(Failed to mount %s on %s type %s 
 flags=%x),
 - mnt-src, mnt-dst, NULLSTR(mnt-type),
 - mnt-mflags  ~MS_RDONLY);
 + mnt_src, mnt-dst, NULLSTR(mnt-type),
 + mnt_mflags  ~MS_RDONLY);
  goto cleanup;
  }
  
  if (bindOverReadonly 
 -mount(mnt-src, mnt-dst, NULL,
 +mount(mnt_src, mnt-dst, NULL,
MS_BIND|MS_REMOUNT|MS_RDONLY, NULL)  0) {
  virReportSystemError(errno,
   _(Failed to re-mount %s on %s flags=%x),
 - mnt-src, mnt-dst,
 + mnt_src, mnt-dst,
   MS_BIND|MS_REMOUNT|MS_RDONLY);
  goto cleanup;
  }
 @@ -893,6 +913,7 @@ static int lxcContainerMountBasicFS(bool userns_enabled)
  rc = 0;
  
   cleanup:
 +VIR_FREE(mnt_src);
  VIR_DEBUG(rc=%d, rc);
  return rc;
  }
 @@ -1643,7 +1664,8 @@ static int lxcContainerSetupPivotRoot(virDomainDefPtr 
 vmDef,
  goto cleanup;
  
  /* Mounts the core /proc, /sys, etc filesystems */
 -if (lxcContainerMountBasicFS(vmDef-idmap.nuidmap)  0)
 +if (lxcContainerMountBasicFS(vmDef-idmap.nuidmap,
 + !vmDef-nnets)  0)
  goto cleanup;
  
  /* Ensure entire root filesystem (except /.oldroot) is readonly */
 

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCHv2 0/6] Add BlkIO and CPU/mem stat API implementations for lxc

2014-01-29 Thread Gao feng
on 2014/01/28 03:27, Thorsten Behrens wrote:
 Hi,
 

Hi,

Sorry, I'm out of the office at the moment. If nobody else reviews or comments
on this, I will pick it up when I am back on Feb 10.

Thanks!

 sorry for the noise - but I don't see the patch series pushed yet?
 
 Gao feng wrote:
 [...]
 ACK with these changed.

 Should I fix the editorial issues & resubmit?
 
That would be great. Note that I haven't finished reviewing the patch
"Implement lxcDomainBlockStatsFlags for lxc driver" yet.

Thanks!

Gao

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCHv3] Add util virCgroupGetBlkioIo*Serviced methods.

2014-01-23 Thread Gao feng
On 01/20/2014 08:15 PM, Thorsten Behrens wrote:
 This reads blkio stats from blkio.throttle.io_service_bytes and
 blkio.throttle.io_serviced.
 ---
 
 Note on v3:
  - rebased to current master, sadly the
virCgroupSetBlkioDeviceReadBps etc conflicted
 
  src/libvirt_private.syms |   2 +
  src/util/vircgroup.c | 242 
 +++
  src/util/vircgroup.h |  12 +++
  3 files changed, 256 insertions(+)
 
 diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
 index 3ede3d5..1e44ed8 100644
 --- a/src/libvirt_private.syms
 +++ b/src/libvirt_private.syms
 @@ -1002,6 +1002,8 @@ virCgroupDenyDevice;
  virCgroupDenyDeviceMajor;
  virCgroupDenyDevicePath;
  virCgroupFree;
 +virCgroupGetBlkioIoDeviceServiced;
 +virCgroupGetBlkioIoServiced;
  virCgroupGetBlkioWeight;
  virCgroupGetCpuacctPercpuUsage;
  virCgroupGetCpuacctStat;
 diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c
 index a6d60c5..2b70bcb 100644
 --- a/src/util/vircgroup.c
 +++ b/src/util/vircgroup.c
 @@ -1786,6 +1786,221 @@ virCgroupPathOfController(virCgroupPtr group,
  
  
  /**
 + * virCgroupGetBlkioIoServiced:
 + *
 + * @group: The cgroup to get throughput for
 + * @bytes_read: Pointer to returned bytes read
 + * @bytes_write: Pointer to returned bytes written
 + * @requests_read: Pointer to returned read io ops
 + * @requests_write: Pointer to returned write io ops
 + *
 + * Returns: 0 on success, -1 on error
 + */
 +int
 +virCgroupGetBlkioIoServiced(virCgroupPtr group,
 +long long *bytes_read,
 +long long *bytes_write,
 +long long *requests_read,
 +long long *requests_write)
 +{
 +long long stats_val;
 +char *str1 = NULL, *str2 = NULL, *p1, *p2;
 +size_t i;
 +int ret = -1;
 +
 +const char *value_names[] = {
 +Read ,
 +Write 
 +};
 +long long *bytes_ptrs[] = {
 +bytes_read,
 +bytes_write
 +};
 +long long *requests_ptrs[] = {
 +requests_read,
 +requests_write
 +};
 +
 +*bytes_read = 0;
 +*bytes_write = 0;
 +*requests_read = 0;
 +*requests_write = 0;
 +
 +if (virCgroupGetValueStr(group,
 + VIR_CGROUP_CONTROLLER_BLKIO,
 + blkio.throttle.io_service_bytes, str1)  0)
 +goto cleanup;
 +
 +if (virCgroupGetValueStr(group,
 + VIR_CGROUP_CONTROLLER_BLKIO,
 + blkio.throttle.io_serviced, str2)  0)
 +goto cleanup;
 +
 +/* sum up all entries of the same kind, from all devices */
 +for (i = 0; i  ARRAY_CARDINALITY(value_names); i++) {
 +p1 = str1;
 +p2 = str2;
 +
 +while ((p1 = strstr(p1, value_names[i]))) {
 +p1 += strlen(value_names[i]);
 +if (virStrToLong_ll(p1, p1, 10, stats_val)  0) {
 +virReportError(VIR_ERR_INTERNAL_ERROR,
 +   _(Cannot parse byte %sstat '%s'),
 +   value_names[i],
 +   p1);
 +goto cleanup;
 +}
 +
 +if (stats_val  0 ||
 +(stats_val  0  *bytes_ptrs[i]  (LLONG_MAX - stats_val)))
 +{
 +virReportError(VIR_ERR_OVERFLOW,
 +   _(Sum of byte %sstat overflows),
 +   value_names[i]);
 +goto cleanup;
 +}
 +*bytes_ptrs[i] += stats_val;
 +}
 +
 +while ((p2 = strstr(p2, value_names[i]))) {
 +p2 += strlen(value_names[i]);
 +if (virStrToLong_ll(p2, p2, 10, stats_val)  0) {
 +virReportError(VIR_ERR_INTERNAL_ERROR,
 +   _(Cannot parse %srequest stat '%s'),
 +   value_names[i],
 +   p2);
 +goto cleanup;
 +}
 +
 +if (stats_val  0 ||
 +(stats_val  0  *requests_ptrs[i]  (LLONG_MAX - 
 stats_val)))
 +{
 +virReportError(VIR_ERR_OVERFLOW,
 +   _(Sum of %srequest stat overflows),
 +   value_names[i]);
 +goto cleanup;
 +}
 +*requests_ptrs[i] += stats_val;
 +}
 +}
 +
 +ret = 0;
 +
 +cleanup:
 +VIR_FREE(str2);
 +VIR_FREE(str1);
 +return ret;
 +}
 +
 +
 +/**
 + * virCgroupGetBlkioIoDeviceServiced:
 + *
 + * @group: The cgroup to get throughput for
 + * @path: The device to get throughput for
 + * @bytes_read: Pointer to returned bytes read
 + * @bytes_write: Pointer to returned bytes written
 + * @requests_read: Pointer to returned read io ops
 + * @requests_write: Pointer to returned write io ops
 + *
 + * Returns: 0 on success, -1 on error
 + */
 +int
 

Re: [libvirt] [PATCHv2 2/6] Implement domainMemoryStats API slot for LXC driver.

2014-01-23 Thread Gao feng
On 01/20/2014 07:12 PM, Thorsten Behrens wrote:
 ---
 
 Notes on v2:
  - check if domain is running, fixed ret val calculation
  - api slot comment is now referencing 1.2.2
 
  src/lxc/lxc_driver.c | 50 ++
  1 file changed, 50 insertions(+)
 
 diff --git a/src/lxc/lxc_driver.c b/src/lxc/lxc_driver.c
 index 5ae4b65..8cf8e48 100644
 --- a/src/lxc/lxc_driver.c
 +++ b/src/lxc/lxc_driver.c
 @@ -4556,6 +4556,55 @@ lxcNodeGetInfo(virConnectPtr conn,
  
  
  static int
 +lxcDomainMemoryStats(virDomainPtr dom,
 + struct _virDomainMemoryStat *stats,
 + unsigned int nr_stats,
 + unsigned int flags)
 +{
 +virDomainObjPtr vm;
 +int ret = -1;
 +virLXCDomainObjPrivatePtr priv;
 +
 +virCheckFlags(0, -1);
 +
 +if (!(vm = lxcDomObjFromDomain(dom)))
 +goto cleanup;
 +
 +priv = vm-privateData;
 +
 +if (virDomainMemoryStatsEnsureACL(dom-conn, vm-def)  0)
 +goto cleanup;
 +
 +ret = 0;
 +if (!virDomainObjIsActive(vm))
 +goto cleanup;
 +
 +if (ret  nr_stats) {
 +stats[ret].tag = VIR_DOMAIN_MEMORY_STAT_ACTUAL_BALLOON;
 +stats[ret].val = vm-def-mem.cur_balloon;
 +ret++;
 +}
 +if (ret  nr_stats) {
 +stats[ret].tag = VIR_DOMAIN_MEMORY_STAT_SWAP_IN;
 +virCgroupGetMemSwapUsage(priv-cgroup, stats[ret].val);
 +ret++;
 +}
 +if (ret  nr_stats) {
 +unsigned long kb;
 +stats[ret].tag = VIR_DOMAIN_MEMORY_STAT_RSS;
 +virCgroupGetMemoryUsage(priv-cgroup, kb);
 +stats[ret].val = kb;
 +ret++;
 +}
 +
 +cleanup:
 +if (vm)
 +virObjectUnlock(vm);
 +return ret;
 +}
 +
 +
 +static int
  lxcNodeGetCPUStats(virConnectPtr conn,
 int cpuNum,
 virNodeCPUStatsPtr params,
 @@ -4783,6 +4832,7 @@ static virDriver lxcDriver = {
  .domainSetSchedulerParameters = lxcDomainSetSchedulerParameters, /* 
 0.5.0 */
  .domainSetSchedulerParametersFlags = 
 lxcDomainSetSchedulerParametersFlags, /* 0.9.2 */
  .domainInterfaceStats = lxcDomainInterfaceStats, /* 0.7.3 */
 +.domainMemoryStats = lxcDomainMemoryStats, /* 1.2.2 */
  .nodeGetCPUStats = lxcNodeGetCPUStats, /* 0.9.3 */
  .nodeGetMemoryStats = lxcNodeGetMemoryStats, /* 0.9.3 */
  .nodeGetCellsFreeMemory = lxcNodeGetCellsFreeMemory, /* 0.6.5 */
 
looks good to me
ACK

thanks!

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCHv2 3/6] Make qemuGetDomainTotalCPUStats a virCgroup function.

2014-01-23 Thread Gao feng
On 01/20/2014 07:12 PM, Thorsten Behrens wrote:
 To reuse this from other drivers, like lxc.
 ---
 
 Notes on v2:
  - renamed to proper camel case: virCgroupGetDomainTotalCpuStats
 
  src/libvirt_private.syms |  1 +
  src/qemu/qemu_driver.c   | 54 
 ++--
  src/util/vircgroup.c | 53 +++
  src/util/vircgroup.h |  5 +
  4 files changed, 61 insertions(+), 52 deletions(-)
 
 diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
 index e4815b3..97198d1 100644
 --- a/src/libvirt_private.syms
 +++ b/src/libvirt_private.syms
 @@ -1013,6 +1013,7 @@ virCgroupGetCpuCfsQuota;
  virCgroupGetCpusetCpus;
  virCgroupGetCpusetMems;
  virCgroupGetCpuShares;
 +virCgroupGetDomainTotalCpuStats;
  virCgroupGetFreezerState;
  virCgroupGetMemoryHardLimit;
  virCgroupGetMemorySoftLimit;
 diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
 index ebb77dc..a1e45c3 100644
 --- a/src/qemu/qemu_driver.c
 +++ b/src/qemu/qemu_driver.c
 @@ -105,7 +105,6 @@
  
  #define QEMU_NB_NUMA_PARAM 2
  
 -#define QEMU_NB_TOTAL_CPU_STAT_PARAM 3
  #define QEMU_NB_PER_CPU_STAT_PARAM 2
  
  #define QEMU_SCHED_MIN_PERIOD  1000LL
 @@ -15304,56 +15303,6 @@ cleanup:
  return ret;
  }
  
 -/* qemuDomainGetCPUStats() with start_cpu == -1 */
 -static int
 -qemuDomainGetTotalcpuStats(virDomainObjPtr vm,
 -   virTypedParameterPtr params,
 -   int nparams)
 -{
 -unsigned long long cpu_time;
 -int ret;
 -qemuDomainObjPrivatePtr priv = vm-privateData;
 -
 -if (nparams == 0) /* return supported number of params */
 -return QEMU_NB_TOTAL_CPU_STAT_PARAM;
 -/* entry 0 is cputime */
 -ret = virCgroupGetCpuacctUsage(priv-cgroup, cpu_time);
 -if (ret  0) {
 -virReportSystemError(-ret, %s, _(unable to get cpu account));
 -return -1;
 -}
 -
 -if (virTypedParameterAssign(params[0], VIR_DOMAIN_CPU_STATS_CPUTIME,
 -VIR_TYPED_PARAM_ULLONG, cpu_time)  0)
 -return -1;
 -
 -if (nparams  1) {
 -unsigned long long user;
 -unsigned long long sys;
 -
 -ret = virCgroupGetCpuacctStat(priv-cgroup, user, sys);
 -if (ret  0) {
 -virReportSystemError(-ret, %s, _(unable to get cpu account));
 -return -1;
 -}
 -
 -if (virTypedParameterAssign(params[1],
 -VIR_DOMAIN_CPU_STATS_USERTIME,
 -VIR_TYPED_PARAM_ULLONG, user)  0)
 -return -1;
 -if (nparams  2 
 -virTypedParameterAssign(params[2],
 -VIR_DOMAIN_CPU_STATS_SYSTEMTIME,
 -VIR_TYPED_PARAM_ULLONG, sys)  0)
 -return -1;
 -
 -if (nparams  QEMU_NB_TOTAL_CPU_STAT_PARAM)
 -nparams = QEMU_NB_TOTAL_CPU_STAT_PARAM;
 -}
 -
 -return nparams;
 -}
 -
  /* This function gets the sums of cpu time consumed by all vcpus.
   * For example, if there are 4 physical cpus, and 2 vcpus in a domain,
   * then for each vcpu, the cpuacct.usage_percpu looks like this:
 @@ -15552,7 +15501,8 @@ qemuDomainGetCPUStats(virDomainPtr domain,
  }
  
  if (start_cpu == -1)
 -ret = qemuDomainGetTotalcpuStats(vm, params, nparams);
 +ret = virCgroupGetDomainTotalCpuStats(priv-cgroup,
 +  params, nparams);
  else
  ret = qemuDomainGetPercpuStats(vm, params, nparams,
 start_cpu, ncpus);
 diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c
 index 83fcefc..66045fb 100644
 --- a/src/util/vircgroup.c
 +++ b/src/util/vircgroup.c
 @@ -51,11 +51,14 @@
  #include virhashcode.h
  #include virstring.h
  #include virsystemd.h
 +#include virtypedparam.h
  
  #define CGROUP_MAX_VAL 512
  
  #define VIR_FROM_THIS VIR_FROM_CGROUP
  
 +#define CGROUP_NB_TOTAL_CPU_STAT_PARAM 3
 +
  #if defined(__linux__)  defined(HAVE_GETMNTENT_R)  \
  defined(_DIRENT_HAVE_D_TYPE)  defined(_SC_CLK_TCK)
  # define VIR_CGROUP_SUPPORTED
 @@ -2629,6 +2632,56 @@ virCgroupDenyDevicePath(virCgroupPtr group, const char 
 *path, int perms)
  }
  
  
 +
 +int
 +virCgroupGetDomainTotalCpuStats(virCgroupPtr group,
 +virTypedParameterPtr params,
 +int nparams)
 +{
 +unsigned long long cpu_time;
 +int ret;
 +
 +if (nparams == 0) /* return supported number of params */
 +return CGROUP_NB_TOTAL_CPU_STAT_PARAM;
 +/* entry 0 is cputime */
 +ret = virCgroupGetCpuacctUsage(group, cpu_time);
 +if (ret  0) {
 +virReportSystemError(-ret, %s, _(unable to get cpu account));
 +return -1;
 +}
 +
 +if (virTypedParameterAssign(params[0], VIR_DOMAIN_CPU_STATS_CPUTIME,
 +VIR_TYPED_PARAM_ULLONG, cpu_time)  

[libvirt] [PATCH] qemu: remove memset params array to zero in qemuDomainGetPercpuStats

2014-01-23 Thread Gao feng
The params array is allocated by VIR_ALLOC_N in
remoteDispatchDomainGetCPUStats, so it has already been
zeroed. There is no need to zero it again, and the memset
here is incorrect anyway: nparams * ncpus is the number of
array elements, not the size of the params array in bytes.

Signed-off-by: Gao feng gaof...@cn.fujitsu.com
---
 src/qemu/qemu_driver.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index df4f5b5..1e54164 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -15818,7 +15818,6 @@ qemuDomainGetPercpuStats(virDomainObjPtr vm,
 if (virCgroupGetCpuacctPercpuUsage(priv-cgroup, buf))
 goto cleanup;
 pos = buf;
-memset(params, 0, nparams * ncpus);
 
 /* return percpu cputime in index 0 */
 param_idx = 0;
-- 
1.8.4.2

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCHv2 4/6] Implement domainGetCPUStats for lxc driver.

2014-01-23 Thread Gao feng
On 01/20/2014 07:12 PM, Thorsten Behrens wrote:
 ---
 
 Notes on v2:
  - elided extra memset and leftover loop var n
  - api slot comment references 1.2.2 now
 
  src/lxc/lxc_driver.c | 128 
 +++
  1 file changed, 128 insertions(+)
 
 diff --git a/src/lxc/lxc_driver.c b/src/lxc/lxc_driver.c
 index 8cf8e48..19426f5 100644
 --- a/src/lxc/lxc_driver.c
 +++ b/src/lxc/lxc_driver.c
 @@ -75,6 +75,8 @@
  
  
  #define LXC_NB_MEM_PARAM  3
 +#define LXC_NB_PER_CPU_STAT_PARAM 1
 +
  
  static int lxcStateInitialize(bool privileged,
virStateInhibitCallback callback,
 @@ -4775,6 +4777,131 @@ cleanup:
  }
  
  
 +static int
 +lxcDomainGetPercpuStats(virDomainObjPtr vm,
 +virTypedParameterPtr params,
 +unsigned int nparams,
 +int start_cpu,
 +unsigned int ncpus)
 +{
 +int rv = -1;
 +size_t i;
 +int id, max_id;
 +char *pos;
 +char *buf = NULL;
 +virLXCDomainObjPrivatePtr priv = vm-privateData;
 +virTypedParameterPtr ent;
 +int param_idx;
 +unsigned long long cpu_time;
 +
 +/* TODO: share api contract code with other drivers here */
 +
 +/* return the number of supported params */
 +if (nparams == 0  ncpus != 0)
 +return LXC_NB_PER_CPU_STAT_PARAM;
 +
 +/* To parse account file, we need to know how many cpus are present.  */
 +max_id = nodeGetCPUCount();
 +if (max_id  0)
 +return rv;
 +
 +if (ncpus == 0) { /* returns max cpu ID */
 +rv = max_id;
 +goto cleanup;
 +}
 +
 +if (start_cpu  max_id) {
 +virReportError(VIR_ERR_INVALID_ARG,
 +   _(start_cpu %d larger than maximum of %d),
 +   start_cpu, max_id);
 +goto cleanup;
 +}
 +
 +/* we get percpu cputime accounting info. */
 +if (virCgroupGetCpuacctPercpuUsage(priv-cgroup, buf))
 +goto cleanup;
 +pos = buf;
 +
 +/* return percpu cputime in index 0 */
 +param_idx = 0;
 +
 +/* number of cpus to compute */
 +if (start_cpu = max_id - ncpus)
 +id = max_id - 1;
 +else
 +id = start_cpu + ncpus - 1;
 +
 +for (i = 0; i = id; i++) {
 +if (virStrToLong_ull(pos, pos, 10, cpu_time)  0) {
 +virReportError(VIR_ERR_INTERNAL_ERROR, %s,
 +   _(cpuacct parse error));
 +goto cleanup;
 +}
 +if (i  start_cpu)
 +continue;
 +ent = params[(i - start_cpu) * nparams + param_idx];
 +if (virTypedParameterAssign(ent, VIR_DOMAIN_CPU_STATS_CPUTIME,
 +VIR_TYPED_PARAM_ULLONG, cpu_time)  0)
 +goto cleanup;
 +}
 +
 +rv = nparams;
 +
 +cleanup:
 +VIR_FREE(buf);
 +return rv;
 +}
 +
 +
 +static int
 +lxcDomainGetCPUStats(virDomainPtr dom,
 + virTypedParameterPtr params,
 + unsigned int nparams,
 + int start_cpu,
 + unsigned int ncpus,
 + unsigned int flags)
 +{
 +virDomainObjPtr vm = NULL;
 +int ret = -1;
 +bool isActive;
 +virLXCDomainObjPrivatePtr priv;
 +
 +virCheckFlags(VIR_TYPED_PARAM_STRING_OKAY, -1);
 +
 +if (!(vm = lxcDomObjFromDomain(dom)))
 +return ret;
 +
 +priv = vm-privateData;
 +
 +if (virDomainGetCPUStatsEnsureACL(dom-conn, vm-def)  0)
 +goto cleanup;
 +
 +isActive = virDomainObjIsActive(vm);
 +if (!isActive) {
 +virReportError(VIR_ERR_OPERATION_INVALID, %s,
 +   _(domain is not running));
 +goto cleanup;
 +}
 +
 +if (!virCgroupHasController(priv-cgroup, 
 VIR_CGROUP_CONTROLLER_CPUACCT)) {
 +virReportError(VIR_ERR_OPERATION_INVALID,
 +   %s, _(cgroup CPUACCT controller is not mounted));
 +goto cleanup;
 +}
 +
 +if (start_cpu == -1)
 +ret = virCgroupGetDomainTotalCpuStats(priv-cgroup,
 +  params, nparams);
 +else
 +ret = lxcDomainGetPercpuStats(vm, params, nparams,
 +  start_cpu, ncpus);
 +cleanup:
 +if (vm)
 +virObjectUnlock(vm);
 +return ret;
 +}
 +
 +
  /* Function Tables */
  static virDriver lxcDriver = {
  .no = VIR_DRV_LXC,
 @@ -4852,6 +4979,7 @@ static virDriver lxcDriver = {
  .nodeSuspendForDuration = lxcNodeSuspendForDuration, /* 0.9.8 */
  .domainSetMetadata = lxcDomainSetMetadata, /* 1.1.3 */
  .domainGetMetadata = lxcDomainGetMetadata, /* 1.1.3 */
 +.domainGetCPUStats = lxcDomainGetCPUStats, /* 1.2.2 */
  .nodeGetMemoryParameters = lxcNodeGetMemoryParameters, /* 0.10.2 */
  .nodeSetMemoryParameters = lxcNodeSetMemoryParameters, /* 0.10.2 */
  .domainSendProcessSignal = lxcDomainSendProcessSignal, /* 1.0.1 */
 


Looks good to me

ACK

thanks!

--

Re: [libvirt] [PATCHv2 5/6] Implemet lxcDomainBlockStats for lxc driver

2014-01-23 Thread Gao feng
On 01/20/2014 07:12 PM, Thorsten Behrens wrote:
 ---
 
 Notes on v2:
  - works as-is, will send lxcDomainBlockStatsFlags patch separately
 
  src/lxc/lxc_driver.c | 51 +++
  1 file changed, 51 insertions(+)
 
 diff --git a/src/lxc/lxc_driver.c b/src/lxc/lxc_driver.c
 index 19426f5..bf6fd5c 100644
 --- a/src/lxc/lxc_driver.c
 +++ b/src/lxc/lxc_driver.c
 @@ -2023,6 +2023,56 @@ lxcDomainGetSchedulerParameters(virDomainPtr domain,
  
  
  static int
 +lxcDomainBlockStats(virDomainPtr dom,
 +const char *path,
 +struct _virDomainBlockStats *stats)
 +{
 +int ret = -1, idx;
 +virDomainObjPtr vm;
 +virDomainDiskDefPtr disk = NULL;
 +virLXCDomainObjPrivatePtr priv;
 +
 +if (!(vm = lxcDomObjFromDomain(dom)))
 +return ret;
 +
 +priv = vm-privateData;
 +
 +if (virDomainBlockStatsEnsureACL(dom-conn, vm-def)  0)
 +goto cleanup;
 +
 +if (!virDomainObjIsActive(vm)) {
 +virReportError(VIR_ERR_OPERATION_INVALID,
 +   %s, _(domain is not running));
 +goto cleanup;
 +}
 +
 +if ((idx = virDomainDiskIndexByName(vm-def, path, false))  0) {
 +virReportError(VIR_ERR_INVALID_ARG,
 +   _(invalid path: %s), path);
 +goto cleanup;
 +}
 +disk = vm-def-disks[idx];
 +
 +if (!disk-info.alias) {
 +virReportError(VIR_ERR_INTERNAL_ERROR,
 +   _(missing disk device alias name for %s), 
 disk-dst);
 +goto cleanup;
 +}
 +
 +ret = virCgroupGetBlkioIoDeviceServiced(priv-cgroup,
 +disk-info.alias,
 +stats-rd_bytes,
 +stats-wr_bytes,
 +stats-rd_req,
 +stats-wr_req);
 +cleanup:
 +if (vm)
 +virObjectUnlock(vm);
 +return ret;
 +}
 +
 +
 +static int
  lxcDomainSetBlkioParameters(virDomainPtr dom,
  virTypedParameterPtr params,
  int nparams,
 @@ -4958,6 +5008,7 @@ static virDriver lxcDriver = {
  .domainGetSchedulerParametersFlags = 
 lxcDomainGetSchedulerParametersFlags, /* 0.9.2 */
  .domainSetSchedulerParameters = lxcDomainSetSchedulerParameters, /* 
 0.5.0 */
  .domainSetSchedulerParametersFlags = 
 lxcDomainSetSchedulerParametersFlags, /* 0.9.2 */
 +.domainBlockStats = lxcDomainBlockStats, /* 0.4.1 */

this should be 1.2.2

  .domainInterfaceStats = lxcDomainInterfaceStats, /* 0.7.3 */
  .domainMemoryStats = lxcDomainMemoryStats, /* 1.2.2 */
  .nodeGetCPUStats = lxcNodeGetCPUStats, /* 0.9.3 */
 

ACK

thanks!

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCHv2 6/6] Widening API change - accept empty path for virDomainBlockStats

2014-01-23 Thread Gao feng
On 01/20/2014 07:12 PM, Thorsten Behrens wrote:
 And provide domain summary stat in that case, for lxc backend.
 Use case is a container inheriting all devices from the host,
 e.g. when doing application containerization.
 ---
 
 Notes on v2:
  - adapted virDomainBlockStats docs
  - adapted virsh domblkstat docs
  - made virsh actually accept empty disk argument
  - pedaling back a bit on the API change - accepting a NULL ptr
for the disk arg would need changing the remote protocol, so
better just take the empty string. Makes this less invasive even.
 
  src/libvirt.c|  8 ++--
  src/lxc/lxc_driver.c | 10 ++
  tools/virsh-domain-monitor.c | 11 ---
  tools/virsh.pod  |  5 +++--
  4 files changed, 27 insertions(+), 7 deletions(-)
 
 diff --git a/src/libvirt.c b/src/libvirt.c
 index 87a4d46..ead0813 100644
 --- a/src/libvirt.c
 +++ b/src/libvirt.c
 @@ -7781,7 +7781,9 @@ error:
   * an unambiguous source name of the block device (the source
   * file='...'/ sub-element, such as /path/to/image).  Valid names
   * can be found by calling virDomainGetXMLDesc() and inspecting
 - * elements within //domain/devices/disk.
 + * elements within //domain/devices/disk. Some drivers might also
 + * accept the empty string for the @disk parameter, and then yield
 + * summary stats for the entire domain.
   *
   * Domains may have more than one block device.  To get stats for
   * each you should make multiple calls to this function.
 @@ -7847,7 +7849,9 @@ error:
   * an unambiguous source name of the block device (the source
   * file='...'/ sub-element, such as /path/to/image).  Valid names
   * can be found by calling virDomainGetXMLDesc() and inspecting
 - * elements within //domain/devices/disk.
 + * elements within //domain/devices/disk. Some drivers might also
 + * accept the empty string for the @disk parameter, and then yield
 + * summary stats for the entire domain.
   *
   * Domains may have more than one block device.  To get stats for
   * each you should make multiple calls to this function.
 diff --git a/src/lxc/lxc_driver.c b/src/lxc/lxc_driver.c
 index bf6fd5c..31f1625 100644
 --- a/src/lxc/lxc_driver.c
 +++ b/src/lxc/lxc_driver.c
 @@ -2046,6 +2046,16 @@ lxcDomainBlockStats(virDomainPtr dom,
  goto cleanup;
  }
  
 +if (!*path) {
 +/* empty path - return entire domain blkstats instead */
 +ret = virCgroupGetBlkioIoServiced(priv-cgroup,
 +  stats-rd_bytes,
 +  stats-wr_bytes,
 +  stats-rd_req,
 +  stats-wr_req);
 +goto cleanup;
 +}
 +
  if ((idx = virDomainDiskIndexByName(vm-def, path, false))  0) {
  virReportError(VIR_ERR_INVALID_ARG,
 _(invalid path: %s), path);
 diff --git a/tools/virsh-domain-monitor.c b/tools/virsh-domain-monitor.c
 index b29b82a..6be253f 100644
 --- a/tools/virsh-domain-monitor.c
 +++ b/tools/virsh-domain-monitor.c
 @@ -880,7 +880,7 @@ static const vshCmdOptDef opts_domblkstat[] = {
  },
  {.name = device,
   .type = VSH_OT_DATA,
 - .flags = VSH_OFLAG_REQ,
 + .flags = VSH_OFLAG_EMPTY_OK,
   .help = N_(block device)
  },
  {.name = human,
 @@ -946,8 +946,13 @@ cmdDomblkstat(vshControl *ctl, const vshCmd *cmd)
  if (!(dom = vshCommandOptDomain(ctl, cmd, name)))
  return false;
  
 -if (vshCommandOptStringReq(ctl, cmd, device, device)  0)
 -goto cleanup;
 +/* device argument is optional now. if it's missing, supply empty
 +   string to denote 'all devices'. A NULL device arg would violate
 +   API contract.
 + */
 +rc = vshCommandOptStringReq(ctl, cmd, device, device); /* and ignore 
 rc */
 +if (!device)
 +device = ;
  
  rc = virDomainBlockStatsFlags(dom, device, NULL, nparams, 0);
  
 diff --git a/tools/virsh.pod b/tools/virsh.pod
 index 3534b54..c3ca016 100644
 --- a/tools/virsh.pod
 +++ b/tools/virsh.pod
 @@ -616,12 +616,13 @@ If I--graceful is specified, don't resort to extreme 
 measures
  (e.g. SIGKILL) when the guest doesn't stop after a reasonable timeout;
  return an error instead.
  
 -=item Bdomblkstat Idomain Iblock-device [I--human]
 +=item Bdomblkstat Idomain [Iblock-device] [I--human]
  
  Get device block stats for a running domain.  A Iblock-device corresponds
  to a unique target name (target dev='name'/) or source file (source
  file='name'/) for one of the disk devices attached to Idomain (see
 -also Bdomblklist for listing these names).
 +also Bdomblklist for listing these names). On a lxc domain, omitting the
 +Iblock-device yields device block stats summarily for the entire domain.
  
  Use I--human for a more human readable output.
  
 

Looks good to me

ACK

thanks!

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] What is the robust/recommended way to retrieve the PID of a VM's init process ?

2014-01-22 Thread Gao feng
On 01/21/2014 01:59 AM, Daniel P. Berrange wrote:
 On Mon, Jan 20, 2014 at 06:53:28PM +0100, Thierry Parmentelat wrote:
 Hi

 Well if I need to run anything in the container context, short of entering
 it through e.g. ssh - hoping this is properly set up - well, I can’t..

 I am using libvirt / lxc to set up a build box; essentially every night
 I would spawn a set of fresh VMs of some flavours (fedora18, ubuntu,
 what not) and use this to rebuild my system from scratch
 In this context it’s a real hassle to have to even set up ssh, there is
 no good reason for the build VM to run an ssh service at all, and I am
 concerned it might pull dependencies that I do not need/want
 I’d much rather have a direct means to just run some command inside the 
 container.

 Admittedly I’m brain-damaged after having used vservers for too long, and 
 their
 ‘vserver container exec command to run’ feature is in my genes now ;)

 Now maybe I am the one who is missing something and there already is 
 something to do that ?
 Using the trick below I essentially have what I need mind you, I’m just 
 concerned that it
 kind of works by accident :-)
 
 Yes, it already exists, albeit as a lxc specific custom command/api:
 
   $ virsh lxc-enter-namespace $CONTAINER /path/to/command/to/run arg1 arg2...
 
 There's a corresponding API in the libvirt-lxc.so library
 virDomainLxcEnterNamspace
 
 Eventually we'll turn this into a proper libvirt API with a less sucky
 virsh command name.

Sorry for another question: lxc-enter-namespace is unavailable on a remote
machine, i.e. you can't run
virsh -c lxc+ssh://root@x.x.x.x/ lxc-enter-namespace $CONTAINER /path/to/command/to/run arg1 arg2

What is the main obstacle that prevents us from supporting
lxc-enter-namespace on a remote machine?

Thanks!

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list

Re: [libvirt] [PATCH V2 RESEND 8/8] lxc: allow to setup throttle blkio cgroup through virsh

2014-01-20 Thread Gao feng
On 01/20/2014 07:46 PM, Daniel P. Berrange wrote:
 On Mon, Jan 20, 2014 at 11:25:56AM +0800, Gao feng wrote:
 On 12/20/2013 12:34 AM, Daniel P. Berrange wrote:
 On Fri, Dec 13, 2013 at 11:09:01AM +0800, Gao feng wrote:
 With this patch,user can set throttle blkio cgroup for
 lxc domain through virsh tool.

 Signed-off-by: Guan Qiang hzguanqi...@corp.netease.com
 Signed-off-by: Gao feng gaof...@cn.fujitsu.com
 ---
  src/lxc/lxc_driver.c | 671 
 +--
  1 file changed, 646 insertions(+), 25 deletions(-)


 ACK

 @@ -4623,6 +5243,7 @@ static virDriver lxcDriver = {
  .name = LXC_DRIVER_NAME,
  .connectOpen = lxcConnectOpen, /* 0.4.2 */
  .connectClose = lxcConnectClose, /* 0.4.2 */
 +.connectSupportsFeature = lxcConnectSupportsFeature, /* 1.1.4 */

 But change this to 1.2.1



 So this should be 1.2.2 now, right?
 
 Yes, correct.
 

Thanks! Changed and pushed.

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH 1/8] rename virDomainBlkioDeviceWeightParseXML to virDomainBlkioDeviceParseXML

2014-01-20 Thread Gao feng
On 01/21/2014 12:50 AM, Eric Blake wrote:
 On 01/20/2014 09:44 AM, Eric Blake wrote:
 On 12/11/2013 01:29 AM, Gao feng wrote:
 virDomainBlkioDeviceWeightParseXML will be used to parse
 the xml element read_bps, write_bps, read_iops, write_iops.

 Signed-off-by: Gao feng gaof...@cn.fujitsu.com
 ---
  src/conf/domain_conf.c | 10 +-
  1 file changed, 5 insertions(+), 5 deletions(-)

 ACK.
 
 Oh, I never saw this get reviewed, but I see it has already been in
 libvirt.git for more than a month.  Sorry for the noise.
 

:) thanks for your ack

I just pushed this patchset.

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH V2 RESEND 8/8] lxc: allow to setup throttle blkio cgroup through virsh

2014-01-19 Thread Gao feng
On 12/20/2013 12:34 AM, Daniel P. Berrange wrote:
 On Fri, Dec 13, 2013 at 11:09:01AM +0800, Gao feng wrote:
 With this patch,user can set throttle blkio cgroup for
 lxc domain through virsh tool.

 Signed-off-by: Guan Qiang hzguanqi...@corp.netease.com
 Signed-off-by: Gao feng gaof...@cn.fujitsu.com
 ---
  src/lxc/lxc_driver.c | 671 
 +--
  1 file changed, 646 insertions(+), 25 deletions(-)
 
 
 ACK
 
 @@ -4623,6 +5243,7 @@ static virDriver lxcDriver = {
  .name = LXC_DRIVER_NAME,
  .connectOpen = lxcConnectOpen, /* 0.4.2 */
  .connectClose = lxcConnectClose, /* 0.4.2 */
 +.connectSupportsFeature = lxcConnectSupportsFeature, /* 1.1.4 */
 
 But change this to 1.2.1
 


So this should be 1.2.2 now, right?

Thanks
Gao

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH 4/8] domain: introduce xml elements for throttle blkio cgroup

2014-01-19 Thread Gao feng
On 12/12/2013 08:21 PM, Daniel P. Berrange wrote:
 On Wed, Dec 11, 2013 at 04:29:49PM +0800, Gao feng wrote:
 This patch introduces new xml elements under blkiotune,
 we use these new elements to setup the throttle blkio
 cgroup for domain. The new blkiotune node looks like this:

 <blkiotune>
   <device>
     <path>/path/to/block</path>
     <weight>1000</weight>
     <read_iops_sec>1</read_iops_sec>
     <write_iops_sec>1</write_iops_sec>
     <read_bytes_sec>100</read_bytes_sec>
     <write_bytes_sec>100</write_bytes_sec>
   </device>
 </blkiotune>

 Signed-off-by: Guan Qiang hzguanqi...@corp.netease.com
 Signed-off-by: Gao feng gaof...@cn.fujitsu.com
 ---
  docs/schemas/domaincommon.rng | 28 --
  src/conf/domain_conf.c| 85 
 +--
  src/conf/domain_conf.h|  4 ++
  3 files changed, 103 insertions(+), 14 deletions(-)
 
 ACK
 
Pushed, thanks!

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH 5/8] blkio: Setting throttle blkio cgroup for domain

2014-01-19 Thread Gao feng
On 12/12/2013 08:24 PM, Daniel P. Berrange wrote:
 On Wed, Dec 11, 2013 at 04:29:50PM +0800, Gao feng wrote:
 This patch introduces virCgroupSetBlkioDeviceReadIops,
 virCgroupSetBlkioDeviceWriteIops,
 virCgroupSetBlkioDeviceReadBps and
 virCgroupSetBlkioDeviceWriteBps,

 we can use these interfaces to set up throttle
 blkio cgroup for domain.

 This patch also adds the new throttle blkio cgroup
 elements to the test xml.

 Signed-off-by: Guan Qiang hzguanqi...@corp.netease.com
 Signed-off-by: Gao feng gaof...@cn.fujitsu.com
 ---
  src/libvirt_private.syms   |   4 +
  src/lxc/lxc_cgroup.c   |  27 ++-
  src/qemu/qemu_cgroup.c |  27 ++-
  src/util/vircgroup.c   | 224 
 -
  src/util/vircgroup.h   |  16 ++
  .../qemuxml2argv-blkiotune-device.xml  |   8 +
  6 files changed, 295 insertions(+), 11 deletions(-)
 
 ACK
 
Pushed, thanks!

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH 6/8] virsh: add setting throttle blkio cgroup option to blkiotune

2014-01-19 Thread Gao feng
On 12/12/2013 08:26 PM, Daniel P. Berrange wrote:
 On Wed, Dec 11, 2013 at 04:29:51PM +0800, Gao feng wrote:
 With this patch, the user can set up the throttle blkio cgroup
 for a domain through the virsh command, such as:

 virsh blkiotune domain1 --device-read-bytes-sec 
 /dev/sda1,100,/dev/sda2,200
 --device-write-bytes-sec /dev/sda1,100 --device-read-iops-sec 
 /dev/sda1,1
 --device-write-iops-sec /dev/sda1,1,/dev/sda2,0

 This patch also add manpage for these new options.

 Signed-off-by: Guan Qiang hzguanqi...@corp.netease.com
 Signed-off-by: Gao feng gaof...@cn.fujitsu.com
 ---
  include/libvirt/libvirt.h.in | 45 +++
  tools/virsh-domain.c | 64 
 
  tools/virsh.pod  | 36 +++--
  3 files changed, 143 insertions(+), 2 deletions(-)
 
 ACK
 
Pushed, thanks!

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH] qemu: remove memset params array to zero in qemuDomainGetPercpuStats

2014-01-16 Thread Gao feng
On 01/15/2014 06:50 PM, Daniel P. Berrange wrote:
 On Wed, Jan 15, 2014 at 04:47:21PM +0800, Gao feng wrote:
 the array params is allocated by VIR_ALLOC_N in cmdCPUStats.
 it had been set to zero. No need to reset it to zero again,
 and this reset here is incorrect too, nparams * ncpus is the
 array length not the size of params array.
 
 'cmdCPUStats' is virsh client code. The QEMU driver is
 running server side in libvirtd. So whatever memory was
 allocated in cmdCPUStats isn't the same as the memory
 used in qemu_driver.c
 

Oh, right: this memory is allocated by VIR_ALLOC_N
in remoteDispatchDomainGetCPUStats, not in cmdCPUStats.

I will fix the commit message and resend this patch.

Thanks!

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH 4/6] Implement domainGetCPUStats for lxc driver.

2014-01-15 Thread Gao feng
On 01/15/2014 07:23 AM, Thorsten Behrens wrote:
 ---
  src/lxc/lxc_driver.c | 132 
 +++
  1 file changed, 132 insertions(+)
 
 diff --git a/src/lxc/lxc_driver.c b/src/lxc/lxc_driver.c
 index 9f586af..1e9c77a 100644
 --- a/src/lxc/lxc_driver.c
 +++ b/src/lxc/lxc_driver.c
 @@ -75,6 +75,8 @@
  
  
  #define LXC_NB_MEM_PARAM  3
 +#define LXC_NB_PER_CPU_STAT_PARAM 1
 +
  
  static int lxcStateInitialize(bool privileged,
virStateInhibitCallback callback,
 @@ -4775,6 +4777,135 @@ cleanup:
  }
  
  
 +static int
 +lxcDomainGetPercpuStats(virDomainObjPtr vm,
 +virTypedParameterPtr params,
 +unsigned int nparams,
 +int start_cpu,
 +unsigned int ncpus)
 +{
 +int rv = -1;
 +size_t i;
 +int id, max_id;
 +char *pos;
 +char *buf = NULL;
 +unsigned int n = 0;
 +virLXCDomainObjPrivatePtr priv = vm-privateData;
 +virTypedParameterPtr ent;
 +int param_idx;
 +unsigned long long cpu_time;
 +
 +/* TODO: share api contract code with other drivers here */
 +
 +/* return the number of supported params */
 +if (nparams == 0  ncpus != 0)
 +return LXC_NB_PER_CPU_STAT_PARAM;
 +
 +/* To parse account file, we need to know how many cpus are present.  */
 +max_id = nodeGetCPUCount();
 +if (max_id  0)
 +return rv;
 +
 +if (ncpus == 0) { /* returns max cpu ID */
 +rv = max_id;
 +goto cleanup;
 +}
 +
 +if (start_cpu  max_id) {
 +virReportError(VIR_ERR_INVALID_ARG,
 +   _(start_cpu %d larger than maximum of %d),
 +   start_cpu, max_id);
 +goto cleanup;
 +}
 +
 +/* we get percpu cputime accounting info. */
 +if (virCgroupGetCpuacctPercpuUsage(priv-cgroup, buf))
 +goto cleanup;
 +pos = buf;
 +memset(params, 0, nparams * ncpus);

This memset is unnecessary: params was already zero-filled when it was
allocated with VIR_ALLOC_N.

 +
 +/* return percpu cputime in index 0 */
 +param_idx = 0;
 +
 +/* number of cpus to compute */
 +if (start_cpu = max_id - ncpus)
 +id = max_id - 1;
 +else
 +id = start_cpu + ncpus - 1;
 +
 +for (i = 0; i = id; i++) {
 +if (virStrToLong_ull(pos, pos, 10, cpu_time)  0) {
 +virReportError(VIR_ERR_INTERNAL_ERROR, %s,
 +   _(cpuacct parse error));
 +goto cleanup;
 +} else {
 +n++;

n is incremented but never used; please drop it.

 +}
 +if (i  start_cpu)
 +continue;
 +ent = params[(i - start_cpu) * nparams + param_idx];
 +if (virTypedParameterAssign(ent, VIR_DOMAIN_CPU_STATS_CPUTIME,
 +VIR_TYPED_PARAM_ULLONG, cpu_time)  0)
 +goto cleanup;
 +}
 +
 +rv = nparams;
 +
 +cleanup:
 +VIR_FREE(buf);
 +return rv;
 +}
 +
 +
 +static int
 +lxcDomainGetCPUStats(virDomainPtr dom,
 + virTypedParameterPtr params,
 + unsigned int nparams,
 + int start_cpu,
 + unsigned int ncpus,
 + unsigned int flags)
 +{
 +virDomainObjPtr vm = NULL;
 +int ret = -1;
 +bool isActive;
 +virLXCDomainObjPrivatePtr priv;
 +
 +virCheckFlags(VIR_TYPED_PARAM_STRING_OKAY, -1);
 +
 +if (!(vm = lxcDomObjFromDomain(dom)))
 +return ret;
 +
 +priv = vm-privateData;
 +
 +if (virDomainGetCPUStatsEnsureACL(dom-conn, vm-def)  0)
 +goto cleanup;
 +
 +isActive = virDomainObjIsActive(vm);
 +if (!isActive) {
 +virReportError(VIR_ERR_OPERATION_INVALID, %s,
 +   _(domain is not running));
 +goto cleanup;
 +}
 +
 +if (!virCgroupHasController(priv-cgroup, 
 VIR_CGROUP_CONTROLLER_CPUACCT)) {
 +virReportError(VIR_ERR_OPERATION_INVALID,
 +   %s, _(cgroup CPUACCT controller is not mounted));
 +goto cleanup;
 +}
 +
 +if (start_cpu == -1)
 +ret = virCgroupGetDomainTotalCPUStats(priv-cgroup,
 +  params, nparams);
 +else
 +ret = lxcDomainGetPercpuStats(vm, params, nparams,
 +  start_cpu, ncpus);
 +cleanup:
 +if (vm)
 +virObjectUnlock(vm);
 +return ret;
 +}
 +
 +
  /* Function Tables */
  static virDriver lxcDriver = {
  .no = VIR_DRV_LXC,
 @@ -4852,6 +4983,7 @@ static virDriver lxcDriver = {
  .nodeSuspendForDuration = lxcNodeSuspendForDuration, /* 0.9.8 */
  .domainSetMetadata = lxcDomainSetMetadata, /* 1.1.3 */
  .domainGetMetadata = lxcDomainGetMetadata, /* 1.1.3 */
 +.domainGetCPUStats = lxcDomainGetCPUStats, /* 0.9.11 */

This should be 1.2.1, not 0.9.11.

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


[libvirt] [PATCH] qemu: remove memset params array to zero in qemuDomainGetPercpuStats

2014-01-15 Thread Gao feng
The params array is allocated by VIR_ALLOC_N in cmdCPUStats, so it
has already been zero-filled. There is no need to zero it again,
and the memset here is incorrect anyway: nparams * ncpus is the
number of array elements, not the size of the params array in bytes.

Signed-off-by: Gao feng gaof...@cn.fujitsu.com
---
 src/qemu/qemu_driver.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 9f71160..7a329f0 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -15820,7 +15820,6 @@ qemuDomainGetPercpuStats(virDomainObjPtr vm,
 if (virCgroupGetCpuacctPercpuUsage(priv-cgroup, buf))
 goto cleanup;
 pos = buf;
-memset(params, 0, nparams * ncpus);
 
 /* return percpu cputime in index 0 */
 param_idx = 0;
-- 
1.8.4.2

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH 5/6] Implemet lxcDomainBlockStats for lxc driver

2014-01-15 Thread Gao feng
On 01/15/2014 07:23 AM, Thorsten Behrens wrote:
 ---
  src/lxc/lxc_driver.c | 51 +++
  1 file changed, 51 insertions(+)
 
 diff --git a/src/lxc/lxc_driver.c b/src/lxc/lxc_driver.c
 index 1e9c77a..1d2a457 100644
 --- a/src/lxc/lxc_driver.c
 +++ b/src/lxc/lxc_driver.c
 @@ -2021,6 +2021,56 @@ lxcDomainGetSchedulerParameters(virDomainPtr domain,
  
  
  static int
 +lxcDomainBlockStats(virDomainPtr dom,
 +const char *path,
 +struct _virDomainBlockStats *stats)
 +{
 +int ret = -1, idx;
 +virDomainObjPtr vm;
 +virDomainDiskDefPtr disk = NULL;
 +virLXCDomainObjPrivatePtr priv;
 +
 +if (!(vm = lxcDomObjFromDomain(dom)))
 +return ret;
 +
 +priv = vm-privateData;
 +
 +if (virDomainBlockStatsEnsureACL(dom-conn, vm-def)  0)
 +goto cleanup;
 +
 +if (!virDomainObjIsActive(vm)) {
 +virReportError(VIR_ERR_OPERATION_INVALID,
 +   %s, _(domain is not running));
 +goto cleanup;
 +}
 +
 +if ((idx = virDomainDiskIndexByName(vm-def, path, false))  0) {
 +virReportError(VIR_ERR_INVALID_ARG,
 +   _(invalid path: %s), path);
 +goto cleanup;
 +}
 +disk = vm-def-disks[idx];
 +
 +if (!disk-info.alias) {
 +virReportError(VIR_ERR_INTERNAL_ERROR,
 +   _(missing disk device alias name for %s), 
 disk-dst);
 +goto cleanup;
 +}
 +
 +ret = virCgroupGetBlkioIoDeviceServiced(priv-cgroup,
 +disk-info.alias,
 +stats-rd_bytes,
 +stats-wr_bytes,
 +stats-rd_req,
 +stats-wr_req);
 +cleanup:
 +if (vm)
 +virObjectUnlock(vm);
 +return ret;
 +}
 +
 +
 +static int
  lxcDomainSetBlkioParameters(virDomainPtr dom,
  virTypedParameterPtr params,
  int nparams,
 @@ -4962,6 +5012,7 @@ static virDriver lxcDriver = {
  .domainGetSchedulerParametersFlags = 
 lxcDomainGetSchedulerParametersFlags, /* 0.9.2 */
  .domainSetSchedulerParameters = lxcDomainSetSchedulerParameters, /* 
 0.5.0 */
  .domainSetSchedulerParametersFlags = 
 lxcDomainSetSchedulerParametersFlags, /* 0.9.2 */
 +.domainBlockStats = lxcDomainBlockStats, /* 0.4.1 */

Just one question.
Can't we implement the new API domainBlockStatsFlags?

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH 6/6] Widening API change - accept empty path for virDomainBlockStats

2014-01-15 Thread Gao feng
On 01/15/2014 07:23 AM, Thorsten Behrens wrote:
 And provide domain summary stats in that case, for lxc backend.
 Use case is a container domain using passthrough bind mounts of the
 host filesystem, which is a common case for lxc.
 ---
  src/libvirt.c  |  1 -
  src/lxc/lxc_driver.c   | 10 ++
  src/qemu/qemu_driver.c |  2 ++
  src/remote/remote_driver.c |  2 ++
  src/test/test_driver.c |  2 ++
  src/xen/xen_driver.c   |  2 ++
  6 files changed, 18 insertions(+), 1 deletion(-)
 
 diff --git a/src/libvirt.c b/src/libvirt.c
 index 87a4d46..14ffca0 100644
 --- a/src/libvirt.c
 +++ b/src/libvirt.c
 @@ -7804,7 +7804,6 @@ virDomainBlockStats(virDomainPtr dom, const char *disk,
  virResetLastError();
  
  virCheckDomainReturn(dom, -1);
 -virCheckNonNullArgGoto(disk, error);
  virCheckNonNullArgGoto(stats, error);
  if (size  sizeof(stats2)) {
  virReportInvalidArg(size,
 diff --git a/src/lxc/lxc_driver.c b/src/lxc/lxc_driver.c
 index 1d2a457..fba9c12 100644
 --- a/src/lxc/lxc_driver.c
 +++ b/src/lxc/lxc_driver.c
 @@ -2044,6 +2044,16 @@ lxcDomainBlockStats(virDomainPtr dom,
  goto cleanup;
  }
  
 +if (!path || !*path) {
 +/* empty/NULL path - return entire domain blkstats instead */
 +ret = virCgroupGetBlkioIoServiced(priv-cgroup,
 +  stats-rd_bytes,
 +  stats-wr_bytes,
 +  stats-rd_req,
 +  stats-wr_req);
 +goto cleanup;
 +}
 +

I'm OK with this one; let's see if others object.
But you should check whether we can use the new API (domainBlockStatsFlags),
as I mentioned in the previous thread, and update the virsh domblkstat man page.

  if ((idx = virDomainDiskIndexByName(vm-def, path, false))  0) {
  virReportError(VIR_ERR_INVALID_ARG,
 _(invalid path: %s), path);
 diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
 index 2d92873..4dcf12b 100644
 --- a/src/qemu/qemu_driver.c
 +++ b/src/qemu/qemu_driver.c
 @@ -9021,6 +9021,8 @@ qemuDomainBlockStats(virDomainPtr dom,
  virDomainDiskDefPtr disk = NULL;
  qemuDomainObjPrivatePtr priv;
  
 +virCheckNonNullArgReturn(path, -1);
 +
  if (!(vm = qemuDomObjFromDomain(dom)))
  goto cleanup;
  
 diff --git a/src/remote/remote_driver.c b/src/remote/remote_driver.c
 index da9c1c9..160bdd4 100644
 --- a/src/remote/remote_driver.c
 +++ b/src/remote/remote_driver.c
 @@ -1713,6 +1713,8 @@ remoteDomainBlockStatsFlags(virDomainPtr domain,
  remote_domain_block_stats_flags_ret ret;
  struct private_data *priv = domain-conn-privateData;
  
 +virCheckNonNullArgReturn(path, -1);
 +
  remoteDriverLock(priv);
  
  make_nonnull_domain(args.dom, domain);
 diff --git a/src/test/test_driver.c b/src/test/test_driver.c
 index b724f82..7c637bb 100644
 --- a/src/test/test_driver.c
 +++ b/src/test/test_driver.c
 @@ -3362,6 +3362,8 @@ static int testDomainBlockStats(virDomainPtr domain,
  unsigned long long statbase;
  int ret = -1;
  
 +virCheckNonNullArgReturn(path, -1);
 +
  testDriverLock(privconn);
  privdom = virDomainObjListFindByName(privconn-domains,
   domain-name);
 diff --git a/src/xen/xen_driver.c b/src/xen/xen_driver.c
 index c45d10f..2b9ac21 100644
 --- a/src/xen/xen_driver.c
 +++ b/src/xen/xen_driver.c
 @@ -2217,6 +2217,8 @@ xenUnifiedDomainBlockStats(virDomainPtr dom, const char 
 *path,
  virDomainDefPtr def = NULL;
  int ret = -1;
  
 +virCheckNonNullArgReturn(path, -1);
 +
  if (!(def = xenGetDomainDefForDom(dom)))
  goto cleanup;
  
 

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH 1/6] Add util virCgroupGetBlkioIo*Serviced methods.

2014-01-14 Thread Gao feng
On 01/15/2014 07:23 AM, Thorsten Behrens wrote:
 This reads blkio stats from blkio.throttle.io_service_bytes and
 blkio.throttle.io_serviced.
 ---
  src/libvirt_private.syms |   2 +
  src/util/vircgroup.c | 208 
 +++
  src/util/vircgroup.h |  12 +++
  3 files changed, 222 insertions(+)
 
 diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
 index 3b3de15..edbf6ba 100644
 --- a/src/libvirt_private.syms
 +++ b/src/libvirt_private.syms
 @@ -1003,6 +1003,8 @@ virCgroupDenyDevice;
  virCgroupDenyDeviceMajor;
  virCgroupDenyDevicePath;
  virCgroupFree;
 +virCgroupGetBlkioIoDeviceServiced;
 +virCgroupGetBlkioIoServiced;
  virCgroupGetBlkioWeight;
  virCgroupGetCpuacctPercpuUsage;
  virCgroupGetCpuacctStat;
 diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c
 index 43eb649..1a579f0 100644
 --- a/src/util/vircgroup.c
 +++ b/src/util/vircgroup.c
 @@ -1826,6 +1826,191 @@ virCgroupGetBlkioWeight(virCgroupPtr group, unsigned 
 int *weight)
  
  
  /**
 + * virCgroupGetBlkioIoServiced:
 + *
 + * @group: The cgroup to get throughput for
 + * @kb: Pointer to returned serviced io in kilobytes
 + *
 + * Returns: 0 on success, -1 on error
 + */
 +int
 +virCgroupGetBlkioIoServiced(virCgroupPtr group,
 +long long *bytes_read,
 +long long *bytes_write,
 +long long *requests_read,
 +long long *requests_write)
 +{
 +long long stats_val;
 +char *str1=NULL, *str2=NULL, *p1, *p2;
 +int i;
 +int ret = -1;
 +
 +const char *value_names[] = {
 +Read ,
 +Write 
 +};
 +long long *bytes_ptrs[] = {
 +bytes_read,
 +bytes_write
 +};
 +long long *requests_ptrs[] = {
 +requests_read,
 +requests_write
 +};
 +
 +*bytes_read = 0;
 +*bytes_write = 0;
 +*requests_read = 0;
 +*requests_write = 0;
 +
 +if (virCgroupGetValueStr(group,
 + VIR_CGROUP_CONTROLLER_BLKIO,
 + blkio.throttle.io_service_bytes, str1)  0)
 +goto cleanup;
 +
 +if (virCgroupGetValueStr(group,
 + VIR_CGROUP_CONTROLLER_BLKIO,
 + blkio.throttle.io_serviced, str2)  0)
 +goto cleanup;
 +
 +p1 = str1;
 +p2 = str2;
 +
 +/* sum up all entries of the same kind, from all devices */

The code below only processes the first device; you need a loop to
handle all the devices.

 +for (i = 0; i  ARRAY_CARDINALITY(value_names); i++) {
 +if (!(p1 = strstr(p1, value_names[i])) ||
 +virStrToLong_ll(p1 + strlen(value_names[i]), p1, 10, 
 stats_val)  0) {
 +virReportError(VIR_ERR_INTERNAL_ERROR,
 +   _(Cannot parse byte stat '%s'),
 +   p1 + strlen(value_names[i]));

It would be better to report value_names[i] in the error message too.

 +goto cleanup;
 +}
 +*bytes_ptrs[i] += stats_val;
 +

It would be better to add an overflow check here and report an error.

 +if (!(p2 = strstr(p2, value_names[i])) ||
 +virStrToLong_ll(p2 + strlen(value_names[i]), p2, 10, 
 stats_val)  0) {
 +virReportError(VIR_ERR_INTERNAL_ERROR,
 +   _(Cannot parse request stat '%s'),
 +   p2 + strlen(value_names[i]));
 +goto cleanup;
 +}
 +*requests_ptrs[i] += stats_val;

Same here: this addition needs an overflow check.

 +}
 +
 +ret = 0;
 +
 +cleanup:
 +VIR_FREE(str2);
 +VIR_FREE(str1);
 +return ret;
 +}
 +
 +
 +/**
 + * virCgroupGetBlkioIoDeviceServiced:
 + *
 + * @group: The cgroup to get throughput for
 + * @path: The device to get throughput for
 + * @kb_read: Pointer to serviced read io in kilobytes
 + * @kb_write: Pointer to serviced write io in kilobytes
 + * @kb_total: Pointer to serviced io in kilobytes
 + *
 + * Returns: 0 on success, -1 on error
 + */
 +int
 +virCgroupGetBlkioIoDeviceServiced(virCgroupPtr group,
 +  const char *path,
 +  long long *bytes_read,
 +  long long *bytes_write,
 +  long long *requests_read,
 +  long long *requests_write)
 +{
 +char *str1=NULL, *str2=NULL, *str3=NULL, *p;
 +struct stat sb;
 +int i;
 +int ret = -1;
 +
 +const char *value_names[] = {
 +Read ,
 +Write 
 +};
 +long long *bytes_ptrs[] = {
 +bytes_read,
 +bytes_write
 +};
 +long long *requests_ptrs[] = {
 +requests_read,
 +requests_write
 +};
 +
 +if (stat(path, sb)  0) {
 +virReportSystemError(errno,
 + _(Path '%s' is not accessible),
 + path);
 +return -1;
 +}
 +
 +if (!S_ISBLK(sb.st_mode)) {
 +

Re: [libvirt] [PATCH 2/6] Implement domainMemoryStats API slot for LXC driver.

2014-01-14 Thread Gao feng
On 01/15/2014 07:23 AM, Thorsten Behrens wrote:
 ---
  src/lxc/lxc_driver.c | 52 
 
  1 file changed, 52 insertions(+)
 
 diff --git a/src/lxc/lxc_driver.c b/src/lxc/lxc_driver.c
 index 7e56a59..9f586af 100644
 --- a/src/lxc/lxc_driver.c
 +++ b/src/lxc/lxc_driver.c
 @@ -4554,6 +4554,57 @@ lxcNodeGetInfo(virConnectPtr conn,
  
  
  static int
 +lxcDomainMemoryStats(virDomainPtr dom,
 + struct _virDomainMemoryStat *stats,
 + unsigned int nr_stats,
 + unsigned int flags)
 +{
 +virDomainObjPtr vm;
 +int ret = -1;
 +virLXCDomainObjPrivatePtr priv;
 +int got = 0;
 +
 +virCheckFlags(0, -1);
 +
 +if (!(vm = lxcDomObjFromDomain(dom)))
 +goto cleanup;
 +
 +priv = vm-privateData;
 +
 +if (virDomainMemoryStatsEnsureACL(dom-conn, vm-def)  0)
 +goto cleanup;
 +

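You should make sure the domain is running before reading its cgroup stats.
Also, the return value of lxcDomainMemoryStats seems incorrect: ret starts
at -1 and is only changed by the final ret++, so the function never returns
the number of stats filled in (got).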


 +if (got  nr_stats) {
 +stats[got].tag = VIR_DOMAIN_MEMORY_STAT_ACTUAL_BALLOON;
 +stats[got].val = vm-def-mem.cur_balloon;
 +got++;
 +}
 +if (got  nr_stats) {
 +stats[got].tag = VIR_DOMAIN_MEMORY_STAT_SWAP_IN;
 +virCgroupGetMemSwapUsage(priv-cgroup, stats[got].val);
 +got++;
 +}
 +if (got  nr_stats) {
 +unsigned long kb;
 +stats[got].tag = VIR_DOMAIN_MEMORY_STAT_RSS;
 +virCgroupGetMemoryUsage(priv-cgroup, kb);
 +stats[got].val = kb;
 +ret++;
 +}
 +
 +cleanup:
 +if (vm)
 +virObjectUnlock(vm);
 +return ret;
 +}
 +
 +
 +static int
  lxcNodeGetCPUStats(virConnectPtr conn,
 int cpuNum,
 virNodeCPUStatsPtr params,
 @@ -4781,6 +4832,7 @@ static virDriver lxcDriver = {
  .domainSetSchedulerParameters = lxcDomainSetSchedulerParameters, /* 
 0.5.0 */
  .domainSetSchedulerParametersFlags = 
 lxcDomainSetSchedulerParametersFlags, /* 0.9.2 */
  .domainInterfaceStats = lxcDomainInterfaceStats, /* 0.7.3 */
 +.domainMemoryStats = lxcDomainMemoryStats, /* 0.7.5 */

0.7.5 is incorrect, it should be 1.2.1.

  .nodeGetCPUStats = lxcNodeGetCPUStats, /* 0.9.3 */
  .nodeGetMemoryStats = lxcNodeGetMemoryStats, /* 0.9.3 */
  .nodeGetCellsFreeMemory = lxcNodeGetCellsFreeMemory, /* 0.6.5 */
 

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH 1/6] Add util virCgroupGetBlkioIo*Serviced methods.

2014-01-14 Thread Gao feng
On 01/15/2014 07:23 AM, Thorsten Behrens wrote:
 This reads blkio stats from blkio.throttle.io_service_bytes and
 blkio.throttle.io_serviced.
 ---
  src/libvirt_private.syms |   2 +
  src/util/vircgroup.c | 208 
 +++
  src/util/vircgroup.h |  12 +++
  3 files changed, 222 insertions(+)
 
 diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
 index 3b3de15..edbf6ba 100644
 --- a/src/libvirt_private.syms
 +++ b/src/libvirt_private.syms
 @@ -1003,6 +1003,8 @@ virCgroupDenyDevice;
  virCgroupDenyDeviceMajor;
  virCgroupDenyDevicePath;
  virCgroupFree;
 +virCgroupGetBlkioIoDeviceServiced;
 +virCgroupGetBlkioIoServiced;
  virCgroupGetBlkioWeight;
  virCgroupGetCpuacctPercpuUsage;
  virCgroupGetCpuacctStat;
 diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c
 index 43eb649..1a579f0 100644
 --- a/src/util/vircgroup.c
 +++ b/src/util/vircgroup.c
 @@ -1826,6 +1826,191 @@ virCgroupGetBlkioWeight(virCgroupPtr group, unsigned 
 int *weight)
  
  
  /**
 + * virCgroupGetBlkioIoServiced:
 + *
 + * @group: The cgroup to get throughput for
 + * @kb: Pointer to returned serviced io in kilobytes
 + *
 + * Returns: 0 on success, -1 on error
 + */
 +int
 +virCgroupGetBlkioIoServiced(virCgroupPtr group,
 +long long *bytes_read,
 +long long *bytes_write,
 +long long *requests_read,
 +long long *requests_write)
 +{
 +long long stats_val;
 +char *str1=NULL, *str2=NULL, *p1, *p2;
Please add spaces around '=':
char *str1 = NULL, *str2 = NULL, *p1, *p2;

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH 3/6] Make qemuGetDomainTotalCPUStats a virCgroup function.

2014-01-14 Thread Gao feng
On 01/15/2014 07:23 AM, Thorsten Behrens wrote:
 To reuse this from other drivers, like lxc.
 ---
  src/libvirt_private.syms |  1 +
  src/qemu/qemu_driver.c   | 54 
 ++--
  src/util/vircgroup.c | 53 +++
  src/util/vircgroup.h |  5 +
  4 files changed, 61 insertions(+), 52 deletions(-)
 
 diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
 index edbf6ba..048d9a0 100644
 --- a/src/libvirt_private.syms
 +++ b/src/libvirt_private.syms
 @@ -1014,6 +1014,7 @@ virCgroupGetCpuCfsQuota;
  virCgroupGetCpusetCpus;
  virCgroupGetCpusetMems;
  virCgroupGetCpuShares;
 +virCgroupGetDomainTotalCPUStats;

please change virCgroupGetDomainTotalCPUStats to virCgroupGetDomainTotalCpuStats
the other part looks good to me.

  virCgroupGetFreezerState;
  virCgroupGetMemoryHardLimit;
  virCgroupGetMemorySoftLimit;
 diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
 index 1949abe..2d92873 100644
 --- a/src/qemu/qemu_driver.c
 +++ b/src/qemu/qemu_driver.c
 @@ -105,7 +105,6 @@
  
  #define QEMU_NB_NUMA_PARAM 2
  
 -#define QEMU_NB_TOTAL_CPU_STAT_PARAM 3
  #define QEMU_NB_PER_CPU_STAT_PARAM 2
  
  #define QEMU_SCHED_MIN_PERIOD  1000LL
 @@ -15302,56 +15301,6 @@ cleanup:
  return ret;
  }
  
 -/* qemuDomainGetCPUStats() with start_cpu == -1 */
 -static int
 -qemuDomainGetTotalcpuStats(virDomainObjPtr vm,
 -   virTypedParameterPtr params,
 -   int nparams)
 -{
 -unsigned long long cpu_time;
 -int ret;
 -qemuDomainObjPrivatePtr priv = vm-privateData;
 -
 -if (nparams == 0) /* return supported number of params */
 -return QEMU_NB_TOTAL_CPU_STAT_PARAM;
 -/* entry 0 is cputime */
 -ret = virCgroupGetCpuacctUsage(priv-cgroup, cpu_time);
 -if (ret  0) {
 -virReportSystemError(-ret, %s, _(unable to get cpu account));
 -return -1;
 -}
 -
 -if (virTypedParameterAssign(params[0], VIR_DOMAIN_CPU_STATS_CPUTIME,
 -VIR_TYPED_PARAM_ULLONG, cpu_time)  0)
 -return -1;
 -
 -if (nparams  1) {
 -unsigned long long user;
 -unsigned long long sys;
 -
 -ret = virCgroupGetCpuacctStat(priv-cgroup, user, sys);
 -if (ret  0) {
 -virReportSystemError(-ret, %s, _(unable to get cpu account));
 -return -1;
 -}
 -
 -if (virTypedParameterAssign(params[1],
 -VIR_DOMAIN_CPU_STATS_USERTIME,
 -VIR_TYPED_PARAM_ULLONG, user)  0)
 -return -1;
 -if (nparams  2 
 -virTypedParameterAssign(params[2],
 -VIR_DOMAIN_CPU_STATS_SYSTEMTIME,
 -VIR_TYPED_PARAM_ULLONG, sys)  0)
 -return -1;
 -
 -if (nparams  QEMU_NB_TOTAL_CPU_STAT_PARAM)
 -nparams = QEMU_NB_TOTAL_CPU_STAT_PARAM;
 -}
 -
 -return nparams;
 -}
 -
  /* This function gets the sums of cpu time consumed by all vcpus.
   * For example, if there are 4 physical cpus, and 2 vcpus in a domain,
   * then for each vcpu, the cpuacct.usage_percpu looks like this:
 @@ -15550,7 +15499,8 @@ qemuDomainGetCPUStats(virDomainPtr domain,
  }
  
  if (start_cpu == -1)
 -ret = qemuDomainGetTotalcpuStats(vm, params, nparams);
 +ret = virCgroupGetDomainTotalCPUStats(priv-cgroup,
 +  params, nparams);
  else
  ret = qemuDomainGetPercpuStats(vm, params, nparams,
 start_cpu, ncpus);
 diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c
 index 1a579f0..e02b473 100644
 --- a/src/util/vircgroup.c
 +++ b/src/util/vircgroup.c
 @@ -51,11 +51,14 @@
  #include virhashcode.h
  #include virstring.h
  #include virsystemd.h
 +#include virtypedparam.h
  
  #define CGROUP_MAX_VAL 512
  
  #define VIR_FROM_THIS VIR_FROM_CGROUP
  
 +#define CGROUP_NB_TOTAL_CPU_STAT_PARAM 3
 +
  #if defined(__linux__)  defined(HAVE_GETMNTENT_R)  \
  defined(_DIRENT_HAVE_D_TYPE)  defined(_SC_CLK_TCK)
  # define VIR_CGROUP_SUPPORTED
 @@ -2603,6 +2606,56 @@ virCgroupDenyDevicePath(virCgroupPtr group, const char 
 *path, int perms)
  }
  
  
 +
 +int
 +virCgroupGetDomainTotalCPUStats(virCgroupPtr group,
 +virTypedParameterPtr params,
 +int nparams)
 +{
 +unsigned long long cpu_time;
 +int ret;
 +
 +if (nparams == 0) /* return supported number of params */
 +return CGROUP_NB_TOTAL_CPU_STAT_PARAM;
 +/* entry 0 is cputime */
 +ret = virCgroupGetCpuacctUsage(group, cpu_time);
 +if (ret  0) {
 +virReportSystemError(-ret, %s, _(unable to get cpu account));
 +return -1;
 +}
 +
 +if (virTypedParameterAssign(params[0], VIR_DOMAIN_CPU_STATS_CPUTIME,
 +

Re: [libvirt] CVE-2013-6456 Re: [PATCHv2 0/7] lxc: honor mount namespaces

2014-01-07 Thread Gao feng
On 01/07/2014 12:18 PM, Eric Blake wrote:
 On 12/24/2013 06:45 AM, Reco wrote:
 On Tue, 24 Dec 2013 06:29:11 -0700
 Eric Blake ebl...@redhat.com wrote:

 diff --git i/src/util/virprocess.c w/src/util/virprocess.c
 index c99b75a..e069483 100644
 --- i/src/util/virprocess.c
 +++ w/src/util/virprocess.c
 @@ -879,7 +879,7 @@ virProcessRunInMountNamespace(pid_t pid,
  goto cleanup;
  }

 -if ((cpid = virFork()  0))
 +if ((cpid = virFork())  0)
  goto cleanup;
  if (cpid == 0) {
  /* child */

 Thanks, that solves it. With this extra patch libvirtd writes to the
 container's /dev/initctl only and terminates child process only.
 
 Thanks again for the functional review.  I'm still waiting for a code
 review from anyone willing, since this does fix a security issue and I
 don't want to introduce an unintentional regression.  And I guess
 there's still the need to fix the access to the namespace /dev during
 device hotplug...
 

Yes, device hotplug has the same problem.
ACK to this series.
Thanks!

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH] LXC: don't set socket create selinux context in virLXCProcessConnectMonitor

2014-01-07 Thread Gao feng
On 01/07/2014 10:37 PM, Michal Privoznik wrote:
 On 25.12.2013 08:02, Gao feng wrote:
 The unix socket /var/run/libvirt/lxc/domain.sock is not created
 under the SELinux context that is configured by seclabel.

 If we try to connect to domain.sock under the SELinux context
 of the domain in virLXCProcessConnectMonitor, SELinux will deny
 the connect operation.

 type=AVC msg=audit(1387953696.067:662): avc:  denied  { connectto } for  
 pid=21206 comm=libvirtd path=/usr/local/var/run/libvirt/lxc/systemd.sock 
 scontext=unconfined_u:system_r:svirt_lxc_net_t:s0:c770,c848 
 tcontext=unconfined_u:system_r:unconfined_t:s0-s0:c0.c1023 
 tclass=unix_stream_socket

 Since there is no harm in accessing domain.sock outside of the
 domain's context, this patch removes the setsockcreatecon call in
 virLXCProcessConnectMonitor.

 Signed-off-by: Gao feng gaof...@cn.fujitsu.com
 ---
  src/lxc/lxc_process.c | 12 
  1 file changed, 12 deletions(-)

 diff --git a/src/lxc/lxc_process.c b/src/lxc/lxc_process.c
 index cc9c1a2..b336ade 100644
 --- a/src/lxc/lxc_process.c
 +++ b/src/lxc/lxc_process.c
 @@ -640,9 +640,6 @@ static virLXCMonitorPtr 
 virLXCProcessConnectMonitor(virLXCDriverPtr driver,
  virLXCMonitorPtr monitor = NULL;
  virLXCDriverConfigPtr cfg = virLXCDriverGetConfig(driver);
  
 -if (virSecurityManagerSetSocketLabel(driver-securityManager, vm-def) 
  0)
 -goto cleanup;
 -
  /* Hold an extra reference because we can't allow 'vm' to be
   * deleted while the monitor is active */
  virObjectRef(vm);
 @@ -652,15 +649,6 @@ static virLXCMonitorPtr 
 virLXCProcessConnectMonitor(virLXCDriverPtr driver,
  if (monitor == NULL)
  virObjectUnref(vm);
  
 -if (virSecurityManagerClearSocketLabel(driver-securityManager, 
 vm-def)  0) {
 -if (monitor) {
 -virObjectUnref(monitor);
 -monitor = NULL;
 -}
 -goto cleanup;
 -}
 -
 -cleanup:
  virObjectUnref(cfg);
  return monitor;
  }

 
 This patch looks good, but just one question - shouldn't the monitor
 socket be created with the correct selinux label instead? You know, the
 other approach to fix this issue.
 

Yes, maybe that would be better; I will send a v2 patch.
Thanks!

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


[libvirt] [PATCH] LXC: create monitor socket under selinux context of domain

2014-01-07 Thread Gao feng
The unix socket /var/run/libvirt/lxc/domain.sock is not created
under the SELinux context that is configured by seclabel.

If we try to connect to domain.sock under the SELinux context
of the domain in virLXCProcessConnectMonitor, SELinux will deny
the connect operation.

type=AVC msg=audit(1387953696.067:662): avc:  denied  { connectto } for  
pid=21206 comm=libvirtd path=/usr/local/var/run/libvirt/lxc/systemd.sock 
scontext=unconfined_u:system_r:svirt_lxc_net_t:s0:c770,c848 
tcontext=unconfined_u:system_r:unconfined_t:s0-s0:c0.c1023 
tclass=unix_stream_socket

Fix this problem by creating the socket under the SELinux context of the domain.

Signed-off-by: Gao feng gaof...@cn.fujitsu.com
---
 src/lxc/lxc_controller.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c
index a2ae599..5ca960f 100644
--- a/src/lxc/lxc_controller.c
+++ b/src/lxc/lxc_controller.c
@@ -745,6 +745,9 @@ static int virLXCControllerSetupServer(virLXCControllerPtr 
ctrl)
  ctrl)))
 goto error;
 
+if (virSecurityManagerSetSocketLabel(ctrl-securityManager, ctrl-def)  0)
+goto error;
+
 if (!(svc = virNetServerServiceNewUNIX(sockpath,
0700,
0,
@@ -757,6 +760,9 @@ static int virLXCControllerSetupServer(virLXCControllerPtr 
ctrl)
5)))
 goto error;
 
+if (virSecurityManagerClearSocketLabel(ctrl-securityManager, ctrl-def)  
0)
+goto error;
+
 if (virNetServerAddService(ctrl-server, svc, NULL)  0)
 goto error;
 virObjectUnref(svc);
-- 
1.8.4.2

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH] docs: add LXC multi console command docs and a example

2014-01-07 Thread Gao feng
On 01/08/2014 11:17 AM, Chen Hanxiao wrote:
 From: Chen Hanxiao chenhanx...@cn.fujitsu.com
 
 Signed-off-by: Chen Hanxiao chenhanx...@cn.fujitsu.com
 ---
  docs/drvlxc.html.in | 17 +
  1 file changed, 13 insertions(+), 4 deletions(-)
 
 diff --git a/docs/drvlxc.html.in b/docs/drvlxc.html.in
 index 7494eb3..0f3efb0 100644
 --- a/docs/drvlxc.html.in
 +++ b/docs/drvlxc.html.in
 @@ -659,16 +659,25 @@ running, this will turn it into a transient guest.
  
  p
  The codevirsh console/code command can be used to connect
 -to the text console associated with a container. If the container
 -has been configured with multiple console devices, then the
 -code--devname/code argument can be used to choose the
 -console to connect to
 +to the text console associated with a container.
  /p
  
  pre
  # virsh -c lxc:/// console myguest
  /pre
  
 +p
 +If the container has been configured with multiple console devices,
 +then the code--devname/code argument can be used to choose the
 +console to connect to.
 +In LXC, multiple consoles will be named
 +as 'console0', 'console1', 'console2', etc.
 +/p
 +
 +pre
 +# virsh -c lxc:/// console myguest --devname console1
 +/pre
 +
  h3a name=usageEnterRunning commands in a container/a/h3
  
  p
 

ACK

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


[libvirt] [PATCH] LXC: don't set socket create selinux context in virLXCProcessConnectMonitor

2013-12-24 Thread Gao feng
The unix socket /var/run/libvirt/lxc/domain.sock is not created
under the SELinux context that is configured by seclabel.

If we try to connect to domain.sock under the SELinux context
of the domain in virLXCProcessConnectMonitor, SELinux will deny
the connect operation.

type=AVC msg=audit(1387953696.067:662): avc:  denied  { connectto } for  
pid=21206 comm=libvirtd path=/usr/local/var/run/libvirt/lxc/systemd.sock 
scontext=unconfined_u:system_r:svirt_lxc_net_t:s0:c770,c848 
tcontext=unconfined_u:system_r:unconfined_t:s0-s0:c0.c1023 
tclass=unix_stream_socket

Since there is no harm in accessing domain.sock outside of the
domain's context, this patch removes the setsockcreatecon call in
virLXCProcessConnectMonitor.

Signed-off-by: Gao feng gaof...@cn.fujitsu.com
---
 src/lxc/lxc_process.c | 12 
 1 file changed, 12 deletions(-)

diff --git a/src/lxc/lxc_process.c b/src/lxc/lxc_process.c
index cc9c1a2..b336ade 100644
--- a/src/lxc/lxc_process.c
+++ b/src/lxc/lxc_process.c
@@ -640,9 +640,6 @@ static virLXCMonitorPtr 
virLXCProcessConnectMonitor(virLXCDriverPtr driver,
 virLXCMonitorPtr monitor = NULL;
 virLXCDriverConfigPtr cfg = virLXCDriverGetConfig(driver);
 
-if (virSecurityManagerSetSocketLabel(driver-securityManager, vm-def)  0)
-goto cleanup;
-
 /* Hold an extra reference because we can't allow 'vm' to be
  * deleted while the monitor is active */
 virObjectRef(vm);
@@ -652,15 +649,6 @@ static virLXCMonitorPtr 
virLXCProcessConnectMonitor(virLXCDriverPtr driver,
 if (monitor == NULL)
 virObjectUnref(vm);
 
-if (virSecurityManagerClearSocketLabel(driver-securityManager, vm-def)  
0) {
-if (monitor) {
-virObjectUnref(monitor);
-monitor = NULL;
-}
-goto cleanup;
-}
-
-cleanup:
 virObjectUnref(cfg);
 return monitor;
 }
-- 
1.8.4.2

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH] lxc: simplify command when connecting lxc multi console

2013-12-18 Thread Gao feng
On 12/18/2013 04:54 PM, Chen Hanxiao wrote:
 From: Chen Hanxiao chenhanx...@cn.fujitsu.com
 
 If we configure more than one console for a container,
 they are always named consoleN.
 We had to type the verbose option --devname consoleN.
 This patch makes the option --devname N
 equivalent to --devname consoleN.
 

Do we really need this?

 Signed-off-by: Chen Hanxiao chenhanx...@cn.fujitsu.com
 ---
  src/lxc/lxc_driver.c | 12 +++-
  1 file changed, 11 insertions(+), 1 deletion(-)
 
 diff --git a/src/lxc/lxc_driver.c b/src/lxc/lxc_driver.c
 index 61a90ca..13213a5 100644
 --- a/src/lxc/lxc_driver.c
 +++ b/src/lxc/lxc_driver.c
 @@ -2439,6 +2439,7 @@ lxcDomainOpenConsole(virDomainPtr dom,
  int ret = -1;
  virDomainChrDefPtr chr = NULL;
  size_t i;
 +char *tmp_name = NULL;
  
  virCheckFlags(0, -1);
  
 @@ -2455,9 +2456,17 @@ lxcDomainOpenConsole(virDomainPtr dom,
  }
  
  if (dev_name) {
 +if (STRPREFIX(dev_name, console)) {
 +if (VIR_STRDUP(tmp_name, dev_name)  0)
 +goto cleanup;
 +} else {
 +if (virAsprintf(tmp_name, console%s, dev_name)  0)
 +goto cleanup;
 +}
 +
  for (i = 0; i  vm-def-nconsoles; i++) {
  if (vm-def-consoles[i]-info.alias 
 -STREQ(vm-def-consoles[i]-info.alias, dev_name)) {
 +STREQ(vm-def-consoles[i]-info.alias, tmp_name)) {
  chr = vm-def-consoles[i];
  break;
  }
 @@ -2490,6 +2499,7 @@ lxcDomainOpenConsole(virDomainPtr dom,
  cleanup:
  if (vm)
  virObjectUnlock(vm);
 +VIR_FREE(tmp_name);
  return ret;
  }
  
 

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH v2] Set the 'container_ttys' env variable for LXC consoles

2013-12-16 Thread Gao feng
On 12/17/2013 03:04 AM, Daniel P. Berrange wrote:
 From: Daniel P. Berrange berra...@redhat.com
 
 Systemd specified that any /dev/pts/NNN device on which it
 is expected to spawn a agetty login, should be listed in
 the 'container_ttys' env variable. It should just contain
 the relative paths, eg 'pts/0' not '/dev/pts/0' and should
 be space separated.
 
 http://cgit.freedesktop.org/systemd/systemd/commit/?id=1d97ff7dd71902a5604c2fed8964925d54e09de9
 
 In v2:
  - Rewrite loop with virBufferTrim
  - Don't include first pty in env
  - Only set env if at least one pty is listed
 
 Signed-off-by: Daniel P. Berrange berra...@redhat.com
 ---
  src/lxc/lxc_container.c | 32 ++--
  1 file changed, 30 insertions(+), 2 deletions(-)
 
 diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c
 index 51fa1b3..c885782 100644
 --- a/src/lxc/lxc_container.c
 +++ b/src/lxc/lxc_container.c
 @@ -196,10 +196,33 @@ int lxcContainerHasReboot(void)
   *
   * Returns a virCommandPtr
   */
 -static virCommandPtr lxcContainerBuildInitCmd(virDomainDefPtr vmDef)
 +static virCommandPtr lxcContainerBuildInitCmd(virDomainDefPtr vmDef,
 +  char **ttyPaths,
 +  size_t nttyPaths)
  {
  char uuidstr[VIR_UUID_STRING_BUFLEN];
  virCommandPtr cmd;
 +virBuffer buf = VIR_BUFFER_INITIALIZER;
 +size_t i;
 +
 +/* 'container_ptys' must exclude the PTY associated with
 + * the /dev/console device, hence start at 1 not 0
 + */
 +for (i = 1; i  nttyPaths; i++) {
 +if (!STRPREFIX(ttyPaths[0], /dev/)) {


Is ttyPaths[0] really correct here?

 +virReportError(VIR_ERR_INTERNAL_ERROR,
 +   _(Expected a /dev path for '%s'),
 +   ttyPaths[0]);
 +virBufferFreeAndReset(buf);
 +return NULL;
 +}
 +virBufferAdd(buf, ttyPaths[i] + 5, -1);
 +virBufferAddChar(buf, ' ');
 +}
 +virBufferTrim(buf, NULL, 1);
 +
 +if (virBufferError(buf))
 +return NULL;
  
  virUUIDFormat(vmDef-uuid, uuidstr);
  
 @@ -212,11 +235,14 @@ static virCommandPtr 
 lxcContainerBuildInitCmd(virDomainDefPtr vmDef)
  virCommandAddEnvString(cmd, TERM=linux);
  virCommandAddEnvString(cmd, container=lxc-libvirt);
  virCommandAddEnvPair(cmd, container_uuid, uuidstr);
 +if (nttyPaths  1)
 +virCommandAddEnvPair(cmd, container_ttys, 
 virBufferCurrentContent(buf));
  virCommandAddEnvPair(cmd, LIBVIRT_LXC_UUID, uuidstr);
  virCommandAddEnvPair(cmd, LIBVIRT_LXC_NAME, vmDef-name);
  if (vmDef-os.cmdline)
  virCommandAddEnvPair(cmd, LIBVIRT_LXC_CMDLINE, vmDef-os.cmdline);
  
 +virBufferFreeAndReset(buf);
  return cmd;
  }
  
 @@ -1789,7 +1815,9 @@ static int lxcContainerChild(void *data)
  if ((hasReboot = lxcContainerHasReboot())  0)
  goto cleanup;
  
 -cmd = lxcContainerBuildInitCmd(vmDef);
 +cmd = lxcContainerBuildInitCmd(vmDef,
 +   argv-ttyPaths,
 +   argv-nttyPaths);
  virCommandWriteArgLog(cmd, 1);
  
  if (lxcContainerSetID(vmDef)  0)
 

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH] Set the 'container_ttys' env variable for LXC consoles

2013-12-15 Thread Gao feng
On 12/14/2013 12:52 AM, Daniel P. Berrange wrote:
 From: Daniel P. Berrange berra...@redhat.com
 
 Systemd specified that any /dev/pts/NNN device on which it
 is expected to spawn a agetty login, should be listed in
 the 'container_ttys' env variable. It should just contain
 the relative paths, eg 'pts/0' not '/dev/pts/0' and should
 be space separated.

Just FYI, systemd will strip off /dev, so /dev/pts/0 is OK too.
 
 http://cgit.freedesktop.org/systemd/systemd/commit/?id=1d97ff7dd71902a5604c2fed8964925d54e09de9
 
 Signed-off-by: Daniel P. Berrange berra...@redhat.com
 ---
  src/lxc/lxc_container.c | 28 ++--
  1 file changed, 26 insertions(+), 2 deletions(-)
 
 diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c
 index 51fa1b3..f0b526f 100644
 --- a/src/lxc/lxc_container.c
 +++ b/src/lxc/lxc_container.c
 @@ -196,10 +196,30 @@ int lxcContainerHasReboot(void)
   *
   * Returns a virCommandPtr
   */
 -static virCommandPtr lxcContainerBuildInitCmd(virDomainDefPtr vmDef)
 +static virCommandPtr lxcContainerBuildInitCmd(virDomainDefPtr vmDef,
 +  char **ttyPaths,
 +  size_t nttyPaths)
  {
  char uuidstr[VIR_UUID_STRING_BUFLEN];
  virCommandPtr cmd;
 +virBuffer buf = VIR_BUFFER_INITIALIZER;
 +size_t i;
 +
 +for (i = 0 ; i  nttyPaths ; i++) {
 +if (!STRPREFIX(ttyPaths[0], /dev/)) {
  ^^
  ttyPaths[i]

 +virReportError(VIR_ERR_INTERNAL_ERROR,
 +   _(Expected a /dev path for '%s'),
 +   ttyPaths[0]);
 +virBufferFreeAndReset(buf);
 +return NULL;
 +}
 +if (i)
 +virBufferAddLit(buf,  );
 +virBufferAdd(buf, ttyPaths[i] + 5, -1);

We had better make sure there is no /dev//pts/x case.

 +}
 +
 +if (virBufferError(buf))
 +return NULL;
  
  virUUIDFormat(vmDef-uuid, uuidstr);
  
 @@ -214,9 +234,11 @@ static virCommandPtr 
 lxcContainerBuildInitCmd(virDomainDefPtr vmDef)
  virCommandAddEnvPair(cmd, container_uuid, uuidstr);
  virCommandAddEnvPair(cmd, LIBVIRT_LXC_UUID, uuidstr);
  virCommandAddEnvPair(cmd, LIBVIRT_LXC_NAME, vmDef-name);
 +virCommandAddEnvPair(cmd, container_ttys, 
 virBufferCurrentContent(buf));
  if (vmDef-os.cmdline)
  virCommandAddEnvPair(cmd, LIBVIRT_LXC_CMDLINE, vmDef-os.cmdline);
  
 +virBufferFreeAndReset(buf);
  return cmd;
  }
  
 @@ -1789,7 +1811,9 @@ static int lxcContainerChild(void *data)
  if ((hasReboot = lxcContainerHasReboot())  0)
  goto cleanup;
  
 -cmd = lxcContainerBuildInitCmd(vmDef);
 +cmd = lxcContainerBuildInitCmd(vmDef,
 +   argv-ttyPaths,
 +   argv-nttyPaths);
  virCommandWriteArgLog(cmd, 1);
  
  if (lxcContainerSetID(vmDef)  0)
 

ACK

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH] LXC: Change incorrect error report in lxcContainerPivotRoot

2013-12-15 Thread Gao feng
Could somebody pick up this small patch?

On 11/22/2013 03:11 PM, Gao feng wrote:
 The newroot is not mounted as tmpfs, we bind root-src to it.
 
 Signed-off-by: Gao feng gaof...@cn.fujitsu.com
 ---
  src/lxc/lxc_container.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)
 
 diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c
 index fbce8e8..86705a7 100644
 --- a/src/lxc/lxc_container.c
 +++ b/src/lxc/lxc_container.c
 @@ -706,8 +706,8 @@ static int lxcContainerPivotRoot(virDomainFSDefPtr root)
  /* ... and mount our root onto it */
  if (mount(root-src, newroot, NULL, MS_BIND|MS_REC, NULL)  0) {
  virReportSystemError(errno,
 - _(Failed to bind new root %s into tmpfs),
 - root-src);
 + _(Failed to bind %s to new root %s),
 + root-src, newroot);
  goto err;
  }
  
 

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


[libvirt] [PATCH V2 RESEND 7/8] qemu: allow to setup throttle blkio cgroup through virsh

2013-12-12 Thread Gao feng
With this patch, users can set up the throttle blkio cgroup
through virsh for qemu domains.

Signed-off-by: Guan Qiang hzguanqi...@corp.netease.com
Signed-off-by: Gao feng gaof...@cn.fujitsu.com
---
 src/qemu/qemu_driver.c | 442 -
 1 file changed, 404 insertions(+), 38 deletions(-)

diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 45d11cd..8aac133 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -131,7 +131,7 @@
 # define KVM_CAP_NR_VCPUS 9   /* returns max vcpus per vm */
 #endif
 
-#define QEMU_NB_BLKIO_PARAM  2
+#define QEMU_NB_BLKIO_PARAM  6
 
 #define QEMU_NB_BANDWIDTH_PARAM 6
 
@@ -7427,12 +7427,12 @@ cleanup:
 return ret;
 }
 
-/* deviceWeightStr in the form of /device/path,weight,/device/path,weight
+/* blkioDeviceStr in the form of /device/path,weight,/device/path,weight
  * for example, /dev/disk/by-path/pci-:00:1f.2-scsi-0:0:0:0,800
  */
 static int
-qemuDomainParseDeviceWeightStr(char *deviceWeightStr,
-   virBlkioDevicePtr *dev, size_t *size)
+qemuDomainParseBlkioDeviceStr(char *blkioDeviceStr, const char *type,
+  virBlkioDevicePtr *dev, size_t *size)
 {
 char *temp;
 int ndevices = 0;
@@ -7443,10 +7443,10 @@ qemuDomainParseDeviceWeightStr(char *deviceWeightStr,
 *dev = NULL;
 *size = 0;
 
-if (STREQ(deviceWeightStr, ))
+if (STREQ(blkioDeviceStr, ))
 return 0;
 
-temp = deviceWeightStr;
+temp = blkioDeviceStr;
 while (temp) {
 temp = strchr(temp, ',');
 if (temp) {
@@ -7466,7 +7466,7 @@ qemuDomainParseDeviceWeightStr(char *deviceWeightStr,
 return -1;
 
 i = 0;
-temp = deviceWeightStr;
+temp = blkioDeviceStr;
 while (temp) {
 char *p = temp;
 
@@ -7478,11 +7478,27 @@ qemuDomainParseDeviceWeightStr(char *deviceWeightStr,
 if (VIR_STRNDUP(result[i].path, temp, p - temp)  0)
 goto cleanup;
 
-/* weight */
+/* value */
 temp = p + 1;
 
-if (virStrToLong_ui(temp, p, 10, result[i].weight)  0)
+if (STREQ(type, VIR_DOMAIN_BLKIO_DEVICE_WEIGHT)) {
+if (virStrToLong_ui(temp, p, 10, result[i].weight)  0)
+goto error;
+} else if (STREQ(type, VIR_DOMAIN_BLKIO_DEVICE_READ_IOPS)) {
+if (virStrToLong_ui(temp, p, 10, result[i].riops)  0)
+goto error;
+} else if (STREQ(type, VIR_DOMAIN_BLKIO_DEVICE_WRITE_IOPS)) {
+if (virStrToLong_ui(temp, p, 10, result[i].wiops)  0)
+goto error;
+} else if (STREQ(type, VIR_DOMAIN_BLKIO_DEVICE_READ_BPS)) {
+if (virStrToLong_ull(temp, p, 10, result[i].rbps)  0)
+goto error;
+} else if (STREQ(type, VIR_DOMAIN_BLKIO_DEVICE_WRITE_BPS)) {
+if (virStrToLong_ull(temp, p, 10, result[i].wbps)  0)
+goto error;
+} else {
 goto error;
+}
 
 i++;
 
@@ -7503,20 +7519,22 @@ qemuDomainParseDeviceWeightStr(char *deviceWeightStr,
 
 error:
 virReportError(VIR_ERR_INVALID_ARG,
-   _(unable to parse device weight '%s'), deviceWeightStr);
+   _(unable to parse blkio device '%s' '%s'),
+   type, blkioDeviceStr);
 cleanup:
 virBlkioDeviceArrayClear(result, ndevices);
 VIR_FREE(result);
 return -1;
 }
 
-/* Modify dest_array to reflect all device weight changes described in
+/* Modify dest_array to reflect all blkio device changes described in
  * src_array.  */
 static int
-qemuDomainMergeDeviceWeights(virBlkioDevicePtr *dest_array,
- size_t *dest_size,
- virBlkioDevicePtr src_array,
- size_t src_size)
+qemuDomainMergeBlkioDevice(virBlkioDevicePtr *dest_array,
+   size_t *dest_size,
+   virBlkioDevicePtr src_array,
+   size_t src_size,
+   const char *type)
 {
 size_t i, j;
 virBlkioDevicePtr dest, src;
@@ -7529,18 +7547,48 @@ qemuDomainMergeDeviceWeights(virBlkioDevicePtr 
*dest_array,
 dest = (*dest_array)[j];
 if (STREQ(src-path, dest-path)) {
 found = true;
-dest-weight = src-weight;
+
+if (STREQ(type, VIR_DOMAIN_BLKIO_DEVICE_WEIGHT))
+dest-weight = src-weight;
+else if (STREQ(type, VIR_DOMAIN_BLKIO_DEVICE_READ_IOPS))
+dest-riops = src-riops;
+else if (STREQ(type, VIR_DOMAIN_BLKIO_DEVICE_WRITE_IOPS))
+dest-wiops = src-wiops;
+else if (STREQ(type, VIR_DOMAIN_BLKIO_DEVICE_READ_BPS))
+dest-rbps = src-rbps;
+else if (STREQ(type, VIR_DOMAIN_BLKIO_DEVICE_WRITE_BPS))
+dest-wbps = src-wbps;
+else

[libvirt] [PATCH V2 RESEND 8/8] lxc: allow to setup throttle blkio cgroup through virsh

2013-12-12 Thread Gao feng
With this patch, users can set up the throttle blkio cgroup for
LXC domains through the virsh tool.

Signed-off-by: Guan Qiang hzguanqi...@corp.netease.com
Signed-off-by: Gao feng gaof...@cn.fujitsu.com
---
 src/lxc/lxc_driver.c | 671 +--
 1 file changed, 646 insertions(+), 25 deletions(-)

diff --git a/src/lxc/lxc_driver.c b/src/lxc/lxc_driver.c
index c499182..0e9c011 100644
--- a/src/lxc/lxc_driver.c
+++ b/src/lxc/lxc_driver.c
@@ -1545,6 +1545,20 @@ static int lxcStateCleanup(void)
 return 0;
 }
 
+static int
+lxcConnectSupportsFeature(virConnectPtr conn, int feature)
+{
+if (virConnectSupportsFeatureEnsureACL(conn)  0)
+return -1;
+
+switch (feature) {
+case VIR_DRV_FEATURE_TYPED_PARAM_STRING:
+return 1;
+default:
+return 0;
+}
+}
+
 
 static int lxcConnectGetVersion(virConnectPtr conn, unsigned long *version)
 {
@@ -1910,6 +1924,171 @@ lxcDomainGetSchedulerParameters(virDomainPtr domain,
 return lxcDomainGetSchedulerParametersFlags(domain, params, nparams, 0);
 }
 
+static int
+lxcDomainParseBlkioDeviceStr(char *blkioDeviceStr, const char *type,
+ virBlkioDevicePtr *dev, size_t *size)
+{
+char *temp;
+int ndevices = 0;
+int nsep = 0;
+size_t i;
+virBlkioDevicePtr result = NULL;
+
+*dev = NULL;
+*size = 0;
+
+if (STREQ(blkioDeviceStr, ))
+return 0;
+
+temp = blkioDeviceStr;
+while (temp) {
+temp = strchr(temp, ',');
+if (temp) {
+temp++;
+nsep++;
+}
+}
+
+/* A valid string must have even number of fields, hence an odd
+ * number of commas.  */
+if (!(nsep  1))
+goto error;
+
+ndevices = (nsep + 1) / 2;
+
+if (VIR_ALLOC_N(result, ndevices)  0)
+return -1;
+
+i = 0;
+temp = blkioDeviceStr;
+while (temp) {
+char *p = temp;
+
+/* device path */
+p = strchr(p, ',');
+if (!p)
+goto error;
+
+if (VIR_STRNDUP(result[i].path, temp, p - temp)  0)
+goto cleanup;
+
+/* value */
+temp = p + 1;
+
+if (STREQ(type, VIR_DOMAIN_BLKIO_DEVICE_WEIGHT)) {
+if (virStrToLong_ui(temp, p, 10, result[i].weight)  0)
+goto error;
+} else if (STREQ(type, VIR_DOMAIN_BLKIO_DEVICE_READ_IOPS)) {
+if (virStrToLong_ui(temp, p, 10, result[i].riops)  0)
+goto error;
+} else if (STREQ(type, VIR_DOMAIN_BLKIO_DEVICE_WRITE_IOPS)) {
+if (virStrToLong_ui(temp, p, 10, result[i].wiops)  0)
+goto error;
+} else if (STREQ(type, VIR_DOMAIN_BLKIO_DEVICE_READ_BPS)) {
+if (virStrToLong_ull(temp, p, 10, result[i].rbps)  0)
+goto error;
+} else if (STREQ(type, VIR_DOMAIN_BLKIO_DEVICE_WRITE_BPS)){
+if (virStrToLong_ull(temp, p, 10, result[i].wbps)  0)
+goto error;
+} else {
+goto error;
+}
+
+i++;
+
+if (*p == '\0')
+break;
+else if (*p != ',')
+goto error;
+temp = p + 1;
+}
+
+if (!i)
+VIR_FREE(result);
+
+*dev = result;
+*size = i;
+
+return 0;
+
+error:
+virReportError(VIR_ERR_INVALID_ARG,
+   _(unable to parse blkio device '%s' '%s'),
+   type, blkioDeviceStr);
+cleanup:
+virBlkioDeviceArrayClear(result, ndevices);
+VIR_FREE(result);
+return -1;
+}
+
+static int
+lxcDomainMergeBlkioDevice(virBlkioDevicePtr *dest_array,
+  size_t *dest_size,
+  virBlkioDevicePtr src_array,
+  size_t src_size,
+  const char *type)
+{
+size_t i, j;
+virBlkioDevicePtr dest, src;
+
+for (i = 0; i  src_size; i++) {
+bool found = false;
+
+src = src_array[i];
+for (j = 0; j  *dest_size; j++) {
+dest = (*dest_array)[j];
+if (STREQ(src-path, dest-path)) {
+found = true;
+
+if (STREQ(type, VIR_DOMAIN_BLKIO_DEVICE_WEIGHT))
+dest-weight = src-weight;
+else if (STREQ(type, VIR_DOMAIN_BLKIO_DEVICE_READ_IOPS))
+dest-riops = src-riops;
+else if (STREQ(type, VIR_DOMAIN_BLKIO_DEVICE_WRITE_IOPS))
+dest-wiops = src-wiops;
+else if (STREQ(type, VIR_DOMAIN_BLKIO_DEVICE_READ_BPS))
+dest-rbps = src-rbps;
+else if (STREQ(type, VIR_DOMAIN_BLKIO_DEVICE_WRITE_BPS))
+dest-wbps = src-wbps;
+else {
+virReportError(VIR_ERR_INVALID_ARG, _(Unknown parameter 
%s),
+   type);
+return -1;
+}
+
+break;
+}
+}
+if (!found

[libvirt] [PATCH v2 0/8] Add throttle blkio cgroup support for libvirt

2013-12-11 Thread Gao feng
Right now, libvirt only supports the cfq-based blkio cgroup;
this means that if the block devices don't use the cfq scheduler, the
blkio cgroup will have no effect.

This patchset adds throttle blkio cgroup support to libvirt,
introduces four elements for domain configuration and extends the
virsh command blkiotune.

This patchset is a new version of Guan Qiang's patchset:
https://www.redhat.com/archives/libvir-list/2013-October/msg01066.html

Changes from v1:
1, rearrange the order of patches
2, change the options/elements of throttle blkio cgroup to be consistent
   with disk iotune.
3, fix compile error when cgroup is unavailable.
4, remove virCgroupSetBlkioDevice, split virCgroupSetBlkioDeviceBps
   and virCgroupSetBlkioDeviceIops

Changes from Guan Qiang's patchset:
1, split into 8 patches to make the logic clearer
2, change the type of read/write iops from unsigned long long to unsigned int;
   trying to set read/write iops to a value bigger than the maximum
   unsigned int will fail.
3, fix some logic flaws.

Gao feng (8):
  rename virDomainBlkioDeviceWeightParseXML to
virDomainBlkioDeviceParseXML
  rename virBlkioDeviceWeightArrayClear to virBlkioDeviceArrayClear
  rename virBlkioDeviceWeightPtr to virBlkioDevicePtr
  domain: introduce xml elements for throttle blkio cgroup
  blkio: Setting throttle blkio cgroup for domain
  virsh: add setting throttle blkio cgroup option to blkiotune
  qemu: allow to setup throttle blkio cgroup through virsh
  lxc: allow to setup throttle blkio cgroup through virsh

 docs/schemas/domaincommon.rng  |  28 +-
 include/libvirt/libvirt.h.in   |  45 ++
 src/conf/domain_conf.c | 113 +++-
 src/conf/domain_conf.h |  16 +-
 src/libvirt_private.syms   |   6 +-
 src/lxc/lxc_cgroup.c   |  29 +-
 src/lxc/lxc_driver.c   | 649 -
 src/qemu/qemu_cgroup.c |  29 +-
 src/qemu/qemu_driver.c | 443 --
 src/util/vircgroup.c   | 224 ++-
 src/util/vircgroup.h   |  16 +
 .../qemuxml2argv-blkiotune-device.xml  |   8 +
 tools/virsh-domain.c   |  64 ++
 tools/virsh.pod|  36 +-
 14 files changed, 1583 insertions(+), 123 deletions(-)

-- 
1.8.3.1

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


[libvirt] [PATCH 2/8] rename virBlkioDeviceWeightArrayClear to virBlkioDeviceArrayClear

2013-12-11 Thread Gao feng
Signed-off-by: Gao feng gaof...@cn.fujitsu.com
---
 src/conf/domain_conf.c   | 8 
 src/conf/domain_conf.h   | 4 ++--
 src/libvirt_private.syms | 2 +-
 src/qemu/qemu_driver.c   | 6 +++---
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index e2219f2..397671a 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -882,8 +882,8 @@ virDomainXMLOptionGetNamespace(virDomainXMLOptionPtr xmlopt)
 
 
 void
-virBlkioDeviceWeightArrayClear(virBlkioDeviceWeightPtr deviceWeights,
-   int ndevices)
+virBlkioDeviceArrayClear(virBlkioDeviceWeightPtr deviceWeights,
+ int ndevices)
 {
 size_t i;
 
@@ -2050,8 +2050,8 @@ void virDomainDefFree(virDomainDefPtr def)
 VIR_FREE(def-description);
 VIR_FREE(def-title);
 
-virBlkioDeviceWeightArrayClear(def-blkio.devices,
-   def-blkio.ndevices);
+virBlkioDeviceArrayClear(def-blkio.devices,
+ def-blkio.ndevices);
 VIR_FREE(def-blkio.devices);
 
 virDomainWatchdogDefFree(def-watchdog);
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
index 4934911..96598b5 100644
--- a/src/conf/domain_conf.h
+++ b/src/conf/domain_conf.h
@@ -1911,8 +1911,8 @@ struct _virDomainIdMapDef {
 };
 
 
-void virBlkioDeviceWeightArrayClear(virBlkioDeviceWeightPtr deviceWeights,
-int ndevices);
+void virBlkioDeviceArrayClear(virBlkioDeviceWeightPtr deviceWeights,
+  int ndevices);
 
 typedef struct _virDomainResourceDef virDomainResourceDef;
 typedef virDomainResourceDef *virDomainResourceDefPtr;
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index 3ca20e4..d8f8f7e 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -104,7 +104,7 @@ virDomainAuditVcpu;
 
 
 # conf/domain_conf.h
-virBlkioDeviceWeightArrayClear;
+virBlkioDeviceArrayClear;
 virDiskNameToBusDeviceIndex;
 virDiskNameToIndex;
 virDomainActualNetDefFree;
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 1c90798..a7d96a9 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -7505,7 +7505,7 @@ error:
 virReportError(VIR_ERR_INVALID_ARG,
_(unable to parse device weight '%s'), deviceWeightStr);
 cleanup:
-virBlkioDeviceWeightArrayClear(result, ndevices);
+virBlkioDeviceArrayClear(result, ndevices);
 VIR_FREE(result);
 return -1;
 }
@@ -7636,7 +7636,7 @@ qemuDomainSetBlkioParameters(virDomainPtr dom,
  vm-def-blkio.ndevices,
  devices, ndevices)  0)
 ret = -1;
-virBlkioDeviceWeightArrayClear(devices, ndevices);
+virBlkioDeviceArrayClear(devices, ndevices);
 VIR_FREE(devices);
 }
 }
@@ -7673,7 +7673,7 @@ qemuDomainSetBlkioParameters(virDomainPtr dom,
  
persistentDef-blkio.ndevices,
  devices, ndevices)  0)
 ret = -1;
-virBlkioDeviceWeightArrayClear(devices, ndevices);
+virBlkioDeviceArrayClear(devices, ndevices);
 VIR_FREE(devices);
 }
 }
-- 
1.8.3.1

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


[libvirt] [PATCH 1/8] rename virDomainBlkioDeviceWeightParseXML to virDomainBlkioDeviceParseXML

2013-12-11 Thread Gao feng
virDomainBlkioDeviceWeightParseXML will be used to parse
the xml element read_bps, write_bps, read_iops, write_iops.

Signed-off-by: Gao feng gaof...@cn.fujitsu.com
---
 src/conf/domain_conf.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index e0ab4b1..e2219f2 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -892,7 +892,7 @@ virBlkioDeviceWeightArrayClear(virBlkioDeviceWeightPtr 
deviceWeights,
 }
 
 /**
- * virDomainBlkioDeviceWeightParseXML
+ * virDomainBlkioDeviceParseXML
  *
  * this function parses a XML node:
  *
@@ -904,8 +904,8 @@ virBlkioDeviceWeightArrayClear(virBlkioDeviceWeightPtr 
deviceWeights,
  * and fills a virBlkioDeviceWeight struct.
  */
 static int
-virDomainBlkioDeviceWeightParseXML(xmlNodePtr root,
-   virBlkioDeviceWeightPtr dw)
+virDomainBlkioDeviceParseXML(xmlNodePtr root,
+ virBlkioDeviceWeightPtr dw)
 {
 char *c;
 xmlNodePtr node;
@@ -11097,8 +11097,8 @@ virDomainDefParseXML(xmlDocPtr xml,
 
 for (i = 0; i  n; i++) {
 size_t j;
-if (virDomainBlkioDeviceWeightParseXML(nodes[i],
-   def-blkio.devices[i])  0)
+if (virDomainBlkioDeviceParseXML(nodes[i],
+ def-blkio.devices[i])  0)
 goto error;
 def-blkio.ndevices++;
 for (j = 0; j  i; j++) {
-- 
1.8.3.1

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


[libvirt] [PATCH 3/8] rename virBlkioDeviceWeightPtr to virBlkioDevicePtr

2013-12-11 Thread Gao feng
The throttle blkio cgroup will reuse this struct.

Signed-off-by: Gao feng gaof...@cn.fujitsu.com
---
 src/conf/domain_conf.c | 18 +-
 src/conf/domain_conf.h | 10 +-
 src/lxc/lxc_cgroup.c   |  6 +++---
 src/qemu/qemu_cgroup.c |  8 
 src/qemu/qemu_driver.c | 18 +-
 5 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index 397671a..26242b6 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -882,13 +882,13 @@ virDomainXMLOptionGetNamespace(virDomainXMLOptionPtr 
xmlopt)
 
 
 void
-virBlkioDeviceArrayClear(virBlkioDeviceWeightPtr deviceWeights,
+virBlkioDeviceArrayClear(virBlkioDevicePtr devices,
  int ndevices)
 {
 size_t i;
 
 for (i = 0; i  ndevices; i++)
-VIR_FREE(deviceWeights[i].path);
+VIR_FREE(devices[i].path);
 }
 
 /**
@@ -901,11 +901,11 @@ virBlkioDeviceArrayClear(virBlkioDeviceWeightPtr 
deviceWeights,
  * weightweight/weight
  *   /device
  *
- * and fills a virBlkioDeviceWeight struct.
+ * and fills a virBlkioDeviceTune struct.
  */
 static int
 virDomainBlkioDeviceParseXML(xmlNodePtr root,
- virBlkioDeviceWeightPtr dw)
+ virBlkioDevicePtr dev)
 {
 char *c;
 xmlNodePtr node;
@@ -913,16 +913,16 @@ virDomainBlkioDeviceParseXML(xmlNodePtr root,
 node = root-children;
 while (node) {
 if (node-type == XML_ELEMENT_NODE) {
-if (xmlStrEqual(node-name, BAD_CAST path)  !dw-path) {
-dw-path = (char *)xmlNodeGetContent(node);
+if (xmlStrEqual(node-name, BAD_CAST path)  !dev-path) {
+dev-path = (char *)xmlNodeGetContent(node);
 } else if (xmlStrEqual(node-name, BAD_CAST weight)) {
 c = (char *)xmlNodeGetContent(node);
-if (virStrToLong_ui(c, NULL, 10, dw-weight)  0) {
+if (virStrToLong_ui(c, NULL, 10, dev-weight)  0) {
 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
_(could not parse weight %s),
c);
 VIR_FREE(c);
-VIR_FREE(dw-path);
+VIR_FREE(dev-path);
 return -1;
 }
 VIR_FREE(c);
@@ -930,7 +930,7 @@ virDomainBlkioDeviceParseXML(xmlNodePtr root,
 }
 node = node-next;
 }
-if (!dw-path) {
+if (!dev-path) {
 virReportError(VIR_ERR_CONFIG_UNSUPPORTED, %s,
_(missing per-device path));
 return -1;
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
index 96598b5..b410fd0 100644
--- a/src/conf/domain_conf.h
+++ b/src/conf/domain_conf.h
@@ -1860,9 +1860,9 @@ virDomainVcpuPinDefPtr 
virDomainVcpuPinFindByVcpu(virDomainVcpuPinDefPtr *def,
   int nvcpupin,
   int vcpu);
 
-typedef struct _virBlkioDeviceWeight virBlkioDeviceWeight;
-typedef virBlkioDeviceWeight *virBlkioDeviceWeightPtr;
-struct _virBlkioDeviceWeight {
+typedef struct _virBlkioDevice virBlkioDevice;
+typedef virBlkioDevice *virBlkioDevicePtr;
+struct _virBlkioDevice {
 char *path;
 unsigned int weight;
 };
@@ -1911,7 +1911,7 @@ struct _virDomainIdMapDef {
 };
 
 
-void virBlkioDeviceArrayClear(virBlkioDeviceWeightPtr deviceWeights,
+void virBlkioDeviceArrayClear(virBlkioDevicePtr deviceWeights,
   int ndevices);
 
 typedef struct _virDomainResourceDef virDomainResourceDef;
@@ -1940,7 +1940,7 @@ struct _virDomainDef {
 unsigned int weight;
 
 size_t ndevices;
-virBlkioDeviceWeightPtr devices;
+virBlkioDevicePtr devices;
 } blkio;
 
 struct {
diff --git a/src/lxc/lxc_cgroup.c b/src/lxc/lxc_cgroup.c
index 275e250..310a476 100644
--- a/src/lxc/lxc_cgroup.c
+++ b/src/lxc/lxc_cgroup.c
@@ -112,10 +112,10 @@ static int virLXCCgroupSetupBlkioTune(virDomainDefPtr def,
 
 if (def-blkio.ndevices) {
 for (i = 0; i  def-blkio.ndevices; i++) {
-virBlkioDeviceWeightPtr dw = def-blkio.devices[i];
-if (!dw-weight)
+virBlkioDevicePtr dev = def-blkio.devices[i];
+if (!dev-weight)
 continue;
-if (virCgroupSetBlkioDeviceWeight(cgroup, dw-path, dw-weight)  
0)
+if (virCgroupSetBlkioDeviceWeight(cgroup, dev-path, dev-weight) 
 0)
 return -1;
 }
 }
diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
index f0cacd0..a18955e 100644
--- a/src/qemu/qemu_cgroup.c
+++ b/src/qemu/qemu_cgroup.c
@@ -399,11 +399,11 @@ qemuSetupBlkioCgroup(virDomainObjPtr vm)
 
 if (vm-def-blkio.ndevices) {
 for (i = 0; i  vm-def-blkio.ndevices; i++) {
-virBlkioDeviceWeightPtr dw = vm-def-blkio.devices[i];
-if (!dw-weight

[libvirt] [PATCH 6/8] virsh: add setting throttle blkio cgroup option to blkiotune

2013-12-11 Thread Gao feng
With this patch, the user can set up the throttle blkio cgroup
for a domain through the virsh command, such as:

virsh blkiotune domain1 --device-read-bytes-sec 
/dev/sda1,100,/dev/sda2,200
--device-write-bytes-sec /dev/sda1,100 --device-read-iops-sec 
/dev/sda1,1
--device-write-iops-sec /dev/sda1,1,/dev/sda2,0

This patch also adds manpage entries for these new options.

Signed-off-by: Guan Qiang hzguanqi...@corp.netease.com
Signed-off-by: Gao feng gaof...@cn.fujitsu.com
---
 include/libvirt/libvirt.h.in | 45 +++
 tools/virsh-domain.c | 64 
 tools/virsh.pod  | 36 +++--
 3 files changed, 143 insertions(+), 2 deletions(-)

diff --git a/include/libvirt/libvirt.h.in b/include/libvirt/libvirt.h.in
index 29d4dce..9ebf3c4 100644
--- a/include/libvirt/libvirt.h.in
+++ b/include/libvirt/libvirt.h.in
@@ -1806,6 +1806,51 @@ char *  
virDomainGetSchedulerType(virDomainPtr domain,
 
 #define VIR_DOMAIN_BLKIO_DEVICE_WEIGHT device_weight
 
+/**
+ * VIR_DOMAIN_BLKIO_DEVICE_READ_IOPS:
+ *
+ * Macro for the blkio tunable throttle.read_iops_device: it represents
+ * the number of reading the block device per second, as a string. The
+ * string is parsed as a series of /path/to/device, read_iops elements,
+ * separated by ','.
+ */
+
+#define VIR_DOMAIN_BLKIO_DEVICE_READ_IOPS device_read_iops_sec
+
+
+/**
+ * VIR_DOMAIN_BLKIO_DEVICE_WRITE_IOPS:
+ *
+ * Macro for the blkio tunable throttle.write_iops_device: it represents
+ * the number of writing the block device per second, as a string. The
+ * string is parsed as a series of /path/to/device, write_iops elements,
+ * separated by ','.
+ */
+#define VIR_DOMAIN_BLKIO_DEVICE_WRITE_IOPS device_write_iops_sec
+
+
+/**
+ * VIR_DOMAIN_BLKIO_DEVICE_READ_BPS:
+ *
+ * Macro for the blkio tunable throttle.read_bps_device: it represents
+ * the bytes of reading the block device per second, as a string. The
+ * string is parsed as a series of /path/to/device, read_bps elements,
+ * separated by ','.
+ */
+#define VIR_DOMAIN_BLKIO_DEVICE_READ_BPS device_read_bytes_sec
+
+
+/**
+ * VIR_DOMAIN_BLKIO_DEVICE_WRITE_BPS:
+ *
+ * Macro for the blkio tunable throttle.write_bps_device: it represents
+ * the bytes of writing the block device per second, as a string. The
+ * string is parsed as a series of /path/to/device, write_bps elements,
+ * separated by ','.
+ */
+#define VIR_DOMAIN_BLKIO_DEVICE_WRITE_BPS device_write_bytes_sec
+
+
 /* Set Blkio tunables for the domain*/
 int virDomainSetBlkioParameters(virDomainPtr domain,
 virTypedParameterPtr params,
diff --git a/tools/virsh-domain.c b/tools/virsh-domain.c
index 8b80e1e..f7e7959 100644
--- a/tools/virsh-domain.c
+++ b/tools/virsh-domain.c
@@ -1250,6 +1250,22 @@ static const vshCmdOptDef opts_blkiotune[] = {
  .type = VSH_OT_STRING,
  .help = N_(per-device IO Weights, in the form of 
/path/to/device,weight,...)
 },
+{.name = device-read-iops-sec,
+ .type = VSH_OT_STRING,
+ .help = N_(per-device read I/O limit per second, in the form of 
/path/to/device,read_iops_sec,...)
+},
+{.name = device-write-iops-sec,
+ .type = VSH_OT_STRING,
+ .help = N_(per-device write I/O limit per second, in the form of 
/path/to/device,write_iops_sec,...)
+},
+{.name = device-read-bytes-sec,
+ .type = VSH_OT_STRING,
+ .help = N_(per-device bytes read per second, in the form of 
/path/to/device,read_bytes_sec,...)
+},
+{.name = device-write-bytes-sec,
+ .type = VSH_OT_STRING,
+ .help = N_(per-device bytes wrote per second, in the form of 
/path/to/device,write_bytes_sec,...)
+},
 {.name = config,
  .type = VSH_OT_BOOL,
  .help = N_(affect next boot)
@@ -1270,6 +1286,10 @@ cmdBlkiotune(vshControl * ctl, const vshCmd * cmd)
 {
 virDomainPtr dom;
 const char *device_weight = NULL;
+const char *device_riops = NULL;
+const char *device_wiops = NULL;
+const char *device_rbps = NULL;
+const char *device_wbps = NULL;
 int weight = 0;
 int nparams = 0;
 int maxparams = 0;
@@ -1317,6 +1337,50 @@ cmdBlkiotune(vshControl * ctl, const vshCmd * cmd)
 goto save_error;
 }
 
+rv = vshCommandOptString(cmd, device-read-iops-sec, device_riops);
+if (rv  0) {
+vshError(ctl, %s, _(Unable to parse string parameter));
+goto cleanup;
+} else if (rv  0) {
+if (virTypedParamsAddString(params, nparams, maxparams,
+VIR_DOMAIN_BLKIO_DEVICE_READ_IOPS,
+device_riops)  0)
+goto save_error;
+}
+
+rv = vshCommandOptString(cmd, device-write-iops-sec, device_wiops);
+if (rv  0) {
+vshError(ctl, %s, _(Unable to parse string parameter));
+goto cleanup;
+} else if (rv  0) {
+if (virTypedParamsAddString(params, nparams, maxparams

[libvirt] [PATCH 8/8] lxc: allow to setup throttle blkio cgroup through virsh

2013-12-11 Thread Gao feng
With this patch, the user can set the throttle blkio cgroup for
an lxc domain through the virsh tool.

The functions are copied from qemu_driver.

Signed-off-by: Guan Qiang hzguanqi...@corp.netease.com
Signed-off-by: Gao feng gaof...@cn.fujitsu.com
---
 src/lxc/lxc_driver.c | 649 +--
 1 file changed, 624 insertions(+), 25 deletions(-)

diff --git a/src/lxc/lxc_driver.c b/src/lxc/lxc_driver.c
index c499182..1f73b52 100644
--- a/src/lxc/lxc_driver.c
+++ b/src/lxc/lxc_driver.c
@@ -1545,6 +1545,20 @@ static int lxcStateCleanup(void)
 return 0;
 }
 
+static int
+lxcConnectSupportsFeature(virConnectPtr conn, int feature)
+{
+if (virConnectSupportsFeatureEnsureACL(conn)  0)
+return -1;
+
+switch (feature) {
+case VIR_DRV_FEATURE_TYPED_PARAM_STRING:
+return 1;
+default:
+return 0;
+}
+}
+
 
 static int lxcConnectGetVersion(virConnectPtr conn, unsigned long *version)
 {
@@ -1910,6 +1924,159 @@ lxcDomainGetSchedulerParameters(virDomainPtr domain,
 return lxcDomainGetSchedulerParametersFlags(domain, params, nparams, 0);
 }
 
+static int
+lxcDomainParseBlkioDeviceStr(char *blkioDeviceStr, const char *type,
+ virBlkioDevicePtr *dev, size_t *size)
+{
+char *temp;
+int ndevices = 0;
+int nsep = 0;
+size_t i;
+virBlkioDevicePtr result = NULL;
+
+*dev = NULL;
+*size = 0;
+
+if (STREQ(blkioDeviceStr, ))
+return 0;
+
+temp = blkioDeviceStr;
+while (temp) {
+temp = strchr(temp, ',');
+if (temp) {
+temp++;
+nsep++;
+}
+}
+
+/* A valid string must have even number of fields, hence an odd
+ * number of commas.  */
+if (!(nsep  1))
+goto error;
+
+ndevices = (nsep + 1) / 2;
+
+if (VIR_ALLOC_N(result, ndevices)  0)
+return -1;
+
+i = 0;
+temp = blkioDeviceStr;
+while (temp) {
+char *p = temp;
+
+/* device path */
+p = strchr(p, ',');
+if (!p)
+goto error;
+
+if (VIR_STRNDUP(result[i].path, temp, p - temp)  0)
+goto cleanup;
+
+/* value */
+temp = p + 1;
+
+if (STREQ(type, VIR_DOMAIN_BLKIO_DEVICE_WEIGHT)) {
+if (virStrToLong_ui(temp, p, 10, result[i].weight)  0)
+goto error;
+} else if (STREQ(type, VIR_DOMAIN_BLKIO_DEVICE_READ_IOPS)) {
+if (virStrToLong_ui(temp, p, 10, result[i].riops)  0)
+goto error;
+} else if (STREQ(type, VIR_DOMAIN_BLKIO_DEVICE_WRITE_IOPS)) {
+if (virStrToLong_ui(temp, p, 10, result[i].wiops)  0)
+goto error;
+} else if (STREQ(type, VIR_DOMAIN_BLKIO_DEVICE_READ_BPS)) {
+if (virStrToLong_ull(temp, p, 10, result[i].rbps)  0)
+goto error;
+} else {
+if (virStrToLong_ull(temp, p, 10, result[i].wbps)  0)
+goto error;
+}
+
+i++;
+
+if (*p == '\0')
+break;
+else if (*p != ',')
+goto error;
+temp = p + 1;
+}
+
+if (!i)
+VIR_FREE(result);
+
+*dev = result;
+*size = i;
+
+return 0;
+
+error:
+virReportError(VIR_ERR_INVALID_ARG,
+   _(unable to parse device weight '%s'), blkioDeviceStr);
+cleanup:
+virBlkioDeviceArrayClear(result, ndevices);
+VIR_FREE(result);
+return -1;
+}
+
+static int
+lxcDomainMergeBlkioDevice(virBlkioDevicePtr *dest_array,
+  size_t *dest_size,
+  virBlkioDevicePtr src_array,
+  size_t src_size,
+  const char *type)
+{
+size_t i, j;
+virBlkioDevicePtr dest, src;
+
+for (i = 0; i  src_size; i++) {
+bool found = false;
+
+src = src_array[i];
+for (j = 0; j  *dest_size; j++) {
+dest = (*dest_array)[j];
+if (STREQ(src-path, dest-path)) {
+found = true;
+
+if (STREQ(type, VIR_DOMAIN_BLKIO_DEVICE_WEIGHT))
+dest-weight = src-weight;
+else if (STREQ(type, VIR_DOMAIN_BLKIO_DEVICE_READ_IOPS))
+dest-riops = src-riops;
+else if (STREQ(type, VIR_DOMAIN_BLKIO_DEVICE_WRITE_IOPS))
+dest-wiops = src-wiops;
+else if (STREQ(type, VIR_DOMAIN_BLKIO_DEVICE_READ_BPS))
+dest-rbps = src-rbps;
+else
+dest-wbps = src-wbps;
+
+break;
+}
+}
+if (!found) {
+if (!src-weight  !src-riops  !src-wiops  !src-rbps  
!src-wbps)
+continue;
+if (VIR_EXPAND_N(*dest_array, *dest_size, 1)  0)
+return -1;
+dest = (*dest_array)[*dest_size - 1];
+dest-path = src-path;
+
+if (STREQ(type, VIR_DOMAIN_BLKIO_DEVICE_WEIGHT

[libvirt] [PATCH 4/8] domain: introduce xml elements for throttle blkio cgroup

2013-12-11 Thread Gao feng
This patch introduces new XML elements under blkiotune;
we use these new elements to set up the throttle blkio
cgroup for a domain. The new blkiotune node looks like this:

<blkiotune>
  <device>
    <path>/path/to/block</path>
    <weight>1000</weight>
    <read_iops_sec>1</read_iops_sec>
    <write_iops_sec>1</write_iops_sec>
    <read_bytes_sec>100</read_bytes_sec>
    <write_bytes_sec>100</write_bytes_sec>
  </device>
</blkiotune>

Signed-off-by: Guan Qiang hzguanqi...@corp.netease.com
Signed-off-by: Gao feng gaof...@cn.fujitsu.com
---
 docs/schemas/domaincommon.rng | 28 --
 src/conf/domain_conf.c| 85 +--
 src/conf/domain_conf.h|  4 ++
 3 files changed, 103 insertions(+), 14 deletions(-)

diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng
index 38c6801..bc8ed5d 100644
--- a/docs/schemas/domaincommon.rng
+++ b/docs/schemas/domaincommon.rng
@@ -621,9 +621,31 @@
   element name=path
 ref name=absFilePath/
   /element
-  element name=weight
-ref name=weight/
-  /element
+  optional
+element name=weight
+  ref name=weight/
+/element
+  /optional
+  optional
+element name=read_iops_sec
+  data type='unsignedInt'/
+/element
+  /optional
+  optional
+element name=write_iops_sec
+  data type='unsignedInt'/
+/element
+  /optional
+  optional
+element name=read_bytes_sec
+  data type='unsignedLong'/
+/element
+  /optional
+  optional
+element name=write_bytes_sec
+  data type='unsignedLong'/
+/element
+  /optional
 /interleave
   /element
 /zeroOrMore
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index 26242b6..c4d51b4 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -899,15 +899,19 @@ virBlkioDeviceArrayClear(virBlkioDevicePtr devices,
  *   device
  * path/fully/qualified/device/path/path
  * weightweight/weight
+ * read_bytes_secbps/read_bytes_sec
+ * write_bytes_secbps/write_bytes_sec
+ * read_iops_seciops/read_iops_sec
+ * write_iops_seciops/write_iops_sec
  *   /device
  *
- * and fills a virBlkioDeviceTune struct.
+ * and fills a virBlkioDevicePtr struct.
  */
 static int
 virDomainBlkioDeviceParseXML(xmlNodePtr root,
  virBlkioDevicePtr dev)
 {
-char *c;
+char *c = NULL;
 xmlNodePtr node;
 
 node = root-children;
@@ -921,9 +925,43 @@ virDomainBlkioDeviceParseXML(xmlNodePtr root,
 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
_(could not parse weight %s),
c);
-VIR_FREE(c);
-VIR_FREE(dev-path);
-return -1;
+goto error;
+}
+VIR_FREE(c);
+} else if (xmlStrEqual(node-name, BAD_CAST read_bytes_sec)) {
+c = (char *)xmlNodeGetContent(node);
+if (virStrToLong_ull(c, NULL, 10, dev-rbps)  0) {
+virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+   _(could not parse read bytes sec %s),
+   c);
+goto error;
+}
+VIR_FREE(c);
+} else if (xmlStrEqual(node-name, BAD_CAST write_bytes_sec)) {
+c = (char *)xmlNodeGetContent(node);
+if (virStrToLong_ull(c, NULL, 10, dev-wbps)  0) {
+virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+   _(could not parse write bytes sec %s),
+   c);
+goto error;
+}
+VIR_FREE(c);
+} else if (xmlStrEqual(node-name, BAD_CAST read_iops_sec)) {
+c = (char *)xmlNodeGetContent(node);
+if (virStrToLong_ui(c, NULL, 10, dev-riops)  0) {
+virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+   _(could not parse read iops sec %s),
+   c);
+goto error;
+}
+VIR_FREE(c);
+} else if (xmlStrEqual(node-name, BAD_CAST write_iops_sec)) {
+c = (char *)xmlNodeGetContent(node);
+if (virStrToLong_ui(c, NULL, 10, dev-wiops)  0) {
+virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+   _(could not parse write iops sec %s),
+   c);
+goto error

[libvirt] [PATCH 5/8] blkio: Setting throttle blkio cgroup for domain

2013-12-11 Thread Gao feng
This patch introduces virCgroupSetBlkioDeviceReadIops,
virCgroupSetBlkioDeviceWriteIops,
virCgroupSetBlkioDeviceReadBps and
virCgroupSetBlkioDeviceWriteBps.

We can use these interfaces to set up the throttle
blkio cgroup for a domain.

This patch also adds the new throttle blkio cgroup
elements to the test xml.

Signed-off-by: Guan Qiang hzguanqi...@corp.netease.com
Signed-off-by: Gao feng gaof...@cn.fujitsu.com
---
 src/libvirt_private.syms   |   4 +
 src/lxc/lxc_cgroup.c   |  27 ++-
 src/qemu/qemu_cgroup.c |  27 ++-
 src/util/vircgroup.c   | 224 -
 src/util/vircgroup.h   |  16 ++
 .../qemuxml2argv-blkiotune-device.xml  |   8 +
 6 files changed, 295 insertions(+), 11 deletions(-)

diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index d8f8f7e..a535bf3 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -1031,7 +1031,11 @@ virCgroupNewVcpu;
 virCgroupPathOfController;
 virCgroupRemove;
 virCgroupRemoveRecursively;
+virCgroupSetBlkioDeviceReadBps;
+virCgroupSetBlkioDeviceReadIops;
 virCgroupSetBlkioDeviceWeight;
+virCgroupSetBlkioDeviceWriteBps;
+virCgroupSetBlkioDeviceWriteIops;
 virCgroupSetBlkioWeight;
 virCgroupSetCpuCfsPeriod;
 virCgroupSetCpuCfsQuota;
diff --git a/src/lxc/lxc_cgroup.c b/src/lxc/lxc_cgroup.c
index 310a476..cc0d5e8 100644
--- a/src/lxc/lxc_cgroup.c
+++ b/src/lxc/lxc_cgroup.c
@@ -113,9 +113,30 @@ static int virLXCCgroupSetupBlkioTune(virDomainDefPtr def,
 if (def-blkio.ndevices) {
 for (i = 0; i  def-blkio.ndevices; i++) {
 virBlkioDevicePtr dev = def-blkio.devices[i];
-if (!dev-weight)
-continue;
-if (virCgroupSetBlkioDeviceWeight(cgroup, dev-path, dev-weight) 
 0)
+
+if (dev-weight 
+(virCgroupSetBlkioDeviceWeight(cgroup, dev-path,
+   dev-weight)  0))
+return -1;
+
+if (dev-riops 
+(virCgroupSetBlkioDeviceReadIops(cgroup, dev-path,
+ dev-riops)  0))
+return -1;
+
+if (dev-wiops 
+(virCgroupSetBlkioDeviceWriteIops(cgroup, dev-path,
+  dev-wiops)  0))
+return -1;
+
+if (dev-rbps 
+(virCgroupSetBlkioDeviceReadBps(cgroup, dev-path,
+dev-rbps)  0))
+return -1;
+
+if (dev-wbps 
+(virCgroupSetBlkioDeviceWriteBps(cgroup, dev-path,
+ dev-wbps)  0))
 return -1;
 }
 }
diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
index a18955e..1cc929c 100644
--- a/src/qemu/qemu_cgroup.c
+++ b/src/qemu/qemu_cgroup.c
@@ -400,10 +400,29 @@ qemuSetupBlkioCgroup(virDomainObjPtr vm)
 if (vm-def-blkio.ndevices) {
 for (i = 0; i  vm-def-blkio.ndevices; i++) {
 virBlkioDevicePtr dev = vm-def-blkio.devices[i];
-if (!dev-weight)
-continue;
-if (virCgroupSetBlkioDeviceWeight(priv-cgroup, dev-path,
-  dev-weight)  0)
+if (dev-weight 
+(virCgroupSetBlkioDeviceWeight(priv-cgroup, dev-path,
+   dev-weight)  0))
+return -1;
+
+if (dev-riops 
+(virCgroupSetBlkioDeviceReadIops(priv-cgroup, dev-path,
+ dev-riops)  0))
+return -1;
+
+if (dev-wiops 
+(virCgroupSetBlkioDeviceWriteIops(priv-cgroup, dev-path,
+  dev-wiops)  0))
+return -1;
+
+if (dev-rbps 
+(virCgroupSetBlkioDeviceReadBps(priv-cgroup, dev-path,
+dev-rbps)  0))
+return -1;
+
+if (dev-wbps 
+(virCgroupSetBlkioDeviceWriteBps(priv-cgroup, dev-path,
+ dev-wbps)  0))
 return -1;
 }
 }
diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c
index 43eb649..a6d60c5 100644
--- a/src/util/vircgroup.c
+++ b/src/util/vircgroup.c
@@ -1824,12 +1824,189 @@ virCgroupGetBlkioWeight(virCgroupPtr group, unsigned 
int *weight)
 return ret;
 }
 
+/**
+ * virCgroupSetBlkioDeviceReadIops:
+ * @group: The cgroup to change block io setting for
+ * @path: The path of device
+ * @riops: The new device read iops throttle, or 0 to clear
+ *
+ * Returns: 0 on success, -1 on error
+ */
+int
+virCgroupSetBlkioDeviceReadIops(virCgroupPtr group,
+const char *path

[libvirt] [PATCH 7/8] qemu: allow to setup throttle blkio cgroup through virsh

2013-12-11 Thread Gao feng
With this patch, the user can set up the throttle blkio cgroup
through virsh for a qemu domain.

Signed-off-by: Guan Qiang hzguanqi...@corp.netease.com
Signed-off-by: Gao feng gaof...@cn.fujitsu.com
---
 src/qemu/qemu_driver.c | 425 -
 1 file changed, 386 insertions(+), 39 deletions(-)

diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 45d11cd..0300db9 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -131,7 +131,7 @@
 # define KVM_CAP_NR_VCPUS 9   /* returns max vcpus per vm */
 #endif
 
-#define QEMU_NB_BLKIO_PARAM  2
+#define QEMU_NB_BLKIO_PARAM  6
 
 #define QEMU_NB_BANDWIDTH_PARAM 6
 
@@ -7427,12 +7427,12 @@ cleanup:
 return ret;
 }
 
-/* deviceWeightStr in the form of /device/path,weight,/device/path,weight
+/* blkioDeviceStr in the form of /device/path,weight,/device/path,weight
  * for example, /dev/disk/by-path/pci-:00:1f.2-scsi-0:0:0:0,800
  */
 static int
-qemuDomainParseDeviceWeightStr(char *deviceWeightStr,
-   virBlkioDevicePtr *dev, size_t *size)
+qemuDomainParseBlkioDeviceStr(char *blkioDeviceStr, const char *type,
+  virBlkioDevicePtr *dev, size_t *size)
 {
 char *temp;
 int ndevices = 0;
@@ -7443,10 +7443,10 @@ qemuDomainParseDeviceWeightStr(char *deviceWeightStr,
 *dev = NULL;
 *size = 0;
 
-if (STREQ(deviceWeightStr, ))
+if (STREQ(blkioDeviceStr, ))
 return 0;
 
-temp = deviceWeightStr;
+temp = blkioDeviceStr;
 while (temp) {
 temp = strchr(temp, ',');
 if (temp) {
@@ -7466,7 +7466,7 @@ qemuDomainParseDeviceWeightStr(char *deviceWeightStr,
 return -1;
 
 i = 0;
-temp = deviceWeightStr;
+temp = blkioDeviceStr;
 while (temp) {
 char *p = temp;
 
@@ -7478,11 +7478,25 @@ qemuDomainParseDeviceWeightStr(char *deviceWeightStr,
 if (VIR_STRNDUP(result[i].path, temp, p - temp)  0)
 goto cleanup;
 
-/* weight */
+/* value */
 temp = p + 1;
 
-if (virStrToLong_ui(temp, p, 10, result[i].weight)  0)
-goto error;
+if (STREQ(type, VIR_DOMAIN_BLKIO_DEVICE_WEIGHT)) {
+if (virStrToLong_ui(temp, p, 10, result[i].weight)  0)
+goto error;
+} else if (STREQ(type, VIR_DOMAIN_BLKIO_DEVICE_READ_IOPS)) {
+if (virStrToLong_ui(temp, p, 10, result[i].riops)  0)
+goto error;
+} else if (STREQ(type, VIR_DOMAIN_BLKIO_DEVICE_WRITE_IOPS)) {
+if (virStrToLong_ui(temp, p, 10, result[i].wiops)  0)
+goto error;
+} else if (STREQ(type, VIR_DOMAIN_BLKIO_DEVICE_READ_BPS)) {
+if (virStrToLong_ull(temp, p, 10, result[i].rbps)  0)
+goto error;
+} else {
+if (virStrToLong_ull(temp, p, 10, result[i].wbps)  0)
+goto error;
+}
 
 i++;
 
@@ -7503,20 +7517,21 @@ qemuDomainParseDeviceWeightStr(char *deviceWeightStr,
 
 error:
 virReportError(VIR_ERR_INVALID_ARG,
-   _(unable to parse device weight '%s'), deviceWeightStr);
+   _(unable to parse blkio device '%s'), blkioDeviceStr);
 cleanup:
 virBlkioDeviceArrayClear(result, ndevices);
 VIR_FREE(result);
 return -1;
 }
 
-/* Modify dest_array to reflect all device weight changes described in
+/* Modify dest_array to reflect all blkio device changes described in
  * src_array.  */
 static int
-qemuDomainMergeDeviceWeights(virBlkioDevicePtr *dest_array,
- size_t *dest_size,
- virBlkioDevicePtr src_array,
- size_t src_size)
+qemuDomainMergeBlkioDevice(virBlkioDevicePtr *dest_array,
+   size_t *dest_size,
+   virBlkioDevicePtr src_array,
+   size_t src_size,
+   const char *type)
 {
 size_t i, j;
 virBlkioDevicePtr dest, src;
@@ -7529,18 +7544,40 @@ qemuDomainMergeDeviceWeights(virBlkioDevicePtr 
*dest_array,
 dest = (*dest_array)[j];
 if (STREQ(src-path, dest-path)) {
 found = true;
-dest-weight = src-weight;
+
+if (STREQ(type, VIR_DOMAIN_BLKIO_DEVICE_WEIGHT))
+dest-weight = src-weight;
+else if (STREQ(type, VIR_DOMAIN_BLKIO_DEVICE_READ_IOPS))
+dest-riops = src-riops;
+else if (STREQ(type, VIR_DOMAIN_BLKIO_DEVICE_WRITE_IOPS))
+dest-wiops = src-wiops;
+else if (STREQ(type, VIR_DOMAIN_BLKIO_DEVICE_READ_BPS))
+dest-rbps = src-rbps;
+else
+dest-wbps = src-wbps;
+
 break;
 }
 }
 if (!found) {
-if (!src-weight)
+if (!src-weight  !src-riops  src-wiops  src-rbps  
src-wbps

Re: [libvirt] [PATCH 0/9] Add throttle blkio cgroup support for libvirt

2013-12-10 Thread Gao feng
Ping

On 12/02/2013 02:47 PM, Gao feng wrote:
 Right now, libvirt only supports the cfq based blkio cgroup;
 this means if the block devices don't use the cfq scheduler, the
 blkio cgroup will have no effect.
 
 This patchset adds the throttle blkio cgroup support for libvirt,
 introduces four elements for domain configuration and extends the
 virsh command blkiotune.
 
 This patchset is a new version of Guan Qiang's patchset
 https://www.redhat.com/archives/libvir-list/2013-October/msg01066.html
 
 Changes from Guan Qiang's patchset:
 1, split into 8 patches to make the logic clearer
 2, change the type of read/write iops from unsigned long long to unsigned int;
trying to set read/write iops to a value bigger than the maximum
unsigned int will fail.
 3, fix some logic flaws.
 
 Gao feng (9):
   rename virDomainBlkioDeviceWeightParseXML to
 virDomainBlkioDeviceParseXML
   rename virBlkioDeviceWeightArrayClear to virBlkioDeviceArrayClear
   rename virBlkioDeviceWeightPtr to virBlkioDevicePtr
   domain: introduce xml elements for throttle blkio cgroup
   blkio: Setting throttle blkio cgroup for domain
   qemu: allow to setup throttle blkio cgroup through virsh
   virsh: add virsh manual for setting throttle blkio cgroup
   lxc: allow to setup throttle blkio cgroup through virsh
   qemu: add new throttle blkio cgroup elements to the test xml
 
  docs/schemas/domaincommon.rng  |  28 +-
  include/libvirt/libvirt.h.in   |  45 ++
  src/conf/domain_conf.c | 113 +++-
  src/conf/domain_conf.h |  16 +-
  src/libvirt_private.syms   |   5 +-
  src/lxc/lxc_cgroup.c   |  12 +-
  src/lxc/lxc_driver.c   | 649 
 -
  src/qemu/qemu_cgroup.c |  13 +-
  src/qemu/qemu_driver.c | 432 --
  src/util/vircgroup.c   | 170 +-
  src/util/vircgroup.h   |  18 +
  .../qemuxml2argv-blkiotune-device.xml  |   8 +
  tools/virsh-domain.c   |  64 ++
  tools/virsh.pod|  36 +-
  14 files changed, 1485 insertions(+), 124 deletions(-)
 

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH 4/9] domain: introduce xml elements for throttle blkio cgroup

2013-12-10 Thread Gao feng
On 12/10/2013 11:30 PM, Daniel P. Berrange wrote:
 On Mon, Dec 02, 2013 at 02:47:59PM +0800, Gao feng wrote:
 This patch introduces new xml elements under blkiotune,
 we use these new elements to setup the throttle blkio
 cgroup for domain. The new blkiotune node looks like this:

 blkiotune
   device
 path/path/to/block/path
 weight1000/weight
 read_iops1/read_iops
 write_iops1/write_iops
 read_bps1/read_bps
 write_bps1/write_bps
   /device
 /blkiotune
 
 Under the disk element we have
 
   iotune
 total_bytes_sec1000/total_bytes_sec
 read_iops_sec40/read_iops_sec
 write_iops_sec10/write_iops_sec
   /iotune
 
 Please use a consistent naming convention for these new
 elements - ie  read_iops_sec not read_iops.
 

iops means I/O operations per second, so iops_sec looks very strange.
But since this naming is already in and it is a user interface,

I will change my patchset.

Thanks

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH 0/9] Add throttle blkio cgroup support for libvirt

2013-12-05 Thread Gao feng
Hi Guan Qiang,

On 12/06/2013 12:58 PM, hzguanqi...@corp.netease.com wrote:
 I've tested the patchset successful with virsh command for both kvm and lxc 
 vm.

Thanks a lot! :)

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH 0/9] Add throttle blkio cgroup support for libvirt

2013-12-03 Thread Gao feng
On 12/03/2013 10:48 AM, hzguanqi...@corp.netease.com wrote:
 On 2013-12-02 14:47 , Gao feng mailto:gaof...@cn.fujitsu.com wrote:
 
 Right now, libvirt only supports the cfq based blkio cgorup,
 this means if the block devices doesn't use cfq scheduler, the
 blkio cgroup will loss effect.
  
 This patchset adds the throttle blkio cgroup support for libvirt,
 intoduces four elements for domain configuration and extend the
 virsh command blkiotune.
 
  
 intoduces/introduces/s
  


Oh, my hand slipped. Thanks!

BTW, could you help to review or test this patchset?
It runs well on my test box.

Thanks.

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH]lxc: remove redundant mount operation

2013-12-03 Thread Gao feng
On 11/08/2013 04:30 PM, Chen Hanxiao wrote:
 From: Chen Hanxiao chenhanx...@cn.fujitsu.com
 
 If we do not specify a readonly mount, we don't need to
 re-mount it again.
 
 Signed-off-by: Chen Hanxiao chenhanx...@cn.fujitsu.com
 ---

Looks good to me.
ACK

  src/lxc/lxc_container.c | 7 ---
  1 file changed, 7 deletions(-)
 
 diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c
 index 255c711..351217a 100644
 --- a/src/lxc/lxc_container.c
 +++ b/src/lxc/lxc_container.c
 @@ -1126,13 +1126,6 @@ static int lxcContainerMountFSBind(virDomainFSDefPtr 
 fs,
   _(Failed to make directory %s readonly),
   fs-dst);
  }
 -} else {
 -VIR_DEBUG(Binding %s readwrite, fs-dst);
 -if (mount(src, fs-dst, NULL, MS_BIND|MS_REMOUNT, NULL)  0) {
 -virReportSystemError(errno,
 - _(Failed to make directory %s readwrite),
 - fs-dst);
 -}
  }
  
  ret = 0;
 

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


[libvirt] [PATCH 1/9] rename virDomainBlkioDeviceWeightParseXML to virDomainBlkioDeviceParseXML

2013-12-01 Thread Gao feng
virDomainBlkioDeviceWeightParseXML will be used to parse
the xml element read_bps, write_bps, read_iops, write_iops.

Signed-off-by: Gao feng gaof...@cn.fujitsu.com
---
 src/conf/domain_conf.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index 140eb80..5eb0278 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -892,7 +892,7 @@ virBlkioDeviceWeightArrayClear(virBlkioDeviceWeightPtr 
deviceWeights,
 }
 
 /**
- * virDomainBlkioDeviceWeightParseXML
+ * virDomainBlkioDeviceParseXML
  *
  * this function parses a XML node:
  *
@@ -904,8 +904,8 @@ virBlkioDeviceWeightArrayClear(virBlkioDeviceWeightPtr 
deviceWeights,
  * and fills a virBlkioDeviceWeight struct.
  */
 static int
-virDomainBlkioDeviceWeightParseXML(xmlNodePtr root,
-   virBlkioDeviceWeightPtr dw)
+virDomainBlkioDeviceParseXML(xmlNodePtr root,
+ virBlkioDeviceWeightPtr dw)
 {
 char *c;
 xmlNodePtr node;
@@ -11037,8 +11037,8 @@ virDomainDefParseXML(xmlDocPtr xml,
 
 for (i = 0; i  n; i++) {
 size_t j;
-if (virDomainBlkioDeviceWeightParseXML(nodes[i],
-   def-blkio.devices[i])  0)
+if (virDomainBlkioDeviceParseXML(nodes[i],
+ def-blkio.devices[i])  0)
 goto error;
 def-blkio.ndevices++;
 for (j = 0; j  i; j++) {
-- 
1.8.3.1

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


[libvirt] [PATCH 2/9] rename virBlkioDeviceWeightArrayClear to virBlkioDeviceArrayClear

2013-12-01 Thread Gao feng
Signed-off-by: Gao feng gaof...@cn.fujitsu.com
---
 src/conf/domain_conf.c   | 8 
 src/conf/domain_conf.h   | 4 ++--
 src/libvirt_private.syms | 2 +-
 src/qemu/qemu_driver.c   | 6 +++---
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index 5eb0278..3cb1187 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -882,8 +882,8 @@ virDomainXMLOptionGetNamespace(virDomainXMLOptionPtr xmlopt)
 
 
 void
-virBlkioDeviceWeightArrayClear(virBlkioDeviceWeightPtr deviceWeights,
-   int ndevices)
+virBlkioDeviceArrayClear(virBlkioDeviceWeightPtr deviceWeights,
+ int ndevices)
 {
 size_t i;
 
@@ -1990,8 +1990,8 @@ void virDomainDefFree(virDomainDefPtr def)
 VIR_FREE(def-description);
 VIR_FREE(def-title);
 
-virBlkioDeviceWeightArrayClear(def-blkio.devices,
-   def-blkio.ndevices);
+virBlkioDeviceArrayClear(def-blkio.devices,
+ def-blkio.ndevices);
 VIR_FREE(def-blkio.devices);
 
 virDomainWatchdogDefFree(def-watchdog);
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
index 4561ccc..383eb27 100644
--- a/src/conf/domain_conf.h
+++ b/src/conf/domain_conf.h
@@ -1910,8 +1910,8 @@ struct _virDomainIdMapDef {
 };
 
 
-void virBlkioDeviceWeightArrayClear(virBlkioDeviceWeightPtr deviceWeights,
-int ndevices);
+void virBlkioDeviceArrayClear(virBlkioDeviceWeightPtr deviceWeights,
+  int ndevices);
 
 typedef struct _virDomainResourceDef virDomainResourceDef;
 typedef virDomainResourceDef *virDomainResourceDefPtr;
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index 50fe00e..e5f7cbd 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -104,7 +104,7 @@ virDomainAuditVcpu;
 
 
 # conf/domain_conf.h
-virBlkioDeviceWeightArrayClear;
+virBlkioDeviceArrayClear;
 virDiskNameToBusDeviceIndex;
 virDiskNameToIndex;
 virDomainActualNetDefFree;
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 47d8a09..8a833c7 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -7497,7 +7497,7 @@ error:
 virReportError(VIR_ERR_INVALID_ARG,
_(unable to parse device weight '%s'), deviceWeightStr);
 cleanup:
-virBlkioDeviceWeightArrayClear(result, ndevices);
+virBlkioDeviceArrayClear(result, ndevices);
 VIR_FREE(result);
 return -1;
 }
@@ -7628,7 +7628,7 @@ qemuDomainSetBlkioParameters(virDomainPtr dom,
  vm-def-blkio.ndevices,
  devices, ndevices)  0)
 ret = -1;
-virBlkioDeviceWeightArrayClear(devices, ndevices);
+virBlkioDeviceArrayClear(devices, ndevices);
 VIR_FREE(devices);
 }
 }
@@ -7665,7 +7665,7 @@ qemuDomainSetBlkioParameters(virDomainPtr dom,
  
persistentDef-blkio.ndevices,
  devices, ndevices)  0)
 ret = -1;
-virBlkioDeviceWeightArrayClear(devices, ndevices);
+virBlkioDeviceArrayClear(devices, ndevices);
 VIR_FREE(devices);
 }
 }
-- 
1.8.3.1

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


[libvirt] [PATCH 3/9] rename virBlkioDeviceWeightPtr to virBlkioDevicePtr

2013-12-01 Thread Gao feng
The throttle blkio cgroup will reuse this struct.

Signed-off-by: Gao feng gaof...@cn.fujitsu.com
---
 src/conf/domain_conf.c | 18 +-
 src/conf/domain_conf.h | 10 +-
 src/lxc/lxc_cgroup.c   |  6 +++---
 src/qemu/qemu_cgroup.c |  8 
 src/qemu/qemu_driver.c | 18 +-
 5 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index 3cb1187..98754e5 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -882,13 +882,13 @@ virDomainXMLOptionGetNamespace(virDomainXMLOptionPtr 
xmlopt)
 
 
 void
-virBlkioDeviceArrayClear(virBlkioDeviceWeightPtr deviceWeights,
+virBlkioDeviceArrayClear(virBlkioDevicePtr devices,
  int ndevices)
 {
 size_t i;
 
 for (i = 0; i  ndevices; i++)
-VIR_FREE(deviceWeights[i].path);
+VIR_FREE(devices[i].path);
 }
 
 /**
@@ -901,11 +901,11 @@ virBlkioDeviceArrayClear(virBlkioDeviceWeightPtr 
deviceWeights,
  * weightweight/weight
  *   /device
  *
- * and fills a virBlkioDeviceWeight struct.
+ * and fills a virBlkioDeviceTune struct.
  */
 static int
 virDomainBlkioDeviceParseXML(xmlNodePtr root,
- virBlkioDeviceWeightPtr dw)
+ virBlkioDevicePtr dev)
 {
 char *c;
 xmlNodePtr node;
@@ -913,16 +913,16 @@ virDomainBlkioDeviceParseXML(xmlNodePtr root,
 node = root-children;
 while (node) {
 if (node-type == XML_ELEMENT_NODE) {
-if (xmlStrEqual(node-name, BAD_CAST path)  !dw-path) {
-dw-path = (char *)xmlNodeGetContent(node);
+if (xmlStrEqual(node-name, BAD_CAST path)  !dev-path) {
+dev-path = (char *)xmlNodeGetContent(node);
 } else if (xmlStrEqual(node-name, BAD_CAST weight)) {
 c = (char *)xmlNodeGetContent(node);
-if (virStrToLong_ui(c, NULL, 10, dw-weight)  0) {
+if (virStrToLong_ui(c, NULL, 10, dev-weight)  0) {
 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
_(could not parse weight %s),
c);
 VIR_FREE(c);
-VIR_FREE(dw-path);
+VIR_FREE(dev-path);
 return -1;
 }
 VIR_FREE(c);
@@ -930,7 +930,7 @@ virDomainBlkioDeviceParseXML(xmlNodePtr root,
 }
 node = node-next;
 }
-if (!dw-path) {
+if (!dev-path) {
 virReportError(VIR_ERR_CONFIG_UNSUPPORTED, %s,
_(missing per-device path));
 return -1;
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
index 383eb27..c53084c 100644
--- a/src/conf/domain_conf.h
+++ b/src/conf/domain_conf.h
@@ -1859,9 +1859,9 @@ virDomainVcpuPinDefPtr 
virDomainVcpuPinFindByVcpu(virDomainVcpuPinDefPtr *def,
   int nvcpupin,
   int vcpu);
 
-typedef struct _virBlkioDeviceWeight virBlkioDeviceWeight;
-typedef virBlkioDeviceWeight *virBlkioDeviceWeightPtr;
-struct _virBlkioDeviceWeight {
+typedef struct _virBlkioDevice virBlkioDevice;
+typedef virBlkioDevice *virBlkioDevicePtr;
+struct _virBlkioDevice {
 char *path;
 unsigned int weight;
 };
@@ -1910,7 +1910,7 @@ struct _virDomainIdMapDef {
 };
 
 
-void virBlkioDeviceArrayClear(virBlkioDeviceWeightPtr deviceWeights,
+void virBlkioDeviceArrayClear(virBlkioDevicePtr deviceWeights,
   int ndevices);
 
 typedef struct _virDomainResourceDef virDomainResourceDef;
@@ -1939,7 +1939,7 @@ struct _virDomainDef {
 unsigned int weight;
 
 size_t ndevices;
-virBlkioDeviceWeightPtr devices;
+virBlkioDevicePtr devices;
 } blkio;
 
 struct {
diff --git a/src/lxc/lxc_cgroup.c b/src/lxc/lxc_cgroup.c
index 275e250..310a476 100644
--- a/src/lxc/lxc_cgroup.c
+++ b/src/lxc/lxc_cgroup.c
@@ -112,10 +112,10 @@ static int virLXCCgroupSetupBlkioTune(virDomainDefPtr def,
 
 if (def-blkio.ndevices) {
 for (i = 0; i  def-blkio.ndevices; i++) {
-virBlkioDeviceWeightPtr dw = def-blkio.devices[i];
-if (!dw-weight)
+virBlkioDevicePtr dev = def-blkio.devices[i];
+if (!dev-weight)
 continue;
-if (virCgroupSetBlkioDeviceWeight(cgroup, dw-path, dw-weight)  
0)
+if (virCgroupSetBlkioDeviceWeight(cgroup, dev-path, dev-weight) 
 0)
 return -1;
 }
 }
diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
index f0cacd0..a18955e 100644
--- a/src/qemu/qemu_cgroup.c
+++ b/src/qemu/qemu_cgroup.c
@@ -399,11 +399,11 @@ qemuSetupBlkioCgroup(virDomainObjPtr vm)
 
 if (vm-def-blkio.ndevices) {
 for (i = 0; i  vm-def-blkio.ndevices; i++) {
-virBlkioDeviceWeightPtr dw = vm-def-blkio.devices[i];
-if (!dw-weight

[libvirt] [PATCH 5/9] blkio: Setting throttle blkio cgroup for domain

2013-12-01 Thread Gao feng
This patch extends virCgroupSetBlkioWeightDevice and
renames it to virCgroupSetBlkioDevice, so that we can use
this interface to set up the throttle blkio cgroup too.

Signed-off-by: Guan Qiang hzguanqi...@corp.netease.com
Signed-off-by: Gao feng gaof...@cn.fujitsu.com
---
 src/libvirt_private.syms |   3 +
 src/lxc/lxc_cgroup.c |  10 ++-
 src/qemu/qemu_cgroup.c   |  11 +--
 src/qemu/qemu_driver.c   |  10 ++-
 src/util/vircgroup.c | 170 ---
 src/util/vircgroup.h |  18 +
 6 files changed, 203 insertions(+), 19 deletions(-)

diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index e5f7cbd..c008e2b 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -1024,6 +1024,9 @@ virCgroupNewVcpu;
 virCgroupPathOfController;
 virCgroupRemove;
 virCgroupRemoveRecursively;
+virCgroupSetBlkioDevice;
+virCgroupSetBlkioDeviceBps;
+virCgroupSetBlkioDeviceIops;
 virCgroupSetBlkioDeviceWeight;
 virCgroupSetBlkioWeight;
 virCgroupSetCpuCfsPeriod;
diff --git a/src/lxc/lxc_cgroup.c b/src/lxc/lxc_cgroup.c
index 310a476..1c4bee5 100644
--- a/src/lxc/lxc_cgroup.c
+++ b/src/lxc/lxc_cgroup.c
@@ -113,9 +113,13 @@ static int virLXCCgroupSetupBlkioTune(virDomainDefPtr def,
 if (def-blkio.ndevices) {
 for (i = 0; i  def-blkio.ndevices; i++) {
 virBlkioDevicePtr dev = def-blkio.devices[i];
-if (!dev-weight)
-continue;
-if (virCgroupSetBlkioDeviceWeight(cgroup, dev-path, dev-weight) 
 0)
+if (virCgroupSetBlkioDevice(cgroup,
+dev-path,
+dev-weight,
+dev-riops,
+dev-wiops,
+dev-rbps,
+dev-wbps)  0)
 return -1;
 }
 }
diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
index a18955e..3b579ca 100644
--- a/src/qemu/qemu_cgroup.c
+++ b/src/qemu/qemu_cgroup.c
@@ -400,10 +400,13 @@ qemuSetupBlkioCgroup(virDomainObjPtr vm)
 if (vm-def-blkio.ndevices) {
 for (i = 0; i  vm-def-blkio.ndevices; i++) {
 virBlkioDevicePtr dev = vm-def-blkio.devices[i];
-if (!dev-weight)
-continue;
-if (virCgroupSetBlkioDeviceWeight(priv-cgroup, dev-path,
-  dev-weight)  0)
+if (virCgroupSetBlkioDevice(priv-cgroup,
+dev-path,
+dev-weight,
+dev-riops,
+dev-wiops,
+dev-rbps,
+dev-wbps)  0)
 return -1;
 }
 }
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 5c0c5e5..61dbe7f 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -7616,9 +7616,13 @@ qemuDomainSetBlkioParameters(virDomainPtr dom,
 continue;
 }
 for (j = 0; j  ndevices; j++) {
-if (virCgroupSetBlkioDeviceWeight(priv-cgroup,
-  devices[j].path,
-  devices[j].weight)  0) {
+if (virCgroupSetBlkioDevice(priv-cgroup,
+devices[j].path,
+devices[j].weight,
+devices[j].riops,
+devices[j].wiops,
+devices[j].rbps,
+devices[j].wbps)  0) {
 ret = -1;
 break;
 }
diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c
index 5c43e10..7f9ba49 100644
--- a/src/util/vircgroup.c
+++ b/src/util/vircgroup.c
@@ -1824,18 +1824,110 @@ virCgroupGetBlkioWeight(virCgroupPtr group, unsigned 
int *weight)
 return ret;
 }
 
+/**
+ * virCgroupSetBlkioDeviceIops:
+ * @group: The cgroup to change block io setting for
+ * @path: The path of device
+ * @read: setup the read iops or write iops
+ * @iops: The new device iops throttle, or 0 to clear
+ *
+ * Returns: 0 on success, -1 on error
+ */
+int
+virCgroupSetBlkioDeviceIops(virCgroupPtr group,
+const char *path,
+bool read,
+unsigned int iops)
+{
+char *str;
+struct stat sb;
+int ret;
+
+if (stat(path, sb)  0) {
+virReportSystemError(errno,
+ _(Path '%s' is not accessible),
+ path);
+return -1;
+}
+
+if (!S_ISBLK(sb.st_mode

[libvirt] [PATCH 0/9] Add throttle blkio cgroup support for libvirt

2013-12-01 Thread Gao feng
Right now, libvirt only supports the cfq based blkio cgroup;
this means that if a block device doesn't use the cfq scheduler, the
blkio cgroup will have no effect.

This patchset adds the throttle blkio cgroup support for libvirt,
introduces four elements for domain configuration and extends the
virsh command blkiotune.

This patchset is a new version of Guan Qiang's patchset:
https://www.redhat.com/archives/libvir-list/2013-October/msg01066.html

Changes from Guan Qiang's patchset:
1, split into 8 patches, to make the logic clearer
2, change the type of read/write iops from unsigned long long to unsigned int;
   trying to set read/write iops to a value bigger than the maximum of
   unsigned int will fail.
3, fix some logic flaws.

Gao feng (9):
  rename virDomainBlkioDeviceWeightParseXML to
virDomainBlkioDeviceParseXML
  rename virBlkioDeviceWeightArrayClear to virBlkioDeviceArrayClear
  rename virBlkioDeviceWeightPtr to virBlkioDevicePtr
  domain: introduce xml elements for throttle blkio cgroup
  blkio: Setting throttle blkio cgroup for domain
  qemu: allow to setup throttle blkio cgroup through virsh
  virsh: add virsh manual for setting throttle blkio cgroup
  lxc: allow to setup throttle blkio cgroup through virsh
  qemu: add new throttle blkio cgroup elements to the test xml

 docs/schemas/domaincommon.rng  |  28 +-
 include/libvirt/libvirt.h.in   |  45 ++
 src/conf/domain_conf.c | 113 +++-
 src/conf/domain_conf.h |  16 +-
 src/libvirt_private.syms   |   5 +-
 src/lxc/lxc_cgroup.c   |  12 +-
 src/lxc/lxc_driver.c   | 649 -
 src/qemu/qemu_cgroup.c |  13 +-
 src/qemu/qemu_driver.c | 432 --
 src/util/vircgroup.c   | 170 +-
 src/util/vircgroup.h   |  18 +
 .../qemuxml2argv-blkiotune-device.xml  |   8 +
 tools/virsh-domain.c   |  64 ++
 tools/virsh.pod|  36 +-
 14 files changed, 1485 insertions(+), 124 deletions(-)

-- 
1.8.3.1

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


[libvirt] [PATCH 4/9] domain: introduce xml elements for throttle blkio cgroup

2013-12-01 Thread Gao feng
This patch introduces new xml elements under blkiotune,
we use these new elements to setup the throttle blkio
cgroup for domain. The new blkiotune node looks like this:

blkiotune
  device
path/path/to/block/path
weight1000/weight
read_iops1/read_iops
write_iops1/write_iops
read_bps1/read_bps
write_bps1/write_bps
  /device
/blkiotune

Signed-off-by: Guan Qiang hzguanqi...@corp.netease.com
Signed-off-by: Gao feng gaof...@cn.fujitsu.com
---
 docs/schemas/domaincommon.rng | 28 +--
 src/conf/domain_conf.c| 83 +--
 src/conf/domain_conf.h|  4 +++
 3 files changed, 102 insertions(+), 13 deletions(-)

diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng
index 80848d2..e3e4766 100644
--- a/docs/schemas/domaincommon.rng
+++ b/docs/schemas/domaincommon.rng
@@ -621,9 +621,31 @@
   element name=path
 ref name=absFilePath/
   /element
-  element name=weight
-ref name=weight/
-  /element
+  optional
+element name=weight
+  ref name=weight/
+/element
+  /optional
+  optional
+element name=read_iops
+  data type='unsignedInt'/
+/element
+  /optional
+  optional
+element name=write_iops
+  data type='unsignedInt'/
+/element
+  /optional
+  optional
+element name=read_bps
+  data type='unsignedLong'/
+/element
+  /optional
+  optional
+element name=write_bps
+  data type='unsignedLong'/
+/element
+  /optional
 /interleave
   /element
 /zeroOrMore
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index 98754e5..9bcc14f 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -899,6 +899,10 @@ virBlkioDeviceArrayClear(virBlkioDevicePtr devices,
  *   device
  * path/fully/qualified/device/path/path
  * weightweight/weight
+ * read_bpsbps/read_bps
+ * read_iopsiops/read_iops
+ * write_bpsbps/write_bps
+ * write_iopsiops/write_iops
  *   /device
  *
  * and fills a virBlkioDeviceTune struct.
@@ -907,7 +911,7 @@ static int
 virDomainBlkioDeviceParseXML(xmlNodePtr root,
  virBlkioDevicePtr dev)
 {
-char *c;
+char *c = NULL;
 xmlNodePtr node;
 
 node = root-children;
@@ -921,9 +925,43 @@ virDomainBlkioDeviceParseXML(xmlNodePtr root,
 virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
_(could not parse weight %s),
c);
-VIR_FREE(c);
-VIR_FREE(dev-path);
-return -1;
+goto error;
+}
+VIR_FREE(c);
+} else if (xmlStrEqual(node-name, BAD_CAST read_bps)) {
+c = (char *)xmlNodeGetContent(node);
+if (virStrToLong_ull(c, NULL, 10, dev-rbps)  0) {
+virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+   _(could not parse read bps %s),
+   c);
+goto error;
+}
+VIR_FREE(c);
+} else if (xmlStrEqual(node-name, BAD_CAST write_bps)) {
+c = (char *)xmlNodeGetContent(node);
+if (virStrToLong_ull(c, NULL, 10, dev-wbps)  0) {
+virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+   _(could not parse write bps %s),
+   c);
+goto error;
+}
+VIR_FREE(c);
+} else if (xmlStrEqual(node-name, BAD_CAST read_iops)) {
+c = (char *)xmlNodeGetContent(node);
+if (virStrToLong_ui(c, NULL, 10, dev-riops)  0) {
+virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+   _(could not parse read iops %s),
+   c);
+goto error;
+}
+VIR_FREE(c);
+} else if (xmlStrEqual(node-name, BAD_CAST write_iops)) {
+c = (char *)xmlNodeGetContent(node);
+if (virStrToLong_ui(c, NULL, 10, dev-wiops)  0) {
+virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+   _(could not parse write iops %s),
+   c);
+goto error;
 }
 VIR_FREE(c);
 }
@@ -937,6 +975,11 @@ virDomainBlkioDeviceParseXML(xmlNodePtr root,
 }
 
 return 0;
+
+error

[libvirt] [PATCH 8/9] lxc: allow to setup throttle blkio cgroup through virsh

2013-12-01 Thread Gao feng
With this patch, users can set the throttle blkio cgroup for
an lxc domain through the virsh tool.

The functions are copied from qemu_driver.

Signed-off-by: Guan Qiang hzguanqi...@corp.netease.com
Signed-off-by: Gao feng gaof...@cn.fujitsu.com
---
 src/lxc/lxc_driver.c | 649 +--
 1 file changed, 624 insertions(+), 25 deletions(-)

diff --git a/src/lxc/lxc_driver.c b/src/lxc/lxc_driver.c
index 61a90ca..873e14b 100644
--- a/src/lxc/lxc_driver.c
+++ b/src/lxc/lxc_driver.c
@@ -1545,6 +1545,20 @@ static int lxcStateCleanup(void)
 return 0;
 }
 
+static int
+lxcConnectSupportsFeature(virConnectPtr conn, int feature)
+{
+if (virConnectSupportsFeatureEnsureACL(conn)  0)
+return -1;
+
+switch (feature) {
+case VIR_DRV_FEATURE_TYPED_PARAM_STRING:
+return 1;
+default:
+return 0;
+}
+}
+
 
 static int lxcConnectGetVersion(virConnectPtr conn, unsigned long *version)
 {
@@ -1910,6 +1924,159 @@ lxcDomainGetSchedulerParameters(virDomainPtr domain,
 return lxcDomainGetSchedulerParametersFlags(domain, params, nparams, 0);
 }
 
+static int
+lxcDomainParseBlkioDeviceStr(char *blkioDeviceStr, const char *type,
+ virBlkioDevicePtr *dev, size_t *size)
+{
+char *temp;
+int ndevices = 0;
+int nsep = 0;
+size_t i;
+virBlkioDevicePtr result = NULL;
+
+*dev = NULL;
+*size = 0;
+
+if (STREQ(blkioDeviceStr, ))
+return 0;
+
+temp = blkioDeviceStr;
+while (temp) {
+temp = strchr(temp, ',');
+if (temp) {
+temp++;
+nsep++;
+}
+}
+
+/* A valid string must have even number of fields, hence an odd
+ * number of commas.  */
+if (!(nsep  1))
+goto error;
+
+ndevices = (nsep + 1) / 2;
+
+if (VIR_ALLOC_N(result, ndevices)  0)
+return -1;
+
+i = 0;
+temp = blkioDeviceStr;
+while (temp) {
+char *p = temp;
+
+/* device path */
+p = strchr(p, ',');
+if (!p)
+goto error;
+
+if (VIR_STRNDUP(result[i].path, temp, p - temp)  0)
+goto cleanup;
+
+/* value */
+temp = p + 1;
+
+if (STREQ(type, VIR_DOMAIN_BLKIO_DEVICE_WEIGHT)) {
+if (virStrToLong_ui(temp, p, 10, result[i].weight)  0)
+goto error;
+} else if (STREQ(type, VIR_DOMAIN_BLKIO_DEVICE_READ_IOPS)) {
+if (virStrToLong_ui(temp, p, 10, result[i].riops)  0)
+goto error;
+} else if (STREQ(type, VIR_DOMAIN_BLKIO_DEVICE_WRITE_IOPS)) {
+if (virStrToLong_ui(temp, p, 10, result[i].wiops)  0)
+goto error;
+} else if (STREQ(type, VIR_DOMAIN_BLKIO_DEVICE_READ_BPS)) {
+if (virStrToLong_ull(temp, p, 10, result[i].rbps)  0)
+goto error;
+} else {
+if (virStrToLong_ull(temp, p, 10, result[i].wbps)  0)
+goto error;
+}
+
+i++;
+
+if (*p == '\0')
+break;
+else if (*p != ',')
+goto error;
+temp = p + 1;
+}
+
+if (!i)
+VIR_FREE(result);
+
+*dev = result;
+*size = i;
+
+return 0;
+
+error:
+virReportError(VIR_ERR_INVALID_ARG,
+   _(unable to parse device weight '%s'), blkioDeviceStr);
+cleanup:
+virBlkioDeviceArrayClear(result, ndevices);
+VIR_FREE(result);
+return -1;
+}
+
+static int
+lxcDomainMergeBlkioDevice(virBlkioDevicePtr *dest_array,
+  size_t *dest_size,
+  virBlkioDevicePtr src_array,
+  size_t src_size,
+  const char *type)
+{
+size_t i, j;
+virBlkioDevicePtr dest, src;
+
+for (i = 0; i  src_size; i++) {
+bool found = false;
+
+src = src_array[i];
+for (j = 0; j  *dest_size; j++) {
+dest = (*dest_array)[j];
+if (STREQ(src-path, dest-path)) {
+found = true;
+
+if (STREQ(type, VIR_DOMAIN_BLKIO_DEVICE_WEIGHT))
+dest-weight = src-weight;
+else if (STREQ(type, VIR_DOMAIN_BLKIO_DEVICE_READ_IOPS))
+dest-riops = src-riops;
+else if (STREQ(type, VIR_DOMAIN_BLKIO_DEVICE_WRITE_IOPS))
+dest-wiops = src-wiops;
+else if (STREQ(type, VIR_DOMAIN_BLKIO_DEVICE_READ_BPS))
+dest-rbps = src-rbps;
+else
+dest-wbps = src-wbps;
+
+break;
+}
+}
+if (!found) {
+if (!src-weight  !src-riops  !src-wiops  !src-rbps  
!src-wbps)
+continue;
+if (VIR_EXPAND_N(*dest_array, *dest_size, 1)  0)
+return -1;
+dest = (*dest_array)[*dest_size - 1];
+dest-path = src-path;
+
+if (STREQ(type, VIR_DOMAIN_BLKIO_DEVICE_WEIGHT

[libvirt] [PATCH 7/9] virsh: add virsh manual for setting throttle blkio cgroup

2013-12-01 Thread Gao feng
Add the manual for the new blkiotune options, --device-read-iops,
--device-write-iops, --device-read-bps, --device-write-bps.

Signed-off-by: Guan Qiang hzguanqi...@corp.netease.com
Signed-off-by: Gao feng gaof...@cn.fujitsu.com
---
 tools/virsh.pod | 36 ++--
 1 file changed, 34 insertions(+), 2 deletions(-)

diff --git a/tools/virsh.pod b/tools/virsh.pod
index dac9a08..8ecc27c 100644
--- a/tools/virsh.pod
+++ b/tools/virsh.pod
@@ -1625,8 +1625,12 @@ The guaranteed minimum memory allocation for the guest.
 Specifying -1 as a value for these limits is interpreted as unlimited.
 
 =item Bblkiotune Idomain [I--weight Bweight]
-[I--device-weights Bdevice-weights] [[I--config]
-[I--live] | [I--current]]
+[I--device-weights Bdevice-weights]
+[I--device-read-iops Bdevice-read-iops]
+[I--device-write-iops Bdevice-write-iops]
+[I--device-read-bps Bdevice-read-bps]
+[I--device-write-bps Bdevice-write-bps]
+[[I--config] [I--live] | [I--current]]
 
 Display or set the blkio parameters. QEMU/KVM supports I--weight.
 I--weight is in range [100, 1000]. After kernel 2.6.39, the value
@@ -1639,6 +1643,34 @@ or the value 0 to remove that device from per-device 
listings.
 Only the devices listed in the string are modified;
 any existing per-device weights for other devices remain unchanged.
 
+Bdevice-read-iops is a single string listing one or more device/read_iops
+pairs, in the format of /path/to/device,read_iops,/path/to/device,read_iops.
+Each read_iops is a number of type unsigned int, or the value 0 to remove that
+device from the per-device listing.
+Only the devices listed in the string are modified;
+any existing per-device read_iops for other devices remain unchanged.
+
+Bdevice-write-iops is a single string listing one or more device/write_iops
+pairs, in the format of /path/to/device,write_iops,/path/to/device,write_iops.
+Each write_iops is a number of type unsigned int, or the value 0 to remove that
+device from the per-device listing.
+Only the devices listed in the string are modified;
+any existing per-device write_iops for other devices remain unchanged.
+
+Bdevice-read-bps is a single string listing one or more device/read_bps
+pairs, in the format of /path/to/device,read_bps,/path/to/device,read_bps.
+Each read_bps is a number of type unsigned long long, or the value 0 to remove
+that device from the per-device listing.
+Only the devices listed in the string are modified;
+any existing per-device read_bps for other devices remain unchanged.
+
+Bdevice-write-bps is a single string listing one or more device/write_bps
+pairs, in the format of /path/to/device,write_bps,/path/to/device,write_bps.
+Each write_bps is a number of type unsigned long long, or the value 0 to remove
+that device from the per-device listing.
+Only the devices listed in the string are modified;
+any existing per-device write_bps for other devices remain unchanged.
+
 If I--live is specified, affect a running guest.
 If I--config is specified, affect the next boot of a persistent guest.
 If I--current is specified, affect the current guest state.
-- 
1.8.3.1

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


[libvirt] [PATCH 9/9] qemu: add new throttle blkio cgroup elements to the test xml

2013-12-01 Thread Gao feng
Signed-off-by: Guan Qiang hzguanqi...@corp.netease.com
Signed-off-by: Gao feng gaof...@cn.fujitsu.com
---
 tests/qemuxml2argvdata/qemuxml2argv-blkiotune-device.xml | 8 
 1 file changed, 8 insertions(+)

diff --git a/tests/qemuxml2argvdata/qemuxml2argv-blkiotune-device.xml 
b/tests/qemuxml2argvdata/qemuxml2argv-blkiotune-device.xml
index 743cf29..a113efb 100644
--- a/tests/qemuxml2argvdata/qemuxml2argv-blkiotune-device.xml
+++ b/tests/qemuxml2argvdata/qemuxml2argv-blkiotune-device.xml
@@ -8,10 +8,18 @@
 device
   path/dev/sda/path
   weight400/weight
+  read_iops1/read_iops
+  write_iops1/write_iops
+  read_bps1/read_bps
+  write_bps1/write_bps
 /device
 device
   path/dev/sdb/path
   weight900/weight
+  read_iops2/read_iops
+  write_iops2/write_iops
+  read_bps2/read_bps
+  write_bps2/write_bps
 /device
   /blkiotune
   vcpu placement='static'1/vcpu
-- 
1.8.3.1

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


[libvirt] [PATCH 6/9] qemu: allow to setup throttle blkio cgroup through virsh

2013-12-01 Thread Gao feng
With this patch, users can set up the throttle blkio cgroup
for a domain through the virsh command, such as:

virsh blkiotune domain1 --device-read-bps /dev/sda1,1,/dev/sda2,2
--device-write-bps /dev/sda1,1 --device-read-iops /dev/sda1,1
--device-write-iops /dev/sda1,1,/dev/sda2,0

Signed-off-by: Guan Qiang hzguanqi...@corp.netease.com
Signed-off-by: Gao feng gaof...@cn.fujitsu.com
---
 include/libvirt/libvirt.h.in |  45 +
 src/qemu/qemu_driver.c   | 418 +++
 tools/virsh-domain.c |  64 +++
 3 files changed, 488 insertions(+), 39 deletions(-)

diff --git a/include/libvirt/libvirt.h.in b/include/libvirt/libvirt.h.in
index 5aad75c..d054900 100644
--- a/include/libvirt/libvirt.h.in
+++ b/include/libvirt/libvirt.h.in
@@ -1806,6 +1806,51 @@ char *  
virDomainGetSchedulerType(virDomainPtr domain,
 
 #define VIR_DOMAIN_BLKIO_DEVICE_WEIGHT device_weight
 
+
+/**
+ * VIR_DOMAIN_BLKIO_DEVICE_READ_IOPS:
+ *
+ * Macro for the blkio tunable throttle.read_iops_device: it represents
+ * the number of read operations on the block device per second, as a
+ * string. The string is parsed as a series of /path/to/device, read_iops
+ * elements, separated by ','.
+ */
+
+#define VIR_DOMAIN_BLKIO_DEVICE_READ_IOPS device_read_iops
+
+
+/**
+ * VIR_DOMAIN_BLKIO_DEVICE_WRITE_IOPS:
+ *
+ * Macro for the blkio tunable throttle.write_iops_device: it represents
+ * the number of write operations on the block device per second, as a
+ * string. The string is parsed as a series of /path/to/device, write_iops
+ * elements, separated by ','.
+ */
+#define VIR_DOMAIN_BLKIO_DEVICE_WRITE_IOPS device_write_iops
+
+
+/**
+ * VIR_DOMAIN_BLKIO_DEVICE_READ_BPS:
+ *
+ * Macro for the blkio tunable throttle.read_bps_device: it represents
+ * the number of bytes read from the block device per second, as a string.
+ * The string is parsed as a series of /path/to/device, read_bps elements,
+ * separated by ','.
+ */
+#define VIR_DOMAIN_BLKIO_DEVICE_READ_BPS device_read_bps
+
+
+/**
+ * VIR_DOMAIN_BLKIO_DEVICE_WRITE_BPS:
+ *
+ * Macro for the blkio tunable throttle.write_bps_device: it represents
+ * the number of bytes written to the block device per second, as a string.
+ * The string is parsed as a series of /path/to/device, write_bps elements,
+ * separated by ','.
+ */
+#define VIR_DOMAIN_BLKIO_DEVICE_WRITE_BPS device_write_bps
+
 /* Set Blkio tunables for the domain*/
 int virDomainSetBlkioParameters(virDomainPtr domain,
 virTypedParameterPtr params,
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 61dbe7f..1ad5b94 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -131,7 +131,7 @@
 # define KVM_CAP_NR_VCPUS 9   /* returns max vcpus per vm */
 #endif
 
-#define QEMU_NB_BLKIO_PARAM  2
+#define QEMU_NB_BLKIO_PARAM  6
 
 #define QEMU_NB_BANDWIDTH_PARAM 6
 
@@ -7419,12 +7419,12 @@ cleanup:
 return ret;
 }
 
-/* deviceWeightStr in the form of /device/path,weight,/device/path,weight
+/* blkioDeviceStr in the form of /device/path,weight,/device/path,weight
  * for example, /dev/disk/by-path/pci-:00:1f.2-scsi-0:0:0:0,800
  */
 static int
-qemuDomainParseDeviceWeightStr(char *deviceWeightStr,
-   virBlkioDevicePtr *dev, size_t *size)
+qemuDomainParseBlkioDeviceStr(char *blkioDeviceStr, const char *type,
+  virBlkioDevicePtr *dev, size_t *size)
 {
 char *temp;
 int ndevices = 0;
@@ -7435,10 +7435,10 @@ qemuDomainParseDeviceWeightStr(char *deviceWeightStr,
 *dev = NULL;
 *size = 0;
 
-if (STREQ(deviceWeightStr, ))
+if (STREQ(blkioDeviceStr, ))
 return 0;
 
-temp = deviceWeightStr;
+temp = blkioDeviceStr;
 while (temp) {
 temp = strchr(temp, ',');
 if (temp) {
@@ -7458,7 +7458,7 @@ qemuDomainParseDeviceWeightStr(char *deviceWeightStr,
 return -1;
 
 i = 0;
-temp = deviceWeightStr;
+temp = blkioDeviceStr;
 while (temp) {
 char *p = temp;
 
@@ -7470,11 +7470,25 @@ qemuDomainParseDeviceWeightStr(char *deviceWeightStr,
 if (VIR_STRNDUP(result[i].path, temp, p - temp)  0)
 goto cleanup;
 
-/* weight */
+/* value */
 temp = p + 1;
 
-if (virStrToLong_ui(temp, p, 10, result[i].weight)  0)
-goto error;
+if (STREQ(type, VIR_DOMAIN_BLKIO_DEVICE_WEIGHT)) {
+if (virStrToLong_ui(temp, p, 10, result[i].weight)  0)
+goto error;
+} else if (STREQ(type, VIR_DOMAIN_BLKIO_DEVICE_READ_IOPS)) {
+if (virStrToLong_ui(temp, p, 10, result[i].riops)  0)
+goto error;
+} else if (STREQ(type, VIR_DOMAIN_BLKIO_DEVICE_WRITE_IOPS)) {
+if (virStrToLong_ui(temp, p, 10, result[i].wiops)  0)
+goto error;
+} else if (STREQ(type, VIR_DOMAIN_BLKIO_DEVICE_READ_BPS)) {
+if (virStrToLong_ull(temp, p, 10, result[i].rbps)  0

[libvirt] [PATCH] LXC: Change incorrect error report in lxcContainerPivotRoot

2013-11-21 Thread Gao feng
The newroot is not mounted as tmpfs; we bind root->src to it.

Signed-off-by: Gao feng gaof...@cn.fujitsu.com
---
 src/lxc/lxc_container.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c
index fbce8e8..86705a7 100644
--- a/src/lxc/lxc_container.c
+++ b/src/lxc/lxc_container.c
@@ -706,8 +706,8 @@ static int lxcContainerPivotRoot(virDomainFSDefPtr root)
 /* ... and mount our root onto it */
 if (mount(root-src, newroot, NULL, MS_BIND|MS_REC, NULL)  0) {
 virReportSystemError(errno,
- _(Failed to bind new root %s into tmpfs),
- root-src);
+ _(Failed to bind %s to new root %s),
+ root-src, newroot);
 goto err;
 }
 
-- 
1.8.3.1

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


[libvirt] [PATCH 1/2] LXC: fix the problem that libvirt lxc fail to start on latest kernel

2013-11-19 Thread Gao feng
After kernel commit 5ff9d8a65ce80efb509ce4e8051394e9ed2cd942
vfs: Lock in place mounts from more privileged users,

an unprivileged user has no right to move the mounts that are
inherited from the parent mountns. We use this feature to move
the /stateDir/domain-name.{dev, devpts} to the /dev/ and
/dev/pts directories of the container, so this commit breaks libvirt lxc.

This patch does the moving on the host side, since we are a privileged
user at that point.

Signed-off-by: Gao feng gaof...@cn.fujitsu.com
---
 src/lxc/lxc_container.c  | 81 +---
 src/lxc/lxc_controller.c | 53 +++
 2 files changed, 54 insertions(+), 80 deletions(-)

diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c
index 2bdf957..61283e4 100644
--- a/src/lxc/lxc_container.c
+++ b/src/lxc/lxc_container.c
@@ -953,76 +953,6 @@ static int lxcContainerMountProcFuse(virDomainDefPtr def 
ATTRIBUTE_UNUSED,
 }
 #endif
 
-static int lxcContainerMountFSDev(virDomainDefPtr def,
-  const char *stateDir)
-{
-int ret = -1;
-char *path = NULL;
-
-VIR_DEBUG(Mount /dev/ stateDir=%s, stateDir);
-
-if ((ret = virAsprintf(path, /.oldroot/%s/%s.dev,
-   stateDir, def-name))  0)
-return ret;
-
-if (virFileMakePath(/dev)  0) {
-virReportSystemError(errno, %s,
- _(Cannot create /dev));
-goto cleanup;
-}
-
-VIR_DEBUG(Trying to move %s to /dev, path);
-
-if (mount(path, /dev, NULL, MS_MOVE, NULL)  0) {
-virReportSystemError(errno,
- _(Failed to mount %s on /dev),
- path);
-goto cleanup;
-}
-
-ret = 0;
-
-cleanup:
-VIR_FREE(path);
-return ret;
-}
-
-static int lxcContainerMountFSDevPTS(virDomainDefPtr def,
- const char *stateDir)
-{
-int ret;
-char *path = NULL;
-
-VIR_DEBUG(Mount /dev/pts stateDir=%s, stateDir);
-
-if ((ret = virAsprintf(path,
-   /.oldroot/%s/%s.devpts,
-   stateDir,
-   def-name))  0)
-return ret;
-
-if (virFileMakePath(/dev/pts)  0) {
-virReportSystemError(errno, %s,
- _(Cannot create /dev/pts));
-goto cleanup;
-}
-
-VIR_DEBUG(Trying to move %s to /dev/pts, path);
-
-if ((ret = mount(path, /dev/pts,
- NULL, MS_MOVE, NULL))  0) {
-virReportSystemError(errno,
- _(Failed to mount %s on /dev/pts),
- path);
-goto cleanup;
-}
-
-cleanup:
-VIR_FREE(path);
-
-return ret;
-}
-
 static int lxcContainerSetupDevices(char **ttyPaths, size_t nttyPaths)
 {
 size_t i;
@@ -1683,14 +1613,6 @@ static int lxcContainerSetupPivotRoot(virDomainDefPtr 
vmDef,
 if (virCgroupIsolateMount(cgroup, /.oldroot/, sec_mount_options)  0)
 goto cleanup;
 
-/* Mounts /dev */
-if (lxcContainerMountFSDev(vmDef, stateDir)  0)
-goto cleanup;
-
-/* Mounts /dev/pts */
-if (lxcContainerMountFSDevPTS(vmDef, stateDir)  0)
-goto cleanup;
-
 /* Setup device nodes in /dev/ */
 if (lxcContainerSetupDevices(ttyPaths, nttyPaths)  0)
 goto cleanup;
@@ -1853,8 +1775,7 @@ static int lxcContainerChild(void *data)
 const char *tty = argv-ttyPaths[0];
 if (STRPREFIX(tty, /dev/pts/))
 tty += strlen(/dev/pts/);
-if (virAsprintf(ttyPath, %s/%s.devpts/%s,
-LXC_STATE_DIR, vmDef-name, tty)  0)
+if (virAsprintf(ttyPath, %s/dev/pts/%s, root-src, tty)  0)
 goto cleanup;
 } else if (VIR_STRDUP(ttyPath, /dev/null)  0) {
 goto cleanup;
diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c
index c013147..f7b4127 100644
--- a/src/lxc/lxc_controller.c
+++ b/src/lxc/lxc_controller.c
@@ -2020,6 +2020,56 @@ cleanup:
 }
 
 
+static int
+virLXCControllerMoveMount(char *name, char *root,
+  const char *s, const char *d)
+{
+int ret = -1;
+char *src = NULL;
+char *dst = NULL;
+
+if ((ret = virAsprintf(src, %s/%s.%s,
+   LXC_STATE_DIR, name, s))  0)
+return ret;
+
+if ((ret = virAsprintf(dst, %s%s, root, d))  0)
+goto cleanup;
+
+if (virFileMakePath(dst)  0) {
+virReportSystemError(errno, _(Cannot create %s), dst);
+goto cleanup;
+}
+
+if (mount(src, dst, NULL, MS_MOVE, NULL)  0) {
+virReportSystemError(errno,
+ _(Failed to mount %s on %s),
+ src, dst);
+goto cleanup;
+}
+
+ret = 0;
+cleanup:
+VIR_FREE(src);
+VIR_FREE(dst);
+return ret;
+}
+
+static int
+virLXCControllerMoveMounts(virDomainDefPtr def)
+{
+virDomainFSDefPtr root = virDomainGetRootFilesystem(def

[libvirt] [PATCH 2/2] LXC: don't unmount mounts for shared root

2013-11-19 Thread Gao feng
Also after commit 5ff9d8a65ce80efb509ce4e8051394e9ed2cd942
vfs: Lock in place mounts from more privileged users,

unprivileged user has no rights to umount the mounts that
inherited from parent mountns.

Right now I have no good idea how to fix this problem; we need
to do more research. This patch just skips unmounting these
mounts for a shared root when user namespace is enabled.

BTW, I think when libvirt lxc enables user namespace, a
configuration that shares the root with the host is very rare.

Signed-off-by: Gao feng gaof...@cn.fujitsu.com
---
 src/lxc/lxc_container.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c
index 61283e4..8003594 100644
--- a/src/lxc/lxc_container.c
+++ b/src/lxc/lxc_container.c
@@ -1591,7 +1591,9 @@ static int lxcContainerSetupPivotRoot(virDomainDefPtr 
vmDef,
 if (lxcContainerPivotRoot(root)  0)
 goto cleanup;
 
-if (STREQ(root-src, /) 
+/* FIXME: we should find a way to unmount these mounts for container
+ * even user namespace is enabled. */
+if (STREQ(root-src, /)  (!vmDef-idmap.nuidmap) 
 lxcContainerUnmountForSharedRoot(stateDir, vmDef-name)  0)
 goto cleanup;
 
-- 
1.8.3.1

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH 2/2] LXC: don't unmount mounts for shared root

2013-11-19 Thread Gao feng
On 11/19/2013 11:00 PM, Daniel P. Berrange wrote:
 On Tue, Nov 19, 2013 at 05:53:21PM +0800, Gao feng wrote:
 Also after commit 5ff9d8a65ce80efb509ce4e8051394e9ed2cd942
 vfs: Lock in place mounts from more privileged users,

 unprivileged user has no rights to umount the mounts that
 inherited from parent mountns.

 right now, I have no good idea to fix this problem, we need
 to do more research. this patch just skip unmounting these
 mounts for shared root.

 BTW, I think when libvirt lxc enables user namespace, the
 configuation that shares root with host is very rara.

 Signed-off-by: Gao feng gaof...@cn.fujitsu.com
 ---
  src/lxc/lxc_container.c | 4 +++-
  1 file changed, 3 insertions(+), 1 deletion(-)

 diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c
 index 61283e4..8003594 100644
 --- a/src/lxc/lxc_container.c
 +++ b/src/lxc/lxc_container.c
 @@ -1591,7 +1591,9 @@ static int lxcContainerSetupPivotRoot(virDomainDefPtr 
 vmDef,
  if (lxcContainerPivotRoot(root)  0)
  goto cleanup;
  
 -if (STREQ(root-src, /) 
 +/* FIXME: we should find a way to unmount these mounts for container
 + * even user namespace is enabled. */
 +if (STREQ(root-src, /)  (!vmDef-idmap.nuidmap) 
  lxcContainerUnmountForSharedRoot(stateDir, vmDef-name)  0)
  goto cleanup;
 
 If unmounting fails for these few temporary filesystems, then how is
 unmount succeeding for everything under /.oldroot after we do the
 pivot root ?  Does the pivot_root() confuse the kernel into thinking
 stuff under /.oldroot was owned by this process  thus allowed to be
 unmounted ?  Or is it falling back to the MNT_DETACH scenario instead ?
 
 
/.oldroot is mounted in the container, so the container has the right to umount it,
but the container has no right to umount the sub mounts under /.oldroot,
so it falls back to the MNT_DETACH scenario.

Hmm, so I think for [PATCH 1/2], using MS_BIND instead of MS_MOVE may be
a simpler way, since even if we move /dev and /dev/pts on the host side, unmounting
/.oldroot will fall back to the MNT_DETACH scenario too.

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH 2/2] LXC: don't unmount mounts for shared root

2013-11-19 Thread Gao feng
On 11/20/2013 09:50 AM, Gao feng wrote:
 On 11/19/2013 11:00 PM, Daniel P. Berrange wrote:
 On Tue, Nov 19, 2013 at 05:53:21PM +0800, Gao feng wrote:
 Also after commit 5ff9d8a65ce80efb509ce4e8051394e9ed2cd942
 vfs: Lock in place mounts from more privileged users,

 unprivileged user has no rights to umount the mounts that
 inherited from parent mountns.

 right now, I have no good idea to fix this problem, we need
 to do more research. this patch just skip unmounting these
 mounts for shared root.

 BTW, I think when libvirt lxc enables user namespace, the
 configuation that shares root with host is very rara.

 Signed-off-by: Gao feng gaof...@cn.fujitsu.com
 ---
  src/lxc/lxc_container.c | 4 +++-
  1 file changed, 3 insertions(+), 1 deletion(-)

 diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c
 index 61283e4..8003594 100644
 --- a/src/lxc/lxc_container.c
 +++ b/src/lxc/lxc_container.c
 @@ -1591,7 +1591,9 @@ static int lxcContainerSetupPivotRoot(virDomainDefPtr 
 vmDef,
  if (lxcContainerPivotRoot(root)  0)
  goto cleanup;
  
 -if (STREQ(root-src, /) 
 +/* FIXME: we should find a way to unmount these mounts for container
 + * even user namespace is enabled. */
 +if (STREQ(root-src, /)  (!vmDef-idmap.nuidmap) 
  lxcContainerUnmountForSharedRoot(stateDir, vmDef-name)  0)
  goto cleanup;

 If unmounting fails for these few temporary filesystems, then how is
 unmount succeeding for everything under /.oldroot after we do the
 pivot root ?  Does the pivot_root() confuse the kernel into thinking
 stuff under /.oldroot was owned by this process  thus allowed to be
 unmounted ?  Or is it falling back to the MNT_DETACH scenario instead ?


 ./oldroot is mounted in container, so container has rights to umount it,
 but for sub mounts under ./oldroot, container has no rights to umount them,
 so it falls back to MNT_DETACH scenario.
 
 hmm, So I think for the [PATCH 1/2], use MS_BIND instead of MS_MOVE may be
 a simple way, since even we move the /dev /dev/pts on host side, the unmount
 ./oldroot will fall back to MNT_DETACH scenario too.
 

Maybe in the future we can find a way to unmount all sub mounts under /.oldroot and
unmount the temporary mounts, but for now I have no idea how to implement this.

I will repost this patchset.
Thanks!

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


[libvirt] [PATCH v2 1/2] LXC: fix the problem that libvirt lxc fail to start on latest kernel

2013-11-19 Thread Gao feng
After kernel commit 5ff9d8a65ce80efb509ce4e8051394e9ed2cd942
vfs: Lock in place mounts from more privileged users,

an unprivileged user has no right to move mounts that were
inherited from the parent mountns. We use this feature to move
the /stateDir/domain-name.{dev, devpts} to the /dev/ and
/dev/pts directories of the container, so this kernel commit breaks libvirt lxc.

This patch changes the behavior to bind these mounts when
user namespace is enabled and to move these mounts when user
namespace is disabled.

Signed-off-by: Gao feng gaof...@cn.fujitsu.com
---
 src/lxc/lxc_container.c | 13 -
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c
index 2bdf957..3d9b491 100644
--- a/src/lxc/lxc_container.c
+++ b/src/lxc/lxc_container.c
@@ -958,6 +958,7 @@ static int lxcContainerMountFSDev(virDomainDefPtr def,
 {
 int ret = -1;
 char *path = NULL;
+int flags = def-idmap.nuidmap ? MS_BIND : MS_MOVE;
 
 VIR_DEBUG(Mount /dev/ stateDir=%s, stateDir);
 
@@ -971,9 +972,10 @@ static int lxcContainerMountFSDev(virDomainDefPtr def,
 goto cleanup;
 }
 
-VIR_DEBUG(Trying to move %s to /dev, path);
+VIR_DEBUG(Trying to %s %s to /dev, def-idmap.nuidmap ?
+  bind : move, path);
 
-if (mount(path, /dev, NULL, MS_MOVE, NULL)  0) {
+if (mount(path, /dev, NULL, flags, NULL)  0) {
 virReportSystemError(errno,
  _(Failed to mount %s on /dev),
  path);
@@ -992,6 +994,7 @@ static int lxcContainerMountFSDevPTS(virDomainDefPtr def,
 {
 int ret;
 char *path = NULL;
+int flags = def-idmap.nuidmap ? MS_BIND : MS_MOVE;
 
 VIR_DEBUG(Mount /dev/pts stateDir=%s, stateDir);
 
@@ -1007,10 +1010,10 @@ static int lxcContainerMountFSDevPTS(virDomainDefPtr 
def,
 goto cleanup;
 }
 
-VIR_DEBUG(Trying to move %s to /dev/pts, path);
+VIR_DEBUG(Trying to %s %s to /dev/pts, def-idmap.nuidmap ?
+  bind : move, path);
 
-if ((ret = mount(path, /dev/pts,
- NULL, MS_MOVE, NULL))  0) {
+if ((ret = mount(path, /dev/pts, NULL, flags, NULL))  0) {
 virReportSystemError(errno,
  _(Failed to mount %s on /dev/pts),
  path);
-- 
1.8.3.1

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


[libvirt] [PATCH v2 2/2] LXC: don't unmount mounts for shared root

2013-11-19 Thread Gao feng
Also after commit 5ff9d8a65ce80efb509ce4e8051394e9ed2cd942
vfs: Lock in place mounts from more privileged users,

unprivileged user has no rights to umount the mounts that
inherited from parent mountns.

Right now I have no good idea how to fix this problem; we need
to do more research. This patch just skips unmounting these
mounts for a shared root when user namespace is enabled.

BTW, I think when libvirt lxc enables user namespace, a
configuration that shares the root with the host is very rare.

Signed-off-by: Gao feng gaof...@cn.fujitsu.com
---
 src/lxc/lxc_container.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c
index 3d9b491..fbce8e8 100644
--- a/src/lxc/lxc_container.c
+++ b/src/lxc/lxc_container.c
@@ -1664,7 +1664,9 @@ static int lxcContainerSetupPivotRoot(virDomainDefPtr 
vmDef,
 if (lxcContainerPivotRoot(root)  0)
 goto cleanup;
 
-if (STREQ(root-src, /) 
+/* FIXME: we should find a way to unmount these mounts for container
+ * even user namespace is enabled. */
+if (STREQ(root-src, /)  (!vmDef-idmap.nuidmap) 
 lxcContainerUnmountForSharedRoot(stateDir, vmDef-name)  0)
 goto cleanup;
 
-- 
1.8.3.1

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH v2 2/2] LXC: don't unmount mounts for shared root

2013-11-19 Thread Gao feng
On 11/20/2013 10:11 AM, Gao feng wrote:
 Also after commit 5ff9d8a65ce80efb509ce4e8051394e9ed2cd942
 vfs: Lock in place mounts from more privileged users,
 
 unprivileged user has no rights to umount the mounts that
 inherited from parent mountns.
 
 right now, I have no good idea to fix this problem, we need
 to do more research. this patch just skip unmounting these
 mounts for shared root.
 
 BTW, I think when libvirt lxc enables user namespace, the
 configuation that shares root with host is very rara.

s/rara/rare
 
 Signed-off-by: Gao feng gaof...@cn.fujitsu.com
 ---
  src/lxc/lxc_container.c | 4 +++-
  1 file changed, 3 insertions(+), 1 deletion(-)
 
 diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c
 index 3d9b491..fbce8e8 100644
 --- a/src/lxc/lxc_container.c
 +++ b/src/lxc/lxc_container.c
 @@ -1664,7 +1664,9 @@ static int lxcContainerSetupPivotRoot(virDomainDefPtr 
 vmDef,
  if (lxcContainerPivotRoot(root)  0)
  goto cleanup;
  
 -if (STREQ(root-src, /) 
 +/* FIXME: we should find a way to unmount these mounts for container
 + * even user namespace is enabled. */
 +if (STREQ(root-src, /)  (!vmDef-idmap.nuidmap) 
  lxcContainerUnmountForSharedRoot(stateDir, vmDef-name)  0)
  goto cleanup;
  
 

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] Planning for next release

2013-11-18 Thread Gao feng
On 11/19/2013 10:09 AM, Daniel Veillard wrote:
   If we want to land a release on Mon Dec 2, I would suggest entering
 freeze next week. The amount of patches since 1.1.4 is not very large
 so we could start the freeze say on Wed 27, but IMHO it all depennds if
 Dan patches about splitting out the python bindings gets in for that
 release. If yes I would probably prefer to freeze one more day (start
 Tues 26), and bump release name to 1.2.0 as this is a significant change
 from an user perspective, otherwise 1.1.5 and freeze next Wed.
 
 Opinions ? Dan do you think you can/want to land this set in the
 coming week ?
 

libvirt lxc is broken by the latest kernel, and we need to fix this in
libvirt. I'm working on this problem and a patch will be sent this
week. I hope we can fix this problem before the release.


Thanks!

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH v2]lxc: don't mount dir if ownership couldn't be known

2013-11-17 Thread Gao feng
On 11/16/2013 12:24 AM, Daniel P. Berrange wrote:
 On Thu, Nov 14, 2013 at 05:44:40PM +0800, Chen Hanxiao wrote:


 -Original Message-
 From: Daniel P. Berrange [mailto:berra...@redhat.com]
 Sent: Wednesday, November 13, 2013 6:35 PM
 To: Chen Hanxiao
 Cc: libvir-list@redhat.com
 Subject: Re: [libvirt] [PATCH v2]lxc: don't mount dir if ownership couldn't 
 be
 known

 On Wed, Nov 13, 2013 at 04:51:43PM +0800, Chen Hanxiao wrote:
 From: Chen Hanxiao chenhanx...@cn.fujitsu.com

 If we enable userns, we could bind mount
 some dirs from host to guest, which don't belong to
 the target mapped uid/gid.

 Such as we could bind mount root's dirs to guest.
 What is worse, we could even modify root's files
 in that bind dir inside container.

 I still can't see what the problem is from the description
 here. Please can you give a clear example of the config
 used and exactly what goes wrong.


 1. enable user namespace
   idmap
 uid start='0' target='1001' count='10'/
 gid start='0' target='1001' count='10'/
   /idmap

 2. bind mount some dirs to container, which belongs to root or other users.
 filesystem type='mount' accessmode='passthrough'
   source dir='/media/LXC1'/
   target dir='/mnt'/
 /filesystem

 # ll /media/
 ...
 drwxr-xr-x.  3 root root4096 Nov 13 17:21 LXC1
 ...

 3. start container

 I used to encounter issues: inside container, we could modify files under 
 /mnt

 So I think inside user namespace, if we do not have a proper id mapping,
 we should not bind mount it for containers, or at least set it as readonly.
 
 FYI, I'm trying to reproduce the problem myself, but have discovered
 that current kernels cause a regression which prevents libirt starting
 any user namespace kernels - it fails mounting /proc and /dev/pts
 

FYI, I'm working on fixing this bug. Both the kernel and libvirt need to be changed.

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH] Avoid async signal safety problem in glibc's setxid

2013-11-17 Thread Gao feng
On 11/16/2013 12:20 AM, Daniel P. Berrange wrote:
 The glibc setxid is supposed to be async signal safe, but
 libc developers confirm that it is not. This causes a problem
 when libvirt_lxc starts the FUSE thread and then runs clone()
 to start the container. If the clone() was done before the
 FUSE thread has completely started up, then the container
 will hang in setxid after clone().
 
 The fix is to avoid creating any threads until after the
 container has been clone()'d. By avoiding any threads in
 the parent, the child is no longer required to run in an
 async signal safe context, and we thus avoid the glibc
 bug.
 
 Signed-off-by: Daniel P. Berrange berra...@redhat.com

ACK
thanks

 ---
  src/lxc/lxc_controller.c | 11 +--
  src/lxc/lxc_fuse.c   | 21 +++--
  src/lxc/lxc_fuse.h   |  1 +
  3 files changed, 25 insertions(+), 8 deletions(-)
 
 diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c
 index 232af54..c013147 100644
 --- a/src/lxc/lxc_controller.c
 +++ b/src/lxc/lxc_controller.c
 @@ -1983,6 +1983,12 @@ virLXCControllerSetupFuse(virLXCControllerPtr ctrl)
  }
  
  static int
 +virLXCControllerStartFuse(virLXCControllerPtr ctrl)
 +{
 +return lxcStartFuse(ctrl-fuse);
 +}
 +
 +static int
  virLXCControllerSetupConsoles(virLXCControllerPtr ctrl,
char **containerTTYPaths)
  {
 @@ -2187,6 +2193,9 @@ virLXCControllerRun(virLXCControllerPtr ctrl)
  if (virLXCControllerMoveInterfaces(ctrl)  0)
  goto cleanup;
  
 +if (virLXCControllerStartFuse(ctrl)  0)
 +goto cleanup;
 +
  if (lxcContainerSendContinue(control[0])  0) {
  virReportSystemError(errno, %s,
   _(Unable to send container continue message));
 @@ -2199,8 +2208,6 @@ virLXCControllerRun(virLXCControllerPtr ctrl)
  goto cleanup;
  }
  
 -/* Now the container is fully setup... */
 -
  /* ...and reduce our privileges */
  if (lxcControllerClearCapabilities()  0)
  goto cleanup;
 diff --git a/src/lxc/lxc_fuse.c b/src/lxc/lxc_fuse.c
 index 9d12832..88e122e 100644
 --- a/src/lxc/lxc_fuse.c
 +++ b/src/lxc/lxc_fuse.c
 @@ -322,12 +322,6 @@ int lxcSetupFuse(virLXCFusePtr *f, virDomainDefPtr def)
  goto cleanup1;
  }
  
 -if (virThreadCreate(fuse-thread, false, lxcFuseRun,
 -(void *)fuse)  0) {
 -lxcFuseDestroy(fuse);
 -goto cleanup1;
 -}
 -
  ret = 0;
  cleanup:
  fuse_opt_free_args(args);
 @@ -341,6 +335,17 @@ cleanup2:
  goto cleanup;
  }
  
 +int lxcStartFuse(virLXCFusePtr fuse)
 +{
 +if (virThreadCreate(fuse-thread, false, lxcFuseRun,
 +(void *)fuse)  0) {
 +lxcFuseDestroy(fuse);
 +return -1;
 +}
 +
 +return 0;
 +}
 +
  void lxcFreeFuse(virLXCFusePtr *f)
  {
  virLXCFusePtr fuse = *f;
 @@ -364,6 +369,10 @@ int lxcSetupFuse(virLXCFusePtr *f ATTRIBUTE_UNUSED,
  return 0;
  }
  
 +int lxcStartFuse(virLXCFusePtr f ATTRIBUTE_UNUSED)
 +{
 +}
 +
  void lxcFreeFuse(virLXCFusePtr *f ATTRIBUTE_UNUSED)
  {
  }
 diff --git a/src/lxc/lxc_fuse.h b/src/lxc/lxc_fuse.h
 index b3713af..d60492b 100644
 --- a/src/lxc/lxc_fuse.h
 +++ b/src/lxc/lxc_fuse.h
 @@ -58,6 +58,7 @@ struct virLXCFuse {
  typedef struct virLXCFuse *virLXCFusePtr;
  
  extern int lxcSetupFuse(virLXCFusePtr *f, virDomainDefPtr def);
 +extern int lxcStartFuse(virLXCFusePtr f);
  extern void lxcFreeFuse(virLXCFusePtr *f);
  
  #endif /* LXC_FUSE_H */
 

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH v2]lxc: don't mount dir if ownership couldn't be known

2013-11-13 Thread Gao feng
On 11/13/2013 04:51 PM, Chen Hanxiao wrote:
 From: Chen Hanxiao chenhanx...@cn.fujitsu.com
 
 If we enable userns, we could bind mount
 some dirs from host to guest, which don't belong to
 the target mapped uid/gid.
 
 Such as we could bind mount root's dirs to guest.
 What is worse, we could even modify root's files
 in that bind dir inside container.

If this happens, it is a generic permission
problem and should be fixed in the kernel.

 
 So if we couldn't know
 the dir's ownership(without a proper uid/gid mapping),
 don't mount it.
 
 Signed-off-by: Chen Hanxiao chenhanx...@cn.fujitsu.com
 ---
 v2: add more description
 
  src/lxc/lxc_container.c | 18 ++
  1 file changed, 18 insertions(+)
 
 diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c
 index 255c711..4cf209e 100644
 --- a/src/lxc/lxc_container.c
 +++ b/src/lxc/lxc_container.c
 @@ -96,6 +96,8 @@
  typedef char lxc_message_t;
  #define LXC_CONTINUE_MSG 'c'
  
 +#define OVERFLOW_UGID 65534
 +
  typedef struct __lxc_child_argv lxc_child_argv_t;
  struct __lxc_child_argv {
  virDomainDefPtr config;
 @@ -1073,6 +1075,22 @@ static int lxcContainerMountFSBind(virDomainFSDefPtr 
 fs,
  if (virAsprintf(src, %s%s, srcprefix, fs-src)  0)
  goto cleanup;
  
 +if (stat(src, st)  0) {
 +virReportSystemError(errno, _(Unable to stat bind source %s),
 + src);
 +goto cleanup;
 +} else {
 +if (OVERFLOW_UGID == st.st_uid || OVERFLOW_UGID == st.st_gid) {
 +errno = EPERM;
 +VIR_DEBUG(Unknown st_uid %d, st_gid %d for %s,
 +  st.st_uid, st.st_gid, fs-src);
 +virReportSystemError(errno,
 + _(Check the permission of src dir '%s' 
 provided for container)
 + ,fs-src);
 +goto cleanup;
 +}
 +}
 +
  if (stat(fs-dst, st)  0) {
  if (errno != ENOENT) {
  virReportSystemError(errno, _(Unable to stat bind target %s),
 

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH] LXC: make sure fuse thread start to run before we do clone

2013-11-13 Thread Gao feng
On 11/08/2013 01:30 PM, Daniel P. Berrange wrote:
 On Thu, Nov 07, 2013 at 09:15:43PM +0800, Gao feng wrote:
 I met a problem that container blocked by seteuid/setegid
 which is call in lxcContainerSetID on UP system and libvirt
 compiled with --with-fuse=yes.

 I looked into the glibc's codes, and found setxid in glibc
 calls futex() to wait for other threads to change their
 setxid_futex to 0(see setxid_mark_thread in glibc).

 since the process created by clone system call will not
 share the memory with the other threads and the context
 of memory doesn't changed until we call execl.(COW)

 So if the process which created by clone is called before
 fuse thread being stated, the new setxid_futex of fuse
 thread will not be saw in this process, it will be blocked
 forever.

 Maybe this problem should be fixed in glibc, but I send
 this patch as a quick fix.
 
 Can you show a stack trace of the threads/processes deadlocking
 

Daniel, could you apply this patch? This may not be fixed in glibc
quickly, and we should consider making libvirt work with a buggy glibc.

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH] LXC: make sure fuse thread start to run before we do clone

2013-11-13 Thread Gao feng
On 11/13/2013 10:53 PM, Daniel P. Berrange wrote:
 On Fri, Nov 08, 2013 at 02:42:26PM -0500, Rich Felker wrote:
 On Fri, Nov 08, 2013 at 01:30:09PM +0800, Daniel P. Berrange wrote:
 On Thu, Nov 07, 2013 at 09:15:43PM +0800, Gao feng wrote:
 I met a problem that container blocked by seteuid/setegid
 which is call in lxcContainerSetID on UP system and libvirt
 compiled with --with-fuse=yes.

 I looked into the glibc's codes, and found setxid in glibc
 calls futex() to wait for other threads to change their
 setxid_futex to 0(see setxid_mark_thread in glibc).

 since the process created by clone system call will not
 share the memory with the other threads and the context
 of memory doesn't changed until we call execl.(COW)

 So if the process which created by clone is called before
 fuse thread being stated, the new setxid_futex of fuse
 thread will not be saw in this process, it will be blocked
 forever.

 Maybe this problem should be fixed in glibc, but I send
 this patch as a quick fix.

 Can you show a stack trace of the threads/processes deadlocking

 I think this is a symptom of setxid not being async-signal-safe like
 it's required to be. I'm not sure if we have a bug tracker entry for
 that; if not, it should be added. But if clone() is being used except
 in a fork-like manner, this is probably invalid application usage too.
 
 We are not using clone() in a manner that is strictly equivalent
 to fork(). Libvirt is using clone() to create Linux containers
 with new namespaces. eg we do 
 
   
 clone(CLONE_NEWPID|CLONE_NEWNS|CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWUSER|CLONE_NEWNET|SIGCHLD)
 
 
 IIUC, if a process is multi-threaded you should restrict yourself to
 use of async signal safe functions in between fork() and exec(). I
 assume this restriction applies to clone() and exec() pairings too.
 
 Libvirt is in fact violating rules about only using async signal safe
 functions between clone() and exec() in many places. So I think what
 we need to do is avoid starting any threads in the parent until after
 we've clone()'d to create the new child namespace.

Thanks to fuse, any attempt to access files exported by fuse will be blocked
until
the fuse thread starts running fuse_loop.

I will post an update.

Thanks guys.

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


[libvirt] [PATCH] LXC: do setxid before lxc controller creates fuse thread

2013-11-13 Thread Gao feng
I hit a problem where the container blocks in seteuid/setegid,
which are called in lxcContainerSetID, on a UP system with libvirt
compiled with --with-fuse=yes.

I looked into the glibc's codes, and found setxid in glibc
calls futex() to wait for other threads to change their
setxid_futex to 0(see setxid_mark_thread in glibc).

The process created by the clone system call does not
share memory with the other threads, and the contents
of its memory do not change until we call execl (COW).

So if the process created by clone runs before the
fuse thread has started, the new setxid_futex of the fuse
thread will not be seen in this process, and it will be blocked
forever.

This patch makes sure the cloned process calls setxid first,
and then the lxc controller creates fuse thread.

Signed-off-by: Gao feng gaof...@cn.fujitsu.com
---
 src/lxc/lxc_container.c  | 29 +++--
 src/lxc/lxc_controller.c | 10 +-
 src/lxc/lxc_fuse.c   | 21 +++--
 src/lxc/lxc_fuse.h   |  1 +
 4 files changed, 40 insertions(+), 21 deletions(-)

diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c
index c000a82..27bdcc0 100644
--- a/src/lxc/lxc_container.c
+++ b/src/lxc/lxc_container.c
@@ -1844,9 +1844,24 @@ static int lxcContainerChild(void *data)
 cmd = lxcContainerBuildInitCmd(vmDef);
 virCommandWriteArgLog(cmd, 1);
 
+/* call setxid before libvirt controller creates fuse thread. */
 if (lxcContainerSetID(vmDef)  0)
 goto cleanup;
 
+/* rename and enable interfaces */
+if (lxcContainerRenameAndEnableInterfaces(!!(vmDef-features 
+ (1  
VIR_DOMAIN_FEATURE_PRIVNET)),
+  argv-nveths,
+  argv-veths)  0) {
+goto cleanup;
+}
+
+if (lxcContainerSendContinue(argv-handshakefd)  0) {
+virReportSystemError(errno, %s,
+_(Failed to send continue signal to controller));
+goto cleanup;
+}
+
 root = virDomainGetRootFilesystem(vmDef);
 
 if (argv-nttyPaths) {
@@ -1886,24 +1901,10 @@ static int lxcContainerChild(void *data)
 goto cleanup;
 }
 
-/* rename and enable interfaces */
-if (lxcContainerRenameAndEnableInterfaces(!!(vmDef-features 
- (1  
VIR_DOMAIN_FEATURE_PRIVNET)),
-  argv-nveths,
-  argv-veths)  0) {
-goto cleanup;
-}
-
 /* drop a set of root capabilities */
 if (lxcContainerDropCapabilities(!!hasReboot)  0)
 goto cleanup;
 
-if (lxcContainerSendContinue(argv-handshakefd)  0) {
-virReportSystemError(errno, %s,
-_(Failed to send continue signal to controller));
-goto cleanup;
-}
-
 VIR_DEBUG(Setting up security labeling);
 if (virSecurityManagerSetProcessLabel(argv-securityDriver, vmDef)  0)
 goto cleanup;
diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c
index c8f68c0..5d1ec49 100644
--- a/src/lxc/lxc_controller.c
+++ b/src/lxc/lxc_controller.c
@@ -1981,6 +1981,12 @@ virLXCControllerSetupFuse(virLXCControllerPtr ctrl)
 }
 
 static int
+virLXCControllerStartFuse(virLXCControllerPtr ctrl)
+{
+return lxcStartFuse(ctrl-fuse);
+}
+
+static int
 virLXCControllerSetupConsoles(virLXCControllerPtr ctrl,
   char **containerTTYPaths)
 {
@@ -2197,7 +2203,9 @@ virLXCControllerRun(virLXCControllerPtr ctrl)
 goto cleanup;
 }
 
-/* Now the container is fully setup... */
+/* container has already called setxid, we can create thread now.*/
+if (virLXCControllerStartFuse(ctrl)  0)
+goto cleanup;
 
 /* ...and reduce our privileges */
 if (lxcControllerClearCapabilities()  0)
diff --git a/src/lxc/lxc_fuse.c b/src/lxc/lxc_fuse.c
index 9d12832..88e122e 100644
--- a/src/lxc/lxc_fuse.c
+++ b/src/lxc/lxc_fuse.c
@@ -322,12 +322,6 @@ int lxcSetupFuse(virLXCFusePtr *f, virDomainDefPtr def)
 goto cleanup1;
 }
 
-if (virThreadCreate(fuse-thread, false, lxcFuseRun,
-(void *)fuse)  0) {
-lxcFuseDestroy(fuse);
-goto cleanup1;
-}
-
 ret = 0;
 cleanup:
 fuse_opt_free_args(args);
@@ -341,6 +335,17 @@ cleanup2:
 goto cleanup;
 }
 
+int lxcStartFuse(virLXCFusePtr fuse)
+{
+if (virThreadCreate(fuse-thread, false, lxcFuseRun,
+(void *)fuse)  0) {
+lxcFuseDestroy(fuse);
+return -1;
+}
+
+return 0;
+}
+
 void lxcFreeFuse(virLXCFusePtr *f)
 {
 virLXCFusePtr fuse = *f;
@@ -364,6 +369,10 @@ int lxcSetupFuse(virLXCFusePtr *f ATTRIBUTE_UNUSED,
 return 0;
 }
 
+int lxcStartFuse(virLXCFusePtr f ATTRIBUTE_UNUSED)
+{
+}
+
 void lxcFreeFuse(virLXCFusePtr *f ATTRIBUTE_UNUSED)
 {
 }
diff --git a/src/lxc/lxc_fuse.h b/src/lxc/lxc_fuse.h
index b3713af

Re: [libvirt] [PATCH]lxc: make sure root wouldn't be null

2013-11-12 Thread Gao feng
On 11/12/2013 04:03 PM, Chen Hanxiao wrote:
 ping
 

See virLXCProcessEnsureRootFS: we already make sure the root exists
for libvirt-lxc.

 -Original Message-
 From: Chen Hanxiao [mailto:chenhanx...@cn.fujitsu.com]
 Sent: Tuesday, November 05, 2013 5:57 PM
 To: libvir-list@redhat.com
 Cc: chenhanx...@cn.fujitsu.com
 Subject: [libvirt][PATCH]lxc: make sure root wouldn't be null

 From: Chen Hanxiao chenhanx...@cn.fujitsu.com

 virDomainGetRootFilesystem may return null.
 We should take care of it.

 Signed-off-by: Chen Hanxiao chenhanx...@cn.fujitsu.com
 ---
  src/lxc/lxc_container.c | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

 diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c
 index 255c711..e8f7a75 100644
 --- a/src/lxc/lxc_container.c
 +++ b/src/lxc/lxc_container.c
 @@ -1829,7 +1829,8 @@ static int lxcContainerChild(void *data)
  if (lxcContainerSetID(vmDef)  0)
  goto cleanup;

 -root = virDomainGetRootFilesystem(vmDef);
 +if (!(root = virDomainGetRootFilesystem(vmDef)))
 +goto cleanup;

  if (argv-nttyPaths) {
  const char *tty = argv-ttyPaths[0];
 --
 1.8.2.1
 
 
 
 --
 libvir-list mailing list
 libvir-list@redhat.com
 https://www.redhat.com/mailman/listinfo/libvir-list
 

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH] LXC: make sure fuse thread start to run before we do clone

2013-11-08 Thread Gao feng
On 11/08/2013 01:30 PM, Daniel P. Berrange wrote:
 On Thu, Nov 07, 2013 at 09:15:43PM +0800, Gao feng wrote:
 I met a problem that container blocked by seteuid/setegid
 which is call in lxcContainerSetID on UP system and libvirt
 compiled with --with-fuse=yes.

 I looked into the glibc's codes, and found setxid in glibc
 calls futex() to wait for other threads to change their
 setxid_futex to 0(see setxid_mark_thread in glibc).

 since the process created by clone system call will not
 share the memory with the other threads and the context
 of memory doesn't changed until we call execl.(COW)

 So if the process which created by clone is called before
 fuse thread being stated, the new setxid_futex of fuse
 thread will not be saw in this process, it will be blocked
 forever.

 Maybe this problem should be fixed in glibc, but I send
 this patch as a quick fix.
 
 Can you show a stack trace of the threads/processes deadlocking
 
 
Sure

the libvirt_lxc tasks
root  7922  0.0  0.1 118976  3704 ?Ssl  20:55   0:00 
/usr/local/libexec/libvirt_lxc --name chx3 --console 17 --security=selinux 
--handshake 20 --background --veth vnet1
root  7927  0.0  0.1  53440  3072 ?S20:55   0:00 
/usr/local/libexec/libvirt_lxc --name chx3 --console 17 --security=selinux 
--handshake 20 --background --veth vnet1

the pid of fuse thread is 7925
[root@localhost ~]# ls /proc/7922/task/
7922  7925

gdb -p 7925
(gdb) bt
#0  0x7f2d39bcb83d in read () at ../sysdeps/unix/syscall-template.S:81
#1  0x7f2d3a5dfb72 in fuse_kern_chan_receive () from /glibc/lib/libfuse.so.2
#2  0x7f2d3a5e0b16 in fuse_ll_receive_buf () from /glibc/lib/libfuse.so.2
#3  0x7f2d3a5dfdd1 in fuse_session_loop () from /glibc/lib/libfuse.so.2
#4  0x7f2d3a5d8468 in fuse_loop () from /glibc/lib/libfuse.so.2
#5  0x7f2d3aa55691 in lxcFuseRun (opaque=opaque@entry=0x7f2d3b13a420) at 
lxc/lxc_fuse.c:276
#6  0x7f2d3aaebb8e in virThreadHelper (data=optimized out) at 
util/virthreadpthread.c:161
#7  0x7f2d39bc4f22 in start_thread (arg=0x7f2d37fbc700) at 
pthread_create.c:309
#8  0x7f2d392ca6ed in clone () at 
../sysdeps/unix/sysv/linux/x86_64/clone.S:111

and the arg of start_thread is the struct pthread of fuse thread.
you can see the setxid_futex of fuse pthread has been set to 0.

(gdb) p *(struct pthread*)0x7f2d37fbc700
$1 = {{header = {tcb = 0x7f2d37fbc700, dtv = 0x7f2d3b2c9ae0, self = 
0x7f2d37fbc700, multiple_threads = 1, gscope_flag = 0, sysinfo = 0, stack_guard 
= 5516672127090939392, pointer_guard =
9991483700321457629,
  vgetcpu_cache = {0, 0}, __unused1 = 0, rtld_must_xmm_save = 0, 
__private_tm = {0x0, 0x0, 0x0, 0x0}, __private_ss = 0x0, __unused2 = 0, 
rtld_savespace_sse = {{{i = {0, 0, 0, 0}}, {i = {0, 0, 0,
0}}, {i = {
  0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}, {{i = {0, 0, 0, 0}}, {i = {0, 
0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}, {{i = {0, 0, 0, 0}}, {i = 
{0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0,
0, 0,
  0}}}, {{i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 
0}}, {i = {0, 0, 0, 0}}}, {{i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 
0, 0}}, {i = {0, 0, 0, 0}}}, {{i = {0, 0, 0,
0}}, {
i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}, {{i = 
{0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}}, {{i 
= {0, 0, 0, 0}}, {i = {0, 0, 0, 0}}, {i =
{0, 0, 0,
  0}}, {i = {0, 0, 0, 0, __padding = {0x0, 0x0, 0x0, 0x0, 0x0, 
0x0, 0x0, 0x0}}, __padding = {0x7f2d37fbc700, 0x7f2d3b2c9ae0, 0x7f2d37fbc700, 
0x1, 0x0, 0x4c8f28122d8dd600,
0x8aa8e17d00c415dd,
  0x0 repeats 17 times}}, list = {next = 0x7f2d39dd5270 stack_used, 
prev = 0x7f2d39dd5270 stack_used}, tid = 7925, pid = 7922, robust_prev = 
0x7f2d37fbc9e0, robust_head = {list =
0x7f2d37fbc9e0,
futex_offset = -32, list_op_pending = 0x0}, cleanup = 0x0, cleanup_jmp_buf 
= 0x7f2d37fbbe30, cancelhandling = 2, flags = 1, specific_1stblock = {{seq = 0, 
data = 0x0}, {seq = 0, data = 0x0}, {seq
= 0,
  data = 0x0}, {seq = 1, data = 0x7f2d30021960}, {seq = 0, data = 0x0} 
repeats 28 times}, specific = {0x7f2d37fbca10, 0x0 repeats 31 times}, 
specific_used = true, report_events = false,
  user_stack = false, stopped_start = false, parent_cancelhandling = 0, lock = 
0, *setxid_futex* = 0, cpuclock_offset = 1398764389412, joinid = 
0x7f2d37fbc700, result = 0x0, schedparam =
{__sched_priority = 0},
  schedpolicy = 0, start_routine = 0x7f2d3aaebb60 virThreadHelper, arg = 
0x7f2d3b2bdce0, eventbuf = {eventmask = {event_bits = {0, 0}}, eventnum = 
TD_ALL_EVENTS, eventdata = 0x0}, nextevent = 0x0,
exc = {
exception_class = 0, exception_cleanup = 0x0, private_1 = 0, private_2 = 
0}, stackblock = 0x7f2d377bc000, stackblock_size = 8392704, guardsize = 4096, 
reported_guardsize = 4096, tpp = 0x0, res = {
retrans = 0, retry = 0, options = 0, nscount = 0, nsaddr_list = 
{{sin_family = 0, sin_port = 0, sin_addr = {s_addr = 0}, sin_zero = 
\000\000\000\000\000

Re: [libvirt] [PATCH] LXC: make sure fuse thread start to run before we do clone

2013-11-08 Thread Gao feng
On 11/09/2013 03:42 AM, Rich Felker wrote:
 On Fri, Nov 08, 2013 at 01:30:09PM +0800, Daniel P. Berrange wrote:
 On Thu, Nov 07, 2013 at 09:15:43PM +0800, Gao feng wrote:
 I met a problem that container blocked by seteuid/setegid
 which is call in lxcContainerSetID on UP system and libvirt
 compiled with --with-fuse=yes.

 I looked into the glibc's codes, and found setxid in glibc
 calls futex() to wait for other threads to change their
 setxid_futex to 0(see setxid_mark_thread in glibc).

 since the process created by clone system call will not
 share the memory with the other threads and the context
 of memory doesn't changed until we call execl.(COW)

 So if the process which created by clone is called before
 fuse thread being stated, the new setxid_futex of fuse
 thread will not be saw in this process, it will be blocked
 forever.

 Maybe this problem should be fixed in glibc, but I send
 this patch as a quick fix.

 Can you show a stack trace of the threads/processes deadlocking
 
 I think this is a symptom of setxid not being async-signal-safe like
 it's required to be. I'm not sure if we have a bug tracker entry for
 that; if not, it should be added. But if clone() is being used except
 in a fork-like manner, this is probably invalid application usage too.
 

I posted a patch to the glibc community, but I can't find it in the
mailing list archive, so the patch is attached below. Do you think this glibc
patch is needed, or should we just add a bug tracker entry or a man page note?


From d9c505032bc869e940bd33dd1e37d8568d33951a Mon Sep 17 00:00:00 2001
From: Gao feng gaof...@cn.fujitsu.com
Date: Fri, 8 Nov 2013 09:22:43 +0800
Subject: [PATCH] nptl: fix block problem caused by setxid called from clone

The process created by clone has a copy of the parent process's
memory. If the parent creates some threads before calling
clone, the stack_used and __stack_user lists are invalid for
the child process created by clone. If this child process
runs before those threads, the setxid_futex of these threads
will never be changed to 0 in the child process, since
it only has a copy of the memory. So setxid will be blocked
on the futex forever.

This patch skips operation of the threads in different thread
group.

Signed-off-by: Gao feng gaof...@cn.fujitsu.com
---
 nptl/allocatestack.c | 21 ++---
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/nptl/allocatestack.c b/nptl/allocatestack.c
index 1e0fe1f..8fc2591 100644
--- a/nptl/allocatestack.c
+++ b/nptl/allocatestack.c
@@ -1075,12 +1075,19 @@ __nptl_setxid (struct xid_command *cmdp)
 
   struct pthread *self = THREAD_SELF;
 
+  INTERNAL_SYSCALL_DECL (err);
+  pid_t pid = INTERNAL_SYSCALL(getpid, err, 0);
+
   /* Iterate over the list with system-allocated threads first.  */
   list_t *runp;
   list_for_each (runp, stack_used)
 {
   struct pthread *t = list_entry (runp, struct pthread, list);
-  if (t == self)
+  /* setxid may be called by the process created by clone,
+   * this process has a copy of parent's memory but it is
+   * running in different thread group, skip the threads
+   * in different thread group. */
+  if (t == self || t-pid != pid)
 	continue;
 
   setxid_mark_thread (cmdp, t);
@@ -1090,7 +1097,7 @@ __nptl_setxid (struct xid_command *cmdp)
   list_for_each (runp, __stack_user)
 {
   struct pthread *t = list_entry (runp, struct pthread, list);
-  if (t == self)
+  if (t == self || t-pid != pid)
 	continue;
 
   setxid_mark_thread (cmdp, t);
@@ -1106,7 +1113,7 @@ __nptl_setxid (struct xid_command *cmdp)
   list_for_each (runp, stack_used)
 	{
 	  struct pthread *t = list_entry (runp, struct pthread, list);
-	  if (t == self)
+	  if (t == self || t-pid != pid)
 	continue;
 
 	  signalled += setxid_signal_thread (cmdp, t);
@@ -1115,7 +1122,7 @@ __nptl_setxid (struct xid_command *cmdp)
   list_for_each (runp, __stack_user)
 	{
 	  struct pthread *t = list_entry (runp, struct pthread, list);
-	  if (t == self)
+	  if (t == self || t-pid != pid)
 	continue;
 
 	  signalled += setxid_signal_thread (cmdp, t);
@@ -1135,7 +1142,7 @@ __nptl_setxid (struct xid_command *cmdp)
   list_for_each (runp, stack_used)
 {
   struct pthread *t = list_entry (runp, struct pthread, list);
-  if (t == self)
+  if (t == self || t-pid != pid)
 	continue;
 
   setxid_unmark_thread (cmdp, t);
@@ -1144,7 +1151,7 @@ __nptl_setxid (struct xid_command *cmdp)
   list_for_each (runp, __stack_user)
 {
   struct pthread *t = list_entry (runp, struct pthread, list);
-  if (t == self)
+  if (t == self || t-pid != pid)
 	continue;
 
   setxid_unmark_thread (cmdp, t);
@@ -1152,7 +1159,7 @@ __nptl_setxid (struct xid_command *cmdp)
 
   /* This must be last, otherwise the current thread might not have
  permissions to send SIGSETXID syscall to the other threads.  */
-  INTERNAL_SYSCALL_DECL (err);
+
   result = INTERNAL_SYSCALL_NCS (cmdp-syscall_no, err, 3,
  cmdp

[libvirt] [PATCH] LXC: make sure fuse thread start to run before we do clone

2013-11-07 Thread Gao feng
I met a problem where the container is blocked in seteuid/setegid,
which are called in lxcContainerSetID, on a UP system with libvirt
compiled with --with-fuse=yes.

I looked into the glibc code and found that setxid in glibc
calls futex() to wait for other threads to change their
setxid_futex to 0 (see setxid_mark_thread in glibc).

The process created by the clone system call does not
share memory with the other threads, and the contents
of its memory do not change until we call execl (COW).

So if the process created by clone runs before the
fuse thread has started, the new setxid_futex value of the fuse
thread will not be seen in this process, and it will be blocked
forever.

Maybe this problem should be fixed in glibc, but I am sending
this patch as a quick fix.

Signed-off-by: Gao feng gaof...@cn.fujitsu.com
---
 src/lxc/lxc_controller.c | 13 -
 src/lxc/lxc_fuse.c   |  6 +-
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c
index c8f68c0..ed83bb3 100644
--- a/src/lxc/lxc_controller.c
+++ b/src/lxc/lxc_controller.c
@@ -1977,7 +1977,18 @@ cleanup:
 static int
 virLXCControllerSetupFuse(virLXCControllerPtr ctrl)
 {
-return lxcSetupFuse(ctrl-fuse, ctrl-def);
+int ret = lxcSetupFuse(ctrl-fuse, ctrl-def);
+
+if (!ret) {
+/* Wait for fuse thread starting run, so we
+ * can make sure the setxid_futex of fuse thread
+ * is 0(see start_thread of glibc), otherwise
+ * the lxcContainerChild will block at setxid. */
+virMutexLock(ctrl-fuse-lock);
+virMutexUnlock(ctrl-fuse-lock);
+}
+
+return ret;
 }
 
 static int
diff --git a/src/lxc/lxc_fuse.c b/src/lxc/lxc_fuse.c
index 9d12832..8cddfa8 100644
--- a/src/lxc/lxc_fuse.c
+++ b/src/lxc/lxc_fuse.c
@@ -272,6 +272,8 @@ static void lxcFuseDestroy(virLXCFusePtr fuse)
 static void lxcFuseRun(void *opaque)
 {
 virLXCFusePtr fuse = opaque;
+/* Let libvirt_lxc continue. */
+virMutexUnlock(fuse-lock);
 
 if (fuse_loop(fuse-fuse)  0)
 virReportError(VIR_ERR_INTERNAL_ERROR, %s,
@@ -321,7 +323,9 @@ int lxcSetupFuse(virLXCFusePtr *f, virDomainDefPtr def)
 fuse_unmount(fuse-mountpoint, fuse-ch);
 goto cleanup1;
 }
-
+/* Get mutex lock, lxcFuseRun will unlock it. this will
+ * cause libvirt_lxc wait for the fuse thread starting. */
+virMutexLock(fuse-lock);
 if (virThreadCreate(fuse-thread, false, lxcFuseRun,
 (void *)fuse)  0) {
 lxcFuseDestroy(fuse);
-- 
1.8.3.1

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH] LXC: mount /dev/pts/0 to /dev/console

2013-10-30 Thread Gao feng
On 10/30/2013 07:15 PM, Daniel P. Berrange wrote:
 On Tue, Oct 29, 2013 at 03:37:51PM +0800, Gao feng wrote:
 Now, /dev/console is linked to the /dev/pts/0,
 so for the process agetty, the tty device of
 agetty is pts/0. this will cause login container
 failed.
 since pts/0 is not in the /etc/securetty. so
 pam module pam_securetty will prevent the root
 user logging on the system.

 this patch doesn't make /dev/console a symbol but
 binds /dev/pts/0 to it. so the tty device of
 agetty will be console. root can login the system
 successfully.
 
 This only really fixes it for the first console. If the guest
 has multiple console/ lines defined, we still have the same
 problem with securetty for other consoles.I'm wondering if

Oh, that's /dev/ttyN, not /dev/console.

There is only one /dev/console. In my container, which is running Fedora 18,
the tty device of agetty is console:

  553 pts/0Ss+0:00 /sbin/agetty --noclear -s console 115200 38400 9600

So this patch resolves the problem I faced. If we find that the ttyN devices
also need to be bound to some pts devices, that belongs in another patch.

But actually, /dev/console points to different tty devices from the view of
different processes, so linking/binding console to /dev/pts/0 may
mess something up. Also, the multiple-console setting doesn't seem to work for
containers; only the first console is opened/listened on.


 there is any scope for just getting the securetty check todo
 the right thing by default for psuedo ttys.
 

Right now I don't see any problem, but I don't know whether this will break
something.
And as I mentioned above, linking /dev/console to /dev/pts/0 will break
something too.

I don't have any idea how to implement the behaviour that /dev/console and
/dev/tty0 provide
now. Any ideas?

Thanks
Gao

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


[libvirt] [PATCH] LXC: don't free tty before using it in lxcContainerSetupDevices

2013-10-29 Thread Gao feng
Signed-off-by: Gao feng gaof...@cn.fujitsu.com
---
 .gnulib | 2 +-
 src/lxc/lxc_container.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.gnulib b/.gnulib
index 8f74258..4a5ee89 16
--- a/.gnulib
+++ b/.gnulib
@@ -1 +1 @@
-Subproject commit 8f7425866463f994538584d1dd7211603b8b0550
+Subproject commit 4a5ee89c8a8be7350a8fd8ca1bacb196a190e492
diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c
index 1ec59d5..255c711 100644
--- a/src/lxc/lxc_container.c
+++ b/src/lxc/lxc_container.c
@@ -1042,10 +1042,10 @@ static int lxcContainerSetupDevices(char **ttyPaths, 
size_t nttyPaths)
 if (virAsprintf(tty, /dev/tty%zu, i+1)  0)
 return -1;
 if (symlink(ttyPaths[i], tty)  0) {
-VIR_FREE(tty);
 virReportSystemError(errno,
  _(Failed to symlink %s to %s),
  ttyPaths[i], tty);
+VIR_FREE(tty);
 return -1;
 }
 VIR_FREE(tty);
-- 
1.8.3.1

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


[libvirt] [PATCH] LXC: mount /dev/pts/0 to /dev/console

2013-10-29 Thread Gao feng
Now, /dev/console is linked to the /dev/pts/0,
so for the process agetty, the tty device of
agetty is pts/0. this will cause login container
failed.
since pts/0 is not in the /etc/securetty. so
pam module pam_securetty will prevent the root
user logging on the system.

this patch doesn't make /dev/console a symbol but
binds /dev/pts/0 to it. so the tty device of
agetty will be console. root can login the system
successfully.

Signed-off-by: Gao feng gaof...@cn.fujitsu.com
---
 src/lxc/lxc_container.c | 19 +--
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c
index 255c711..1cede41 100644
--- a/src/lxc/lxc_container.c
+++ b/src/lxc/lxc_container.c
@@ -1049,12 +1049,19 @@ static int lxcContainerSetupDevices(char **ttyPaths, 
size_t nttyPaths)
 return -1;
 }
 VIR_FREE(tty);
-if (i == 0 
-symlink(ttyPaths[i], /dev/console)  0) {
-virReportSystemError(errno,
- _(Failed to symlink %s to /dev/console),
- ttyPaths[i]);
-return -1;
+if (i == 0) {
+if (virFileTouch(/dev/console, 0600)  0) {
+virReportSystemError(errno, %s,
+ _(Failed to create /dev/console));
+return -1;
+}
+
+if (mount(ttyPaths[0], /dev/console, NULL, MS_BIND, NULL)  0) {
+virReportSystemError(errno,
+ _(Failed to symlink %s to /dev/console),
+ ttyPaths[i]);
+return -1;
+}
 }
 }
 return 0;
-- 
1.8.3.1

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


[libvirt] [PATCH v2] LXC: mount /dev/pts/0 to /dev/console

2013-10-29 Thread Gao feng
Now, /dev/console is linked to the /dev/pts/0,
so for the process agetty, the tty device of
agetty is pts/0. this will cause login container
failed.
since pts/0 is not in the /etc/securetty. so
pam module pam_securetty will prevent the root
user logging on the system.

this patch doesn't make /dev/console a symbol but
binds /dev/pts/0 to it. so the tty device of
agetty will be console. root can login the system
successfully.

change from v1:
1,Add some comments.
2,Modify the System error report message.

Signed-off-by: Gao feng gaof...@cn.fujitsu.com
---
 src/lxc/lxc_container.c | 22 --
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c
index 255c711..9c71bad 100644
--- a/src/lxc/lxc_container.c
+++ b/src/lxc/lxc_container.c
@@ -1049,12 +1049,22 @@ static int lxcContainerSetupDevices(char **ttyPaths, 
size_t nttyPaths)
 return -1;
 }
 VIR_FREE(tty);
-if (i == 0 
-symlink(ttyPaths[i], /dev/console)  0) {
-virReportSystemError(errno,
- _(Failed to symlink %s to /dev/console),
- ttyPaths[i]);
-return -1;
+if (i == 0) {
+if (virFileTouch(/dev/console, 0600)  0) {
+virReportSystemError(errno, %s,
+ _(Failed to create /dev/console));
+return -1;
+}
+/* Binding /dev/console to pts, this will make agetty
+ * trust its tty device is console but not the pts device.
+ * So the pam_sercuretty module will not try to prevent
+ * root user logging on container. */
+if (mount(ttyPaths[0], /dev/console, NULL, MS_BIND, NULL)  0) {
+virReportSystemError(errno,
+ _(Failed to bind %s to /dev/console),
+ ttyPaths[i]);
+return -1;
+}
 }
 }
 return 0;
-- 
1.8.3.1

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH v2 REPOST 1/6] Add virFileIsMountPoint function

2013-10-29 Thread Gao feng
On 10/28/2013 11:40 PM, Daniel P. Berrange wrote:
 From: Daniel P. Berrange berra...@redhat.com
 
 Add a function for efficiently checking if a path is a filesystem
 mount point.
 
 NB will not work for bind mounts, only true filesystem mounts.
 
 Signed-off-by: Daniel P. Berrange berra...@redhat.com
 ---
  src/libvirt_private.syms |  1 +
  src/util/virfile.c   | 50 
 
  src/util/virfile.h   |  2 ++
  3 files changed, 53 insertions(+)
 
 diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
 index f1f817c..ea68490 100644
 --- a/src/libvirt_private.syms
 +++ b/src/libvirt_private.syms
 @@ -1188,6 +1188,7 @@ virFileIsAbsPath;
  virFileIsDir;
  virFileIsExecutable;
  virFileIsLink;
 +virFileIsMountPoint;
  virFileLinkPointsTo;
  virFileLock;
  virFileLoopDeviceAssociate;
 diff --git a/src/util/virfile.c b/src/util/virfile.c
 index 3a9980c..9fde99c 100644
 --- a/src/util/virfile.c
 +++ b/src/util/virfile.c
 @@ -1518,6 +1518,56 @@ virFileIsExecutable(const char *file)
  return false;
  }
  
 +
 +/*
 + * Check that a file refers to a mount point. Trick is that for
 + * a mount point, the st_dev field will differ from the parent
 + * directory.
 + *
 + * Note that this will not detect bind mounts of dirs/files,
 + * only true filesystem mounts.
 + */
 +int virFileIsMountPoint(const char *file)
 +{
 +char *parent = NULL;
 +int ret = -1;
 +struct stat sb1, sb2;
 +
 +if (!(parent = mdir_name(file))) {
 +virReportOOMError();
 +goto cleanup;
 +}
 +
 +VIR_DEBUG(Comparing '%s' to '%s', file, parent);
 +
 +if (stat(file, sb1)  0) {
 +if (errno == ENOENT)
 +ret = 0;
 +else
 +virReportSystemError(errno,
 + _(Cannot stat '%s'),
 + file);
 +goto cleanup;
 +}
 +
 +if (stat(parent, sb2)  0) {
 +virReportSystemError(errno,
 + _(Cannot stat '%s'),
 + parent);
 +goto cleanup;
 +}
 +
 +if (!S_ISDIR(sb1.st_mode))

This return path leaks parent: VIR_FREE(parent) is needed before returning (or jump to cleanup instead).

Acked-by: Gao feng gaof...@cn.fujitsu.com

 +return false;
 +
 +ret = sb1.st_dev != sb2.st_dev;
 +VIR_DEBUG(Is mount %d, ret);
 +
 + cleanup:
 +VIR_FREE(parent);
 +return ret;
 +}
 +
  #ifndef WIN32
  /* Check that a file is accessible under certain
   * user  gid.
 diff --git a/src/util/virfile.h b/src/util/virfile.h
 index 72d35ce..ff84719 100644
 --- a/src/util/virfile.h
 +++ b/src/util/virfile.h
 @@ -156,6 +156,8 @@ bool virFileIsDir (const char *file) ATTRIBUTE_NONNULL(1);
  bool virFileExists(const char *file) ATTRIBUTE_NONNULL(1);
  bool virFileIsExecutable(const char *file) ATTRIBUTE_NONNULL(1);
  
 +int virFileIsMountPoint(const char *file) ATTRIBUTE_NONNULL(1);
 +
  char *virFileSanitizePath(const char *path);
  
  enum {
 

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH v2 REPOST 0/6] Try to fix selinux/securityfs mount handling in LXC

2013-10-29 Thread Gao feng
On 10/28/2013 11:40 PM, Daniel P. Berrange wrote:
 From: Daniel P. Berrange berra...@redhat.com
 
 Update of
 
   https://www.redhat.com/archives/libvir-list/2013-October/msg00270.html
 
 Previously I just re-sent the one patch needing changes. This time I
 am reposting the whole series after rebasing.
 
 Daniel P. Berrange (6):
   Add virFileIsMountPoint function
   Remove unused 'opts' field from LXC basic mounts struct
   Remove pointless 'srcpath' variable in lxcContainerMountBasicFS
   Remove duplicate entries in lxcBasicMounts array
   Add flag to lxcBasicMounts to control use in user namespaces
   Skip any files which are not mounted on the host

ACK to this series.
Thanks!

 
  src/libvirt_private.syms |  1 +
  src/lxc/lxc_container.c  | 86 
 +---
  src/util/virfile.c   | 50 
  src/util/virfile.h   |  2 ++
  4 files changed, 105 insertions(+), 34 deletions(-)
 

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH] LXC: Improved check before mounting securityfs

2013-10-07 Thread Gao feng
On 10/07/2013 05:52 PM, Bogdan Purcareata wrote:
 Securityfs kernel support may not be available on all platforms
 running libvirt containers. Since securityfs receives special
 handling in the context of user namespaces, make an additional
 check to see if it is supported, by inspecting /proc/filesystems.
 
 Making this check for all lxcBasicMounts is a bit tedious, since
 the /proc filesystem is first unmounted from host, so the
 /proc/filesystems list should be saved before unmounting, to be
 available at all times. However, checks for the support for /proc
 or /sys are superfluous.
 
 In the long run, to support the addition of new filesystems in
 lxcBasicMounts, an additional optional flag should be introduced,
 to mark that for a specific filesystem, the code should first check
 for support in the kernel, before mounting it. For mandatory
 filesystems, if mounting them fails, creating the container fails.
 
 Right now, check for support only for securityfs, since right now
 it is the only special case.
 
 Signed-off-by: Bogdan Purcareata bogdan.purcare...@freescale.com
 ---
  src/lxc/lxc_container.c | 69 
 -
  1 file changed, 68 insertions(+), 1 deletion(-)
 

Ok, I know what's wrong, please check my patch.
If you think it's good, please add your Acked-by or Reviewed-by

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


[libvirt] [PATCH] LXC: securityfs: the src of securityfs should be /sys/kernel/security

2013-10-07 Thread Gao feng
Otherwise we can't know whether securityfs is available.

Signed-off-by: Gao feng gaof...@cn.fujitsu.com
---
 src/lxc/lxc_container.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c
index b1f429c..a15ce59 100644
--- a/src/lxc/lxc_container.c
+++ b/src/lxc/lxc_container.c
@@ -768,8 +768,8 @@ static const virLXCBasicMountInfo lxcBasicMounts[] = {
 { /proc/sys, /proc/sys, NULL, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY },
 { sysfs, /sys, sysfs, NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV },
 { sysfs, /sys, sysfs, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY },
-{ securityfs, /sys/kernel/security, securityfs, NULL, 
MS_NOSUID|MS_NOEXEC|MS_NODEV },
-{ securityfs, /sys/kernel/security, securityfs, NULL, 
MS_BIND|MS_REMOUNT|MS_RDONLY },
+{ /sys/kernel/security, /sys/kernel/security, securityfs, NULL, 
MS_NOSUID|MS_NOEXEC|MS_NODEV },
+{ /sys/kernel/security, /sys/kernel/security, securityfs, NULL, 
MS_BIND|MS_REMOUNT|MS_RDONLY },
 #if WITH_SELINUX
 { SELINUX_MOUNT, SELINUX_MOUNT, selinuxfs, NULL, 
MS_NOSUID|MS_NOEXEC|MS_NODEV },
 { SELINUX_MOUNT, SELINUX_MOUNT, NULL, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY },
-- 
1.8.3.1

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH] Ensure securityfs is mounted readonly in container

2013-10-07 Thread Gao feng
On 08/08/2013 07:52 PM, Daniel P. Berrange wrote:
 From: Dan Walsh dwa...@redhat.com
 
 If securityfs is available on the host, we should ensure to
 mount it read-only in the container. This will avoid systemd
 trying to mount it during startup causing SELinux AVCs.

From reading the systemd code: since the following commit, systemd
doesn't mount securityfs in a container.

commit c481f78be7d2620b417e453738ae1d65b576f67e
Author: Lennart Poettering lenn...@poettering.net
Date:   Tue Sep 18 12:05:47 2012 +0200

mount: don't mount securityfs in a container

So why does this patch say
"This will avoid systemd trying to mount it during startup"?

 
 Signed-off-by: Daniel P. Berrange berra...@redhat.com
 ---
  src/lxc/lxc_container.c | 2 ++
  1 file changed, 2 insertions(+)
 
 diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c
 index b910b10..a943b22 100644
 --- a/src/lxc/lxc_container.c
 +++ b/src/lxc/lxc_container.c
 @@ -770,6 +770,8 @@ static int lxcContainerMountBasicFS(void)
  { /proc/sys, /proc/sys, NULL, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY 
 },
  { sysfs, /sys, sysfs, NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV },
  { sysfs, /sys, sysfs, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY },
 +{ securityfs, /sys/kernel/security, securityfs, NULL, 
 MS_NOSUID|MS_NOEXEC|MS_NODEV },
 +{ securityfs, /sys/kernel/security, securityfs, NULL, 
 MS_BIND|MS_REMOUNT|MS_RDONLY },
  #if WITH_SELINUX
  { SELINUX_MOUNT, SELINUX_MOUNT, selinuxfs, NULL, 
 MS_NOSUID|MS_NOEXEC|MS_NODEV },
  { SELINUX_MOUNT, SELINUX_MOUNT, NULL, NULL, 
 MS_BIND|MS_REMOUNT|MS_RDONLY },
 

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH] LXC: Improved check before mounting securityfs

2013-10-07 Thread Gao feng
On 10/07/2013 09:04 PM, Daniel P. Berrange wrote:
 On Mon, Oct 07, 2013 at 12:52:30PM +0300, Bogdan Purcareata wrote:
 Securityfs kernel support may not be available on all platforms
 running libvirt containers. Since securityfs receives special
 handling in the context of user namespaces, make an additional
 check to see if it is supported, by inspecting /proc/filesystems.

 Making this check for all lxcBasicMounts is a bit tedious, since
 the /proc filesystem is first unmounted from host, so the
 /proc/filesystems list should be saved before unmounting, to be
 available at all times. However, checks for the support for /proc
 or /sys are superfluous.
 
 I actually don't think it is correct to base it on /proc/filesystems.
 
 The intent of this code is that the container setup match the host
 OS setup for these special filesystems. So the container should
 have it mounted, if and only if, the host has it mounted.
 
 We had attempted todo this by using access(/the/path), but this is
 flawed because a) we were looking at the wrong path (the container
 path, not the host path) and b) the directory can exist even if the
 FS isn't mounted.

We already mount sysfs on /sys before we mount securityfs in the container,
so the path /sys/kernel/security is right; the security directory
is created when we mount sysfs.

I read the systemd code: systemd doesn't mount securityfs in a container
environment, so I don't know what problem commit
6807238d87fd93dee30038bea1e8582a5f0a9fe7
is trying to resolve.
 
 What we should have done here is to check whether the path in
 question is a mount point on the host. This should automatically
 do the right thing if the kernel does not have the filesystem
 in question compiled, as well as if it isn't mounted in the
 host. I'll copy you on a patch which tries todo that, so can you
 test it with your kenrels.
 

 In the long run, to support the addition of new filesystems in
 lxcBasicMounts, an additional optional flag should be introduced,
 to mark that for a specific filesystem, the code should first check
 for support in the kernel, before mounting it. For mandatory
 filesystems, if mounting them fails, creating the container fails.

 Right now, check for support only for securityfs, since right now
 it is the only special case.
 
 I'm including flags in the mount table so we can avoid this
 special casing as you suggest.
 
 Daniel
 

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH 1/2] free cmd in virNetDevVethDelete

2013-10-06 Thread Gao feng
On 10/04/2013 07:03 PM, Daniel P. Berrange wrote:
 On Fri, Oct 04, 2013 at 06:53:52PM +0800, Gao feng wrote:
 Signed-off-by: Gao feng gaof...@cn.fujitsu.com
 ---
  src/util/virnetdevveth.c | 12 +---
  1 file changed, 9 insertions(+), 3 deletions(-)

 diff --git a/src/util/virnetdevveth.c b/src/util/virnetdevveth.c
 index 403961b..1085217 100644
 --- a/src/util/virnetdevveth.c
 +++ b/src/util/virnetdevveth.c
 @@ -196,6 +196,7 @@ int virNetDevVethDelete(const char *veth)
  {
  virCommandPtr cmd = virCommandNewArgList(ip, link, del, veth, 
 NULL);
  int status;
 +int ret = -1;
  
  if (virCommandRun(cmd, status)  0)
  return -1;
 
 This needs to jump to cleanup too.
 

oops, thanks!

 @@ -203,11 +204,16 @@ int virNetDevVethDelete(const char *veth)
  if (status != 0) {
  if (!virNetDevExists(veth)) {
  VIR_DEBUG(Device %s already deleted (by kernel namespace 
 cleanup), veth);
 -return 0;
 +ret = 0;
 +goto cleanup;
  }
  virReportError(VIR_ERR_INTERNAL_ERROR,
 _(Failed to delete veth device %s), veth);
 -return -1;
 +goto cleanup;
  }
 -return 0;
 +
 +ret = 0;
 +cleanup:
 +virCommandFree(cmd);
 +return ret;
  }
 
 ACK, I'll push with the fix mentioned above
 
 Daniel
 

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH v2] LXC: Detect fs support. Mount only supported filesystems

2013-10-06 Thread Gao feng
On 10/04/2013 07:33 PM, Purcareata Bogdan-B43198 wrote:

 +/*
 + * This function attempts to detect kernel support
 + * for a specific filesystem type. This is done by
 + * inspecting /proc/filesystems.
 + */
 +static int lxcCheckFSSupport(const char *fs_type)
 +{
 +FILE *fp = NULL;
 +int ret = -1;
 +const char *fslist = /proc/filesystems;
 +char *line = NULL;
 +char *type;
 +size_t n;
 +
 +/* there should be no problem mounting an entry
 + * with NULL fs type, hence NULL fs types are
 + * supported */
 +if (!fs_type) {
 +   ret = 1;
 +   goto out;
 +}
 +
 +VIR_DEBUG(Checking kernel support for %s in %s, fs_type, fslist);
 +
 +if (!(fp = fopen(fslist, r))) {

 I don't know if we can open /proc/filesystems successfully here if container
 shares
 root directory with host, since the /proc filesystem has been unmounted in
 lxcContainerUnmountForSharedRoot.
 
 Right. I just noticed the search for proc fails, since /proc/filesystem 
 requires procfs to be mounted. (Un)fortunately, my handling of 
 lxcCheckFSSupport() bypassed this error, and mounted procfs anyways. I will 
 update the code with a proper handle for the error code. I just don't see how 
 I can handle all filesystem entries in an uniform manner, since each one is 
 so special.
 


So save the list of supported filesystems before we unmount the proc filesystem,
and in lxcCheckFSSupport
use this list to check whether the filesystem is supported by the kernel.

BTW, it's better to report the fopen error to the user.

 +virReportSystemError(errno,
 + _(Unable to read %s),
 + fslist);
 +goto out;
 +}
 +
 +while(getline(line, n, fp)  0) {
 +   type = strstr(line, fs_type);
 +
 +   if (!type)
 +   continue;
 +
 +   if (!strncmp(type, fs_type, strlen(type))) {

 The strncmp() function compares the only first (at most) n bytes of s1 and 
 s2.
 please use STREQ here.
 
 Thanks, I will update.
 

 +   ret = 1;
 +   goto cleanup;
 +   }
 +}
 +
 +if (ferror(fp)) {
 +   virReportSystemError(errno,
 + _(Error reading line from %s),
 + fslist);
 +goto cleanup;
 +}
 +
 +VIR_DEBUG(No kernel support for %s, fs_type);
 +
 +ret = 0;
 +

 You set ret to 0 here, so the return value 0 means this filesystem
 is unsupported by kernel, right? what the meaning of return value -1?

 you return -1 when ferror(fp) is true.
 
 So I thought it would be like this:
 - -1 - error encountered
 - 0 - no error, no kernel support for the filesystem
 - 1 - no error, kernel support present
 

 +cleanup:
 +VIR_FREE(line);
 +VIR_FORCE_FCLOSE(fp);
 +out:
 +return ret;
 +}
 +
  static int lxcContainerGetSubtree(const char *prefix,
char ***mountsret,
size_t *nmountsret)
 @@ -789,17 +850,23 @@ static int lxcContainerMountBasicFS(bool
 userns_enabled)
  for (i = 0; i  ARRAY_CARDINALITY(lxcBasicMounts); i++) {
  virLXCBasicMountInfo const *mnt = lxcBasicMounts[i];
  const char *srcpath = NULL;
 +   const char *dstpath = NULL;

  VIR_DEBUG(Processing %s - %s,
mnt-src, mnt-dst);

  srcpath = mnt-src;
 +   dstpath = mnt-dst;

  /* Skip if mount doesn't exist in source */
  if ((srcpath[0] == '/') 
  (access(srcpath, R_OK)  0))
  continue;

 +   if ((access(dstpath, R_OK)  0) || /* mount is not present on host */
 +   (!lxcCheckFSSupport(mnt-type))) /* no fs support in kernel */
 +   continue;
 +

 The access is in the incorrect place, it should be called after we create 
 mnt-
 dst.
 so Move this check after virFileMakePath(mnt-dst).
 
 My specific problem was that mounting security failed even before reaching 
 the actual mount syscall. 
 
 It failed when doing virFileMakePath(/sys/kernel/securityfs), because /sys 
 is previously mounted read only (I realized this just now).
 
 root@p4080ds:/sys/kernel# mkdir securityfs
 mkdir: cannot create directory 'securityfs': No such file or directory
 

I don't know how this occurred, since the directory securityfs is created when 
you mount sysfs.
Actually virFileMakePath will not create securityfs directory since it already 
exists.

Thanks

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH v2] LXC: Detect fs support. Mount only supported filesystems

2013-10-06 Thread Gao feng
On 10/07/2013 11:44 AM, Gao feng wrote:
 On 10/04/2013 07:33 PM, Purcareata Bogdan-B43198 wrote:
 
 +/*
 + * This function attempts to detect kernel support
 + * for a specific filesystem type. This is done by
 + * inspecting /proc/filesystems.
 + */
 +static int lxcCheckFSSupport(const char *fs_type)
 +{
 +FILE *fp = NULL;
 +int ret = -1;
 +const char *fslist = /proc/filesystems;
 +char *line = NULL;
 +char *type;
 +size_t n;
 +
 +/* there should be no problem mounting an entry
 + * with NULL fs type, hence NULL fs types are
 + * supported */
 +if (!fs_type) {
 +  ret = 1;
 +  goto out;
 +}
 +
 +VIR_DEBUG(Checking kernel support for %s in %s, fs_type, fslist);
 +
 +if (!(fp = fopen(fslist, r))) {

 I don't know if we can open /proc/filesystems successfully here if container
 shares
 root directory with host, since the /proc filesystem has been unmounted in
 lxcContainerUnmountForSharedRoot.

 Right. I just noticed the search for proc fails, since /proc/filesystem 
 requires procfs to be mounted. (Un)fortunately, my handling of 
 lxcCheckFSSupport() bypassed this error, and mounted procfs anyways. I will 
 update the code with a proper handle for the error code. I just don't see 
 how I can handle all filesystem entries in an uniform manner, since each one 
 is so special.


 
 So save the supported filesystem list before we unmount the proc filesystem, 
 and in lxcCheckFSSupport
 use this list to check if the filesystem is supported by kernel.
 
 btw it's better to return the error of fopen to user.
 
 +virReportSystemError(errno,
 + _(Unable to read %s),
 + fslist);
 +goto out;
 +}
 +
 +while(getline(line, n, fp)  0) {
 +  type = strstr(line, fs_type);
 +
 +  if (!type)
 +  continue;
 +
 +  if (!strncmp(type, fs_type, strlen(type))) {

 The strncmp() function compares the only first (at most) n bytes of s1 and 
 s2.
 please use STREQ here.

 Thanks, I will update.


 +  ret = 1;
 +  goto cleanup;
 +  }
 +}
 +
 +if (ferror(fp)) {
 +  virReportSystemError(errno,
 + _(Error reading line from %s),
 + fslist);
 +goto cleanup;
 +}
 +
 +VIR_DEBUG(No kernel support for %s, fs_type);
 +
 +ret = 0;
 +

 You set ret to 0 here, so the return value 0 means this filesystem
 is unsupported by kernel, right? what the meaning of return value -1?

 you return -1 when ferror(fp) is true.

 So I thought it would be like this:
 - -1 - error encountered
 - 0 - no error, no kernel support for the filesystem
 - 1 - no error, kernel support present


 +cleanup:
 +VIR_FREE(line);
 +VIR_FORCE_FCLOSE(fp);
 +out:
 +return ret;
 +}
 +
  static int lxcContainerGetSubtree(const char *prefix,
char ***mountsret,
size_t *nmountsret)
 @@ -789,17 +850,23 @@ static int lxcContainerMountBasicFS(bool
 userns_enabled)
  for (i = 0; i  ARRAY_CARDINALITY(lxcBasicMounts); i++) {
  virLXCBasicMountInfo const *mnt = lxcBasicMounts[i];
  const char *srcpath = NULL;
 +  const char *dstpath = NULL;

  VIR_DEBUG(Processing %s - %s,
mnt-src, mnt-dst);

  srcpath = mnt-src;
 +  dstpath = mnt-dst;

  /* Skip if mount doesn't exist in source */
  if ((srcpath[0] == '/') 
  (access(srcpath, R_OK)  0))
  continue;

 +  if ((access(dstpath, R_OK)  0) || /* mount is not present on host */
 +  (!lxcCheckFSSupport(mnt-type))) /* no fs support in kernel */
 +  continue;
 +

 The access is in the incorrect place, it should be called after we create 
 mnt-
 dst.
 so Move this check after virFileMakePath(mnt-dst).

 My specific problem was that mounting security failed even before reaching 
 the actual mount syscall. 

 It failed when doing virFileMakePath(/sys/kernel/securityfs), because /sys 
 is previously mounted read only (I realized this just now).

 root@p4080ds:/sys/kernel# mkdir securityfs
 mkdir: cannot create directory 'securityfs': No such file or directory

 
 I don't know how this occurred, since the directory securityfs is created 
 when you mount sysfs.
 Actually virFileMakePath will not create securityfs directory since it 
 already exists.
 

So I think you can remove the access(dstpath, R_OK) check. If securityfs is
supported, virFileMakePath must succeed.

The reason creating the securityfs directory fails is that sysfs does not
support creating files.

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH 2/5] Avoid reporting an error if veth device is already deleted

2013-10-04 Thread Gao feng
On 10/02/2013 07:31 PM, Daniel P. Berrange wrote:
 From: Daniel P. Berrange berra...@redhat.com
 
 The kernel automatically destroys veth devices when cleaning
 up the container network namespace. During normal shutdown, it
 is thus likely that the attempt to run 'ip link del vethN'
 will fail. If it fails, check if the device exists, and avoid
 reporting an error if it has gone. This switches to use the
 virCommand APIs instead of virRun too.
 
 Signed-off-by: Daniel P. Berrange berra...@redhat.com
 ---
  src/util/virnetdevveth.c | 17 ++---
  1 file changed, 14 insertions(+), 3 deletions(-)
 
 diff --git a/src/util/virnetdevveth.c b/src/util/virnetdevveth.c
 index 039767f..c0d32c4 100644
 --- a/src/util/virnetdevveth.c
 +++ b/src/util/virnetdevveth.c
 @@ -161,9 +161,20 @@ cleanup:
   */
  int virNetDevVethDelete(const char *veth)
  {
 -const char *argv[] = {ip, link, del, veth, NULL};
 +virCommandPtr cmd = virCommandNewArgList(ip, link, del, veth, 
 NULL);
 +int status;
  
 -VIR_DEBUG(veth: %s, veth);
 +if (virCommandRun(cmd, status)  0)
 +return -1;
  
 -return virRun(argv, NULL);
 +if (status != 0) {
 +if (!virNetDevExists(veth)) {
 +VIR_DEBUG(Device %s already deleted (by kernel namespace 
 cleanup), veth);
 +return 0;
 +}
 +virReportError(VIR_ERR_INTERNAL_ERROR,
 +   _(Failed to delete veth device %s), veth);
 +return -1;
 +}

virCommandFree(cmd) is missing here.

 +return 0;
  }
 

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH v2] LXC: Detect fs support. Mount only supported filesystems

2013-10-04 Thread Gao feng
On 10/02/2013 10:05 PM, Bogdan Purcareata wrote:
 Kept ((access(dstpath, R_OK)  0) || (!lxcCheckFSSupport(mnt-type)))
 when determining support for the mount. Even if the filesystem type is
 supported, there is still a chance to fail when building the dstpath
 (virFileMakePath). If that call fails, starting the container will fail.
 Specifically encountered this problem for securityfs, as I was unable
 to mkdir /sys/kernel/security.
 
 Signed-off-by: Bogdan Purcareata bogdan.purcare...@freescale.com
 ---
  src/lxc/lxc_container.c | 67 
 +
  1 file changed, 67 insertions(+)
 
 diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c
 index 989e920..496443d 100644
 --- a/src/lxc/lxc_container.c
 +++ b/src/lxc/lxc_container.c
 @@ -509,6 +509,67 @@ static int lxcContainerChildMountSort(const void *a, 
 const void *b)
  # define MS_SLAVE(119)
  #endif
  
 +/*
 + * This function attempts to detect kernel support
 + * for a specific filesystem type. This is done by
 + * inspecting /proc/filesystems.
 + */
 +static int lxcCheckFSSupport(const char *fs_type)
 +{
 +FILE *fp = NULL;
 +int ret = -1;
 +const char *fslist = /proc/filesystems;
 +char *line = NULL;
 +char *type;
 +size_t n;
 +
 +/* there should be no problem mounting an entry
 + * with NULL fs type, hence NULL fs types are
 + * supported */
 +if (!fs_type) {
 + ret = 1;
 + goto out;
 +}
 +
 +VIR_DEBUG(Checking kernel support for %s in %s, fs_type, fslist);
 +
 +if (!(fp = fopen(fslist, r))) {

I don't know if we can open /proc/filesystems successfully here if the
container shares its root directory with the host, since the /proc filesystem
has already been unmounted in lxcContainerUnmountForSharedRoot.

 +virReportSystemError(errno,
 + _(Unable to read %s),
 + fslist);
 +goto out;
 +}
 +
 +while(getline(line, n, fp)  0) {
 + type = strstr(line, fs_type);
 +
 + if (!type)
 + continue;
 +
 + if (!strncmp(type, fs_type, strlen(type))) {

The strncmp() function compares only the first (at most) n bytes of s1 and s2.
Please use STREQ here.

 + ret = 1;
 + goto cleanup;
 + }
 +}
 +
 +if (ferror(fp)) {
 + virReportSystemError(errno,
 + _(Error reading line from %s),
 + fslist);
 +goto cleanup;
 +}
 +
 +VIR_DEBUG(No kernel support for %s, fs_type);
 +
 +ret = 0;
 +

You set ret to 0 here, so a return value of 0 means this filesystem
is unsupported by the kernel, right? Then what is the meaning of a return
value of -1?

You return -1 when ferror(fp) is true.

 +cleanup:
 +VIR_FREE(line);
 +VIR_FORCE_FCLOSE(fp);
 +out:
 +return ret;
 +}
 +
  static int lxcContainerGetSubtree(const char *prefix,
char ***mountsret,
size_t *nmountsret)
 @@ -789,17 +850,23 @@ static int lxcContainerMountBasicFS(bool userns_enabled)
  for (i = 0; i  ARRAY_CARDINALITY(lxcBasicMounts); i++) {
  virLXCBasicMountInfo const *mnt = lxcBasicMounts[i];
  const char *srcpath = NULL;
 + const char *dstpath = NULL;
  
  VIR_DEBUG(Processing %s - %s,
mnt-src, mnt-dst);
  
  srcpath = mnt-src;
 + dstpath = mnt-dst;
  
  /* Skip if mount doesn't exist in source */
  if ((srcpath[0] == '/') 
  (access(srcpath, R_OK)  0))
  continue;
  
 + if ((access(dstpath, R_OK)  0) || /* mount is not present on host */
 + (!lxcCheckFSSupport(mnt-type))) /* no fs support in kernel */
 + continue;
 +

The access() check is in the wrong place; it should be called after we create
mnt->dst.
So move this check after virFileMakePath(mnt->dst).

  #if WITH_SELINUX
  if (STREQ(mnt-src, SELINUX_MOUNT) 
  (!is_selinux_enabled() || userns_enabled))
 

Thanks

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


[libvirt] [PATCH 2/2] free cmd in virNetDevVethCreate

2013-10-04 Thread Gao feng
Signed-off-by: Gao feng gaof...@cn.fujitsu.com
---
 src/util/virnetdevveth.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/util/virnetdevveth.c b/src/util/virnetdevveth.c
index 1085217..2d727f1 100644
--- a/src/util/virnetdevveth.c
+++ b/src/util/virnetdevveth.c
@@ -110,6 +110,7 @@ int virNetDevVethCreate(char** veth1, char** veth2)
 char *veth1auto = NULL;
 char *veth2auto = NULL;
 int vethNum = 0;
+virCommandPtr cmd = NULL;
 size_t i;
 
 /*
@@ -139,7 +140,7 @@ int virNetDevVethCreate(char** veth1, char** veth2)
 vethNum = veth2num + 1;
 }
 
-virCommandPtr cmd = virCommandNew(ip);
+cmd = virCommandNew(ip);
 virCommandAddArgList(cmd, link, add,
  *veth1 ? *veth1 : veth1auto,
  type, veth, peer, name,
@@ -169,6 +170,7 @@ int virNetDevVethCreate(char** veth1, char** veth2)
   status);
 VIR_FREE(veth1auto);
 VIR_FREE(veth2auto);
+virCommandFree(cmd);
 }
 
 virReportError(VIR_ERR_INTERNAL_ERROR,
@@ -176,6 +178,7 @@ int virNetDevVethCreate(char** veth1, char** veth2)
MAX_VETH_RETRIES);
 
 cleanup:
+virCommandFree(cmd);
 VIR_FREE(veth1auto);
 VIR_FREE(veth2auto);
 return ret;
-- 
1.8.3.1

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


[libvirt] [PATCH 1/2] free cmd in virNetDevVethDelete

2013-10-04 Thread Gao feng
Signed-off-by: Gao feng gaof...@cn.fujitsu.com
---
 src/util/virnetdevveth.c | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/util/virnetdevveth.c b/src/util/virnetdevveth.c
index 403961b..1085217 100644
--- a/src/util/virnetdevveth.c
+++ b/src/util/virnetdevveth.c
@@ -196,6 +196,7 @@ int virNetDevVethDelete(const char *veth)
 {
 virCommandPtr cmd = virCommandNewArgList(ip, link, del, veth, NULL);
 int status;
+int ret = -1;
 
 if (virCommandRun(cmd, status)  0)
 return -1;
@@ -203,11 +204,16 @@ int virNetDevVethDelete(const char *veth)
 if (status != 0) {
 if (!virNetDevExists(veth)) {
 VIR_DEBUG(Device %s already deleted (by kernel namespace 
cleanup), veth);
-return 0;
+ret = 0;
+goto cleanup;
 }
 virReportError(VIR_ERR_INTERNAL_ERROR,
_(Failed to delete veth device %s), veth);
-return -1;
+goto cleanup;
 }
-return 0;
+
+ret = 0;
+cleanup:
+virCommandFree(cmd);
+return ret;
 }
-- 
1.8.3.1

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


  1   2   3   4   5   >