[PATCH v2] vhost: add vsock compat ioctl
This will allow usage of vsock from 32-bit binaries on a 64-bit kernel. Signed-off-by: Sonny Rao <sonny...@chromium.org> --- drivers/vhost/vsock.c | 11 +++ 1 file changed, 11 insertions(+) diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 0d14e2ff19f16..ee0c385d9fe54 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -699,12 +699,23 @@ static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl, } } +#ifdef CONFIG_COMPAT +static long vhost_vsock_dev_compat_ioctl(struct file *f, unsigned int ioctl, +unsigned long arg) +{ + return vhost_vsock_dev_ioctl(f, ioctl, (unsigned long)compat_ptr(arg)); +} +#endif + static const struct file_operations vhost_vsock_fops = { .owner = THIS_MODULE, .open = vhost_vsock_dev_open, .release= vhost_vsock_dev_release, .llseek = noop_llseek, .unlocked_ioctl = vhost_vsock_dev_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = vhost_vsock_dev_compat_ioctl, +#endif }; static struct miscdevice vhost_vsock_misc = { -- 2.13.5
[PATCH v2] vhost: add vsock compat ioctl
This will allow usage of vsock from 32-bit binaries on a 64-bit kernel. Signed-off-by: Sonny Rao --- drivers/vhost/vsock.c | 11 +++ 1 file changed, 11 insertions(+) diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 0d14e2ff19f16..ee0c385d9fe54 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -699,12 +699,23 @@ static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl, } } +#ifdef CONFIG_COMPAT +static long vhost_vsock_dev_compat_ioctl(struct file *f, unsigned int ioctl, +unsigned long arg) +{ + return vhost_vsock_dev_ioctl(f, ioctl, (unsigned long)compat_ptr(arg)); +} +#endif + static const struct file_operations vhost_vsock_fops = { .owner = THIS_MODULE, .open = vhost_vsock_dev_open, .release= vhost_vsock_dev_release, .llseek = noop_llseek, .unlocked_ioctl = vhost_vsock_dev_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = vhost_vsock_dev_compat_ioctl, +#endif }; static struct miscdevice vhost_vsock_misc = { -- 2.13.5
Re: [PATCH] vhost: add vsock compat ioctl
On Wed, Mar 14, 2018 at 12:05 PM, Michael S. Tsirkin <m...@redhat.com> wrote: > On Wed, Mar 14, 2018 at 10:26:05AM -0700, Sonny Rao wrote: >> This will allow usage of vsock from 32-bit binaries on a 64-bit >> kernel. >> >> Signed-off-by: Sonny Rao <sonny...@chromium.org> > > I think you need to convert the pointer argument though. > Something along the lines of: > > #ifdef CONFIG_COMPAT > static long vhost_vsock_dev_compat_ioctl(struct file *f, unsigned int ioctl, > unsigned long arg) > { > return vhost_vsock_dev_ioctl(f, ioctl, (unsigned > long)compat_ptr(arg)); > } > #endif Ok, thanks for pointing that out -- it has worked for me so far, but I'll re-spin as you suggested. > > > >> --- >> drivers/vhost/vsock.c | 1 + >> 1 file changed, 1 insertion(+) >> >> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c >> index 0d14e2ff19f16..d0e65e92110e5 100644 >> --- a/drivers/vhost/vsock.c >> +++ b/drivers/vhost/vsock.c >> @@ -705,6 +705,7 @@ static const struct file_operations vhost_vsock_fops = { >> .release= vhost_vsock_dev_release, >> .llseek = noop_llseek, >> .unlocked_ioctl = vhost_vsock_dev_ioctl, >> + .compat_ioctl = vhost_vsock_dev_ioctl, >> }; >> >> static struct miscdevice vhost_vsock_misc = { >> -- >> 2.13.5
Re: [PATCH] vhost: add vsock compat ioctl
On Wed, Mar 14, 2018 at 12:05 PM, Michael S. Tsirkin wrote: > On Wed, Mar 14, 2018 at 10:26:05AM -0700, Sonny Rao wrote: >> This will allow usage of vsock from 32-bit binaries on a 64-bit >> kernel. >> >> Signed-off-by: Sonny Rao > > I think you need to convert the pointer argument though. > Something along the lines of: > > #ifdef CONFIG_COMPAT > static long vhost_vsock_dev_compat_ioctl(struct file *f, unsigned int ioctl, > unsigned long arg) > { > return vhost_vsock_dev_ioctl(f, ioctl, (unsigned > long)compat_ptr(arg)); > } > #endif Ok, thanks for pointing that out -- it has worked for me so far, but I'll re-spin as you suggested. > > > >> --- >> drivers/vhost/vsock.c | 1 + >> 1 file changed, 1 insertion(+) >> >> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c >> index 0d14e2ff19f16..d0e65e92110e5 100644 >> --- a/drivers/vhost/vsock.c >> +++ b/drivers/vhost/vsock.c >> @@ -705,6 +705,7 @@ static const struct file_operations vhost_vsock_fops = { >> .release= vhost_vsock_dev_release, >> .llseek = noop_llseek, >> .unlocked_ioctl = vhost_vsock_dev_ioctl, >> + .compat_ioctl = vhost_vsock_dev_ioctl, >> }; >> >> static struct miscdevice vhost_vsock_misc = { >> -- >> 2.13.5
[PATCH] vhost: add vsock compat ioctl
This will allow usage of vsock from 32-bit binaries on a 64-bit kernel. Signed-off-by: Sonny Rao <sonny...@chromium.org> --- drivers/vhost/vsock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 0d14e2ff19f16..d0e65e92110e5 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -705,6 +705,7 @@ static const struct file_operations vhost_vsock_fops = { .release= vhost_vsock_dev_release, .llseek = noop_llseek, .unlocked_ioctl = vhost_vsock_dev_ioctl, + .compat_ioctl = vhost_vsock_dev_ioctl, }; static struct miscdevice vhost_vsock_misc = { -- 2.13.5
[PATCH] vhost: add vsock compat ioctl
This will allow usage of vsock from 32-bit binaries on a 64-bit kernel. Signed-off-by: Sonny Rao --- drivers/vhost/vsock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 0d14e2ff19f16..d0e65e92110e5 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -705,6 +705,7 @@ static const struct file_operations vhost_vsock_fops = { .release= vhost_vsock_dev_release, .llseek = noop_llseek, .unlocked_ioctl = vhost_vsock_dev_ioctl, + .compat_ioctl = vhost_vsock_dev_ioctl, }; static struct miscdevice vhost_vsock_misc = { -- 2.13.5
[PATCH] vhost: fix vhost ioctl signature to build with clang
Clang is particularly anal about signed vs unsigned comparisons and doesn't like the fact that some ioctl numbers set the MSB, so we get this error when trying to build vhost on aarch64: drivers/vhost/vhost.c:1400:7: error: overflow converting case value to switch condition type (3221794578 to 18446744072636378898) [-Werror, -Wswitch] case VHOST_GET_VRING_BASE: 3221794578 is 0xC008AF12 in hex 18446744072636378898 is 0xFFFFFFFFC008AF12 in hex Fix this by using unsigned ints in the function signature for vhost_vring_ioctl(). Signed-off-by: Sonny Rao <sonny...@chromium.org> --- drivers/vhost/vhost.c | 2 +- drivers/vhost/vhost.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 1b3e8d2d5c8b4..5316319d84081 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -1337,7 +1337,7 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) return -EFAULT; } -long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp) +long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp) { struct file *eventfp, *filep = NULL; bool pollstart = false, pollstop = false; diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index ac4b6056f19ae..d8ee85ae8fdcc 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -45,7 +45,7 @@ void vhost_poll_stop(struct vhost_poll *poll); void vhost_poll_flush(struct vhost_poll *poll); void vhost_poll_queue(struct vhost_poll *poll); void vhost_work_flush(struct vhost_dev *dev, struct vhost_work *work); -long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp); +long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp); struct vhost_log { u64 addr; @@ -177,7 +177,7 @@ void vhost_dev_reset_owner(struct vhost_dev *, struct vhost_umem *); void vhost_dev_cleanup(struct vhost_dev *); void vhost_dev_stop(struct vhost_dev *); long vhost_dev_ioctl(struct vhost_dev *, 
unsigned int ioctl, void __user *argp); -long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp); +long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp); int vhost_vq_access_ok(struct vhost_virtqueue *vq); int vhost_log_access_ok(struct vhost_dev *); -- 2.13.5
[PATCH] vhost: fix vhost ioctl signature to build with clang
Clang is particularly anal about signed vs unsigned comparisons and doesn't like the fact that some ioctl numbers set the MSB, so we get this error when trying to build vhost on aarch64: drivers/vhost/vhost.c:1400:7: error: overflow converting case value to switch condition type (3221794578 to 18446744072636378898) [-Werror, -Wswitch] case VHOST_GET_VRING_BASE: 3221794578 is 0xC008AF12 in hex 18446744072636378898 is 0xFFFFFFFFC008AF12 in hex Fix this by using unsigned ints in the function signature for vhost_vring_ioctl(). Signed-off-by: Sonny Rao --- drivers/vhost/vhost.c | 2 +- drivers/vhost/vhost.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 1b3e8d2d5c8b4..5316319d84081 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -1337,7 +1337,7 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) return -EFAULT; } -long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp) +long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp) { struct file *eventfp, *filep = NULL; bool pollstart = false, pollstop = false; diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index ac4b6056f19ae..d8ee85ae8fdcc 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -45,7 +45,7 @@ void vhost_poll_stop(struct vhost_poll *poll); void vhost_poll_flush(struct vhost_poll *poll); void vhost_poll_queue(struct vhost_poll *poll); void vhost_work_flush(struct vhost_dev *dev, struct vhost_work *work); -long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp); +long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp); struct vhost_log { u64 addr; @@ -177,7 +177,7 @@ void vhost_dev_reset_owner(struct vhost_dev *, struct vhost_umem *); void vhost_dev_cleanup(struct vhost_dev *); void vhost_dev_stop(struct vhost_dev *); long vhost_dev_ioctl(struct vhost_dev *, unsigned int ioctl, void 
__user *argp); -long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp); +long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp); int vhost_vq_access_ok(struct vhost_virtqueue *vq); int vhost_log_access_ok(struct vhost_dev *); -- 2.13.5
Re: [PATCH RFC v2] Add /proc/pid/smaps_rollup
On Thu, Aug 10, 2017 at 3:58 AM, Michal Hocko wrote: > On Thu 10-08-17 03:23:23, Daniel Colascione wrote: >> Thanks for taking a look at the patch! >> >> On Thu, Aug 10 2017, Michal Hocko wrote: >> > [CC linux-api - the patch was posted here >> > http://lkml.kernel.org/r/20170810001557.147285-1-dan...@google.com] >> > >> > On Thu 10-08-17 13:38:31, Minchan Kim wrote: >> >> On Wed, Aug 09, 2017 at 05:15:57PM -0700, Daniel Colascione wrote: >> >> > /proc/pid/smaps_rollup is a new proc file that improves the >> >> > performance of user programs that determine aggregate memory >> >> > statistics (e.g., total PSS) of a process. >> >> > >> >> > Android regularly "samples" the memory usage of various processes in >> >> > order to balance its memory pool sizes. This sampling process involves >> >> > opening /proc/pid/smaps and summing certain fields. For very large >> >> > processes, sampling memory use this way can take several hundred >> >> > milliseconds, due mostly to the overhead of the seq_printf calls in >> >> > task_mmu.c. >> > >> > Have you tried to reduce that overhead? E.g. by replacing seq_printf by >> > something more simple >> > http://lkml.kernel.org/r/20160817130320.gc20...@dhcp22.suse.cz? >> >> I haven't tried that yet, but if I'm reading that thread correctly, it >> looks like using more efficient printing primitives gives us a 7% >> speedup. The smaps_rollup patch gives us a much bigger speedup while >> reusing almost all the smaps code, so it seems easier and simpler than a >> bunch of incremental improvements to smaps. And even an efficient smaps >> would have to push 2MB through seq_file for the 3000-VMA process case. > > The thing is that more users would benefit from a more efficient > /proc/pid/smaps call. Maybe we can use some caching tricks etc... We > should make sure that existing options should be attempted before a new > user visible interface is added. 
It is kind of sad that the real work > (pte walk) is less expensive than formating the output and copying it to > the userspace... > >> > How often you you need to read this information? >> >> It varies depending on how often processes change state. We sample a >> short time (tens of seconds) after processes change state (e.g., enters >> foreground) and every few minutes thereafter. We're particularly >> concerned from an energy perspective about needlessly burning CPU on >> background samples. > > Please make sure this is documented in the patch along with some numbers > ideally. > > [...] > >> >> FYI, there was trial but got failed at that time so in this time, >> >> https://marc.info/?l=linux-kernel=147310650003277=2 >> >> http://www.mail-archive.com/linux-kernel@vger.kernel.org/msg1229163.html >> > >> > Yes I really disliked the previous attempt and this one is not all that >> > better. The primary unanswered question back then was a relevant >> > usecase. Back then it was argued [1] that PSS was useful for userspace >> > OOM handling but arguments were rather dubious. Follow up questions [2] >> > shown that the useage of PSS was very workload specific. Minchan has >> > noted some usecase as well but not very specific either. >> >> Anyway, I see what you mean about PSS being iffy for user-space OOM >> processing (because PSS doesn't tell you how much memory you get back in >> exchange for killing a given process at a particular moment). We're not >> using it like that. >> >> Instead, we're using the PSS samples we collect asynchronously for >> system-management tasks like fine-tuning oom_adj_score, memory use >> tracking for debugging, application-level memory-use attribution, and >> deciding whether we want to kill large processes during system idle >> maintenance windows. 
Android has been using PSS for these purposes for a >> long time; as the average process VMA count has increased and and >> devices become more efficiency-conscious, PSS-collection inefficiency >> has started to matter more. IMHO, it'd be a lot safer to optimize the >> existing PSS-collection model, which has been fine-tuned over the years, >> instead of changing the memory tracking approach entirely to work around >> smaps-generation inefficiency. > > This is really vague. Please be more specific. I actually think this is really similar to the Chrome OS use case -- we need to do proper accounting of memory from user space, and we need something more accurate than what we have now (usually RSS) to figure it out. I'm not sure what is vague about that statement? PSS is not perfect but in closed systems where we have some knowledge about what is being shared amongst process, PSS is much better than RSS and readily available. So, I disagree that this is a dubious usage -- if there's a better metric for making this kind of decision, please share it. Also I realized there's another argument for presenting this information outside of smaps which is that we expose far less information about a process and it's address space via something like
Re: [PATCH RFC v2] Add /proc/pid/smaps_rollup
On Thu, Aug 10, 2017 at 3:58 AM, Michal Hocko wrote: > On Thu 10-08-17 03:23:23, Daniel Colascione wrote: >> Thanks for taking a look at the patch! >> >> On Thu, Aug 10 2017, Michal Hocko wrote: >> > [CC linux-api - the patch was posted here >> > http://lkml.kernel.org/r/20170810001557.147285-1-dan...@google.com] >> > >> > On Thu 10-08-17 13:38:31, Minchan Kim wrote: >> >> On Wed, Aug 09, 2017 at 05:15:57PM -0700, Daniel Colascione wrote: >> >> > /proc/pid/smaps_rollup is a new proc file that improves the >> >> > performance of user programs that determine aggregate memory >> >> > statistics (e.g., total PSS) of a process. >> >> > >> >> > Android regularly "samples" the memory usage of various processes in >> >> > order to balance its memory pool sizes. This sampling process involves >> >> > opening /proc/pid/smaps and summing certain fields. For very large >> >> > processes, sampling memory use this way can take several hundred >> >> > milliseconds, due mostly to the overhead of the seq_printf calls in >> >> > task_mmu.c. >> > >> > Have you tried to reduce that overhead? E.g. by replacing seq_printf by >> > something more simple >> > http://lkml.kernel.org/r/20160817130320.gc20...@dhcp22.suse.cz? >> >> I haven't tried that yet, but if I'm reading that thread correctly, it >> looks like using more efficient printing primitives gives us a 7% >> speedup. The smaps_rollup patch gives us a much bigger speedup while >> reusing almost all the smaps code, so it seems easier and simpler than a >> bunch of incremental improvements to smaps. And even an efficient smaps >> would have to push 2MB through seq_file for the 3000-VMA process case. > > The thing is that more users would benefit from a more efficient > /proc/pid/smaps call. Maybe we can use some caching tricks etc... We > should make sure that existing options should be attempted before a new > user visible interface is added. 
It is kind of sad that the real work > (pte walk) is less expensive than formating the output and copying it to > the userspace... > >> > How often you you need to read this information? >> >> It varies depending on how often processes change state. We sample a >> short time (tens of seconds) after processes change state (e.g., enters >> foreground) and every few minutes thereafter. We're particularly >> concerned from an energy perspective about needlessly burning CPU on >> background samples. > > Please make sure this is documented in the patch along with some numbers > ideally. > > [...] > >> >> FYI, there was trial but got failed at that time so in this time, >> >> https://marc.info/?l=linux-kernel=147310650003277=2 >> >> http://www.mail-archive.com/linux-kernel@vger.kernel.org/msg1229163.html >> > >> > Yes I really disliked the previous attempt and this one is not all that >> > better. The primary unanswered question back then was a relevant >> > usecase. Back then it was argued [1] that PSS was useful for userspace >> > OOM handling but arguments were rather dubious. Follow up questions [2] >> > shown that the useage of PSS was very workload specific. Minchan has >> > noted some usecase as well but not very specific either. >> >> Anyway, I see what you mean about PSS being iffy for user-space OOM >> processing (because PSS doesn't tell you how much memory you get back in >> exchange for killing a given process at a particular moment). We're not >> using it like that. >> >> Instead, we're using the PSS samples we collect asynchronously for >> system-management tasks like fine-tuning oom_adj_score, memory use >> tracking for debugging, application-level memory-use attribution, and >> deciding whether we want to kill large processes during system idle >> maintenance windows. 
Android has been using PSS for these purposes for a >> long time; as the average process VMA count has increased and and >> devices become more efficiency-conscious, PSS-collection inefficiency >> has started to matter more. IMHO, it'd be a lot safer to optimize the >> existing PSS-collection model, which has been fine-tuned over the years, >> instead of changing the memory tracking approach entirely to work around >> smaps-generation inefficiency. > > This is really vague. Please be more specific. I actually think this is really similar to the Chrome OS use case -- we need to do proper accounting of memory from user space, and we need something more accurate than what we have now (usually RSS) to figure it out. I'm not sure what is vague about that statement? PSS is not perfect but in closed systems where we have some knowledge about what is being shared amongst process, PSS is much better than RSS and readily available. So, I disagree that this is a dubious usage -- if there's a better metric for making this kind of decision, please share it. Also I realized there's another argument for presenting this information outside of smaps which is that we expose far less information about a process and it's address space via something like this, so it's much
Re: [PATCH v4 RESEND 1/2] Documentation: tpm: add powered-while-suspended binding documentation
On Mon, Jul 3, 2017 at 5:57 AM, Jarkko Sakkinen <jarkko.sakki...@linux.intel.com> wrote: > On Tue, Jun 27, 2017 at 12:27:23PM +0200, Enric Balletbo i Serra wrote: >> Add a new powered-while-suspended property to control the behavior of the >> TPM suspend/resume. >> >> Signed-off-by: Enric Balletbo i Serra <enric.balle...@collabora.com> >> Signed-off-by: Sonny Rao <sonny...@chromium.org> >> Reviewed-by: Jason Gunthorpe <jguntho...@obsidianresearch.com> >> Reviewed-by: Jarkko Sakkinen <jarkko.sakki...@linux.intel.com> >> Acked-by: Rob Herring <r...@kernel.org> >> --- >> Changes since v3. >> - Rebased on top of linux-next >> Rob Herring >> - Split DT binding from code patch as is preferred. >> >> Did not exist on previous versions. >> >> Documentation/devicetree/bindings/security/tpm/tpm-i2c.txt | 6 ++ >> 1 file changed, 6 insertions(+) >> >> diff --git a/Documentation/devicetree/bindings/security/tpm/tpm-i2c.txt >> b/Documentation/devicetree/bindings/security/tpm/tpm-i2c.txt >> index 8cb638b..85c8216 100644 >> --- a/Documentation/devicetree/bindings/security/tpm/tpm-i2c.txt >> +++ b/Documentation/devicetree/bindings/security/tpm/tpm-i2c.txt >> @@ -8,6 +8,12 @@ Required properties: >> the firmware event log >> - linux,sml-size : size of the memory allocated for the firmware event log >> >> +Optional properties: >> + >> +- powered-while-suspended: present when the TPM is left powered on between >> + suspend and resume (makes the suspend/resume >> + callbacks do nothing). >> + >> Example (for OpenPower Systems with Nuvoton TPM 2.0 on I2C) >> -- >> >> -- >> 2.9.3 >> > > So... should I apply this? Hi, since you applied the code part, it would make sense to apply the documentation too. > > /Jarkko
Re: [PATCH v4 RESEND 1/2] Documentation: tpm: add powered-while-suspended binding documentation
On Mon, Jul 3, 2017 at 5:57 AM, Jarkko Sakkinen wrote: > On Tue, Jun 27, 2017 at 12:27:23PM +0200, Enric Balletbo i Serra wrote: >> Add a new powered-while-suspended property to control the behavior of the >> TPM suspend/resume. >> >> Signed-off-by: Enric Balletbo i Serra >> Signed-off-by: Sonny Rao >> Reviewed-by: Jason Gunthorpe >> Reviewed-by: Jarkko Sakkinen >> Acked-by: Rob Herring >> --- >> Changes since v3. >> - Rebased on top of linux-next >> Rob Herring >> - Split DT binding from code patch as is preferred. >> >> Did not exist on previous versions. >> >> Documentation/devicetree/bindings/security/tpm/tpm-i2c.txt | 6 ++ >> 1 file changed, 6 insertions(+) >> >> diff --git a/Documentation/devicetree/bindings/security/tpm/tpm-i2c.txt >> b/Documentation/devicetree/bindings/security/tpm/tpm-i2c.txt >> index 8cb638b..85c8216 100644 >> --- a/Documentation/devicetree/bindings/security/tpm/tpm-i2c.txt >> +++ b/Documentation/devicetree/bindings/security/tpm/tpm-i2c.txt >> @@ -8,6 +8,12 @@ Required properties: >> the firmware event log >> - linux,sml-size : size of the memory allocated for the firmware event log >> >> +Optional properties: >> + >> +- powered-while-suspended: present when the TPM is left powered on between >> + suspend and resume (makes the suspend/resume >> + callbacks do nothing). >> + >> Example (for OpenPower Systems with Nuvoton TPM 2.0 on I2C) >> -- >> >> -- >> 2.9.3 >> > > So... should I apply this? Hi, since you applied the code part, it would make sense to apply the documentation too. > > /Jarkko
Re: [tpmdd-devel] [PATCH] tpm: do not suspend/resume if power stays on
On Wed, Mar 1, 2017 at 3:18 PM, Jason Gunthorpe <jguntho...@obsidianresearch.com> wrote: > On Wed, Mar 01, 2017 at 02:39:09PM -0800, Sonny Rao wrote: > >> > We recently added global suspend/resume callbacks to the TPM >> > core. Those call backs do not power off the TPM, they just prepare its >> > internal state to loose power to the chip. Skipping that process on >> > hardware that does not power-off the TPM makes sense to me. >> > >> > But, Sonny, perhaps this should be a global flag in tpm_chip, not a >> > per-interface-driver override? >> >> It's a property of the board design not the chip -- maybe I'm >> misunderstanding? > > I mean do not add the code to handle this to tpm_i2c_infineon.c but in > the common chip code instead. > > tpm_i2c_infineon.c should only parse DT properties that are relavent > to the bus that delivers commands to the TPM, things that apply to how > a TPM chip operates should be handled in the core code because they > apply to any command transport bus. Oh right, sorry -- yes this makes perfect sense. > > Jason
Re: [tpmdd-devel] [PATCH] tpm: do not suspend/resume if power stays on
On Wed, Mar 1, 2017 at 3:18 PM, Jason Gunthorpe wrote: > On Wed, Mar 01, 2017 at 02:39:09PM -0800, Sonny Rao wrote: > >> > We recently added global suspend/resume callbacks to the TPM >> > core. Those call backs do not power off the TPM, they just prepare its >> > internal state to loose power to the chip. Skipping that process on >> > hardware that does not power-off the TPM makes sense to me. >> > >> > But, Sonny, perhaps this should be a global flag in tpm_chip, not a >> > per-interface-driver override? >> >> It's a property of the board design not the chip -- maybe I'm >> misunderstanding? > > I mean do not add the code to handle this to tpm_i2c_infineon.c but in > the common chip code instead. > > tpm_i2c_infineon.c should only parse DT properties that are relavent > to the bus that delivers commands to the TPM, things that apply to how > a TPM chip operates should be handled in the core code because they > apply to any command transport bus. Oh right, sorry -- yes this makes perfect sense. > > Jason
Re: [tpmdd-devel] [PATCH] tpm: do not suspend/resume if power stays on
On Wed, Mar 1, 2017 at 10:43 AM, Jason Gunthorpe wrote: >> > +Optional properties: >> > +- powered-while-suspended: present when the TPM is left powered on between >> > + suspend and resume (makes the suspend/resume callbacks do nothing). >> >> This reads like configuration rather than a HW property. > > I read this to mean the HW does not cut power to the TPM when Linux > does 'suspend'. That's correct, it is a hardware property describing whether power is removed during suspend. > > We recently added global suspend/resume callbacks to the TPM > core. Those call backs do not power off the TPM, they just prepare its > internal state to loose power to the chip. Skipping that process on > hardware that does not power-off the TPM makes sense to me. > > But, Sonny, perhaps this should be a global flag in tpm_chip, not a > per-interface-driver override? It's a property of the board design not the chip -- maybe I'm misunderstanding? > > Jason
Re: [tpmdd-devel] [PATCH] tpm: do not suspend/resume if power stays on
On Wed, Mar 1, 2017 at 10:43 AM, Jason Gunthorpe wrote: >> > +Optional properties: >> > +- powered-while-suspended: present when the TPM is left powered on between >> > + suspend and resume (makes the suspend/resume callbacks do nothing). >> >> This reads like configuration rather than a HW property. > > I read this to mean the HW does not cut power to the TPM when Linux > does 'suspend'. That's correct, it is a hardware property describing whether power is removed during suspend. > > We recently added global suspend/resume callbacks to the TPM > core. Those call backs do not power off the TPM, they just prepare its > internal state to loose power to the chip. Skipping that process on > hardware that does not power-off the TPM makes sense to me. > > But, Sonny, perhaps this should be a global flag in tpm_chip, not a > per-interface-driver override? It's a property of the board design not the chip -- maybe I'm misunderstanding? > > Jason
Re: [PATCH v5 0/3] mm, proc: Implement /proc//totmaps
On Mon, Sep 19, 2016 at 5:27 PM, Robert Foss <robert.f...@collabora.com> wrote: > > > On 2016-09-19 03:32 PM, Michal Hocko wrote: >> >> On Mon 19-09-16 11:16:31, Robert Foss wrote: >>> >>> On 2016-09-14 05:12 AM, Michal Hocko wrote: >>>> >>>> On Tue 13-09-16 13:27:39, Sonny Rao wrote: >> >> [...] >>>>> >>>>> Given that smaps >>>>> doesn't provide this in a straightforward way, what do you think is >>>>> the right way to provide this information? >>>> >>>> >>>> I would be tempted to sneak it into /proc//statm because that looks >>>> like a proper place but getting this information is not for free >>>> performance wise so I am not really sure something that relies on this >>>> file would see unexpected stalls. Maybe this could be worked around by >>>> some caching... I would suggest to check who is actually using this file >>>> (top/ps etc...) >>> >>> >>> What would this caching look like? Can any information be re-used between >>> vma walks? >> >> >> yes basically return the same value if called within HZ or something >> similar. But that assumes that statm latency really matters and it is >> called often enough. > > > Any single application querying more often than HZ, would presumably do so > for accuracy reasons. > However for multiple applications that combined query more often than HZ, > this would most definitely be helpful in terms of performance. > > @Sonny, does chromiumos fall into the first or second category? It's a single application -- and it definitely doesn't query at HZ -- especially given how long it takes to gather the data :-)
Re: [PATCH v5 0/3] mm, proc: Implement /proc//totmaps
On Mon, Sep 19, 2016 at 5:27 PM, Robert Foss wrote: > > > On 2016-09-19 03:32 PM, Michal Hocko wrote: >> >> On Mon 19-09-16 11:16:31, Robert Foss wrote: >>> >>> On 2016-09-14 05:12 AM, Michal Hocko wrote: >>>> >>>> On Tue 13-09-16 13:27:39, Sonny Rao wrote: >> >> [...] >>>>> >>>>> Given that smaps >>>>> doesn't provide this in a straightforward way, what do you think is >>>>> the right way to provide this information? >>>> >>>> >>>> I would be tempted to sneak it into /proc//statm because that looks >>>> like a proper place but getting this information is not for free >>>> performance wise so I am not really sure something that relies on >>>> this >>>> file would see unexpected stalls. Maybe this could be worked around >>>> by >>>> some caching... I would suggest to check who is actually using this >>>> file >>>> (top/ps etc...) >>> >>> >>> What would this caching look like? Can any information be re-used >>> between >>> vma walks? >> >> >> yes basically return the same value if called within HZ or something >> similar. But that assumes that statm latency really matters and it is >> called often enough. > > > Any single application querying more often than HZ, would presumably do so > for accuracy reasons. > However for multiple applications that combined query more often than HZ, > this would most definitely be helpful in terms of performance. > > @Sonny, does chromiumos fall into the first or second category? It's a single application -- and it definitely doesn't query at HZ -- especially given how long it takes to gather the data :-)
Re: [PATCH v5 0/3] mm, proc: Implement /proc//totmaps
On Mon, Sep 19, 2016 at 12:56 PM, Jann Horn <j...@thejh.net> wrote: > On Mon, Sep 19, 2016 at 09:51:13PM +0200, Michal Hocko wrote: >> [not sure why the CC list was trimmed - do no do that please unless you >> have a strong reason for that - if this was not intentional please >> restpre it] > > Ah, sorry, pressed the wrong key. > > >> On Mon 19-09-16 21:40:01, Jann Horn wrote: >> > On Mon, Sep 19, 2016 at 09:32:38PM +0200, Michal Hocko wrote: >> > > On Mon 19-09-16 11:16:31, Robert Foss wrote: >> > > > On 2016-09-14 05:12 AM, Michal Hocko wrote: >> > > > > On Tue 13-09-16 13:27:39, Sonny Rao wrote: >> > > [...] >> > > > > > Given that smaps >> > > > > > doesn't provide this in a straightforward way, what do you think is >> > > > > > the right way to provide this information? >> > > > > >> > > > > I would be tempted to sneak it into /proc//statm because that >> > > > > looks >> > > > > like a proper place but getting this information is not for free >> > > > > performance wise so I am not really sure something that relies on >> > > > > this >> > > > > file would see unexpected stalls. Maybe this could be worked around >> > > > > by >> > > > > some caching... I would suggest to check who is actually using this >> > > > > file >> > > > > (top/ps etc...) >> > > > >> > > > What would this caching look like? Can any information be re-used >> > > > between >> > > > vma walks? >> > > >> > > yes basically return the same value if called within HZ or something >> > > similar. But that assumes that statm latency really matters and it is >> > > called often enough. >> > >> > That sounds horrible. If some application decides that they want to check >> > statm directly after some action or so (like after program startup), this >> > is >> > going to give them a very bad time. That probably doesn't happen >> > often - but still. >> > >> > I can already imagine some developer going "yeah, that usleep()... 
that's >> > because the kernel API returns stale information for a couple milliseconds >> > after we do something *shrug*". >> > >> > What are you trying to optimize for? Ten users on the same machine, each of >> > which is running "top" because it looks so great? >> >> Please try to read what I wrote again. I didn't say this would be >> needed. The idea was that _if_ /proc//statm is used very _often_ >> then some caching might help to reduce the overhead. Especially when you >> consider that the information is not precise anyway. It can change >> anytime while you are doing the address space walk. Just thinking out loud here -- I haven't looked closely at the code so please bear with me :-) Instead of checking when the last read was and returning old data, what about a scheme where we still have a timestamp for last stat read on and any changes to that address space invalidate the timestamp. The invalidation could be racy because we're not too concerned about immediate accuracy -- so just a write. The main issue I could see with this is that it could cause the cacheline holding this timestamp to bounce around a lot? Maybe there's an existing solution in the page table locking that could be leveraged here to at least maintain whatever scalability enhancements are present for this type of situation where there are many updates happening in parallel.
Re: [PATCH v5 0/3] mm, proc: Implement /proc//totmaps
On Mon, Sep 19, 2016 at 12:56 PM, Jann Horn wrote: > On Mon, Sep 19, 2016 at 09:51:13PM +0200, Michal Hocko wrote: >> [not sure why the CC list was trimmed - do no do that please unless you >> have a strong reason for that - if this was not intentional please >> restpre it] > > Ah, sorry, pressed the wrong key. > > >> On Mon 19-09-16 21:40:01, Jann Horn wrote: >> > On Mon, Sep 19, 2016 at 09:32:38PM +0200, Michal Hocko wrote: >> > > On Mon 19-09-16 11:16:31, Robert Foss wrote: >> > > > On 2016-09-14 05:12 AM, Michal Hocko wrote: >> > > > > On Tue 13-09-16 13:27:39, Sonny Rao wrote: >> > > [...] >> > > > > > Given that smaps >> > > > > > doesn't provide this in a straightforward way, what do you think is >> > > > > > the right way to provide this information? >> > > > > >> > > > > I would be tempted to sneak it into /proc//statm because that >> > > > > looks >> > > > > like a proper place but getting this information is not for free >> > > > > performance wise so I am not really sure something that relies on >> > > > > this >> > > > > file would see unexpected stalls. Maybe this could be worked around >> > > > > by >> > > > > some caching... I would suggest to check who is actually using this >> > > > > file >> > > > > (top/ps etc...) >> > > > >> > > > What would this caching look like? Can any information be re-used >> > > > between >> > > > vma walks? >> > > >> > > yes basically return the same value if called within HZ or something >> > > similar. But that assumes that statm latency really matters and it is >> > > called often enough. >> > >> > That sounds horrible. If some application decides that they want to check >> > statm directly after some action or so (like after program startup), this >> > is >> > going to give them a very bad time. That probably doesn't happen >> > often - but still. >> > >> > I can already imagine some developer going "yeah, that usleep()... 
that's >> > because the kernel API returns stale information for a couple milliseconds >> > after we do something *shrug*". >> > >> > What are you trying to optimize for? Ten users on the same machine, each of >> > which is running "top" because it looks so great? >> >> Please try to read what I wrote again. I didn't say this would be >> needed. The idea was that _if_ /proc//statm is used very _often_ >> then some caching might help to reduce the overhead. Especially when you >> consider that the information is not precise anyway. It can change >> anytime while you are doing the address space walk. Just thinking out loud here -- I haven't looked closely at the code so please bear with me :-) Instead of checking when the last read was and returning old data, what about a scheme where we still have a timestamp for last stat read on and any changes to that address space invalidate the timestamp. The invalidation could be racy because we're not too concerned about immediate accuracy -- so just a write. The main issue I could see with this is that it could cause the cacheline holding this timestamp to bounce around a lot? Maybe there's an existing solution in the page table locking that could be leveraged here to at least maintain whatever scalability enhancements are present for this type of situation where there are many updates happening in parallel.
Re: [PATCH v5 0/3] mm, proc: Implement /proc//totmaps
On Tue, Sep 13, 2016 at 12:12 AM, Michal Hocko <mho...@kernel.org> wrote: > On Mon 12-09-16 10:28:53, Sonny Rao wrote: >> On Mon, Sep 12, 2016 at 10:15 AM, Michal Hocko <mho...@kernel.org> wrote: >> > On Mon 12-09-16 08:31:36, Sonny Rao wrote: > [...] >> >> but how about the other fields like Swap, Private_Dirty and >> >> Private_Shared? >> > >> > Private_Shared can be pretty confusing as well without the whole context >> > as well see my other emails in the original thread (just to remind >> > shmem/tmpfs makes all this really confusing). >> >> But this is exactly the issue -- RSS is can be just as confusing if >> you don't know something about the application. > > I agree that rss can be confusing but we will not make the situation any > better if we add yet another confusing metric. > >> I think the issue is >> how common that situation is, and you seem to believe that it's so >> uncommon that it's actually better to keep the information more >> difficult to get for those of us who know something about our systems. >> >> That's fine, I guess we just have to disagree here, thanks for look at this. > > I think you should just step back and think more about what exactly > you expect from the counter(s). I believe what you want is an > estimate of a freeable memory when the particular process dies or is > killed. That would mean resident single mapped private anonymous memory > + unlinked single mapped shareable mappings + single mapped swapped out > memory. Maybe I've missed something but it should be something along > those lines. Definitely something that the current smaps infrastructure > doesn't give you, though. Yes your description of what we want is pretty good. Having a reasonable lower bound on the estimate is fine, though we probably want to break out swapped out memory separately. Given that smaps doesn't provide this in a straightforward way, what do you think is the right way to provide this information? > -- > Michal Hocko > SUSE Labs
Re: [PATCH v5 0/3] mm, proc: Implement /proc//totmaps
On Tue, Sep 13, 2016 at 12:12 AM, Michal Hocko wrote: > On Mon 12-09-16 10:28:53, Sonny Rao wrote: >> On Mon, Sep 12, 2016 at 10:15 AM, Michal Hocko wrote: >> > On Mon 12-09-16 08:31:36, Sonny Rao wrote: > [...] >> >> but how about the other fields like Swap, Private_Dirty and >> >> Private_Shared? >> > >> > Private_Shared can be pretty confusing as well without the whole context >> > as well see my other emails in the original thread (just to remind >> > shmem/tmpfs makes all this really confusing). >> >> But this is exactly the issue -- RSS is can be just as confusing if >> you don't know something about the application. > > I agree that rss can be confusing but we will not make the situation any > better if we add yet another confusing metric. > >> I think the issue is >> how common that situation is, and you seem to believe that it's so >> uncommon that it's actually better to keep the information more >> difficult to get for those of us who know something about our systems. >> >> That's fine, I guess we just have to disagree here, thanks for look at this. > > I think you should just step back and think more about what exactly > you expect from the counter(s). I believe what you want is an > estimate of a freeable memory when the particular process dies or is > killed. That would mean resident single mapped private anonymous memory > + unlinked single mapped shareable mappings + single mapped swapped out > memory. Maybe I've missed something but it should be something along > those lines. Definitely something that the current smaps infrastructure > doesn't give you, though. Yes your description of what we want is pretty good. Having a reasonable lower bound on the estimate is fine, though we probably want to break out swapped out memory separately. Given that smaps doesn't provide this in a straightforward way, what do you think is the right way to provide this information? > -- > Michal Hocko > SUSE Labs
Re: [PATCH v5 0/3] mm, proc: Implement /proc//totmaps
On Mon, Sep 12, 2016 at 10:15 AM, Michal Hocko <mho...@kernel.org> wrote: > On Mon 12-09-16 08:31:36, Sonny Rao wrote: >> On Mon, Sep 12, 2016 at 5:02 AM, Michal Hocko <mho...@kernel.org> wrote: >> > On Mon 05-09-16 16:14:06, robert.f...@collabora.com wrote: >> >> From: Robert Foss <robert.f...@collabora.com> >> >> >> >> This series provides the /proc/PID/totmaps feature, which >> >> summarizes the information provided by /proc/PID/smaps for >> >> improved performance and usability reasons. >> >> >> >> A use case is to speed up monitoring of memory consumption in >> >> environments where RSS isn't precise. >> >> >> >> For example Chrome tends to many processes which have hundreds of VMAs >> >> with a substantial amount of shared memory, and the error of using >> >> RSS rather than PSS tends to be very large when looking at overall >> >> memory consumption. PSS isn't kept as a single number that's exported >> >> like RSS, so to calculate PSS means having to parse a very large smaps >> >> file. >> >> >> >> This process is slow and has to be repeated for many processes, and we >> >> found that the just act of doing the parsing was taking up a >> >> significant amount of CPU time, so this patch is an attempt to make >> >> that process cheaper. >> > >> > I still maintain my concerns about a single pss value. It might work in >> > a very specific situations where the consumer knows what is shared but >> > other than that the value can be more misleading than helpful. So a NACK >> > from me until I am shown that this is usable in general and still >> > helpful. >> >> I know you think Pss isn't useful in general (though I'll point out >> two other independent people said they found it useful) > > sure, and one of them admitted that the value is useful because they > _know_ the resource. The other was quite vague for me to understand > all the details. Please, try to understand that once you provide a user > API then it will carved in stone. 
If the interface is poor and ambiguous > it will bite us later. One very specific usecase doesn't justify > something that might be really misleading for 90% of cases. > >> but how about the other fields like Swap, Private_Dirty and >> Private_Shared? > > Private_Shared can be pretty confusing as well without the whole context > as well see my other emails in the original thread (just to remind > shmem/tmpfs makes all this really confusing). But this is exactly the issue -- RSS can be just as confusing if you don't know something about the application. I think the issue is how common that situation is, and you seem to believe that it's so uncommon that it's actually better to keep the information more difficult to get for those of us who know something about our systems. That's fine, I guess we just have to disagree here, thanks for looking at this. > > -- > Michal Hocko > SUSE Labs
Re: [PATCH v5 0/3] mm, proc: Implement /proc//totmaps
On Mon, Sep 12, 2016 at 10:15 AM, Michal Hocko wrote: > On Mon 12-09-16 08:31:36, Sonny Rao wrote: >> On Mon, Sep 12, 2016 at 5:02 AM, Michal Hocko wrote: >> > On Mon 05-09-16 16:14:06, robert.f...@collabora.com wrote: >> >> From: Robert Foss >> >> >> >> This series provides the /proc/PID/totmaps feature, which >> >> summarizes the information provided by /proc/PID/smaps for >> >> improved performance and usability reasons. >> >> >> >> A use case is to speed up monitoring of memory consumption in >> >> environments where RSS isn't precise. >> >> >> >> For example Chrome tends to many processes which have hundreds of VMAs >> >> with a substantial amount of shared memory, and the error of using >> >> RSS rather than PSS tends to be very large when looking at overall >> >> memory consumption. PSS isn't kept as a single number that's exported >> >> like RSS, so to calculate PSS means having to parse a very large smaps >> >> file. >> >> >> >> This process is slow and has to be repeated for many processes, and we >> >> found that the just act of doing the parsing was taking up a >> >> significant amount of CPU time, so this patch is an attempt to make >> >> that process cheaper. >> > >> > I still maintain my concerns about a single pss value. It might work in >> > a very specific situations where the consumer knows what is shared but >> > other than that the value can be more misleading than helpful. So a NACK >> > from me until I am shown that this is usable in general and still >> > helpful. >> >> I know you think Pss isn't useful in general (though I'll point out >> two other independent people said they found it useful) > > sure, and one of them admitted that the value is useful because they > _know_ the resource. The other was quite vague for me to understand > all the details. Please, try to understand that once you provide a user > API then it will carved in stone. If the interface is poor and ambigous > it will bite us later. 
One very specific usecase doesn't justify > something that might be really misleading for 90% of cases. > >> but how about the other fields like Swap, Private_Dirty and >> Private_Shared? > > Private_Shared can be pretty confusing as well without the whole context > as well see my other emails in the original thread (just to remind > shmem/tmpfs makes all this really confusing). But this is exactly the issue -- RSS can be just as confusing if you don't know something about the application. I think the issue is how common that situation is, and you seem to believe that it's so uncommon that it's actually better to keep the information more difficult to get for those of us who know something about our systems. That's fine, I guess we just have to disagree here, thanks for looking at this. > > -- > Michal Hocko > SUSE Labs
Re: [PATCH v5 0/3] mm, proc: Implement /proc//totmaps
On Mon, Sep 12, 2016 at 5:02 AM, Michal Hockowrote: > On Mon 05-09-16 16:14:06, robert.f...@collabora.com wrote: >> From: Robert Foss >> >> This series provides the /proc/PID/totmaps feature, which >> summarizes the information provided by /proc/PID/smaps for >> improved performance and usability reasons. >> >> A use case is to speed up monitoring of memory consumption in >> environments where RSS isn't precise. >> >> For example Chrome tends to many processes which have hundreds of VMAs >> with a substantial amount of shared memory, and the error of using >> RSS rather than PSS tends to be very large when looking at overall >> memory consumption. PSS isn't kept as a single number that's exported >> like RSS, so to calculate PSS means having to parse a very large smaps >> file. >> >> This process is slow and has to be repeated for many processes, and we >> found that the just act of doing the parsing was taking up a >> significant amount of CPU time, so this patch is an attempt to make >> that process cheaper. > > I still maintain my concerns about a single pss value. It might work in > a very specific situations where the consumer knows what is shared but > other than that the value can be more misleading than helpful. So a NACK > from me until I am shown that this is usable in general and still > helpful. I know you think Pss isn't useful in general (though I'll point out two other independent people said they found it useful) but how about the other fields like Swap, Private_Dirty and Private_Shared? If we removed Pss would you still NACK it? > > -- > Michal Hocko > SUSE Labs
Re: [PATCH v5 0/3] mm, proc: Implement /proc//totmaps
On Mon, Sep 12, 2016 at 5:02 AM, Michal Hocko wrote: > On Mon 05-09-16 16:14:06, robert.f...@collabora.com wrote: >> From: Robert Foss >> >> This series provides the /proc/PID/totmaps feature, which >> summarizes the information provided by /proc/PID/smaps for >> improved performance and usability reasons. >> >> A use case is to speed up monitoring of memory consumption in >> environments where RSS isn't precise. >> >> For example Chrome tends to many processes which have hundreds of VMAs >> with a substantial amount of shared memory, and the error of using >> RSS rather than PSS tends to be very large when looking at overall >> memory consumption. PSS isn't kept as a single number that's exported >> like RSS, so to calculate PSS means having to parse a very large smaps >> file. >> >> This process is slow and has to be repeated for many processes, and we >> found that the just act of doing the parsing was taking up a >> significant amount of CPU time, so this patch is an attempt to make >> that process cheaper. > > I still maintain my concerns about a single pss value. It might work in > a very specific situations where the consumer knows what is shared but > other than that the value can be more misleading than helpful. So a NACK > from me until I am shown that this is usable in general and still > helpful. I know you think Pss isn't useful in general (though I'll point out two other independent people said they found it useful) but how about the other fields like Swap, Private_Dirty and Private_Shared? If we removed Pss would you still NACK it? > > -- > Michal Hocko > SUSE Labs
Re: [PACTH v2 0/3] Implement /proc//totmaps
On Mon, Aug 22, 2016 at 12:54 AM, Michal Hocko <mho...@kernel.org> wrote: > On Fri 19-08-16 10:57:48, Sonny Rao wrote: >> On Fri, Aug 19, 2016 at 12:59 AM, Michal Hocko <mho...@kernel.org> wrote: >> > On Thu 18-08-16 23:43:39, Sonny Rao wrote: >> >> On Thu, Aug 18, 2016 at 11:01 AM, Michal Hocko <mho...@kernel.org> wrote: >> >> > On Thu 18-08-16 10:47:57, Sonny Rao wrote: >> >> >> On Thu, Aug 18, 2016 at 12:44 AM, Michal Hocko <mho...@kernel.org> >> >> >> wrote: >> >> >> > On Wed 17-08-16 11:57:56, Sonny Rao wrote: >> >> > [...] >> >> >> >> 2) User space OOM handling -- we'd rather do a more graceful >> >> >> >> shutdown >> >> >> >> than let the kernel's OOM killer activate and need to gather this >> >> >> >> information and we'd like to be able to get this information to make >> >> >> >> the decision much faster than 400ms >> >> >> > >> >> >> > Global OOM handling in userspace is really dubious if you ask me. I >> >> >> > understand you want something better than SIGKILL and in fact this is >> >> >> > already possible with memory cgroup controller (btw. memcg will give >> >> >> > you a cheap access to rss, amount of shared, swapped out memory as >> >> >> > well). Anyway if you are getting close to the OOM your system will >> >> >> > most >> >> >> > probably be really busy and chances are that also reading your new >> >> >> > file >> >> >> > will take much more time. I am also not quite sure how is pss useful >> >> >> > for >> >> >> > oom decisions. >> >> >> >> >> >> I mentioned it before, but based on experience RSS just isn't good >> >> >> enough -- there's too much sharing going on in our use case to make >> >> >> the correct decision based on RSS. If RSS were good enough, simply >> >> >> put, this patch wouldn't exist. >> >> > >> >> > But that doesn't answer my question, I am afraid. So how exactly do you >> >> > use pss for oom decisions? 
>> >> >> >> We use PSS to calculate the memory used by a process among all the >> >> processes in the system, in the case of Chrome this tells us how much >> >> each renderer process (which is roughly tied to a particular "tab" in >> >> Chrome) is using and how much it has swapped out, so we know what the >> >> worst offenders are -- I'm not sure what's unclear about that? >> > >> > So let me ask more specifically. How can you make any decision based on >> > the pss when you do not know _what_ is the shared resource. In other >> > words if you select a task to terminate based on the pss then you have to >> > kill others who share the same resource otherwise you do not release >> > that shared resource. Not to mention that such a shared resource might >> > be on tmpfs/shmem and it won't get released even after all processes >> > which map it are gone. >> >> Ok I see why you're confused now, sorry. >> >> In our case that we do know what is being shared in general because >> the sharing is mostly between those processes that we're looking at >> and not other random processes or tmpfs, so PSS gives us useful data >> in the context of these processes which are sharing the data >> especially for monitoring between the set of these renderer processes. > > OK, I see and agree that pss might be useful when you _know_ what is > shared. But this sounds quite specific to a particular workload. How > many users are in a similar situation? In other words, if we present > a single number without the context, how much useful it will be in > general? Is it possible that presenting such a number could be even > misleading for somebody who doesn't have an idea which resources are > shared? These are all questions which should be answered before we > actually add this number (be it a new/existing proc file or a syscall). > I still believe that the number without wider context is just not all > that useful. 
I see the specific point about PSS -- because you need to know what is being shared or otherwise use it in a whole system context, but I still think the whole system context is a valid and generally useful thing. But what about the private_clean and private_dirty? Surely those are more generally useful for
Re: [PACTH v2 0/3] Implement /proc//totmaps
On Mon, Aug 22, 2016 at 12:54 AM, Michal Hocko wrote: > On Fri 19-08-16 10:57:48, Sonny Rao wrote: >> On Fri, Aug 19, 2016 at 12:59 AM, Michal Hocko wrote: >> > On Thu 18-08-16 23:43:39, Sonny Rao wrote: >> >> On Thu, Aug 18, 2016 at 11:01 AM, Michal Hocko wrote: >> >> > On Thu 18-08-16 10:47:57, Sonny Rao wrote: >> >> >> On Thu, Aug 18, 2016 at 12:44 AM, Michal Hocko >> >> >> wrote: >> >> >> > On Wed 17-08-16 11:57:56, Sonny Rao wrote: >> >> > [...] >> >> >> >> 2) User space OOM handling -- we'd rather do a more graceful >> >> >> >> shutdown >> >> >> >> than let the kernel's OOM killer activate and need to gather this >> >> >> >> information and we'd like to be able to get this information to make >> >> >> >> the decision much faster than 400ms >> >> >> > >> >> >> > Global OOM handling in userspace is really dubious if you ask me. I >> >> >> > understand you want something better than SIGKILL and in fact this is >> >> >> > already possible with memory cgroup controller (btw. memcg will give >> >> >> > you a cheap access to rss, amount of shared, swapped out memory as >> >> >> > well). Anyway if you are getting close to the OOM your system will >> >> >> > most >> >> >> > probably be really busy and chances are that also reading your new >> >> >> > file >> >> >> > will take much more time. I am also not quite sure how is pss useful >> >> >> > for >> >> >> > oom decisions. >> >> >> >> >> >> I mentioned it before, but based on experience RSS just isn't good >> >> >> enough -- there's too much sharing going on in our use case to make >> >> >> the correct decision based on RSS. If RSS were good enough, simply >> >> >> put, this patch wouldn't exist. >> >> > >> >> > But that doesn't answer my question, I am afraid. So how exactly do you >> >> > use pss for oom decisions? 
>> >> >> >> We use PSS to calculate the memory used by a process among all the >> >> processes in the system, in the case of Chrome this tells us how much >> >> each renderer process (which is roughly tied to a particular "tab" in >> >> Chrome) is using and how much it has swapped out, so we know what the >> >> worst offenders are -- I'm not sure what's unclear about that? >> > >> > So let me ask more specifically. How can you make any decision based on >> > the pss when you do not know _what_ is the shared resource. In other >> > words if you select a task to terminate based on the pss then you have to >> > kill others who share the same resource otherwise you do not release >> > that shared resource. Not to mention that such a shared resource might >> > be on tmpfs/shmem and it won't get released even after all processes >> > which map it are gone. >> >> Ok I see why you're confused now, sorry. >> >> In our case that we do know what is being shared in general because >> the sharing is mostly between those processes that we're looking at >> and not other random processes or tmpfs, so PSS gives us useful data >> in the context of these processes which are sharing the data >> especially for monitoring between the set of these renderer processes. > > OK, I see and agree that pss might be useful when you _know_ what is > shared. But this sounds quite specific to a particular workload. How > many users are in a similar situation? In other words, if we present > a single number without the context, how much useful it will be in > general? Is it possible that presenting such a number could be even > misleading for somebody who doesn't have an idea which resources are > shared? These are all questions which should be answered before we > actually add this number (be it a new/existing proc file or a syscall). > I still believe that the number without wider context is just not all > that useful. 
I see the specific point about PSS -- because you need to know what is being shared or otherwise use it in a whole system context, but I still think the whole system context is a valid and generally useful thing. But what about the private_clean and private_dirty? Surely those are more generally useful for calculating a lower bound on process memory usage without additional knowledge? At the end of th
Re: [PACTH v2 0/3] Implement /proc//totmaps
On Fri, Aug 19, 2016 at 1:05 AM, Michal Hocko <mho...@kernel.org> wrote: > On Fri 19-08-16 11:26:34, Minchan Kim wrote: >> Hi Michal, >> >> On Thu, Aug 18, 2016 at 08:01:04PM +0200, Michal Hocko wrote: >> > On Thu 18-08-16 10:47:57, Sonny Rao wrote: >> > > On Thu, Aug 18, 2016 at 12:44 AM, Michal Hocko <mho...@kernel.org> wrote: >> > > > On Wed 17-08-16 11:57:56, Sonny Rao wrote: >> > [...] >> > > >> 2) User space OOM handling -- we'd rather do a more graceful shutdown >> > > >> than let the kernel's OOM killer activate and need to gather this >> > > >> information and we'd like to be able to get this information to make >> > > >> the decision much faster than 400ms >> > > > >> > > > Global OOM handling in userspace is really dubious if you ask me. I >> > > > understand you want something better than SIGKILL and in fact this is >> > > > already possible with memory cgroup controller (btw. memcg will give >> > > > you a cheap access to rss, amount of shared, swapped out memory as >> > > > well). Anyway if you are getting close to the OOM your system will most >> > > > probably be really busy and chances are that also reading your new file >> > > > will take much more time. I am also not quite sure how is pss useful >> > > > for >> > > > oom decisions. >> > > >> > > I mentioned it before, but based on experience RSS just isn't good >> > > enough -- there's too much sharing going on in our use case to make >> > > the correct decision based on RSS. If RSS were good enough, simply >> > > put, this patch wouldn't exist. >> > >> > But that doesn't answer my question, I am afraid. So how exactly do you >> > use pss for oom decisions? >> >> My case is not for OOM decision but I agree it would be great if we can get >> *fast* smap summary information. >> >> PSS is really great tool to figure out how processes consume memory >> more exactly rather than RSS. We have been used it for monitoring >> of memory for per-process. 
Although it is not used for OOM decision, >> it would be great if it is speed up because we don't want to spend >> many CPU time for just monitoring. >> >> For our usecase, we don't need AnonHugePages, ShmemPmdMapped, Shared_Hugetlb, >> Private_Hugetlb, KernelPageSize, MMUPageSize because we never enable THP and >> hugetlb. Additionally, Locked can be known via vma flags so we don't need it, >> either. Even, we don't need address range for just monitoring when we don't >> investigate in detail. >> >> Although they are not severe overhead, why does it emit the useless >> information? Even bloat day by day. :( With that, userspace tools should >> spend more time to parse which is pointless. > > So far it doesn't really seem that the parsing is the biggest problem. > The major cycles killer is the output formatting and that doesn't sound > like a problem we are not able to address. And I would even argue that > we want to address it in a generic way as much as possible. > >> Having said that, I'm not fan of creating new stat knob for that, either. >> How about appending summary information in the end of smap? >> So, monitoring users can just open the file and lseek to the (end - 1) and >> read the summary only. > > That might confuse existing parsers. Besides that we already have > /proc//statm which gives cumulative numbers already. I am not sure > how often it is used and whether the pte walk is too expensive for > existing users but that should be explored and evaluated before a new > file is created. > > The /proc became a dump of everything people found interesting just > because we were to easy to allow those additions. Do not repeat those > mistakes, please! Another thing I noticed was that we lock down smaps on Chromium OS. I think this is to avoid exposing more information than necessary via proc. The totmaps file gives us just the information we need and nothing else. 
I certainly don't think we need a proc file for this use case -- do you think a new system call is better or something else? > -- > Michal Hocko > SUSE Labs
Re: [PACTH v2 0/3] Implement /proc//totmaps
On Fri, Aug 19, 2016 at 1:05 AM, Michal Hocko wrote: > On Fri 19-08-16 11:26:34, Minchan Kim wrote: >> Hi Michal, >> >> On Thu, Aug 18, 2016 at 08:01:04PM +0200, Michal Hocko wrote: >> > On Thu 18-08-16 10:47:57, Sonny Rao wrote: >> > > On Thu, Aug 18, 2016 at 12:44 AM, Michal Hocko wrote: >> > > > On Wed 17-08-16 11:57:56, Sonny Rao wrote: >> > [...] >> > > >> 2) User space OOM handling -- we'd rather do a more graceful shutdown >> > > >> than let the kernel's OOM killer activate and need to gather this >> > > >> information and we'd like to be able to get this information to make >> > > >> the decision much faster than 400ms >> > > > >> > > > Global OOM handling in userspace is really dubious if you ask me. I >> > > > understand you want something better than SIGKILL and in fact this is >> > > > already possible with memory cgroup controller (btw. memcg will give >> > > > you a cheap access to rss, amount of shared, swapped out memory as >> > > > well). Anyway if you are getting close to the OOM your system will most >> > > > probably be really busy and chances are that also reading your new file >> > > > will take much more time. I am also not quite sure how is pss useful >> > > > for >> > > > oom decisions. >> > > >> > > I mentioned it before, but based on experience RSS just isn't good >> > > enough -- there's too much sharing going on in our use case to make >> > > the correct decision based on RSS. If RSS were good enough, simply >> > > put, this patch wouldn't exist. >> > >> > But that doesn't answer my question, I am afraid. So how exactly do you >> > use pss for oom decisions? >> >> My case is not for OOM decision but I agree it would be great if we can get >> *fast* smap summary information. >> >> PSS is really great tool to figure out how processes consume memory >> more exactly rather than RSS. We have been used it for monitoring >> of memory for per-process. 
Although it is not used for OOM decision, >> it would be great if it is speed up because we don't want to spend >> many CPU time for just monitoring. >> >> For our usecase, we don't need AnonHugePages, ShmemPmdMapped, Shared_Hugetlb, >> Private_Hugetlb, KernelPageSize, MMUPageSize because we never enable THP and >> hugetlb. Additionally, Locked can be known via vma flags so we don't need it, >> either. Even, we don't need address range for just monitoring when we don't >> investigate in detail. >> >> Although they are not severe overhead, why does it emit the useless >> information? Even bloat day by day. :( With that, userspace tools should >> spend more time to parse which is pointless. > > So far it doesn't really seem that the parsing is the biggest problem. > The major cycles killer is the output formatting and that doesn't sound > like a problem we are not able to address. And I would even argue that > we want to address it in a generic way as much as possible. > >> Having said that, I'm not fan of creating new stat knob for that, either. >> How about appending summary information in the end of smap? >> So, monitoring users can just open the file and lseek to the (end - 1) and >> read the summary only. > > That might confuse existing parsers. Besides that we already have > /proc//statm which gives cumulative numbers already. I am not sure > how often it is used and whether the pte walk is too expensive for > existing users but that should be explored and evaluated before a new > file is created. > > The /proc became a dump of everything people found interesting just > because we were to easy to allow those additions. Do not repeat those > mistakes, please! Another thing I noticed was that we lock down smaps on Chromium OS. I think this is to avoid exposing more information than necessary via proc. The totmaps file gives us just the information we need and nothing else. 
I certainly don't think we need a proc file for this use case -- do you think a new system call is better or something else? > -- > Michal Hocko > SUSE Labs
Re: [PATCH v2 0/3] Implement /proc/<pid>/totmaps
On Fri, Aug 19, 2016 at 12:59 AM, Michal Hocko <mho...@kernel.org> wrote: > On Thu 18-08-16 23:43:39, Sonny Rao wrote: >> On Thu, Aug 18, 2016 at 11:01 AM, Michal Hocko <mho...@kernel.org> wrote: >> > On Thu 18-08-16 10:47:57, Sonny Rao wrote: >> >> On Thu, Aug 18, 2016 at 12:44 AM, Michal Hocko <mho...@kernel.org> wrote: >> >> > On Wed 17-08-16 11:57:56, Sonny Rao wrote: >> > [...] >> >> >> 2) User space OOM handling -- we'd rather do a more graceful shutdown >> >> >> than let the kernel's OOM killer activate and need to gather this >> >> >> information and we'd like to be able to get this information to make >> >> >> the decision much faster than 400ms >> >> > >> >> > Global OOM handling in userspace is really dubious if you ask me. I >> >> > understand you want something better than SIGKILL and in fact this is >> >> > already possible with memory cgroup controller (btw. memcg will give >> >> > you a cheap access to rss, amount of shared, swapped out memory as >> >> > well). Anyway if you are getting close to the OOM your system will most >> >> > probably be really busy and chances are that also reading your new file >> >> > will take much more time. I am also not quite sure how is pss useful for >> >> > oom decisions. >> >> >> >> I mentioned it before, but based on experience RSS just isn't good >> >> enough -- there's too much sharing going on in our use case to make >> >> the correct decision based on RSS. If RSS were good enough, simply >> >> put, this patch wouldn't exist. >> > >> > But that doesn't answer my question, I am afraid. So how exactly do you >> > use pss for oom decisions? >> >> We use PSS to calculate the memory used by a process among all the >> processes in the system, in the case of Chrome this tells us how much >> each renderer process (which is roughly tied to a particular "tab" in >> Chrome) is using and how much it has swapped out, so we know what the >> worst offenders are -- I'm not sure what's unclear about that? 
> > So let me ask more specifically. How can you make any decision based on > the pss when you do not know _what_ is the shared resource. In other > words if you select a task to terminate based on the pss then you have to > kill others who share the same resource otherwise you do not release > that shared resource. Not to mention that such a shared resource might > be on tmpfs/shmem and it won't get released even after all processes > which map it are gone. Ok I see why you're confused now, sorry. In our case that we do know what is being shared in general because the sharing is mostly between those processes that we're looking at and not other random processes or tmpfs, so PSS gives us useful data in the context of these processes which are sharing the data especially for monitoring between the set of these renderer processes. We also use the private clean and private dirty and swap fields to make a few metrics for the processes and charge each process for it's private, shared, and swap data. Private clean and dirty are used for estimating a lower bound on how much memory would be freed. Swap and PSS also give us some indication of additional memory which might get freed up. > > I am sorry for being dense but it is still not clear to me how the > single pss number can be used for oom or, in general, any serious > decisions. The counter might be useful of course for debugging purposes > or to have a general overview but then arguing about 40 vs 20ms sounds a > bit strange to me. Yeah so it's more than just the single PSS number, it's PSS, Private_Clean, Private_dirty, Swap are all interesting numbers to make these decisions. > >> Chrome tends to use a lot of shared memory so we found PSS to be >> better than RSS, and I can give you examples of the RSS and PSS on >> real systems to illustrate the magnitude of the difference between >> those two numbers if that would be useful. >> >> > >> >> So even with memcg I think we'd have the same problem? 
>> > >> > memcg will give you instant anon, shared counters for all processes in >> > the memcg. >> > >> >> We want to be able to get per-process granularity quickly. I'm not >> sure if memcg provides that exactly? > > I will give you that information if you do process-per-memcg but that > doesn't sound ideal. I thought those 20-something processes you were > talking about are treated together but it seems I misunderstood. > -- > Michal Hocko > SUSE Labs
Re: [PATCH v2 0/3] Implement /proc/<pid>/totmaps
On Fri, Aug 19, 2016 at 12:59 AM, Michal Hocko wrote: > On Thu 18-08-16 23:43:39, Sonny Rao wrote: >> On Thu, Aug 18, 2016 at 11:01 AM, Michal Hocko wrote: >> > On Thu 18-08-16 10:47:57, Sonny Rao wrote: >> >> On Thu, Aug 18, 2016 at 12:44 AM, Michal Hocko wrote: >> >> > On Wed 17-08-16 11:57:56, Sonny Rao wrote: >> > [...] >> >> >> 2) User space OOM handling -- we'd rather do a more graceful shutdown >> >> >> than let the kernel's OOM killer activate and need to gather this >> >> >> information and we'd like to be able to get this information to make >> >> >> the decision much faster than 400ms >> >> > >> >> > Global OOM handling in userspace is really dubious if you ask me. I >> >> > understand you want something better than SIGKILL and in fact this is >> >> > already possible with memory cgroup controller (btw. memcg will give >> >> > you a cheap access to rss, amount of shared, swapped out memory as >> >> > well). Anyway if you are getting close to the OOM your system will most >> >> > probably be really busy and chances are that also reading your new file >> >> > will take much more time. I am also not quite sure how is pss useful for >> >> > oom decisions. >> >> >> >> I mentioned it before, but based on experience RSS just isn't good >> >> enough -- there's too much sharing going on in our use case to make >> >> the correct decision based on RSS. If RSS were good enough, simply >> >> put, this patch wouldn't exist. >> > >> > But that doesn't answer my question, I am afraid. So how exactly do you >> > use pss for oom decisions? >> >> We use PSS to calculate the memory used by a process among all the >> processes in the system, in the case of Chrome this tells us how much >> each renderer process (which is roughly tied to a particular "tab" in >> Chrome) is using and how much it has swapped out, so we know what the >> worst offenders are -- I'm not sure what's unclear about that? > > So let me ask more specifically. 
How can you make any decision based on > the pss when you do not know _what_ is the shared resource. In other > words if you select a task to terminate based on the pss then you have to > kill others who share the same resource otherwise you do not release > that shared resource. Not to mention that such a shared resource might > be on tmpfs/shmem and it won't get released even after all processes > which map it are gone. Ok I see why you're confused now, sorry. In our case that we do know what is being shared in general because the sharing is mostly between those processes that we're looking at and not other random processes or tmpfs, so PSS gives us useful data in the context of these processes which are sharing the data especially for monitoring between the set of these renderer processes. We also use the private clean and private dirty and swap fields to make a few metrics for the processes and charge each process for it's private, shared, and swap data. Private clean and dirty are used for estimating a lower bound on how much memory would be freed. Swap and PSS also give us some indication of additional memory which might get freed up. > > I am sorry for being dense but it is still not clear to me how the > single pss number can be used for oom or, in general, any serious > decisions. The counter might be useful of course for debugging purposes > or to have a general overview but then arguing about 40 vs 20ms sounds a > bit strange to me. Yeah so it's more than just the single PSS number, it's PSS, Private_Clean, Private_dirty, Swap are all interesting numbers to make these decisions. > >> Chrome tends to use a lot of shared memory so we found PSS to be >> better than RSS, and I can give you examples of the RSS and PSS on >> real systems to illustrate the magnitude of the difference between >> those two numbers if that would be useful. >> >> > >> >> So even with memcg I think we'd have the same problem? 
>> > >> > memcg will give you instant anon, shared counters for all processes in >> > the memcg. >> > >> >> We want to be able to get per-process granularity quickly. I'm not >> sure if memcg provides that exactly? > > I will give you that information if you do process-per-memcg but that > doesn't sound ideal. I thought those 20-something processes you were > talking about are treated together but it seems I misunderstood. > -- > Michal Hocko > SUSE Labs
Re: [PATCH v2 0/3] Implement /proc/<pid>/totmaps
On Thu, Aug 18, 2016 at 7:26 PM, Minchan Kim <minc...@kernel.org> wrote: > Hi Michal, > > On Thu, Aug 18, 2016 at 08:01:04PM +0200, Michal Hocko wrote: >> On Thu 18-08-16 10:47:57, Sonny Rao wrote: >> > On Thu, Aug 18, 2016 at 12:44 AM, Michal Hocko <mho...@kernel.org> wrote: >> > > On Wed 17-08-16 11:57:56, Sonny Rao wrote: >> [...] >> > >> 2) User space OOM handling -- we'd rather do a more graceful shutdown >> > >> than let the kernel's OOM killer activate and need to gather this >> > >> information and we'd like to be able to get this information to make >> > >> the decision much faster than 400ms >> > > >> > > Global OOM handling in userspace is really dubious if you ask me. I >> > > understand you want something better than SIGKILL and in fact this is >> > > already possible with memory cgroup controller (btw. memcg will give >> > > you a cheap access to rss, amount of shared, swapped out memory as >> > > well). Anyway if you are getting close to the OOM your system will most >> > > probably be really busy and chances are that also reading your new file >> > > will take much more time. I am also not quite sure how is pss useful for >> > > oom decisions. >> > >> > I mentioned it before, but based on experience RSS just isn't good >> > enough -- there's too much sharing going on in our use case to make >> > the correct decision based on RSS. If RSS were good enough, simply >> > put, this patch wouldn't exist. >> >> But that doesn't answer my question, I am afraid. So how exactly do you >> use pss for oom decisions? > > My case is not for OOM decision but I agree it would be great if we can get > *fast* smap summary information. > > PSS is really great tool to figure out how processes consume memory > more exactly rather than RSS. We have been used it for monitoring > of memory for per-process. Although it is not used for OOM decision, > it would be great if it is speed up because we don't want to spend > many CPU time for just monitoring. 
> > For our usecase, we don't need AnonHugePages, ShmemPmdMapped, Shared_Hugetlb, > Private_Hugetlb, KernelPageSize, MMUPageSize because we never enable THP and > hugetlb. Additionally, Locked can be known via vma flags so we don't need it, > either. Even, we don't need address range for just monitoring when we don't > investigate in detail. > > Although they are not severe overhead, why does it emit the useless > information? Even bloat day by day. :( With that, userspace tools should > spend more time to parse which is pointless. > > Having said that, I'm not fan of creating new stat knob for that, either. > How about appending summary information in the end of smap? > So, monitoring users can just open the file and lseek to the (end - 1) and > read the summary only. > That would work fine for us as long as it's fast -- i.e. we don't still have to do all the expensive per-VMA format conversion in the kernel. > Thanks.
Re: [PATCH v2 0/3] Implement /proc/<pid>/totmaps
On Thu, Aug 18, 2016 at 7:26 PM, Minchan Kim wrote: > Hi Michal, > > On Thu, Aug 18, 2016 at 08:01:04PM +0200, Michal Hocko wrote: >> On Thu 18-08-16 10:47:57, Sonny Rao wrote: >> > On Thu, Aug 18, 2016 at 12:44 AM, Michal Hocko wrote: >> > > On Wed 17-08-16 11:57:56, Sonny Rao wrote: >> [...] >> > >> 2) User space OOM handling -- we'd rather do a more graceful shutdown >> > >> than let the kernel's OOM killer activate and need to gather this >> > >> information and we'd like to be able to get this information to make >> > >> the decision much faster than 400ms >> > > >> > > Global OOM handling in userspace is really dubious if you ask me. I >> > > understand you want something better than SIGKILL and in fact this is >> > > already possible with memory cgroup controller (btw. memcg will give >> > > you a cheap access to rss, amount of shared, swapped out memory as >> > > well). Anyway if you are getting close to the OOM your system will most >> > > probably be really busy and chances are that also reading your new file >> > > will take much more time. I am also not quite sure how is pss useful for >> > > oom decisions. >> > >> > I mentioned it before, but based on experience RSS just isn't good >> > enough -- there's too much sharing going on in our use case to make >> > the correct decision based on RSS. If RSS were good enough, simply >> > put, this patch wouldn't exist. >> >> But that doesn't answer my question, I am afraid. So how exactly do you >> use pss for oom decisions? > > My case is not for OOM decision but I agree it would be great if we can get > *fast* smap summary information. > > PSS is really great tool to figure out how processes consume memory > more exactly rather than RSS. We have been used it for monitoring > of memory for per-process. Although it is not used for OOM decision, > it would be great if it is speed up because we don't want to spend > many CPU time for just monitoring. 
> > For our usecase, we don't need AnonHugePages, ShmemPmdMapped, Shared_Hugetlb, > Private_Hugetlb, KernelPageSize, MMUPageSize because we never enable THP and > hugetlb. Additionally, Locked can be known via vma flags so we don't need it, > either. Even, we don't need address range for just monitoring when we don't > investigate in detail. > > Although they are not severe overhead, why does it emit the useless > information? Even bloat day by day. :( With that, userspace tools should > spend more time to parse which is pointless. > > Having said that, I'm not fan of creating new stat knob for that, either. > How about appending summary information in the end of smap? > So, monitoring users can just open the file and lseek to the (end - 1) and > read the summary only. > That would work fine for us as long as it's fast -- i.e. we don't still have to do all the expensive per-VMA format conversion in the kernel. > Thanks.
Re: [PATCH v2 0/3] Implement /proc/<pid>/totmaps
On Thu, Aug 18, 2016 at 11:01 AM, Michal Hocko <mho...@kernel.org> wrote: > On Thu 18-08-16 10:47:57, Sonny Rao wrote: >> On Thu, Aug 18, 2016 at 12:44 AM, Michal Hocko <mho...@kernel.org> wrote: >> > On Wed 17-08-16 11:57:56, Sonny Rao wrote: > [...] >> >> 2) User space OOM handling -- we'd rather do a more graceful shutdown >> >> than let the kernel's OOM killer activate and need to gather this >> >> information and we'd like to be able to get this information to make >> >> the decision much faster than 400ms >> > >> > Global OOM handling in userspace is really dubious if you ask me. I >> > understand you want something better than SIGKILL and in fact this is >> > already possible with memory cgroup controller (btw. memcg will give >> > you a cheap access to rss, amount of shared, swapped out memory as >> > well). Anyway if you are getting close to the OOM your system will most >> > probably be really busy and chances are that also reading your new file >> > will take much more time. I am also not quite sure how is pss useful for >> > oom decisions. >> >> I mentioned it before, but based on experience RSS just isn't good >> enough -- there's too much sharing going on in our use case to make >> the correct decision based on RSS. If RSS were good enough, simply >> put, this patch wouldn't exist. > > But that doesn't answer my question, I am afraid. So how exactly do you > use pss for oom decisions? We use PSS to calculate the memory used by a process among all the processes in the system, in the case of Chrome this tells us how much each renderer process (which is roughly tied to a particular "tab" in Chrome) is using and how much it has swapped out, so we know what the worst offenders are -- I'm not sure what's unclear about that? 
Chrome tends to use a lot of shared memory so we found PSS to be better than RSS, and I can give you examples of the RSS and PSS on real systems to illustrate the magnitude of the difference between those two numbers if that would be useful. > >> So even with memcg I think we'd have the same problem? > > memcg will give you instant anon, shared counters for all processes in > the memcg. > We want to be able to get per-process granularity quickly. I'm not sure if memcg provides that exactly? >> > Don't take me wrong, /proc//totmaps might be suitable for your >> > specific usecase but so far I haven't heard any sound argument for it to >> > be generally usable. It is true that smaps is unnecessarily costly but >> > at least I can see some room for improvements. A simple patch I've >> > posted cut the formatting overhead by 7%. Maybe we can do more. >> >> It seems like a general problem that if you want these values the >> existing kernel interface can be very expensive, so it would be >> generally usable by any application which wants a per process PSS, >> private data, dirty data or swap value. > > yes this is really unfortunate. And if at all possible we should address > that. Precise values require the expensive rmap walk. We can introduce > some caching to help that. But so far it seems the biggest overhead is > to simply format the output and that should be addressed before any new > proc file is added. > >> I mentioned two use cases, but I guess I don't understand the comment >> about why it's not usable by other use cases. > > I might be wrong here but a use of pss is quite limited and I do not > remember anybody asking for large optimizations in that area. I still do > not understand your use cases properly so I am quite skeptical about a > general usefulness of a new file. How do you know that usage of PSS is quite limited? 
I can only say that we've been using it on Chromium OS for at least four years and have found it very valuable, and I think I've explained the use cases in this thread. If you have more specific questions then I can try to clarify. > > -- > Michal Hocko > SUSE Labs
Re: [PATCH v2 0/3] Implement /proc/<pid>/totmaps
On Thu, Aug 18, 2016 at 11:01 AM, Michal Hocko wrote: > On Thu 18-08-16 10:47:57, Sonny Rao wrote: >> On Thu, Aug 18, 2016 at 12:44 AM, Michal Hocko wrote: >> > On Wed 17-08-16 11:57:56, Sonny Rao wrote: > [...] >> >> 2) User space OOM handling -- we'd rather do a more graceful shutdown >> >> than let the kernel's OOM killer activate and need to gather this >> >> information and we'd like to be able to get this information to make >> >> the decision much faster than 400ms >> > >> > Global OOM handling in userspace is really dubious if you ask me. I >> > understand you want something better than SIGKILL and in fact this is >> > already possible with memory cgroup controller (btw. memcg will give >> > you a cheap access to rss, amount of shared, swapped out memory as >> > well). Anyway if you are getting close to the OOM your system will most >> > probably be really busy and chances are that also reading your new file >> > will take much more time. I am also not quite sure how is pss useful for >> > oom decisions. >> >> I mentioned it before, but based on experience RSS just isn't good >> enough -- there's too much sharing going on in our use case to make >> the correct decision based on RSS. If RSS were good enough, simply >> put, this patch wouldn't exist. > > But that doesn't answer my question, I am afraid. So how exactly do you > use pss for oom decisions? We use PSS to calculate the memory used by a process among all the processes in the system, in the case of Chrome this tells us how much each renderer process (which is roughly tied to a particular "tab" in Chrome) is using and how much it has swapped out, so we know what the worst offenders are -- I'm not sure what's unclear about that? Chrome tends to use a lot of shared memory so we found PSS to be better than RSS, and I can give you examples of the RSS and PSS on real systems to illustrate the magnitude of the difference between those two numbers if that would be useful. 
> >> So even with memcg I think we'd have the same problem? > > memcg will give you instant anon, shared counters for all processes in > the memcg. > We want to be able to get per-process granularity quickly. I'm not sure if memcg provides that exactly? >> > Don't take me wrong, /proc//totmaps might be suitable for your >> > specific usecase but so far I haven't heard any sound argument for it to >> > be generally usable. It is true that smaps is unnecessarily costly but >> > at least I can see some room for improvements. A simple patch I've >> > posted cut the formatting overhead by 7%. Maybe we can do more. >> >> It seems like a general problem that if you want these values the >> existing kernel interface can be very expensive, so it would be >> generally usable by any application which wants a per process PSS, >> private data, dirty data or swap value. > > yes this is really unfortunate. And if at all possible we should address > that. Precise values require the expensive rmap walk. We can introduce > some caching to help that. But so far it seems the biggest overhead is > to simply format the output and that should be addressed before any new > proc file is added. > >> I mentioned two use cases, but I guess I don't understand the comment >> about why it's not usable by other use cases. > > I might be wrong here but a use of pss is quite limited and I do not > remember anybody asking for large optimizations in that area. I still do > not understand your use cases properly so I am quite skeptical about a > general usefulness of a new file. How do you know that usage of PSS is quite limited? I can only say that we've been using it on Chromium OS for at least four years and have found it very valuable, and I think I've explained the use cases in this thread. If you have more specific questions then I can try to clarify. > > -- > Michal Hocko > SUSE Labs
Re: [PATCH v2 0/3] Implement /proc/<pid>/totmaps
On Thu, Aug 18, 2016 at 2:05 PM, Robert Foss <robert.f...@collabora.com> wrote: > > > On 2016-08-18 02:01 PM, Michal Hocko wrote: >> >> On Thu 18-08-16 10:47:57, Sonny Rao wrote: >>> >>> On Thu, Aug 18, 2016 at 12:44 AM, Michal Hocko <mho...@kernel.org> wrote: >>>> >>>> On Wed 17-08-16 11:57:56, Sonny Rao wrote: >> >> [...] >>>>> >>>>> 2) User space OOM handling -- we'd rather do a more graceful shutdown >>>>> than let the kernel's OOM killer activate and need to gather this >>>>> information and we'd like to be able to get this information to make >>>>> the decision much faster than 400ms >>>> >>>> >>>> Global OOM handling in userspace is really dubious if you ask me. I >>>> understand you want something better than SIGKILL and in fact this is >>>> already possible with memory cgroup controller (btw. memcg will give >>>> you a cheap access to rss, amount of shared, swapped out memory as >>>> well). Anyway if you are getting close to the OOM your system will most >>>> probably be really busy and chances are that also reading your new file >>>> will take much more time. I am also not quite sure how is pss useful for >>>> oom decisions. >>> >>> >>> I mentioned it before, but based on experience RSS just isn't good >>> enough -- there's too much sharing going on in our use case to make >>> the correct decision based on RSS. If RSS were good enough, simply >>> put, this patch wouldn't exist. >> >> >> But that doesn't answer my question, I am afraid. So how exactly do you >> use pss for oom decisions? >> >>> So even with memcg I think we'd have the same problem? >> >> >> memcg will give you instant anon, shared counters for all processes in >> the memcg. > > > Is it technically feasible to add instant pss support to memcg? > > @Sonny Rao: Would using cgroups be acceptable for chromiumos? It's possible, though I think we'd end up putting each renderer in it's own cgroup to get the PSS stat, so it seems a bit like overkill. 
I think memcg also has some overhead that we'd need to quantify but I could be mistaken about this. > > >> >>>> Don't take me wrong, /proc//totmaps might be suitable for your >>>> specific usecase but so far I haven't heard any sound argument for it to >>>> be generally usable. It is true that smaps is unnecessarily costly but >>>> at least I can see some room for improvements. A simple patch I've >>>> posted cut the formatting overhead by 7%. Maybe we can do more. >>> >>> >>> It seems like a general problem that if you want these values the >>> existing kernel interface can be very expensive, so it would be >>> generally usable by any application which wants a per process PSS, >>> private data, dirty data or swap value. >> >> >> yes this is really unfortunate. And if at all possible we should address >> that. Precise values require the expensive rmap walk. We can introduce >> some caching to help that. But so far it seems the biggest overhead is >> to simply format the output and that should be addressed before any new >> proc file is added. >> >>> I mentioned two use cases, but I guess I don't understand the comment >>> about why it's not usable by other use cases. >> >> >> I might be wrong here but a use of pss is quite limited and I do not >> remember anybody asking for large optimizations in that area. I still do >> not understand your use cases properly so I am quite skeptical about a >> general usefulness of a new file. >> >
Re: [PATCH v2 0/3] Implement /proc/<pid>/totmaps
On Thu, Aug 18, 2016 at 2:05 PM, Robert Foss wrote: > > > On 2016-08-18 02:01 PM, Michal Hocko wrote: >> >> On Thu 18-08-16 10:47:57, Sonny Rao wrote: >>> >>> On Thu, Aug 18, 2016 at 12:44 AM, Michal Hocko wrote: >>>> >>>> On Wed 17-08-16 11:57:56, Sonny Rao wrote: >> >> [...] >>>>> >>>>> 2) User space OOM handling -- we'd rather do a more graceful shutdown >>>>> than let the kernel's OOM killer activate and need to gather this >>>>> information and we'd like to be able to get this information to make >>>>> the decision much faster than 400ms >>>> >>>> >>>> Global OOM handling in userspace is really dubious if you ask me. I >>>> understand you want something better than SIGKILL and in fact this is >>>> already possible with memory cgroup controller (btw. memcg will give >>>> you a cheap access to rss, amount of shared, swapped out memory as >>>> well). Anyway if you are getting close to the OOM your system will most >>>> probably be really busy and chances are that also reading your new file >>>> will take much more time. I am also not quite sure how is pss useful for >>>> oom decisions. >>> >>> >>> I mentioned it before, but based on experience RSS just isn't good >>> enough -- there's too much sharing going on in our use case to make >>> the correct decision based on RSS. If RSS were good enough, simply >>> put, this patch wouldn't exist. >> >> >> But that doesn't answer my question, I am afraid. So how exactly do you >> use pss for oom decisions? >> >>> So even with memcg I think we'd have the same problem? >> >> >> memcg will give you instant anon, shared counters for all processes in >> the memcg. > > > Is it technically feasible to add instant pss support to memcg? > > @Sonny Rao: Would using cgroups be acceptable for chromiumos? It's possible, though I think we'd end up putting each renderer in it's own cgroup to get the PSS stat, so it seems a bit like overkill. 
I think memcg also has some overhead that we'd need to quantify but I could be mistaken about this. > > >> >>>> Don't take me wrong, /proc//totmaps might be suitable for your >>>> specific usecase but so far I haven't heard any sound argument for it to >>>> be generally usable. It is true that smaps is unnecessarily costly but >>>> at least I can see some room for improvements. A simple patch I've >>>> posted cut the formatting overhead by 7%. Maybe we can do more. >>> >>> >>> It seems like a general problem that if you want these values the >>> existing kernel interface can be very expensive, so it would be >>> generally usable by any application which wants a per process PSS, >>> private data, dirty data or swap value. >> >> >> yes this is really unfortunate. And if at all possible we should address >> that. Precise values require the expensive rmap walk. We can introduce >> some caching to help that. But so far it seems the biggest overhead is >> to simply format the output and that should be addressed before any new >> proc file is added. >> >>> I mentioned two use cases, but I guess I don't understand the comment >>> about why it's not usable by other use cases. >> >> >> I might be wrong here but a use of pss is quite limited and I do not >> remember anybody asking for large optimizations in that area. I still do >> not understand your use cases properly so I am quite skeptical about a >> general usefulness of a new file. >> >
Re: [PATCH v2 0/3] Implement /proc/<pid>/totmaps
On Thu, Aug 18, 2016 at 12:44 AM, Michal Hocko <mho...@kernel.org> wrote: > On Wed 17-08-16 11:57:56, Sonny Rao wrote: >> On Wed, Aug 17, 2016 at 6:03 AM, Michal Hocko <mho...@kernel.org> wrote: >> > On Wed 17-08-16 11:31:25, Jann Horn wrote: > [...] >> >> That's at least 30.43% + 9.12% + 7.66% = 47.21% of the task's kernel >> >> time spent on evaluating format strings. The new interface >> >> wouldn't have to spend that much time on format strings because there >> >> isn't so much text to format. >> > >> > well, this is true of course but I would much rather try to reduce the >> > overhead of smaps file than add a new file. The following should help >> > already. I've measured ~7% systime cut down. I guess there is still some >> > room for improvements but I have to say I'm far from being convinced about >> > a new proc file just because we suck at dumping information to the >> > userspace. >> > If this was something like /proc//stat which is >> > essentially read all the time then it would be a different question but >> > is the rss, pss going to be all that often? If yes why? >> >> If the question is why do we need to read RSS, PSS, Private_*, Swap >> and the other fields so often? >> >> I have two use cases so far involving monitoring per-process memory >> usage, and we usually need to read stats for about 25 processes. >> >> Here's a timing example on an fairly recent ARM system 4 core RK3288 >> running at 1.8Ghz >> >> localhost ~ # time cat /proc/25946/smaps > /dev/null >> >> real0m0.036s >> user0m0.020s >> sys 0m0.020s >> >> localhost ~ # time cat /proc/25946/totmaps > /dev/null >> >> real0m0.027s >> user0m0.010s >> sys 0m0.010s >> localhost ~ # >> >> I'll ignore the user time for now, and we see about 20 ms of system >> time with smaps and 10 ms with totmaps, with 20 similar processes it >> would be 400 milliseconds of cpu time for the kernel to get this >> information from smaps vs 200 milliseconds with totmaps. 
Even totmaps >> is still pretty slow, but much better than smaps. >> >> Use cases: >> 1) Basic task monitoring -- like "top" that shows memory consumption >> including PSS, Private, Swap >> 1 second update means about 40% of one CPU is spent in the kernel >> gathering the data with smaps > > I would argue that even 20% is way too much for such a monitoring. What > is the value to do it so often tha 20 vs 40ms really matters? Yeah it is too much (I believe I said that) but it's significantly better. >> 2) User space OOM handling -- we'd rather do a more graceful shutdown >> than let the kernel's OOM killer activate and need to gather this >> information and we'd like to be able to get this information to make >> the decision much faster than 400ms > > Global OOM handling in userspace is really dubious if you ask me. I > understand you want something better than SIGKILL and in fact this is > already possible with memory cgroup controller (btw. memcg will give > you a cheap access to rss, amount of shared, swapped out memory as > well). Anyway if you are getting close to the OOM your system will most > probably be really busy and chances are that also reading your new file > will take much more time. I am also not quite sure how is pss useful for > oom decisions. I mentioned it before, but based on experience RSS just isn't good enough -- there's too much sharing going on in our use case to make the correct decision based on RSS. If RSS were good enough, simply put, this patch wouldn't exist. So even with memcg I think we'd have the same problem? > > Don't take me wrong, /proc//totmaps might be suitable for your > specific usecase but so far I haven't heard any sound argument for it to > be generally usable. It is true that smaps is unnecessarily costly but > at least I can see some room for improvements. A simple patch I've > posted cut the formatting overhead by 7%. Maybe we can do more. 
It seems like a general problem that if you want these values the existing kernel interface can be very expensive, so it would be generally usable by any application which wants a per process PSS, private data, dirty data or swap value. I mentioned two use cases, but I guess I don't understand the comment about why it's not usable by other use cases. > -- > Michal Hocko > SUSE Labs
Re: [PATCH v2 0/3] Implement /proc/<pid>/totmaps
On Thu, Aug 18, 2016 at 12:44 AM, Michal Hocko wrote: > On Wed 17-08-16 11:57:56, Sonny Rao wrote: >> On Wed, Aug 17, 2016 at 6:03 AM, Michal Hocko wrote: >> > On Wed 17-08-16 11:31:25, Jann Horn wrote: > [...] >> >> That's at least 30.43% + 9.12% + 7.66% = 47.21% of the task's kernel >> >> time spent on evaluating format strings. The new interface >> >> wouldn't have to spend that much time on format strings because there >> >> isn't so much text to format. >> > >> > well, this is true of course but I would much rather try to reduce the >> > overhead of smaps file than add a new file. The following should help >> > already. I've measured ~7% systime cut down. I guess there is still some >> > room for improvements but I have to say I'm far from being convinced about >> > a new proc file just because we suck at dumping information to the >> > userspace. >> > If this was something like /proc//stat which is >> > essentially read all the time then it would be a different question but >> > is the rss, pss going to be all that often? If yes why? >> >> If the question is why do we need to read RSS, PSS, Private_*, Swap >> and the other fields so often? >> >> I have two use cases so far involving monitoring per-process memory >> usage, and we usually need to read stats for about 25 processes. >> >> Here's a timing example on an fairly recent ARM system 4 core RK3288 >> running at 1.8Ghz >> >> localhost ~ # time cat /proc/25946/smaps > /dev/null >> >> real0m0.036s >> user0m0.020s >> sys 0m0.020s >> >> localhost ~ # time cat /proc/25946/totmaps > /dev/null >> >> real0m0.027s >> user0m0.010s >> sys 0m0.010s >> localhost ~ # >> >> I'll ignore the user time for now, and we see about 20 ms of system >> time with smaps and 10 ms with totmaps, with 20 similar processes it >> would be 400 milliseconds of cpu time for the kernel to get this >> information from smaps vs 200 milliseconds with totmaps. Even totmaps >> is still pretty slow, but much better than smaps. 
>> >> Use cases: >> 1) Basic task monitoring -- like "top" that shows memory consumption >> including PSS, Private, Swap >> 1 second update means about 40% of one CPU is spent in the kernel >> gathering the data with smaps > > I would argue that even 20% is way too much for such a monitoring. What > is the value to do it so often tha 20 vs 40ms really matters? Yeah it is too much (I believe I said that) but it's significantly better. >> 2) User space OOM handling -- we'd rather do a more graceful shutdown >> than let the kernel's OOM killer activate and need to gather this >> information and we'd like to be able to get this information to make >> the decision much faster than 400ms > > Global OOM handling in userspace is really dubious if you ask me. I > understand you want something better than SIGKILL and in fact this is > already possible with memory cgroup controller (btw. memcg will give > you a cheap access to rss, amount of shared, swapped out memory as > well). Anyway if you are getting close to the OOM your system will most > probably be really busy and chances are that also reading your new file > will take much more time. I am also not quite sure how is pss useful for > oom decisions. I mentioned it before, but based on experience RSS just isn't good enough -- there's too much sharing going on in our use case to make the correct decision based on RSS. If RSS were good enough, simply put, this patch wouldn't exist. So even with memcg I think we'd have the same problem? > > Don't take me wrong, /proc//totmaps might be suitable for your > specific usecase but so far I haven't heard any sound argument for it to > be generally usable. It is true that smaps is unnecessarily costly but > at least I can see some room for improvements. A simple patch I've > posted cut the formatting overhead by 7%. Maybe we can do more. 
It seems like a general problem that if you want these values the existing kernel interface can be very expensive, so it would be generally usable by any application which wants a per process PSS, private data, dirty data or swap value. I mentioned two use cases, but I guess I don't understand the comment about why it's not usable by other use cases. > -- > Michal Hocko > SUSE Labs
Re: [PATCH v2 0/3] Implement /proc/<pid>/totmaps
On Wed, Aug 17, 2016 at 6:03 AM, Michal Hockowrote: > On Wed 17-08-16 11:31:25, Jann Horn wrote: >> On Wed, Aug 17, 2016 at 10:22:00AM +0200, Michal Hocko wrote: >> > On Tue 16-08-16 12:46:51, Robert Foss wrote: >> > [...] >> > > $ /usr/bin/time -v -p zsh -c "repeat 25 { awk '/^Rss/{rss+=\$2} >> > > /^Pss/{pss+=\$2} END {printf \"rss:%d pss:%d\n\", rss, pss}\' >> > > /proc/5025/smaps }" >> > > [...] >> > > Command being timed: "zsh -c repeat 25 { awk '/^Rss/{rss+=$2} >> > > /^Pss/{pss+=$2} END {printf "rss:%d pss:%d\n", rss, pss}\' >> > > /proc/5025/smaps >> > > }" >> > > User time (seconds): 0.37 >> > > System time (seconds): 0.45 >> > > Percent of CPU this job got: 92% >> > > Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.89 >> > >> > This is really unexpected. Where is the user time spent? Anyway, rather >> > than measuring some random processes I've tried to measure something >> > resembling the worst case. So I've created a simple program to mmap as >> > much as possible: >> > >> > #include >> > #include >> > #include >> > #include >> > int main() >> > { >> > while (mmap(NULL, 4096, PROT_READ|PROT_WRITE, >> > MAP_ANON|MAP_SHARED|MAP_POPULATE, -1, 0) != MAP_FAILED) >> > ; >> > >> > printf("pid:%d\n", getpid()); >> > pause(); >> > return 0; >> > } >> >> Ah, nice, that's a reasonable test program. :) >> >> >> > So with a reasonable user space the parsing is really not all that time >> > consuming wrt. smaps handling. That being said I am still very skeptical >> > about a dedicated proc file which accomplishes what userspace can done >> > in a trivial way. 
>> >> Now, since your numbers showed that all the time is spent in the kernel, >> also create this test program to just read that file over and over again: >> >> $ cat justreadloop.c >> #include >> #include >> #include >> #include >> #include >> #include >> #include >> >> char buf[100]; >> >> int main(int argc, char **argv) { >> printf("pid:%d\n", getpid()); >> while (1) { >> int fd = open(argv[1], O_RDONLY); >> if (fd < 0) continue; >> if (read(fd, buf, sizeof(buf)) < 0) >> err(1, "read"); >> close(fd); >> } >> } >> $ gcc -Wall -o justreadloop justreadloop.c >> $ >> >> Now launch your test: >> >> $ ./mapstuff >> pid:29397 >> >> point justreadloop at it: >> >> $ ./justreadloop /proc/29397/smaps >> pid:32567 >> >> ... and then check the performance stats of justreadloop: >> >> # perf top -p 32567 >> >> This is what I see: >> >> Samples: 232K of event 'cycles:ppp', Event count (approx.): 60448424325 >> Overhead Shared Object Symbol >> 30,43% [kernel] [k] format_decode >>9,12% [kernel] [k] number >>7,66% [kernel] [k] vsnprintf >>7,06% [kernel] [k] __lock_acquire >>3,23% [kernel] [k] lock_release >>2,85% [kernel] [k] debug_lockdep_rcu_enabled >>2,25% [kernel] [k] skip_atoi >>2,13% [kernel] [k] lock_acquire >>2,05% [kernel] [k] show_smap > > This is a lot! I would expect the rmap walk to consume more but it even > doesn't show up in the top consumers. > >> That's at least 30.43% + 9.12% + 7.66% = 47.21% of the task's kernel >> time spent on evaluating format strings. The new interface >> wouldn't have to spend that much time on format strings because there >> isn't so much text to format. > > well, this is true of course but I would much rather try to reduce the > overhead of smaps file than add a new file. The following should help > already. I've measured ~7% systime cut down. I guess there is still some > room for improvements but I have to say I'm far from being convinced about > a new proc file just because we suck at dumping information to the > userspace. 
> If this was something like /proc//stat which is > essentially read all the time then it would be a different question but > is the rss, pss going to be all that often? If yes why? If the question is why do we need to read RSS, PSS, Private_*, Swap and the other fields so often? I have two use cases so far involving monitoring per-process memory usage, and we usually need to read stats for about 25 processes. Here's a timing example on an fairly recent ARM system 4 core RK3288 running at 1.8Ghz localhost ~ # time cat /proc/25946/smaps > /dev/null real0m0.036s user0m0.020s sys 0m0.020s localhost ~ # time cat /proc/25946/totmaps > /dev/null real0m0.027s user0m0.010s sys 0m0.010s localhost ~ # I'll ignore the user time for now, and we see about 20 ms of system time with smaps and 10 ms with totmaps, with 20 similar processes it would be 400 milliseconds of cpu time for the kernel to get this information from smaps vs 200 milliseconds with totmaps. Even totmaps is still pretty slow, but much better than smaps. Use cases: 1) Basic task monitoring -- like "top" that shows memory consumption including PSS, Private, Swap 1 second update
Re: [PATCH v2 0/3] Implement /proc/<pid>/totmaps
On Wed, Aug 17, 2016 at 6:03 AM, Michal Hocko wrote: > On Wed 17-08-16 11:31:25, Jann Horn wrote: >> On Wed, Aug 17, 2016 at 10:22:00AM +0200, Michal Hocko wrote: >> > On Tue 16-08-16 12:46:51, Robert Foss wrote: >> > [...] >> > > $ /usr/bin/time -v -p zsh -c "repeat 25 { awk '/^Rss/{rss+=\$2} >> > > /^Pss/{pss+=\$2} END {printf \"rss:%d pss:%d\n\", rss, pss}\' >> > > /proc/5025/smaps }" >> > > [...] >> > > Command being timed: "zsh -c repeat 25 { awk '/^Rss/{rss+=$2} >> > > /^Pss/{pss+=$2} END {printf "rss:%d pss:%d\n", rss, pss}\' >> > > /proc/5025/smaps >> > > }" >> > > User time (seconds): 0.37 >> > > System time (seconds): 0.45 >> > > Percent of CPU this job got: 92% >> > > Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.89 >> > >> > This is really unexpected. Where is the user time spent? Anyway, rather >> > than measuring some random processes I've tried to measure something >> > resembling the worst case. So I've created a simple program to mmap as >> > much as possible: >> > >> > #include >> > #include >> > #include >> > #include >> > int main() >> > { >> > while (mmap(NULL, 4096, PROT_READ|PROT_WRITE, >> > MAP_ANON|MAP_SHARED|MAP_POPULATE, -1, 0) != MAP_FAILED) >> > ; >> > >> > printf("pid:%d\n", getpid()); >> > pause(); >> > return 0; >> > } >> >> Ah, nice, that's a reasonable test program. :) >> >> >> > So with a reasonable user space the parsing is really not all that time >> > consuming wrt. smaps handling. That being said I am still very skeptical >> > about a dedicated proc file which accomplishes what userspace can done >> > in a trivial way. 
>> >> Now, since your numbers showed that all the time is spent in the kernel, >> also create this test program to just read that file over and over again: >> >> $ cat justreadloop.c >> #include >> #include >> #include >> #include >> #include >> #include >> #include >> >> char buf[100]; >> >> int main(int argc, char **argv) { >> printf("pid:%d\n", getpid()); >> while (1) { >> int fd = open(argv[1], O_RDONLY); >> if (fd < 0) continue; >> if (read(fd, buf, sizeof(buf)) < 0) >> err(1, "read"); >> close(fd); >> } >> } >> $ gcc -Wall -o justreadloop justreadloop.c >> $ >> >> Now launch your test: >> >> $ ./mapstuff >> pid:29397 >> >> point justreadloop at it: >> >> $ ./justreadloop /proc/29397/smaps >> pid:32567 >> >> ... and then check the performance stats of justreadloop: >> >> # perf top -p 32567 >> >> This is what I see: >> >> Samples: 232K of event 'cycles:ppp', Event count (approx.): 60448424325 >> Overhead Shared Object Symbol >> 30,43% [kernel] [k] format_decode >>9,12% [kernel] [k] number >>7,66% [kernel] [k] vsnprintf >>7,06% [kernel] [k] __lock_acquire >>3,23% [kernel] [k] lock_release >>2,85% [kernel] [k] debug_lockdep_rcu_enabled >>2,25% [kernel] [k] skip_atoi >>2,13% [kernel] [k] lock_acquire >>2,05% [kernel] [k] show_smap > > This is a lot! I would expect the rmap walk to consume more but it even > doesn't show up in the top consumers. > >> That's at least 30.43% + 9.12% + 7.66% = 47.21% of the task's kernel >> time spent on evaluating format strings. The new interface >> wouldn't have to spend that much time on format strings because there >> isn't so much text to format. > > well, this is true of course but I would much rather try to reduce the > overhead of smaps file than add a new file. The following should help > already. I've measured ~7% systime cut down. I guess there is still some > room for improvements but I have to say I'm far from being convinced about > a new proc file just because we suck at dumping information to the > userspace. 
> If this was something like /proc//stat which is > essentially read all the time then it would be a different question but > is the rss, pss going to be all that often? If yes why? If the question is why do we need to read RSS, PSS, Private_*, Swap and the other fields so often? I have two use cases so far involving monitoring per-process memory usage, and we usually need to read stats for about 25 processes. Here's a timing example on an fairly recent ARM system 4 core RK3288 running at 1.8Ghz localhost ~ # time cat /proc/25946/smaps > /dev/null real0m0.036s user0m0.020s sys 0m0.020s localhost ~ # time cat /proc/25946/totmaps > /dev/null real0m0.027s user0m0.010s sys 0m0.010s localhost ~ # I'll ignore the user time for now, and we see about 20 ms of system time with smaps and 10 ms with totmaps, with 20 similar processes it would be 400 milliseconds of cpu time for the kernel to get this information from smaps vs 200 milliseconds with totmaps. Even totmaps is still pretty slow, but much better than smaps. Use cases: 1) Basic task monitoring -- like "top" that shows memory consumption including PSS, Private, Swap 1 second update means about 40% of
Re: [PATCH v1] mm, proc: Implement /proc/<pid>/totmaps
On Wed, Aug 10, 2016 at 10:37 AM, Jann Horn <j...@thejh.net> wrote: > On Wed, Aug 10, 2016 at 10:23:53AM -0700, Sonny Rao wrote: >> On Tue, Aug 9, 2016 at 2:01 PM, Robert Foss <robert.f...@collabora.com> >> wrote: >> > >> > >> > On 2016-08-09 03:24 PM, Jann Horn wrote: >> >> >> >> On Tue, Aug 09, 2016 at 12:05:43PM -0400, robert.f...@collabora.com wrote: >> >>> >> >>> From: Sonny Rao <sonny...@chromium.org> >> >>> >> >>> This is based on earlier work by Thiago Goncales. It implements a new >> >>> per process proc file which summarizes the contents of the smaps file >> >>> but doesn't display any addresses. It gives more detailed information >> >>> than statm like the PSS (proprotional set size). It differs from the >> >>> original implementation in that it doesn't use the full blown set of >> >>> seq operations, uses a different termination condition, and doesn't >> >>> displayed "Locked" as that was broken on the original implemenation. >> >>> >> >>> This new proc file provides information faster than parsing the >> >>> potentially >> >>> huge smaps file. >> >>> >> >>> Signed-off-by: Sonny Rao <sonny...@chromium.org> >> >>> >> >>> Tested-by: Robert Foss <robert.f...@collabora.com> >> >>> Signed-off-by: Robert Foss <robert.f...@collabora.com> >> >> >> >> >> >> >> >>> +static int totmaps_proc_show(struct seq_file *m, void *data) >> >>> +{ >> >>> + struct proc_maps_private *priv = m->private; >> >>> + struct mm_struct *mm; >> >>> + struct vm_area_struct *vma; >> >>> + struct mem_size_stats *mss_sum = priv->mss; >> >>> + >> >>> + /* reference to priv->task already taken */ >> >>> + /* but need to get the mm here because */ >> >>> + /* task could be in the process of exiting */ >> >> >> >> >> >> Can you please elaborate on this? 
My understanding here is that you >> >> intend for the caller to be able to repeatedly read the same totmaps >> >> file with pread() and still see updated information after the target >> >> process has called execve() and be able to detect process death >> >> (instead of simply seeing stale values). Is that accurate? >> >> >> >> I would prefer it if you could grab a reference to the mm_struct >> >> directly at open time. >> > >> > >> > Sonny, do you know more about the above comment? >> >> I think right now the file gets re-opened every time, but the mode >> where the file is opened once and repeatedly read is interesting >> because it avoids having to open the file again and again. >> >> I guess you could end up with a wierd situation where you don't read >> the entire contents of the file in open call to read() and you might >> get inconsistent data across the different statistics? > > If the file is read in two chunks, totmaps_proc_show is only called > once. The patch specifies seq_read as read handler. Have a look at its > definition. As long as you don't read from the same seq file in > parallel or seek around in it, simple sequential reads will not > re-invoke the show() method for data that has already been formatted. > For partially consumed data, the kernel buffers the rest until someone > reads it or seeks to another offset. Ok that's good. If the consumer were using pread() though, would that look like a seek?
Re: [PATCH v1] mm, proc: Implement /proc/<pid>/totmaps
On Wed, Aug 10, 2016 at 10:37 AM, Jann Horn wrote: > On Wed, Aug 10, 2016 at 10:23:53AM -0700, Sonny Rao wrote: >> On Tue, Aug 9, 2016 at 2:01 PM, Robert Foss >> wrote: >> > >> > >> > On 2016-08-09 03:24 PM, Jann Horn wrote: >> >> >> >> On Tue, Aug 09, 2016 at 12:05:43PM -0400, robert.f...@collabora.com wrote: >> >>> >> >>> From: Sonny Rao >> >>> >> >>> This is based on earlier work by Thiago Goncales. It implements a new >> >>> per process proc file which summarizes the contents of the smaps file >> >>> but doesn't display any addresses. It gives more detailed information >> >>> than statm like the PSS (proprotional set size). It differs from the >> >>> original implementation in that it doesn't use the full blown set of >> >>> seq operations, uses a different termination condition, and doesn't >> >>> displayed "Locked" as that was broken on the original implemenation. >> >>> >> >>> This new proc file provides information faster than parsing the >> >>> potentially >> >>> huge smaps file. >> >>> >> >>> Signed-off-by: Sonny Rao >> >>> >> >>> Tested-by: Robert Foss >> >>> Signed-off-by: Robert Foss >> >> >> >> >> >> >> >>> +static int totmaps_proc_show(struct seq_file *m, void *data) >> >>> +{ >> >>> + struct proc_maps_private *priv = m->private; >> >>> + struct mm_struct *mm; >> >>> + struct vm_area_struct *vma; >> >>> + struct mem_size_stats *mss_sum = priv->mss; >> >>> + >> >>> + /* reference to priv->task already taken */ >> >>> + /* but need to get the mm here because */ >> >>> + /* task could be in the process of exiting */ >> >> >> >> >> >> Can you please elaborate on this? My understanding here is that you >> >> intend for the caller to be able to repeatedly read the same totmaps >> >> file with pread() and still see updated information after the target >> >> process has called execve() and be able to detect process death >> >> (instead of simply seeing stale values). Is that accurate? 
>> >> >> >> I would prefer it if you could grab a reference to the mm_struct >> >> directly at open time. >> > >> > >> > Sonny, do you know more about the above comment? >> >> I think right now the file gets re-opened every time, but the mode >> where the file is opened once and repeatedly read is interesting >> because it avoids having to open the file again and again. >> >> I guess you could end up with a wierd situation where you don't read >> the entire contents of the file in open call to read() and you might >> get inconsistent data across the different statistics? > > If the file is read in two chunks, totmaps_proc_show is only called > once. The patch specifies seq_read as read handler. Have a look at its > definition. As long as you don't read from the same seq file in > parallel or seek around in it, simple sequential reads will not > re-invoke the show() method for data that has already been formatted. > For partially consumed data, the kernel buffers the rest until someone > reads it or seeks to another offset. Ok that's good. If the consumer were using pread() though, would that look like a seek?
Re: [PATCH v1] mm, proc: Implement /proc/<pid>/totmaps
On Tue, Aug 9, 2016 at 2:01 PM, Robert Foss <robert.f...@collabora.com> wrote: > > > On 2016-08-09 03:24 PM, Jann Horn wrote: >> >> On Tue, Aug 09, 2016 at 12:05:43PM -0400, robert.f...@collabora.com wrote: >>> >>> From: Sonny Rao <sonny...@chromium.org> >>> >>> This is based on earlier work by Thiago Goncales. It implements a new >>> per process proc file which summarizes the contents of the smaps file >>> but doesn't display any addresses. It gives more detailed information >>> than statm like the PSS (proprotional set size). It differs from the >>> original implementation in that it doesn't use the full blown set of >>> seq operations, uses a different termination condition, and doesn't >>> displayed "Locked" as that was broken on the original implemenation. >>> >>> This new proc file provides information faster than parsing the >>> potentially >>> huge smaps file. >>> >>> Signed-off-by: Sonny Rao <sonny...@chromium.org> >>> >>> Tested-by: Robert Foss <robert.f...@collabora.com> >>> Signed-off-by: Robert Foss <robert.f...@collabora.com> >> >> >> >>> +static int totmaps_proc_show(struct seq_file *m, void *data) >>> +{ >>> + struct proc_maps_private *priv = m->private; >>> + struct mm_struct *mm; >>> + struct vm_area_struct *vma; >>> + struct mem_size_stats *mss_sum = priv->mss; >>> + >>> + /* reference to priv->task already taken */ >>> + /* but need to get the mm here because */ >>> + /* task could be in the process of exiting */ >> >> >> Can you please elaborate on this? My understanding here is that you >> intend for the caller to be able to repeatedly read the same totmaps >> file with pread() and still see updated information after the target >> process has called execve() and be able to detect process death >> (instead of simply seeing stale values). Is that accurate? >> >> I would prefer it if you could grab a reference to the mm_struct >> directly at open time. > > > Sonny, do you know more about the above comment? 
I think right now the file gets re-opened every time, but the mode where the file is opened once and repeatedly read is interesting because it avoids having to open the file again and again. I guess you could end up with a wierd situation where you don't read the entire contents of the file in open call to read() and you might get inconsistent data across the different statistics? > >> >> >>> + mm = get_task_mm(priv->task); >>> + if (!mm || IS_ERR(mm)) >>> + return -EINVAL; >> >> >> get_task_mm() doesn't return error codes, and all other callers just >> check whether the return value is NULL. >> > > I'll have that fixed in v2, thanks for spotting it! > > >> >>> + down_read(>mmap_sem); >>> + hold_task_mempolicy(priv); >>> + >>> + for (vma = mm->mmap; vma != priv->tail_vma; vma = vma->vm_next) { >>> + struct mem_size_stats mss; >>> + struct mm_walk smaps_walk = { >>> + .pmd_entry = smaps_pte_range, >>> + .mm = vma->vm_mm, >>> + .private = , >>> + }; >>> + >>> + if (vma->vm_mm && !is_vm_hugetlb_page(vma)) { >>> + memset(, 0, sizeof(mss)); >>> + walk_page_vma(vma, _walk); >>> + add_smaps_sum(, mss_sum); >>> + } >>> + } >> >> >> E... what? You accumulate values from mem_size_stats items into a >> struct mss_sum that is associated with the struct file? So when you >> read the file the second time, you get the old values plus the new ones? >> And when you read the file in parallel, you get inconsistent values? >> >> For most files in procfs, the behavior is that you can just call >> pread(fd, buf, sizeof(buf), 0) on the same fd again and again, giving >> you the current values every time, without mutating state. I strongly >> recommend that you get rid of priv->mss and just accumulate the state >> in a local variable (maybe one on the stack). > > > So a simple "static struct mem_size_stats" in totmaps_proc_show() would be a > better solution? > >> >> >>> @@ -836,6 +911,50 @@ static int tid
Re: [PATCH v1] mm, proc: Implement /proc/<pid>/totmaps
On Tue, Aug 9, 2016 at 2:01 PM, Robert Foss wrote: > > > On 2016-08-09 03:24 PM, Jann Horn wrote: >> >> On Tue, Aug 09, 2016 at 12:05:43PM -0400, robert.f...@collabora.com wrote: >>> >>> From: Sonny Rao >>> >>> This is based on earlier work by Thiago Goncales. It implements a new >>> per process proc file which summarizes the contents of the smaps file >>> but doesn't display any addresses. It gives more detailed information >>> than statm like the PSS (proprotional set size). It differs from the >>> original implementation in that it doesn't use the full blown set of >>> seq operations, uses a different termination condition, and doesn't >>> displayed "Locked" as that was broken on the original implemenation. >>> >>> This new proc file provides information faster than parsing the >>> potentially >>> huge smaps file. >>> >>> Signed-off-by: Sonny Rao >>> >>> Tested-by: Robert Foss >>> Signed-off-by: Robert Foss >> >> >> >>> +static int totmaps_proc_show(struct seq_file *m, void *data) >>> +{ >>> + struct proc_maps_private *priv = m->private; >>> + struct mm_struct *mm; >>> + struct vm_area_struct *vma; >>> + struct mem_size_stats *mss_sum = priv->mss; >>> + >>> + /* reference to priv->task already taken */ >>> + /* but need to get the mm here because */ >>> + /* task could be in the process of exiting */ >> >> >> Can you please elaborate on this? My understanding here is that you >> intend for the caller to be able to repeatedly read the same totmaps >> file with pread() and still see updated information after the target >> process has called execve() and be able to detect process death >> (instead of simply seeing stale values). Is that accurate? >> >> I would prefer it if you could grab a reference to the mm_struct >> directly at open time. > > > Sonny, do you know more about the above comment? 
I think right now the file gets re-opened every time, but the mode where the file is opened once and repeatedly read is interesting because it avoids having to open the file again and again. I guess you could end up with a wierd situation where you don't read the entire contents of the file in open call to read() and you might get inconsistent data across the different statistics? > >> >> >>> + mm = get_task_mm(priv->task); >>> + if (!mm || IS_ERR(mm)) >>> + return -EINVAL; >> >> >> get_task_mm() doesn't return error codes, and all other callers just >> check whether the return value is NULL. >> > > I'll have that fixed in v2, thanks for spotting it! > > >> >>> + down_read(>mmap_sem); >>> + hold_task_mempolicy(priv); >>> + >>> + for (vma = mm->mmap; vma != priv->tail_vma; vma = vma->vm_next) { >>> + struct mem_size_stats mss; >>> + struct mm_walk smaps_walk = { >>> + .pmd_entry = smaps_pte_range, >>> + .mm = vma->vm_mm, >>> + .private = , >>> + }; >>> + >>> + if (vma->vm_mm && !is_vm_hugetlb_page(vma)) { >>> + memset(, 0, sizeof(mss)); >>> + walk_page_vma(vma, _walk); >>> + add_smaps_sum(, mss_sum); >>> + } >>> + } >> >> >> E... what? You accumulate values from mem_size_stats items into a >> struct mss_sum that is associated with the struct file? So when you >> read the file the second time, you get the old values plus the new ones? >> And when you read the file in parallel, you get inconsistent values? >> >> For most files in procfs, the behavior is that you can just call >> pread(fd, buf, sizeof(buf), 0) on the same fd again and again, giving >> you the current values every time, without mutating state. I strongly >> recommend that you get rid of priv->mss and just accumulate the state >> in a local variable (maybe one on the stack). > > > So a simple "static struct mem_size_stats" in totmaps_proc_show() would be a > better solution? 
> >> >> >>> @@ -836,6 +911,50 @@ static int tid_smaps_open(struct inode *inode, >>> struct file *file) >>> return do_maps_open(inode, file, _tid_smaps_op); >>> } >>>
Re: [PATCH v1] mm, proc: Implement /proc/<pid>/totmaps
On Tue, Aug 9, 2016 at 12:16 PM, Konstantin Khlebnikov <koc...@gmail.com> wrote: > > On Tue, Aug 9, 2016 at 7:05 PM, <robert.f...@collabora.com> wrote: > > From: Sonny Rao <sonny...@chromium.org> > > > > This is based on earlier work by Thiago Goncales. It implements a new > > per process proc file which summarizes the contents of the smaps file > > but doesn't display any addresses. It gives more detailed information > > than statm like the PSS (proprotional set size). It differs from the > > original implementation in that it doesn't use the full blown set of > > seq operations, uses a different termination condition, and doesn't > > displayed "Locked" as that was broken on the original implemenation. > > > > This new proc file provides information faster than parsing the potentially > > huge smaps file. > > What statistics do you really need? PSS (Proportional Set Size) and related accounting of shared pages (swap could be shared) is where the existing summaries of memory usage are cumbersome. > > > I think, performance and flexibility issues could be really solved only by new > syscall for querying memory statistics for address range in any process: > process_vm_stat() or some kind of pumped fincore() for /proc/$pid/mem That would be a good long term solution if people want similarly complicated statistics without having to iterate through current interfaces. I mentioned monitoring before but I'll add that Proportional Set size, Unique Set Size, Swap are per process are also useful because they help us make better decisions about what processes need to be throttled or gracefully killed. 
> > > > > Signed-off-by: Sonny Rao <sonny...@chromium.org> > > > > Tested-by: Robert Foss <robert.f...@collabora.com> > > Signed-off-by: Robert Foss <robert.f...@collabora.com> > > > > --- > > fs/proc/base.c | 1 + > > fs/proc/internal.h | 4 ++ > > fs/proc/task_mmu.c | 126 > > + > > 3 files changed, 131 insertions(+) > > > > diff --git a/fs/proc/base.c b/fs/proc/base.c > > index a11eb71..de3acdf 100644 > > --- a/fs/proc/base.c > > +++ b/fs/proc/base.c > > @@ -2855,6 +2855,7 @@ static const struct pid_entry tgid_base_stuff[] = { > > REG("clear_refs", S_IWUSR, proc_clear_refs_operations), > > REG("smaps", S_IRUGO, proc_pid_smaps_operations), > > REG("pagemap",S_IRUSR, proc_pagemap_operations), > > + REG("totmaps",S_IRUGO, proc_totmaps_operations), > > #endif > > #ifdef CONFIG_SECURITY > > DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, > > proc_attr_dir_operations), > > diff --git a/fs/proc/internal.h b/fs/proc/internal.h > > index aa27810..6f3540f 100644 > > --- a/fs/proc/internal.h > > +++ b/fs/proc/internal.h > > @@ -58,6 +58,9 @@ union proc_op { > > struct task_struct *task); > > }; > > > > + > > +extern const struct file_operations proc_totmaps_operations; > > + > > struct proc_inode { > > struct pid *pid; > > int fd; > > @@ -281,6 +284,7 @@ struct proc_maps_private { > > struct mm_struct *mm; > > #ifdef CONFIG_MMU > > struct vm_area_struct *tail_vma; > > + struct mem_size_stats *mss; > > #endif > > #ifdef CONFIG_NUMA > > struct mempolicy *task_mempolicy; > > diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c > > index 4648c7f..b61873e 100644 > > --- a/fs/proc/task_mmu.c > > +++ b/fs/proc/task_mmu.c > > @@ -802,6 +802,81 @@ static int show_smap(struct seq_file *m, void *v, int > > is_pid) > > return 0; > > } > > > > +static void add_smaps_sum(struct mem_size_stats *mss, > > + struct mem_size_stats *mss_sum) > > +{ > > + mss_sum->resident += mss->resident; > > + mss_sum->pss += mss->pss; > > + mss_sum->shared_clean += mss->shared_clean; > > + 
mss_sum->shared_dirty += mss->shared_dirty; > > + mss_sum->private_clean += mss->private_clean; > > + mss_sum->private_dirty += mss->private_dirty; > > + mss_sum->referenced += mss->referenced; > > + mss_sum->anonymous += mss->anonymous; > > + mss_sum->anonymous_thp += mss->anonymous_thp; > > + mss_sum->swap += mss->swap; > > +} > > + > > +static int totmaps_proc_show(struct seq_file *m, void
Re: [PATCH v1] mm, proc: Implement /proc/<pid>/totmaps
On Tue, Aug 9, 2016 at 12:16 PM, Konstantin Khlebnikov wrote: > > On Tue, Aug 9, 2016 at 7:05 PM, wrote: > > From: Sonny Rao > > > > This is based on earlier work by Thiago Goncales. It implements a new > > per process proc file which summarizes the contents of the smaps file > > but doesn't display any addresses. It gives more detailed information > > than statm like the PSS (proprotional set size). It differs from the > > original implementation in that it doesn't use the full blown set of > > seq operations, uses a different termination condition, and doesn't > > displayed "Locked" as that was broken on the original implemenation. > > > > This new proc file provides information faster than parsing the potentially > > huge smaps file. > > What statistics do you really need? PSS (Proportional Set Size) and related accounting of shared pages (swap could be shared) is where the existing summaries of memory usage are cumbersome. > > > I think, performance and flexibility issues could be really solved only by new > syscall for querying memory statistics for address range in any process: > process_vm_stat() or some kind of pumped fincore() for /proc/$pid/mem That would be a good long term solution if people want similarly complicated statistics without having to iterate through current interfaces. I mentioned monitoring before but I'll add that Proportional Set size, Unique Set Size, Swap are per process are also useful because they help us make better decisions about what processes need to be throttled or gracefully killed. 
> > > > > Signed-off-by: Sonny Rao > > > > Tested-by: Robert Foss > > Signed-off-by: Robert Foss > > > > --- > > fs/proc/base.c | 1 + > > fs/proc/internal.h | 4 ++ > > fs/proc/task_mmu.c | 126 > > + > > 3 files changed, 131 insertions(+) > > > > diff --git a/fs/proc/base.c b/fs/proc/base.c > > index a11eb71..de3acdf 100644 > > --- a/fs/proc/base.c > > +++ b/fs/proc/base.c > > @@ -2855,6 +2855,7 @@ static const struct pid_entry tgid_base_stuff[] = { > > REG("clear_refs", S_IWUSR, proc_clear_refs_operations), > > REG("smaps", S_IRUGO, proc_pid_smaps_operations), > > REG("pagemap",S_IRUSR, proc_pagemap_operations), > > + REG("totmaps",S_IRUGO, proc_totmaps_operations), > > #endif > > #ifdef CONFIG_SECURITY > > DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, > > proc_attr_dir_operations), > > diff --git a/fs/proc/internal.h b/fs/proc/internal.h > > index aa27810..6f3540f 100644 > > --- a/fs/proc/internal.h > > +++ b/fs/proc/internal.h > > @@ -58,6 +58,9 @@ union proc_op { > > struct task_struct *task); > > }; > > > > + > > +extern const struct file_operations proc_totmaps_operations; > > + > > struct proc_inode { > > struct pid *pid; > > int fd; > > @@ -281,6 +284,7 @@ struct proc_maps_private { > > struct mm_struct *mm; > > #ifdef CONFIG_MMU > > struct vm_area_struct *tail_vma; > > + struct mem_size_stats *mss; > > #endif > > #ifdef CONFIG_NUMA > > struct mempolicy *task_mempolicy; > > diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c > > index 4648c7f..b61873e 100644 > > --- a/fs/proc/task_mmu.c > > +++ b/fs/proc/task_mmu.c > > @@ -802,6 +802,81 @@ static int show_smap(struct seq_file *m, void *v, int > > is_pid) > > return 0; > > } > > > > +static void add_smaps_sum(struct mem_size_stats *mss, > > + struct mem_size_stats *mss_sum) > > +{ > > + mss_sum->resident += mss->resident; > > + mss_sum->pss += mss->pss; > > + mss_sum->shared_clean += mss->shared_clean; > > + mss_sum->shared_dirty += mss->shared_dirty; > > + mss_sum->private_clean += 
mss->private_clean; > > + mss_sum->private_dirty += mss->private_dirty; > > + mss_sum->referenced += mss->referenced; > > + mss_sum->anonymous += mss->anonymous; > > + mss_sum->anonymous_thp += mss->anonymous_thp; > > + mss_sum->swap += mss->swap; > > +} > > + > > +static int totmaps_proc_show(struct seq_file *m, void *data) > > +{ > > + struct proc_maps_private *priv = m->private; > > + struct mm_struct *mm; > > + struct vm_area_struct *vma; > > +
Re: [PATCH v1] mm, proc: Implement /proc/<pid>/totmaps
On Tue, Aug 9, 2016 at 9:58 AM, Alexey Dobriyan <adobri...@gmail.com> wrote: > > On Tue, Aug 09, 2016 at 12:05:43PM -0400, robert.f...@collabora.com wrote: > > From: Sonny Rao <sonny...@chromium.org> > > > > This is based on earlier work by Thiago Goncales. It implements a new > > per process proc file which summarizes the contents of the smaps file > > but doesn't display any addresses. It gives more detailed information > > than statm like the PSS (proprotional set size). It differs from the > > original implementation in that it doesn't use the full blown set of > > seq operations, uses a different termination condition, and doesn't > > displayed "Locked" as that was broken on the original implemenation. > > > > This new proc file provides information faster than parsing the potentially > > huge smaps file. > > You can "parse" /proc/*/pagemap . RSS, swap are there. /proc/*pagemap is generally restricted and I don't believe it would quickly give PSS. > > So which ones do you really need? PSS and Swap are the most important. RSS isn't precise enough because it counts shared pages fully, and there tends to be a lot of sharing. > Why the separate anon hugepages and anon regular pages? I'm not sure if it's necessary, but that's how it's broken out in smaps. > > > + seq_printf(m, > > +"Rss:%8lu kB\n" > > +"Pss:%8lu kB\n" > > +"Shared_Clean: %8lu kB\n" > > +"Shared_Dirty: %8lu kB\n" > > +"Private_Clean: %8lu kB\n" > > +"Private_Dirty: %8lu kB\n" > > +"Referenced: %8lu kB\n" > > +"Anonymous: %8lu kB\n" > > +"AnonHugePages: %8lu kB\n" > > +"Swap: %8lu kB\n", > > +mss_sum->resident >> 10, > > +(unsigned long)(mss_sum->pss >> (10 + PSS_SHIFT)), > > +mss_sum->shared_clean >> 10, > > +mss_sum->shared_dirty >> 10, > > +mss_sum->private_clean >> 10, > > +mss_sum->private_dirty >> 10, > > +mss_sum->referenced >> 10, > > +mss_sum->anonymous >> 10, > > +mss_sum->anonymous_thp >> 10, > > +mss_sum->swap >> 10);
Re: [PATCH v1] mm, proc: Implement /proc/<pid>/totmaps
On Tue, Aug 9, 2016 at 9:58 AM, Alexey Dobriyan wrote: > > On Tue, Aug 09, 2016 at 12:05:43PM -0400, robert.f...@collabora.com wrote: > > From: Sonny Rao > > > > This is based on earlier work by Thiago Goncales. It implements a new > > per process proc file which summarizes the contents of the smaps file > > but doesn't display any addresses. It gives more detailed information > > than statm like the PSS (proprotional set size). It differs from the > > original implementation in that it doesn't use the full blown set of > > seq operations, uses a different termination condition, and doesn't > > displayed "Locked" as that was broken on the original implemenation. > > > > This new proc file provides information faster than parsing the potentially > > huge smaps file. > > You can "parse" /proc/*/pagemap . RSS, swap are there. /proc/*pagemap is generally restricted and I don't believe it would quickly give PSS. > > So which ones do you really need? PSS and Swap are the most important. RSS isn't precise enough because it counts shared pages fully, and there tends to be a lot of sharing. > Why the separate anon hugepages and anon regular pages? I'm not sure if it's necessary, but that's how it's broken out in smaps. > > > + seq_printf(m, > > +"Rss:%8lu kB\n" > > +"Pss:%8lu kB\n" > > +"Shared_Clean: %8lu kB\n" > > +"Shared_Dirty: %8lu kB\n" > > +"Private_Clean: %8lu kB\n" > > +"Private_Dirty: %8lu kB\n" > > +"Referenced: %8lu kB\n" > > +"Anonymous: %8lu kB\n" > > +"AnonHugePages: %8lu kB\n" > > +"Swap: %8lu kB\n", > > +mss_sum->resident >> 10, > > +(unsigned long)(mss_sum->pss >> (10 + PSS_SHIFT)), > > +mss_sum->shared_clean >> 10, > > +mss_sum->shared_dirty >> 10, > > +mss_sum->private_clean >> 10, > > +mss_sum->private_dirty >> 10, > > +mss_sum->referenced >> 10, > > +mss_sum->anonymous >> 10, > > +mss_sum->anonymous_thp >> 10, > > +mss_sum->swap >> 10);
Re: [PATCH v1] mm, proc: Implement /proc/<pid>/totmaps
On Tue, Aug 9, 2016 at 9:29 AM, Mateusz Guzik <mgu...@redhat.com> wrote: > On Tue, Aug 09, 2016 at 12:05:43PM -0400, robert.f...@collabora.com wrote: >> From: Sonny Rao <sonny...@chromium.org> >> >> This is based on earlier work by Thiago Goncales. It implements a new >> per process proc file which summarizes the contents of the smaps file >> but doesn't display any addresses. It gives more detailed information >> than statm like the PSS (proprotional set size). It differs from the >> original implementation in that it doesn't use the full blown set of >> seq operations, uses a different termination condition, and doesn't >> displayed "Locked" as that was broken on the original implemenation. >> >> This new proc file provides information faster than parsing the potentially >> huge smaps file. > > I have no idea about usefulness of this. I can comment about this. The use case is to speed up monitoring of memory consumption in environments where RSS isn't precise. For example Chrome tends to many processes which have hundreds of VMAs with a substantial amount of shared memory, and the error of using RSS rather than PSS tends to be very large when looking at overall memory consumption. PSS isn't kept as a single number that's exported like RSS, so to calculate PSS means having to parse a very large smaps file. This process is slow and has to be repeated for many processes, and we found that the just act of doing the parsing was taking up a significant amount of CPU time, so this patch is an attempt to make that process cheaper. > > The patch is definitely buggy with respect to how it implements actual > access to mm. 
> >> +static int totmaps_proc_show(struct seq_file *m, void *data) >> +{ >> + struct proc_maps_private *priv = m->private; >> + struct mm_struct *mm; >> + struct vm_area_struct *vma; >> + struct mem_size_stats *mss_sum = priv->mss; >> + >> + /* reference to priv->task already taken */ >> + /* but need to get the mm here because */ >> + /* task could be in the process of exiting */ >> + mm = get_task_mm(priv->task); >> + if (!mm || IS_ERR(mm)) >> + return -EINVAL; >> + > > That's not how it's done in smaps. > >> +static int totmaps_open(struct inode *inode, struct file *file) >> +{ >> + struct proc_maps_private *priv; >> + int ret = -ENOMEM; >> + priv = kzalloc(sizeof(*priv), GFP_KERNEL); >> + if (priv) { >> + priv->mss = kzalloc(sizeof(*priv->mss), GFP_KERNEL); >> + if (!priv->mss) >> + return -ENOMEM; > > Cases below explicitly kfree(priv). I can't remember whether the close > routine gets called if this one fails. Either way, something is wrong > here. > >> + >> + /* we need to grab references to the task_struct */ >> + /* at open time, because there's a potential information */ >> + /* leak where the totmaps file is opened and held open */ >> + /* while the underlying pid to task mapping changes */ >> + /* underneath it */ >> + priv->task = get_pid_task(proc_pid(inode), PIDTYPE_PID); > > This performs no permission checks that I would see. If you take a look > at smaps you will see the user ends up in proc_maps_open which performs > proc_mem_open(inode, PTRACE_MODE_READ) and gets a mm from there. > > >> + if (!priv->task) { >> + kfree(priv->mss); >> + kfree(priv); >> + return -ESRCH; >> + } >> + >> + ret = single_open(file, totmaps_proc_show, priv); >> + if (ret) { >> + put_task_struct(priv->task); >> + kfree(priv->mss); >> + kfree(priv); >> + } >> + } >> + return ret; >> +} >> + > > -- > Mateusz Guzik
Re: [PATCH v1] mm, proc: Implement /proc/<pid>/totmaps
On Tue, Aug 9, 2016 at 9:29 AM, Mateusz Guzik wrote: > On Tue, Aug 09, 2016 at 12:05:43PM -0400, robert.f...@collabora.com wrote: >> From: Sonny Rao >> >> This is based on earlier work by Thiago Goncales. It implements a new >> per process proc file which summarizes the contents of the smaps file >> but doesn't display any addresses. It gives more detailed information >> than statm like the PSS (proprotional set size). It differs from the >> original implementation in that it doesn't use the full blown set of >> seq operations, uses a different termination condition, and doesn't >> displayed "Locked" as that was broken on the original implemenation. >> >> This new proc file provides information faster than parsing the potentially >> huge smaps file. > > I have no idea about usefulness of this. I can comment about this. The use case is to speed up monitoring of memory consumption in environments where RSS isn't precise. For example Chrome tends to many processes which have hundreds of VMAs with a substantial amount of shared memory, and the error of using RSS rather than PSS tends to be very large when looking at overall memory consumption. PSS isn't kept as a single number that's exported like RSS, so to calculate PSS means having to parse a very large smaps file. This process is slow and has to be repeated for many processes, and we found that the just act of doing the parsing was taking up a significant amount of CPU time, so this patch is an attempt to make that process cheaper. > > The patch is definitely buggy with respect to how it implements actual > access to mm. 
> >> +static int totmaps_proc_show(struct seq_file *m, void *data) >> +{ >> + struct proc_maps_private *priv = m->private; >> + struct mm_struct *mm; >> + struct vm_area_struct *vma; >> + struct mem_size_stats *mss_sum = priv->mss; >> + >> + /* reference to priv->task already taken */ >> + /* but need to get the mm here because */ >> + /* task could be in the process of exiting */ >> + mm = get_task_mm(priv->task); >> + if (!mm || IS_ERR(mm)) >> + return -EINVAL; >> + > > That's not how it's done in smaps. > >> +static int totmaps_open(struct inode *inode, struct file *file) >> +{ >> + struct proc_maps_private *priv; >> + int ret = -ENOMEM; >> + priv = kzalloc(sizeof(*priv), GFP_KERNEL); >> + if (priv) { >> + priv->mss = kzalloc(sizeof(*priv->mss), GFP_KERNEL); >> + if (!priv->mss) >> + return -ENOMEM; > > Cases below explicitly kfree(priv). I can't remember whether the close > routine gets called if this one fails. Either way, something is wrong > here. > >> + >> + /* we need to grab references to the task_struct */ >> + /* at open time, because there's a potential information */ >> + /* leak where the totmaps file is opened and held open */ >> + /* while the underlying pid to task mapping changes */ >> + /* underneath it */ >> + priv->task = get_pid_task(proc_pid(inode), PIDTYPE_PID); > > This performs no permission checks that I would see. If you take a look > at smaps you will see the user ends up in proc_maps_open which performs > proc_mem_open(inode, PTRACE_MODE_READ) and gets a mm from there. > > >> + if (!priv->task) { >> + kfree(priv->mss); >> + kfree(priv); >> + return -ESRCH; >> + } >> + >> + ret = single_open(file, totmaps_proc_show, priv); >> + if (ret) { >> + put_task_struct(priv->task); >> + kfree(priv->mss); >> + kfree(priv); >> + } >> + } >> + return ret; >> +} >> + > > -- > Mateusz Guzik
Re: [PATCH v2 3/5] Documentation: arm-pl330: add description of arm,pl330-broken-no-flushp
On Thu, Aug 27, 2015 at 5:38 PM, Shawn Lin wrote: > Signed-off-by: Shawn Lin > Reviewed-by: Doug Anderson > --- > > Changes in v2: > - add Reviewed-by: Doug Anderson > - reorder this patch before the usage of the quirk > > Changes in v1: > - rename broken-no-flushp to "arm,pl330-broken-no-flushp" suggested > by Krzysztof. > > Documentation/devicetree/bindings/dma/arm-pl330.txt | 1 + > 1 file changed, 1 insertion(+) > > diff --git a/Documentation/devicetree/bindings/dma/arm-pl330.txt > b/Documentation/devicetree/bindings/dma/arm-pl330.txt > index 2675658..db7e226 100644 > --- a/Documentation/devicetree/bindings/dma/arm-pl330.txt > +++ b/Documentation/devicetree/bindings/dma/arm-pl330.txt > @@ -15,6 +15,7 @@ Optional properties: > cells in the dmas property of client device. >- dma-channels: contains the total number of DMA channels supported by the > DMAC >- dma-requests: contains the total number of DMA requests supported by the > DMAC > + - arm,pl330-broken-no-flushp: quirk for avoiding to execute DMAFLUSHP > > Example: > > -- > 2.3.7 Reviewed-by: Sonny Rao > > > -- > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to majord...@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v2 1/5] DMA: pl330: support burst mode for dev-to-mem and mem-to-dev transmit
On Thu, Aug 27, 2015 at 5:37 PM, Shawn Lin wrote: > From: Boojin Kim > > This patch adds to support burst mode for dev-to-mem and > mem-to-dev transmit. > > Signed-off-by: Boojin Kim > Signed-off-by: Addy Ke > Signed-off-by: Shawn Lin > cc: Heiko Stuebner > cc: Doug Anderson > cc: Olof Johansson > > --- > > Changes in v2: > - amend the author > - amend Olof's mail address > > Changes in v1: > - rename broken-no-flushp to "arm,pl330-broken-no-flushp" suggested > by Krzysztof. > - add From original author. > - remove Sunny's tag > > drivers/dma/pl330.c | 18 -- > 1 file changed, 12 insertions(+), 6 deletions(-) > > diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c > index ecab4ea0..0d544d2 100644 > --- a/drivers/dma/pl330.c > +++ b/drivers/dma/pl330.c > @@ -1141,10 +1141,13 @@ static inline int _ldst_devtomem(unsigned dry_run, u8 > buf[], > const struct _xfer_spec *pxs, int cyc) > { > int off = 0; > + enum pl330_cond cond; > + > + cond = (pxs->desc->rqcfg.brst_len == 1) ? SINGLE : BURST; > > while (cyc--) { > - off += _emit_WFP(dry_run, [off], SINGLE, pxs->desc->peri); > - off += _emit_LDP(dry_run, [off], SINGLE, pxs->desc->peri); > + off += _emit_WFP(dry_run, [off], cond, pxs->desc->peri); > + off += _emit_LDP(dry_run, [off], cond, pxs->desc->peri); > off += _emit_ST(dry_run, [off], ALWAYS); > off += _emit_FLUSHP(dry_run, [off], pxs->desc->peri); > } > @@ -1156,11 +1159,14 @@ static inline int _ldst_memtodev(unsigned dry_run, u8 > buf[], > const struct _xfer_spec *pxs, int cyc) > { > int off = 0; > + enum pl330_cond cond; > + > + cond = (pxs->desc->rqcfg.brst_len == 1) ? 
SINGLE : BURST; > > while (cyc--) { > - off += _emit_WFP(dry_run, [off], SINGLE, pxs->desc->peri); > + off += _emit_WFP(dry_run, [off], cond, pxs->desc->peri); > off += _emit_LD(dry_run, [off], ALWAYS); > - off += _emit_STP(dry_run, [off], SINGLE, pxs->desc->peri); > + off += _emit_STP(dry_run, [off], cond, pxs->desc->peri); > off += _emit_FLUSHP(dry_run, [off], pxs->desc->peri); > } > > @@ -2557,7 +2563,7 @@ static struct dma_async_tx_descriptor > *pl330_prep_dma_cyclic( > > desc->rqtype = direction; > desc->rqcfg.brst_size = pch->burst_sz; > - desc->rqcfg.brst_len = 1; > + desc->rqcfg.brst_len = pch->burst_len; > desc->bytes_requested = period_len; > fill_px(>px, dst, src, period_len); > > @@ -2702,7 +2708,7 @@ pl330_prep_slave_sg(struct dma_chan *chan, struct > scatterlist *sgl, > } > > desc->rqcfg.brst_size = pch->burst_sz; > - desc->rqcfg.brst_len = 1; > + desc->rqcfg.brst_len = pch->burst_len; > desc->rqtype = direction; > desc->bytes_requested = sg_dma_len(sg); > } > -- > 2.3.7 Reviewed-by: Sonny Rao > > > -- > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to majord...@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v2 2/5] DMA: pl330: add quirk for broken no flushp
l330, dry_run, [off], pxs, cyc); > break; > case DMA_DEV_TO_MEM: > - off += _ldst_devtomem(dry_run, [off], pxs, cyc); > + off += _ldst_devtomem(pl330, dry_run, [off], pxs, cyc); > break; > case DMA_MEM_TO_MEM: > off += _ldst_memtomem(dry_run, [off], pxs, cyc); > @@ -1197,7 +1225,7 @@ static int _bursts(unsigned dry_run, u8 buf[], > } > > /* Returns bytes consumed and updates bursts */ > -static inline int _loop(unsigned dry_run, u8 buf[], > +static inline int _loop(struct pl330_dmac *pl330, unsigned dry_run, u8 buf[], > unsigned long *bursts, const struct _xfer_spec *pxs) > { > int cyc, cycmax, szlp, szlpend, szbrst, off; > @@ -1220,7 +1248,7 @@ static inline int _loop(unsigned dry_run, u8 buf[], > } > > szlp = _emit_LP(1, buf, 0, 0); > - szbrst = _bursts(1, buf, pxs, 1); > + szbrst = _bursts(pl330, 1, buf, pxs, 1); > > lpend.cond = ALWAYS; > lpend.forever = false; > @@ -1252,7 +1280,7 @@ static inline int _loop(unsigned dry_run, u8 buf[], > off += _emit_LP(dry_run, [off], 1, lcnt1); > ljmp1 = off; > > - off += _bursts(dry_run, [off], pxs, cyc); > + off += _bursts(pl330, dry_run, [off], pxs, cyc); > > lpend.cond = ALWAYS; > lpend.forever = false; > @@ -1275,8 +1303,9 @@ static inline int _loop(unsigned dry_run, u8 buf[], > return off; > } > > -static inline int _setup_loops(unsigned dry_run, u8 buf[], > - const struct _xfer_spec *pxs) > +static inline int _setup_loops(struct pl330_dmac *pl330, > + unsigned dry_run, u8 buf[], > + const struct _xfer_spec *pxs) > { > struct pl330_xfer *x = >desc->px; > u32 ccr = pxs->ccr; > @@ -1285,15 +1314,16 @@ static inline int _setup_loops(unsigned dry_run, u8 > buf[], > > while (bursts) { > c = bursts; > - off += _loop(dry_run, [off], , pxs); > + off += _loop(pl330, dry_run, [off], , pxs); > bursts -= c; > } > > return off; > } > > -static inline int _setup_xfer(unsigned dry_run, u8 buf[], > - const struct _xfer_spec *pxs) > +static inline int _setup_xfer(struct pl330_dmac *pl330, > + unsigned dry_run, u8 buf[], > + const struct 
_xfer_spec *pxs) > { > struct pl330_xfer *x = >desc->px; > int off = 0; > @@ -1304,7 +1334,7 @@ static inline int _setup_xfer(unsigned dry_run, u8 > buf[], > off += _emit_MOV(dry_run, [off], DAR, x->dst_addr); > > /* Setup Loop(s) */ > - off += _setup_loops(dry_run, [off], pxs); > + off += _setup_loops(pl330, dry_run, [off], pxs); > > return off; > } > @@ -1313,8 +1343,9 @@ static inline int _setup_xfer(unsigned dry_run, u8 > buf[], > * A req is a sequence of one or more xfer units. > * Returns the number of bytes taken to setup the MC for the req. > */ > -static int _setup_req(unsigned dry_run, struct pl330_thread *thrd, > - unsigned index, struct _xfer_spec *pxs) > +static int _setup_req(struct pl330_dmac *pl330, unsigned dry_run, > + struct pl330_thread *thrd, unsigned index, > + struct _xfer_spec *pxs) > { > struct _pl330_req *req = >req[index]; > struct pl330_xfer *x; > @@ -1331,7 +1362,7 @@ static int _setup_req(unsigned dry_run, struct > pl330_thread *thrd, > if (x->bytes % (BRST_SIZE(pxs->ccr) * BRST_LEN(pxs->ccr))) > return -EINVAL; > > - off += _setup_xfer(dry_run, [off], pxs); > + off += _setup_xfer(pl330, dry_run, [off], pxs); > > /* DMASEV peripheral/event */ > off += _emit_SEV(dry_run, [off], thrd->ev); > @@ -1425,7 +1456,7 @@ static int pl330_submit_req(struct pl330_thread *thrd, > xs.desc = desc; > > /* First dry run to check if req is acceptable */ > - ret = _setup_req(1, thrd, idx, ); > + ret = _setup_req(pl330, 1, thrd, idx, ); > if (ret < 0) > goto xfer_exit; > > @@ -1439,7 +1470,7 @@ static int pl330_submit_req(struct pl330_thread *thrd, > /* Hook the request */ > thrd->lstenq = idx; > thrd->req[idx].desc = desc; > - _setup_req(0, thrd, idx, ); > + _setup_req(pl330, 0, thrd, idx, ); > > ret = 0; > > @@ -2784,6 +2815,7 @@ pl330_probe(struct amba_device *adev, const struct > amba_id *id) >
Re: [PATCH v2 0/5] Fix broken DMAFLUSHP on Rockchips platform
On Thu, Aug 27, 2015 at 5:36 PM, Shawn Lin wrote: > > The purpose of the DMAFLUSHP instruction: > - Tell the peripheral to clear its status and control registers. > - Send a message to the peripheral to resend its level status. > > There are 3 timings described in PL330 Technical Reference Manual: > - Timing 1: Burst request, can work well without DMAFLUSHP. > - Timing 2: Single and burst request, DMAC will ignore the single > transfer request. This timing happens if there are single > and burst request. > - Timing 3: Single transfers for a burst request, DMAC should signals > datype to request the peripheral to flush the contents of > any control registers. This timing happens if there is > not enough MFIFO to places the burst data. > > A peripheral may signal a DMA request during the execution of > DMAFLUSHP instruction, that cause DMA request being ignored by DMAC. > > But DMAC and all peripherals on RK3X SoCs DO NOT support DMAFLUSHP. > It can't send a message to the peripheral to resend DMA request, > and the peripheral can't acknowledge a flush request from DMAC. > So all DMA requests should NOT be ignored by DMAC, and DMAC will not > notify the peripheral to flush. > > To fix this problem, we need: > - Do NOT execute DMAFLUSHP instruction. > - Timing 2 and timing 3 should not happen. > > Because on RK3X SoCs, there are 6 or below channels and 32 MFIFO depth > for DMAC_BUS, and 8 channels and 64 MFIFO depth for DMAC_PERI, it is > impossible to hit the timing 3 if burst length is equal or less than 4. Fixing this issue also requires changes to drivers, so it would be nice if you put those changes into the same patchset. Otherwise someone may apply this series and expect things to work but they will still be broken. Specifically the peripherals should be setting their burst sizes for their DMA requests low enough to avoid needing the working DMAFLUSHP instruction. 
Also, I remember we ran into an issue when we tried using burst length of 4 with the i2s device on RK3288 because we could get requests that either weren't aligned or a multiple of 4 sizes and some transfers would just fail, so we ended up using a burst size of 1. I recommend if we aren't sure about size or alignment for a particular peripheral, a burst size of 1 is safest. For something like a block device, I think we can use the larger size bursts. That's another reason to include the driver fixes in the series, just so we get it right, thanks. > > Since the request type signal by the peripheral can only be set by > software. We can set Rockchip Soc's GRF_PERIDMAC_CON0[2:1] to select single > or burst request, if it is set b01, all of the peripharals will signal a > brust > request. So the timing 2 will not happen, too. > > So DMAC on RK3X can support single or burst transfer, but can't support > mixed transfer. > > Because burst transfer is more efficient than single transfer, this is > confirmed by our ASIC team, who strongly suggest to use burst transfer. > And this is confirmed by Addy's test on RK3288-Pink2 board, the speed of > spi flash burst transfer will increase about two times than single transfer. > Also, I have tested dw_mmc with pl330 on RK3188 platform to double confirm > the result. That means burst transfer is reansonable. > > So we need a quirk not to execute DMAFLUSHP instruction and to use burst > transfer. > > Note: > - The Rockchip Soc default value of GRF_PERIDMAC_CON0[2:1] is b01. To > support brust transfer, these bits should not be changed in bootloader. > > > Changes in v2: > - amend the author > - reorder the patches suggested by Doug > - add Reviewed-by: Doug Anderson for > rk3288.dtsi patch and arm-pl330.txt patch > - amend Olof's mail address > > Changes in v1: > - rename broken-no-flushp to "arm,pl330-broken-no-flushp" suggested > by Krzysztof. > - add From original author. 
> - remove Sunny's tag > > Addy Ke (2): > DMA: pl330: add quirk for broken no flushp > ARM: dts: Add arm,pl330-broken-no-flushp quirk for rk3288 platform > > Boojin Kim (1): > DMA: pl330: support burst mode for dev-to-mem and mem-to-dev transmit > > Shawn Lin (2): > Documentation: arm-pl330: add description of > arm,pl330-broken-no-flushp > ARM: dts: Add arm,pl330-broken-no-flushp quirk for rk3xxx platform > > .../devicetree/bindings/dma/arm-pl330.txt | 1 + > arch/arm/boot/dts/rk3288.dtsi | 3 + > arch/arm/boot/dts/rk3xxx.dtsi | 3 + > drivers/dma/pl330.c| 101 > +++-- > 4 files changed, 79 insertions(+), 29 deletions(-) > > -- > 2.3.7 > > > -- > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to majord...@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ -- To unsubscribe from this list: send the line
Re: [PATCH v2 0/5] Fix broken DMAFLUSHP on Rockchips platform
On Thu, Aug 27, 2015 at 5:36 PM, Shawn Linwrote: > > The purpose of the DMAFLUSHP instruction: > - Tell the peripheral to clear its status and control registers. > - Send a message to the peripheral to resend its level status. > > There are 3 timings described in PL330 Technical Reference Manual: > - Timing 1: Burst request, can work well without DMAFLUSHP. > - Timing 2: Single and burst request, DMAC will ignore the single > transfer request. This timing happens if there are single > and burst request. > - Timing 3: Single transfers for a burst request, DMAC should signals > datype to request the peripheral to flush the contents of > any control registers. This timing happens if there is > not enough MFIFO to places the burst data. > > A peripheral may signal a DMA request during the execution of > DMAFLUSHP instruction, that cause DMA request being ignored by DMAC. > > But DMAC and all peripherals on RK3X SoCs DO NOT support DMAFLUSHP. > It can't send a message to the peripheral to resend DMA request, > and the peripheral can't acknowledge a flush request from DMAC. > So all DMA requests should NOT be ignored by DMAC, and DMAC will not > notify the peripheral to flush. > > To fix this problem, we need: > - Do NOT execute DMAFLUSHP instruction. > - Timing 2 and timing 3 should not happen. > > Because on RK3X SoCs, there are 6 or below channels and 32 MFIFO depth > for DMAC_BUS, and 8 channels and 64 MFIFO depth for DMAC_PERI, it is > impossible to hit the timing 3 if burst length is equal or less than 4. Fixing this issue also requires changes to drivers, so it would be nice if you put those changes into the same patchset. Otherwise someone may apply this series and expect things to work but they will still be broken. Specifically the peripherals should be setting their burst sizes for their DMA requests low enough to avoid needing the working DMAFLUSHP instruction. 
Also, I remember we ran into an issue when we tried using burst length of 4 with the i2s device on RK3288 because we could get requests that either weren't aligned or a multiple of 4 sizes and some transfers would just fail, so we ended up using a burst size of 1. I recommend if we aren't sure about size or alignment for a particular peripheral, a burst size of 1 is safest. For something like a block device, I think we can use the larger size bursts. That's another reason to include the driver fixes in the series, just so we get it right, thanks. > > Since the request type signal by the peripheral can only be set by > software. We can set Rockchip Soc's GRF_PERIDMAC_CON0[2:1] to select single > or burst request, if it is set b01, all of the peripherals will signal a > burst > request. So the timing 2 will not happen, too. > > So DMAC on RK3X can support single or burst transfer, but can't support > mixed transfer. > > Because burst transfer is more efficient than single transfer, this is > confirmed by our ASIC team, who strongly suggest to use burst transfer. > And this is confirmed by Addy's test on RK3288-Pink2 board, the speed of > spi flash burst transfer will increase about two times than single transfer. > Also, I have tested dw_mmc with pl330 on RK3188 platform to double confirm > the result. That means burst transfer is reasonable. > > So we need a quirk not to execute DMAFLUSHP instruction and to use burst > transfer. > > Note: > - The Rockchip Soc default value of GRF_PERIDMAC_CON0[2:1] is b01. To > support burst transfer, these bits should not be changed in bootloader. > > > Changes in v2: > - amend the author > - reorder the patches suggested by Doug > - add Reviewed-by: Doug Anderson for > rk3288.dtsi patch and arm-pl330.txt patch > - amend Olof's mail address > > Changes in v1: > - rename broken-no-flushp to "arm,pl330-broken-no-flushp" suggested > by Krzysztof. > - add From original author. 
> - remove Sunny's tag > > Addy Ke (2): > DMA: pl330: add quirk for broken no flushp > ARM: dts: Add arm,pl330-broken-no-flushp quirk for rk3288 platform > > Boojin Kim (1): > DMA: pl330: support burst mode for dev-to-mem and mem-to-dev transmit > > Shawn Lin (2): > Documentation: arm-pl330: add description of > arm,pl330-broken-no-flushp > ARM: dts: Add arm,pl330-broken-no-flushp quirk for rk3xxx platform > > .../devicetree/bindings/dma/arm-pl330.txt | 1 + > arch/arm/boot/dts/rk3288.dtsi | 3 + > arch/arm/boot/dts/rk3xxx.dtsi | 3 + > drivers/dma/pl330.c| 101 > +++-- > 4 files changed, 79 insertions(+), 29 deletions(-) > > -- > 2.3.7 > > > -- > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to majord...@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ -- To
Re: [PATCH v2 2/5] DMA: pl330: add quirk for broken no flushp
; > switch (pxs->desc->rqtype) { > case DMA_MEM_TO_DEV: > - off += _ldst_memtodev(dry_run, [off], pxs, cyc); > + off += _ldst_memtodev(pl330, dry_run, [off], pxs, cyc); > break; > case DMA_DEV_TO_MEM: > - off += _ldst_devtomem(dry_run, [off], pxs, cyc); > + off += _ldst_devtomem(pl330, dry_run, [off], pxs, cyc); > break; > case DMA_MEM_TO_MEM: > off += _ldst_memtomem(dry_run, [off], pxs, cyc); > @@ -1197,7 +1225,7 @@ static int _bursts(unsigned dry_run, u8 buf[], > } > > /* Returns bytes consumed and updates bursts */ > -static inline int _loop(unsigned dry_run, u8 buf[], > +static inline int _loop(struct pl330_dmac *pl330, unsigned dry_run, u8 buf[], > unsigned long *bursts, const struct _xfer_spec *pxs) > { > int cyc, cycmax, szlp, szlpend, szbrst, off; > @@ -1220,7 +1248,7 @@ static inline int _loop(unsigned dry_run, u8 buf[], > } > > szlp = _emit_LP(1, buf, 0, 0); > - szbrst = _bursts(1, buf, pxs, 1); > + szbrst = _bursts(pl330, 1, buf, pxs, 1); > > lpend.cond = ALWAYS; > lpend.forever = false; > @@ -1252,7 +1280,7 @@ static inline int _loop(unsigned dry_run, u8 buf[], > off += _emit_LP(dry_run, [off], 1, lcnt1); > ljmp1 = off; > > - off += _bursts(dry_run, [off], pxs, cyc); > + off += _bursts(pl330, dry_run, [off], pxs, cyc); > > lpend.cond = ALWAYS; > lpend.forever = false; > @@ -1275,8 +1303,9 @@ static inline int _loop(unsigned dry_run, u8 buf[], > return off; > } > > -static inline int _setup_loops(unsigned dry_run, u8 buf[], > - const struct _xfer_spec *pxs) > +static inline int _setup_loops(struct pl330_dmac *pl330, > + unsigned dry_run, u8 buf[], > + const struct _xfer_spec *pxs) > { > struct pl330_xfer *x = >desc->px; > u32 ccr = pxs->ccr; > @@ -1285,15 +1314,16 @@ static inline int _setup_loops(unsigned dry_run, u8 > buf[], > > while (bursts) { > c = bursts; > - off += _loop(dry_run, [off], , pxs); > + off += _loop(pl330, dry_run, [off], , pxs); > bursts -= c; > } > > return off; > } > > -static inline int _setup_xfer(unsigned dry_run, u8 buf[], > - 
const struct _xfer_spec *pxs) > +static inline int _setup_xfer(struct pl330_dmac *pl330, > + unsigned dry_run, u8 buf[], > + const struct _xfer_spec *pxs) > { > struct pl330_xfer *x = >desc->px; > int off = 0; > @@ -1304,7 +1334,7 @@ static inline int _setup_xfer(unsigned dry_run, u8 > buf[], > off += _emit_MOV(dry_run, [off], DAR, x->dst_addr); > > /* Setup Loop(s) */ > - off += _setup_loops(dry_run, [off], pxs); > + off += _setup_loops(pl330, dry_run, [off], pxs); > > return off; > } > @@ -1313,8 +1343,9 @@ static inline int _setup_xfer(unsigned dry_run, u8 > buf[], > * A req is a sequence of one or more xfer units. > * Returns the number of bytes taken to setup the MC for the req. > */ > -static int _setup_req(unsigned dry_run, struct pl330_thread *thrd, > - unsigned index, struct _xfer_spec *pxs) > +static int _setup_req(struct pl330_dmac *pl330, unsigned dry_run, > + struct pl330_thread *thrd, unsigned index, > + struct _xfer_spec *pxs) > { > struct _pl330_req *req = >req[index]; > struct pl330_xfer *x; > @@ -1331,7 +1362,7 @@ static int _setup_req(unsigned dry_run, struct > pl330_thread *thrd, > if (x->bytes % (BRST_SIZE(pxs->ccr) * BRST_LEN(pxs->ccr))) > return -EINVAL; > > - off += _setup_xfer(dry_run, [off], pxs); > + off += _setup_xfer(pl330, dry_run, [off], pxs); > > /* DMASEV peripheral/event */ > off += _emit_SEV(dry_run, [off], thrd->ev); > @@ -1425,7 +1456,7 @@ static int pl330_submit_req(struct pl330_thread *thrd, > xs.desc = desc; > > /* First dry run to check if req is acceptable */ > - ret = _setup_req(1, thrd, idx, ); > + ret = _setup_req(pl330, 1, thrd, idx, ); > if (ret < 0) > goto xfer_exit; > > @@ -1439,7 +1470,7 @@ static int pl330_submit_req(struct pl330_thread *thrd, > /* Hook the request */ > thrd->lstenq = idx; > thrd->req[idx].desc = desc; > - _setup_req(0, thrd
Re: [PATCH v2 1/5] DMA: pl330: support burst mode for dev-to-mem and mem-to-dev transmit
On Thu, Aug 27, 2015 at 5:37 PM, Shawn Lin <shawn@rock-chips.com> wrote: > From: Boojin Kim <boojin@samsung.com> > > This patch adds to support burst mode for dev-to-mem and > mem-to-dev transmit. > > Signed-off-by: Boojin Kim <boojin@samsung.com> > Signed-off-by: Addy Ke <addy...@rock-chips.com> > Signed-off-by: Shawn Lin <shawn@rock-chips.com> > cc: Heiko Stuebner <he...@sntech.de> > cc: Doug Anderson <diand...@chromium.org> > cc: Olof Johansson <o...@lixom.net> > > --- > > Changes in v2: > - amend the author > - amend Olof's mail address > > Changes in v1: > - rename broken-no-flushp to "arm,pl330-broken-no-flushp" suggested > by Krzysztof. > - add From original author. > - remove Sunny's tag > > drivers/dma/pl330.c | 18 -- > 1 file changed, 12 insertions(+), 6 deletions(-) > > diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c > index ecab4ea0..0d544d2 100644 > --- a/drivers/dma/pl330.c > +++ b/drivers/dma/pl330.c > @@ -1141,10 +1141,13 @@ static inline int _ldst_devtomem(unsigned dry_run, u8 > buf[], > const struct _xfer_spec *pxs, int cyc) > { > int off = 0; > + enum pl330_cond cond; > + > + cond = (pxs->desc->rqcfg.brst_len == 1) ? SINGLE : BURST; > > while (cyc--) { > - off += _emit_WFP(dry_run, [off], SINGLE, pxs->desc->peri); > - off += _emit_LDP(dry_run, [off], SINGLE, pxs->desc->peri); > + off += _emit_WFP(dry_run, [off], cond, pxs->desc->peri); > + off += _emit_LDP(dry_run, [off], cond, pxs->desc->peri); > off += _emit_ST(dry_run, [off], ALWAYS); > off += _emit_FLUSHP(dry_run, [off], pxs->desc->peri); > } > @@ -1156,11 +1159,14 @@ static inline int _ldst_memtodev(unsigned dry_run, u8 > buf[], > const struct _xfer_spec *pxs, int cyc) > { > int off = 0; > + enum pl330_cond cond; > + > + cond = (pxs->desc->rqcfg.brst_len == 1) ? 
SINGLE : BURST; > > while (cyc--) { > - off += _emit_WFP(dry_run, [off], SINGLE, pxs->desc->peri); > + off += _emit_WFP(dry_run, [off], cond, pxs->desc->peri); > off += _emit_LD(dry_run, [off], ALWAYS); > - off += _emit_STP(dry_run, [off], SINGLE, pxs->desc->peri); > + off += _emit_STP(dry_run, [off], cond, pxs->desc->peri); > off += _emit_FLUSHP(dry_run, [off], pxs->desc->peri); > } > > @@ -2557,7 +2563,7 @@ static struct dma_async_tx_descriptor > *pl330_prep_dma_cyclic( > > desc->rqtype = direction; > desc->rqcfg.brst_size = pch->burst_sz; > - desc->rqcfg.brst_len = 1; > + desc->rqcfg.brst_len = pch->burst_len; > desc->bytes_requested = period_len; > fill_px(>px, dst, src, period_len); > > @@ -2702,7 +2708,7 @@ pl330_prep_slave_sg(struct dma_chan *chan, struct > scatterlist *sgl, > } > > desc->rqcfg.brst_size = pch->burst_sz; > - desc->rqcfg.brst_len = 1; > + desc->rqcfg.brst_len = pch->burst_len; > desc->rqtype = direction; > desc->bytes_requested = sg_dma_len(sg); > } > -- > 2.3.7 Reviewed-by: Sonny Rao <sonny...@chromium.org> > > > -- > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to majord...@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v2 3/5] Documentation: arm-pl330: add description of arm,pl330-broken-no-flushp
On Thu, Aug 27, 2015 at 5:38 PM, Shawn Lin <shawn@rock-chips.com> wrote: > Signed-off-by: Shawn Lin <shawn@rock-chips.com> > Reviewed-by: Doug Anderson <diand...@chromium.org> > --- > > Changes in v2: > - add Reviewed-by: Doug Anderson <diand...@chromium.org> > - reorder this patch before the usage of the quirk > > Changes in v1: > - rename broken-no-flushp to "arm,pl330-broken-no-flushp" suggested > by Krzysztof. > > Documentation/devicetree/bindings/dma/arm-pl330.txt | 1 + > 1 file changed, 1 insertion(+) > > diff --git a/Documentation/devicetree/bindings/dma/arm-pl330.txt > b/Documentation/devicetree/bindings/dma/arm-pl330.txt > index 2675658..db7e226 100644 > --- a/Documentation/devicetree/bindings/dma/arm-pl330.txt > +++ b/Documentation/devicetree/bindings/dma/arm-pl330.txt > @@ -15,6 +15,7 @@ Optional properties: > cells in the dmas property of client device. >- dma-channels: contains the total number of DMA channels supported by the > DMAC >- dma-requests: contains the total number of DMA requests supported by the > DMAC > + - arm,pl330-broken-no-flushp: quirk for avoiding to execute DMAFLUSHP > > Example: > > -- > 2.3.7 Reviewed-by: Sonny Rao <sonny...@chromium.org> > > > -- > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to majord...@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 1/5] DMA: pl330: support burst mode for dev-to-mem and mem-to-dev transmit
On Thu, Aug 27, 2015 at 6:28 AM, Shawn Lin wrote: > 在 2015/8/27 20:57, Krzysztof Kozlowski 写道: >> >> 2015-08-27 17:48 GMT+09:00 Shawn Lin : >>> >>> >>> This patch adds to support burst mode for dev-to-mem and >>> mem-to-dev transmit. >>> >>> Signed-off-by: Boojin Kim >>> Signed-off-by: Addy Ke >>> Signed-off-by: Shawn Lin >>> cc: Heiko Stuebner >>> cc: Doug Anderson >>> cc: Olof Johansson >>> Reviewed-and-tested-by: Sonny Rao >> >> >> For the entire patchset: I would prefer to see someone's >> reviewed/tested tag in his response. Sending a version 1 of patchset >> (regardless of Boojin Kim's work two years ago) with such tag could >> mean anything. I cannot verify it easily (unless digging somewhere... >> or asking people). You could add for example: Reviewed-by Santa Claus. >> Should I sent a letter to him asking for confirmation? :) >> > > :) yes, you are right. I should comply with the rule, even if the patchest > had been reviewed or tested by someone on another tree. Hi, yeah I reviewed on a different tree, so you shouldn't put that tag here, thanks for removing it. I can re-review if you'd like. > > >> More seriously - reviewed-by is a statement (please look at >> Documentation/SubmittingPatches) and you cannot force someone to make >> that statement. He must make such statement on his own. >> >> That's all from my side since I don't feel skilled enough to review the >> code. >> >> Best regards, >> Krzysztof >> >>> >>> --- >>> >>> drivers/dma/pl330.c | 18 -- >>> 1 file changed, 12 insertions(+), 6 deletions(-) >>> >>> diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c >>> index ecab4ea0..0d544d2 100644 >>> --- a/drivers/dma/pl330.c >>> +++ b/drivers/dma/pl330.c >>> @@ -1141,10 +1141,13 @@ static inline int _ldst_devtomem(unsigned >>> dry_run, u8 buf[], >>> const struct _xfer_spec *pxs, int cyc) >>> { >>> int off = 0; >>> + enum pl330_cond cond; >>> + >>> + cond = (pxs->desc->rqcfg.brst_len == 1) ? 
SINGLE : BURST; >>> >>> while (cyc--) { >>> - off += _emit_WFP(dry_run, [off], SINGLE, >>> pxs->desc->peri); >>> - off += _emit_LDP(dry_run, [off], SINGLE, >>> pxs->desc->peri); >>> + off += _emit_WFP(dry_run, [off], cond, >>> pxs->desc->peri); >>> + off += _emit_LDP(dry_run, [off], cond, >>> pxs->desc->peri); >>> off += _emit_ST(dry_run, [off], ALWAYS); >>> off += _emit_FLUSHP(dry_run, [off], >>> pxs->desc->peri); >>> } >>> @@ -1156,11 +1159,14 @@ static inline int _ldst_memtodev(unsigned >>> dry_run, u8 buf[], >>> const struct _xfer_spec *pxs, int cyc) >>> { >>> int off = 0; >>> + enum pl330_cond cond; >>> + >>> + cond = (pxs->desc->rqcfg.brst_len == 1) ? SINGLE : BURST; >>> >>> while (cyc--) { >>> - off += _emit_WFP(dry_run, [off], SINGLE, >>> pxs->desc->peri); >>> + off += _emit_WFP(dry_run, [off], cond, >>> pxs->desc->peri); >>> off += _emit_LD(dry_run, [off], ALWAYS); >>> - off += _emit_STP(dry_run, [off], SINGLE, >>> pxs->desc->peri); >>> + off += _emit_STP(dry_run, [off], cond, >>> pxs->desc->peri); >>> off += _emit_FLUSHP(dry_run, [off], >>> pxs->desc->peri); >>> } >>> >>> @@ -2557,7 +2563,7 @@ static struct dma_async_tx_descriptor >>> *pl330_prep_dma_cyclic( >>> >>> desc->rqtype = direction; >>> desc->rqcfg.brst_size = pch->burst_sz; >>> - desc->rqcfg.brst_len = 1; >>> + desc->rqcfg.brst_len = pch->burst_len; >>> desc->bytes_requested = period_len; >>> fill_px(>px, dst, src, period_len); >>> >>> @@ -2702,7 +2708,7 @@ pl330_prep_slave_sg(struct dma_chan *chan, struct >>> scatterlist *sgl, >>> } >>> >>> desc->rqcfg.brst_size = pch->burst_sz; >>> - desc->rqcfg.brst_len = 1; >>> + desc->rqcfg.brst_len = pch->burst_len; >>> desc->rqtype = direction; >>> desc->bytes_requested = sg_dma_len(sg); >>> } >>> -- >>> 2.3.7 >>> >>> >>> -- >>> To unsubscribe from this list: send the line "unsubscribe dmaengine" in >>> the body of a message to majord...@vger.kernel.org >>> More majordomo info at http://vger.kernel.org/majordomo-info.html >> >> >> >> > > > -- > Best 
Regards > Shawn Lin > > -- > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to majord...@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 1/5] DMA: pl330: support burst mode for dev-to-mem and mem-to-dev transmit
On Thu, Aug 27, 2015 at 6:28 AM, Shawn Lin shawn@rock-chips.com wrote: 在 2015/8/27 20:57, Krzysztof Kozlowski 写道: 2015-08-27 17:48 GMT+09:00 Shawn Lin shawn@rock-chips.com: This patch adds to support burst mode for dev-to-mem and mem-to-dev transmit. Signed-off-by: Boojin Kim boojin@samsung.com Signed-off-by: Addy Ke addy...@rock-chips.com Signed-off-by: Shawn Lin shawn@rock-chips.com cc: Heiko Stuebner he...@sntech.de cc: Doug Anderson diand...@chromium.org cc: Olof Johansson ol...@google.com Reviewed-and-tested-by: Sonny Rao sonny...@chromium.org For the entire patchset: I would prefer to see someone's reviewed/tested tag in his response. Sending a version 1 of patchset (regardless of Boojin Kim's work two years ago) with such tag could mean anything. I cannot verify it easily (unless digging somewhere... or asking people). You could add for example: Reviewed-by Santa Claus. Should I sent a letter to him asking for confirmation? :) :) yes, you are right. I should comply with the rule, even if the patchest had been reviewed or tested by someone on another tree. Hi, yeah I reviewed on a different tree, so you shouldn't put that tag here, thanks for removing it. I can re-review if you'd like. More seriously - reviewed-by is a statement (please look at Documentation/SubmittingPatches) and you cannot force someone to make that statement. He must make such statement on his own. That's all from my side since I don't feel skilled enough to review the code. Best regards, Krzysztof --- drivers/dma/pl330.c | 18 -- 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index ecab4ea0..0d544d2 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -1141,10 +1141,13 @@ static inline int _ldst_devtomem(unsigned dry_run, u8 buf[], const struct _xfer_spec *pxs, int cyc) { int off = 0; + enum pl330_cond cond; + + cond = (pxs-desc-rqcfg.brst_len == 1) ? 
SINGLE : BURST; while (cyc--) { - off += _emit_WFP(dry_run, buf[off], SINGLE, pxs-desc-peri); - off += _emit_LDP(dry_run, buf[off], SINGLE, pxs-desc-peri); + off += _emit_WFP(dry_run, buf[off], cond, pxs-desc-peri); + off += _emit_LDP(dry_run, buf[off], cond, pxs-desc-peri); off += _emit_ST(dry_run, buf[off], ALWAYS); off += _emit_FLUSHP(dry_run, buf[off], pxs-desc-peri); } @@ -1156,11 +1159,14 @@ static inline int _ldst_memtodev(unsigned dry_run, u8 buf[], const struct _xfer_spec *pxs, int cyc) { int off = 0; + enum pl330_cond cond; + + cond = (pxs-desc-rqcfg.brst_len == 1) ? SINGLE : BURST; while (cyc--) { - off += _emit_WFP(dry_run, buf[off], SINGLE, pxs-desc-peri); + off += _emit_WFP(dry_run, buf[off], cond, pxs-desc-peri); off += _emit_LD(dry_run, buf[off], ALWAYS); - off += _emit_STP(dry_run, buf[off], SINGLE, pxs-desc-peri); + off += _emit_STP(dry_run, buf[off], cond, pxs-desc-peri); off += _emit_FLUSHP(dry_run, buf[off], pxs-desc-peri); } @@ -2557,7 +2563,7 @@ static struct dma_async_tx_descriptor *pl330_prep_dma_cyclic( desc-rqtype = direction; desc-rqcfg.brst_size = pch-burst_sz; - desc-rqcfg.brst_len = 1; + desc-rqcfg.brst_len = pch-burst_len; desc-bytes_requested = period_len; fill_px(desc-px, dst, src, period_len); @@ -2702,7 +2708,7 @@ pl330_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, } desc-rqcfg.brst_size = pch-burst_sz; - desc-rqcfg.brst_len = 1; + desc-rqcfg.brst_len = pch-burst_len; desc-rqtype = direction; desc-bytes_requested = sg_dma_len(sg); } -- 2.3.7 -- To unsubscribe from this list: send the line unsubscribe dmaengine in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- Best Regards Shawn Lin -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/ -- To unsubscribe from 
this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] perf/x86/intel/uncore: add Broadwell-U uncore IMC PMU support
On Wed, Apr 22, 2015 at 11:56 PM, Stephane Eranian wrote: > > This patch enables the uncore Memory Controller (IMC) PMU support > for Intel Broadwell-U (Model 61) mobile processors. > The IMC PMU enables measuring memory bandwidth. > > To use with perf: > $ perf stat -a -I 1000 -e uncore_imc/data_reads/,uncore_imc/data_writes/ > sleep 10 > > Signed-off-by: Stephane Eranian Tested-by: Sonny Rao > --- > > diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c > b/arch/x86/kernel/cpu/perf_event_intel_uncore.c > index c635b8b..a03f964 100644 > --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c > +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c > @@ -922,6 +922,9 @@ static int __init uncore_pci_init(void) > case 69: /* Haswell Celeron */ > ret = hsw_uncore_pci_init(); > break; > + case 61: /* Broadwell */ > + ret = bdw_uncore_pci_init(); > + break; > default: > return 0; > } > diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h > b/arch/x86/kernel/cpu/perf_event_intel_uncore.h > index 6c8c1e7..06b0793 100644 > --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h > +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h > @@ -326,6 +326,7 @@ extern struct event_constraint uncore_constraint_empty; > int snb_uncore_pci_init(void); > int ivb_uncore_pci_init(void); > int hsw_uncore_pci_init(void); > +int bdw_uncore_pci_init(void); > void snb_uncore_cpu_init(void); > void nhm_uncore_cpu_init(void); > > diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c > b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c > index 0333d0b..0f768bf 100644 > --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c > +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c > @@ -7,6 +7,7 @@ > #define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150 > #define PCI_DEVICE_ID_INTEL_HSW_IMC0x0c00 > #define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04 > +#define PCI_DEVICE_ID_INTEL_BDW_IMC0x1604 > > /* SNB event control */ > #define SNB_UNC_CTL_EV_SEL_MASK0x00ff > @@ -488,6 +489,14 @@ static const 
struct pci_device_id hsw_uncore_pci_ids[] = > { > { /* end: all zeroes */ }, > }; > > +static const struct pci_device_id bdw_uncore_pci_ids[] = { > + { /* IMC */ > + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BDW_IMC), > + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), > + }, > + { /* end: all zeroes */ }, > +}; > + > static struct pci_driver snb_uncore_pci_driver = { > .name = "snb_uncore", > .id_table = snb_uncore_pci_ids, > @@ -503,6 +512,11 @@ static struct pci_driver hsw_uncore_pci_driver = { > .id_table = hsw_uncore_pci_ids, > }; > > +static struct pci_driver bdw_uncore_pci_driver = { > + .name = "bdw_uncore", > + .id_table = bdw_uncore_pci_ids, > +}; > + > struct imc_uncore_pci_dev { > __u32 pci_id; > struct pci_driver *driver; > @@ -516,6 +530,7 @@ static const struct imc_uncore_pci_dev > desktop_imc_pci_ids[] = { > IMC_DEV(IVB_E3_IMC, _uncore_pci_driver), /* Xeon E3-1200 v2/3rd > Gen Core processor */ > IMC_DEV(HSW_IMC, _uncore_pci_driver),/* 4th Gen Core > Processor */ > IMC_DEV(HSW_U_IMC, _uncore_pci_driver), /* 4th Gen Core ULT > Mobile Processor */ > + IMC_DEV(BDW_IMC, _uncore_pci_driver),/* 5th Gen Core U */ > { /* end marker */ } > }; > > @@ -563,6 +578,11 @@ int hsw_uncore_pci_init(void) > return imc_uncore_pci_init(); > } > > +int bdw_uncore_pci_init(void) > +{ > + return imc_uncore_pci_init(); > +} > + > /* end of Sandy Bridge uncore support */ > > /* Nehalem uncore support */ > -- > 2.1.0 > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] perf/x86/intel/uncore: add Broadwell-U uncore IMC PMU support
On Wed, Apr 22, 2015 at 11:56 PM, Stephane Eranian eran...@google.com wrote: This patch enables the uncore Memory Controller (IMC) PMU support for Intel Broadwell-U (Model 61) mobile processors. The IMC PMU enables measuring memory bandwidth. To use with perf: $ perf stat -a -I 1000 -e uncore_imc/data_reads/,uncore_imc/data_writes/ sleep 10 Signed-off-by: Stephane Eranian eran...@google.com Tested-by: Sonny Rao sonny...@chromium.org --- diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index c635b8b..a03f964 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -922,6 +922,9 @@ static int __init uncore_pci_init(void) case 69: /* Haswell Celeron */ ret = hsw_uncore_pci_init(); break; + case 61: /* Broadwell */ + ret = bdw_uncore_pci_init(); + break; default: return 0; } diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h index 6c8c1e7..06b0793 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h @@ -326,6 +326,7 @@ extern struct event_constraint uncore_constraint_empty; int snb_uncore_pci_init(void); int ivb_uncore_pci_init(void); int hsw_uncore_pci_init(void); +int bdw_uncore_pci_init(void); void snb_uncore_cpu_init(void); void nhm_uncore_cpu_init(void); diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c index 0333d0b..0f768bf 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c @@ -7,6 +7,7 @@ #define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150 #define PCI_DEVICE_ID_INTEL_HSW_IMC0x0c00 #define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04 +#define PCI_DEVICE_ID_INTEL_BDW_IMC0x1604 /* SNB event control */ #define SNB_UNC_CTL_EV_SEL_MASK0x00ff @@ -488,6 +489,14 @@ static const struct pci_device_id hsw_uncore_pci_ids[] = { { /* 
end: all zeroes */ }, }; +static const struct pci_device_id bdw_uncore_pci_ids[] = { + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BDW_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* end: all zeroes */ }, +}; + static struct pci_driver snb_uncore_pci_driver = { .name = snb_uncore, .id_table = snb_uncore_pci_ids, @@ -503,6 +512,11 @@ static struct pci_driver hsw_uncore_pci_driver = { .id_table = hsw_uncore_pci_ids, }; +static struct pci_driver bdw_uncore_pci_driver = { + .name = bdw_uncore, + .id_table = bdw_uncore_pci_ids, +}; + struct imc_uncore_pci_dev { __u32 pci_id; struct pci_driver *driver; @@ -516,6 +530,7 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = { IMC_DEV(IVB_E3_IMC, ivb_uncore_pci_driver), /* Xeon E3-1200 v2/3rd Gen Core processor */ IMC_DEV(HSW_IMC, hsw_uncore_pci_driver),/* 4th Gen Core Processor */ IMC_DEV(HSW_U_IMC, hsw_uncore_pci_driver), /* 4th Gen Core ULT Mobile Processor */ + IMC_DEV(BDW_IMC, bdw_uncore_pci_driver),/* 5th Gen Core U */ { /* end marker */ } }; @@ -563,6 +578,11 @@ int hsw_uncore_pci_init(void) return imc_uncore_pci_init(); } +int bdw_uncore_pci_init(void) +{ + return imc_uncore_pci_init(); +} + /* end of Sandy Bridge uncore support */ /* Nehalem uncore support */ -- 2.1.0 -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[tip:perf/urgent] perf/x86/intel/uncore: Move PCI IDs for IMC to uncore driver
Commit-ID: 0140e6141e4f1d4b15fb469e6912b0e71b7d1cc2 Gitweb: http://git.kernel.org/tip/0140e6141e4f1d4b15fb469e6912b0e71b7d1cc2 Author: Sonny Rao AuthorDate: Tue, 21 Apr 2015 12:33:11 -0700 Committer: Ingo Molnar CommitDate: Wed, 22 Apr 2015 08:29:19 +0200 perf/x86/intel/uncore: Move PCI IDs for IMC to uncore driver This keeps all the related PCI IDs together in the driver where they are used. Signed-off-by: Sonny Rao Acked-by: Bjorn Helgaas Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1429644791-25724-1-git-send-email-sonny...@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c | 6 +- include/linux/pci_ids.h | 4 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c index ca75e70..4562e9e 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c @@ -1,7 +1,11 @@ /* Nehalem/SandBridge/Haswell uncore support */ #include "perf_event_intel_uncore.h" -/* Uncore IMC PCI Id */ +/* Uncore IMC PCI IDs */ +#define PCI_DEVICE_ID_INTEL_SNB_IMC0x0100 +#define PCI_DEVICE_ID_INTEL_IVB_IMC0x0154 +#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150 +#define PCI_DEVICE_ID_INTEL_HSW_IMC0x0c00 #define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04 /* SNB event control */ diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index e63c02a..a593858 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2539,10 +2539,6 @@ #define PCI_VENDOR_ID_INTEL0x8086 #define PCI_DEVICE_ID_INTEL_EESSC 0x0008 -#define PCI_DEVICE_ID_INTEL_SNB_IMC0x0100 -#define PCI_DEVICE_ID_INTEL_IVB_IMC0x0154 -#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150 -#define PCI_DEVICE_ID_INTEL_HSW_IMC0x0c00 #define PCI_DEVICE_ID_INTEL_PXHD_0 0x0320 #define PCI_DEVICE_ID_INTEL_PXHD_1 0x0321 #define PCI_DEVICE_ID_INTEL_PXH_0 0x0329 -- To 
unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[tip:perf/urgent] perf/x86/intel/uncore: Add support for Intel Haswell ULT (lower power Mobile Processor) IMC uncore PMUs
Commit-ID: 80bcffb376a6890dd7452b12c1ba032f8f24fef6 Gitweb: http://git.kernel.org/tip/80bcffb376a6890dd7452b12c1ba032f8f24fef6 Author: Sonny Rao AuthorDate: Mon, 20 Apr 2015 15:34:07 -0700 Committer: Ingo Molnar CommitDate: Wed, 22 Apr 2015 08:27:43 +0200 perf/x86/intel/uncore: Add support for Intel Haswell ULT (lower power Mobile Processor) IMC uncore PMUs This uncore is the same as the Haswell desktop part but uses a different PCI ID. Signed-off-by: Sonny Rao Cc: Arnaldo Carvalho de Melo Cc: Bjorn Helgaas Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1429569247-16697-1-git-send-email-sonny...@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c | 8 1 file changed, 8 insertions(+) diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c index 3001015..ca75e70 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c @@ -1,6 +1,9 @@ /* Nehalem/SandBridge/Haswell uncore support */ #include "perf_event_intel_uncore.h" +/* Uncore IMC PCI Id */ +#define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04 + /* SNB event control */ #define SNB_UNC_CTL_EV_SEL_MASK0x00ff #define SNB_UNC_CTL_UMASK_MASK 0xff00 @@ -472,6 +475,10 @@ static const struct pci_device_id hsw_uncore_pci_ids[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_U_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, { /* end: all zeroes */ }, }; @@ -502,6 +509,7 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = { IMC_DEV(IVB_IMC, _uncore_pci_driver),/* 3rd Gen Core processor */ IMC_DEV(IVB_E3_IMC, _uncore_pci_driver), /* Xeon E3-1200 v2/3rd Gen Core processor */ IMC_DEV(HSW_IMC, _uncore_pci_driver),/* 4th Gen Core Processor */ + 
IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core ULT Mobile Processor */ { /* end marker */ } }; -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[tip:perf/urgent] perf/x86/intel/uncore: Add support for Intel Haswell ULT (lower power Mobile Processor) IMC uncore PMUs
Commit-ID: 80bcffb376a6890dd7452b12c1ba032f8f24fef6 Gitweb: http://git.kernel.org/tip/80bcffb376a6890dd7452b12c1ba032f8f24fef6 Author: Sonny Rao sonny...@chromium.org AuthorDate: Mon, 20 Apr 2015 15:34:07 -0700 Committer: Ingo Molnar mi...@kernel.org CommitDate: Wed, 22 Apr 2015 08:27:43 +0200 perf/x86/intel/uncore: Add support for Intel Haswell ULT (lower power Mobile Processor) IMC uncore PMUs This uncore is the same as the Haswell desktop part but uses a different PCI ID. Signed-off-by: Sonny Rao sonny...@chromium.org Cc: Arnaldo Carvalho de Melo a...@kernel.org Cc: Bjorn Helgaas bhelg...@google.com Cc: Paul Mackerras pau...@samba.org Cc: Peter Zijlstra a.p.zijls...@chello.nl Cc: Stephane Eranian eran...@google.com Link: http://lkml.kernel.org/r/1429569247-16697-1-git-send-email-sonny...@chromium.org Signed-off-by: Ingo Molnar mi...@kernel.org --- arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c | 8 1 file changed, 8 insertions(+) diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c index 3001015..ca75e70 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c @@ -1,6 +1,9 @@ /* Nehalem/SandBridge/Haswell uncore support */ #include perf_event_intel_uncore.h +/* Uncore IMC PCI Id */ +#define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04 + /* SNB event control */ #define SNB_UNC_CTL_EV_SEL_MASK0x00ff #define SNB_UNC_CTL_UMASK_MASK 0xff00 @@ -472,6 +475,10 @@ static const struct pci_device_id hsw_uncore_pci_ids[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_U_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, { /* end: all zeroes */ }, }; @@ -502,6 +509,7 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = { IMC_DEV(IVB_IMC, ivb_uncore_pci_driver),/* 3rd Gen Core 
processor */ IMC_DEV(IVB_E3_IMC, &ivb_uncore_pci_driver), /* Xeon E3-1200 v2/3rd Gen Core processor */ IMC_DEV(HSW_IMC, &hsw_uncore_pci_driver),/* 4th Gen Core Processor */ + IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core ULT Mobile Processor */ { /* end marker */ } }; -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[tip:perf/urgent] perf/x86/intel/uncore: Move PCI IDs for IMC to uncore driver
Commit-ID: 0140e6141e4f1d4b15fb469e6912b0e71b7d1cc2 Gitweb: http://git.kernel.org/tip/0140e6141e4f1d4b15fb469e6912b0e71b7d1cc2 Author: Sonny Rao sonny...@chromium.org AuthorDate: Tue, 21 Apr 2015 12:33:11 -0700 Committer: Ingo Molnar mi...@kernel.org CommitDate: Wed, 22 Apr 2015 08:29:19 +0200 perf/x86/intel/uncore: Move PCI IDs for IMC to uncore driver This keeps all the related PCI IDs together in the driver where they are used. Signed-off-by: Sonny Rao sonny...@chromium.org Acked-by: Bjorn Helgaas bhelg...@google.com Cc: Arnaldo Carvalho de Melo a...@kernel.org Cc: Paul Mackerras pau...@samba.org Cc: Peter Zijlstra a.p.zijls...@chello.nl Cc: Stephane Eranian eran...@google.com Link: http://lkml.kernel.org/r/1429644791-25724-1-git-send-email-sonny...@chromium.org Signed-off-by: Ingo Molnar mi...@kernel.org --- arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c | 6 +- include/linux/pci_ids.h | 4 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c index ca75e70..4562e9e 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c @@ -1,7 +1,11 @@ /* Nehalem/SandBridge/Haswell uncore support */ #include perf_event_intel_uncore.h -/* Uncore IMC PCI Id */ +/* Uncore IMC PCI IDs */ +#define PCI_DEVICE_ID_INTEL_SNB_IMC0x0100 +#define PCI_DEVICE_ID_INTEL_IVB_IMC0x0154 +#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150 +#define PCI_DEVICE_ID_INTEL_HSW_IMC0x0c00 #define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04 /* SNB event control */ diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index e63c02a..a593858 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2539,10 +2539,6 @@ #define PCI_VENDOR_ID_INTEL0x8086 #define PCI_DEVICE_ID_INTEL_EESSC 0x0008 -#define PCI_DEVICE_ID_INTEL_SNB_IMC0x0100 -#define PCI_DEVICE_ID_INTEL_IVB_IMC0x0154 -#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150 
-#define PCI_DEVICE_ID_INTEL_HSW_IMC0x0c00 #define PCI_DEVICE_ID_INTEL_PXHD_0 0x0320 #define PCI_DEVICE_ID_INTEL_PXHD_1 0x0321 #define PCI_DEVICE_ID_INTEL_PXH_0 0x0329 -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCHv2] perf/x86/intel/uncore: Move PCI IDs for IMC to uncore driver
This keeps all the related PCI IDs together in the driver where they are used. Signed-off-by: Sonny Rao Acked-by: Bjorn Helgaas --- arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c | 6 +- include/linux/pci_ids.h | 4 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c index ca75e70..4562e9e 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c @@ -1,7 +1,11 @@ /* Nehalem/SandBridge/Haswell uncore support */ #include "perf_event_intel_uncore.h" -/* Uncore IMC PCI Id */ +/* Uncore IMC PCI IDs */ +#define PCI_DEVICE_ID_INTEL_SNB_IMC0x0100 +#define PCI_DEVICE_ID_INTEL_IVB_IMC0x0154 +#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150 +#define PCI_DEVICE_ID_INTEL_HSW_IMC0x0c00 #define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04 /* SNB event control */ diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 38cff8f..2f7b9a4 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2541,10 +2541,6 @@ #define PCI_VENDOR_ID_INTEL0x8086 #define PCI_DEVICE_ID_INTEL_EESSC 0x0008 -#define PCI_DEVICE_ID_INTEL_SNB_IMC0x0100 -#define PCI_DEVICE_ID_INTEL_IVB_IMC0x0154 -#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150 -#define PCI_DEVICE_ID_INTEL_HSW_IMC0x0c00 #define PCI_DEVICE_ID_INTEL_PXHD_0 0x0320 #define PCI_DEVICE_ID_INTEL_PXHD_1 0x0321 #define PCI_DEVICE_ID_INTEL_PXH_0 0x0329 -- 2.1.2 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] perf/x86/intel/uncore: Move PCI IDs for IMC to uncore driver
On Tue, Apr 21, 2015 at 12:21 PM, Bjorn Helgaas wrote: > On Tue, Apr 21, 2015 at 2:09 PM, Sonny Rao wrote: >> This keeps all the related PCI IDs together in the driver where they >> are used. >> >> Signed-off-by: Sonny Rao > > Acked-by: Bjorn Helgaas > >> --- >> arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c | 6 +- >> include/linux/pci_ids.h | 4 >> 2 files changed, 5 insertions(+), 5 deletions(-) >> >> diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c >> b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c >> index ca75e70..02c1a13 100644 >> --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c >> +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c >> @@ -1,7 +1,11 @@ >> /* Nehalem/SandBridge/Haswell uncore support */ >> #include "perf_event_intel_uncore.h" >> >> -/* Uncore IMC PCI Id */ >> +/* Uncore IMC PCI Ids */ > > "IDs" would be more consistent. > Oops, will fix. >> +#define PCI_DEVICE_ID_INTEL_SNB_IMC0x0100 >> +#define PCI_DEVICE_ID_INTEL_IVB_IMC0x0154 >> +#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150 >> +#define PCI_DEVICE_ID_INTEL_HSW_IMC0x0c00 >> #define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04 >> >> /* SNB event control */ >> diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h >> index 38cff8f..2f7b9a4 100644 >> --- a/include/linux/pci_ids.h >> +++ b/include/linux/pci_ids.h >> @@ -2541,10 +2541,6 @@ >> >> #define PCI_VENDOR_ID_INTEL0x8086 >> #define PCI_DEVICE_ID_INTEL_EESSC 0x0008 >> -#define PCI_DEVICE_ID_INTEL_SNB_IMC0x0100 >> -#define PCI_DEVICE_ID_INTEL_IVB_IMC0x0154 >> -#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150 >> -#define PCI_DEVICE_ID_INTEL_HSW_IMC0x0c00 >> #define PCI_DEVICE_ID_INTEL_PXHD_0 0x0320 >> #define PCI_DEVICE_ID_INTEL_PXHD_1 0x0321 >> #define PCI_DEVICE_ID_INTEL_PXH_0 0x0329 >> -- >> 2.1.2 >> -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ 
at http://www.tux.org/lkml/
[PATCH] perf/x86/intel/uncore: Move PCI IDs for IMC to uncore driver
This keeps all the related PCI IDs together in the driver where they are used. Signed-off-by: Sonny Rao --- arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c | 6 +- include/linux/pci_ids.h | 4 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c index ca75e70..02c1a13 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c @@ -1,7 +1,11 @@ /* Nehalem/SandBridge/Haswell uncore support */ #include "perf_event_intel_uncore.h" -/* Uncore IMC PCI Id */ +/* Uncore IMC PCI Ids */ +#define PCI_DEVICE_ID_INTEL_SNB_IMC0x0100 +#define PCI_DEVICE_ID_INTEL_IVB_IMC0x0154 +#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150 +#define PCI_DEVICE_ID_INTEL_HSW_IMC0x0c00 #define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04 /* SNB event control */ diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 38cff8f..2f7b9a4 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2541,10 +2541,6 @@ #define PCI_VENDOR_ID_INTEL0x8086 #define PCI_DEVICE_ID_INTEL_EESSC 0x0008 -#define PCI_DEVICE_ID_INTEL_SNB_IMC0x0100 -#define PCI_DEVICE_ID_INTEL_IVB_IMC0x0154 -#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150 -#define PCI_DEVICE_ID_INTEL_HSW_IMC0x0c00 #define PCI_DEVICE_ID_INTEL_PXHD_0 0x0320 #define PCI_DEVICE_ID_INTEL_PXHD_1 0x0321 #define PCI_DEVICE_ID_INTEL_PXH_0 0x0329 -- 2.1.2 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[tip:x86/urgent] perf/x86/intel/uncore: Add support for Intel Haswell ULT (lower power Mobile Processor) IMC uncore PMUs
Commit-ID: 5324e72e00012126101aee6f3e62977055a3b5ee Gitweb: http://git.kernel.org/tip/5324e72e00012126101aee6f3e62977055a3b5ee Author: Sonny Rao AuthorDate: Mon, 20 Apr 2015 15:34:07 -0700 Committer: Ingo Molnar CommitDate: Tue, 21 Apr 2015 09:31:17 +0200 perf/x86/intel/uncore: Add support for Intel Haswell ULT (lower power Mobile Processor) IMC uncore PMUs This uncore is the same as the Haswell desktop part but uses a different PCI ID. Signed-off-by: Sonny Rao Cc: Arnaldo Carvalho de Melo Cc: Bjorn Helgaas Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1429569247-16697-1-git-send-email-sonny...@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c | 8 1 file changed, 8 insertions(+) diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c index 3001015..ca75e70 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c @@ -1,6 +1,9 @@ /* Nehalem/SandBridge/Haswell uncore support */ #include "perf_event_intel_uncore.h" +/* Uncore IMC PCI Id */ +#define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04 + /* SNB event control */ #define SNB_UNC_CTL_EV_SEL_MASK0x00ff #define SNB_UNC_CTL_UMASK_MASK 0xff00 @@ -472,6 +475,10 @@ static const struct pci_device_id hsw_uncore_pci_ids[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_U_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, { /* end: all zeroes */ }, }; @@ -502,6 +509,7 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = { IMC_DEV(IVB_IMC, _uncore_pci_driver),/* 3rd Gen Core processor */ IMC_DEV(IVB_E3_IMC, _uncore_pci_driver), /* Xeon E3-1200 v2/3rd Gen Core processor */ IMC_DEV(HSW_IMC, _uncore_pci_driver),/* 4th Gen Core Processor */ + 
IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core ULT Mobile Processor */ { /* end marker */ } }; -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] perf/x86/intel/uncore: Move PCI IDs for IMC to uncore driver
This keeps all the related PCI IDs together in the driver where they are used. Signed-off-by: Sonny Rao sonny...@chromium.org --- arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c | 6 +- include/linux/pci_ids.h | 4 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c index ca75e70..02c1a13 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c @@ -1,7 +1,11 @@ /* Nehalem/SandBridge/Haswell uncore support */ #include perf_event_intel_uncore.h -/* Uncore IMC PCI Id */ +/* Uncore IMC PCI Ids */ +#define PCI_DEVICE_ID_INTEL_SNB_IMC0x0100 +#define PCI_DEVICE_ID_INTEL_IVB_IMC0x0154 +#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150 +#define PCI_DEVICE_ID_INTEL_HSW_IMC0x0c00 #define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04 /* SNB event control */ diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 38cff8f..2f7b9a4 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2541,10 +2541,6 @@ #define PCI_VENDOR_ID_INTEL0x8086 #define PCI_DEVICE_ID_INTEL_EESSC 0x0008 -#define PCI_DEVICE_ID_INTEL_SNB_IMC0x0100 -#define PCI_DEVICE_ID_INTEL_IVB_IMC0x0154 -#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150 -#define PCI_DEVICE_ID_INTEL_HSW_IMC0x0c00 #define PCI_DEVICE_ID_INTEL_PXHD_0 0x0320 #define PCI_DEVICE_ID_INTEL_PXHD_1 0x0321 #define PCI_DEVICE_ID_INTEL_PXH_0 0x0329 -- 2.1.2 -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] perf/x86/intel/uncore: Move PCI IDs for IMC to uncore driver
On Tue, Apr 21, 2015 at 12:21 PM, Bjorn Helgaas bhelg...@google.com wrote: On Tue, Apr 21, 2015 at 2:09 PM, Sonny Rao sonny...@chromium.org wrote: This keeps all the related PCI IDs together in the driver where they are used. Signed-off-by: Sonny Rao sonny...@chromium.org Acked-by: Bjorn Helgaas bhelg...@google.com --- arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c | 6 +- include/linux/pci_ids.h | 4 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c index ca75e70..02c1a13 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c @@ -1,7 +1,11 @@ /* Nehalem/SandBridge/Haswell uncore support */ #include perf_event_intel_uncore.h -/* Uncore IMC PCI Id */ +/* Uncore IMC PCI Ids */ IDs would be more consistent. Oops, will fix. +#define PCI_DEVICE_ID_INTEL_SNB_IMC0x0100 +#define PCI_DEVICE_ID_INTEL_IVB_IMC0x0154 +#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150 +#define PCI_DEVICE_ID_INTEL_HSW_IMC0x0c00 #define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04 /* SNB event control */ diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 38cff8f..2f7b9a4 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2541,10 +2541,6 @@ #define PCI_VENDOR_ID_INTEL0x8086 #define PCI_DEVICE_ID_INTEL_EESSC 0x0008 -#define PCI_DEVICE_ID_INTEL_SNB_IMC0x0100 -#define PCI_DEVICE_ID_INTEL_IVB_IMC0x0154 -#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150 -#define PCI_DEVICE_ID_INTEL_HSW_IMC0x0c00 #define PCI_DEVICE_ID_INTEL_PXHD_0 0x0320 #define PCI_DEVICE_ID_INTEL_PXHD_1 0x0321 #define PCI_DEVICE_ID_INTEL_PXH_0 0x0329 -- 2.1.2 -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCHv2] perf/x86/intel/uncore: Move PCI IDs for IMC to uncore driver
This keeps all the related PCI IDs together in the driver where they are used. Signed-off-by: Sonny Rao sonny...@chromium.org Acked-by: Bjorn Helgaas bhelg...@google.com --- arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c | 6 +- include/linux/pci_ids.h | 4 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c index ca75e70..4562e9e 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c @@ -1,7 +1,11 @@ /* Nehalem/SandBridge/Haswell uncore support */ #include perf_event_intel_uncore.h -/* Uncore IMC PCI Id */ +/* Uncore IMC PCI IDs */ +#define PCI_DEVICE_ID_INTEL_SNB_IMC0x0100 +#define PCI_DEVICE_ID_INTEL_IVB_IMC0x0154 +#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150 +#define PCI_DEVICE_ID_INTEL_HSW_IMC0x0c00 #define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04 /* SNB event control */ diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 38cff8f..2f7b9a4 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2541,10 +2541,6 @@ #define PCI_VENDOR_ID_INTEL0x8086 #define PCI_DEVICE_ID_INTEL_EESSC 0x0008 -#define PCI_DEVICE_ID_INTEL_SNB_IMC0x0100 -#define PCI_DEVICE_ID_INTEL_IVB_IMC0x0154 -#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150 -#define PCI_DEVICE_ID_INTEL_HSW_IMC0x0c00 #define PCI_DEVICE_ID_INTEL_PXHD_0 0x0320 #define PCI_DEVICE_ID_INTEL_PXHD_1 0x0321 #define PCI_DEVICE_ID_INTEL_PXH_0 0x0329 -- 2.1.2 -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[tip:x86/urgent] perf/x86/intel/uncore: Add support for Intel Haswell ULT (lower power Mobile Processor) IMC uncore PMUs
Commit-ID: 5324e72e00012126101aee6f3e62977055a3b5ee Gitweb: http://git.kernel.org/tip/5324e72e00012126101aee6f3e62977055a3b5ee Author: Sonny Rao sonny...@chromium.org AuthorDate: Mon, 20 Apr 2015 15:34:07 -0700 Committer: Ingo Molnar mi...@kernel.org CommitDate: Tue, 21 Apr 2015 09:31:17 +0200 perf/x86/intel/uncore: Add support for Intel Haswell ULT (lower power Mobile Processor) IMC uncore PMUs This uncore is the same as the Haswell desktop part but uses a different PCI ID. Signed-off-by: Sonny Rao sonny...@chromium.org Cc: Arnaldo Carvalho de Melo a...@kernel.org Cc: Bjorn Helgaas bhelg...@google.com Cc: Paul Mackerras pau...@samba.org Cc: Peter Zijlstra a.p.zijls...@chello.nl Cc: Stephane Eranian eran...@google.com Link: http://lkml.kernel.org/r/1429569247-16697-1-git-send-email-sonny...@chromium.org Signed-off-by: Ingo Molnar mi...@kernel.org --- arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c | 8 1 file changed, 8 insertions(+) diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c index 3001015..ca75e70 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c @@ -1,6 +1,9 @@ /* Nehalem/SandBridge/Haswell uncore support */ #include perf_event_intel_uncore.h +/* Uncore IMC PCI Id */ +#define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04 + /* SNB event control */ #define SNB_UNC_CTL_EV_SEL_MASK0x00ff #define SNB_UNC_CTL_UMASK_MASK 0xff00 @@ -472,6 +475,10 @@ static const struct pci_device_id hsw_uncore_pci_ids[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_U_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, { /* end: all zeroes */ }, }; @@ -502,6 +509,7 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = { IMC_DEV(IVB_IMC, ivb_uncore_pci_driver),/* 3rd Gen Core 
processor */ IMC_DEV(IVB_E3_IMC, &ivb_uncore_pci_driver), /* Xeon E3-1200 v2/3rd Gen Core processor */ IMC_DEV(HSW_IMC, &hsw_uncore_pci_driver),/* 4th Gen Core Processor */ + IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core ULT Mobile Processor */ { /* end marker */ } }; -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCHv2] perf/x86/intel/uncore: add support for Haswell ULT IMC uncore
This uncore is the same as the Haswell desktop part but uses a different PCI ID. Signed-off-by: Sonny Rao --- arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c | 8 1 file changed, 8 insertions(+) diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c index 3001015..ca75e70 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c @@ -1,6 +1,9 @@ /* Nehalem/SandBridge/Haswell uncore support */ #include "perf_event_intel_uncore.h" +/* Uncore IMC PCI Id */ +#define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04 + /* SNB event control */ #define SNB_UNC_CTL_EV_SEL_MASK0x00ff #define SNB_UNC_CTL_UMASK_MASK 0xff00 @@ -472,6 +475,10 @@ static const struct pci_device_id hsw_uncore_pci_ids[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_U_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, { /* end: all zeroes */ }, }; @@ -502,6 +509,7 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = { IMC_DEV(IVB_IMC, _uncore_pci_driver),/* 3rd Gen Core processor */ IMC_DEV(IVB_E3_IMC, _uncore_pci_driver), /* Xeon E3-1200 v2/3rd Gen Core processor */ IMC_DEV(HSW_IMC, _uncore_pci_driver),/* 4th Gen Core Processor */ + IMC_DEV(HSW_U_IMC, _uncore_pci_driver), /* 4th Gen Core ULT Mobile Processor */ { /* end marker */ } }; -- 2.1.2 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] perf/x86/intel/uncore: add support for Haswell ULT IMC uncore
On Mon, Apr 20, 2015 at 12:34 PM, Bjorn Helgaas wrote: > On Mon, Apr 20, 2015 at 1:58 PM, Stephane Eranian wrote: >> On Mon, Apr 20, 2015 at 11:56 AM, Bjorn Helgaas wrote: >>> >>> On Mon, Apr 20, 2015 at 1:42 PM, Sonny Rao wrote: >>> > This uncore is the same as the Haswell desktop part but uses a >>> > different PCI ID. >>> > >>> > Signed-off-by: Sonny Rao >>> > --- >>> > arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c | 5 + >>> > include/linux/pci_ids.h | 1 + >>> > 2 files changed, 6 insertions(+) >>> > >>> > diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c >>> > b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c >>> > index 3001015..0bda6fc 100644 >>> > --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c >>> > +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c >>> > @@ -472,6 +472,10 @@ static const struct pci_device_id >>> > hsw_uncore_pci_ids[] = { >>> > PCI_DEVICE(PCI_VENDOR_ID_INTEL, >>> > PCI_DEVICE_ID_INTEL_HSW_IMC), >>> > .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), >>> > }, >>> > + { /* IMC */ >>> > + PCI_DEVICE(PCI_VENDOR_ID_INTEL, >>> > PCI_DEVICE_ID_INTEL_HSW_U_IMC), >>> > + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), >>> > + }, >>> > { /* end: all zeroes */ }, >>> > }; >>> > >>> > @@ -502,6 +506,7 @@ static const struct imc_uncore_pci_dev >>> > desktop_imc_pci_ids[] = { >>> > IMC_DEV(IVB_IMC, _uncore_pci_driver),/* 3rd Gen Core >>> > processor */ >>> > IMC_DEV(IVB_E3_IMC, _uncore_pci_driver), /* Xeon E3-1200 >>> > v2/3rd Gen Core processor */ >>> > IMC_DEV(HSW_IMC, _uncore_pci_driver),/* 4th Gen Core >>> > Processor */ >>> > + IMC_DEV(HSW_U_IMC, _uncore_pci_driver), /* 4th Gen Core ULT >>> > Mobile Processor */ >>> > { /* end marker */ } >>> > }; >>> > >>> > diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h >>> > index 38cff8f..e5ae042 100644 >>> > --- a/include/linux/pci_ids.h >>> > +++ b/include/linux/pci_ids.h >>> > @@ -2545,6 +2545,7 @@ >>> > #define 
PCI_DEVICE_ID_INTEL_IVB_IMC0x0154 >>> > #define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150 >>> > #define PCI_DEVICE_ID_INTEL_HSW_IMC0x0c00 >>> > +#define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04 >>> >>> Please either use the 0x0a04 constant directly in >>> perf_event_intel_uncore_snb.c, or explain why the #define should be >>> here, e.g., maybe it will be used in multiple places. See the comment >>> at the top of pci_ids.h. >>> >> But then, the same reasoning would apply to the other 3 IMC defines, >> wouldn't it? > > Yes. But if we made a mistake in the past, that doesn't mean we > should repeat it today. Shall I post a patch moving the others as well? > >>> > #define PCI_DEVICE_ID_INTEL_PXHD_0 0x0320 >>> > #define PCI_DEVICE_ID_INTEL_PXHD_1 0x0321 >>> > #define PCI_DEVICE_ID_INTEL_PXH_0 0x0329 >>> > -- >>> > 2.1.2 >>> > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] perf/x86/intel/uncore: add support for Haswell ULT IMC uncore
This uncore is the same as the Haswell desktop part but uses a different PCI ID. Signed-off-by: Sonny Rao --- arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c | 5 + include/linux/pci_ids.h | 1 + 2 files changed, 6 insertions(+) diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c index 3001015..0bda6fc 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c @@ -472,6 +472,10 @@ static const struct pci_device_id hsw_uncore_pci_ids[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_U_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, { /* end: all zeroes */ }, }; @@ -502,6 +506,7 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = { IMC_DEV(IVB_IMC, _uncore_pci_driver),/* 3rd Gen Core processor */ IMC_DEV(IVB_E3_IMC, _uncore_pci_driver), /* Xeon E3-1200 v2/3rd Gen Core processor */ IMC_DEV(HSW_IMC, _uncore_pci_driver),/* 4th Gen Core Processor */ + IMC_DEV(HSW_U_IMC, _uncore_pci_driver), /* 4th Gen Core ULT Mobile Processor */ { /* end marker */ } }; diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 38cff8f..e5ae042 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2545,6 +2545,7 @@ #define PCI_DEVICE_ID_INTEL_IVB_IMC0x0154 #define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150 #define PCI_DEVICE_ID_INTEL_HSW_IMC0x0c00 +#define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04 #define PCI_DEVICE_ID_INTEL_PXHD_0 0x0320 #define PCI_DEVICE_ID_INTEL_PXHD_1 0x0321 #define PCI_DEVICE_ID_INTEL_PXH_0 0x0329 -- 2.1.2 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at 
http://www.tux.org/lkml/
[PATCHv2] perf/x86/intel/uncore: add support for Haswell ULT IMC uncore
This uncore is the same as the Haswell desktop part but uses a different PCI ID. Signed-off-by: Sonny Rao sonny...@chromium.org --- arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c | 8 1 file changed, 8 insertions(+) diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c index 3001015..ca75e70 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c @@ -1,6 +1,9 @@ /* Nehalem/SandBridge/Haswell uncore support */ #include perf_event_intel_uncore.h +/* Uncore IMC PCI Id */ +#define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04 + /* SNB event control */ #define SNB_UNC_CTL_EV_SEL_MASK0x00ff #define SNB_UNC_CTL_UMASK_MASK 0xff00 @@ -472,6 +475,10 @@ static const struct pci_device_id hsw_uncore_pci_ids[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_U_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, { /* end: all zeroes */ }, }; @@ -502,6 +509,7 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = { IMC_DEV(IVB_IMC, ivb_uncore_pci_driver),/* 3rd Gen Core processor */ IMC_DEV(IVB_E3_IMC, ivb_uncore_pci_driver), /* Xeon E3-1200 v2/3rd Gen Core processor */ IMC_DEV(HSW_IMC, hsw_uncore_pci_driver),/* 4th Gen Core Processor */ + IMC_DEV(HSW_U_IMC, hsw_uncore_pci_driver), /* 4th Gen Core ULT Mobile Processor */ { /* end marker */ } }; -- 2.1.2 -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] perf/x86/intel/uncore: add support for Haswell ULT IMC uncore
This uncore is the same as the Haswell desktop part but uses a different PCI ID. Signed-off-by: Sonny Rao sonny...@chromium.org --- arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c | 5 + include/linux/pci_ids.h | 1 + 2 files changed, 6 insertions(+) diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c index 3001015..0bda6fc 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c @@ -472,6 +472,10 @@ static const struct pci_device_id hsw_uncore_pci_ids[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_U_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, { /* end: all zeroes */ }, }; @@ -502,6 +506,7 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = { IMC_DEV(IVB_IMC, ivb_uncore_pci_driver),/* 3rd Gen Core processor */ IMC_DEV(IVB_E3_IMC, ivb_uncore_pci_driver), /* Xeon E3-1200 v2/3rd Gen Core processor */ IMC_DEV(HSW_IMC, hsw_uncore_pci_driver),/* 4th Gen Core Processor */ + IMC_DEV(HSW_U_IMC, hsw_uncore_pci_driver), /* 4th Gen Core ULT Mobile Processor */ { /* end marker */ } }; diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 38cff8f..e5ae042 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2545,6 +2545,7 @@ #define PCI_DEVICE_ID_INTEL_IVB_IMC0x0154 #define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150 #define PCI_DEVICE_ID_INTEL_HSW_IMC0x0c00 +#define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04 #define PCI_DEVICE_ID_INTEL_PXHD_0 0x0320 #define PCI_DEVICE_ID_INTEL_PXHD_1 0x0321 #define PCI_DEVICE_ID_INTEL_PXH_0 0x0329 -- 2.1.2 -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read 
the FAQ at http://www.tux.org/lkml/
Re: [PATCH] perf/x86/intel/uncore: add support for Haswell ULT IMC uncore
On Mon, Apr 20, 2015 at 12:34 PM, Bjorn Helgaas bhelg...@google.com wrote: On Mon, Apr 20, 2015 at 1:58 PM, Stephane Eranian eran...@google.com wrote: On Mon, Apr 20, 2015 at 11:56 AM, Bjorn Helgaas bhelg...@google.com wrote: On Mon, Apr 20, 2015 at 1:42 PM, Sonny Rao sonny...@chromium.org wrote: This uncore is the same as the Haswell desktop part but uses a different PCI ID. Signed-off-by: Sonny Rao sonny...@chromium.org --- arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c | 5 + include/linux/pci_ids.h | 1 + 2 files changed, 6 insertions(+) diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c index 3001015..0bda6fc 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c @@ -472,6 +472,10 @@ static const struct pci_device_id hsw_uncore_pci_ids[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_U_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, { /* end: all zeroes */ }, }; @@ -502,6 +506,7 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = { IMC_DEV(IVB_IMC, ivb_uncore_pci_driver),/* 3rd Gen Core processor */ IMC_DEV(IVB_E3_IMC, ivb_uncore_pci_driver), /* Xeon E3-1200 v2/3rd Gen Core processor */ IMC_DEV(HSW_IMC, hsw_uncore_pci_driver),/* 4th Gen Core Processor */ + IMC_DEV(HSW_U_IMC, hsw_uncore_pci_driver), /* 4th Gen Core ULT Mobile Processor */ { /* end marker */ } }; diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 38cff8f..e5ae042 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2545,6 +2545,7 @@ #define PCI_DEVICE_ID_INTEL_IVB_IMC0x0154 #define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150 #define PCI_DEVICE_ID_INTEL_HSW_IMC0x0c00 +#define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04 Please either use the 0x0a04 constant 
directly in perf_event_intel_uncore_snb.c, or explain why the #define should be here, e.g., maybe it will be used in multiple places. See the comment at the top of pci_ids.h. But then, the same reasoning would apply to the other 3 IMC defines, wouldn't it? Yes. But if we made a mistake in the past, that doesn't mean we should repeat it today. Shall I post a patch moving the others as well? #define PCI_DEVICE_ID_INTEL_PXHD_0 0x0320 #define PCI_DEVICE_ID_INTEL_PXHD_1 0x0321 #define PCI_DEVICE_ID_INTEL_PXH_0 0x0329 -- 2.1.2 -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] arm: dts: rk3288: Enable Cortex-A12 HW PMU events
This adds the dts node for the PMU with the correct PMUIRQ interrupts for each core. Signed-off-by: Sonny Rao <sonny...@chromium.org> --- arch/arm/boot/dts/rk3288.dtsi | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi index 165968d..8253abb 100644 --- a/arch/arm/boot/dts/rk3288.dtsi +++ b/arch/arm/boot/dts/rk3288.dtsi @@ -44,6 +44,14 @@ spi2 = &spi2; }; + arm-pmu { + compatible = "arm,cortex-a12-pmu"; + interrupts = <GIC_SPI 151 IRQ_TYPE_LEVEL_HIGH>, +<GIC_SPI 152 IRQ_TYPE_LEVEL_HIGH>, +<GIC_SPI 153 IRQ_TYPE_LEVEL_HIGH>, +<GIC_SPI 154 IRQ_TYPE_LEVEL_HIGH>; + }; + cpus { #address-cells = <1>; #size-cells = <0>; -- 2.2.0.rc0.207.ga3a616c -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] arm: dts: rk3288: Enable Cortex-A12 HW PMU events
This adds the dts node for the PMU with the correct PMUIRQ interrupts for each core. Signed-off-by: Sonny Rao sonny...@chromium.org --- arch/arm/boot/dts/rk3288.dtsi | 8 1 file changed, 8 insertions(+) diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi index 165968d..8253abb 100644 --- a/arch/arm/boot/dts/rk3288.dtsi +++ b/arch/arm/boot/dts/rk3288.dtsi @@ -44,6 +44,14 @@ spi2 = spi2; }; + arm-pmu { + compatible = arm,cortex-a12-pmu; + interrupts = GIC_SPI 151 IRQ_TYPE_LEVEL_HIGH, +GIC_SPI 152 IRQ_TYPE_LEVEL_HIGH, +GIC_SPI 153 IRQ_TYPE_LEVEL_HIGH, +GIC_SPI 154 IRQ_TYPE_LEVEL_HIGH; + }; + cpus { #address-cells = 1; #size-cells = 0; -- 2.2.0.rc0.207.ga3a616c -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] RTC: RK808: fix the rtc time reading issue
On Wed, Jan 14, 2015 at 10:36 AM, Doug Anderson wrote: > Sonny, > >> Chris, it looks like you swapped the set and the clear of this bit, >> and you're relying on the fact that the i2c transaction takes a >> certain amount of time after the RTC_GET_TIME BIT is set. I'm not >> sure how long it actually takes, but why not just put in a usleep() >> for the minimum wait time? > > I think we are safe. > > At 400kHz (the max speed of this part) each bit can be transferred no > faster than 2.5us. In order to do a valid i2c transaction we need to > _at least_ write the address of the device and the data onto the bus, > which is 16 bits. 16 * 2.5us = 40us. That's above the 31.25us > > Personally I think what Chris has is fine, with the comment. Ok, I'm fine with that if we're sure it's slow enough. Comment explaining would certainly help. > > -Doug -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] RTC: RK808: fix the rtc time reading issue
On Tue, Jan 13, 2015 at 6:43 PM, Chris Zhong wrote: > After we set the GET_TIME bit, the rtc time couldn't be read immediately, > we should wait up to 31.25 us, about one cycle of 32khz. Otherwise reading > RTC time will return a old time. If clear the GET_TIME bit after setting, > the time of i2c transfer certainly more than 31.25us. > > Signed-off-by: Chris Zhong > > --- > > drivers/rtc/rtc-rk808.c | 9 +++-- > 1 file changed, 7 insertions(+), 2 deletions(-) > > diff --git a/drivers/rtc/rtc-rk808.c b/drivers/rtc/rtc-rk808.c > index df42257..8dae322 100644 > --- a/drivers/rtc/rtc-rk808.c > +++ b/drivers/rtc/rtc-rk808.c > @@ -67,15 +67,20 @@ static int rk808_rtc_readtime(struct device *dev, struct > rtc_time *tm) > /* Force an update of the shadowed registers right now */ > ret = regmap_update_bits(rk808->regmap, RK808_RTC_CTRL_REG, > BIT_RTC_CTRL_REG_RTC_GET_TIME, > -0); > +BIT_RTC_CTRL_REG_RTC_GET_TIME); > if (ret) { > dev_err(dev, "Failed to update bits rtc_ctrl: %d\n", ret); > return ret; > } > > + /* After we set the GET_TIME bit, the rtc time couldn't be read > +* immediately, we should wait up to 31.25 us, about one cycle of > +* 32khz. If we clear the GET_TIME bit here, the time of i2c transfer > +* certainly more than 31.25us. > +*/ Chris, it looks like you swapped the set and the clear of this bit, and you're relying on the fact that the i2c transaction takes a certain amount of time after the RTC_GET_TIME BIT is set. I'm not sure how long it actually takes, but why not just put in a usleep() for the minimum wait time? 
> ret = regmap_update_bits(rk808->regmap, RK808_RTC_CTRL_REG, > BIT_RTC_CTRL_REG_RTC_GET_TIME, > -BIT_RTC_CTRL_REG_RTC_GET_TIME); > +0); > if (ret) { > dev_err(dev, "Failed to update bits rtc_ctrl: %d\n", ret); > return ret; > -- > 1.9.1 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to majord...@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] RTC: RK808: fix the rtc time reading issue
On Wed, Jan 14, 2015 at 10:36 AM, Doug Anderson diand...@chromium.org wrote: Sonny, Chris, it looks like you swapped the set and the clear of this bit, and you're relying on the fact that the i2c transaction takes a certain amount of time after the RTC_GET_TIME BIT is set. I'm not sure how long it actually takes, but why not just put in a usleep() for the minimum wait time? I think we are safe. At 400kHz (the max speed of this part) each bit can be transferred no faster than 2.5us. In order to do a valid i2c transaction we need to _at least_ write the address of the device and the data onto the bus, which is 16 bits. 16 * 2.5us = 40us. That's above the 31.25us Personally I think what Chris has is fine, with the comment. Ok, I'm fine with that if we're sure it's slow enough. Comment explaining would certainly help. -Doug -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] RTC: RK808: fix the rtc time reading issue
On Tue, Jan 13, 2015 at 6:43 PM, Chris Zhong z...@rock-chips.com wrote: After we set the GET_TIME bit, the rtc time couldn't be read immediately, we should wait up to 31.25 us, about one cycle of 32khz. Otherwise reading RTC time will return a old time. If clear the GET_TIME bit after setting, the time of i2c transfer certainly more than 31.25us. Signed-off-by: Chris Zhong z...@rock-chips.com --- drivers/rtc/rtc-rk808.c | 9 +++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-rk808.c b/drivers/rtc/rtc-rk808.c index df42257..8dae322 100644 --- a/drivers/rtc/rtc-rk808.c +++ b/drivers/rtc/rtc-rk808.c @@ -67,15 +67,20 @@ static int rk808_rtc_readtime(struct device *dev, struct rtc_time *tm) /* Force an update of the shadowed registers right now */ ret = regmap_update_bits(rk808-regmap, RK808_RTC_CTRL_REG, BIT_RTC_CTRL_REG_RTC_GET_TIME, -0); +BIT_RTC_CTRL_REG_RTC_GET_TIME); if (ret) { dev_err(dev, Failed to update bits rtc_ctrl: %d\n, ret); return ret; } + /* After we set the GET_TIME bit, the rtc time couldn't be read +* immediately, we should wait up to 31.25 us, about one cycle of +* 32khz. If we clear the GET_TIME bit here, the time of i2c transfer +* certainly more than 31.25us. +*/ Chris, it looks like you swapped the set and the clear of this bit, and you're relying on the fact that the i2c transaction takes a certain amount of time after the RTC_GET_TIME BIT is set. I'm not sure how long it actually takes, but why not just put in a usleep() for the minimum wait time? 
ret = regmap_update_bits(rk808-regmap, RK808_RTC_CTRL_REG, BIT_RTC_CTRL_REG_RTC_GET_TIME, -BIT_RTC_CTRL_REG_RTC_GET_TIME); +0); if (ret) { dev_err(dev, Failed to update bits rtc_ctrl: %d\n, ret); return ret; -- 1.9.1 -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/ -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: arm64 hitting BUG in arch_timer.h
On Wed, Dec 10, 2014 at 12:56 PM, Mark Salter wrote: > Using Linus' tree from this morning, I am hitting: > >[0.00] BUG: failure at > ./arch/arm64/include/asm/arch_timer.h:112/arch_counter_get_cntpct! > > This is triggered by commit 0b46b8a718 ("clocksource: arch_timer: Fix > code to use physical timers when requested") which addresses an armv7 > problem. Arm64 wants to always use a virtual timer. I used this to avoid > the BUG and get a booting kernel: > > diff --git a/drivers/clocksource/arm_arch_timer.c > b/drivers/clocksource/arm_arch > index 71846f9..4d8a01e 100644 > --- a/drivers/clocksource/arm_arch_timer.c > +++ b/drivers/clocksource/arm_arch_timer.c > @@ -468,7 +468,7 @@ static void __init arch_counter_register(unsigned type) > > /* Register the CP15 based counter if we have one */ > if (type & ARCH_CP15_TIMER) { > - if (arch_timer_use_virtual) > + if (IS_ENABLED(CONFIG_ARM64) || arch_timer_use_virtual) > arch_timer_read_counter = arch_counter_get_cntvct; > else > arch_timer_read_counter = arch_counter_get_cntpct; > > Yes Catalin has prepared a similar patch: https://patchwork.kernel.org/patch/5468031/ -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: arm64 hitting BUG in arch_timer.h
On Wed, Dec 10, 2014 at 12:56 PM, Mark Salter msal...@redhat.com wrote: Using Linus' tree from this morning, I am hitting: [0.00] BUG: failure at ./arch/arm64/include/asm/arch_timer.h:112/arch_counter_get_cntpct! This is triggered by commit 0b46b8a718 (clocksource: arch_timer: Fix code to use physical timers when requested) which addresses an armv7 problem. Arm64 wants to always use a virtual timer. I used this to avoid the BUG and get a booting kernel: diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch index 71846f9..4d8a01e 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -468,7 +468,7 @@ static void __init arch_counter_register(unsigned type) /* Register the CP15 based counter if we have one */ if (type ARCH_CP15_TIMER) { - if (arch_timer_use_virtual) + if (IS_ENABLED(CONFIG_ARM64) || arch_timer_use_virtual) arch_timer_read_counter = arch_counter_get_cntvct; else arch_timer_read_counter = arch_counter_get_cntpct; Yes Catalin has prepared a similar patch: https://patchwork.kernel.org/patch/5468031/ -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [alsa-devel] [PATCH v2 2/2] ASoC: rockchip: i2s: add support for grabbing output clock to codec
On Wed, Dec 3, 2014 at 3:22 PM, Dylan Reid wrote: > On Wed, Dec 3, 2014 at 3:03 PM, Sonny Rao wrote: >> On Wed, Dec 3, 2014 at 12:03 PM, Mark Brown wrote: >>> On Wed, Dec 03, 2014 at 11:38:13AM -0800, Sonny Rao wrote: >>>> On Wed, Dec 3, 2014 at 11:20 AM, Mark Brown wrote: >>> >>>> > I would expect that the clock for the CODEC should be managed by the >>>> > CODEC if at all possible - that seems more logical than having the CPU >>>> > I2S controller request and manage it if it's a separate clock. Why add >>>> > this to the CPU side driver? >>> >>>> This output clock has a mux and can either be a fixed 12Mhz output or >>>> can be derived from the same fractional divider which drives the i2s >>>> block. I thought it was simpler to keep them all the same, but need >>>> to put ownership in the i2s in anticipation of the i2s driver setting >>>> it's own clock rate. >>> >>>> If you think this is an implementation detail and this output clock >>>> should just be owned by the codec driver, even though I'm guessing it >>>> will just have to be the same as i2s, then I think we can drop this >>>> and make sure simple card (or whatever other codec driver) claims this >>>> clock. >>> >>> simple-card obviously isn't a CODEC driver... >> >> Yeah, sorry. >> >>> For generality I think >>> the clock does need to be exposed to the CODEC driver, otherwise this >>> will work differently to how other systems are working and we can't >>> substitute in a different clock on the CODEC side so easily if it >>> doesn't happen to use the output from the I2S block. >> >> Ok, then I think what we will do is abandon this patch and I will send >> something that adds this functionality to the particular codec that >> I'm interested in -- max98090. > > Sorry I didn't read this earlier. I don't think that this belongs in > the max98090. The original patch description is a bit confusing. The > clock being grabbed here is actually i2s mclk. 
My understanding is > that, on this SoC, the mclk is driven from a different IP block than > the rest of the i2s signals. The i2s driver needs to be told about > the clock and enable/disable it at the appropriate times. I'm > assuming it's optional because there are boards using this SoC with > i2s slave mode that don't drive mclk at all. > > Please correct me if I'm wrong on any of the above. I don't think you're wrong, and I'm an audio/i2s neophyte so I think you're probably right and hopefully Mark can confirm that this is how we want it. One important thing to point out, which might be causing confusion, is that this driver is claiming a clock which it internally calls "mclk" but the way it's specified for rk3288 in the DT, that one is just the one which drives the internal logic and has a gate. This clock I'm adding is the actual mclk which is being driven to the i2s slave device, and it has it's own gate and also has a mux, and we need to claim both to be able to enable proper clock gating. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v2 2/2] ASoC: rockchip: i2s: add support for grabbing output clock to codec
On Wed, Dec 3, 2014 at 12:03 PM, Mark Brown wrote: > On Wed, Dec 03, 2014 at 11:38:13AM -0800, Sonny Rao wrote: >> On Wed, Dec 3, 2014 at 11:20 AM, Mark Brown wrote: > >> > I would expect that the clock for the CODEC should be managed by the >> > CODEC if at all possible - that seems more logical than having the CPU >> > I2S controller request and manage it if it's a separate clock. Why add >> > this to the CPU side driver? > >> This output clock has a mux and can either be a fixed 12Mhz output or >> can be derived from the same fractional divider which drives the i2s >> block. I thought it was simpler to keep them all the same, but need >> to put ownership in the i2s in anticipation of the i2s driver setting >> it's own clock rate. > >> If you think this is an implementation detail and this output clock >> should just be owned by the codec driver, even though I'm guessing it >> will just have to be the same as i2s, then I think we can drop this >> and make sure simple card (or whatever other codec driver) claims this >> clock. > > simple-card obviously isn't a CODEC driver... Yeah, sorry. > For generality I think > the clock does need to be exposed to the CODEC driver, otherwise this > will work differently to how other systems are working and we can't > substitute in a different clock on the CODEC side so easily if it > doesn't happen to use the output from the I2S block. Ok, then I think what we will do is abandon this patch and I will send something that adds this functionality to the particular codec that I'm interested in -- max98090. I'm a little tied up at the moment so I'm not going to send that for a little while, but will come eventually. Thanks for the advice! -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v2 2/2] ASoC: rockchip: i2s: add support for grabbing output clock to codec
On Wed, Dec 3, 2014 at 11:20 AM, Mark Brown wrote: > On Wed, Dec 03, 2014 at 03:18:38PM +0800, Jianqun Xu wrote: >> From: Sonny Rao >> >> We need to claim the clock which is driving the codec so that when we enable >> clock gating, we continue to clock the codec when needed. I make this an >> optional clock since there might be some applications where we don't need it >> but can still use the I2S block. > > I would expect that the clock for the CODEC should be managed by the > CODEC if at all possible - that seems more logical than having the CPU > I2S controller request and manage it if it's a separate clock. Why add > this to the CPU side driver? It's a good question. Right now the way I'm running this stuff we're mostly setting all the i2s the clock rates from the codec driver, but I think this isn't the correct way to go, and the i2s driver needs to set it's rate based on the hw params, but that isn't happening (yet). This output clock has a mux and can either be a fixed 12Mhz output or can be derived from the same fractional divider which drives the i2s block. I thought it was simpler to keep them all the same, but need to put ownership in the i2s in anticipation of the i2s driver setting it's own clock rate. If you think this is an implementation detail and this output clock should just be owned by the codec driver, even though I'm guessing it will just have to be the same as i2s, then I think we can drop this and make sure simple card (or whatever other codec driver) claims this clock. > > We've not always done this for older systems due to the lack of a usable > clock API but that's starting to be addressed. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v2 2/2] ASoC: rockchip: i2s: add support for grabbing output clock to codec
On Wed, Dec 3, 2014 at 11:20 AM, Mark Brown broo...@kernel.org wrote: On Wed, Dec 03, 2014 at 03:18:38PM +0800, Jianqun Xu wrote: From: Sonny Rao sonny...@chromium.org We need to claim the clock which is driving the codec so that when we enable clock gating, we continue to clock the codec when needed. I make this an optional clock since there might be some applications where we don't need it but can still use the I2S block. I would expect that the clock for the CODEC should be managed by the CODEC if at all possible - that seems more logical than having the CPU I2S controller request and manage it if it's a separate clock. Why add this to the CPU side driver? It's a good question. Right now the way I'm running this stuff we're mostly setting all the i2s the clock rates from the codec driver, but I think this isn't the correct way to go, and the i2s driver needs to set it's rate based on the hw params, but that isn't happening (yet). This output clock has a mux and can either be a fixed 12Mhz output or can be derived from the same fractional divider which drives the i2s block. I thought it was simpler to keep them all the same, but need to put ownership in the i2s in anticipation of the i2s driver setting it's own clock rate. If you think this is an implementation detail and this output clock should just be owned by the codec driver, even though I'm guessing it will just have to be the same as i2s, then I think we can drop this and make sure simple card (or whatever other codec driver) claims this clock. We've not always done this for older systems due to the lack of a usable clock API but that's starting to be addressed. -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v2 2/2] ASoC: rockchip: i2s: add support for grabbing output clock to codec
On Wed, Dec 3, 2014 at 12:03 PM, Mark Brown broo...@kernel.org wrote: On Wed, Dec 03, 2014 at 11:38:13AM -0800, Sonny Rao wrote: On Wed, Dec 3, 2014 at 11:20 AM, Mark Brown broo...@kernel.org wrote: I would expect that the clock for the CODEC should be managed by the CODEC if at all possible - that seems more logical than having the CPU I2S controller request and manage it if it's a separate clock. Why add this to the CPU side driver? This output clock has a mux and can either be a fixed 12Mhz output or can be derived from the same fractional divider which drives the i2s block. I thought it was simpler to keep them all the same, but need to put ownership in the i2s in anticipation of the i2s driver setting it's own clock rate. If you think this is an implementation detail and this output clock should just be owned by the codec driver, even though I'm guessing it will just have to be the same as i2s, then I think we can drop this and make sure simple card (or whatever other codec driver) claims this clock. simple-card obviously isn't a CODEC driver... Yeah, sorry. For generality I think the clock does need to be exposed to the CODEC driver, otherwise this will work differently to how other systems are working and we can't substitute in a different clock on the CODEC side so easily if it doesn't happen to use the output from the I2S block. Ok, then I think what we will do is abandon this patch and I will send something that adds this functionality to the particular codec that I'm interested in -- max98090. I'm a little tied up at the moment so I'm not going to send that for a little while, but will come eventually. Thanks for the advice! -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [alsa-devel] [PATCH v2 2/2] ASoC: rockchip: i2s: add support for grabbing output clock to codec
On Wed, Dec 3, 2014 at 3:22 PM, Dylan Reid dgr...@chromium.org wrote: On Wed, Dec 3, 2014 at 3:03 PM, Sonny Rao sonny...@chromium.org wrote: On Wed, Dec 3, 2014 at 12:03 PM, Mark Brown broo...@kernel.org wrote: On Wed, Dec 03, 2014 at 11:38:13AM -0800, Sonny Rao wrote: On Wed, Dec 3, 2014 at 11:20 AM, Mark Brown broo...@kernel.org wrote: I would expect that the clock for the CODEC should be managed by the CODEC if at all possible - that seems more logical than having the CPU I2S controller request and manage it if it's a separate clock. Why add this to the CPU side driver? This output clock has a mux and can either be a fixed 12Mhz output or can be derived from the same fractional divider which drives the i2s block. I thought it was simpler to keep them all the same, but need to put ownership in the i2s in anticipation of the i2s driver setting it's own clock rate. If you think this is an implementation detail and this output clock should just be owned by the codec driver, even though I'm guessing it will just have to be the same as i2s, then I think we can drop this and make sure simple card (or whatever other codec driver) claims this clock. simple-card obviously isn't a CODEC driver... Yeah, sorry. For generality I think the clock does need to be exposed to the CODEC driver, otherwise this will work differently to how other systems are working and we can't substitute in a different clock on the CODEC side so easily if it doesn't happen to use the output from the I2S block. Ok, then I think what we will do is abandon this patch and I will send something that adds this functionality to the particular codec that I'm interested in -- max98090. Sorry I didn't read this earlier. I don't think that this belongs in the max98090. The original patch description is a bit confusing. The clock being grabbed here is actually i2s mclk. My understanding is that, on this SoC, the mclk is driven from a different IP block than the rest of the i2s signals. 
The i2s driver needs to be told about the clock and enable/disable it at the appropriate times. I'm assuming it's optional because there are boards using this SoC with i2s slave mode that don't drive mclk at all. Please correct me if I'm wrong on any of the above. I don't think you're wrong, and I'm an audio/i2s neophyte so I think you're probably right and hopefully Mark can confirm that this is how we want it. One important thing to point out, which might be causing confusion, is that this driver is claiming a clock which it internally calls mclk but the way it's specified for rk3288 in the DT, that one is just the one which drives the internal logic and has a gate. This clock I'm adding is the actual mclk which is being driven to the i2s slave device, and it has it's own gate and also has a mux, and we need to claim both to be able to enable proper clock gating. -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] clk: rockchip: rk3288 export i2s0_clkout for use in DT
On Wed, Nov 26, 2014 at 3:32 PM, Heiko Stübner wrote: > Am Dienstag, 18. November 2014, 23:15:19 schrieb Sonny Rao: >> This exposes the clock that comes out of the i2s block which generally >> goes to the audio codec. >> >> Signed-off-by: Sonny Rao > > applied to my clk branch after removing the CLK_SET_RATE_PARENT Hi, sorry for the delay, and thanks for fixing it. I think when I applied the patch to next-20141118 that had a CLK_SET_RATE_PARENT in it from this patch: commit fc69ed70c16a31d6a77ec47a30a9fe941f763f1e Author: Jianqun Date: Tue Sep 30 11:12:04 2014 +0800 clk: rockchip: rk3288: i2s_frac adds flag to set parent's rate I agree that is is not necessary and maybe not desirable. Thanks again! > Heiko -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] clk: rockchip: rk3288 export i2s0_clkout for use in DT
On Wed, Nov 26, 2014 at 3:32 PM, Heiko Stübner he...@sntech.de wrote: Am Dienstag, 18. November 2014, 23:15:19 schrieb Sonny Rao: This exposes the clock that comes out of the i2s block which generally goes to the audio codec. Signed-off-by: Sonny Rao sonny...@chromium.org applied to my clk branch after removing the CLK_SET_RATE_PARENT Hi, sorry for the delay, and thanks for fixing it. I think when I applied the patch to next-20141118 that had a CLK_SET_RATE_PARENT in it from this patch: commit fc69ed70c16a31d6a77ec47a30a9fe941f763f1e Author: Jianqun jay...@rock-chips.com Date: Tue Sep 30 11:12:04 2014 +0800 clk: rockchip: rk3288: i2s_frac adds flag to set parent's rate I agree that is is not necessary and maybe not desirable. Thanks again! Heiko -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] ARM: dts: rk3288: add arm,cpu-registers-not-fw-configured
This will enable use of physical arch timers on rk3288, where each core comes out of reset with a different virtual offset. Using physical timers will help with SMP booting on coreboot and older u-boot and should also allow suspend-resume and cpu-hotplug to work on all firmwares. Firmware which does initialize the cpu registers properly at boot and cpu-hotplug can remove this property from the device tree. Signed-off-by: Sonny Rao <sonny...@chromium.org> --- arch/arm/boot/dts/rk3288.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi index 0f50d5d..c861f52 100644 --- a/arch/arm/boot/dts/rk3288.dtsi +++ b/arch/arm/boot/dts/rk3288.dtsi @@ -139,6 +139,7 @@ timer { compatible = "arm,armv7-timer"; + arm,cpu-registers-not-fw-configured; interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>, <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>, <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>, -- 2.1.2 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] ARM: dts: rk3288: add arm,cpu-registers-not-fw-configured
This will enable use of physical arch timers on rk3288, where each core comes out of reset with a different virtual offset. Using physical timers will help with SMP booting on coreboot and older u-boot and should also allow suspend-resume and cpu-hotplug to work on all firmwares. Firmware which does initialize the cpu registers properly at boot and cpu-hotplug can remove this property from the device tree. Signed-off-by: Sonny Rao sonny...@chromium.org --- arch/arm/boot/dts/rk3288.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi index 0f50d5d..c861f52 100644 --- a/arch/arm/boot/dts/rk3288.dtsi +++ b/arch/arm/boot/dts/rk3288.dtsi @@ -139,6 +139,7 @@ timer { compatible = arm,armv7-timer; + arm,cpu-registers-not-fw-configured; interrupts = GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH), GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH), GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH), -- 2.1.2 -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/