Re: [RFC PATCH 10/10] arch/x86: Introduce QOS feature for AMD

2018-10-05 Thread Moger, Babu
Hi James,

On 10/05/2018 11:20 AM, James Morse wrote:
> Hi Babu,
> 
> On 24/09/18 20:19, Moger, Babu wrote:
>> Enables QOS feature on AMD.
>> Following QoS sub-features are supported in AMD if the underlying
>> hardware supports it.
>>  - L3 Cache allocation enforcement
>>  - L3 Cache occupancy monitoring
>>  - L3 Code-Data Prioritization support
>>  - Memory Bandwidth Enforcement(Allocation)
>>
>> There are differences in the way some of the features are implemented.
>> Separate those functions and add those as vendor specific functions.
>> The major difference is in MBA feature.
>>  - AMD uses CPUID leaf 0x8020 to initialize the MBA features.
>>  - AMD uses direct bandwidth value instead of delay based on bandwidth
>>values.
>>  - MSR register base addresses are different for MBA.
> 
>>  - Also AMD allows non-contiguous L3 cache bit masks.
> 
> Nice!
> 
> This is visible to user-space, the 'Cache Bit Masks (CBM)' section of
> Documentation/x86/intel_rdt_ui.txt currently says 'X86 hardware requires ... a
> contiguous block'.
> 
> Does user-space need to know it can do this in advance, or is it a 
> try-it-and-see?

It is try-it-and-see.
> 
> Arm's MPAM stuff can do this too, but I'm against having the ABI vary between
> architectures. If this is going to be discoverable, I'd like it to work on 
> Arm too.

It is not discoverable at this point. Mostly predefined. Yes, it will be a bit
of a challenge to handle these differences. We may have to come up with some
kind of a flag (or something) to make it look similar on the ABI side.

> 
> 
> Thanks,
> 
> James
> 
>> Adds following functions to take care of the differences.
>> rdt_get_mem_config_amd : MBA initialization function
>> parse_bw_amd : Bandwidth parsing
>> mba_wrmsr_amd: Writes bandwidth value
>> cbm_validate_amd : L3 cache bitmask validation
> 
>> diff --git a/arch/x86/kernel/cpu/rdt_ctrlmondata.c 
>> b/arch/x86/kernel/cpu/rdt_ctrlmondata.c
>> index 5a282b6c4bd7..1e4631f88696 100644
>> --- a/arch/x86/kernel/cpu/rdt_ctrlmondata.c
>> +++ b/arch/x86/kernel/cpu/rdt_ctrlmondata.c
>> @@ -123,6 +169,41 @@ bool cbm_validate(char *buf, u32 *data, struct 
>> rdt_resource *r)
>>  return true;
>>  }
>>  
>> +/*
>> + * Check whether a cache bit mask is valid. AMD allows
>> + * non-contiguous masks.
>> + */
>> +bool cbm_validate_amd(char *buf, u32 *data, struct rdt_resource *r)
>> +{
>> +unsigned long first_bit, zero_bit, val;
>> +unsigned int cbm_len = r->cache.cbm_len;
>> +int ret;
>> +
>> +ret = kstrtoul(buf, 16, );
>> +if (ret) {
>> +rdt_last_cmd_printf("non-hex character in mask %s\n", buf);
>> +return false;
>> +}
>> +
>> +if (val == 0 || val > r->default_ctrl) {
>> +rdt_last_cmd_puts("mask out of range\n");
>> +return false;
>> +}
>> +
>> +first_bit = find_first_bit(, cbm_len);
>> +zero_bit = find_next_zero_bit(, cbm_len, first_bit);
>> +
>> +
>> +if ((zero_bit - first_bit) < r->cache.min_cbm_bits) {
>> +rdt_last_cmd_printf("Need at least %d bits in mask\n",
>> +r->cache.min_cbm_bits);
>> +return false;
>> +}
>> +
>> +*data = val;
>> +return true;
>> +}
>> +
>>  struct rdt_cbm_parse_data {
>>  struct rdtgroup *rdtgrp;
>>  char*buf;
>>
> 


Re: [RFC PATCH 10/10] arch/x86: Introduce QOS feature for AMD

2018-10-05 Thread Moger, Babu
Hi James,

On 10/05/2018 11:20 AM, James Morse wrote:
> Hi Babu,
> 
> On 24/09/18 20:19, Moger, Babu wrote:
>> Enables QOS feature on AMD.
>> Following QoS sub-features are supported in AMD if the underlying
>> hardware supports it.
>>  - L3 Cache allocation enforcement
>>  - L3 Cache occupancy monitoring
>>  - L3 Code-Data Prioritization support
>>  - Memory Bandwidth Enforcement(Allocation)
>>
>> There are differences in the way some of the features are implemented.
>> Separate those functions and add those as vendor specific functions.
>> The major difference is in MBA feature.
>>  - AMD uses CPUID leaf 0x8020 to initialize the MBA features.
>>  - AMD uses direct bandwidth value instead of delay based on bandwidth
>>values.
>>  - MSR register base addresses are different for MBA.
> 
>>  - Also AMD allows non-contiguous L3 cache bit masks.
> 
> Nice!
> 
> This is visible to user-space, the 'Cache Bit Masks (CBM)' section of
> Documentation/x86/intel_rdt_ui.txt currently says 'X86 hardware requires ... a
> contiguous block'.
> 
> Does user-space need to know it can do this in advance, or is it a 
> try-it-and-see?

It is try-it-and-see.
> 
> Arm's MPAM stuff can do this too, but I'm against having the ABI vary between
> architectures. If this is going to be discoverable, I'd like it to work on 
> Arm too.

It is not discoverable at this point. Mostly predefined. Yes, it will be a bit
of a challenge to handle these differences. We may have to come up with some
kind of a flag (or something) to make it look similar on the ABI side.

> 
> 
> Thanks,
> 
> James
> 
>> Adds following functions to take care of the differences.
>> rdt_get_mem_config_amd : MBA initialization function
>> parse_bw_amd : Bandwidth parsing
>> mba_wrmsr_amd: Writes bandwidth value
>> cbm_validate_amd : L3 cache bitmask validation
> 
>> diff --git a/arch/x86/kernel/cpu/rdt_ctrlmondata.c 
>> b/arch/x86/kernel/cpu/rdt_ctrlmondata.c
>> index 5a282b6c4bd7..1e4631f88696 100644
>> --- a/arch/x86/kernel/cpu/rdt_ctrlmondata.c
>> +++ b/arch/x86/kernel/cpu/rdt_ctrlmondata.c
>> @@ -123,6 +169,41 @@ bool cbm_validate(char *buf, u32 *data, struct 
>> rdt_resource *r)
>>  return true;
>>  }
>>  
>> +/*
>> + * Check whether a cache bit mask is valid. AMD allows
>> + * non-contiguous masks.
>> + */
>> +bool cbm_validate_amd(char *buf, u32 *data, struct rdt_resource *r)
>> +{
>> +unsigned long first_bit, zero_bit, val;
>> +unsigned int cbm_len = r->cache.cbm_len;
>> +int ret;
>> +
>> +ret = kstrtoul(buf, 16, );
>> +if (ret) {
>> +rdt_last_cmd_printf("non-hex character in mask %s\n", buf);
>> +return false;
>> +}
>> +
>> +if (val == 0 || val > r->default_ctrl) {
>> +rdt_last_cmd_puts("mask out of range\n");
>> +return false;
>> +}
>> +
>> +first_bit = find_first_bit(, cbm_len);
>> +zero_bit = find_next_zero_bit(, cbm_len, first_bit);
>> +
>> +
>> +if ((zero_bit - first_bit) < r->cache.min_cbm_bits) {
>> +rdt_last_cmd_printf("Need at least %d bits in mask\n",
>> +r->cache.min_cbm_bits);
>> +return false;
>> +}
>> +
>> +*data = val;
>> +return true;
>> +}
>> +
>>  struct rdt_cbm_parse_data {
>>  struct rdtgroup *rdtgrp;
>>  char*buf;
>>
> 


Re: [RFC PATCH 10/10] arch/x86: Introduce QOS feature for AMD

2018-10-05 Thread James Morse
Hi Babu,

On 24/09/18 20:19, Moger, Babu wrote:
> Enables QOS feature on AMD.
> Following QoS sub-features are supported in AMD if the underlying
> hardware supports it.
>  - L3 Cache allocation enforcement
>  - L3 Cache occupancy monitoring
>  - L3 Code-Data Prioritization support
>  - Memory Bandwidth Enforcement(Allocation)
> 
> There are differences in the way some of the features are implemented.
> Separate those functions and add those as vendor specific functions.
> The major difference is in MBA feature.
>  - AMD uses CPUID leaf 0x8020 to initialize the MBA features.
>  - AMD uses direct bandwidth value instead of delay based on bandwidth
>values.
>  - MSR register base addresses are different for MBA.

>  - Also AMD allows non-contiguous L3 cache bit masks.

Nice!

This is visible to user-space, the 'Cache Bit Masks (CBM)' section of
Documentation/x86/intel_rdt_ui.txt currently says 'X86 hardware requires ... a
contiguous block'.

Does user-space need to know it can do this in advance, or is it a 
try-it-and-see?

Arm's MPAM stuff can do this too, but I'm against having the ABI vary between
architectures. If this is going to be discoverable, I'd like it to work on Arm 
too.


Thanks,

James

> Adds following functions to take care of the differences.
> rdt_get_mem_config_amd : MBA initialization function
> parse_bw_amd : Bandwidth parsing
> mba_wrmsr_amd: Writes bandwidth value
> cbm_validate_amd : L3 cache bitmask validation

> diff --git a/arch/x86/kernel/cpu/rdt_ctrlmondata.c 
> b/arch/x86/kernel/cpu/rdt_ctrlmondata.c
> index 5a282b6c4bd7..1e4631f88696 100644
> --- a/arch/x86/kernel/cpu/rdt_ctrlmondata.c
> +++ b/arch/x86/kernel/cpu/rdt_ctrlmondata.c
> @@ -123,6 +169,41 @@ bool cbm_validate(char *buf, u32 *data, struct 
> rdt_resource *r)
>   return true;
>  }
>  
> +/*
> + * Check whether a cache bit mask is valid. AMD allows
> + * non-contiguous masks.
> + */
> +bool cbm_validate_amd(char *buf, u32 *data, struct rdt_resource *r)
> +{
> + unsigned long first_bit, zero_bit, val;
> + unsigned int cbm_len = r->cache.cbm_len;
> + int ret;
> +
> + ret = kstrtoul(buf, 16, );
> + if (ret) {
> + rdt_last_cmd_printf("non-hex character in mask %s\n", buf);
> + return false;
> + }
> +
> + if (val == 0 || val > r->default_ctrl) {
> + rdt_last_cmd_puts("mask out of range\n");
> + return false;
> + }
> +
> + first_bit = find_first_bit(, cbm_len);
> + zero_bit = find_next_zero_bit(, cbm_len, first_bit);
> +
> +
> + if ((zero_bit - first_bit) < r->cache.min_cbm_bits) {
> + rdt_last_cmd_printf("Need at least %d bits in mask\n",
> + r->cache.min_cbm_bits);
> + return false;
> + }
> +
> + *data = val;
> + return true;
> +}
> +
>  struct rdt_cbm_parse_data {
>   struct rdtgroup *rdtgrp;
>   char*buf;
> 



Re: [RFC PATCH 10/10] arch/x86: Introduce QOS feature for AMD

2018-10-05 Thread James Morse
Hi Babu,

On 24/09/18 20:19, Moger, Babu wrote:
> Enables QOS feature on AMD.
> Following QoS sub-features are supported in AMD if the underlying
> hardware supports it.
>  - L3 Cache allocation enforcement
>  - L3 Cache occupancy monitoring
>  - L3 Code-Data Prioritization support
>  - Memory Bandwidth Enforcement(Allocation)
> 
> There are differences in the way some of the features are implemented.
> Separate those functions and add those as vendor specific functions.
> The major difference is in MBA feature.
>  - AMD uses CPUID leaf 0x8020 to initialize the MBA features.
>  - AMD uses direct bandwidth value instead of delay based on bandwidth
>values.
>  - MSR register base addresses are different for MBA.

>  - Also AMD allows non-contiguous L3 cache bit masks.

Nice!

This is visible to user-space, the 'Cache Bit Masks (CBM)' section of
Documentation/x86/intel_rdt_ui.txt currently says 'X86 hardware requires ... a
contiguous block'.

Does user-space need to know it can do this in advance, or is it a 
try-it-and-see?

Arm's MPAM stuff can do this too, but I'm against having the ABI vary between
architectures. If this is going to be discoverable, I'd like it to work on Arm 
too.


Thanks,

James

> Adds following functions to take care of the differences.
> rdt_get_mem_config_amd : MBA initialization function
> parse_bw_amd : Bandwidth parsing
> mba_wrmsr_amd: Writes bandwidth value
> cbm_validate_amd : L3 cache bitmask validation

> diff --git a/arch/x86/kernel/cpu/rdt_ctrlmondata.c 
> b/arch/x86/kernel/cpu/rdt_ctrlmondata.c
> index 5a282b6c4bd7..1e4631f88696 100644
> --- a/arch/x86/kernel/cpu/rdt_ctrlmondata.c
> +++ b/arch/x86/kernel/cpu/rdt_ctrlmondata.c
> @@ -123,6 +169,41 @@ bool cbm_validate(char *buf, u32 *data, struct 
> rdt_resource *r)
>   return true;
>  }
>  
> +/*
> + * Check whether a cache bit mask is valid. AMD allows
> + * non-contiguous masks.
> + */
> +bool cbm_validate_amd(char *buf, u32 *data, struct rdt_resource *r)
> +{
> + unsigned long first_bit, zero_bit, val;
> + unsigned int cbm_len = r->cache.cbm_len;
> + int ret;
> +
> + ret = kstrtoul(buf, 16, );
> + if (ret) {
> + rdt_last_cmd_printf("non-hex character in mask %s\n", buf);
> + return false;
> + }
> +
> + if (val == 0 || val > r->default_ctrl) {
> + rdt_last_cmd_puts("mask out of range\n");
> + return false;
> + }
> +
> + first_bit = find_first_bit(, cbm_len);
> + zero_bit = find_next_zero_bit(, cbm_len, first_bit);
> +
> +
> + if ((zero_bit - first_bit) < r->cache.min_cbm_bits) {
> + rdt_last_cmd_printf("Need at least %d bits in mask\n",
> + r->cache.min_cbm_bits);
> + return false;
> + }
> +
> + *data = val;
> + return true;
> +}
> +
>  struct rdt_cbm_parse_data {
>   struct rdtgroup *rdtgrp;
>   char*buf;
> 



Re: [RFC PATCH 10/10] arch/x86: Introduce QOS feature for AMD

2018-10-03 Thread Moger, Babu
Hi Reinette,

On 10/02/2018 05:13 PM, Reinette Chatre wrote:
> Hi Babu,
> 
> On 9/24/2018 12:19 PM, Moger, Babu wrote:
>> +/*
>> + * Check whether a cache bit mask is valid. AMD allows
>> + * non-contiguous masks.
>> + */
>> +bool cbm_validate_amd(char *buf, u32 *data, struct rdt_resource *r)
>> +{
>> +unsigned long first_bit, zero_bit, val;
>> +unsigned int cbm_len = r->cache.cbm_len;
>> +int ret;
>> +
>> +ret = kstrtoul(buf, 16, );
>> +if (ret) {
>> +rdt_last_cmd_printf("non-hex character in mask %s\n", buf);
>> +return false;
>> +}
>> +
>> +if (val == 0 || val > r->default_ctrl) {
>> +rdt_last_cmd_puts("mask out of range\n");
>> +return false;
>> +}
> 
> According to
> https://www.amd.com/system/files/TechDocs/56375_Quality_of_Service_Extensions.pdf
> "If an L3_MASK_n register is programmed with all 0’s, that COS will be
> prevented from allocating any lines in the L3 cache."
> 
> The "val == 0" test thus does not seem necessary.

Yes. Good point. We don't need this test.
> 
>> +
>> +first_bit = find_first_bit(, cbm_len);
>> +zero_bit = find_next_zero_bit(, cbm_len, first_bit);
>> +
>> +
>> +if ((zero_bit - first_bit) < r->cache.min_cbm_bits) {
>> +rdt_last_cmd_printf("Need at least %d bits in mask\n",
>> +r->cache.min_cbm_bits);
>> +return false;
>> +}
> 
> If AMD platforms accept CBM of all zeroes then it seems that the
> platforms would not require a minimum number of set bits?

Yes. We don't need this check as well.  Tested and confirmed.
Thanks
> 
> Reinette
> 


Re: [RFC PATCH 10/10] arch/x86: Introduce QOS feature for AMD

2018-10-03 Thread Moger, Babu
Hi Reinette,

On 10/02/2018 05:13 PM, Reinette Chatre wrote:
> Hi Babu,
> 
> On 9/24/2018 12:19 PM, Moger, Babu wrote:
>> +/*
>> + * Check whether a cache bit mask is valid. AMD allows
>> + * non-contiguous masks.
>> + */
>> +bool cbm_validate_amd(char *buf, u32 *data, struct rdt_resource *r)
>> +{
>> +unsigned long first_bit, zero_bit, val;
>> +unsigned int cbm_len = r->cache.cbm_len;
>> +int ret;
>> +
>> +ret = kstrtoul(buf, 16, );
>> +if (ret) {
>> +rdt_last_cmd_printf("non-hex character in mask %s\n", buf);
>> +return false;
>> +}
>> +
>> +if (val == 0 || val > r->default_ctrl) {
>> +rdt_last_cmd_puts("mask out of range\n");
>> +return false;
>> +}
> 
> According to
> https://www.amd.com/system/files/TechDocs/56375_Quality_of_Service_Extensions.pdf
> "If an L3_MASK_n register is programmed with all 0’s, that COS will be
> prevented from allocating any lines in the L3 cache."
> 
> The "val == 0" test thus does not seem necessary.

Yes. Good point. We don't need this test.
> 
>> +
>> +first_bit = find_first_bit(, cbm_len);
>> +zero_bit = find_next_zero_bit(, cbm_len, first_bit);
>> +
>> +
>> +if ((zero_bit - first_bit) < r->cache.min_cbm_bits) {
>> +rdt_last_cmd_printf("Need at least %d bits in mask\n",
>> +r->cache.min_cbm_bits);
>> +return false;
>> +}
> 
> If AMD platforms accept CBM of all zeroes then it seems that the
> platforms would not require a minimum number of set bits?

Yes. We don't need this check as well.  Tested and confirmed.
Thanks
> 
> Reinette
> 


Re: [RFC PATCH 10/10] arch/x86: Introduce QOS feature for AMD

2018-10-03 Thread Moger, Babu


On 10/02/2018 01:27 PM, Fenghua Yu wrote:
> On Mon, Sep 24, 2018 at 07:19:16PM +, Moger, Babu wrote:
>>  int parse_bw(void *_buf, struct rdt_resource *r,  struct rdt_domain *d);
>> +int parse_bw_amd(void *_buf, struct rdt_resource *r,  struct rdt_domain *d);
> 
> Please note the type of _buf in parse_bw() is changed in latest kernel
> to fix some issues. Please follow the same definition of parse_bw() in
> parse_bw_amd().

Yes, I noticed it. Saw the same issue (MBA data parsing) and found it fixed
in 4.19-rc5. Will re-base to the latest kernel.
> 
> Thanks.
> 
> -Fenghua
> 


Re: [RFC PATCH 10/10] arch/x86: Introduce QOS feature for AMD

2018-10-03 Thread Moger, Babu


On 10/02/2018 01:27 PM, Fenghua Yu wrote:
> On Mon, Sep 24, 2018 at 07:19:16PM +, Moger, Babu wrote:
>>  int parse_bw(void *_buf, struct rdt_resource *r,  struct rdt_domain *d);
>> +int parse_bw_amd(void *_buf, struct rdt_resource *r,  struct rdt_domain *d);
> 
> Please note the type of _buf in parse_bw() is changed in latest kernel
> to fix some issues. Please follow the same definition of parse_bw() in
> parse_bw_amd().

Yes, I noticed it. Saw the same issue (MBA data parsing) and found it fixed
in 4.19-rc5. Will re-base to the latest kernel.
> 
> Thanks.
> 
> -Fenghua
> 


Re: [RFC PATCH 10/10] arch/x86: Introduce QOS feature for AMD

2018-10-02 Thread Reinette Chatre
Hi Babu,

On 9/24/2018 12:19 PM, Moger, Babu wrote:
> +/*
> + * Check whether a cache bit mask is valid. AMD allows
> + * non-contiguous masks.
> + */
> +bool cbm_validate_amd(char *buf, u32 *data, struct rdt_resource *r)
> +{
> + unsigned long first_bit, zero_bit, val;
> + unsigned int cbm_len = r->cache.cbm_len;
> + int ret;
> +
> + ret = kstrtoul(buf, 16, );
> + if (ret) {
> + rdt_last_cmd_printf("non-hex character in mask %s\n", buf);
> + return false;
> + }
> +
> + if (val == 0 || val > r->default_ctrl) {
> + rdt_last_cmd_puts("mask out of range\n");
> + return false;
> + }

According to
https://www.amd.com/system/files/TechDocs/56375_Quality_of_Service_Extensions.pdf
"If an L3_MASK_n register is programmed with all 0’s, that COS will be
prevented from allocating any lines in the L3 cache."

The "val == 0" test thus does not seem necessary.

> +
> + first_bit = find_first_bit(, cbm_len);
> + zero_bit = find_next_zero_bit(, cbm_len, first_bit);
> +
> +
> + if ((zero_bit - first_bit) < r->cache.min_cbm_bits) {
> + rdt_last_cmd_printf("Need at least %d bits in mask\n",
> + r->cache.min_cbm_bits);
> + return false;
> + }

If AMD platforms accept CBM of all zeroes then it seems that the
platforms would not require a minimum number of set bits?

Reinette


Re: [RFC PATCH 10/10] arch/x86: Introduce QOS feature for AMD

2018-10-02 Thread Reinette Chatre
Hi Babu,

On 9/24/2018 12:19 PM, Moger, Babu wrote:
> +/*
> + * Check whether a cache bit mask is valid. AMD allows
> + * non-contiguous masks.
> + */
> +bool cbm_validate_amd(char *buf, u32 *data, struct rdt_resource *r)
> +{
> + unsigned long first_bit, zero_bit, val;
> + unsigned int cbm_len = r->cache.cbm_len;
> + int ret;
> +
> + ret = kstrtoul(buf, 16, );
> + if (ret) {
> + rdt_last_cmd_printf("non-hex character in mask %s\n", buf);
> + return false;
> + }
> +
> + if (val == 0 || val > r->default_ctrl) {
> + rdt_last_cmd_puts("mask out of range\n");
> + return false;
> + }

According to
https://www.amd.com/system/files/TechDocs/56375_Quality_of_Service_Extensions.pdf
"If an L3_MASK_n register is programmed with all 0’s, that COS will be
prevented from allocating any lines in the L3 cache."

The "val == 0" test thus does not seem necessary.

> +
> + first_bit = find_first_bit(, cbm_len);
> + zero_bit = find_next_zero_bit(, cbm_len, first_bit);
> +
> +
> + if ((zero_bit - first_bit) < r->cache.min_cbm_bits) {
> + rdt_last_cmd_printf("Need at least %d bits in mask\n",
> + r->cache.min_cbm_bits);
> + return false;
> + }

If AMD platforms accept CBM of all zeroes then it seems that the
platforms would not require a minimum number of set bits?

Reinette


Re: [RFC PATCH 10/10] arch/x86: Introduce QOS feature for AMD

2018-10-02 Thread Fenghua Yu
On Mon, Sep 24, 2018 at 07:19:16PM +, Moger, Babu wrote:
>  int parse_bw(void *_buf, struct rdt_resource *r,  struct rdt_domain *d);
> +int parse_bw_amd(void *_buf, struct rdt_resource *r,  struct rdt_domain *d);

Please note the type of _buf in parse_bw() is changed in latest kernel
to fix some issues. Please follow the same definition of parse_bw() in
parse_bw_amd().

Thanks.

-Fenghua


Re: [RFC PATCH 10/10] arch/x86: Introduce QOS feature for AMD

2018-10-02 Thread Fenghua Yu
On Mon, Sep 24, 2018 at 07:19:16PM +, Moger, Babu wrote:
>  int parse_bw(void *_buf, struct rdt_resource *r,  struct rdt_domain *d);
> +int parse_bw_amd(void *_buf, struct rdt_resource *r,  struct rdt_domain *d);

Please note the type of _buf in parse_bw() is changed in latest kernel
to fix some issues. Please follow the same definition of parse_bw() in
parse_bw_amd().

Thanks.

-Fenghua


[RFC PATCH 10/10] arch/x86: Introduce QOS feature for AMD

2018-09-24 Thread Moger, Babu
Enables QOS feature on AMD.
Following QoS sub-features are supported in AMD if the underlying
hardware supports it.
 - L3 Cache allocation enforcement
 - L3 Cache occupancy monitoring
 - L3 Code-Data Prioritization support
 - Memory Bandwidth Enforcement(Allocation)

There are differences in the way some of the features are implemented.
Separate those functions and add those as vendor specific functions.
The major difference is in MBA feature.
 - AMD uses CPUID leaf 0x80000020 to initialize the MBA features.
 - AMD uses direct bandwidth value instead of delay based on bandwidth
   values.
 - MSR register base addresses are different for MBA.
 - Also AMD allows non-contiguous L3 cache bit masks.

Adds following functions to take care of the differences.
rdt_get_mem_config_amd : MBA initialization function
parse_bw_amd : Bandwidth parsing
mba_wrmsr_amd: Writes bandwidth value
cbm_validate_amd : L3 cache bitmask validation

Signed-off-by: Babu Moger 
---
 arch/x86/kernel/cpu/rdt.c | 70 ++-
 arch/x86/kernel/cpu/rdt.h |  3 +
 arch/x86/kernel/cpu/rdt_ctrlmondata.c | 81 +++
 3 files changed, 152 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/rdt.c b/arch/x86/kernel/cpu/rdt.c
index ae26b9b3fafa..21c17893cc0a 100644
--- a/arch/x86/kernel/cpu/rdt.c
+++ b/arch/x86/kernel/cpu/rdt.c
@@ -35,6 +35,7 @@
 
 #define MBA_IS_LINEAR  0x4
 #define MBA_MAX_MBPS   U32_MAX
+#define MAX_MBA_BW_AMD 0x800
 
 /* Mutex to protect rdtgroup access. */
 DEFINE_MUTEX(rdtgroup_mutex);
@@ -63,6 +64,9 @@ static void
 mba_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r);
 static void
 cat_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r);
+static void
+mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m,
+ struct rdt_resource *r);
 
 #define domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].domains)
 
@@ -282,6 +286,31 @@ static bool rdt_get_mem_config(struct rdt_resource *r)
return true;
 }
 
+static bool rdt_get_mem_config_amd(struct rdt_resource *r)
+{
+   union cpuid_0x10_3_eax eax;
+   union cpuid_0x10_x_edx edx;
+   u32 ebx, ecx;
+
+   cpuid_count(0x8020, 1, , , , );
+   r->num_closid = edx.split.cos_max + 1;
+   r->default_ctrl = MAX_MBA_BW_AMD;
+
+   /* AMD does not use delay. Set delay_linear to false by default */
+   r->membw.delay_linear = false;
+
+   /* FIX ME - May need to be read from MSR */
+   r->membw.min_bw = 0;
+   r->membw.bw_gran = 1;
+   /* Max value is 2048, Data width should be 4 in decimal */
+   r->data_width = 4;
+
+   r->alloc_capable = true;
+   r->alloc_enabled = true;
+
+   return true;
+}
+
 static void rdt_get_cache_alloc_cfg(int idx, struct rdt_resource *r)
 {
union cpuid_0x10_1_eax eax;
@@ -341,6 +370,16 @@ static int get_cache_id(int cpu, int level)
return -1;
 }
 
+static void
+mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m, struct rdt_resource 
*r)
+{
+   unsigned int i;
+
+   /*  Write the bw values for mba. */
+   for (i = m->low; i < m->high; i++)
+   wrmsrl(r->msr_base + i, d->ctrl_val[i]);
+}
+
 /*
  * Map the memory b/w percentage value to delay values
  * that can be written to QOS_MSRs.
@@ -858,8 +897,12 @@ static __init void rdt_detect_l3_mon(void)
 
 static __init void rdt_check_mba(void)
 {
-   if (rdt_cpu_has(X86_FEATURE_MBA))
-   rdt_get_mem_config(_resources_all[RDT_RESOURCE_MBA]);
+   if (rdt_cpu_has(X86_FEATURE_MBA)) {
+   if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+   
rdt_get_mem_config(_resources_all[RDT_RESOURCE_MBA]);
+   else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+   
rdt_get_mem_config_amd(_resources_all[RDT_RESOURCE_MBA]);
+   }
 }
 
 static __init void rdt_init_res_defs_intel(void)
@@ -884,10 +927,33 @@ static __init void rdt_init_res_defs_intel(void)
}
 }
 
+static __init void rdt_init_res_defs_amd(void)
+{
+   struct rdt_resource *r;
+
+   for_each_rdt_resource(r) {
+   if ((r->rid == RDT_RESOURCE_L3) ||
+   (r->rid == RDT_RESOURCE_L3DATA) ||
+   (r->rid == RDT_RESOURCE_L3CODE) ||
+   (r->rid == RDT_RESOURCE_L2) ||
+   (r->rid == RDT_RESOURCE_L2DATA) ||
+   (r->rid == RDT_RESOURCE_L2CODE))
+   r->cbm_validate = cbm_validate_amd;
+
+   else if (r->rid == RDT_RESOURCE_MBA) {
+r->msr_base = IA32_MBA_BW_BASE;
+r->msr_update = mba_wrmsr_amd;
+r->parse_ctrlval = parse_bw_amd;
+   }
+   }
+}
+
 static __init void rdt_init_res_defs(void)
 {
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
rdt_init_res_defs_intel();
+   else if (boot_cpu_data.x86_vendor == 

[RFC PATCH 10/10] arch/x86: Introduce QOS feature for AMD

2018-09-24 Thread Moger, Babu
Enables QOS feature on AMD.
Following QoS sub-features are supported in AMD if the underlying
hardware supports it.
 - L3 Cache allocation enforcement
 - L3 Cache occupancy monitoring
 - L3 Code-Data Prioritization support
 - Memory Bandwidth Enforcement(Allocation)

There are differences in the way some of the features are implemented.
Separate those functions and add those as vendor specific functions.
The major difference is in MBA feature.
 - AMD uses CPUID leaf 0x80000020 to initialize the MBA features.
 - AMD uses direct bandwidth value instead of delay based on bandwidth
   values.
 - MSR register base addresses are different for MBA.
 - Also AMD allows non-contiguous L3 cache bit masks.

Adds following functions to take care of the differences.
rdt_get_mem_config_amd : MBA initialization function
parse_bw_amd : Bandwidth parsing
mba_wrmsr_amd: Writes bandwidth value
cbm_validate_amd : L3 cache bitmask validation

Signed-off-by: Babu Moger 
---
 arch/x86/kernel/cpu/rdt.c | 70 ++-
 arch/x86/kernel/cpu/rdt.h |  3 +
 arch/x86/kernel/cpu/rdt_ctrlmondata.c | 81 +++
 3 files changed, 152 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/rdt.c b/arch/x86/kernel/cpu/rdt.c
index ae26b9b3fafa..21c17893cc0a 100644
--- a/arch/x86/kernel/cpu/rdt.c
+++ b/arch/x86/kernel/cpu/rdt.c
@@ -35,6 +35,7 @@
 
 #define MBA_IS_LINEAR  0x4
 #define MBA_MAX_MBPS   U32_MAX
+#define MAX_MBA_BW_AMD 0x800
 
 /* Mutex to protect rdtgroup access. */
 DEFINE_MUTEX(rdtgroup_mutex);
@@ -63,6 +64,9 @@ static void
 mba_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r);
 static void
 cat_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r);
+static void
+mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m,
+ struct rdt_resource *r);
 
 #define domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].domains)
 
@@ -282,6 +286,31 @@ static bool rdt_get_mem_config(struct rdt_resource *r)
return true;
 }
 
+static bool rdt_get_mem_config_amd(struct rdt_resource *r)
+{
+   union cpuid_0x10_3_eax eax;
+   union cpuid_0x10_x_edx edx;
+   u32 ebx, ecx;
+
+   cpuid_count(0x8020, 1, , , , );
+   r->num_closid = edx.split.cos_max + 1;
+   r->default_ctrl = MAX_MBA_BW_AMD;
+
+   /* AMD does not use delay. Set delay_linear to false by default */
+   r->membw.delay_linear = false;
+
+   /* FIX ME - May need to be read from MSR */
+   r->membw.min_bw = 0;
+   r->membw.bw_gran = 1;
+   /* Max value is 2048, Data width should be 4 in decimal */
+   r->data_width = 4;
+
+   r->alloc_capable = true;
+   r->alloc_enabled = true;
+
+   return true;
+}
+
 static void rdt_get_cache_alloc_cfg(int idx, struct rdt_resource *r)
 {
union cpuid_0x10_1_eax eax;
@@ -341,6 +370,16 @@ static int get_cache_id(int cpu, int level)
return -1;
 }
 
+static void
+mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m, struct rdt_resource 
*r)
+{
+   unsigned int i;
+
+   /*  Write the bw values for mba. */
+   for (i = m->low; i < m->high; i++)
+   wrmsrl(r->msr_base + i, d->ctrl_val[i]);
+}
+
 /*
  * Map the memory b/w percentage value to delay values
  * that can be written to QOS_MSRs.
@@ -858,8 +897,12 @@ static __init void rdt_detect_l3_mon(void)
 
 static __init void rdt_check_mba(void)
 {
-   if (rdt_cpu_has(X86_FEATURE_MBA))
-   rdt_get_mem_config(_resources_all[RDT_RESOURCE_MBA]);
+   if (rdt_cpu_has(X86_FEATURE_MBA)) {
+   if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+   
rdt_get_mem_config(_resources_all[RDT_RESOURCE_MBA]);
+   else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+   
rdt_get_mem_config_amd(_resources_all[RDT_RESOURCE_MBA]);
+   }
 }
 
 static __init void rdt_init_res_defs_intel(void)
@@ -884,10 +927,33 @@ static __init void rdt_init_res_defs_intel(void)
}
 }
 
+static __init void rdt_init_res_defs_amd(void)
+{
+   struct rdt_resource *r;
+
+   for_each_rdt_resource(r) {
+   if ((r->rid == RDT_RESOURCE_L3) ||
+   (r->rid == RDT_RESOURCE_L3DATA) ||
+   (r->rid == RDT_RESOURCE_L3CODE) ||
+   (r->rid == RDT_RESOURCE_L2) ||
+   (r->rid == RDT_RESOURCE_L2DATA) ||
+   (r->rid == RDT_RESOURCE_L2CODE))
+   r->cbm_validate = cbm_validate_amd;
+
+   else if (r->rid == RDT_RESOURCE_MBA) {
+r->msr_base = IA32_MBA_BW_BASE;
+r->msr_update = mba_wrmsr_amd;
+r->parse_ctrlval = parse_bw_amd;
+   }
+   }
+}
+
 static __init void rdt_init_res_defs(void)
 {
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
rdt_init_res_defs_intel();
+   else if (boot_cpu_data.x86_vendor ==