from:"Dennis Chen"

[PATCH v2 1/2] HID: multitouch: enable Surface 3 Type Cover Pro to report multitouch data

2017-01-19 Thread Dennis Chen

Nearly identical to the previous set of patches related to Microsoft
Surface Keyboards.

Removes Surface Pro 3 generation TypeCover support from hid-microsoft
so proper multitouch data can be reported from the touchpad.

Signed-off-by: Dennis Chen <barracks...@gmail.com>
---
 drivers/hid/hid-core.c  | 8 +---
 drivers/hid/hid-ids.h   | 3 ---
 drivers/hid/hid-microsoft.c | 6 --
 drivers/hid/usbhid/hid-quirks.c | 3 ---
 4 files changed, 1 insertion(+), 19 deletions(-)

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index d173e7f..3d78f2a 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -724,10 +724,7 @@ static void hid_scan_collection(struct hid_parser *parser, 
unsigned type)
hid->group = HID_GROUP_SENSOR_HUB;
 
if (hid->vendor == USB_VENDOR_ID_MICROSOFT &&
-   (hid->product == USB_DEVICE_ID_MS_TYPE_COVER_PRO_3 ||
-hid->product == USB_DEVICE_ID_MS_TYPE_COVER_PRO_3_2 ||
-hid->product == USB_DEVICE_ID_MS_TYPE_COVER_PRO_3_JP ||
-hid->product == USB_DEVICE_ID_MS_POWER_COVER) &&
+   hid->product == USB_DEVICE_ID_MS_POWER_COVER &&
hid->group == HID_GROUP_MULTITOUCH)
hid->group = HID_GROUP_GENERIC;
 
@@ -1982,9 +1979,6 @@ static const struct hid_device_id 
hid_have_special_driver[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_MS_DIGITAL_MEDIA_3K) },
{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_WIRELESS_OPTICAL_DESKTOP_3_0) },
{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_OFFICE_KB) },
-   { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_MS_TYPE_COVER_PRO_3) },
-   { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_MS_TYPE_COVER_PRO_3_2) },
-   { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_MS_TYPE_COVER_PRO_3_JP) },
{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_MS_DIGITAL_MEDIA_7K) },
{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_MS_DIGITAL_MEDIA_600) },
{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_MS_DIGITAL_MEDIA_3KV1) },
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 87486ae..12f00a6 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -722,9 +722,6 @@
 #define USB_DEVICE_ID_MS_SURFACE_PRO_2   0x0799
 #define USB_DEVICE_ID_MS_TOUCH_COVER_2   0x07a7
 #define USB_DEVICE_ID_MS_TYPE_COVER_20x07a9
-#define USB_DEVICE_ID_MS_TYPE_COVER_PRO_30x07dc
-#define USB_DEVICE_ID_MS_TYPE_COVER_PRO_3_2  0x07e2
-#define USB_DEVICE_ID_MS_TYPE_COVER_PRO_3_JP 0x07dd
 #define USB_DEVICE_ID_MS_POWER_COVER 0x07da
 
 #define USB_VENDOR_ID_MOJO 0x8282
diff --git a/drivers/hid/hid-microsoft.c b/drivers/hid/hid-microsoft.c
index d856726..96e7d32 100644
--- a/drivers/hid/hid-microsoft.c
+++ b/drivers/hid/hid-microsoft.c
@@ -274,12 +274,6 @@ static const struct hid_device_id ms_devices[] = {
.driver_data = MS_NOGET },
{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_MS_COMFORT_MOUSE_4500),
.driver_data = MS_DUPLICATE_USAGES },
-   { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_MS_TYPE_COVER_PRO_3),
-   .driver_data = MS_HIDINPUT },
-   { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_MS_TYPE_COVER_PRO_3_2),
-   .driver_data = MS_HIDINPUT },
-   { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_MS_TYPE_COVER_PRO_3_JP),
-   .driver_data = MS_HIDINPUT },
{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_POWER_COVER),
.driver_data = MS_HIDINPUT },
{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_MS_COMFORT_KEYBOARD),
diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c
index f6432c5..a1e5dc3 100644
--- a/drivers/hid/usbhid/hid-quirks.c
+++ b/drivers/hid/usbhid/hid-quirks.c
@@ -102,9 +102,6 @@ static const struct hid_blacklist {
{ USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_SURFACE_PRO_2, 
HID_QUIRK_NO_INIT_REPORTS },
{ USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_TYPE_COVER_2, 
HID_QUIRK_NO_INIT_REPORTS },
{ USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_TOUCH_COVER_2, 
HID_QUIRK_NO_INIT_REPORTS },
-   { USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_TYPE_COVER_PRO_3, 
HID_QUIRK_NO_INIT_REPORTS },
-   { USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_TYPE_COVER_PRO_3_2, 
HID_QUIRK_NO_INIT_REPORTS },
-   { USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_TYPE_COVER_PRO_3_JP, 
HID_QUIRK_NO_INIT_REPORTS },
{ USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_POWER_COVER, 
HID_QUIRK_NO_INIT_REPORTS },
{ USB_VENDOR_ID_MSI, USB_DEVICE_ID_MSI_GT683R_LED_PANEL, 
HID_QUIRK_NO_INIT_REPORTS },
{ USB_VENDOR_ID_NEXIO, USB_DEVICE_ID_NEXIO_MULTITOUCH_PTI0750, 
HID_QUIRK_NO_INIT_REPORTS },
-- 
2.7.4

[PATCH v2 1/2] HID: multitouch: enable Surface 3 Type Cover Pro to report multitouch data

2017-01-19 Thread Dennis Chen

Nearly identical to the previous set of patches related to Microsoft
Surface Keyboards.

Removes Surface Pro 3 generation TypeCover support from hid-microsoft
so proper multitouch data can be reported from the touchpad.

Signed-off-by: Dennis Chen 
---
 drivers/hid/hid-core.c  | 8 +---
 drivers/hid/hid-ids.h   | 3 ---
 drivers/hid/hid-microsoft.c | 6 --
 drivers/hid/usbhid/hid-quirks.c | 3 ---
 4 files changed, 1 insertion(+), 19 deletions(-)

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index d173e7f..3d78f2a 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -724,10 +724,7 @@ static void hid_scan_collection(struct hid_parser *parser, 
unsigned type)
hid->group = HID_GROUP_SENSOR_HUB;
 
if (hid->vendor == USB_VENDOR_ID_MICROSOFT &&
-   (hid->product == USB_DEVICE_ID_MS_TYPE_COVER_PRO_3 ||
-hid->product == USB_DEVICE_ID_MS_TYPE_COVER_PRO_3_2 ||
-hid->product == USB_DEVICE_ID_MS_TYPE_COVER_PRO_3_JP ||
-hid->product == USB_DEVICE_ID_MS_POWER_COVER) &&
+   hid->product == USB_DEVICE_ID_MS_POWER_COVER &&
hid->group == HID_GROUP_MULTITOUCH)
hid->group = HID_GROUP_GENERIC;
 
@@ -1982,9 +1979,6 @@ static const struct hid_device_id 
hid_have_special_driver[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_MS_DIGITAL_MEDIA_3K) },
{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_WIRELESS_OPTICAL_DESKTOP_3_0) },
{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_OFFICE_KB) },
-   { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_MS_TYPE_COVER_PRO_3) },
-   { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_MS_TYPE_COVER_PRO_3_2) },
-   { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_MS_TYPE_COVER_PRO_3_JP) },
{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_MS_DIGITAL_MEDIA_7K) },
{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_MS_DIGITAL_MEDIA_600) },
{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_MS_DIGITAL_MEDIA_3KV1) },
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 87486ae..12f00a6 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -722,9 +722,6 @@
 #define USB_DEVICE_ID_MS_SURFACE_PRO_2   0x0799
 #define USB_DEVICE_ID_MS_TOUCH_COVER_2   0x07a7
 #define USB_DEVICE_ID_MS_TYPE_COVER_20x07a9
-#define USB_DEVICE_ID_MS_TYPE_COVER_PRO_30x07dc
-#define USB_DEVICE_ID_MS_TYPE_COVER_PRO_3_2  0x07e2
-#define USB_DEVICE_ID_MS_TYPE_COVER_PRO_3_JP 0x07dd
 #define USB_DEVICE_ID_MS_POWER_COVER 0x07da
 
 #define USB_VENDOR_ID_MOJO 0x8282
diff --git a/drivers/hid/hid-microsoft.c b/drivers/hid/hid-microsoft.c
index d856726..96e7d32 100644
--- a/drivers/hid/hid-microsoft.c
+++ b/drivers/hid/hid-microsoft.c
@@ -274,12 +274,6 @@ static const struct hid_device_id ms_devices[] = {
.driver_data = MS_NOGET },
{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_MS_COMFORT_MOUSE_4500),
.driver_data = MS_DUPLICATE_USAGES },
-   { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_MS_TYPE_COVER_PRO_3),
-   .driver_data = MS_HIDINPUT },
-   { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_MS_TYPE_COVER_PRO_3_2),
-   .driver_data = MS_HIDINPUT },
-   { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_MS_TYPE_COVER_PRO_3_JP),
-   .driver_data = MS_HIDINPUT },
{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_POWER_COVER),
.driver_data = MS_HIDINPUT },
{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_MS_COMFORT_KEYBOARD),
diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c
index f6432c5..a1e5dc3 100644
--- a/drivers/hid/usbhid/hid-quirks.c
+++ b/drivers/hid/usbhid/hid-quirks.c
@@ -102,9 +102,6 @@ static const struct hid_blacklist {
{ USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_SURFACE_PRO_2, 
HID_QUIRK_NO_INIT_REPORTS },
{ USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_TYPE_COVER_2, 
HID_QUIRK_NO_INIT_REPORTS },
{ USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_TOUCH_COVER_2, 
HID_QUIRK_NO_INIT_REPORTS },
-   { USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_TYPE_COVER_PRO_3, 
HID_QUIRK_NO_INIT_REPORTS },
-   { USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_TYPE_COVER_PRO_3_2, 
HID_QUIRK_NO_INIT_REPORTS },
-   { USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_TYPE_COVER_PRO_3_JP, 
HID_QUIRK_NO_INIT_REPORTS },
{ USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_POWER_COVER, 
HID_QUIRK_NO_INIT_REPORTS },
{ USB_VENDOR_ID_MSI, USB_DEVICE_ID_MSI_GT683R_LED_PANEL, 
HID_QUIRK_NO_INIT_REPORTS },
{ USB_VENDOR_ID_NEXIO, USB_DEVICE_ID_NEXIO_MULTITOUCH_PTI0750, 
HID_QUIRK_NO_INIT_REPORTS },
-- 
2.7.4

[PATCH v2 2/2] HID: whitespace cleanup

2017-01-19 Thread Dennis Chen

Removes trailing whitespace in hid-core.c and usbhid/hid-quirks.c

Signed-off-by: Dennis Chen <barracks...@gmail.com>
---
 drivers/hid/hid-core.c  | 2 +-
 drivers/hid/usbhid/hid-quirks.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 3d78f2a..52863e7 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -2302,7 +2302,7 @@ __ATTRIBUTE_GROUPS(hid_dev);
 
 static int hid_uevent(struct device *dev, struct kobj_uevent_env *env)
 {
-   struct hid_device *hdev = to_hid_device(dev);   
+   struct hid_device *hdev = to_hid_device(dev);
 
if (add_uevent_var(env, "HID_ID=%04X:%08X:%08X",
hdev->bus, hdev->vendor, hdev->product))
diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c
index a1e5dc3..80df173 100644
--- a/drivers/hid/usbhid/hid-quirks.c
+++ b/drivers/hid/usbhid/hid-quirks.c
@@ -290,7 +290,7 @@ static void usbhid_remove_all_dquirks(void)
 
 }
 
-/** 
+/**
  * usbhid_quirks_init: apply USB HID quirks specified at module load time
  */
 int usbhid_quirks_init(char **quirks_param)
@@ -354,7 +354,7 @@ static const struct hid_blacklist 
*usbhid_exists_squirk(const u16 idVendor,
 
if (bl_entry != NULL)
dbg_hid("Found squirk 0x%x for USB HID vendor 0x%hx prod 
0x%hx\n",
-   bl_entry->quirks, bl_entry->idVendor, 
+   bl_entry->quirks, bl_entry->idVendor,
bl_entry->idProduct);
return bl_entry;
 }
-- 
2.7.4

[PATCH v2 2/2] HID: whitespace cleanup

2017-01-19 Thread Dennis Chen

Removes trailing whitespace in hid-core.c and usbhid/hid-quirks.c

Signed-off-by: Dennis Chen 
---
 drivers/hid/hid-core.c  | 2 +-
 drivers/hid/usbhid/hid-quirks.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 3d78f2a..52863e7 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -2302,7 +2302,7 @@ __ATTRIBUTE_GROUPS(hid_dev);
 
 static int hid_uevent(struct device *dev, struct kobj_uevent_env *env)
 {
-   struct hid_device *hdev = to_hid_device(dev);   
+   struct hid_device *hdev = to_hid_device(dev);
 
if (add_uevent_var(env, "HID_ID=%04X:%08X:%08X",
hdev->bus, hdev->vendor, hdev->product))
diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c
index a1e5dc3..80df173 100644
--- a/drivers/hid/usbhid/hid-quirks.c
+++ b/drivers/hid/usbhid/hid-quirks.c
@@ -290,7 +290,7 @@ static void usbhid_remove_all_dquirks(void)
 
 }
 
-/** 
+/**
  * usbhid_quirks_init: apply USB HID quirks specified at module load time
  */
 int usbhid_quirks_init(char **quirks_param)
@@ -354,7 +354,7 @@ static const struct hid_blacklist 
*usbhid_exists_squirk(const u16 idVendor,
 
if (bl_entry != NULL)
dbg_hid("Found squirk 0x%x for USB HID vendor 0x%hx prod 
0x%hx\n",
-   bl_entry->quirks, bl_entry->idVendor, 
+   bl_entry->quirks, bl_entry->idVendor,
bl_entry->idProduct);
return bl_entry;
 }
-- 
2.7.4

[PATCH] HID: multitouch: enable Surface 3 Type Cover Pro to report multitouch data

2017-01-19 Thread Dennis Chen

Hi,

This should be the last related patch for the Surface Pro
Type Covers.

Sincerely,
Dennis Chen

>8--8<

Nearly identical to the previous set of patches related to Microsoft
Surface Keyboards.

Removes Surface Pro 3 generation TypeCover support from hid-microsoft
so proper multitouch data can be reported from the touchpad.

Signed-off-by: Dennis Chen <barracks...@gmail.com>
---
 drivers/hid/hid-core.c  | 10 ++
 drivers/hid/hid-ids.h   |  3 ---
 drivers/hid/hid-microsoft.c |  6 --
 drivers/hid/usbhid/hid-quirks.c |  7 ++-
 4 files changed, 4 insertions(+), 22 deletions(-)

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index d173e7f..52863e7 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -724,10 +724,7 @@ static void hid_scan_collection(struct hid_parser *parser, 
unsigned type)
hid->group = HID_GROUP_SENSOR_HUB;
 
if (hid->vendor == USB_VENDOR_ID_MICROSOFT &&
-   (hid->product == USB_DEVICE_ID_MS_TYPE_COVER_PRO_3 ||
-hid->product == USB_DEVICE_ID_MS_TYPE_COVER_PRO_3_2 ||
-hid->product == USB_DEVICE_ID_MS_TYPE_COVER_PRO_3_JP ||
-hid->product == USB_DEVICE_ID_MS_POWER_COVER) &&
+   hid->product == USB_DEVICE_ID_MS_POWER_COVER &&
hid->group == HID_GROUP_MULTITOUCH)
hid->group = HID_GROUP_GENERIC;
 
@@ -1982,9 +1979,6 @@ static const struct hid_device_id 
hid_have_special_driver[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_MS_DIGITAL_MEDIA_3K) },
{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_WIRELESS_OPTICAL_DESKTOP_3_0) },
{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_OFFICE_KB) },
-   { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_MS_TYPE_COVER_PRO_3) },
-   { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_MS_TYPE_COVER_PRO_3_2) },
-   { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_MS_TYPE_COVER_PRO_3_JP) },
{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_MS_DIGITAL_MEDIA_7K) },
{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_MS_DIGITAL_MEDIA_600) },
{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_MS_DIGITAL_MEDIA_3KV1) },
@@ -2308,7 +2302,7 @@ __ATTRIBUTE_GROUPS(hid_dev);
 
 static int hid_uevent(struct device *dev, struct kobj_uevent_env *env)
 {
-   struct hid_device *hdev = to_hid_device(dev);   
+   struct hid_device *hdev = to_hid_device(dev);
 
if (add_uevent_var(env, "HID_ID=%04X:%08X:%08X",
hdev->bus, hdev->vendor, hdev->product))
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 87486ae..12f00a6 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -722,9 +722,6 @@
 #define USB_DEVICE_ID_MS_SURFACE_PRO_2   0x0799
 #define USB_DEVICE_ID_MS_TOUCH_COVER_2   0x07a7
 #define USB_DEVICE_ID_MS_TYPE_COVER_20x07a9
-#define USB_DEVICE_ID_MS_TYPE_COVER_PRO_30x07dc
-#define USB_DEVICE_ID_MS_TYPE_COVER_PRO_3_2  0x07e2
-#define USB_DEVICE_ID_MS_TYPE_COVER_PRO_3_JP 0x07dd
 #define USB_DEVICE_ID_MS_POWER_COVER 0x07da
 
 #define USB_VENDOR_ID_MOJO 0x8282
diff --git a/drivers/hid/hid-microsoft.c b/drivers/hid/hid-microsoft.c
index d856726..96e7d32 100644
--- a/drivers/hid/hid-microsoft.c
+++ b/drivers/hid/hid-microsoft.c
@@ -274,12 +274,6 @@ static const struct hid_device_id ms_devices[] = {
.driver_data = MS_NOGET },
{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_MS_COMFORT_MOUSE_4500),
.driver_data = MS_DUPLICATE_USAGES },
-   { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_MS_TYPE_COVER_PRO_3),
-   .driver_data = MS_HIDINPUT },
-   { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_MS_TYPE_COVER_PRO_3_2),
-   .driver_data = MS_HIDINPUT },
-   { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_MS_TYPE_COVER_PRO_3_JP),
-   .driver_data = MS_HIDINPUT },
{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_POWER_COVER),
.driver_data = MS_HIDINPUT },
{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_MS_COMFORT_KEYBOARD),
diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c
index f6432c5..80df173 100644
--- a/drivers/hid/usbhid/hid-quirks.c
+++ b/drivers/hid/usbhid/hid-quirks.c
@@ -102,9 +102,6 @@ static const struct hid_blacklist {
{ USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_SURFACE_PRO_2, 
HID_QUIRK_NO_INIT_REPORTS },
{ USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_TYPE_COVER_2, 
HID_QUIRK_NO_INIT_REPORTS },
{ USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_TOUCH_COVER_2, 
HID_QUIRK_NO_INIT_REPORTS },
-   { USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS

[PATCH] HID: multitouch: enable Surface 3 Type Cover Pro to report multitouch data

2017-01-19 Thread Dennis Chen

Hi,

This should be the last related patch for the Surface Pro
Type Covers.

Sincerely,
Dennis Chen

>8--8<

Nearly identical to the previous set of patches related to Microsoft
Surface Keyboards.

Removes Surface Pro 3 generation TypeCover support from hid-microsoft
so proper multitouch data can be reported from the touchpad.

Signed-off-by: Dennis Chen 
---
 drivers/hid/hid-core.c  | 10 ++
 drivers/hid/hid-ids.h   |  3 ---
 drivers/hid/hid-microsoft.c |  6 --
 drivers/hid/usbhid/hid-quirks.c |  7 ++-
 4 files changed, 4 insertions(+), 22 deletions(-)

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index d173e7f..52863e7 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -724,10 +724,7 @@ static void hid_scan_collection(struct hid_parser *parser, 
unsigned type)
hid->group = HID_GROUP_SENSOR_HUB;
 
if (hid->vendor == USB_VENDOR_ID_MICROSOFT &&
-   (hid->product == USB_DEVICE_ID_MS_TYPE_COVER_PRO_3 ||
-hid->product == USB_DEVICE_ID_MS_TYPE_COVER_PRO_3_2 ||
-hid->product == USB_DEVICE_ID_MS_TYPE_COVER_PRO_3_JP ||
-hid->product == USB_DEVICE_ID_MS_POWER_COVER) &&
+   hid->product == USB_DEVICE_ID_MS_POWER_COVER &&
hid->group == HID_GROUP_MULTITOUCH)
hid->group = HID_GROUP_GENERIC;
 
@@ -1982,9 +1979,6 @@ static const struct hid_device_id 
hid_have_special_driver[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_MS_DIGITAL_MEDIA_3K) },
{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_WIRELESS_OPTICAL_DESKTOP_3_0) },
{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_OFFICE_KB) },
-   { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_MS_TYPE_COVER_PRO_3) },
-   { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_MS_TYPE_COVER_PRO_3_2) },
-   { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_MS_TYPE_COVER_PRO_3_JP) },
{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_MS_DIGITAL_MEDIA_7K) },
{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_MS_DIGITAL_MEDIA_600) },
{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_MS_DIGITAL_MEDIA_3KV1) },
@@ -2308,7 +2302,7 @@ __ATTRIBUTE_GROUPS(hid_dev);
 
 static int hid_uevent(struct device *dev, struct kobj_uevent_env *env)
 {
-   struct hid_device *hdev = to_hid_device(dev);   
+   struct hid_device *hdev = to_hid_device(dev);
 
if (add_uevent_var(env, "HID_ID=%04X:%08X:%08X",
hdev->bus, hdev->vendor, hdev->product))
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 87486ae..12f00a6 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -722,9 +722,6 @@
 #define USB_DEVICE_ID_MS_SURFACE_PRO_2   0x0799
 #define USB_DEVICE_ID_MS_TOUCH_COVER_2   0x07a7
 #define USB_DEVICE_ID_MS_TYPE_COVER_20x07a9
-#define USB_DEVICE_ID_MS_TYPE_COVER_PRO_30x07dc
-#define USB_DEVICE_ID_MS_TYPE_COVER_PRO_3_2  0x07e2
-#define USB_DEVICE_ID_MS_TYPE_COVER_PRO_3_JP 0x07dd
 #define USB_DEVICE_ID_MS_POWER_COVER 0x07da
 
 #define USB_VENDOR_ID_MOJO 0x8282
diff --git a/drivers/hid/hid-microsoft.c b/drivers/hid/hid-microsoft.c
index d856726..96e7d32 100644
--- a/drivers/hid/hid-microsoft.c
+++ b/drivers/hid/hid-microsoft.c
@@ -274,12 +274,6 @@ static const struct hid_device_id ms_devices[] = {
.driver_data = MS_NOGET },
{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_MS_COMFORT_MOUSE_4500),
.driver_data = MS_DUPLICATE_USAGES },
-   { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_MS_TYPE_COVER_PRO_3),
-   .driver_data = MS_HIDINPUT },
-   { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_MS_TYPE_COVER_PRO_3_2),
-   .driver_data = MS_HIDINPUT },
-   { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_MS_TYPE_COVER_PRO_3_JP),
-   .driver_data = MS_HIDINPUT },
{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_POWER_COVER),
.driver_data = MS_HIDINPUT },
{ HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, 
USB_DEVICE_ID_MS_COMFORT_KEYBOARD),
diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c
index f6432c5..80df173 100644
--- a/drivers/hid/usbhid/hid-quirks.c
+++ b/drivers/hid/usbhid/hid-quirks.c
@@ -102,9 +102,6 @@ static const struct hid_blacklist {
{ USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_SURFACE_PRO_2, 
HID_QUIRK_NO_INIT_REPORTS },
{ USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_TYPE_COVER_2, 
HID_QUIRK_NO_INIT_REPORTS },
{ USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_TOUCH_COVER_2, 
HID_QUIRK_NO_INIT_REPORTS },
-   { USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_TYPE_COVER_PRO_3, 
HID_QUIRK_NO_INIT_REPOR

Re: [PATCH V10 1/8] ACPI: I/O Remapping Table (IORT) initial support

2016-09-08 Thread Dennis Chen

Hi Tomasz,

On Tue, Sep 06, 2016 at 11:08:51AM +0200, Tomasz Nowicki wrote:
[...]
> +static int iort_id_map(struct acpi_iort_id_mapping *map, u8 type, u32 rid_in,
> +u32 *rid_out)
> +{
> + /* Single mapping does not care for input id */
> + if (map->flags & ACPI_IORT_ID_SINGLE_MAPPING) {
> + if (type == ACPI_IORT_NODE_NAMED_COMPONENT ||
> + type == ACPI_IORT_NODE_PCI_ROOT_COMPLEX) {
> + *rid_out = map->output_base;
> + return 0;
> + }
> +
> + pr_warn(FW_BUG "[map %p] SINGLE MAPPING flag not allowed for 
> node type %d, skipping ID map\n",
> + map, type);
> + return -ENXIO;
> + }
> +
> + if (rid_in < map->input_base ||
> + (rid_in >= map->input_base + map->id_count))
> + return -ENXIO;
> +
> + *rid_out = map->output_base + (rid_in - map->input_base);
> + return 0;
> +}
> +
> +static struct acpi_iort_node *iort_node_map_rid(struct acpi_iort_node *node,
> + u32 rid_in, u32 *rid_out,
> + u8 type)
> +{
> + u32 rid = rid_in;
> +
> + /* Parse the ID mapping tree to find specified node type */
> + while (node) {
> + struct acpi_iort_id_mapping *map;
> + int i;
> +
> + if (node->type == type) {
> + if (rid_out)
> + *rid_out = rid;
> + return node;
> + }
> +
> + if (!node->mapping_offset || !node->mapping_count)
> + goto fail_map;
If node->mapping_count is zero, then node->mapping_offset must be zero. A 
firmware bug
otherwise?
> +
> + map = ACPI_ADD_PTR(struct acpi_iort_id_mapping, node,
> +node->mapping_offset);
> +
> + /* Firmware bug! */
> + if (!map->output_reference) {
> + pr_err(FW_BUG "[node %p type %d] ID map has NULL parent 
> reference\n",
> +node, node->type);
> + goto fail_map;
> + }
> +
> + /* Do the RID translation */
> + for (i = 0; i < node->mapping_count; i++, map++) {
> + if (!iort_id_map(map, node->type, rid, ))
> + break;
> + }
>
Just curious about if there is kind of possibility that we can get some 
reduplicated DeviceIDs
with a deliberate ID mapping design in FW for the SMMU/RC node. For instance, 
for a system with 2
PCIe RCs both behind an individual SMMU device, how to make sure the StreamID 
mapped is unique 
across the entire system, the same for DeviceID mapped. 
Anyway, this is my personal confusion, maybe it's not a problem at all for 
current design ;-)

Thanks,
Dennis
> +
> + if (i == node->mapping_count)
> + goto fail_map;
> +
> + node = ACPI_ADD_PTR(struct acpi_iort_node, iort_table,
> + map->output_reference);
> + }
> +
> +fail_map:
> + /* Map input RID to output RID unchanged on mapping failure*/
> + if (rid_out)
> + *rid_out = rid_in;
> +
> + return NULL;
> +}
> +
> +static struct acpi_iort_node *iort_find_dev_node(struct device *dev)
> +{
> + struct pci_bus *pbus;
> +
> + if (!dev_is_pci(dev))
> + return iort_scan_node(ACPI_IORT_NODE_NAMED_COMPONENT,
> +   iort_match_node_callback, dev);
> +
> + /* Find a PCI root bus */
> + pbus = to_pci_dev(dev)->bus;
> + while (!pci_is_root_bus(pbus))
> + pbus = pbus->parent;
> +
> + return iort_scan_node(ACPI_IORT_NODE_PCI_ROOT_COMPLEX,
> +   iort_match_node_callback, >dev);
> +}
> +
> +void __init acpi_iort_init(void)
> +{
> + acpi_status status;
> +
> + status = acpi_get_table(ACPI_SIG_IORT, 0, _table);
> + if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) {
> + const char *msg = acpi_format_exception(status);
> + pr_err("Failed to get table, %s\n", msg);
> + }
> +}
> diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
> index 85b7d07..e56e643 100644
> --- a/drivers/acpi/bus.c
> +++ b/drivers/acpi/bus.c
> @@ -36,6 +36,7 @@
>  #ifdef CONFIG_X86
>  #include 
>  #endif
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -1186,6 +1187,7 @@ static int __init acpi_init(void)
>   }
>  
>   pci_mmcfg_late_init();
> + acpi_iort_init();
>   acpi_scan_init();
>   acpi_ec_init();
>   acpi_debugfs_init();
> diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h
> new file mode 100644
> index 000..fcacaf7
> --- /dev/null
> +++ b/include/linux/acpi_iort.h
> @@ -0,0 +1,30 @@
> +/*
> + * Copyright (C) 2016, Semihalf
> + *   Author: Tomasz Nowicki 
> + *
> + * This program is free software;

Re: [PATCH V10 1/8] ACPI: I/O Remapping Table (IORT) initial support

2016-09-08 Thread Dennis Chen

Hi Tomasz,

On Tue, Sep 06, 2016 at 11:08:51AM +0200, Tomasz Nowicki wrote:
[...]
> +static int iort_id_map(struct acpi_iort_id_mapping *map, u8 type, u32 rid_in,
> +u32 *rid_out)
> +{
> + /* Single mapping does not care for input id */
> + if (map->flags & ACPI_IORT_ID_SINGLE_MAPPING) {
> + if (type == ACPI_IORT_NODE_NAMED_COMPONENT ||
> + type == ACPI_IORT_NODE_PCI_ROOT_COMPLEX) {
> + *rid_out = map->output_base;
> + return 0;
> + }
> +
> + pr_warn(FW_BUG "[map %p] SINGLE MAPPING flag not allowed for 
> node type %d, skipping ID map\n",
> + map, type);
> + return -ENXIO;
> + }
> +
> + if (rid_in < map->input_base ||
> + (rid_in >= map->input_base + map->id_count))
> + return -ENXIO;
> +
> + *rid_out = map->output_base + (rid_in - map->input_base);
> + return 0;
> +}
> +
> +static struct acpi_iort_node *iort_node_map_rid(struct acpi_iort_node *node,
> + u32 rid_in, u32 *rid_out,
> + u8 type)
> +{
> + u32 rid = rid_in;
> +
> + /* Parse the ID mapping tree to find specified node type */
> + while (node) {
> + struct acpi_iort_id_mapping *map;
> + int i;
> +
> + if (node->type == type) {
> + if (rid_out)
> + *rid_out = rid;
> + return node;
> + }
> +
> + if (!node->mapping_offset || !node->mapping_count)
> + goto fail_map;
If node->mapping_count is zero, then node->mapping_offset must be zero. A 
firmware bug
otherwise?
> +
> + map = ACPI_ADD_PTR(struct acpi_iort_id_mapping, node,
> +node->mapping_offset);
> +
> + /* Firmware bug! */
> + if (!map->output_reference) {
> + pr_err(FW_BUG "[node %p type %d] ID map has NULL parent 
> reference\n",
> +node, node->type);
> + goto fail_map;
> + }
> +
> + /* Do the RID translation */
> + for (i = 0; i < node->mapping_count; i++, map++) {
> + if (!iort_id_map(map, node->type, rid, ))
> + break;
> + }
>
Just curious about if there is kind of possibility that we can get some 
reduplicated DeviceIDs
with a deliberate ID mapping design in FW for the SMMU/RC node. For instance, 
for a system with 2
PCIe RCs both behind an individual SMMU device, how to make sure the StreamID 
mapped is unique 
across the entire system, the same for DeviceID mapped. 
Anyway, this is my personal confusion, maybe it's not a problem at all for 
current design ;-)

Thanks,
Dennis
> +
> + if (i == node->mapping_count)
> + goto fail_map;
> +
> + node = ACPI_ADD_PTR(struct acpi_iort_node, iort_table,
> + map->output_reference);
> + }
> +
> +fail_map:
> + /* Map input RID to output RID unchanged on mapping failure*/
> + if (rid_out)
> + *rid_out = rid_in;
> +
> + return NULL;
> +}
> +
> +static struct acpi_iort_node *iort_find_dev_node(struct device *dev)
> +{
> + struct pci_bus *pbus;
> +
> + if (!dev_is_pci(dev))
> + return iort_scan_node(ACPI_IORT_NODE_NAMED_COMPONENT,
> +   iort_match_node_callback, dev);
> +
> + /* Find a PCI root bus */
> + pbus = to_pci_dev(dev)->bus;
> + while (!pci_is_root_bus(pbus))
> + pbus = pbus->parent;
> +
> + return iort_scan_node(ACPI_IORT_NODE_PCI_ROOT_COMPLEX,
> +   iort_match_node_callback, >dev);
> +}
> +
> +void __init acpi_iort_init(void)
> +{
> + acpi_status status;
> +
> + status = acpi_get_table(ACPI_SIG_IORT, 0, _table);
> + if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) {
> + const char *msg = acpi_format_exception(status);
> + pr_err("Failed to get table, %s\n", msg);
> + }
> +}
> diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
> index 85b7d07..e56e643 100644
> --- a/drivers/acpi/bus.c
> +++ b/drivers/acpi/bus.c
> @@ -36,6 +36,7 @@
>  #ifdef CONFIG_X86
>  #include 
>  #endif
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -1186,6 +1187,7 @@ static int __init acpi_init(void)
>   }
>  
>   pci_mmcfg_late_init();
> + acpi_iort_init();
>   acpi_scan_init();
>   acpi_ec_init();
>   acpi_debugfs_init();
> diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h
> new file mode 100644
> index 000..fcacaf7
> --- /dev/null
> +++ b/include/linux/acpi_iort.h
> @@ -0,0 +1,30 @@
> +/*
> + * Copyright (C) 2016, Semihalf
> + *   Author: Tomasz Nowicki 
> + *
> + * This program is free software; you can

Re: [PATCH V8 1/8] ACPI: I/O Remapping Table (IORT) initial support

2016-08-18 Thread Dennis Chen

On Thu, Aug 18, 2016 at 12:14:20PM +0100, Lorenzo Pieralisi wrote:
> On Thu, Aug 18, 2016 at 06:55:50PM +0800, Dennis Chen wrote:
> 
> [...]
> 
> > > +static struct acpi_iort_node *
> > > +iort_scan_node(enum acpi_iort_node_type type,
> > > +iort_find_node_callback callback, void *context)
> > > +{
> > > + struct acpi_iort_node *iort_node, *iort_end;
> > > + struct acpi_table_iort *iort;
> > > + int i;
> > > +
> > > + /* Get the first IORT node */
> > > + iort = (struct acpi_table_iort *)iort_table;
> > >
> > Here, the same as I comments on Lorenzo's patch. If IORT is missed in
> > the firmware, then iort_table will be NULL, result in kernel panic in
> > the codes followed.
> 
> The pointer is checked in the functions that are visible to
> other compilation units before calling this function:
> 
> iort_msi_map_rid()
> iort_get_device_domain()
> 
> I missed to check it when I added iort_node_match() and
> iort_iommu_configure() though, which makes me think that it is probably
> better to move the iort_table pointer check to iort_scan_node() so that
> it is done in one single place instead of adding it to all given
> external interfaces.
>
Agree to check this in iort_scan_node(). Also, given some duplicate codes 
between
iort_scan_node() and iort_smmu_init(), so maybe we can think about to refactor
iort_match_callback() as the way to implement the function of iort_smmu_int(),
thus we can call iort_node_match(ACPI_IORT_NODE_SMMU_V3)...

Thanks,
Dennis
> 
> Lorenzo
> 
> > 
> > Thanks,
> > Dennis
> > > + iort_node = ACPI_ADD_PTR(struct acpi_iort_node, iort,
> > > +  iort->node_offset);
> > > + iort_end = ACPI_ADD_PTR(struct acpi_iort_node, iort_table,
> > > + iort_table->length);
> > > +
> > > + for (i = 0; i < iort->node_count; i++) {
> > > + if (WARN_TAINT(iort_node >= iort_end, TAINT_FIRMWARE_WORKAROUND,
> > > +"IORT node pointer overflows, bad table!\n"))
> > > + return NULL;
> > > +
> > > + if (iort_node->type == type) {
> > > + if (ACPI_SUCCESS(callback(iort_node, context)))
> > > + return iort_node;
> > > + }
> > > +
> > > + iort_node = ACPI_ADD_PTR(struct acpi_iort_node, iort_node,
> > > +  iort_node->length);
> > > + }
> > > +
> > > + return NULL;
> > > +}
> > > +
> > > +static acpi_status
> > > +iort_match_node_callback(struct acpi_iort_node *node, void *context)
> > > +{
> > > + struct device *dev = context;
> > > +
> > > + switch (node->type) {
> > > + case ACPI_IORT_NODE_NAMED_COMPONENT: {
> > > + struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
> > > + struct acpi_device *adev = to_acpi_device_node(dev->fwnode);
> > > + struct acpi_iort_named_component *ncomp;
> > > +
> > > + if (!adev)
> > > + break;
> > > +
> > > + ncomp = (struct acpi_iort_named_component *)node->node_data;
> > > +
> > > + if (ACPI_FAILURE(acpi_get_name(adev->handle,
> > > +ACPI_FULL_PATHNAME, ))) {
> > > + dev_warn(dev, "Can't get device full path name\n");
> > > + } else {
> > > + int match;
> > > +
> > > + match = !strcmp(ncomp->device_name, buffer.pointer);
> > > + acpi_os_free(buffer.pointer);
> > > +
> > > + if (match)
> > > + return AE_OK;
> > > + }
> > > +
> > > + break;
> > > + }
> > > + case ACPI_IORT_NODE_PCI_ROOT_COMPLEX: {
> > > + struct acpi_iort_root_complex *pci_rc;
> > > + struct pci_bus *bus;
> > > +
> > > + bus = to_pci_bus(dev);
> > > + pci_rc = (struct acpi_iort_root_complex *)node->node_data;
> > > +
> > > + /*
> > > +  * It is assumed that PCI segment numbers maps one-to-one
> > > +  * with root complexes. Each segment number can represent only
> > > +  * one root complex.
> > > +  */
> > > + if (pci_rc->pci_segment_number == pci_domain_nr(bus))
>

Re: [PATCH V8 1/8] ACPI: I/O Remapping Table (IORT) initial support

2016-08-18 Thread Dennis Chen

On Thu, Aug 18, 2016 at 12:14:20PM +0100, Lorenzo Pieralisi wrote:
> On Thu, Aug 18, 2016 at 06:55:50PM +0800, Dennis Chen wrote:
> 
> [...]
> 
> > > +static struct acpi_iort_node *
> > > +iort_scan_node(enum acpi_iort_node_type type,
> > > +iort_find_node_callback callback, void *context)
> > > +{
> > > + struct acpi_iort_node *iort_node, *iort_end;
> > > + struct acpi_table_iort *iort;
> > > + int i;
> > > +
> > > + /* Get the first IORT node */
> > > + iort = (struct acpi_table_iort *)iort_table;
> > >
> > Here, the same as I comments on Lorenzo's patch. If IORT is missed in
> > the firmware, then iort_table will be NULL, result in kernel panic in
> > the codes followed.
> 
> The pointer is checked in the functions that are visible to
> other compilation units before calling this function:
> 
> iort_msi_map_rid()
> iort_get_device_domain()
> 
> I missed to check it when I added iort_node_match() and
> iort_iommu_configure() though, which makes me think that it is probably
> better to move the iort_table pointer check to iort_scan_node() so that
> it is done in one single place instead of adding it to all given
> external interfaces.
>
Agree to check this in iort_scan_node(). Also, given some duplicate codes 
between
iort_scan_node() and iort_smmu_init(), so maybe we can think about to refactor
iort_match_callback() as the way to implement the function of iort_smmu_int(),
thus we can call iort_node_match(ACPI_IORT_NODE_SMMU_V3)...

Thanks,
Dennis
> 
> Lorenzo
> 
> > 
> > Thanks,
> > Dennis
> > > + iort_node = ACPI_ADD_PTR(struct acpi_iort_node, iort,
> > > +  iort->node_offset);
> > > + iort_end = ACPI_ADD_PTR(struct acpi_iort_node, iort_table,
> > > + iort_table->length);
> > > +
> > > + for (i = 0; i < iort->node_count; i++) {
> > > + if (WARN_TAINT(iort_node >= iort_end, TAINT_FIRMWARE_WORKAROUND,
> > > +"IORT node pointer overflows, bad table!\n"))
> > > + return NULL;
> > > +
> > > + if (iort_node->type == type) {
> > > + if (ACPI_SUCCESS(callback(iort_node, context)))
> > > + return iort_node;
> > > + }
> > > +
> > > + iort_node = ACPI_ADD_PTR(struct acpi_iort_node, iort_node,
> > > +  iort_node->length);
> > > + }
> > > +
> > > + return NULL;
> > > +}
> > > +
> > > +static acpi_status
> > > +iort_match_node_callback(struct acpi_iort_node *node, void *context)
> > > +{
> > > + struct device *dev = context;
> > > +
> > > + switch (node->type) {
> > > + case ACPI_IORT_NODE_NAMED_COMPONENT: {
> > > + struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
> > > + struct acpi_device *adev = to_acpi_device_node(dev->fwnode);
> > > + struct acpi_iort_named_component *ncomp;
> > > +
> > > + if (!adev)
> > > + break;
> > > +
> > > + ncomp = (struct acpi_iort_named_component *)node->node_data;
> > > +
> > > + if (ACPI_FAILURE(acpi_get_name(adev->handle,
> > > +ACPI_FULL_PATHNAME, ))) {
> > > + dev_warn(dev, "Can't get device full path name\n");
> > > + } else {
> > > + int match;
> > > +
> > > + match = !strcmp(ncomp->device_name, buffer.pointer);
> > > + acpi_os_free(buffer.pointer);
> > > +
> > > + if (match)
> > > + return AE_OK;
> > > + }
> > > +
> > > + break;
> > > + }
> > > + case ACPI_IORT_NODE_PCI_ROOT_COMPLEX: {
> > > + struct acpi_iort_root_complex *pci_rc;
> > > + struct pci_bus *bus;
> > > +
> > > + bus = to_pci_bus(dev);
> > > + pci_rc = (struct acpi_iort_root_complex *)node->node_data;
> > > +
> > > + /*
> > > +  * It is assumed that PCI segment numbers maps one-to-one
> > > +  * with root complexes. Each segment number can represent only
> > > +  * one root complex.
> > > +  */
> > > + if (pci_rc->pci_segment_number == pci_domain_nr(bus))
>

Re: [PATCH V8 1/8] ACPI: I/O Remapping Table (IORT) initial support

2016-08-18 Thread Dennis Chen

Hi Tomasz,

On Thu, Aug 11, 2016 at 12:06:31PM +0200, Tomasz Nowicki wrote:
> IORT shows representation of IO topology for ARM based systems.
> It describes how various components are connected together on
> parent-child basis e.g. PCI RC -> SMMU -> ITS. Also see IORT spec.
> http://infocenter.arm.com/help/topic/com.arm.doc.den0049b/DEN0049B_IO_Remapping_Table.pdf
> 
> Initial support allows to detect IORT table presence and save its
> root pointer obtained through acpi_get_table(). The pointer validity
> depends on acpi_gbl_permanent_mmap because if acpi_gbl_permanent_mmap
> is not set while using IORT nodes we would dereference unmapped pointers.
> 
> For the aforementioned reason call iort_table_detect() from acpi_init()
> which guarantees acpi_gbl_permanent_mmap to be set at that point.
> 
> Add generic helpers which are helpful for scanning and retrieving
> information from IORT table content. List of the most important helpers:
> - iort_find_dev_node() finds IORT node for a given device
> - iort_node_map_rid() maps device RID and returns IORT node which provides
>   final translation
> 
> IORT support is placed under drivers/acpi/arm64/ new directory due to its
> ARM64 specific nature. The code there is considered only for ARM64.
> The long term plan is to keep all ARM64 specific tables support
> in this place e.g. GTDT table.
> 
> Signed-off-by: Tomasz Nowicki 
> Reviewed-by: Hanjun Guo 
> ---
>  drivers/acpi/Kconfig|   5 +
>  drivers/acpi/Makefile   |   2 +
>  drivers/acpi/arm64/Kconfig  |   6 ++
>  drivers/acpi/arm64/Makefile |   1 +
>  drivers/acpi/arm64/iort.c   | 218 
> 
>  drivers/acpi/bus.c  |   2 +
>  include/linux/iort.h|  30 ++
>  7 files changed, 264 insertions(+)
>  create mode 100644 drivers/acpi/arm64/Kconfig
>  create mode 100644 drivers/acpi/arm64/Makefile
>  create mode 100644 drivers/acpi/arm64/iort.c
>  create mode 100644 include/linux/iort.h
> 
> diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
> index 445ce28..6cef2d1 100644
> --- a/drivers/acpi/Kconfig
> +++ b/drivers/acpi/Kconfig
> @@ -521,4 +521,9 @@ config ACPI_CONFIGFS
> userspace. The configurable ACPI groups will be visible under
> /config/acpi, assuming configfs is mounted under /config.
>  
> +if ARM64
> +source "drivers/acpi/arm64/Kconfig"
> +
> +endif
> +
>  endif# ACPI
> diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
> index 5ae9d85..e5ada78 100644
> --- a/drivers/acpi/Makefile
> +++ b/drivers/acpi/Makefile
> @@ -105,3 +105,5 @@ obj-$(CONFIG_ACPI_CONFIGFS)   += acpi_configfs.o
>  
>  video-objs   += acpi_video.o video_detect.o
>  obj-y+= dptf/
> +
> +obj-$(CONFIG_ARM64)  += arm64/
> diff --git a/drivers/acpi/arm64/Kconfig b/drivers/acpi/arm64/Kconfig
> new file mode 100644
> index 000..fc818dc
> --- /dev/null
> +++ b/drivers/acpi/arm64/Kconfig
> @@ -0,0 +1,6 @@
> +#
> +# ACPI Configuration for ARM64
> +#
> +
> +config IORT_TABLE
> + bool
> diff --git a/drivers/acpi/arm64/Makefile b/drivers/acpi/arm64/Makefile
> new file mode 100644
> index 000..d01be6f
> --- /dev/null
> +++ b/drivers/acpi/arm64/Makefile
> @@ -0,0 +1 @@
> +obj-$(CONFIG_IORT_TABLE) += iort.o
> diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
> new file mode 100644
> index 000..f89056e
> --- /dev/null
> +++ b/drivers/acpi/arm64/iort.c
> @@ -0,0 +1,218 @@
> +/*
> + * Copyright (C) 2016, Semihalf
> + *   Author: Tomasz Nowicki 
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + * This file implements early detection/parsing of I/O mapping
> + * reported to OS through firmware via I/O Remapping Table (IORT)
> + * IORT document number: ARM DEN 0049A
> + */
> +
> +#define pr_fmt(fmt)  "ACPI: IORT: " fmt
> +
> +#include 
> +#include 
> +#include 
> +
> +typedef acpi_status (*iort_find_node_callback)
> + (struct acpi_iort_node *node, void *context);
> +
> +/* Root pointer to the mapped IORT table */
> +static struct acpi_table_header *iort_table;
> +
> +static struct acpi_iort_node *
> +iort_scan_node(enum acpi_iort_node_type type,
> +iort_find_node_callback callback, void *context)
> +{
> + struct acpi_iort_node *iort_node, *iort_end;
> + struct acpi_table_iort *iort;
> + int i;
> +
> + /* Get the first IORT node */
> + iort = (struct acpi_table_iort *)iort_table;
>
Here, the same as I comments on Lorenzo's patch. If

Re: [PATCH V8 1/8] ACPI: I/O Remapping Table (IORT) initial support

2016-08-18 Thread Dennis Chen

Hi Tomasz,

On Thu, Aug 11, 2016 at 12:06:31PM +0200, Tomasz Nowicki wrote:
> IORT shows representation of IO topology for ARM based systems.
> It describes how various components are connected together on
> parent-child basis e.g. PCI RC -> SMMU -> ITS. Also see IORT spec.
> http://infocenter.arm.com/help/topic/com.arm.doc.den0049b/DEN0049B_IO_Remapping_Table.pdf
> 
> Initial support allows to detect IORT table presence and save its
> root pointer obtained through acpi_get_table(). The pointer validity
> depends on acpi_gbl_permanent_mmap because if acpi_gbl_permanent_mmap
> is not set while using IORT nodes we would dereference unmapped pointers.
> 
> For the aforementioned reason call iort_table_detect() from acpi_init()
> which guarantees acpi_gbl_permanent_mmap to be set at that point.
> 
> Add generic helpers which are helpful for scanning and retrieving
> information from IORT table content. List of the most important helpers:
> - iort_find_dev_node() finds IORT node for a given device
> - iort_node_map_rid() maps device RID and returns IORT node which provides
>   final translation
> 
> IORT support is placed under drivers/acpi/arm64/ new directory due to its
> ARM64 specific nature. The code there is considered only for ARM64.
> The long term plan is to keep all ARM64 specific tables support
> in this place e.g. GTDT table.
> 
> Signed-off-by: Tomasz Nowicki 
> Reviewed-by: Hanjun Guo 
> ---
>  drivers/acpi/Kconfig|   5 +
>  drivers/acpi/Makefile   |   2 +
>  drivers/acpi/arm64/Kconfig  |   6 ++
>  drivers/acpi/arm64/Makefile |   1 +
>  drivers/acpi/arm64/iort.c   | 218 
> 
>  drivers/acpi/bus.c  |   2 +
>  include/linux/iort.h|  30 ++
>  7 files changed, 264 insertions(+)
>  create mode 100644 drivers/acpi/arm64/Kconfig
>  create mode 100644 drivers/acpi/arm64/Makefile
>  create mode 100644 drivers/acpi/arm64/iort.c
>  create mode 100644 include/linux/iort.h
> 
> diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
> index 445ce28..6cef2d1 100644
> --- a/drivers/acpi/Kconfig
> +++ b/drivers/acpi/Kconfig
> @@ -521,4 +521,9 @@ config ACPI_CONFIGFS
> userspace. The configurable ACPI groups will be visible under
> /config/acpi, assuming configfs is mounted under /config.
>  
> +if ARM64
> +source "drivers/acpi/arm64/Kconfig"
> +
> +endif
> +
>  endif# ACPI
> diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
> index 5ae9d85..e5ada78 100644
> --- a/drivers/acpi/Makefile
> +++ b/drivers/acpi/Makefile
> @@ -105,3 +105,5 @@ obj-$(CONFIG_ACPI_CONFIGFS)   += acpi_configfs.o
>  
>  video-objs   += acpi_video.o video_detect.o
>  obj-y+= dptf/
> +
> +obj-$(CONFIG_ARM64)  += arm64/
> diff --git a/drivers/acpi/arm64/Kconfig b/drivers/acpi/arm64/Kconfig
> new file mode 100644
> index 000..fc818dc
> --- /dev/null
> +++ b/drivers/acpi/arm64/Kconfig
> @@ -0,0 +1,6 @@
> +#
> +# ACPI Configuration for ARM64
> +#
> +
> +config IORT_TABLE
> + bool
> diff --git a/drivers/acpi/arm64/Makefile b/drivers/acpi/arm64/Makefile
> new file mode 100644
> index 000..d01be6f
> --- /dev/null
> +++ b/drivers/acpi/arm64/Makefile
> @@ -0,0 +1 @@
> +obj-$(CONFIG_IORT_TABLE) += iort.o
> diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
> new file mode 100644
> index 000..f89056e
> --- /dev/null
> +++ b/drivers/acpi/arm64/iort.c
> @@ -0,0 +1,218 @@
> +/*
> + * Copyright (C) 2016, Semihalf
> + *   Author: Tomasz Nowicki 
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + * This file implements early detection/parsing of I/O mapping
> + * reported to OS through firmware via I/O Remapping Table (IORT)
> + * IORT document number: ARM DEN 0049A
> + */
> +
> +#define pr_fmt(fmt)  "ACPI: IORT: " fmt
> +
> +#include 
> +#include 
> +#include 
> +
> +typedef acpi_status (*iort_find_node_callback)
> + (struct acpi_iort_node *node, void *context);
> +
> +/* Root pointer to the mapped IORT table */
> +static struct acpi_table_header *iort_table;
> +
> +static struct acpi_iort_node *
> +iort_scan_node(enum acpi_iort_node_type type,
> +iort_find_node_callback callback, void *context)
> +{
> + struct acpi_iort_node *iort_node, *iort_end;
> + struct acpi_table_iort *iort;
> + int i;
> +
> + /* Get the first IORT node */
> + iort = (struct acpi_table_iort *)iort_table;
>
Here, the same as I comments on Lorenzo's patch. If IORT is missed in the 
firmware,
then iort_table will be NULL,

Re: [PATCH v4 09/15] drivers: acpi: iort: add support for ARM SMMU platform devices creation

2016-08-18 Thread Dennis Chen

Hi Lorenzo,

On Mon, Aug 15, 2016 at 04:23:34PM +0100, Lorenzo Pieralisi wrote:
> In ARM ACPI systems, IOMMU components are specified through static
> IORT table entries. In order to create platform devices for the
> corresponding ARM SMMU components, IORT kernel code should be made
> able to parse IORT table entries and create platform devices
> dynamically.
> 
> This patch adds the generic IORT infrastructure required to create
> platform devices for ARM SMMUs.
> 
> ARM SMMU versions have different resources requirement therefore this
> patch also introduces an IORT specific structure (ie iort_iommu_config)
> that contains hooks (to be defined when the corresponding ARM SMMU
> driver support is added to the kernel) to be used to define the
> platform devices names, init the IOMMUs, count their resources and
> finally initialize them.
> 
> Signed-off-by: Lorenzo Pieralisi 
> Cc: Hanjun Guo 
> Cc: Tomasz Nowicki 
> Cc: "Rafael J. Wysocki" 
> ---
>  drivers/acpi/arm64/iort.c | 153 
> ++
>  1 file changed, 153 insertions(+)
> 
> diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
> index f6db3d8..4043071 100644
> --- a/drivers/acpi/arm64/iort.c
> +++ b/drivers/acpi/arm64/iort.c
> @@ -22,6 +22,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  
>  struct iort_its_msi_chip {
> @@ -454,6 +455,157 @@ iort_get_device_domain(struct device *dev, u32 req_id)
>   return irq_find_matching_fwnode(handle, DOMAIN_BUS_PCI_MSI);
>  }
>  
> +struct iort_iommu_config {
> + const char *name;
> + int (*iommu_init)(struct acpi_iort_node *node);
> + bool (*iommu_is_coherent)(struct acpi_iort_node *node);
> + int (*iommu_count_resources)(struct acpi_iort_node *node);
> + void (*iommu_init_resources)(struct resource *res,
> +  struct acpi_iort_node *node);
> +};
> +
> +static const struct iort_iommu_config * __init
> +iort_get_iommu_config(struct acpi_iort_node *node)
> +{
> + return NULL;
> +}
> +
> +/**
> + * iort_add_smmu_platform_device() - Allocate a platform device for SMMU
> + * @fwnode: IORT node associated fwnode handle
> + * @node: Pointer to SMMU ACPI IORT node
> + *
> + * Returns: 0 on success, <0 failure
> + */
> +static int __init
> +iort_add_smmu_platform_device(struct fwnode_handle *fwnode,
> +   struct acpi_iort_node *node)
> +{
> + struct platform_device *pdev;
> + struct resource *r;
> + enum dev_dma_attr attr;
> + int ret, count;
> + const struct iort_iommu_config *ops =
> + iort_get_iommu_config(node);
> +
> + if (!ops)
> + return -ENODEV;
> +
> + pdev = platform_device_alloc(ops->name, PLATFORM_DEVID_AUTO);
> + if (!pdev)
> + return PTR_ERR(pdev);
> +
> + count = ops->iommu_count_resources(node);
> +
> + r = kcalloc(count, sizeof(*r), GFP_KERNEL);
> + if (!r) {
> + ret = -ENOMEM;
> + goto dev_put;
> + }
> +
> + ops->iommu_init_resources(r, node);
> +
> + ret = platform_device_add_resources(pdev, r, count);
> + /*
> +  * Resources are duplicated in platform_device_add_resources,
> +  * free their allocated memory
> +  */
> + kfree(r);
> +
> + if (ret)
> + goto dev_put;
> +
> + /*
> +  * Add a copy of IORT node pointer to platform_data to
> +  * be used to retrieve IORT data information.
> +  */
> + ret = platform_device_add_data(pdev, , sizeof(node));
> + if (ret)
> + goto dev_put;
> +
> + pdev->dev.dma_mask = kmalloc(sizeof(*pdev->dev.dma_mask), GFP_KERNEL);
> + if (!pdev->dev.dma_mask) {
> + ret = -ENOMEM;
> + goto dev_put;
> + }
> +
> + pdev->dev.fwnode = fwnode;
> +
> + /*
> +  * Set default dma mask value for the table walker,
> +  * to be overridden on probing with correct value.
> +  */
> + *pdev->dev.dma_mask = DMA_BIT_MASK(32);
> + pdev->dev.coherent_dma_mask = *pdev->dev.dma_mask;
> +
> + attr = ops->iommu_is_coherent(node) ?
> +  DEV_DMA_COHERENT : DEV_DMA_NON_COHERENT;
> +
> + /* Configure DMA for the page table walker */
> + acpi_dma_configure(>dev, attr);
> +
> + ret = platform_device_add(pdev);
> + if (ret)
> + goto dma_deconfigure;
> +
> + return 0;
> +
> +dma_deconfigure:
> + acpi_dma_deconfigure(>dev);
> + kfree(pdev->dev.dma_mask);
> +
> +dev_put:
> + platform_device_put(pdev);
> +
> + return ret;
> +}
> +
> +static void __init iort_smmu_init(void)
> +{
> + struct acpi_iort_node *iort_node, *iort_end;
> + struct acpi_table_iort *iort;
> + struct fwnode_handle *fwnode;
> + int i, ret;
> +
> + /*
> +  * table and iort will both point to the start of IORT table, but
> +  * have

Re: [PATCH v4 09/15] drivers: acpi: iort: add support for ARM SMMU platform devices creation

2016-08-18 Thread Dennis Chen

Hi Lorenzo,

On Mon, Aug 15, 2016 at 04:23:34PM +0100, Lorenzo Pieralisi wrote:
> In ARM ACPI systems, IOMMU components are specified through static
> IORT table entries. In order to create platform devices for the
> corresponding ARM SMMU components, IORT kernel code should be made
> able to parse IORT table entries and create platform devices
> dynamically.
> 
> This patch adds the generic IORT infrastructure required to create
> platform devices for ARM SMMUs.
> 
> ARM SMMU versions have different resources requirement therefore this
> patch also introduces an IORT specific structure (ie iort_iommu_config)
> that contains hooks (to be defined when the corresponding ARM SMMU
> driver support is added to the kernel) to be used to define the
> platform devices names, init the IOMMUs, count their resources and
> finally initialize them.
> 
> Signed-off-by: Lorenzo Pieralisi 
> Cc: Hanjun Guo 
> Cc: Tomasz Nowicki 
> Cc: "Rafael J. Wysocki" 
> ---
>  drivers/acpi/arm64/iort.c | 153 
> ++
>  1 file changed, 153 insertions(+)
> 
> diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
> index f6db3d8..4043071 100644
> --- a/drivers/acpi/arm64/iort.c
> +++ b/drivers/acpi/arm64/iort.c
> @@ -22,6 +22,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  
>  struct iort_its_msi_chip {
> @@ -454,6 +455,157 @@ iort_get_device_domain(struct device *dev, u32 req_id)
>   return irq_find_matching_fwnode(handle, DOMAIN_BUS_PCI_MSI);
>  }
>  
> +struct iort_iommu_config {
> + const char *name;
> + int (*iommu_init)(struct acpi_iort_node *node);
> + bool (*iommu_is_coherent)(struct acpi_iort_node *node);
> + int (*iommu_count_resources)(struct acpi_iort_node *node);
> + void (*iommu_init_resources)(struct resource *res,
> +  struct acpi_iort_node *node);
> +};
> +
> +static const struct iort_iommu_config * __init
> +iort_get_iommu_config(struct acpi_iort_node *node)
> +{
> + return NULL;
> +}
> +
> +/**
> + * iort_add_smmu_platform_device() - Allocate a platform device for SMMU
> + * @fwnode: IORT node associated fwnode handle
> + * @node: Pointer to SMMU ACPI IORT node
> + *
> + * Returns: 0 on success, <0 failure
> + */
> +static int __init
> +iort_add_smmu_platform_device(struct fwnode_handle *fwnode,
> +   struct acpi_iort_node *node)
> +{
> + struct platform_device *pdev;
> + struct resource *r;
> + enum dev_dma_attr attr;
> + int ret, count;
> + const struct iort_iommu_config *ops =
> + iort_get_iommu_config(node);
> +
> + if (!ops)
> + return -ENODEV;
> +
> + pdev = platform_device_alloc(ops->name, PLATFORM_DEVID_AUTO);
> + if (!pdev)
> + return PTR_ERR(pdev);
> +
> + count = ops->iommu_count_resources(node);
> +
> + r = kcalloc(count, sizeof(*r), GFP_KERNEL);
> + if (!r) {
> + ret = -ENOMEM;
> + goto dev_put;
> + }
> +
> + ops->iommu_init_resources(r, node);
> +
> + ret = platform_device_add_resources(pdev, r, count);
> + /*
> +  * Resources are duplicated in platform_device_add_resources,
> +  * free their allocated memory
> +  */
> + kfree(r);
> +
> + if (ret)
> + goto dev_put;
> +
> + /*
> +  * Add a copy of IORT node pointer to platform_data to
> +  * be used to retrieve IORT data information.
> +  */
> + ret = platform_device_add_data(pdev, , sizeof(node));
> + if (ret)
> + goto dev_put;
> +
> + pdev->dev.dma_mask = kmalloc(sizeof(*pdev->dev.dma_mask), GFP_KERNEL);
> + if (!pdev->dev.dma_mask) {
> + ret = -ENOMEM;
> + goto dev_put;
> + }
> +
> + pdev->dev.fwnode = fwnode;
> +
> + /*
> +  * Set default dma mask value for the table walker,
> +  * to be overridden on probing with correct value.
> +  */
> + *pdev->dev.dma_mask = DMA_BIT_MASK(32);
> + pdev->dev.coherent_dma_mask = *pdev->dev.dma_mask;
> +
> + attr = ops->iommu_is_coherent(node) ?
> +  DEV_DMA_COHERENT : DEV_DMA_NON_COHERENT;
> +
> + /* Configure DMA for the page table walker */
> + acpi_dma_configure(>dev, attr);
> +
> + ret = platform_device_add(pdev);
> + if (ret)
> + goto dma_deconfigure;
> +
> + return 0;
> +
> +dma_deconfigure:
> + acpi_dma_deconfigure(>dev);
> + kfree(pdev->dev.dma_mask);
> +
> +dev_put:
> + platform_device_put(pdev);
> +
> + return ret;
> +}
> +
> +static void __init iort_smmu_init(void)
> +{
> + struct acpi_iort_node *iort_node, *iort_end;
> + struct acpi_table_iort *iort;
> + struct fwnode_handle *fwnode;
> + int i, ret;
> +
> + /*
> +  * table and iort will both point to the start of IORT table, but
> +  * have different struct types
> +  */
> + iort = (struct acpi_table_iort *)iort_table;
>

Re: [RFC PATCH v3 00/13] ACPI IORT ARM SMMU v3 support

2016-07-25 Thread Dennis Chen

On Mon, Jul 25, 2016 at 09:36:41AM +0100, Lorenzo Pieralisi wrote:
> On Mon, Jul 25, 2016 at 01:53:32PM +0800, Dennis Chen wrote:
> > Hi
> > On Wed, Jul 20, 2016 at 12:23:22PM +0100, Lorenzo Pieralisi wrote:
> > > This RFC patch series is v3 of a previous posting:
> > > 
> > > https://lkml.org/lkml/2016/6/7/523
> > > 
> > > v2 -> v3
> > >   - Rebased on top of dependencies series [1][2][3](v4.7-rc3)
> > >   - Added back reliance on ACPI early probing infrastructure
> > >   - Patch[1-3] merged through other dependent series
> > >   - Added back IOMMU fwnode generalization
> > >   - Move SMMU v3 static functions configuration to IORT code
> > >   - Implemented generic IOMMU fwspec API
> > >   - Added code to implement fwnode platform device look-up
> > > 
> > > v1 -> v2:
> > >   - Rebased on top of dependencies series [1][2][3](v4.7-rc1)
> > >   - Removed IOMMU fwnode generalization
> > >   - Implemented ARM SMMU v3 ACPI probing instead of ARM SMMU v2
> > > owing to patch series dependencies [1]
> > >   - Moved platform device creation logic to IORT code to
> > > generalize its usage for ARM SMMU v1-v2-v3 components
> > >   - Removed reliance on ACPI early device probing
> > >   - Created IORT specific iommu_xlate() translation hook leaving
> > > OF code unchanged according to v1 reviews
> > > 
> > > The ACPI IORT table provides information that allows instantiating
> > > ARM SMMU devices and carrying out id mappings between components on
> > > ARM based systems (devices, IOMMUs, interrupt controllers).
> > > 
> > > http://infocenter.arm.com/help/topic/com.arm.doc.den0049b/DEN0049B_IO_Remapping_Table.pdf
> > > 
> > > Building on basic IORT support, available through [2]:
> > > 
> > > this patchset enables ARM SMMU v3 support on ACPI systems.
> > > 
> > > Most of the code is aimed at building the required generic ACPI
> > > infrastructure to create and enable IOMMU components and to bring
> > > the IOMMU infrastructure for ACPI on par with DT, which is going to
> > > make future ARM SMMU components easier to integrate.
> > > 
> > > PATCH (1) adds a FWNODE_IOMMU type to the struct fwnode_handle type.
> > >   It is required to attach a fwnode identifier to platform
> > >   devices allocated/detected through IORT tables entries;
> > >   IOMMU devices have to have an identifier to look them up
> > >   eg IOMMU core layer carrying out id translation. This can be
> > >   done through a fwnode_handle (ie IOMMU platform devices created
> > >   out of IORT tables are not ACPI devices hence they can't be
> > >   allocated as such, otherwise they would have a fwnode_handle of
> > >   type FWNODE_ACPI). This patch requires discussion and it is key
> > >   to the RFC.
> > > 
> > > PATCH (2) makes use of the ACPI early probing API to add a linker script
> > >   section for probing devices via IORT ACPI kernel code.
> > > 
> > > PATCH (3) provides IORT support for registering IOMMU IORT node through
> > >   their fwnode handle.
> > > 
> > > PATCH (4) implements core code fwnode based platform devices look-up.
> > > 
> > > PATCH (5) extends iommu_fwspec so that it can be used on ACPI based
> > >   system by creating a generic IOMMU fwspec kernel layer.
> > > 
> > > PATCH (6) implements the of_dma_configure() API in ACPI world -
> > >   acpi_dma_configure() - and patches PCI and ACPI core code to
> > >   start making use of it.
> > > 
> > > PATCH (7) provides an IORT function to detect existence of specific type
> > >   of IORT components.
> > > 
> > > PATCH (8) creates the kernel infrastructure required to create ARM SMMU
> > >   platform devices for IORT nodes.
> > > 
> > > PATCH (9) refactors the ARM SMMU v3 driver so that the init functions are
> > >   split in a way that groups together code that probes through DT
> > >   and code that carries out HW registers FW agnostic probing, in
> > >   preparation for adding the ACPI probing path.
> > > 
> > > PATCH (10) rework ARM SMMU v3 platform driver registration to make it work
> > >on ACPI systems.
> > > 
> > > PATCH (11) Building on patch (8), it adds ARM SMMU v3 IORT I

Re: [RFC PATCH v3 00/13] ACPI IORT ARM SMMU v3 support

2016-07-25 Thread Dennis Chen

On Mon, Jul 25, 2016 at 09:36:41AM +0100, Lorenzo Pieralisi wrote:
> On Mon, Jul 25, 2016 at 01:53:32PM +0800, Dennis Chen wrote:
> > Hi
> > On Wed, Jul 20, 2016 at 12:23:22PM +0100, Lorenzo Pieralisi wrote:
> > > This RFC patch series is v3 of a previous posting:
> > > 
> > > https://lkml.org/lkml/2016/6/7/523
> > > 
> > > v2 -> v3
> > >   - Rebased on top of dependencies series [1][2][3](v4.7-rc3)
> > >   - Added back reliance on ACPI early probing infrastructure
> > >   - Patch[1-3] merged through other dependent series
> > >   - Added back IOMMU fwnode generalization
> > >   - Move SMMU v3 static functions configuration to IORT code
> > >   - Implemented generic IOMMU fwspec API
> > >   - Added code to implement fwnode platform device look-up
> > > 
> > > v1 -> v2:
> > >   - Rebased on top of dependencies series [1][2][3](v4.7-rc1)
> > >   - Removed IOMMU fwnode generalization
> > >   - Implemented ARM SMMU v3 ACPI probing instead of ARM SMMU v2
> > > owing to patch series dependencies [1]
> > >   - Moved platform device creation logic to IORT code to
> > > generalize its usage for ARM SMMU v1-v2-v3 components
> > >   - Removed reliance on ACPI early device probing
> > >   - Created IORT specific iommu_xlate() translation hook leaving
> > > OF code unchanged according to v1 reviews
> > > 
> > > The ACPI IORT table provides information that allows instantiating
> > > ARM SMMU devices and carrying out id mappings between components on
> > > ARM based systems (devices, IOMMUs, interrupt controllers).
> > > 
> > > http://infocenter.arm.com/help/topic/com.arm.doc.den0049b/DEN0049B_IO_Remapping_Table.pdf
> > > 
> > > Building on basic IORT support, available through [2]:
> > > 
> > > this patchset enables ARM SMMU v3 support on ACPI systems.
> > > 
> > > Most of the code is aimed at building the required generic ACPI
> > > infrastructure to create and enable IOMMU components and to bring
> > > the IOMMU infrastructure for ACPI on par with DT, which is going to
> > > make future ARM SMMU components easier to integrate.
> > > 
> > > PATCH (1) adds a FWNODE_IOMMU type to the struct fwnode_handle type.
> > >   It is required to attach a fwnode identifier to platform
> > >   devices allocated/detected through IORT tables entries;
> > >   IOMMU devices have to have an identifier to look them up
> > >   eg IOMMU core layer carrying out id translation. This can be
> > >   done through a fwnode_handle (ie IOMMU platform devices created
> > >   out of IORT tables are not ACPI devices hence they can't be
> > >   allocated as such, otherwise they would have a fwnode_handle of
> > >   type FWNODE_ACPI). This patch requires discussion and it is key
> > >   to the RFC.
> > > 
> > > PATCH (2) makes use of the ACPI early probing API to add a linker script
> > >   section for probing devices via IORT ACPI kernel code.
> > > 
> > > PATCH (3) provides IORT support for registering IOMMU IORT node through
> > >   their fwnode handle.
> > > 
> > > PATCH (4) implements core code fwnode based platform devices look-up.
> > > 
> > > PATCH (5) extends iommu_fwspec so that it can be used on ACPI based
> > >   system by creating a generic IOMMU fwspec kernel layer.
> > > 
> > > PATCH (6) implements the of_dma_configure() API in ACPI world -
> > >   acpi_dma_configure() - and patches PCI and ACPI core code to
> > >   start making use of it.
> > > 
> > > PATCH (7) provides an IORT function to detect existence of specific type
> > >   of IORT components.
> > > 
> > > PATCH (8) creates the kernel infrastructure required to create ARM SMMU
> > >   platform devices for IORT nodes.
> > > 
> > > PATCH (9) refactors the ARM SMMU v3 driver so that the init functions are
> > >   split in a way that groups together code that probes through DT
> > >   and code that carries out HW registers FW agnostic probing, in
> > >   preparation for adding the ACPI probing path.
> > > 
> > > PATCH (10) rework ARM SMMU v3 platform driver registration to make it work
> > >on ACPI systems.
> > > 
> > > PATCH (11) Building on patch (8), it adds ARM SMMU v3 IORT I

Re: [RFC PATCH v3 00/13] ACPI IORT ARM SMMU v3 support

2016-07-24 Thread Dennis Chen

Hi
On Wed, Jul 20, 2016 at 12:23:22PM +0100, Lorenzo Pieralisi wrote:
> This RFC patch series is v3 of a previous posting:
> 
> https://lkml.org/lkml/2016/6/7/523
> 
> v2 -> v3
>   - Rebased on top of dependencies series [1][2][3](v4.7-rc3)
>   - Added back reliance on ACPI early probing infrastructure
>   - Patch[1-3] merged through other dependent series
>   - Added back IOMMU fwnode generalization
>   - Move SMMU v3 static functions configuration to IORT code
>   - Implemented generic IOMMU fwspec API
>   - Added code to implement fwnode platform device look-up
> 
> v1 -> v2:
>   - Rebased on top of dependencies series [1][2][3](v4.7-rc1)
>   - Removed IOMMU fwnode generalization
>   - Implemented ARM SMMU v3 ACPI probing instead of ARM SMMU v2
> owing to patch series dependencies [1]
>   - Moved platform device creation logic to IORT code to
> generalize its usage for ARM SMMU v1-v2-v3 components
>   - Removed reliance on ACPI early device probing
>   - Created IORT specific iommu_xlate() translation hook leaving
> OF code unchanged according to v1 reviews
> 
> The ACPI IORT table provides information that allows instantiating
> ARM SMMU devices and carrying out id mappings between components on
> ARM based systems (devices, IOMMUs, interrupt controllers).
> 
> http://infocenter.arm.com/help/topic/com.arm.doc.den0049b/DEN0049B_IO_Remapping_Table.pdf
> 
> Building on basic IORT support, available through [2]:
> 
> this patchset enables ARM SMMU v3 support on ACPI systems.
> 
> Most of the code is aimed at building the required generic ACPI
> infrastructure to create and enable IOMMU components and to bring
> the IOMMU infrastructure for ACPI on par with DT, which is going to
> make future ARM SMMU components easier to integrate.
> 
> PATCH (1) adds a FWNODE_IOMMU type to the struct fwnode_handle type.
>   It is required to attach a fwnode identifier to platform
>   devices allocated/detected through IORT tables entries;
>   IOMMU devices have to have an identifier to look them up
>   eg IOMMU core layer carrying out id translation. This can be
>   done through a fwnode_handle (ie IOMMU platform devices created
>   out of IORT tables are not ACPI devices hence they can't be
>   allocated as such, otherwise they would have a fwnode_handle of
>   type FWNODE_ACPI). This patch requires discussion and it is key
>   to the RFC.
> 
> PATCH (2) makes use of the ACPI early probing API to add a linker script
>   section for probing devices via IORT ACPI kernel code.
> 
> PATCH (3) provides IORT support for registering IOMMU IORT node through
>   their fwnode handle.
> 
> PATCH (4) implements core code fwnode based platform devices look-up.
> 
> PATCH (5) extends iommu_fwspec so that it can be used on ACPI based
>   system by creating a generic IOMMU fwspec kernel layer.
> 
> PATCH (6) implements the of_dma_configure() API in ACPI world -
>   acpi_dma_configure() - and patches PCI and ACPI core code to
>   start making use of it.
> 
> PATCH (7) provides an IORT function to detect existence of specific type
>   of IORT components.
> 
> PATCH (8) creates the kernel infrastructure required to create ARM SMMU
>   platform devices for IORT nodes.
> 
> PATCH (9) refactors the ARM SMMU v3 driver so that the init functions are
>   split in a way that groups together code that probes through DT
>   and code that carries out HW registers FW agnostic probing, in
>   preparation for adding the ACPI probing path.
> 
> PATCH (10) rework ARM SMMU v3 platform driver registration to make it work
>on ACPI systems.
> 
> PATCH (11) Building on patch (8), it adds ARM SMMU v3 IORT IOMMU
>operations to create and probe ARM SMMU v3 components.
> 
> PATCH (12) Extend the IORT iort_node_map_rid() to work on a type mask
>instead of a single type so that the translation API can
>be used on a range of components.
> 
> PATCH (13) provides IORT infrastructure to carry out IOMMU configuration
>for devices and hook it up to the previously introduced ACPI
>DMA configure API.
> 
> This patchset is built on top and depends on these three patch series:
> 
> [1] R.Murphy "Generic DT bindings for PCI and ARM SMMU v3" v4
> https://marc.info/?l=devicetree=146739193215518=2
> 
> [2] T.Nowicki "Introduce ACPI world to ITS irqchip" v7
> https://marc.info/?l=linux-arm-kernel=146642080022289=2
> 
> [3] T.Nowicki "Support for ARM64 ACPI based PCI host controller" v8
> http://marc.info/?l=linux-acpi=146462129816292=2
> 
> and is provided for early review/testing purposes here:
> 
> git://git.kernel.org/pub/scm/linux/kernel/git/lpieralisi/linux.git 
> acpi/iort-smmu-v3i
>
I thought I can got all the 13 patches applied with the above git tree, but I 
can't

Re: [RFC PATCH v3 00/13] ACPI IORT ARM SMMU v3 support

2016-07-24 Thread Dennis Chen

Hi
On Wed, Jul 20, 2016 at 12:23:22PM +0100, Lorenzo Pieralisi wrote:
> This RFC patch series is v3 of a previous posting:
> 
> https://lkml.org/lkml/2016/6/7/523
> 
> v2 -> v3
>   - Rebased on top of dependencies series [1][2][3](v4.7-rc3)
>   - Added back reliance on ACPI early probing infrastructure
>   - Patch[1-3] merged through other dependent series
>   - Added back IOMMU fwnode generalization
>   - Move SMMU v3 static functions configuration to IORT code
>   - Implemented generic IOMMU fwspec API
>   - Added code to implement fwnode platform device look-up
> 
> v1 -> v2:
>   - Rebased on top of dependencies series [1][2][3](v4.7-rc1)
>   - Removed IOMMU fwnode generalization
>   - Implemented ARM SMMU v3 ACPI probing instead of ARM SMMU v2
> owing to patch series dependencies [1]
>   - Moved platform device creation logic to IORT code to
> generalize its usage for ARM SMMU v1-v2-v3 components
>   - Removed reliance on ACPI early device probing
>   - Created IORT specific iommu_xlate() translation hook leaving
> OF code unchanged according to v1 reviews
> 
> The ACPI IORT table provides information that allows instantiating
> ARM SMMU devices and carrying out id mappings between components on
> ARM based systems (devices, IOMMUs, interrupt controllers).
> 
> http://infocenter.arm.com/help/topic/com.arm.doc.den0049b/DEN0049B_IO_Remapping_Table.pdf
> 
> Building on basic IORT support, available through [2]:
> 
> this patchset enables ARM SMMU v3 support on ACPI systems.
> 
> Most of the code is aimed at building the required generic ACPI
> infrastructure to create and enable IOMMU components and to bring
> the IOMMU infrastructure for ACPI on par with DT, which is going to
> make future ARM SMMU components easier to integrate.
> 
> PATCH (1) adds a FWNODE_IOMMU type to the struct fwnode_handle type.
>   It is required to attach a fwnode identifier to platform
>   devices allocated/detected through IORT tables entries;
>   IOMMU devices have to have an identifier to look them up
>   eg IOMMU core layer carrying out id translation. This can be
>   done through a fwnode_handle (ie IOMMU platform devices created
>   out of IORT tables are not ACPI devices hence they can't be
>   allocated as such, otherwise they would have a fwnode_handle of
>   type FWNODE_ACPI). This patch requires discussion and it is key
>   to the RFC.
> 
> PATCH (2) makes use of the ACPI early probing API to add a linker script
>   section for probing devices via IORT ACPI kernel code.
> 
> PATCH (3) provides IORT support for registering IOMMU IORT node through
>   their fwnode handle.
> 
> PATCH (4) implements core code fwnode based platform devices look-up.
> 
> PATCH (5) extends iommu_fwspec so that it can be used on ACPI based
>   system by creating a generic IOMMU fwspec kernel layer.
> 
> PATCH (6) implements the of_dma_configure() API in ACPI world -
>   acpi_dma_configure() - and patches PCI and ACPI core code to
>   start making use of it.
> 
> PATCH (7) provides an IORT function to detect existence of specific type
>   of IORT components.
> 
> PATCH (8) creates the kernel infrastructure required to create ARM SMMU
>   platform devices for IORT nodes.
> 
> PATCH (9) refactors the ARM SMMU v3 driver so that the init functions are
>   split in a way that groups together code that probes through DT
>   and code that carries out HW registers FW agnostic probing, in
>   preparation for adding the ACPI probing path.
> 
> PATCH (10) rework ARM SMMU v3 platform driver registration to make it work
>on ACPI systems.
> 
> PATCH (11) Building on patch (8), it adds ARM SMMU v3 IORT IOMMU
>operations to create and probe ARM SMMU v3 components.
> 
> PATCH (12) Extend the IORT iort_node_map_rid() to work on a type mask
>instead of a single type so that the translation API can
>be used on a range of components.
> 
> PATCH (13) provides IORT infrastructure to carry out IOMMU configuration
>for devices and hook it up to the previously introduced ACPI
>DMA configure API.
> 
> This patchset is built on top and depends on these three patch series:
> 
> [1] R.Murphy "Generic DT bindings for PCI and ARM SMMU v3" v4
> https://marc.info/?l=devicetree=146739193215518=2
> 
> [2] T.Nowicki "Introduce ACPI world to ITS irqchip" v7
> https://marc.info/?l=linux-arm-kernel=146642080022289=2
> 
> [3] T.Nowicki "Support for ARM64 ACPI based PCI host controller" v8
> http://marc.info/?l=linux-acpi=146462129816292=2
> 
> and is provided for early review/testing purposes here:
> 
> git://git.kernel.org/pub/scm/linux/kernel/git/lpieralisi/linux.git 
> acpi/iort-smmu-v3i
>
I thought I can got all the 13 patches applied with the above git tree, but I 
can't

Re: [PATCH v11 0/8] KVM PCIe/MSI passthrough on ARM/ARM64: kernel part 1/3: iommu changes

2016-07-20 Thread Dennis Chen

On Wed, Jul 20, 2016 at 01:03:00PM +0200, Auger Eric wrote:
> Hi Dennis
> On 20/07/2016 11:56, Dennis Chen wrote:
> > Hi Eric,
> > 
> > On Tue, Jul 19, 2016 at 12:55:03PM +, Eric Auger wrote:
> >> This series introduces the msi-iommu api used to:
> >>
> >> - allocate/free resources for MSI IOMMU mapping
> >> - set the MSI iova window aperture
> >> - map/unmap physical addresses onto MSI IOVAs.
> >> - determine whether an msi needs to be iommu mapped
> >> - overwrite an msi_msg PA address with its pre-allocated/mapped IOVA
> >>
> >> Also a new iommu domain attribute, DOMAIN_ATTR_MSI_GEOMETRY is introduced
> >> to report the MSI iova window geometry (aperture and iommu-msi API 
> >> support).
> >>
> >> Currently:
> >> - iommu driver is supposed to allocate/free MSI mapping resources
> >> - VFIO subsystem is supposed to set the MSI IOVA aperture.
> >> - The MSI layer is supposed to allocate/free iova mappings and overwrite
> >>   msi_msg with IOVA at composition time
> >>
> >> More details & context can be found at:
> >> http://www.linaro.org/blog/core-dump/kvm-pciemsi-passthrough-armarm64/
> >>
> >> Best Regards
> >>
> >> Eric
> >>
> >> Git: complete series available at
> >> https://github.com/eauger/linux/tree/v4.7-rc7-passthrough-v11
> >>
> > Why can't I find this new series on your git tree:
> > https://git.linaro.org/people/eric.auger/linux.git
> you are not looking at the right git repo: see github one above.
> > ?
> > Also, do I need to download all the 3-part patches to test the PCIe NIC 
> > passthru
> > as I did on your v9 series?
> Yes you need to take the 3 parts. You should have everything that is
> needed on the above branch. In case you do not work on Cavium, you
> should not cherry-pick
> "vfio: pci: HACK! workaround thunderx pci_try_reset_bus crash"
>
Thanks Eric, I've got all the 3 parts from the github git tree in my local 
repos.
Currently I think the major platform of mine is AMD overdrive. So I will test 
this patch set
on that platform first.

Thanks,
Dennis
> 
> Thanks
> 
> Eric
> > 
> > Thanks,
> > Dennis 
> >>
> >> see part III for wrap-up details.
> >>
> >> History:
> >> v10 -> v11:
> >> - no change in the series, just incremented for consistency
> >> - added a temporary patch in the branch:
> >>   "iommu/iova: FIXUP! validate iova_domain input to put_iova_domain"
> >>   originally sent by Nate and adapted for this use case. This is currently
> >>   under discussion on the ML. The crash typically occurs in case unsafe
> >>   interrupts are discovered while allow_unsafe_interrupts is not set.
> >>
> >> v9 -> v10:
> >> - split error management in iommu_msi_set_aperture
> >>
> >> v8 -> v9:
> >> - rename iommu_domain_msi_geometry programmable flag into 
> >> iommu_msi_supported
> >> - introduce msi_apperture_valid helper and use this instead of 
> >> is_aperture_set
> >>
> >> v7 -> v8:
> >> - The API is retargetted for MSI: renamed msi-iommu
> >>   all "dma-reserved" namings removed
> >> - now implemented upon dma-iommu (get, put, init), ie. reuse iova_cookie,
> >>   and iova API
> >> - msi mapping resources now are guaranteed to exist during the whole iommu
> >>   domain's lifetime. No need to lock to garantee the cookie integrity
> >> - removed alloc/free_reserved_reserved_iova_domain. We now have a single
> >>   function that sets the aperture, looking like iommu_dma_init_domain.
> >> - we now use a list instead of an RB-tree
> >> - prot is not propagated anymore at domain creation due to the retargetting
> >>   for MSI
> >> - iommu_domain pointer removed from doorbell_mapping struct
> >> - replaced DOMAIN_ATTR_MSI_MAPPING by DOMAIN_ATTR_MSI_GEOMETRY
> >>
> >> v6 -> v7:
> >> - fixed known lock bugs and multiple page sized slots matching
> >>   (I only have a single MSI frame made of a single page)
> >> - reserved_iova_cookie now pointing to a struct that encapsulates the
> >>   iova domain handle + protection attribute passed from VFIO (Alex' req)
> >> - 2 new functions exposed: iommu_msi_mapping_translate_msg,
> >>   iommu_msi_mapping_desc_to_domain: not sure this is the right 
> >> location/proto
> >>   though
> >> - iommu_put_reserved_iova now takes a phys_addr_t
>

Re: [PATCH v11 0/8] KVM PCIe/MSI passthrough on ARM/ARM64: kernel part 1/3: iommu changes

2016-07-20 Thread Dennis Chen

On Wed, Jul 20, 2016 at 01:03:00PM +0200, Auger Eric wrote:
> Hi Dennis
> On 20/07/2016 11:56, Dennis Chen wrote:
> > Hi Eric,
> > 
> > On Tue, Jul 19, 2016 at 12:55:03PM +, Eric Auger wrote:
> >> This series introduces the msi-iommu api used to:
> >>
> >> - allocate/free resources for MSI IOMMU mapping
> >> - set the MSI iova window aperture
> >> - map/unmap physical addresses onto MSI IOVAs.
> >> - determine whether an msi needs to be iommu mapped
> >> - overwrite an msi_msg PA address with its pre-allocated/mapped IOVA
> >>
> >> Also a new iommu domain attribute, DOMAIN_ATTR_MSI_GEOMETRY is introduced
> >> to report the MSI iova window geometry (aperture and iommu-msi API 
> >> support).
> >>
> >> Currently:
> >> - iommu driver is supposed to allocate/free MSI mapping resources
> >> - VFIO subsystem is supposed to set the MSI IOVA aperture.
> >> - The MSI layer is supposed to allocate/free iova mappings and overwrite
> >>   msi_msg with IOVA at composition time
> >>
> >> More details & context can be found at:
> >> http://www.linaro.org/blog/core-dump/kvm-pciemsi-passthrough-armarm64/
> >>
> >> Best Regards
> >>
> >> Eric
> >>
> >> Git: complete series available at
> >> https://github.com/eauger/linux/tree/v4.7-rc7-passthrough-v11
> >>
> > Why can't I find this new series on your git tree:
> > https://git.linaro.org/people/eric.auger/linux.git
> you are not looking at the right git repo: see github one above.
> > ?
> > Also, do I need to download all the 3-part patches to test the PCIe NIC 
> > passthru
> > as I did on your v9 series?
> Yes you need to take the 3 parts. You should have everything that is
> needed on the above branch. In case you do not work on Cavium, you
> should not cherry-pick
> "vfio: pci: HACK! workaround thunderx pci_try_reset_bus crash"
>
Thanks Eric, I've got all the 3 parts from the github git tree in my local 
repos.
Currently I think the major platform of mine is AMD overdrive. So I will test 
this patch set
on that platform first.

Thanks,
Dennis
> 
> Thanks
> 
> Eric
> > 
> > Thanks,
> > Dennis 
> >>
> >> see part III for wrap-up details.
> >>
> >> History:
> >> v10 -> v11:
> >> - no change in the series, just incremented for consistency
> >> - added a temporary patch in the branch:
> >>   "iommu/iova: FIXUP! validate iova_domain input to put_iova_domain"
> >>   originally sent by Nate and adapted for this use case. This is currently
> >>   under discussion on the ML. The crash typically occurs in case unsafe
> >>   interrupts are discovered while allow_unsafe_interrupts is not set.
> >>
> >> v9 -> v10:
> >> - split error management in iommu_msi_set_aperture
> >>
> >> v8 -> v9:
> >> - rename iommu_domain_msi_geometry programmable flag into 
> >> iommu_msi_supported
> >> - introduce msi_apperture_valid helper and use this instead of 
> >> is_aperture_set
> >>
> >> v7 -> v8:
> >> - The API is retargetted for MSI: renamed msi-iommu
> >>   all "dma-reserved" namings removed
> >> - now implemented upon dma-iommu (get, put, init), ie. reuse iova_cookie,
> >>   and iova API
> >> - msi mapping resources now are guaranteed to exist during the whole iommu
> >>   domain's lifetime. No need to lock to garantee the cookie integrity
> >> - removed alloc/free_reserved_reserved_iova_domain. We now have a single
> >>   function that sets the aperture, looking like iommu_dma_init_domain.
> >> - we now use a list instead of an RB-tree
> >> - prot is not propagated anymore at domain creation due to the retargetting
> >>   for MSI
> >> - iommu_domain pointer removed from doorbell_mapping struct
> >> - replaced DOMAIN_ATTR_MSI_MAPPING by DOMAIN_ATTR_MSI_GEOMETRY
> >>
> >> v6 -> v7:
> >> - fixed known lock bugs and multiple page sized slots matching
> >>   (I only have a single MSI frame made of a single page)
> >> - reserved_iova_cookie now pointing to a struct that encapsulates the
> >>   iova domain handle + protection attribute passed from VFIO (Alex' req)
> >> - 2 new functions exposed: iommu_msi_mapping_translate_msg,
> >>   iommu_msi_mapping_desc_to_domain: not sure this is the right 
> >> location/proto
> >>   though
> >> - iommu_put_reserved_iova now takes a phys_addr_t
>

Re: [PATCH v11 4/8] iommu/msi-iommu: initialization

2016-07-20 Thread Dennis Chen

Hi Eric,
Some small questions/comments below:

On Tue, Jul 19, 2016 at 12:55:07PM +, Eric Auger wrote:
> iommu_get/put_msi_cookie allocates/frees the resource used to store
> and ref count the MSI doorbell mappings. iommu_msi_set_aperture
> initializes the iova domain used for MSI IOVA allocation and sets the
> iommu domain's msi geometry.
> 
> The implementation relies on dma-iommu API and iova API.
> 
> New msi functions are fully implemented if CONFIG_IOMMU_MSI is set.
> 
> Signed-off-by: Eric Auger 
> 
> ---
> v10:
> - split error management in iommu_msi_set_aperture
> 
> v9:
> - remove is_aperture_set and use iommu_domain_msi_aperture_valid helper
>   instead
> - set iommu domain's msi geometry
> 
> v8:
> - new design where msi-iommu relies on dma-iommu
> - remove the iommu_domain * from the doorbell_mapping struct
> - added is_aperture_set
> 
> v7:
> - fix locking
> - add iova_cache_get/put
> - static inline functions when CONFIG_IOMMU_DMA_RESERVED is not set
> - introduce struct reserved_iova_domain to encapsulate prot info &
>   add prot parameter in alloc_reserved_iova_domain
> 
> v5 -> v6:
> - use spin lock instead of mutex
> 
> v3 -> v4:
> - formerly in "iommu/arm-smmu: implement alloc/free_reserved_iova_domain" &
>   "iommu: add alloc/free_reserved_iova_domain"
> 
> v2 -> v3:
> - remove iommu_alloc_reserved_iova_domain & iommu_free_reserved_iova_domain
>   static implementation in case CONFIG_IOMMU_API is not set
> 
> v1 -> v2:
> - moved from vfio API to IOMMU API
> ---
>  drivers/iommu/Kconfig |   7 
>  drivers/iommu/Makefile|   1 +
>  drivers/iommu/msi-iommu.c | 100 
> ++
>  include/linux/msi-iommu.h |  65 ++
>  4 files changed, 173 insertions(+)
>  create mode 100644 drivers/iommu/msi-iommu.c
>  create mode 100644 include/linux/msi-iommu.h
> 
> diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
> index ad08603..5ea1610 100644
> --- a/drivers/iommu/Kconfig
> +++ b/drivers/iommu/Kconfig
> @@ -74,6 +74,11 @@ config IOMMU_DMA
>   select IOMMU_IOVA
>   select NEED_SG_DMA_LENGTH
>  
> +# IOMMU MSI mapping
> +config IOMMU_MSI
> + bool
> + select IOMMU_DMA
> +
>  config FSL_PAMU
>   bool "Freescale IOMMU support"
>   depends on PPC_E500MC || (COMPILE_TEST && PPC)
> @@ -296,6 +301,7 @@ config SPAPR_TCE_IOMMU
>  config ARM_SMMU
>   bool "ARM Ltd. System MMU (SMMU) Support"
>   depends on (ARM64 || ARM) && MMU
> + select IOMMU_MSI
>   select IOMMU_API
>   select IOMMU_IO_PGTABLE_LPAE
>   select ARM_DMA_USE_IOMMU if ARM
> @@ -309,6 +315,7 @@ config ARM_SMMU
>  config ARM_SMMU_V3
>   bool "ARM Ltd. System MMU Version 3 (SMMUv3) Support"
>   depends on ARM64 && PCI
> + select IOMMU_MSI
>   select IOMMU_API
>   select IOMMU_IO_PGTABLE_LPAE
>   select GENERIC_MSI_IRQ_DOMAIN
> diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
> index c6edb31..a381e66 100644
> --- a/drivers/iommu/Makefile
> +++ b/drivers/iommu/Makefile
> @@ -2,6 +2,7 @@ obj-$(CONFIG_IOMMU_API) += iommu.o
>  obj-$(CONFIG_IOMMU_API) += iommu-traces.o
>  obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o
>  obj-$(CONFIG_IOMMU_DMA) += dma-iommu.o
> +obj-$(CONFIG_IOMMU_MSI) += msi-iommu.o
>  obj-$(CONFIG_IOMMU_IO_PGTABLE) += io-pgtable.o
>  obj-$(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) += io-pgtable-arm-v7s.o
>  obj-$(CONFIG_IOMMU_IO_PGTABLE_LPAE) += io-pgtable-arm.o
> diff --git a/drivers/iommu/msi-iommu.c b/drivers/iommu/msi-iommu.c
> new file mode 100644
> index 000..de02ede
> --- /dev/null
> +++ b/drivers/iommu/msi-iommu.c
> @@ -0,0 +1,100 @@
> +/*
> + * Reserved IOVA Management
> + *
> + * Copyright (c) 2015 Linaro Ltd.
> + *  www.linaro.org
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + */
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +struct doorbell_mapping {
> + struct kref kref;
> + struct list_headnext;
> + phys_addr_t addr;
> + dma_addr_t  iova;
> + size_t  size;
> +};
> +
> +struct doorbell_mapping_info {
> + struct list_head list; /* list of doorbell mapping entries */
> + spinlock_t lock;
> +};
> +
> +int iommu_get_msi_cookie(struct iommu_domain *domain)
> +{
> + struct doorbell_mapping_info *dmi;
> + int ret;
> +
> + if (domain->msi_cookie || domain->iova_cookie)
> + return -EINVAL;
> +
> + ret = iommu_get_dma_cookie(domain);
> + if (ret)
> + return ret;
>

Re: [PATCH v11 4/8] iommu/msi-iommu: initialization

2016-07-20 Thread Dennis Chen

Hi Eric,
Some small questions/comments below:

On Tue, Jul 19, 2016 at 12:55:07PM +, Eric Auger wrote:
> iommu_get/put_msi_cookie allocates/frees the resource used to store
> and ref count the MSI doorbell mappings. iommu_msi_set_aperture
> initializes the iova domain used for MSI IOVA allocation and sets the
> iommu domain's msi geometry.
> 
> The implementation relies on dma-iommu API and iova API.
> 
> New msi functions are fully implemented if CONFIG_IOMMU_MSI is set.
> 
> Signed-off-by: Eric Auger 
> 
> ---
> v10:
> - split error management in iommu_msi_set_aperture
> 
> v9:
> - remove is_aperture_set and use iommu_domain_msi_aperture_valid helper
>   instead
> - set iommu domain's msi geometry
> 
> v8:
> - new design where msi-iommu relies on dma-iommu
> - remove the iommu_domain * from the doorbell_mapping struct
> - added is_aperture_set
> 
> v7:
> - fix locking
> - add iova_cache_get/put
> - static inline functions when CONFIG_IOMMU_DMA_RESERVED is not set
> - introduce struct reserved_iova_domain to encapsulate prot info &
>   add prot parameter in alloc_reserved_iova_domain
> 
> v5 -> v6:
> - use spin lock instead of mutex
> 
> v3 -> v4:
> - formerly in "iommu/arm-smmu: implement alloc/free_reserved_iova_domain" &
>   "iommu: add alloc/free_reserved_iova_domain"
> 
> v2 -> v3:
> - remove iommu_alloc_reserved_iova_domain & iommu_free_reserved_iova_domain
>   static implementation in case CONFIG_IOMMU_API is not set
> 
> v1 -> v2:
> - moved from vfio API to IOMMU API
> ---
>  drivers/iommu/Kconfig |   7 
>  drivers/iommu/Makefile|   1 +
>  drivers/iommu/msi-iommu.c | 100 
> ++
>  include/linux/msi-iommu.h |  65 ++
>  4 files changed, 173 insertions(+)
>  create mode 100644 drivers/iommu/msi-iommu.c
>  create mode 100644 include/linux/msi-iommu.h
> 
> diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
> index ad08603..5ea1610 100644
> --- a/drivers/iommu/Kconfig
> +++ b/drivers/iommu/Kconfig
> @@ -74,6 +74,11 @@ config IOMMU_DMA
>   select IOMMU_IOVA
>   select NEED_SG_DMA_LENGTH
>  
> +# IOMMU MSI mapping
> +config IOMMU_MSI
> + bool
> + select IOMMU_DMA
> +
>  config FSL_PAMU
>   bool "Freescale IOMMU support"
>   depends on PPC_E500MC || (COMPILE_TEST && PPC)
> @@ -296,6 +301,7 @@ config SPAPR_TCE_IOMMU
>  config ARM_SMMU
>   bool "ARM Ltd. System MMU (SMMU) Support"
>   depends on (ARM64 || ARM) && MMU
> + select IOMMU_MSI
>   select IOMMU_API
>   select IOMMU_IO_PGTABLE_LPAE
>   select ARM_DMA_USE_IOMMU if ARM
> @@ -309,6 +315,7 @@ config ARM_SMMU
>  config ARM_SMMU_V3
>   bool "ARM Ltd. System MMU Version 3 (SMMUv3) Support"
>   depends on ARM64 && PCI
> + select IOMMU_MSI
>   select IOMMU_API
>   select IOMMU_IO_PGTABLE_LPAE
>   select GENERIC_MSI_IRQ_DOMAIN
> diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
> index c6edb31..a381e66 100644
> --- a/drivers/iommu/Makefile
> +++ b/drivers/iommu/Makefile
> @@ -2,6 +2,7 @@ obj-$(CONFIG_IOMMU_API) += iommu.o
>  obj-$(CONFIG_IOMMU_API) += iommu-traces.o
>  obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o
>  obj-$(CONFIG_IOMMU_DMA) += dma-iommu.o
> +obj-$(CONFIG_IOMMU_MSI) += msi-iommu.o
>  obj-$(CONFIG_IOMMU_IO_PGTABLE) += io-pgtable.o
>  obj-$(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) += io-pgtable-arm-v7s.o
>  obj-$(CONFIG_IOMMU_IO_PGTABLE_LPAE) += io-pgtable-arm.o
> diff --git a/drivers/iommu/msi-iommu.c b/drivers/iommu/msi-iommu.c
> new file mode 100644
> index 000..de02ede
> --- /dev/null
> +++ b/drivers/iommu/msi-iommu.c
> @@ -0,0 +1,100 @@
> +/*
> + * Reserved IOVA Management
> + *
> + * Copyright (c) 2015 Linaro Ltd.
> + *  www.linaro.org
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + */
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +struct doorbell_mapping {
> + struct kref kref;
> + struct list_headnext;
> + phys_addr_t addr;
> + dma_addr_t  iova;
> + size_t  size;
> +};
> +
> +struct doorbell_mapping_info {
> + struct list_head list; /* list of doorbell mapping entries */
> + spinlock_t lock;
> +};
> +
> +int iommu_get_msi_cookie(struct iommu_domain *domain)
> +{
> + struct doorbell_mapping_info *dmi;
> + int ret;
> +
> + if (domain->msi_cookie || domain->iova_cookie)
> + return -EINVAL;
> +
> + ret = iommu_get_dma_cookie(domain);
> + if (ret)
> + return ret;
> +
> + dmi =

Re: [PATCH v11 0/8] KVM PCIe/MSI passthrough on ARM/ARM64: kernel part 1/3: iommu changes

2016-07-20 Thread Dennis Chen

Hi Eric,

On Tue, Jul 19, 2016 at 12:55:03PM +, Eric Auger wrote:
> This series introduces the msi-iommu api used to:
> 
> - allocate/free resources for MSI IOMMU mapping
> - set the MSI iova window aperture
> - map/unmap physical addresses onto MSI IOVAs.
> - determine whether an msi needs to be iommu mapped
> - overwrite an msi_msg PA address with its pre-allocated/mapped IOVA
> 
> Also a new iommu domain attribute, DOMAIN_ATTR_MSI_GEOMETRY is introduced
> to report the MSI iova window geometry (aperture and iommu-msi API support).
> 
> Currently:
> - iommu driver is supposed to allocate/free MSI mapping resources
> - VFIO subsystem is supposed to set the MSI IOVA aperture.
> - The MSI layer is supposed to allocate/free iova mappings and overwrite
>   msi_msg with IOVA at composition time
> 
> More details & context can be found at:
> http://www.linaro.org/blog/core-dump/kvm-pciemsi-passthrough-armarm64/
> 
> Best Regards
> 
> Eric
> 
> Git: complete series available at
> https://github.com/eauger/linux/tree/v4.7-rc7-passthrough-v11
>
Why can't I find this new series on your git tree:
https://git.linaro.org/people/eric.auger/linux.git
?
Also, do I need to download all the 3-part patches to test the PCIe NIC passthru
as I did on your v9 series?

Thanks,
Dennis 
>
> see part III for wrap-up details.
> 
> History:
> v10 -> v11:
> - no change in the series, just incremented for consistency
> - added a temporary patch in the branch:
>   "iommu/iova: FIXUP! validate iova_domain input to put_iova_domain"
>   originally sent by Nate and adapted for this use case. This is currently
>   under discussion on the ML. The crash typically occurs in case unsafe
>   interrupts are discovered while allow_unsafe_interrupts is not set.
> 
> v9 -> v10:
> - split error management in iommu_msi_set_aperture
> 
> v8 -> v9:
> - rename iommu_domain_msi_geometry programmable flag into iommu_msi_supported
> - introduce msi_apperture_valid helper and use this instead of is_aperture_set
> 
> v7 -> v8:
> - The API is retargetted for MSI: renamed msi-iommu
>   all "dma-reserved" namings removed
> - now implemented upon dma-iommu (get, put, init), ie. reuse iova_cookie,
>   and iova API
> - msi mapping resources now are guaranteed to exist during the whole iommu
>   domain's lifetime. No need to lock to garantee the cookie integrity
> - removed alloc/free_reserved_reserved_iova_domain. We now have a single
>   function that sets the aperture, looking like iommu_dma_init_domain.
> - we now use a list instead of an RB-tree
> - prot is not propagated anymore at domain creation due to the retargetting
>   for MSI
> - iommu_domain pointer removed from doorbell_mapping struct
> - replaced DOMAIN_ATTR_MSI_MAPPING by DOMAIN_ATTR_MSI_GEOMETRY
> 
> v6 -> v7:
> - fixed known lock bugs and multiple page sized slots matching
>   (I only have a single MSI frame made of a single page)
> - reserved_iova_cookie now pointing to a struct that encapsulates the
>   iova domain handle + protection attribute passed from VFIO (Alex' req)
> - 2 new functions exposed: iommu_msi_mapping_translate_msg,
>   iommu_msi_mapping_desc_to_domain: not sure this is the right location/proto
>   though
> - iommu_put_reserved_iova now takes a phys_addr_t
> - everything now is cleanup on iommu_domain destruction
> 
> RFC v5 -> patch v6:
> - split to ease the review process
> - in dma-reserved-api use a spin lock instead of a mutex (reported by
>   Jean-Philippe)
> - revisit iommu_get_reserved_iova API to pass a size parameter upon
>   Marc's request
> - Consistently use the page order passed when creating the iova domain.
> - init reserved_binding_list (reported by Julien)
> 
> RFC v4 -> RFC v5:
> - take into account Thomas' comments on MSI related patches
>   - split "msi: IOMMU map the doorbell address when needed"
>   - increase readability and add comments
>   - fix style issues
>  - split "iommu: Add DOMAIN_ATTR_MSI_MAPPING attribute"
>  - platform ITS now advertises IOMMU_CAP_INTR_REMAP
>  - fix compilation issue with CONFIG_IOMMU API unset
>  - arm-smmu-v3 now advertises DOMAIN_ATTR_MSI_MAPPING
> 
> RFC v3 -> v4:
> - Move doorbell mapping/unmapping in msi.c
> - fix ref count issue on set_affinity: in case of a change in the address
>   the previous address is decremented
> - doorbell map/unmap now is done on msi composition. Should allow the use
>   case for platform MSI controllers
> - create dma-reserved-iommu.h/c exposing/implementing a new API dedicated
>   to reserved IOVA management (looking like dma-iommu glue)
> - series reordering to ease the review:
>   - first part is related to IOMMU
>   - second related to MSI sub-system
>   - third related to VFIO (except arm-smmu IOMMU_CAP_INTR_REMAP removal)
> - expose the number of requested IOVA pages through VFIO_IOMMU_GET_INFO
>   [this partially addresses Marc's comments on iommu_get/put_single_reserved
>size/alignment problematic - which I did not ignore - but I don't know
>how much I can do at the

Re: [PATCH v11 0/8] KVM PCIe/MSI passthrough on ARM/ARM64: kernel part 1/3: iommu changes

2016-07-20 Thread Dennis Chen

Hi Eric,

On Tue, Jul 19, 2016 at 12:55:03PM +, Eric Auger wrote:
> This series introduces the msi-iommu api used to:
> 
> - allocate/free resources for MSI IOMMU mapping
> - set the MSI iova window aperture
> - map/unmap physical addresses onto MSI IOVAs.
> - determine whether an msi needs to be iommu mapped
> - overwrite an msi_msg PA address with its pre-allocated/mapped IOVA
> 
> Also a new iommu domain attribute, DOMAIN_ATTR_MSI_GEOMETRY is introduced
> to report the MSI iova window geometry (aperture and iommu-msi API support).
> 
> Currently:
> - iommu driver is supposed to allocate/free MSI mapping resources
> - VFIO subsystem is supposed to set the MSI IOVA aperture.
> - The MSI layer is supposed to allocate/free iova mappings and overwrite
>   msi_msg with IOVA at composition time
> 
> More details & context can be found at:
> http://www.linaro.org/blog/core-dump/kvm-pciemsi-passthrough-armarm64/
> 
> Best Regards
> 
> Eric
> 
> Git: complete series available at
> https://github.com/eauger/linux/tree/v4.7-rc7-passthrough-v11
>
Why can't I find this new series on your git tree:
https://git.linaro.org/people/eric.auger/linux.git
?
Also, do I need to download all the 3-part patches to test the PCIe NIC passthru
as I did on your v9 series?

Thanks,
Dennis 
>
> see part III for wrap-up details.
> 
> History:
> v10 -> v11:
> - no change in the series, just incremented for consistency
> - added a temporary patch in the branch:
>   "iommu/iova: FIXUP! validate iova_domain input to put_iova_domain"
>   originally sent by Nate and adapted for this use case. This is currently
>   under discussion on the ML. The crash typically occurs in case unsafe
>   interrupts are discovered while allow_unsafe_interrupts is not set.
> 
> v9 -> v10:
> - split error management in iommu_msi_set_aperture
> 
> v8 -> v9:
> - rename iommu_domain_msi_geometry programmable flag into iommu_msi_supported
> - introduce msi_apperture_valid helper and use this instead of is_aperture_set
> 
> v7 -> v8:
> - The API is retargetted for MSI: renamed msi-iommu
>   all "dma-reserved" namings removed
> - now implemented upon dma-iommu (get, put, init), ie. reuse iova_cookie,
>   and iova API
> - msi mapping resources now are guaranteed to exist during the whole iommu
>   domain's lifetime. No need to lock to garantee the cookie integrity
> - removed alloc/free_reserved_reserved_iova_domain. We now have a single
>   function that sets the aperture, looking like iommu_dma_init_domain.
> - we now use a list instead of an RB-tree
> - prot is not propagated anymore at domain creation due to the retargetting
>   for MSI
> - iommu_domain pointer removed from doorbell_mapping struct
> - replaced DOMAIN_ATTR_MSI_MAPPING by DOMAIN_ATTR_MSI_GEOMETRY
> 
> v6 -> v7:
> - fixed known lock bugs and multiple page sized slots matching
>   (I only have a single MSI frame made of a single page)
> - reserved_iova_cookie now pointing to a struct that encapsulates the
>   iova domain handle + protection attribute passed from VFIO (Alex' req)
> - 2 new functions exposed: iommu_msi_mapping_translate_msg,
>   iommu_msi_mapping_desc_to_domain: not sure this is the right location/proto
>   though
> - iommu_put_reserved_iova now takes a phys_addr_t
> - everything now is cleanup on iommu_domain destruction
> 
> RFC v5 -> patch v6:
> - split to ease the review process
> - in dma-reserved-api use a spin lock instead of a mutex (reported by
>   Jean-Philippe)
> - revisit iommu_get_reserved_iova API to pass a size parameter upon
>   Marc's request
> - Consistently use the page order passed when creating the iova domain.
> - init reserved_binding_list (reported by Julien)
> 
> RFC v4 -> RFC v5:
> - take into account Thomas' comments on MSI related patches
>   - split "msi: IOMMU map the doorbell address when needed"
>   - increase readability and add comments
>   - fix style issues
>  - split "iommu: Add DOMAIN_ATTR_MSI_MAPPING attribute"
>  - platform ITS now advertises IOMMU_CAP_INTR_REMAP
>  - fix compilation issue with CONFIG_IOMMU API unset
>  - arm-smmu-v3 now advertises DOMAIN_ATTR_MSI_MAPPING
> 
> RFC v3 -> v4:
> - Move doorbell mapping/unmapping in msi.c
> - fix ref count issue on set_affinity: in case of a change in the address
>   the previous address is decremented
> - doorbell map/unmap now is done on msi composition. Should allow the use
>   case for platform MSI controllers
> - create dma-reserved-iommu.h/c exposing/implementing a new API dedicated
>   to reserved IOVA management (looking like dma-iommu glue)
> - series reordering to ease the review:
>   - first part is related to IOMMU
>   - second related to MSI sub-system
>   - third related to VFIO (except arm-smmu IOMMU_CAP_INTR_REMAP removal)
> - expose the number of requested IOVA pages through VFIO_IOMMU_GET_INFO
>   [this partially addresses Marc's comments on iommu_get/put_single_reserved
>size/alignment problematic - which I did not ignore - but I don't know
>how much I can do at the

[tip:efi/urgent] efi/arm: Fix the format of EFI debug messages

2016-06-03 Thread tip-bot for Dennis Chen

Commit-ID:  c75343972b79ef5bd44c498a63b326e37470bbfc
Gitweb: http://git.kernel.org/tip/c75343972b79ef5bd44c498a63b326e37470bbfc
Author: Dennis Chen <dennis.c...@arm.com>
AuthorDate: Tue, 31 May 2016 11:23:44 +0100
Committer:  Ingo Molnar <mi...@kernel.org>
CommitDate: Fri, 3 Jun 2016 09:57:36 +0200

efi/arm: Fix the format of EFI debug messages

When both EFI and memblock debugging is enabled on the kernel command line:

  'efi=debug memblock=debug'

.. the debug messages for early_con look the following way:

 [0.00] efi:   0xe105-0xe105 [Memory Mapped I/O  |RUN|  
|  |  |  |  |  |   |  |  |  |UC]
 [0.00] efi:   0xe130-0xe1300fff [Memory Mapped I/O  |RUN|  
|  |  |  |  |  |   |  |  |  |UC]
 [0.00] efi:   0xe820-0xe827 [Memory Mapped I/O  |RUN|  
|  |  |  |  |  |   |  |  |  |UC]
 [0.00] efi:   0x0080-0x008001e7 [Runtime Data   |RUN|  
|  |  |  |  |  |   |WB|WT|WC|UC]
 [0.00] memblock_add: [0x80-0x8001e7] flags 0x0 
early_init_dt_add_memory_arch+0x54/0x5c
 [0.00] *
 ...

Note the misplaced '*' line, which happened because the memblock debug message
was printed while the EFI debug message was still being constructed..

This patch fixes the output to be the expected:

 [0.00] efi:   0xe105-0xe105 [Memory Mapped I/O  |RUN|  
|  |  |  |  |  |   |  |  |  |UC]
 [0.00] efi:   0xe130-0xe1300fff [Memory Mapped I/O  |RUN|  
|  |  |  |  |  |   |  |  |  |UC]
 [0.00] efi:   0xe820-0xe827 [Memory Mapped I/O  |RUN|  
|  |  |  |  |  |   |  |  |  |UC]
 [0.00] efi:   0x0080-0x008001e7 [Runtime Data   |RUN|  
|  |  |  |  |  |   |WB|WT|WC|UC]*
 [0.00] memblock_add: [0x80-0x8001e7] flags 0x0 
early_init_dt_add_memory_arch+0x54/0x5c
 ...

Note how the '*' is now in the proper EFI debug message line.

Signed-off-by: Dennis Chen <dennis.c...@arm.com>
Signed-off-by: Matt Fleming <m...@codeblueprint.co.uk>
Acked-by: Mark Rutland <mark.rutl...@arm.com>
Cc: Ard Biesheuvel <ard.biesheu...@linaro.org>
Cc: Catalin Marinas <catalin.mari...@arm.com>
Cc: Dan Williams <dan.j.willi...@intel.com>
Cc: Linus Torvalds <torva...@linux-foundation.org>
Cc: Mark Salter <msal...@redhat.com>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: Steve Capper <steve.cap...@arm.com>
Cc: Steve McIntyre <st...@einval.com>
Cc: Steven Rostedt <rost...@goodmis.org>
Cc: Thomas Gleixner <t...@linutronix.de>
Cc: Will Deacon <will.dea...@arm.com>
Cc: linux-...@vger.kernel.org
Link: 
http://lkml.kernel.org/r/1464690224-4503-3-git-send-email-m...@codeblueprint.co.uk
[ Made the changelog more readable. ]
Signed-off-by: Ingo Molnar <mi...@kernel.org>
---
 drivers/firmware/efi/arm-init.c | 14 ++
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/drivers/firmware/efi/arm-init.c b/drivers/firmware/efi/arm-init.c
index a850cbc..c49d50e 100644
--- a/drivers/firmware/efi/arm-init.c
+++ b/drivers/firmware/efi/arm-init.c
@@ -174,6 +174,7 @@ static __init void reserve_regions(void)
 {
efi_memory_desc_t *md;
u64 paddr, npages, size;
+   int resv;
 
if (efi_enabled(EFI_DBG))
pr_info("Processing EFI memory map:\n");
@@ -190,12 +191,14 @@ static __init void reserve_regions(void)
paddr = md->phys_addr;
npages = md->num_pages;
 
+   resv = is_reserve_region(md);
if (efi_enabled(EFI_DBG)) {
char buf[64];
 
-   pr_info("  0x%012llx-0x%012llx %s",
+   pr_info("  0x%012llx-0x%012llx %s%s\n",
paddr, paddr + (npages << EFI_PAGE_SHIFT) - 1,
-   efi_md_typeattr_format(buf, sizeof(buf), md));
+   efi_md_typeattr_format(buf, sizeof(buf), md),
+   resv ? "*" : "");
}
 
memrange_efi_to_native(, );
@@ -204,14 +207,9 @@ static __init void reserve_regions(void)
if (is_normal_ram(md))
early_init_dt_add_memory_arch(paddr, size);
 
-   if (is_reserve_region(md)) {
+   if (resv)
memblock_mark_nomap(paddr, size);
-   if (efi_enabled(EFI_DBG))
-   pr_cont("*");
-   }
 
-   if (efi_enabled(EFI_DBG))
-   pr_cont("\n");
}
 
set_bit(EFI_MEMMAP, );

[tip:efi/urgent] efi/arm: Fix the format of EFI debug messages

2016-06-03 Thread tip-bot for Dennis Chen

Commit-ID:  c75343972b79ef5bd44c498a63b326e37470bbfc
Gitweb: http://git.kernel.org/tip/c75343972b79ef5bd44c498a63b326e37470bbfc
Author: Dennis Chen 
AuthorDate: Tue, 31 May 2016 11:23:44 +0100
Committer:  Ingo Molnar 
CommitDate: Fri, 3 Jun 2016 09:57:36 +0200

efi/arm: Fix the format of EFI debug messages

When both EFI and memblock debugging is enabled on the kernel command line:

  'efi=debug memblock=debug'

.. the debug messages for early_con look the following way:

 [0.00] efi:   0xe105-0xe105 [Memory Mapped I/O  |RUN|  
|  |  |  |  |  |   |  |  |  |UC]
 [0.00] efi:   0xe130-0xe1300fff [Memory Mapped I/O  |RUN|  
|  |  |  |  |  |   |  |  |  |UC]
 [0.00] efi:   0xe820-0xe827 [Memory Mapped I/O  |RUN|  
|  |  |  |  |  |   |  |  |  |UC]
 [0.00] efi:   0x0080-0x008001e7 [Runtime Data   |RUN|  
|  |  |  |  |  |   |WB|WT|WC|UC]
 [0.00] memblock_add: [0x80-0x8001e7] flags 0x0 
early_init_dt_add_memory_arch+0x54/0x5c
 [0.00] *
 ...

Note the misplaced '*' line, which happened because the memblock debug message
was printed while the EFI debug message was still being constructed..

This patch fixes the output to be the expected:

 [0.00] efi:   0xe105-0xe105 [Memory Mapped I/O  |RUN|  
|  |  |  |  |  |   |  |  |  |UC]
 [0.00] efi:   0xe130-0xe1300fff [Memory Mapped I/O  |RUN|  
|  |  |  |  |  |   |  |  |  |UC]
 [0.00] efi:   0xe820-0xe827 [Memory Mapped I/O  |RUN|  
|  |  |  |  |  |   |  |  |  |UC]
 [0.00] efi:   0x0080-0x008001e7 [Runtime Data   |RUN|  
|  |  |  |  |  |   |WB|WT|WC|UC]*
 [0.00] memblock_add: [0x80-0x8001e7] flags 0x0 
early_init_dt_add_memory_arch+0x54/0x5c
 ...

Note how the '*' is now in the proper EFI debug message line.

Signed-off-by: Dennis Chen 
Signed-off-by: Matt Fleming 
Acked-by: Mark Rutland 
Cc: Ard Biesheuvel 
Cc: Catalin Marinas 
Cc: Dan Williams 
Cc: Linus Torvalds 
Cc: Mark Salter 
Cc: Peter Zijlstra 
Cc: Steve Capper 
Cc: Steve McIntyre 
Cc: Steven Rostedt 
Cc: Thomas Gleixner 
Cc: Will Deacon 
Cc: linux-...@vger.kernel.org
Link: 
http://lkml.kernel.org/r/1464690224-4503-3-git-send-email-m...@codeblueprint.co.uk
[ Made the changelog more readable. ]
Signed-off-by: Ingo Molnar 
---
 drivers/firmware/efi/arm-init.c | 14 ++
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/drivers/firmware/efi/arm-init.c b/drivers/firmware/efi/arm-init.c
index a850cbc..c49d50e 100644
--- a/drivers/firmware/efi/arm-init.c
+++ b/drivers/firmware/efi/arm-init.c
@@ -174,6 +174,7 @@ static __init void reserve_regions(void)
 {
efi_memory_desc_t *md;
u64 paddr, npages, size;
+   int resv;
 
if (efi_enabled(EFI_DBG))
pr_info("Processing EFI memory map:\n");
@@ -190,12 +191,14 @@ static __init void reserve_regions(void)
paddr = md->phys_addr;
npages = md->num_pages;
 
+   resv = is_reserve_region(md);
if (efi_enabled(EFI_DBG)) {
char buf[64];
 
-   pr_info("  0x%012llx-0x%012llx %s",
+   pr_info("  0x%012llx-0x%012llx %s%s\n",
paddr, paddr + (npages << EFI_PAGE_SHIFT) - 1,
-   efi_md_typeattr_format(buf, sizeof(buf), md));
+   efi_md_typeattr_format(buf, sizeof(buf), md),
+   resv ? "*" : "");
}
 
memrange_efi_to_native(, );
@@ -204,14 +207,9 @@ static __init void reserve_regions(void)
if (is_normal_ram(md))
early_init_dt_add_memory_arch(paddr, size);
 
-   if (is_reserve_region(md)) {
+   if (resv)
memblock_mark_nomap(paddr, size);
-   if (efi_enabled(EFI_DBG))
-   pr_cont("*");
-   }
 
-   if (efi_enabled(EFI_DBG))
-   pr_cont("\n");
}
 
set_bit(EFI_MEMMAP, );

Re: [PATCH v7 14/15] arm64, acpi, numa: NUMA support based on SRAT and SLIT

2016-05-27 Thread Dennis Chen

On 25 May 2016 at 06:35, David Daney <ddaney.c...@gmail.com> wrote:
> From: Hanjun Guo <hanjun@linaro.org>
>
> Introduce a new file to hold ACPI based NUMA information parsing from
> SRAT and SLIT.
>
> SRAT includes the CPU ACPI ID to Proximity Domain mappings and memory
> ranges to Proximity Domain mapping.  SLIT has the information of inter
> node distances(relative number for access latency).
>
> Signed-off-by: Hanjun Guo <hanjun@linaro.org>
> Signed-off-by: Ganapatrao Kulkarni <gkulka...@caviumnetworks.com>
> [rrich...@cavium.com Reworked for numa v10 series ]
> Signed-off-by: Robert Richter <rrich...@cavium.com>
> [david.da...@cavium.com reorderd and combinded with other patches in
> Hanjun Guo's original set, removed get_mpidr_in_madt() and use
> acpi_map_madt_entry() instead.]
> Signed-off-by: David Daney <david.da...@cavium.com>
> ---

I've tested this patch on Juno board with home made SRAT and SLIT
table, so feel free to add:
Tested-by: Dennis Chen <dennis.c...@arm.com>

-- 
Regards,
Dennis

Re: [PATCH v7 14/15] arm64, acpi, numa: NUMA support based on SRAT and SLIT

2016-05-27 Thread Dennis Chen

On 25 May 2016 at 06:35, David Daney  wrote:
> From: Hanjun Guo 
>
> Introduce a new file to hold ACPI based NUMA information parsing from
> SRAT and SLIT.
>
> SRAT includes the CPU ACPI ID to Proximity Domain mappings and memory
> ranges to Proximity Domain mapping.  SLIT has the information of inter
> node distances(relative number for access latency).
>
> Signed-off-by: Hanjun Guo 
> Signed-off-by: Ganapatrao Kulkarni 
> [rrich...@cavium.com Reworked for numa v10 series ]
> Signed-off-by: Robert Richter 
> [david.da...@cavium.com reorderd and combinded with other patches in
> Hanjun Guo's original set, removed get_mpidr_in_madt() and use
> acpi_map_madt_entry() instead.]
> Signed-off-by: David Daney 
> ---

I've tested this patch on Juno board with home made SRAT and SLIT
table, so feel free to add:
Tested-by: Dennis Chen 

-- 
Regards,
Dennis

Re: [PATCH v7 07/15] arm64, numa: Cleanup NUMA disabled messages.

2016-05-27 Thread Dennis Chen

On 25 May 2016 at 06:35, David Daney <ddaney.c...@gmail.com> wrote:
> From: David Daney <david.da...@cavium.com>
>
> As noted by Dennis Chen, we don't want to print "No NUMA configuration
> found" if NUMA was forced off from the command line.
>
> Change the type of numa_off to bool, and clean up printing code.
> Print "NUMA disabled" if forced off on command line and "No NUMA
> configuration found" if there was no firmware NUMA information.
>
> Signed-off-by: David Daney <david.da...@cavium.com>
> Acked-by: Catalin Marinas <catalin.mari...@arm.com>
> ---
>  arch/arm64/mm/numa.c | 11 +++
>  1 file changed, 7 insertions(+), 4 deletions(-)
>
> diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
> index 6cb03f9..1def1de 100644
> --- a/arch/arm64/mm/numa.c
> +++ b/arch/arm64/mm/numa.c
> @@ -29,7 +29,7 @@ static int cpu_to_node_map[NR_CPUS] = { [0 ... NR_CPUS-1] = 
> NUMA_NO_NODE };
>
>  static int numa_distance_cnt;
>  static u8 *numa_distance;
> -static int numa_off;
> +static bool numa_off;
>
>  static __init int numa_parse_early_param(char *opt)
>  {
> @@ -37,7 +37,7 @@ static __init int numa_parse_early_param(char *opt)
> return -EINVAL;
> if (!strncmp(opt, "off", 3)) {
> pr_info("%s\n", "NUMA turned off");
> -   numa_off = 1;
> +   numa_off = true;
> }
> return 0;
>  }
> @@ -362,7 +362,10 @@ static int __init dummy_numa_init(void)
> int ret;
> struct memblock_region *mblk;
>
> -   pr_info("%s\n", "No NUMA configuration found");
> +   if (numa_off)
> +   pr_info("NUMA disabled\n"); /* Forced off on command line. */
> +   else
> +   pr_info("No NUMA configuration found\n");
> pr_info("NUMA: Faking a node at [mem %#018Lx-%#018Lx]\n",
>0LLU, PFN_PHYS(max_pfn) - 1);
>
> @@ -375,7 +378,7 @@ static int __init dummy_numa_init(void)
> return ret;
> }
>
> -   numa_off = 1;
> +   numa_off = true;
> return 0;
>  }
>
> --
> 1.7.11.7
>

Reviewed-by: Dennis Chen <dennis.c...@arm.com>

-- 
Regards,
Dennis

Re: [PATCH v7 07/15] arm64, numa: Cleanup NUMA disabled messages.

2016-05-27 Thread Dennis Chen

On 25 May 2016 at 06:35, David Daney  wrote:
> From: David Daney 
>
> As noted by Dennis Chen, we don't want to print "No NUMA configuration
> found" if NUMA was forced off from the command line.
>
> Change the type of numa_off to bool, and clean up printing code.
> Print "NUMA disabled" if forced off on command line and "No NUMA
> configuration found" if there was no firmware NUMA information.
>
> Signed-off-by: David Daney 
> Acked-by: Catalin Marinas 
> ---
>  arch/arm64/mm/numa.c | 11 +++
>  1 file changed, 7 insertions(+), 4 deletions(-)
>
> diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
> index 6cb03f9..1def1de 100644
> --- a/arch/arm64/mm/numa.c
> +++ b/arch/arm64/mm/numa.c
> @@ -29,7 +29,7 @@ static int cpu_to_node_map[NR_CPUS] = { [0 ... NR_CPUS-1] = 
> NUMA_NO_NODE };
>
>  static int numa_distance_cnt;
>  static u8 *numa_distance;
> -static int numa_off;
> +static bool numa_off;
>
>  static __init int numa_parse_early_param(char *opt)
>  {
> @@ -37,7 +37,7 @@ static __init int numa_parse_early_param(char *opt)
> return -EINVAL;
> if (!strncmp(opt, "off", 3)) {
> pr_info("%s\n", "NUMA turned off");
> -   numa_off = 1;
> +   numa_off = true;
> }
> return 0;
>  }
> @@ -362,7 +362,10 @@ static int __init dummy_numa_init(void)
> int ret;
> struct memblock_region *mblk;
>
> -   pr_info("%s\n", "No NUMA configuration found");
> +   if (numa_off)
> +   pr_info("NUMA disabled\n"); /* Forced off on command line. */
> +   else
> +   pr_info("No NUMA configuration found\n");
> pr_info("NUMA: Faking a node at [mem %#018Lx-%#018Lx]\n",
>        0LLU, PFN_PHYS(max_pfn) - 1);
>
> @@ -375,7 +378,7 @@ static int __init dummy_numa_init(void)
> return ret;
> }
>
> -   numa_off = 1;
> +   numa_off = true;
> return 0;
>  }
>
> --
> 1.7.11.7
>

Reviewed-by: Dennis Chen 

-- 
Regards,
Dennis

Re: [PATCH v5 12/14] arm64, acpi, numa: NUMA support based on SRAT and SLIT

2016-04-27 Thread Dennis Chen

Hi Hanjun,

Thanks for the clarification and some little comments ;-)

On 27 April 2016 at 12:04, Hanjun Guo <hanjun@linaro.org> wrote:
> Hi Dennis, David,
>
> Sorry for the late reply, please see my comments below.
>
>
> On 2016/4/27 9:14, David Daney wrote:
>>
>> On 04/21/2016 03:06 AM, Dennis Chen wrote:
>>>
>>> On 20 April 2016 at 09:40, David Daney <ddaney.c...@gmail.com> wrote:
>>
>> [...]
>>>>
>>>> +/* Callback for Proximity Domain -> ACPI processor UID mapping */
>>>> +void __init acpi_numa_gicc_affinity_init(struct
>>>> acpi_srat_gicc_affinity *pa)
>>>> +{
>>>> +   int pxm, node;
>>>> +   u64 mpidr;
>>>> +
>>>> +   if (srat_disabled())
>>>> +   return;
>>>> +
>>>> +   if (pa->header.length < sizeof(struct
>>>> acpi_srat_gicc_affinity)) {
>>>> +   pr_err("SRAT: Invalid SRAT header length: %d\n",
>>>> +   pa->header.length);
>>>> +   bad_srat();
>>>> +   return;
>>>> +   }
>>>> +
>>>> +   if (!(pa->flags & ACPI_SRAT_GICC_ENABLED))
>>>> +   return;
>>>> +
>>>> +   if (cpus_in_srat >= NR_CPUS) {
>>>> +   pr_warn_once("SRAT: cpu_to_node_map[%d] is too small,
>>>> may not be able to use all cpus\n",
>>>> +NR_CPUS);
>>>> +   return;
>>>> +   }
>>>> +
>>>> +   pxm = pa->proximity_domain;
>>>> +   node = acpi_map_pxm_to_node(pxm);
>>>> +
>>>> +   if (node == NUMA_NO_NODE || node >= MAX_NUMNODES) {
>>>> +   pr_err("SRAT: Too many proximity domains %d\n", pxm);
>>>> +   bad_srat();
>>>> +   return;
>>>> +   }
>>>> +
>>>> +   if (get_mpidr_in_madt(pa->acpi_processor_uid, )) {
>>>> +   pr_err("SRAT: PXM %d with ACPI ID %d has no valid
>>>> MPIDR in MADT\n",
>>>> +   pxm, pa->acpi_processor_uid);
>>>> +   bad_srat();
>>>> +   return;
>>>> +   }
>>>> +
>>>> +   early_node_cpu_hwid[cpus_in_srat].node_id = node;
>>>> +   early_node_cpu_hwid[cpus_in_srat].cpu_hwid =  mpidr;
>>>> +   node_set(node, numa_nodes_parsed);
>>>> +   cpus_in_srat++;
>>>> +   pr_info("SRAT: PXM %d -> MPIDR 0x%Lx -> Node %d cpu %d\n",
>>>> +   pxm, mpidr, node, cpus_in_srat);
>>>> +}
>>>
>>>
>>> What does the *cpu* means in above pr_info function? If it's the
>>> logical processor ID or ACPI processor UID, then I suggest to use
>>> pa->acpi_processor_uid instead of cpus_in_srat, I understand the
>
>
> I think print cpus_in_srat is pointless here, as the logic cpu number
> is allocated by OS when initializing SMP by scanning MADT table. As
> Dennis said, it's just a count number, not a number mapping to MPIDR.
>
> ACPI processor UID is the key value to connect MADT, SRAT, DSDT.
>
> For MADT, it will have MPIDR and ACPI processor UID, and OS will
> create mappings to MPIDR and cpu logical number,
> ACPI processor UID <--> MPIDR <--> CPU logical number
>
> In SRAT, there is ACPI processor UID represented, mappings will be
> ACPI processor UID <--> PXM <--> NUMA node logical number
>
> So we can use ACPI processor UID to get the MPIDR by scanning the
> MADT, then we can map NUMA node logical number to cpu logical
> number later.
>
Right, kernel will record the logical cpu index info (begins from 0,
the boot cpu) into the cpu_possible bit map by parsing the MADT GICC
sub-table. So I am thinking here if we can reduce the parsing walk by
only one, because I see the acpi_numa_gicc_affinity_init() calls
get_mpidr_in_madt() to traverse the entire MADT, actually the kernel
will also traverse the MADT in smp_init_cpus(), merge them into one?
>
>>> cpus_in_srat is just a count number of the entries of GICC Affinity
>>> Struct instance in SRAT, correct me if I am wrong. So at least it sees
>>> to me, the above pr_info will output message looks like:
>>> SRAT: PXM 0 -> MPIDR 0x100 -> Node 0 cpu 1
>>> SRAT: PXM 0 -> MPIDR 0x101 -> Node 0 cpu 2
>>> SRAT:

Re: [PATCH v5 12/14] arm64, acpi, numa: NUMA support based on SRAT and SLIT

2016-04-27 Thread Dennis Chen

Hi Hanjun,

Thanks for the clarification and some little comments ;-)

On 27 April 2016 at 12:04, Hanjun Guo  wrote:
> Hi Dennis, David,
>
> Sorry for the late reply, please see my comments below.
>
>
> On 2016/4/27 9:14, David Daney wrote:
>>
>> On 04/21/2016 03:06 AM, Dennis Chen wrote:
>>>
>>> On 20 April 2016 at 09:40, David Daney  wrote:
>>
>> [...]
>>>>
>>>> +/* Callback for Proximity Domain -> ACPI processor UID mapping */
>>>> +void __init acpi_numa_gicc_affinity_init(struct
>>>> acpi_srat_gicc_affinity *pa)
>>>> +{
>>>> +   int pxm, node;
>>>> +   u64 mpidr;
>>>> +
>>>> +   if (srat_disabled())
>>>> +   return;
>>>> +
>>>> +   if (pa->header.length < sizeof(struct
>>>> acpi_srat_gicc_affinity)) {
>>>> +   pr_err("SRAT: Invalid SRAT header length: %d\n",
>>>> +   pa->header.length);
>>>> +   bad_srat();
>>>> +   return;
>>>> +   }
>>>> +
>>>> +   if (!(pa->flags & ACPI_SRAT_GICC_ENABLED))
>>>> +   return;
>>>> +
>>>> +   if (cpus_in_srat >= NR_CPUS) {
>>>> +   pr_warn_once("SRAT: cpu_to_node_map[%d] is too small,
>>>> may not be able to use all cpus\n",
>>>> +NR_CPUS);
>>>> +   return;
>>>> +   }
>>>> +
>>>> +   pxm = pa->proximity_domain;
>>>> +   node = acpi_map_pxm_to_node(pxm);
>>>> +
>>>> +   if (node == NUMA_NO_NODE || node >= MAX_NUMNODES) {
>>>> +   pr_err("SRAT: Too many proximity domains %d\n", pxm);
>>>> +   bad_srat();
>>>> +   return;
>>>> +   }
>>>> +
>>>> +   if (get_mpidr_in_madt(pa->acpi_processor_uid, )) {
>>>> +   pr_err("SRAT: PXM %d with ACPI ID %d has no valid
>>>> MPIDR in MADT\n",
>>>> +   pxm, pa->acpi_processor_uid);
>>>> +   bad_srat();
>>>> +   return;
>>>> +   }
>>>> +
>>>> +   early_node_cpu_hwid[cpus_in_srat].node_id = node;
>>>> +   early_node_cpu_hwid[cpus_in_srat].cpu_hwid =  mpidr;
>>>> +   node_set(node, numa_nodes_parsed);
>>>> +   cpus_in_srat++;
>>>> +   pr_info("SRAT: PXM %d -> MPIDR 0x%Lx -> Node %d cpu %d\n",
>>>> +   pxm, mpidr, node, cpus_in_srat);
>>>> +}
>>>
>>>
>>> What does the *cpu* means in above pr_info function? If it's the
>>> logical processor ID or ACPI processor UID, then I suggest to use
>>> pa->acpi_processor_uid instead of cpus_in_srat, I understand the
>
>
> I think print cpus_in_srat is pointless here, as the logic cpu number
> is allocated by OS when initializing SMP by scanning MADT table. As
> Dennis said, it's just a count number, not a number mapping to MPIDR.
>
> ACPI processor UID is the key value to connect MADT, SRAT, DSDT.
>
> For MADT, it will have MPIDR and ACPI processor UID, and OS will
> create mappings to MPIDR and cpu logical number,
> ACPI processor UID <--> MPIDR <--> CPU logical number
>
> In SRAT, there is ACPI processor UID represented, mappings will be
> ACPI processor UID <--> PXM <--> NUMA node logical number
>
> So we can use ACPI processor UID to get the MPIDR by scanning the
> MADT, then we can map NUMA node logical number to cpu logical
> number later.
>
Right, kernel will record the logical cpu index info (begins from 0,
the boot cpu) into the cpu_possible bit map by parsing the MADT GICC
sub-table. So I am thinking here if we can reduce the parsing walk by
only one, because I see the acpi_numa_gicc_affinity_init() calls
get_mpidr_in_madt() to traverse the entire MADT, actually the kernel
will also traverse the MADT in smp_init_cpus(), merge them into one?
>
>>> cpus_in_srat is just a count number of the entries of GICC Affinity
>>> Struct instance in SRAT, correct me if I am wrong. So at least it sees
>>> to me, the above pr_info will output message looks like:
>>> SRAT: PXM 0 -> MPIDR 0x100 -> Node 0 cpu 1
>>> SRAT: PXM 0 -> MPIDR 0x101 -> Node 0 cpu 2
>>> SRAT: PXM 0 -> MPIDR 0x102 -> Node 0 cpu 3
>>>
&g

Re: [PATCH v5 12/14] arm64, acpi, numa: NUMA support based on SRAT and SLIT

2016-04-21 Thread Dennis Chen

On 20 April 2016 at 09:40, David Daney  wrote:
> From: Hanjun Guo 
>
> Introduce a new file to hold ACPI based NUMA information parsing from
> SRAT and SLIT.
>
> SRAT includes the CPU ACPI ID to Proximity Domain mappings and memory
> ranges to Proximity Domain mapping.  SLIT has the information of inter
> node distances(relative number for access latency).
>
> Signed-off-by: Hanjun Guo 
> Signed-off-by: Ganapatrao Kulkarni 
> [rrich...@cavium.com Reworked for numa v10 series ]
> Signed-off-by: Robert Richter 
> [david.da...@cavium.com reorderd and combinded with other patches in Hanjun 
> Guo's original set]
> Signed-off-by: David Daney 
> ---
>  arch/arm64/include/asm/acpi.h |   8 +++
>  arch/arm64/include/asm/numa.h |   2 +
>  arch/arm64/kernel/Makefile|   1 +
>  arch/arm64/kernel/acpi_numa.c | 149 
> ++
>  arch/arm64/kernel/smp.c   |   2 +
>  arch/arm64/mm/numa.c  |   5 +-
>  6 files changed, 166 insertions(+), 1 deletion(-)
>  create mode 100644 arch/arm64/kernel/acpi_numa.c
>
> diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h
> index aee323b..4b13ecd 100644
> --- a/arch/arm64/include/asm/acpi.h
> +++ b/arch/arm64/include/asm/acpi.h
> @@ -113,4 +113,12 @@ static inline const char *acpi_get_enable_method(int cpu)
>  pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr);
>  #endif
>
> +#ifdef CONFIG_ACPI_NUMA
> +int arm64_acpi_numa_init(void);
> +int acpi_numa_get_nid(unsigned int cpu, u64 hwid);
> +#else
> +static inline int arm64_acpi_numa_init(void) { return -ENOSYS; }
> +static inline int acpi_numa_get_nid(unsigned int cpu, u64 hwid) { return 
> NUMA_NO_NODE; }
> +#endif /* CONFIG_ACPI_NUMA */
> +
>  #endif /*_ASM_ACPI_H*/
> diff --git a/arch/arm64/include/asm/numa.h b/arch/arm64/include/asm/numa.h
> index e9b4f29..600887e 100644
> --- a/arch/arm64/include/asm/numa.h
> +++ b/arch/arm64/include/asm/numa.h
> @@ -5,6 +5,8 @@
>
>  #ifdef CONFIG_NUMA
>
> +#define NR_NODE_MEMBLKS(MAX_NUMNODES * 2)
> +
>  /* currently, arm64 implements flat NUMA topology */
>  #define parent_node(node)  (node)
>
> diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
> index 3793003..69569c6 100644
> --- a/arch/arm64/kernel/Makefile
> +++ b/arch/arm64/kernel/Makefile
> @@ -42,6 +42,7 @@ arm64-obj-$(CONFIG_EFI)   += efi.o 
> efi-entry.stub.o
>  arm64-obj-$(CONFIG_PCI)+= pci.o
>  arm64-obj-$(CONFIG_ARMV8_DEPRECATED)   += armv8_deprecated.o
>  arm64-obj-$(CONFIG_ACPI)   += acpi.o
> +arm64-obj-$(CONFIG_ACPI_NUMA)  += acpi_numa.o
>  arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL)+= 
> acpi_parking_protocol.o
>  arm64-obj-$(CONFIG_PARAVIRT)   += paravirt.o
>  arm64-obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
> diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c
> new file mode 100644
> index 000..fd72070
> --- /dev/null
> +++ b/arch/arm64/kernel/acpi_numa.c
> @@ -0,0 +1,149 @@
> +/*
> + * ACPI 5.1 based NUMA setup for ARM64
> + * Lots of code was borrowed from arch/x86/mm/srat.c
> + *
> + * Copyright 2004 Andi Kleen, SuSE Labs.
> + * Copyright (C) 2013-2016, Linaro Ltd.
> + * Author: Hanjun Guo 
> + *
> + * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs.
> + *
> + * Called from acpi_numa_init while reading the SRAT and SLIT tables.
> + * Assumes all memory regions belonging to a single proximity domain
> + * are in one chunk. Holes between them will be included in the node.
> + */
> +
> +#define pr_fmt(fmt) "ACPI: NUMA: " fmt
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +#include 
> +#include 
> +
> +static int cpus_in_srat;
> +
> +struct __node_cpu_hwid {
> +   u32 node_id;/* logical node containing this CPU */
> +   u64 cpu_hwid;   /* MPIDR for this CPU */
> +};
> +
> +static struct __node_cpu_hwid early_node_cpu_hwid[NR_CPUS] = {
> +[0 ... NR_CPUS - 1] = {NUMA_NO_NODE, PHYS_CPUID_INVALID} };
> +
> +int acpi_numa_get_nid(unsigned int cpu, u64 hwid)
> +{
> +   int i;
> +
> +   for (i = 0; i < cpus_in_srat; i++) {
> +   if (hwid == early_node_cpu_hwid[i].cpu_hwid)
> +   return early_node_cpu_hwid[i].node_id;
> +   }
> +
> +   return NUMA_NO_NODE;
> +}
> +
> +static int __init get_mpidr_in_madt(int acpi_id, u64 *mpidr)
> +{
> +   unsigned long madt_end, entry;
> +   struct acpi_table_madt *madt;
> +   acpi_size tbl_size;
> +
> +   if (ACPI_FAILURE(acpi_get_table_with_size(ACPI_SIG_MADT, 0,
> +   (struct acpi_table_header **), _size)))
> +   return -ENODEV;
> +
> +   entry = (unsigned long)madt;
> +   madt_end =

Re: [PATCH v5 12/14] arm64, acpi, numa: NUMA support based on SRAT and SLIT

2016-04-21 Thread Dennis Chen

On 20 April 2016 at 09:40, David Daney  wrote:
> From: Hanjun Guo 
>
> Introduce a new file to hold ACPI based NUMA information parsing from
> SRAT and SLIT.
>
> SRAT includes the CPU ACPI ID to Proximity Domain mappings and memory
> ranges to Proximity Domain mapping.  SLIT has the information of inter
> node distances(relative number for access latency).
>
> Signed-off-by: Hanjun Guo 
> Signed-off-by: Ganapatrao Kulkarni 
> [rrich...@cavium.com Reworked for numa v10 series ]
> Signed-off-by: Robert Richter 
> [david.da...@cavium.com reorderd and combinded with other patches in Hanjun 
> Guo's original set]
> Signed-off-by: David Daney 
> ---
>  arch/arm64/include/asm/acpi.h |   8 +++
>  arch/arm64/include/asm/numa.h |   2 +
>  arch/arm64/kernel/Makefile|   1 +
>  arch/arm64/kernel/acpi_numa.c | 149 
> ++
>  arch/arm64/kernel/smp.c   |   2 +
>  arch/arm64/mm/numa.c  |   5 +-
>  6 files changed, 166 insertions(+), 1 deletion(-)
>  create mode 100644 arch/arm64/kernel/acpi_numa.c
>
> diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h
> index aee323b..4b13ecd 100644
> --- a/arch/arm64/include/asm/acpi.h
> +++ b/arch/arm64/include/asm/acpi.h
> @@ -113,4 +113,12 @@ static inline const char *acpi_get_enable_method(int cpu)
>  pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr);
>  #endif
>
> +#ifdef CONFIG_ACPI_NUMA
> +int arm64_acpi_numa_init(void);
> +int acpi_numa_get_nid(unsigned int cpu, u64 hwid);
> +#else
> +static inline int arm64_acpi_numa_init(void) { return -ENOSYS; }
> +static inline int acpi_numa_get_nid(unsigned int cpu, u64 hwid) { return 
> NUMA_NO_NODE; }
> +#endif /* CONFIG_ACPI_NUMA */
> +
>  #endif /*_ASM_ACPI_H*/
> diff --git a/arch/arm64/include/asm/numa.h b/arch/arm64/include/asm/numa.h
> index e9b4f29..600887e 100644
> --- a/arch/arm64/include/asm/numa.h
> +++ b/arch/arm64/include/asm/numa.h
> @@ -5,6 +5,8 @@
>
>  #ifdef CONFIG_NUMA
>
> +#define NR_NODE_MEMBLKS(MAX_NUMNODES * 2)
> +
>  /* currently, arm64 implements flat NUMA topology */
>  #define parent_node(node)  (node)
>
> diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
> index 3793003..69569c6 100644
> --- a/arch/arm64/kernel/Makefile
> +++ b/arch/arm64/kernel/Makefile
> @@ -42,6 +42,7 @@ arm64-obj-$(CONFIG_EFI)   += efi.o 
> efi-entry.stub.o
>  arm64-obj-$(CONFIG_PCI)+= pci.o
>  arm64-obj-$(CONFIG_ARMV8_DEPRECATED)   += armv8_deprecated.o
>  arm64-obj-$(CONFIG_ACPI)   += acpi.o
> +arm64-obj-$(CONFIG_ACPI_NUMA)  += acpi_numa.o
>  arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL)+= 
> acpi_parking_protocol.o
>  arm64-obj-$(CONFIG_PARAVIRT)   += paravirt.o
>  arm64-obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
> diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c
> new file mode 100644
> index 000..fd72070
> --- /dev/null
> +++ b/arch/arm64/kernel/acpi_numa.c
> @@ -0,0 +1,149 @@
> +/*
> + * ACPI 5.1 based NUMA setup for ARM64
> + * Lots of code was borrowed from arch/x86/mm/srat.c
> + *
> + * Copyright 2004 Andi Kleen, SuSE Labs.
> + * Copyright (C) 2013-2016, Linaro Ltd.
> + * Author: Hanjun Guo 
> + *
> + * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs.
> + *
> + * Called from acpi_numa_init while reading the SRAT and SLIT tables.
> + * Assumes all memory regions belonging to a single proximity domain
> + * are in one chunk. Holes between them will be included in the node.
> + */
> +
> +#define pr_fmt(fmt) "ACPI: NUMA: " fmt
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +#include 
> +#include 
> +
> +static int cpus_in_srat;
> +
> +struct __node_cpu_hwid {
> +   u32 node_id;/* logical node containing this CPU */
> +   u64 cpu_hwid;   /* MPIDR for this CPU */
> +};
> +
> +static struct __node_cpu_hwid early_node_cpu_hwid[NR_CPUS] = {
> +[0 ... NR_CPUS - 1] = {NUMA_NO_NODE, PHYS_CPUID_INVALID} };
> +
> +int acpi_numa_get_nid(unsigned int cpu, u64 hwid)
> +{
> +   int i;
> +
> +   for (i = 0; i < cpus_in_srat; i++) {
> +   if (hwid == early_node_cpu_hwid[i].cpu_hwid)
> +   return early_node_cpu_hwid[i].node_id;
> +   }
> +
> +   return NUMA_NO_NODE;
> +}
> +
> +static int __init get_mpidr_in_madt(int acpi_id, u64 *mpidr)
> +{
> +   unsigned long madt_end, entry;
> +   struct acpi_table_madt *madt;
> +   acpi_size tbl_size;
> +
> +   if (ACPI_FAILURE(acpi_get_table_with_size(ACPI_SIG_MADT, 0,
> +   (struct acpi_table_header **), _size)))
> +   return -ENODEV;
> +
> +   entry = (unsigned long)madt;
> +   madt_end = entry + madt->header.length;
> +
> +   /* Parse all entries looking for a match. */
> +   entry += sizeof(struct acpi_table_madt);
> +   while (entry +

Re: [PATCH v5 12/14] arm64, acpi, numa: NUMA support based on SRAT and SLIT

2016-04-20 Thread Dennis Chen

On 20 April 2016 at 09:40, David Daney  wrote:
> From: Hanjun Guo 
>
> Introduce a new file to hold ACPI based NUMA information parsing from
> SRAT and SLIT.
>
> SRAT includes the CPU ACPI ID to Proximity Domain mappings and memory
> ranges to Proximity Domain mapping.  SLIT has the information of inter
> node distances(relative number for access latency).
>
> Signed-off-by: Hanjun Guo 
> Signed-off-by: Ganapatrao Kulkarni 
> [rrich...@cavium.com Reworked for numa v10 series ]
> Signed-off-by: Robert Richter 
> [david.da...@cavium.com reorderd and combinded with other patches in Hanjun 
> Guo's original set]
> Signed-off-by: David Daney 
> ---
>  arch/arm64/include/asm/acpi.h |   8 +++
>  arch/arm64/include/asm/numa.h |   2 +
>  arch/arm64/kernel/Makefile|   1 +
>  arch/arm64/kernel/acpi_numa.c | 149 
> ++
>  arch/arm64/kernel/smp.c   |   2 +
>  arch/arm64/mm/numa.c  |   5 +-
>  6 files changed, 166 insertions(+), 1 deletion(-)
>  create mode 100644 arch/arm64/kernel/acpi_numa.c
>
> diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h
> index aee323b..4b13ecd 100644
> --- a/arch/arm64/include/asm/acpi.h
> +++ b/arch/arm64/include/asm/acpi.h
> @@ -113,4 +113,12 @@ static inline const char *acpi_get_enable_method(int cpu)
>  pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr);
>  #endif
>
> +#ifdef CONFIG_ACPI_NUMA
> +int arm64_acpi_numa_init(void);
> +int acpi_numa_get_nid(unsigned int cpu, u64 hwid);
> +#else
> +static inline int arm64_acpi_numa_init(void) { return -ENOSYS; }
> +static inline int acpi_numa_get_nid(unsigned int cpu, u64 hwid) { return 
> NUMA_NO_NODE; }
> +#endif /* CONFIG_ACPI_NUMA */
> +
>  #endif /*_ASM_ACPI_H*/
> diff --git a/arch/arm64/include/asm/numa.h b/arch/arm64/include/asm/numa.h
> index e9b4f29..600887e 100644
> --- a/arch/arm64/include/asm/numa.h
> +++ b/arch/arm64/include/asm/numa.h
> @@ -5,6 +5,8 @@
>
>  #ifdef CONFIG_NUMA
>
> +#define NR_NODE_MEMBLKS(MAX_NUMNODES * 2)
> +
>  /* currently, arm64 implements flat NUMA topology */
>  #define parent_node(node)  (node)
>
> diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
> index 3793003..69569c6 100644
> --- a/arch/arm64/kernel/Makefile
> +++ b/arch/arm64/kernel/Makefile
> @@ -42,6 +42,7 @@ arm64-obj-$(CONFIG_EFI)   += efi.o 
> efi-entry.stub.o
>  arm64-obj-$(CONFIG_PCI)+= pci.o
>  arm64-obj-$(CONFIG_ARMV8_DEPRECATED)   += armv8_deprecated.o
>  arm64-obj-$(CONFIG_ACPI)   += acpi.o
> +arm64-obj-$(CONFIG_ACPI_NUMA)  += acpi_numa.o
>  arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL)+= 
> acpi_parking_protocol.o
>  arm64-obj-$(CONFIG_PARAVIRT)   += paravirt.o
>  arm64-obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
> diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c
> new file mode 100644
> index 000..fd72070
> --- /dev/null
> +++ b/arch/arm64/kernel/acpi_numa.c
> @@ -0,0 +1,149 @@
> +/*
> + * ACPI 5.1 based NUMA setup for ARM64
> + * Lots of code was borrowed from arch/x86/mm/srat.c
> + *
> + * Copyright 2004 Andi Kleen, SuSE Labs.
> + * Copyright (C) 2013-2016, Linaro Ltd.
> + * Author: Hanjun Guo 
> + *
> + * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs.
> + *
> + * Called from acpi_numa_init while reading the SRAT and SLIT tables.
> + * Assumes all memory regions belonging to a single proximity domain
> + * are in one chunk. Holes between them will be included in the node.
> + */
> +
> +#define pr_fmt(fmt) "ACPI: NUMA: " fmt
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +#include 
> +#include 
> +
> +static int cpus_in_srat;
> +
> +struct __node_cpu_hwid {
> +   u32 node_id;/* logical node containing this CPU */
> +   u64 cpu_hwid;   /* MPIDR for this CPU */
> +};
> +
> +static struct __node_cpu_hwid early_node_cpu_hwid[NR_CPUS] = {
> +[0 ... NR_CPUS - 1] = {NUMA_NO_NODE, PHYS_CPUID_INVALID} };
> +
> +int acpi_numa_get_nid(unsigned int cpu, u64 hwid)
> +{
> +   int i;
> +
> +   for (i = 0; i < cpus_in_srat; i++) {
> +   if (hwid == early_node_cpu_hwid[i].cpu_hwid)
> +   return early_node_cpu_hwid[i].node_id;
> +   }
> +
> +   return NUMA_NO_NODE;
> +}
> +
> +static int __init get_mpidr_in_madt(int acpi_id, u64 *mpidr)
> +{
> +   unsigned long madt_end, entry;
> +   struct acpi_table_madt *madt;
> +   acpi_size tbl_size;
> +
> +   if (ACPI_FAILURE(acpi_get_table_with_size(ACPI_SIG_MADT, 0,
> +   (struct acpi_table_header **), _size)))
> +   return -ENODEV;
> +
> +   entry = (unsigned long)madt;
> +   madt_end =

Re: [PATCH v5 12/14] arm64, acpi, numa: NUMA support based on SRAT and SLIT

2016-04-20 Thread Dennis Chen

On 20 April 2016 at 09:40, David Daney  wrote:
> From: Hanjun Guo 
>
> Introduce a new file to hold ACPI based NUMA information parsing from
> SRAT and SLIT.
>
> SRAT includes the CPU ACPI ID to Proximity Domain mappings and memory
> ranges to Proximity Domain mapping.  SLIT has the information of inter
> node distances(relative number for access latency).
>
> Signed-off-by: Hanjun Guo 
> Signed-off-by: Ganapatrao Kulkarni 
> [rrich...@cavium.com Reworked for numa v10 series ]
> Signed-off-by: Robert Richter 
> [david.da...@cavium.com reorderd and combinded with other patches in Hanjun 
> Guo's original set]
> Signed-off-by: David Daney 
> ---
>  arch/arm64/include/asm/acpi.h |   8 +++
>  arch/arm64/include/asm/numa.h |   2 +
>  arch/arm64/kernel/Makefile|   1 +
>  arch/arm64/kernel/acpi_numa.c | 149 
> ++
>  arch/arm64/kernel/smp.c   |   2 +
>  arch/arm64/mm/numa.c  |   5 +-
>  6 files changed, 166 insertions(+), 1 deletion(-)
>  create mode 100644 arch/arm64/kernel/acpi_numa.c
>
> diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h
> index aee323b..4b13ecd 100644
> --- a/arch/arm64/include/asm/acpi.h
> +++ b/arch/arm64/include/asm/acpi.h
> @@ -113,4 +113,12 @@ static inline const char *acpi_get_enable_method(int cpu)
>  pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr);
>  #endif
>
> +#ifdef CONFIG_ACPI_NUMA
> +int arm64_acpi_numa_init(void);
> +int acpi_numa_get_nid(unsigned int cpu, u64 hwid);
> +#else
> +static inline int arm64_acpi_numa_init(void) { return -ENOSYS; }
> +static inline int acpi_numa_get_nid(unsigned int cpu, u64 hwid) { return 
> NUMA_NO_NODE; }
> +#endif /* CONFIG_ACPI_NUMA */
> +
>  #endif /*_ASM_ACPI_H*/
> diff --git a/arch/arm64/include/asm/numa.h b/arch/arm64/include/asm/numa.h
> index e9b4f29..600887e 100644
> --- a/arch/arm64/include/asm/numa.h
> +++ b/arch/arm64/include/asm/numa.h
> @@ -5,6 +5,8 @@
>
>  #ifdef CONFIG_NUMA
>
> +#define NR_NODE_MEMBLKS(MAX_NUMNODES * 2)
> +
>  /* currently, arm64 implements flat NUMA topology */
>  #define parent_node(node)  (node)
>
> diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
> index 3793003..69569c6 100644
> --- a/arch/arm64/kernel/Makefile
> +++ b/arch/arm64/kernel/Makefile
> @@ -42,6 +42,7 @@ arm64-obj-$(CONFIG_EFI)   += efi.o 
> efi-entry.stub.o
>  arm64-obj-$(CONFIG_PCI)+= pci.o
>  arm64-obj-$(CONFIG_ARMV8_DEPRECATED)   += armv8_deprecated.o
>  arm64-obj-$(CONFIG_ACPI)   += acpi.o
> +arm64-obj-$(CONFIG_ACPI_NUMA)  += acpi_numa.o
>  arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL)+= 
> acpi_parking_protocol.o
>  arm64-obj-$(CONFIG_PARAVIRT)   += paravirt.o
>  arm64-obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
> diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c
> new file mode 100644
> index 000..fd72070
> --- /dev/null
> +++ b/arch/arm64/kernel/acpi_numa.c
> @@ -0,0 +1,149 @@
> +/*
> + * ACPI 5.1 based NUMA setup for ARM64
> + * Lots of code was borrowed from arch/x86/mm/srat.c
> + *
> + * Copyright 2004 Andi Kleen, SuSE Labs.
> + * Copyright (C) 2013-2016, Linaro Ltd.
> + * Author: Hanjun Guo 
> + *
> + * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs.
> + *
> + * Called from acpi_numa_init while reading the SRAT and SLIT tables.
> + * Assumes all memory regions belonging to a single proximity domain
> + * are in one chunk. Holes between them will be included in the node.
> + */
> +
> +#define pr_fmt(fmt) "ACPI: NUMA: " fmt
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +#include 
> +#include 
> +
> +static int cpus_in_srat;
> +
> +struct __node_cpu_hwid {
> +   u32 node_id;/* logical node containing this CPU */
> +   u64 cpu_hwid;   /* MPIDR for this CPU */
> +};
> +
> +static struct __node_cpu_hwid early_node_cpu_hwid[NR_CPUS] = {
> +[0 ... NR_CPUS - 1] = {NUMA_NO_NODE, PHYS_CPUID_INVALID} };
> +
> +int acpi_numa_get_nid(unsigned int cpu, u64 hwid)
> +{
> +   int i;
> +
> +   for (i = 0; i < cpus_in_srat; i++) {
> +   if (hwid == early_node_cpu_hwid[i].cpu_hwid)
> +   return early_node_cpu_hwid[i].node_id;
> +   }
> +
> +   return NUMA_NO_NODE;
> +}
> +
> +static int __init get_mpidr_in_madt(int acpi_id, u64 *mpidr)
> +{
> +   unsigned long madt_end, entry;
> +   struct acpi_table_madt *madt;
> +   acpi_size tbl_size;
> +
> +   if (ACPI_FAILURE(acpi_get_table_with_size(ACPI_SIG_MADT, 0,
> +   (struct acpi_table_header **), _size)))
> +   return -ENODEV;
> +
> +   entry = (unsigned long)madt;
> +   madt_end = entry + madt->header.length;
> +
> +   /* Parse all entries looking for a match. */
> +   entry += sizeof(struct acpi_table_madt);
> +   while (entry +

Re: [PATCH V5 1/9] ACPI: Honor ACPI _CCA attribute setting

2015-11-02 Thread Dennis Chen

On Mon, Nov 02, 2015 at 09:51:46AM -0600, Suravee Suthikulanit wrote:
> Hi Dennis / Hanjun,
> 
> On 11/2/2015 5:58 AM, Hanjun Guo wrote:
> >Hi Dennis,
> >
> >On 11/02/2015 12:02 PM, Dennis Chen wrote:
> >>On Thu, Oct 29, 2015 at 6:50 AM, Suravee Suthikulpanit
> >> wrote:
> >>>From: Jeremy Linton 
> >>>
> >>>ACPI configurations can now mark devices as noncoherent,
> >>>support that choice.
> >>>
> >>>NOTE: This is required to support USB on ARM Juno Development Board.
> >>>
> >>>Signed-off-by: Jeremy Linton 
> >>>Signed-off-by: Suravee Suthikulpanit 
> >>>CC: Bjorn Helgaas 
> >>>CC: Catalin Marinas 
> >>>CC: Rob Herring 
> >>>CC: Will Deacon 
> >>>CC: Rafael J. Wysocki 
> >>>---
> >>>  include/acpi/acpi_bus.h | 5 +++--
> >>>  1 file changed, 3 insertions(+), 2 deletions(-)
> >>>
> >>>diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
> >>>index d11eff8..0f131d2 100644
> >>>--- a/include/acpi/acpi_bus.h
> >>>+++ b/include/acpi/acpi_bus.h
> >>>@@ -407,7 +407,7 @@ static inline bool acpi_check_dma(struct
> >>>acpi_device *adev, bool *coherent)
> >>>   * case 1. Do not support and disable DMA.
> >>>   * case 2. Support but rely on arch-specific cache maintenance for
> >>>   * non-coherence DMA operations.
> >>>- * Currently, we implement case 1 above.
> >>>+ * Currently, we implement case 2 above.
> >>>   *
> >>>   * For the case when _CCA is missing (i.e. cca_seen=0) and
> >>>   * platform specifies ACPI_CCA_REQUIRED, we do not support DMA,
> >>>@@ -415,7 +415,8 @@ static inline bool acpi_check_dma(struct
> >>>acpi_device *adev, bool *coherent)
> >>>   *
> >>>   * See acpi_init_coherency() for more info.
> >>>   */
> >>>-if (adev->flags.coherent_dma) {
> >>>+if (adev->flags.coherent_dma ||
> >>>+(adev->flags.cca_seen && IS_ENABLED(CONFIG_ARM64))) {
> >>>  ret = true;
> >>>  if (coherent)
> >>>  *coherent = adev->flags.coherent_dma;
> >>
> >>Hi Suravee,
> >>
> >>The acpi_check_dma function has been removed in patch 6 of this patch
> >>set, why it is still be used
> >>here, am I missing something? If the acpi_check_dma will be used in
> >>the future, personally I'd like
> >
> >I think this patch just to let people know that there is
> >case that arch-specific cache maintenance is still needed
> >for ACPI (such as Juno board), and in the later patches will
> >cover this case.
> >
> >acpi_check_dma() will be replaced by acpi_get_dma_attr(),
> >and in acpi_get_dma_attr() will cover that case and will
> >be easily understood. (Suravee, correct me if I'm wrong :) )
> 
> Thanks Hanjun for filling in the info.
> 
> Yes, this is mainly to document the logic changes required by Juno,
> which would be more clear than just merging this change in the later
> patch.
>
Clear.
 
> >>to use IS_ENABLED(CONFIG_ACPI_CCA_REQUIRED) while not CONFIG_ARM64
> >>macro here,
> 
> We could have used CONFIG_ACPI_CCA_REQUIRED here, but this will be
> replaced by logic in patch 5, and removed in patch 6 anyways. So, I
> think it is okay. Eventually, the rest of the logic will be using
> CONFIG_ACPI_CCA_REQUIRED.
> 
> or since _CCA attribute
> >>is arch-specific, it's reasonable to leave the _CCA handling policy to
> >>the arch-specific code. For example,
> >>with a link weak function like acpi_arch_check_dma() as a default
> >>handling if no arch-specific code
> >>provided, the actual _CCA handling will be implemented in the ARM,
> >>Intel or other Arch if required.
> >
> >Actually Intel platform don't need _CCA and it's coherent
> >in default, since _CCA is in ACPI spec, I would like it's
> >handled in ACPI core.
> >
> >Thanks
> >Hanjun
> 
> I also agree with Hanjun that the CCA parsing should be handled by
> the ACPI core driver. Since we are using the
> CONFIG_ACPI_CCA_REQUIRED, we should not need to have arch-specific
> code. If the ACPI spec gets more complicate in the future, we can
> revisit this. :)
> 
Hmm, seems I have no objection currently if we only think about intel and
arm arch. Things maybe a little bit complicated if more Archs becomes 
ACPI awareness, if any. Good to see the patch set upstream soon :) Thank you
Suravee and Hanjun.

> Thanks,
> Suravee
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH V5 1/9] ACPI: Honor ACPI _CCA attribute setting

2015-11-02 Thread Dennis Chen

On Mon, Nov 02, 2015 at 09:51:46AM -0600, Suravee Suthikulanit wrote:
> Hi Dennis / Hanjun,
> 
> On 11/2/2015 5:58 AM, Hanjun Guo wrote:
> >Hi Dennis,
> >
> >On 11/02/2015 12:02 PM, Dennis Chen wrote:
> >>On Thu, Oct 29, 2015 at 6:50 AM, Suravee Suthikulpanit
> >><suravee.suthikulpa...@amd.com> wrote:
> >>>From: Jeremy Linton <jeremy.lin...@arm.com>
> >>>
> >>>ACPI configurations can now mark devices as noncoherent,
> >>>support that choice.
> >>>
> >>>NOTE: This is required to support USB on ARM Juno Development Board.
> >>>
> >>>Signed-off-by: Jeremy Linton <jeremy.lin...@arm.com>
> >>>Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpa...@amd.com>
> >>>CC: Bjorn Helgaas <bhelg...@google.com>
> >>>CC: Catalin Marinas <catalin.mari...@arm.com>
> >>>CC: Rob Herring <robh...@kernel.org>
> >>>CC: Will Deacon <will.dea...@arm.com>
> >>>CC: Rafael J. Wysocki <r...@rjwysocki.net>
> >>>---
> >>>  include/acpi/acpi_bus.h | 5 +++--
> >>>  1 file changed, 3 insertions(+), 2 deletions(-)
> >>>
> >>>diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
> >>>index d11eff8..0f131d2 100644
> >>>--- a/include/acpi/acpi_bus.h
> >>>+++ b/include/acpi/acpi_bus.h
> >>>@@ -407,7 +407,7 @@ static inline bool acpi_check_dma(struct
> >>>acpi_device *adev, bool *coherent)
> >>>   * case 1. Do not support and disable DMA.
> >>>   * case 2. Support but rely on arch-specific cache maintenance for
> >>>   * non-coherence DMA operations.
> >>>- * Currently, we implement case 1 above.
> >>>+ * Currently, we implement case 2 above.
> >>>   *
> >>>   * For the case when _CCA is missing (i.e. cca_seen=0) and
> >>>   * platform specifies ACPI_CCA_REQUIRED, we do not support DMA,
> >>>@@ -415,7 +415,8 @@ static inline bool acpi_check_dma(struct
> >>>acpi_device *adev, bool *coherent)
> >>>   *
> >>>   * See acpi_init_coherency() for more info.
> >>>   */
> >>>-if (adev->flags.coherent_dma) {
> >>>+if (adev->flags.coherent_dma ||
> >>>+(adev->flags.cca_seen && IS_ENABLED(CONFIG_ARM64))) {
> >>>  ret = true;
> >>>  if (coherent)
> >>>  *coherent = adev->flags.coherent_dma;
> >>
> >>Hi Suravee,
> >>
> >>The acpi_check_dma function has been removed in patch 6 of this patch
> >>set, why it is still be used
> >>here, am I missing something? If the acpi_check_dma will be used in
> >>the future, personally I'd like
> >
> >I think this patch just to let people know that there is
> >case that arch-specific cache maintenance is still needed
> >for ACPI (such as Juno board), and in the later patches will
> >cover this case.
> >
> >acpi_check_dma() will be replaced by acpi_get_dma_attr(),
> >and in acpi_get_dma_attr() will cover that case and will
> >be easily understood. (Suravee, correct me if I'm wrong :) )
> 
> Thanks Hanjun for filling in the info.
> 
> Yes, this is mainly to document the logic changes required by Juno,
> which would be more clear than just merging this change in the later
> patch.
>
Clear.
 
> >>to use IS_ENABLED(CONFIG_ACPI_CCA_REQUIRED) while not CONFIG_ARM64
> >>macro here,
> 
> We could have used CONFIG_ACPI_CCA_REQUIRED here, but this will be
> replaced by logic in patch 5, and removed in patch 6 anyways. So, I
> think it is okay. Eventually, the rest of the logic will be using
> CONFIG_ACPI_CCA_REQUIRED.
> 
> or since _CCA attribute
> >>is arch-specific, it's reasonable to leave the _CCA handling policy to
> >>the arch-specific code. For example,
> >>with a link weak function like acpi_arch_check_dma() as a default
> >>handling if no arch-specific code
> >>provided, the actual _CCA handling will be implemented in the ARM,
> >>Intel or other Arch if required.
> >
> >Actually Intel platform don't need _CCA and it's coherent
> >in default, since _CCA is in ACPI spec, I would like it's
> >handled in ACPI core.
> >
> >Thanks
> >Hanjun
> 
> I also agree with Hanjun that the CCA parsing should be handled by
> the ACPI core driver. Since we are using the
> CONFIG_ACPI_CCA_REQUIRED, we should not need to have arch-specific
> code. If the ACPI spec gets more complicate in the future, we can
> revisit this. :)
> 
Hmm, seems I have no objection currently if we only think about intel and
arm arch. Things maybe a little bit complicated if more Archs becomes 
ACPI awareness, if any. Good to see the patch set upstream soon :) Thank you
Suravee and Hanjun.

> Thanks,
> Suravee
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH V5 1/9] ACPI: Honor ACPI _CCA attribute setting

2015-11-01 Thread Dennis Chen

On Thu, Oct 29, 2015 at 6:50 AM, Suravee Suthikulpanit 
 wrote:
> From: Jeremy Linton 
> 
> ACPI configurations can now mark devices as noncoherent,
> support that choice.
> 
> NOTE: This is required to support USB on ARM Juno Development Board.
> 
> Signed-off-by: Jeremy Linton 
> Signed-off-by: Suravee Suthikulpanit 
> CC: Bjorn Helgaas 
> CC: Catalin Marinas 
> CC: Rob Herring 
> CC: Will Deacon 
> CC: Rafael J. Wysocki 
> ---
>  include/acpi/acpi_bus.h | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
> 
> diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
> index d11eff8..0f131d2 100644
> --- a/include/acpi/acpi_bus.h
> +++ b/include/acpi/acpi_bus.h
> @@ -407,7 +407,7 @@ static inline bool acpi_check_dma(struct acpi_device 
> *adev, bool *coherent)
>* case 1. Do not support and disable DMA.
>* case 2. Support but rely on arch-specific cache maintenance for
>* non-coherence DMA operations.
> -  * Currently, we implement case 1 above.
> +  * Currently, we implement case 2 above.
>*
>* For the case when _CCA is missing (i.e. cca_seen=0) and
>* platform specifies ACPI_CCA_REQUIRED, we do not support DMA,
> @@ -415,7 +415,8 @@ static inline bool acpi_check_dma(struct acpi_device 
> *adev, bool *coherent)
>*
>* See acpi_init_coherency() for more info.
>*/
> - if (adev->flags.coherent_dma) {
> + if (adev->flags.coherent_dma ||
> + (adev->flags.cca_seen && IS_ENABLED(CONFIG_ARM64))) {
>   ret = true;
>   if (coherent)
>   *coherent = adev->flags.coherent_dma;

Hi Suravee,

The acpi_check_dma function has been removed in patch 6 of this patch set, why 
it is still be used
here, am I missing something? If the acpi_check_dma will be used in the future, 
personally I'd like
to use IS_ENABLED(CONFIG_ACPI_CCA_REQUIRED) while not CONFIG_ARM64 macro here, 
or since _CCA attribute
is arch-specific, it's reasonable to leave the _CCA handling policy to the 
arch-specific code. For example,
with a link weak function like acpi_arch_check_dma() as a default handling if 
no arch-specific code 
provided, the actual _CCA handling will be implemented in the ARM, Intel or 
other Arch if required.

Thanks,
Dennis

> -- 
> 2.1.0
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH V5 1/9] ACPI: Honor ACPI _CCA attribute setting

2015-11-01 Thread Dennis Chen

On Thu, Oct 29, 2015 at 6:50 AM, Suravee Suthikulpanit 
 wrote:
> From: Jeremy Linton 
> 
> ACPI configurations can now mark devices as noncoherent,
> support that choice.
> 
> NOTE: This is required to support USB on ARM Juno Development Board.
> 
> Signed-off-by: Jeremy Linton 
> Signed-off-by: Suravee Suthikulpanit 
> CC: Bjorn Helgaas 
> CC: Catalin Marinas 
> CC: Rob Herring 
> CC: Will Deacon 
> CC: Rafael J. Wysocki 
> ---
>  include/acpi/acpi_bus.h | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
> 
> diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
> index d11eff8..0f131d2 100644
> --- a/include/acpi/acpi_bus.h
> +++ b/include/acpi/acpi_bus.h
> @@ -407,7 +407,7 @@ static inline bool acpi_check_dma(struct acpi_device 
> *adev, bool *coherent)
>* case 1. Do not support and disable DMA.
>* case 2. Support but rely on arch-specific cache maintenance for
>* non-coherence DMA operations.
> -  * Currently, we implement case 1 above.
> +  * Currently, we implement case 2 above.
>*
>* For the case when _CCA is missing (i.e. cca_seen=0) and
>* platform specifies ACPI_CCA_REQUIRED, we do not support DMA,
> @@ -415,7 +415,8 @@ static inline bool acpi_check_dma(struct acpi_device 
> *adev, bool *coherent)
>*
>* See acpi_init_coherency() for more info.
>*/
> - if (adev->flags.coherent_dma) {
> + if (adev->flags.coherent_dma ||
> + (adev->flags.cca_seen && IS_ENABLED(CONFIG_ARM64))) {
>   ret = true;
>   if (coherent)
>   *coherent = adev->flags.coherent_dma;

Hi Suravee,

The acpi_check_dma function has been removed in patch 6 of this patch set, why 
it is still be used
here, am I missing something? If the acpi_check_dma will be used in the future, 
personally I'd like
to use IS_ENABLED(CONFIG_ACPI_CCA_REQUIRED) while not CONFIG_ARM64 macro here, 
or since _CCA attribute
is arch-specific, it's reasonable to leave the _CCA handling policy to the 
arch-specific code. For example,
with a link weak function like acpi_arch_check_dma() as a default handling if 
no arch-specific code 
provided, the actual _CCA handling will be implemented in the ARM, Intel or 
other Arch if required.

Thanks,
Dennis

> -- 
> 2.1.0
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] ACPI / ARM64: Get configuration base address of ECAM via ACPI MCFG table

2015-08-30 Thread Dennis Chen

On Fri, Aug 28, 2015 at 03:39:43PM +0100, Lorenzo Pieralisi wrote:
> Hi Dennis,
> 
> You should CC linux-...@vger.kernel.org and the PCI subsystem
> maintainer next time.
>
>

Good reminder! Thanks, mate ;-)
 
> On Fri, Aug 28, 2015 at 01:49:23PM +0100, Dennis Chen wrote:
> > This patch will fall back to ACPI MCFG table if _CBA method fails
> > to get the configuration base address of ECAM. Firmware on ARM
> > platform uses MCFG table instead of _CBA method. This is needed
> > to scan the PCIe root complex for ARM SoC.
> 
> Code to enumerate PCI with ACPI on ARM64 is under review:
> 
> https://lkml.org/lkml/2015/6/8/443
>

Oops,seems I am late, just go through the code to scan the root:
https://lkml.org/lkml/2015/5/26/215
a little bit complicated, to be honest. Maybe I can have some input then.
 
> Having said that, I do not think this patch will be needed,
> you can't add code in the kernel because it may be needed in
> the future, I do not see how it can be possibly useful at present
> on ARM64 unless you pulled some patch dependencies; if that's the
> case you should have listed them.
> 
> This patch can't be review standalone since it has no use in
> the current kernel (at least for ARM64, it should be tested
> on x86 though).

I do have a patch set to enumerate all the downstream devices of
the PCIe root bridge. With this patch, I can focus on the enabling/fixing
of the drivers of those devices. As you can imagine, the patch have some 
redundant codes with PCI/ACPI codes now under x86 directory. It's reasonable 
to move those arch-agnostic codes to a common place. I am OK to keep
them pending as private as a test code base for me  

As for this patch, it's used to get the ecam address from MCFG instead of
_CBA which x86 is using, see acpi_pci_root_get_mcfg_addr() code.
So, it's for ARM64 and can be tested with uefi definitely. I'm missing some 
context here?


> Thanks,
> Lorenzo
> 
> > Signed-off-by: Dennis Chen 
> > ---
> >  drivers/pci/pci-acpi.c | 102 
> > ++---
> >  1 file changed, 97 insertions(+), 5 deletions(-)
> > 
> > diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
> > index 314a625..211b9d9 100644
> > --- a/drivers/pci/pci-acpi.c
> > +++ b/drivers/pci/pci-acpi.c
> > @@ -27,18 +27,110 @@ const u8 pci_acpi_dsm_uuid[] = {
> > 0x91, 0x17, 0xea, 0x4d, 0x19, 0xc3, 0x43, 0x4d
> >  };
> >  
> > +static int __init acpi_mcfg_check_entry(struct acpi_table_mcfg *mcfg)
> > +{
> > +   if (mcfg->header.revision != 1) {
> > +   pr_err("invalid header revision:%d in ACPI MCFG table\n",
> > +   mcfg->header.revision);
> > +   return -EINVAL;
> > +   }
> > +
> > +   return 0;
> > +}
> > +
> > +static phys_addr_t __init acpi_get_mcfg_cba(u16 segment, u8 bus)
> > +{
> > +   struct acpi_table_header *table = NULL;
> > +   acpi_size tbl_size;
> > +   struct acpi_table_mcfg *mcfg = NULL;
> > +   struct acpi_mcfg_allocation *cfg_table, *cfg;
> > +   acpi_status status;
> > +   phys_addr_t cba = 0;
> > +   unsigned long i;
> > +   int entries;
> > +
> > +   if (acpi_disabled)
> > +   return 0;
> > +
> > +   status = acpi_get_table_with_size(ACPI_SIG_MCFG, 0, , _size);
> > +   if (ACPI_FAILURE(status)) {
> > +   const char *msg = acpi_format_exception(status);
> > +
> > +   pr_err("Failed to get MCFG table, %s\n", msg);
> > +   return 0;
> > +   }
> > +
> > +   if (table)
> > +   mcfg = (struct acpi_table_mcfg *)table;
> > +
> > +   entries = 0;
> > +   i = table->length - sizeof(struct acpi_table_mcfg);
> > +   while (i >= sizeof(struct acpi_mcfg_allocation)) {
> > +   entries++;
> > +   i -= sizeof(struct acpi_mcfg_allocation);
> > +   }
> > +   if (entries == 0) {
> > +   pr_err("ACPI MCFG table has no entries\n");
> > +   goto out;
> > +   }
> > +
> > +   cfg_table = (struct acpi_mcfg_allocation *) [1];
> > +   for (i = 0; i < entries; i++) {
> > +   cfg = _table[i];
> > +   if (acpi_mcfg_check_entry(mcfg))
> > +   goto out;
> > +
> > +   if (cfg->pci_segment == segment &&
> > +   cfg->start_bus_number <= bus &&
> > +   bus <= cfg->end_bus_number) {
> > +   cba = (phys_addr_t)cfg->address;
> > +   goto out;
> > +   }

Re: [PATCH] ACPI / ARM64: Get configuration base address of ECAM via ACPI MCFG table

2015-08-30 Thread Dennis Chen

On Fri, Aug 28, 2015 at 03:39:43PM +0100, Lorenzo Pieralisi wrote:
> Hi Dennis,
> 
> You should CC linux-...@vger.kernel.org and the PCI subsystem
> maintainer next time.
>
>

Good reminder! Thanks, mate ;-)
 
> On Fri, Aug 28, 2015 at 01:49:23PM +0100, Dennis Chen wrote:
> > This patch will fall back to ACPI MCFG table if _CBA method fails
> > to get the configuration base address of ECAM. Firmware on ARM
> > platform uses MCFG table instead of _CBA method. This is needed
> > to scan the PCIe root complex for ARM SoC.
> 
> Code to enumerate PCI with ACPI on ARM64 is under review:
> 
> https://lkml.org/lkml/2015/6/8/443
>

Oops,seems I am late, just go through the code to scan the root:
https://lkml.org/lkml/2015/5/26/215
a little bit complicated, to be honest. Maybe I can have some input then.
 
> Having said that, I do not think this patch will be needed,
> you can't add code in the kernel because it may be needed in
> the future, I do not see how it can be possibly useful at present
> on ARM64 unless you pulled some patch dependencies; if that's the
> case you should have listed them.
> 
> This patch can't be review standalone since it has no use in
> the current kernel (at least for ARM64, it should be tested
> on x86 though).

I do have a patch set to enumerate all the downstream devices of
the PCIe root bridge. With this patch, I can focus on the enabling/fixing
of the drivers of those devices. As you can imagine, the patch have some 
redundant codes with PCI/ACPI codes now under x86 directory. It's reasonable 
to move those arch-agnostic codes to a common place. I am OK to keep
them pending as private as a test code base for me  

As for this patch, it's used to get the ecam address from MCFG instead of
_CBA which x86 is using, see acpi_pci_root_get_mcfg_addr() code.
So, it's for ARM64 and can be tested with uefi definitely. I'm missing some 
context here?


> Thanks,
> Lorenzo
> 
> > Signed-off-by: Dennis Chen <dennis.c...@arm.com>
> > ---
> >  drivers/pci/pci-acpi.c | 102 
> > ++---
> >  1 file changed, 97 insertions(+), 5 deletions(-)
> > 
> > diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
> > index 314a625..211b9d9 100644
> > --- a/drivers/pci/pci-acpi.c
> > +++ b/drivers/pci/pci-acpi.c
> > @@ -27,18 +27,110 @@ const u8 pci_acpi_dsm_uuid[] = {
> > 0x91, 0x17, 0xea, 0x4d, 0x19, 0xc3, 0x43, 0x4d
> >  };
> >  
> > +static int __init acpi_mcfg_check_entry(struct acpi_table_mcfg *mcfg)
> > +{
> > +   if (mcfg->header.revision != 1) {
> > +   pr_err("invalid header revision:%d in ACPI MCFG table\n",
> > +   mcfg->header.revision);
> > +   return -EINVAL;
> > +   }
> > +
> > +   return 0;
> > +}
> > +
> > +static phys_addr_t __init acpi_get_mcfg_cba(u16 segment, u8 bus)
> > +{
> > +   struct acpi_table_header *table = NULL;
> > +   acpi_size tbl_size;
> > +   struct acpi_table_mcfg *mcfg = NULL;
> > +   struct acpi_mcfg_allocation *cfg_table, *cfg;
> > +   acpi_status status;
> > +   phys_addr_t cba = 0;
> > +   unsigned long i;
> > +   int entries;
> > +
> > +   if (acpi_disabled)
> > +   return 0;
> > +
> > +   status = acpi_get_table_with_size(ACPI_SIG_MCFG, 0, , _size);
> > +   if (ACPI_FAILURE(status)) {
> > +   const char *msg = acpi_format_exception(status);
> > +
> > +   pr_err("Failed to get MCFG table, %s\n", msg);
> > +   return 0;
> > +   }
> > +
> > +   if (table)
> > +   mcfg = (struct acpi_table_mcfg *)table;
> > +
> > +   entries = 0;
> > +   i = table->length - sizeof(struct acpi_table_mcfg);
> > +   while (i >= sizeof(struct acpi_mcfg_allocation)) {
> > +   entries++;
> > +   i -= sizeof(struct acpi_mcfg_allocation);
> > +   }
> > +   if (entries == 0) {
> > +   pr_err("ACPI MCFG table has no entries\n");
> > +   goto out;
> > +   }
> > +
> > +   cfg_table = (struct acpi_mcfg_allocation *) [1];
> > +   for (i = 0; i < entries; i++) {
> > +   cfg = _table[i];
> > +   if (acpi_mcfg_check_entry(mcfg))
> > +   goto out;
> > +
> > +   if (cfg->pci_segment == segment &&
> > +   cfg->start_bus_number <= bus &&
> > +   bus <= cfg->end_bus_number) {
> > +   cba = (phys_addr_t)cfg->address;
> > +   goto out;
> > +

[PATCH] ACPI / ARM64: Get configuration base address of ECAM via ACPI MCFG table

2015-08-28 Thread Dennis Chen

This patch will fall back to ACPI MCFG table if _CBA method fails
to get the configuration base address of ECAM. Firmware on ARM
platform uses MCFG table instead of _CBA method. This is needed
to scan the PCIe root complex for ARM SoC.

Signed-off-by: Dennis Chen 
---
 drivers/pci/pci-acpi.c | 102 ++---
 1 file changed, 97 insertions(+), 5 deletions(-)

diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index 314a625..211b9d9 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -27,18 +27,110 @@ const u8 pci_acpi_dsm_uuid[] = {
0x91, 0x17, 0xea, 0x4d, 0x19, 0xc3, 0x43, 0x4d
 };
 
+static int __init acpi_mcfg_check_entry(struct acpi_table_mcfg *mcfg)
+{
+   if (mcfg->header.revision != 1) {
+   pr_err("invalid header revision:%d in ACPI MCFG table\n",
+   mcfg->header.revision);
+   return -EINVAL;
+   }
+
+   return 0;
+}
+
+static phys_addr_t __init acpi_get_mcfg_cba(u16 segment, u8 bus)
+{
+   struct acpi_table_header *table = NULL;
+   acpi_size tbl_size;
+   struct acpi_table_mcfg *mcfg = NULL;
+   struct acpi_mcfg_allocation *cfg_table, *cfg;
+   acpi_status status;
+   phys_addr_t cba = 0;
+   unsigned long i;
+   int entries;
+
+   if (acpi_disabled)
+   return 0;
+
+   status = acpi_get_table_with_size(ACPI_SIG_MCFG, 0, , _size);
+   if (ACPI_FAILURE(status)) {
+   const char *msg = acpi_format_exception(status);
+
+   pr_err("Failed to get MCFG table, %s\n", msg);
+   return 0;
+   }
+
+   if (table)
+   mcfg = (struct acpi_table_mcfg *)table;
+
+   entries = 0;
+   i = table->length - sizeof(struct acpi_table_mcfg);
+   while (i >= sizeof(struct acpi_mcfg_allocation)) {
+   entries++;
+   i -= sizeof(struct acpi_mcfg_allocation);
+   }
+   if (entries == 0) {
+   pr_err("ACPI MCFG table has no entries\n");
+   goto out;
+   }
+
+   cfg_table = (struct acpi_mcfg_allocation *) [1];
+   for (i = 0; i < entries; i++) {
+   cfg = _table[i];
+   if (acpi_mcfg_check_entry(mcfg))
+   goto out;
+
+   if (cfg->pci_segment == segment &&
+   cfg->start_bus_number <= bus &&
+   bus <= cfg->end_bus_number) {
+   cba = (phys_addr_t)cfg->address;
+   goto out;
+   }
+   }
+out:
+   /*
+* acpi_get_table_with_size() creates MCFG table mapping that
+* should be released after parsing and before resuming boot
+*/
+   early_acpi_os_unmap_memory(table, tbl_size);
+   return cba;
+}
+
 phys_addr_t acpi_pci_root_get_mcfg_addr(acpi_handle handle)
 {
acpi_status status = AE_NOT_EXIST;
-   unsigned long long mcfg_addr;
+   unsigned long long mcfg_addr, mcfg_seg, mcfg_bus;
+   u16 seg;
+   u8 bus;
+
+   if (!handle)
+   return 0;
 
-   if (handle)
-   status = acpi_evaluate_integer(handle, METHOD_NAME__CBA,
+   status = acpi_evaluate_integer(handle, METHOD_NAME__CBA,
   NULL, _addr);
+   if (ACPI_SUCCESS(status))
+   return (phys_addr_t)mcfg_addr;
+
+   pr_info("ACPI: Falling back to acpi mcfg table\n");
+
+   /*
+* Fall back to MCFG table if _CBA failed:
+* get the mcfg_addr via ACPI MCFG table. PCI Firmware v3.2 spec: 4.1.2
+*/
+   status = acpi_evaluate_integer(handle, METHOD_NAME__SEG,
+   NULL, _seg);
if (ACPI_FAILURE(status))
-   return 0;
+   mcfg_seg = 0;
+
+   status = acpi_evaluate_integer(handle, METHOD_NAME__BBN,
+   NULL, _bus);
+   if (ACPI_FAILURE(status))
+   mcfg_bus = 0;
+
+   seg = mcfg_seg & 0x;
+   bus = mcfg_bus & 0xFF;
 
-   return (phys_addr_t)mcfg_addr;
+   return acpi_get_mcfg_cba(seg, bus);
 }
 
 static acpi_status decode_type0_hpx_record(union acpi_object *record,
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] ACPI / ARM64: Get configuration base address of ECAM via ACPI MCFG table

2015-08-28 Thread Dennis Chen

This patch will fall back to ACPI MCFG table if _CBA method fails
to get the configuration base address of ECAM. Firmware on ARM
platform uses MCFG table instead of _CBA method. This is needed
to scan the PCIe root complex for ARM SoC.

Signed-off-by: Dennis Chen dennis.c...@arm.com
---
 drivers/pci/pci-acpi.c | 102 ++---
 1 file changed, 97 insertions(+), 5 deletions(-)

diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index 314a625..211b9d9 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -27,18 +27,110 @@ const u8 pci_acpi_dsm_uuid[] = {
0x91, 0x17, 0xea, 0x4d, 0x19, 0xc3, 0x43, 0x4d
 };
 
+static int __init acpi_mcfg_check_entry(struct acpi_table_mcfg *mcfg)
+{
+   if (mcfg-header.revision != 1) {
+   pr_err(invalid header revision:%d in ACPI MCFG table\n,
+   mcfg-header.revision);
+   return -EINVAL;
+   }
+
+   return 0;
+}
+
+static phys_addr_t __init acpi_get_mcfg_cba(u16 segment, u8 bus)
+{
+   struct acpi_table_header *table = NULL;
+   acpi_size tbl_size;
+   struct acpi_table_mcfg *mcfg = NULL;
+   struct acpi_mcfg_allocation *cfg_table, *cfg;
+   acpi_status status;
+   phys_addr_t cba = 0;
+   unsigned long i;
+   int entries;
+
+   if (acpi_disabled)
+   return 0;
+
+   status = acpi_get_table_with_size(ACPI_SIG_MCFG, 0, table, tbl_size);
+   if (ACPI_FAILURE(status)) {
+   const char *msg = acpi_format_exception(status);
+
+   pr_err(Failed to get MCFG table, %s\n, msg);
+   return 0;
+   }
+
+   if (table)
+   mcfg = (struct acpi_table_mcfg *)table;
+
+   entries = 0;
+   i = table-length - sizeof(struct acpi_table_mcfg);
+   while (i = sizeof(struct acpi_mcfg_allocation)) {
+   entries++;
+   i -= sizeof(struct acpi_mcfg_allocation);
+   }
+   if (entries == 0) {
+   pr_err(ACPI MCFG table has no entries\n);
+   goto out;
+   }
+
+   cfg_table = (struct acpi_mcfg_allocation *) mcfg[1];
+   for (i = 0; i  entries; i++) {
+   cfg = cfg_table[i];
+   if (acpi_mcfg_check_entry(mcfg))
+   goto out;
+
+   if (cfg-pci_segment == segment 
+   cfg-start_bus_number = bus 
+   bus = cfg-end_bus_number) {
+   cba = (phys_addr_t)cfg-address;
+   goto out;
+   }
+   }
+out:
+   /*
+* acpi_get_table_with_size() creates MCFG table mapping that
+* should be released after parsing and before resuming boot
+*/
+   early_acpi_os_unmap_memory(table, tbl_size);
+   return cba;
+}
+
 phys_addr_t acpi_pci_root_get_mcfg_addr(acpi_handle handle)
 {
acpi_status status = AE_NOT_EXIST;
-   unsigned long long mcfg_addr;
+   unsigned long long mcfg_addr, mcfg_seg, mcfg_bus;
+   u16 seg;
+   u8 bus;
+
+   if (!handle)
+   return 0;
 
-   if (handle)
-   status = acpi_evaluate_integer(handle, METHOD_NAME__CBA,
+   status = acpi_evaluate_integer(handle, METHOD_NAME__CBA,
   NULL, mcfg_addr);
+   if (ACPI_SUCCESS(status))
+   return (phys_addr_t)mcfg_addr;
+
+   pr_info(ACPI: Falling back to acpi mcfg table\n);
+
+   /*
+* Fall back to MCFG table if _CBA failed:
+* get the mcfg_addr via ACPI MCFG table. PCI Firmware v3.2 spec: 4.1.2
+*/
+   status = acpi_evaluate_integer(handle, METHOD_NAME__SEG,
+   NULL, mcfg_seg);
if (ACPI_FAILURE(status))
-   return 0;
+   mcfg_seg = 0;
+
+   status = acpi_evaluate_integer(handle, METHOD_NAME__BBN,
+   NULL, mcfg_bus);
+   if (ACPI_FAILURE(status))
+   mcfg_bus = 0;
+
+   seg = mcfg_seg  0x;
+   bus = mcfg_bus  0xFF;
 
-   return (phys_addr_t)mcfg_addr;
+   return acpi_get_mcfg_cba(seg, bus);
 }
 
 static acpi_status decode_type0_hpx_record(union acpi_object *record,
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] USB: uvc: add support for the Microsoft Surface Pro 3 Cameras

2015-06-11 Thread Dennis Chen

> Could you please send me the output of 'lsusb -v -d 045e:07be' and 
> 'lsusb -v -
> d 045e:07bf' (running as root if possible) ?


Bus 001 Device 004: ID 045e:07bf Microsoft Corp. 
Device Descriptor:
  bLength18
  bDescriptorType 1
  bcdUSB   2.00
  bDeviceClass  239 Miscellaneous Device
  bDeviceSubClass 2 ?
  bDeviceProtocol 1 Interface Association
  bMaxPacketSize064
  idVendor   0x045e Microsoft Corp.
  idProduct  0x07bf 
  bcdDevice   21.52
  iManufacturer   1 QCM
  iProduct2 Microsoft LifeCam Rear
  iSerial 0 
  bNumConfigurations  1
  Configuration Descriptor:
bLength 9
bDescriptorType 2
wTotalLength 1982
bNumInterfaces  3
bConfigurationValue 1
iConfiguration  2 Microsoft LifeCam Rear
bmAttributes 0x80
  (Bus Powered)
MaxPower  250mA
Interface Association:
  bLength 8
  bDescriptorType11
  bFirstInterface 0
  bInterfaceCount 3
  bFunctionClass 14 Video
  bFunctionSubClass   3 Video Interface Collection
  bFunctionProtocol   0 
  iFunction   2 Microsoft LifeCam Rear
Interface Descriptor:
  bLength 9
  bDescriptorType 4
  bInterfaceNumber0
  bAlternateSetting   0
  bNumEndpoints   1
  bInterfaceClass14 Video
  bInterfaceSubClass  1 Video Control
  bInterfaceProtocol  1 
  iInterface  2 Microsoft LifeCam Rear
  VideoControl Interface Descriptor:
bLength14
bDescriptorType36
bDescriptorSubtype  1 (HEADER)
bcdUVC   1.50
wTotalLength  105
dwClockFrequency  150.00MHz
bInCollection   2
baInterfaceNr( 0)   1
baInterfaceNr( 1)   2
  VideoControl Interface Descriptor:
bLength18
bDescriptorType36
bDescriptorSubtype  2 (INPUT_TERMINAL)
bTerminalID 1
wTerminalType  0x0201 Camera Sensor
bAssocTerminal  0
iTerminal   0 
wObjectiveFocalLengthMin  0
wObjectiveFocalLengthMax  0
wOcularFocalLength0
bControlSize  3
bmControls   0x0a0e
  Auto-Exposure Mode
  Auto-Exposure Priority
  Exposure Time (Absolute)
  Zoom (Absolute)
  PanTilt (Absolute)
  VideoControl Interface Descriptor:
bLength13
bDescriptorType36
bDescriptorSubtype  5 (PROCESSING_UNIT)
bUnitID 2
bSourceID   1
wMaxMultiplier  0
bControlSize3
bmControls 0x155b
  Brightness
  Contrast
  Saturation
  Sharpness
  White Balance Temperature
  Backlight Compensation
  Power Line Frequency
  White Balance Temperature, Auto
iProcessing 0 
bmVideoStandards 0x 0
  VideoControl Interface Descriptor:
bLength29
bDescriptorType36
bDescriptorSubtype  6 (EXTENSION_UNIT)
bUnitID 4
guidExtensionCode {29a787c9-d359-6945-8467-ff0849fc19e8}
bNumControl22
bNrPins 1
baSourceID( 0)  2
bControlSize4
bmControls( 0)   0xbf
bmControls( 1)   0xfb
bmControls( 2)   0xff
bmControls( 3)   0x00
iExtension  0 
  VideoControl Interface Descriptor:
bLength13
bDescriptorType36
bDescriptorSubtype  7 (unknown)
Invalid desc subtype: 0b 04 00 03 cd 3e 00 cd 3e 00
  VideoControl Interface Descriptor:
bLength 9
bDescriptorType36
bDescriptorSubtype  3 (OUTPUT_TERMINAL)
bTerminalID 8
wTerminalType  0x0101 USB Streaming
bAssocTerminal  0
bSourceID   4
iTerminal   0 
  VideoControl Interface Descriptor:
bLength 9
bDescriptorType36
bDescriptorSubtype  3 (OUTPUT_TERMINAL)
bTerminalID 9
wTerminalType  0x0101 USB Streaming
bAssocTerminal  0
bSourceID  11
iTerminal   0 
  Endpoint Descriptor:
bLength 7
bDescriptorType 5
bEndpointAddress 0x81  EP 1 IN
bmAttributes3

Re: [PATCH] USB: uvc: add support for the Microsoft Surface Pro 3 Cameras

2015-06-11 Thread Dennis Chen

 Could you please send me the output of 'lsusb -v -d 045e:07be' and 
 'lsusb -v -
 d 045e:07bf' (running as root if possible) ?


Bus 001 Device 004: ID 045e:07bf Microsoft Corp. 
Device Descriptor:
  bLength18
  bDescriptorType 1
  bcdUSB   2.00
  bDeviceClass  239 Miscellaneous Device
  bDeviceSubClass 2 ?
  bDeviceProtocol 1 Interface Association
  bMaxPacketSize064
  idVendor   0x045e Microsoft Corp.
  idProduct  0x07bf 
  bcdDevice   21.52
  iManufacturer   1 QCM
  iProduct2 Microsoft LifeCam Rear
  iSerial 0 
  bNumConfigurations  1
  Configuration Descriptor:
bLength 9
bDescriptorType 2
wTotalLength 1982
bNumInterfaces  3
bConfigurationValue 1
iConfiguration  2 Microsoft LifeCam Rear
bmAttributes 0x80
  (Bus Powered)
MaxPower  250mA
Interface Association:
  bLength 8
  bDescriptorType11
  bFirstInterface 0
  bInterfaceCount 3
  bFunctionClass 14 Video
  bFunctionSubClass   3 Video Interface Collection
  bFunctionProtocol   0 
  iFunction   2 Microsoft LifeCam Rear
Interface Descriptor:
  bLength 9
  bDescriptorType 4
  bInterfaceNumber0
  bAlternateSetting   0
  bNumEndpoints   1
  bInterfaceClass14 Video
  bInterfaceSubClass  1 Video Control
  bInterfaceProtocol  1 
  iInterface  2 Microsoft LifeCam Rear
  VideoControl Interface Descriptor:
bLength14
bDescriptorType36
bDescriptorSubtype  1 (HEADER)
bcdUVC   1.50
wTotalLength  105
dwClockFrequency  150.00MHz
bInCollection   2
baInterfaceNr( 0)   1
baInterfaceNr( 1)   2
  VideoControl Interface Descriptor:
bLength18
bDescriptorType36
bDescriptorSubtype  2 (INPUT_TERMINAL)
bTerminalID 1
wTerminalType  0x0201 Camera Sensor
bAssocTerminal  0
iTerminal   0 
wObjectiveFocalLengthMin  0
wObjectiveFocalLengthMax  0
wOcularFocalLength0
bControlSize  3
bmControls   0x0a0e
  Auto-Exposure Mode
  Auto-Exposure Priority
  Exposure Time (Absolute)
  Zoom (Absolute)
  PanTilt (Absolute)
  VideoControl Interface Descriptor:
bLength13
bDescriptorType36
bDescriptorSubtype  5 (PROCESSING_UNIT)
bUnitID 2
bSourceID   1
wMaxMultiplier  0
bControlSize3
bmControls 0x155b
  Brightness
  Contrast
  Saturation
  Sharpness
  White Balance Temperature
  Backlight Compensation
  Power Line Frequency
  White Balance Temperature, Auto
iProcessing 0 
bmVideoStandards 0x 0
  VideoControl Interface Descriptor:
bLength29
bDescriptorType36
bDescriptorSubtype  6 (EXTENSION_UNIT)
bUnitID 4
guidExtensionCode {29a787c9-d359-6945-8467-ff0849fc19e8}
bNumControl22
bNrPins 1
baSourceID( 0)  2
bControlSize4
bmControls( 0)   0xbf
bmControls( 1)   0xfb
bmControls( 2)   0xff
bmControls( 3)   0x00
iExtension  0 
  VideoControl Interface Descriptor:
bLength13
bDescriptorType36
bDescriptorSubtype  7 (unknown)
Invalid desc subtype: 0b 04 00 03 cd 3e 00 cd 3e 00
  VideoControl Interface Descriptor:
bLength 9
bDescriptorType36
bDescriptorSubtype  3 (OUTPUT_TERMINAL)
bTerminalID 8
wTerminalType  0x0101 USB Streaming
bAssocTerminal  0
bSourceID   4
iTerminal   0 
  VideoControl Interface Descriptor:
bLength 9
bDescriptorType36
bDescriptorSubtype  3 (OUTPUT_TERMINAL)
bTerminalID 9
wTerminalType  0x0101 USB Streaming
bAssocTerminal  0
bSourceID  11
iTerminal   0 
  Endpoint Descriptor:
bLength 7
bDescriptorType 5
bEndpointAddress 0x81  EP 1 IN
bmAttributes3

Re: [PATCH] USB: uvc: add support for the Microsoft Surface Pro 3 Cameras

2015-06-09 Thread Dennis Chen

> Is this needed ? Looking at the patch your cameras are UVC-compliant 
> and 
> should thus be picked by the uvcvideo driver without any change to 
> the code.

The cameras are UVC-compliant but are not recognized by the uvc driver.
The patch forces the uvc driver to pick up the camera if present.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] USB: uvc: add support for the Microsoft Surface Pro 3 Cameras

2015-06-09 Thread Dennis Chen

Add support for the Microsoft Surface Pro 3 Cameras.

Signed-off-by: Dennis Chen 
---
 drivers/media/usb/uvc/uvc_driver.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/drivers/media/usb/uvc/uvc_driver.c 
b/drivers/media/usb/uvc/uvc_driver.c
index 5970dd6..ec5a407 100644
--- a/drivers/media/usb/uvc/uvc_driver.c
+++ b/drivers/media/usb/uvc/uvc_driver.c
@@ -2538,6 +2538,22 @@ static struct usb_device_id uvc_ids[] = {
  .bInterfaceSubClass   = 1,
  .bInterfaceProtocol   = 0,
  .driver_info  = UVC_QUIRK_FORCE_Y8 },
+   /*Microsoft Surface Pro 3 Front Camera*/
+   { .match_flags  = USB_DEVICE_ID_MATCH_DEVICE
+   | USB_DEVICE_ID_MATCH_INT_INFO,
+ .idVendor = 0x045e,
+ .idProduct= 0x07be,
+ .bInterfaceClass  = USB_CLASS_VIDEO,
+ .bInterfaceSubClass   = 1,
+ .bInterfaceProtocol   = 1 },
+   /* Microsoft Surface Pro 3 Rear */
+   { .match_flags  = USB_DEVICE_ID_MATCH_DEVICE
+   | USB_DEVICE_ID_MATCH_INT_INFO,
+ .idVendor = 0x045e,
+ .idProduct= 0x07bf,
+ .bInterfaceClass  = USB_CLASS_VIDEO,
+ .bInterfaceSubClass   = 1,
+ .bInterfaceProtocol   = 1 },
/* Generic USB Video Class */
{ USB_INTERFACE_INFO(USB_CLASS_VIDEO, 1, 0) },
{}
-- 
2.4.2
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] USB: uvc: add support for the Microsoft Surface Pro 3 Cameras

2015-06-09 Thread Dennis Chen


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] USB: uvc: add support for the Microsoft Surface Pro 3 Cameras

2015-06-09 Thread Dennis Chen


--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] USB: uvc: add support for the Microsoft Surface Pro 3 Cameras

2015-06-09 Thread Dennis Chen

Add support for the Microsoft Surface Pro 3 Cameras.

Signed-off-by: Dennis Chen barracks...@gmail.com
---
 drivers/media/usb/uvc/uvc_driver.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/drivers/media/usb/uvc/uvc_driver.c 
b/drivers/media/usb/uvc/uvc_driver.c
index 5970dd6..ec5a407 100644
--- a/drivers/media/usb/uvc/uvc_driver.c
+++ b/drivers/media/usb/uvc/uvc_driver.c
@@ -2538,6 +2538,22 @@ static struct usb_device_id uvc_ids[] = {
  .bInterfaceSubClass   = 1,
  .bInterfaceProtocol   = 0,
  .driver_info  = UVC_QUIRK_FORCE_Y8 },
+   /*Microsoft Surface Pro 3 Front Camera*/
+   { .match_flags  = USB_DEVICE_ID_MATCH_DEVICE
+   | USB_DEVICE_ID_MATCH_INT_INFO,
+ .idVendor = 0x045e,
+ .idProduct= 0x07be,
+ .bInterfaceClass  = USB_CLASS_VIDEO,
+ .bInterfaceSubClass   = 1,
+ .bInterfaceProtocol   = 1 },
+   /* Microsoft Surface Pro 3 Rear */
+   { .match_flags  = USB_DEVICE_ID_MATCH_DEVICE
+   | USB_DEVICE_ID_MATCH_INT_INFO,
+ .idVendor = 0x045e,
+ .idProduct= 0x07bf,
+ .bInterfaceClass  = USB_CLASS_VIDEO,
+ .bInterfaceSubClass   = 1,
+ .bInterfaceProtocol   = 1 },
/* Generic USB Video Class */
{ USB_INTERFACE_INFO(USB_CLASS_VIDEO, 1, 0) },
{}
-- 
2.4.2
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] USB: uvc: add support for the Microsoft Surface Pro 3 Cameras

2015-06-09 Thread Dennis Chen

 Is this needed ? Looking at the patch your cameras are UVC-compliant 
 and 
 should thus be picked by the uvcvideo driver without any change to 
 the code.

The cameras are UVC-compliant but are not recognized by the uvc driver.
The patch forces the uvc driver to pick up the camera if present.

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] Staging: comedi: daqboard2000: fixed whitespace coding style issue

2015-06-05 Thread Dennis Chen

>From 843d038eec5ac2c59d3138f19ae52828098c7d50 Mon Sep 17 00:00:00 2001
From: Dennis Chen 
Date: Fri, 5 Jun 2015 15:42:37 -0700
Subject: [PATCH] Staging: comedi: daqboard2000: fixed whitespace coding style
 issue

Fixed whitespace coding style issue.

Signed-off-by: Dennis Chen 
---
 drivers/staging/comedi/drivers/daqboard2000.c | 112 +-
 1 file changed, 56 insertions(+), 56 deletions(-)

diff --git a/drivers/staging/comedi/drivers/daqboard2000.c 
b/drivers/staging/comedi/drivers/daqboard2000.c
index f97d18d..b2c6bc6 100644
--- a/drivers/staging/comedi/drivers/daqboard2000.c
+++ b/drivers/staging/comedi/drivers/daqboard2000.c
@@ -32,72 +32,72 @@ http://www.comedi.org in the comedi_nonfree_firmware 
tarball.
 Configuration options: not applicable, uses PCI auto config
 */
 /*
-   This card was obviously never intended to leave the Windows world,
-   since it lacked all kind of hardware documentation (except for cable
-   pinouts, plug and pray has something to catch up with yet).
-
-   With some help from our swedish distributor, we got the Windows sourcecode
-   for the card, and here are the findings so far.
-
-   1. A good document that describes the PCI interface chip is 9080db-106.pdf
-  available from http://www.plxtech.com/products/io/pci9080 
-
-   2. The initialization done so far is:
-a. program the FPGA (windows code sans a lot of error messages)
-   b.
-
-   3. Analog out seems to work OK with DAC's disabled, if DAC's are enabled,
-  you have to output values to all enabled DAC's until result appears, I
-  guess that it has something to do with pacer clocks, but the source
-  gives me no clues. I'll keep it simple so far.
-
-   4. Analog in.
-Each channel in the scanlist seems to be controlled by four
-   control words:
-
-Word0:
-  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-  ! | | | ! | | | ! | | | ! | | | !
-  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-
-Word1:
-  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-  ! | | | ! | | | ! | | | ! | | | !
-  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   This card was obviously never intended to leave the Windows world,
+   since it lacked all kind of hardware documentation (except for cable
+   pinouts, plug and pray has something to catch up with yet).
+
+   With some help from our swedish distributor, we got the Windows
+   sourcecode for the card, and here are the findings so far.
+
+   1. A good document that describes the PCI interface chip is
+   9080db-106.pdf available from http://www.plxtech.com/products/io/pci9080
+
+   2. The initialization done so far is:
+   a. program the FPGA (windows code sans a lot of error messages)
+   b.
+
+   3. Analog out seems to work OK with DAC's disabled, if DAC's are
+   enabled, you have to output values to all enabled DAC's until result
+   appears, I guess that it has something to do with pacer clocks, but the
+   source gives me no clues. I'll keep it simple so far.
+
+   4. Analog in.
+   Each channel in the scanlist seems to be controlled by four
+   control words:
+
+   Word0:
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ ! | | | ! | | | ! | | | ! | | | !
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+   Word1:
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ ! | | | ! | | | ! | | | ! | | | !
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   | |   | | | | |
-   +--+--+   | | | | +-- Digital input (??)
+  +--+--+   | | | | +-- Digital input (??)
  |  | | | + 10 us settling time
  |  | | +-- Suspend acquisition (last to scan)
  |  | + Simultaneous sample and hold
  |  +-- Signed data format
  +- Correction offset low
 
-Word2:
-  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-  ! | | | ! | | | ! | | | ! | | | !
-  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-   | | | | | | | | | |
-   +-+ +--+--+ +++ +++ +--+--+
-  |   | |   | +- Expansion channel
+   Word2:
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ ! | | | ! | | | ! | | | ! | | | !
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+  | | | | | | | | | |
+  +-+ +--+--+ +++ +++ +--+--+
+ |   | |   | +- Expansion channel
  |   | |   +--- Expansion gain
-  |   | +--- Channel (low)
+ |   | +--- Channel (low)
  |   +- Correction offset high
  +- Correction gain low
-Wo

[PATCH] Staging: comedi: daqboard2000: fixed whitespace coding style issue

2015-06-05 Thread Dennis Chen

From 843d038eec5ac2c59d3138f19ae52828098c7d50 Mon Sep 17 00:00:00 2001
From: Dennis Chen barracks...@gmail.com
Date: Fri, 5 Jun 2015 15:42:37 -0700
Subject: [PATCH] Staging: comedi: daqboard2000: fixed whitespace coding style
 issue

Fixed whitespace coding style issue.

Signed-off-by: Dennis Chen barracks...@gmail.com
---
 drivers/staging/comedi/drivers/daqboard2000.c | 112 +-
 1 file changed, 56 insertions(+), 56 deletions(-)

diff --git a/drivers/staging/comedi/drivers/daqboard2000.c 
b/drivers/staging/comedi/drivers/daqboard2000.c
index f97d18d..b2c6bc6 100644
--- a/drivers/staging/comedi/drivers/daqboard2000.c
+++ b/drivers/staging/comedi/drivers/daqboard2000.c
@@ -32,72 +32,72 @@ http://www.comedi.org in the comedi_nonfree_firmware 
tarball.
 Configuration options: not applicable, uses PCI auto config
 */
 /*
-   This card was obviously never intended to leave the Windows world,
-   since it lacked all kind of hardware documentation (except for cable
-   pinouts, plug and pray has something to catch up with yet).
-
-   With some help from our swedish distributor, we got the Windows sourcecode
-   for the card, and here are the findings so far.
-
-   1. A good document that describes the PCI interface chip is 9080db-106.pdf
-  available from http://www.plxtech.com/products/io/pci9080 
-
-   2. The initialization done so far is:
-a. program the FPGA (windows code sans a lot of error messages)
-   b.
-
-   3. Analog out seems to work OK with DAC's disabled, if DAC's are enabled,
-  you have to output values to all enabled DAC's until result appears, I
-  guess that it has something to do with pacer clocks, but the source
-  gives me no clues. I'll keep it simple so far.
-
-   4. Analog in.
-Each channel in the scanlist seems to be controlled by four
-   control words:
-
-Word0:
-  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-  ! | | | ! | | | ! | | | ! | | | !
-  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-
-Word1:
-  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-  ! | | | ! | | | ! | | | ! | | | !
-  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   This card was obviously never intended to leave the Windows world,
+   since it lacked all kind of hardware documentation (except for cable
+   pinouts, plug and pray has something to catch up with yet).
+
+   With some help from our swedish distributor, we got the Windows
+   sourcecode for the card, and here are the findings so far.
+
+   1. A good document that describes the PCI interface chip is
+   9080db-106.pdf available from http://www.plxtech.com/products/io/pci9080
+
+   2. The initialization done so far is:
+   a. program the FPGA (windows code sans a lot of error messages)
+   b.
+
+   3. Analog out seems to work OK with DAC's disabled, if DAC's are
+   enabled, you have to output values to all enabled DAC's until result
+   appears, I guess that it has something to do with pacer clocks, but the
+   source gives me no clues. I'll keep it simple so far.
+
+   4. Analog in.
+   Each channel in the scanlist seems to be controlled by four
+   control words:
+
+   Word0:
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ ! | | | ! | | | ! | | | ! | | | !
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+   Word1:
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ ! | | | ! | | | ! | | | ! | | | !
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   | |   | | | | |
-   +--+--+   | | | | +-- Digital input (??)
+  +--+--+   | | | | +-- Digital input (??)
  |  | | | + 10 us settling time
  |  | | +-- Suspend acquisition (last to scan)
  |  | + Simultaneous sample and hold
  |  +-- Signed data format
  +- Correction offset low
 
-Word2:
-  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-  ! | | | ! | | | ! | | | ! | | | !
-  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-   | | | | | | | | | |
-   +-+ +--+--+ +++ +++ +--+--+
-  |   | |   | +- Expansion channel
+   Word2:
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ ! | | | ! | | | ! | | | ! | | | !
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+  | | | | | | | | | |
+  +-+ +--+--+ +++ +++ +--+--+
+ |   | |   | +- Expansion channel
  |   | |   +--- Expansion gain
-  |   | +--- Channel (low)
+ |   | +--- Channel (low)
  |   +- Correction offset high
  +- Correction gain low

Re: [PATCH] KVM-Use value reading from MSR when construct the eptp in VMX mode

2014-08-25 Thread Dennis Chen

On Mon, Aug 25, 2014 at 10:04 PM, Gleb Natapov  wrote:
> On Mon, Aug 25, 2014 at 11:16:34AM +0800, Dennis Chen wrote:
>> On Sun, Aug 24, 2014 at 5:38 PM, Gleb Natapov  wrote:
>> > On Sun, Aug 24, 2014 at 11:54:32AM +0800, Dennis Chen wrote:
>> >> This patch is used to construct the eptp in vmx mode with values
>> >> readed from MSR according to the intel x86 software developer's
>> >> manual.
>> >>
>> >>  static u64 construct_eptp(unsigned long root_hpa)
>> >>  {
>> >> -u64 eptp;
>> >> +u64 eptp, pwl;
>> >> +
>> >> +if (cpu_has_vmx_ept_4levels())
>> >> +pwl = VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT;
>> >> +else {
>> >> +WARN(1, "Unsupported page-walk length of 4.\n");
>> > Page-walk length of 4 is the only one that is supported.
>> >
>> Since there is a bit 6 in IA32_VMX_EPT_VPID_CAP MSR indicating the
>> support for the page-walk length, I think sanity check is necessary.
>> But I just checked the code, it's already done in the hardware_setup()
>> function which will disable ept feature if the page-wake length is not
>> 4. Gleb, any comments for the memory type check part?
> Looks fine, but are there CPUs out there that do not support WB for eptp? 
> Since
> there was no bug reports about it I assume no.

Hmm, currently I can't find a x86 processor that don't support WB for
eptp, also there is no relevant bug reported.
I just read the intel SDM 24.6.11: SW should read the VMX capability
MSR_IA32_VMX_EPT_VPID_CAP to determine
what EPT MT are supported. But looks like this is not a big concern in
the community, so let's go back this thread if
we encounter one unfornately in the future. Thanks for the comments.

>
> --
> Gleb.



-- 
Den
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] KVM-Use value reading from MSR when construct the eptp in VMX mode

2014-08-25 Thread Dennis Chen

On Mon, Aug 25, 2014 at 10:04 PM, Gleb Natapov g...@kernel.org wrote:
 On Mon, Aug 25, 2014 at 11:16:34AM +0800, Dennis Chen wrote:
 On Sun, Aug 24, 2014 at 5:38 PM, Gleb Natapov g...@kernel.org wrote:
  On Sun, Aug 24, 2014 at 11:54:32AM +0800, Dennis Chen wrote:
  This patch is used to construct the eptp in vmx mode with values
  readed from MSR according to the intel x86 software developer's
  manual.
 
   static u64 construct_eptp(unsigned long root_hpa)
   {
  -u64 eptp;
  +u64 eptp, pwl;
  +
  +if (cpu_has_vmx_ept_4levels())
  +pwl = VMX_EPT_DEFAULT_GAW  VMX_EPT_GAW_EPTP_SHIFT;
  +else {
  +WARN(1, Unsupported page-walk length of 4.\n);
  Page-walk length of 4 is the only one that is supported.
 
 Since there is a bit 6 in IA32_VMX_EPT_VPID_CAP MSR indicating the
 support for the page-walk length, I think sanity check is necessary.
 But I just checked the code, it's already done in the hardware_setup()
 function which will disable ept feature if the page-wake length is not
 4. Gleb, any comments for the memory type check part?
 Looks fine, but are there CPUs out there that do not support WB for eptp? 
 Since
 there was no bug reports about it I assume no.

Hmm, currently I can't find a x86 processor that don't support WB for
eptp, also there is no relevant bug reported.
I just read the intel SDM 24.6.11: SW should read the VMX capability
MSR_IA32_VMX_EPT_VPID_CAP to determine
what EPT MT are supported. But looks like this is not a big concern in
the community, so let's go back this thread if
we encounter one unfornately in the future. Thanks for the comments.


 --
 Gleb.



-- 
Den
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] KVM-Use value reading from MSR when construct the eptp in VMX mode

2014-08-24 Thread Dennis Chen

On Mon, Aug 25, 2014 at 7:14 AM, Wanpeng Li  wrote:
> Please Cc kvm ml.

You've done that for me, thanks. The page-walk length sanity check has
been done in the hardware_setup() function, so it's not necessary in
this patch, but I still think it does make sense for the memory type
check, any comments, guys?

> On Sun, Aug 24, 2014 at 11:54:32AM +0800, Dennis Chen wrote:
>>This patch is used to construct the eptp in vmx mode with values
>>readed from MSR according to the intel x86 software developer's
>>manual.
>>
>>Signed-off-by: Dennis Chen 
>>---
>> arch/x86/include/asm/vmx.h |1 +
>> arch/x86/kvm/vmx.c |   21 +
>> 2 files changed, 18 insertions(+), 4 deletions(-)
>>
>>diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
>>index bcbfade..bf82a77 100644
>>--- a/arch/x86/include/asm/vmx.h
>>+++ b/arch/x86/include/asm/vmx.h
>>@@ -417,6 +417,7 @@ enum vmcs_field {
>> #define VMX_EPT_GAW_EPTP_SHIFT3
>> #define VMX_EPT_AD_ENABLE_BIT(1ull << 6)
>> #define VMX_EPT_DEFAULT_MT0x6ull
>>+#define VMX_EPT_UC_MT0x0ull
>> #define VMX_EPT_READABLE_MASK0x1ull
>> #define VMX_EPT_WRITABLE_MASK0x2ull
>> #define VMX_EPT_EXECUTABLE_MASK0x4ull
>>diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>>index bfe11cf..7add5ce 100644
>>--- a/arch/x86/kvm/vmx.c
>>+++ b/arch/x86/kvm/vmx.c
>>@@ -3477,11 +3477,24 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu,
>>unsigned long cr0)
>>
>> static u64 construct_eptp(unsigned long root_hpa)
>> {
>>-u64 eptp;
>>+u64 eptp, pwl;
>>+
>>+if (cpu_has_vmx_ept_4levels())
>>+pwl = VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT;
>>+else {
>>+WARN(1, "Unsupported page-walk length of 4.\n");
>>+BUG();
>>+}
>>+
>>+if (cpu_has_vmx_eptp_writeback())
>>+eptp = VMX_EPT_DEFAULT_MT | pwl;
>>+else if (cpu_has_vmx_eptp_uncacheable())
>>+eptp = VMX_EPT_UC_MT | pwl;
>>+else {
>>+WARN(1, "Unsupported memory type config in vmx eptp.\n");
>>+BUG();
>>+}
>>
>>-/* TODO write the value reading from MSR */
>>-eptp = VMX_EPT_DEFAULT_MT |
>>-VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT;
>> if (enable_ept_ad_bits)
>> eptp |= VMX_EPT_AD_ENABLE_BIT;
>> eptp |= (root_hpa & PAGE_MASK);
>>--
>>1.7.9.5
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] KVM-Use value reading from MSR when construct the eptp in VMX mode

2014-08-24 Thread Dennis Chen

On Sun, Aug 24, 2014 at 5:38 PM, Gleb Natapov  wrote:
> On Sun, Aug 24, 2014 at 11:54:32AM +0800, Dennis Chen wrote:
>> This patch is used to construct the eptp in vmx mode with values
>> readed from MSR according to the intel x86 software developer's
>> manual.
>>
>>  static u64 construct_eptp(unsigned long root_hpa)
>>  {
>> -u64 eptp;
>> +u64 eptp, pwl;
>> +
>> +if (cpu_has_vmx_ept_4levels())
>> +pwl = VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT;
>> +else {
>> +WARN(1, "Unsupported page-walk length of 4.\n");
> Page-walk length of 4 is the only one that is supported.
>
Since there is a bit 6 in IA32_VMX_EPT_VPID_CAP MSR indicating the
support for the page-walk length, I think sanity check is necessary.
But I just checked the code, it's already done in the hardware_setup()
function which will disable ept feature if the page-wake length is not
4. Gleb, any comments for the memory type check part?

>> +BUG();
>> +}
>> +
>> +if (cpu_has_vmx_eptp_writeback())
>> +eptp = VMX_EPT_DEFAULT_MT | pwl;
>> +else if (cpu_has_vmx_eptp_uncacheable())
>> +eptp = VMX_EPT_UC_MT | pwl;
>> +else {
>> +WARN(1, "Unsupported memory type config in vmx eptp.\n");
>> +BUG();
>> +}
>>
>> -/* TODO write the value reading from MSR */
>> -eptp = VMX_EPT_DEFAULT_MT |
>> -VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT;
>>  if (enable_ept_ad_bits)
>>  eptp |= VMX_EPT_AD_ENABLE_BIT;
>>  eptp |= (root_hpa & PAGE_MASK);
>> --
>> 1.7.9.5
>
> --
> Gleb.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] KVM-Use value reading from MSR when construct the eptp in VMX mode

2014-08-24 Thread Dennis Chen

On Sun, Aug 24, 2014 at 5:38 PM, Gleb Natapov g...@kernel.org wrote:
 On Sun, Aug 24, 2014 at 11:54:32AM +0800, Dennis Chen wrote:
 This patch is used to construct the eptp in vmx mode with values
 readed from MSR according to the intel x86 software developer's
 manual.

  static u64 construct_eptp(unsigned long root_hpa)
  {
 -u64 eptp;
 +u64 eptp, pwl;
 +
 +if (cpu_has_vmx_ept_4levels())
 +pwl = VMX_EPT_DEFAULT_GAW  VMX_EPT_GAW_EPTP_SHIFT;
 +else {
 +WARN(1, Unsupported page-walk length of 4.\n);
 Page-walk length of 4 is the only one that is supported.

Since there is a bit 6 in IA32_VMX_EPT_VPID_CAP MSR indicating the
support for the page-walk length, I think sanity check is necessary.
But I just checked the code, it's already done in the hardware_setup()
function which will disable ept feature if the page-wake length is not
4. Gleb, any comments for the memory type check part?

 +BUG();
 +}
 +
 +if (cpu_has_vmx_eptp_writeback())
 +eptp = VMX_EPT_DEFAULT_MT | pwl;
 +else if (cpu_has_vmx_eptp_uncacheable())
 +eptp = VMX_EPT_UC_MT | pwl;
 +else {
 +WARN(1, Unsupported memory type config in vmx eptp.\n);
 +BUG();
 +}

 -/* TODO write the value reading from MSR */
 -eptp = VMX_EPT_DEFAULT_MT |
 -VMX_EPT_DEFAULT_GAW  VMX_EPT_GAW_EPTP_SHIFT;
  if (enable_ept_ad_bits)
  eptp |= VMX_EPT_AD_ENABLE_BIT;
  eptp |= (root_hpa  PAGE_MASK);
 --
 1.7.9.5

 --
 Gleb.
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] KVM-Use value reading from MSR when construct the eptp in VMX mode

2014-08-24 Thread Dennis Chen

On Mon, Aug 25, 2014 at 7:14 AM, Wanpeng Li wanpeng...@linux.intel.com wrote:
 Please Cc kvm ml.

You've done that for me, thanks. The page-walk length sanity check has
been done in the hardware_setup() function, so it's not necessary in
this patch, but I still think it does make sense for the memory type
check, any comments, guys?

 On Sun, Aug 24, 2014 at 11:54:32AM +0800, Dennis Chen wrote:
This patch is used to construct the eptp in vmx mode with values
readed from MSR according to the intel x86 software developer's
manual.

Signed-off-by: Dennis Chen kernel.org@gmail.com
---
 arch/x86/include/asm/vmx.h |1 +
 arch/x86/kvm/vmx.c |   21 +
 2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index bcbfade..bf82a77 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -417,6 +417,7 @@ enum vmcs_field {
 #define VMX_EPT_GAW_EPTP_SHIFT3
 #define VMX_EPT_AD_ENABLE_BIT(1ull  6)
 #define VMX_EPT_DEFAULT_MT0x6ull
+#define VMX_EPT_UC_MT0x0ull
 #define VMX_EPT_READABLE_MASK0x1ull
 #define VMX_EPT_WRITABLE_MASK0x2ull
 #define VMX_EPT_EXECUTABLE_MASK0x4ull
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index bfe11cf..7add5ce 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3477,11 +3477,24 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu,
unsigned long cr0)

 static u64 construct_eptp(unsigned long root_hpa)
 {
-u64 eptp;
+u64 eptp, pwl;
+
+if (cpu_has_vmx_ept_4levels())
+pwl = VMX_EPT_DEFAULT_GAW  VMX_EPT_GAW_EPTP_SHIFT;
+else {
+WARN(1, Unsupported page-walk length of 4.\n);
+BUG();
+}
+
+if (cpu_has_vmx_eptp_writeback())
+eptp = VMX_EPT_DEFAULT_MT | pwl;
+else if (cpu_has_vmx_eptp_uncacheable())
+eptp = VMX_EPT_UC_MT | pwl;
+else {
+WARN(1, Unsupported memory type config in vmx eptp.\n);
+BUG();
+}

-/* TODO write the value reading from MSR */
-eptp = VMX_EPT_DEFAULT_MT |
-VMX_EPT_DEFAULT_GAW  VMX_EPT_GAW_EPTP_SHIFT;
 if (enable_ept_ad_bits)
 eptp |= VMX_EPT_AD_ENABLE_BIT;
 eptp |= (root_hpa  PAGE_MASK);
--
1.7.9.5
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] KVM-Use value reading from MSR when construct the eptp in VMX mode

2014-08-23 Thread Dennis Chen

CC'ing Avi  and Paolo...

Avi/Paolo, any comments?--Den

On Sun, Aug 24, 2014 at 11:54 AM, Dennis Chen  wrote:
> This patch is used to construct the eptp in vmx mode with values
> readed from MSR according to the intel x86 software developer's
> manual.
>
> Signed-off-by: Dennis Chen 
> ---
>  arch/x86/include/asm/vmx.h |1 +
>  arch/x86/kvm/vmx.c |   21 +
>  2 files changed, 18 insertions(+), 4 deletions(-)
>
> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
> index bcbfade..bf82a77 100644
> --- a/arch/x86/include/asm/vmx.h
> +++ b/arch/x86/include/asm/vmx.h
> @@ -417,6 +417,7 @@ enum vmcs_field {
>  #define VMX_EPT_GAW_EPTP_SHIFT3
>  #define VMX_EPT_AD_ENABLE_BIT(1ull << 6)
>  #define VMX_EPT_DEFAULT_MT0x6ull
> +#define VMX_EPT_UC_MT0x0ull
>  #define VMX_EPT_READABLE_MASK0x1ull
>  #define VMX_EPT_WRITABLE_MASK0x2ull
>  #define VMX_EPT_EXECUTABLE_MASK0x4ull
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index bfe11cf..7add5ce 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -3477,11 +3477,24 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu,
> unsigned long cr0)
>
>  static u64 construct_eptp(unsigned long root_hpa)
>  {
> -u64 eptp;
> +u64 eptp, pwl;
> +
> +if (cpu_has_vmx_ept_4levels())
> +pwl = VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT;
> +else {
> +WARN(1, "Unsupported page-walk length of 4.\n");
> +BUG();
> +}
> +
> +if (cpu_has_vmx_eptp_writeback())
> +eptp = VMX_EPT_DEFAULT_MT | pwl;
> +else if (cpu_has_vmx_eptp_uncacheable())
> +eptp = VMX_EPT_UC_MT | pwl;
> +else {
> +WARN(1, "Unsupported memory type config in vmx eptp.\n");
> +BUG();
> +}
>
> -/* TODO write the value reading from MSR */
> -eptp = VMX_EPT_DEFAULT_MT |
> -VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT;
>  if (enable_ept_ad_bits)
>  eptp |= VMX_EPT_AD_ENABLE_BIT;
>  eptp |= (root_hpa & PAGE_MASK);
> --
> 1.7.9.5
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] KVM-Use value reading from MSR when construct the eptp in VMX mode

2014-08-23 Thread Dennis Chen

This patch is used to construct the eptp in vmx mode with values
readed from MSR according to the intel x86 software developer's
manual.

Signed-off-by: Dennis Chen 
---
 arch/x86/include/asm/vmx.h |1 +
 arch/x86/kvm/vmx.c |   21 +
 2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index bcbfade..bf82a77 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -417,6 +417,7 @@ enum vmcs_field {
 #define VMX_EPT_GAW_EPTP_SHIFT3
 #define VMX_EPT_AD_ENABLE_BIT(1ull << 6)
 #define VMX_EPT_DEFAULT_MT0x6ull
+#define VMX_EPT_UC_MT0x0ull
 #define VMX_EPT_READABLE_MASK0x1ull
 #define VMX_EPT_WRITABLE_MASK0x2ull
 #define VMX_EPT_EXECUTABLE_MASK0x4ull
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index bfe11cf..7add5ce 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3477,11 +3477,24 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu,
unsigned long cr0)

 static u64 construct_eptp(unsigned long root_hpa)
 {
-u64 eptp;
+u64 eptp, pwl;
+
+if (cpu_has_vmx_ept_4levels())
+pwl = VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT;
+else {
+WARN(1, "Unsupported page-walk length of 4.\n");
+BUG();
+}
+
+if (cpu_has_vmx_eptp_writeback())
+eptp = VMX_EPT_DEFAULT_MT | pwl;
+else if (cpu_has_vmx_eptp_uncacheable())
+eptp = VMX_EPT_UC_MT | pwl;
+else {
+WARN(1, "Unsupported memory type config in vmx eptp.\n");
+BUG();
+}

-/* TODO write the value reading from MSR */
-eptp = VMX_EPT_DEFAULT_MT |
-VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT;
 if (enable_ept_ad_bits)
 eptp |= VMX_EPT_AD_ENABLE_BIT;
 eptp |= (root_hpa & PAGE_MASK);
-- 
1.7.9.5
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] KVM-Use value reading from MSR when construct the eptp in VMX mode

2014-08-23 Thread Dennis Chen

This patch is used to construct the eptp in vmx mode with values
readed from MSR according to the intel x86 software developer's
manual.

Signed-off-by: Dennis Chen kernel.org@gmail.com
---
 arch/x86/include/asm/vmx.h |1 +
 arch/x86/kvm/vmx.c |   21 +
 2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index bcbfade..bf82a77 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -417,6 +417,7 @@ enum vmcs_field {
 #define VMX_EPT_GAW_EPTP_SHIFT3
 #define VMX_EPT_AD_ENABLE_BIT(1ull  6)
 #define VMX_EPT_DEFAULT_MT0x6ull
+#define VMX_EPT_UC_MT0x0ull
 #define VMX_EPT_READABLE_MASK0x1ull
 #define VMX_EPT_WRITABLE_MASK0x2ull
 #define VMX_EPT_EXECUTABLE_MASK0x4ull
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index bfe11cf..7add5ce 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3477,11 +3477,24 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu,
unsigned long cr0)

 static u64 construct_eptp(unsigned long root_hpa)
 {
-u64 eptp;
+u64 eptp, pwl;
+
+if (cpu_has_vmx_ept_4levels())
+pwl = VMX_EPT_DEFAULT_GAW  VMX_EPT_GAW_EPTP_SHIFT;
+else {
+WARN(1, Unsupported page-walk length of 4.\n);
+BUG();
+}
+
+if (cpu_has_vmx_eptp_writeback())
+eptp = VMX_EPT_DEFAULT_MT | pwl;
+else if (cpu_has_vmx_eptp_uncacheable())
+eptp = VMX_EPT_UC_MT | pwl;
+else {
+WARN(1, Unsupported memory type config in vmx eptp.\n);
+BUG();
+}

-/* TODO write the value reading from MSR */
-eptp = VMX_EPT_DEFAULT_MT |
-VMX_EPT_DEFAULT_GAW  VMX_EPT_GAW_EPTP_SHIFT;
 if (enable_ept_ad_bits)
 eptp |= VMX_EPT_AD_ENABLE_BIT;
 eptp |= (root_hpa  PAGE_MASK);
-- 
1.7.9.5
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] KVM-Use value reading from MSR when construct the eptp in VMX mode

2014-08-23 Thread Dennis Chen

CC'ing Avi  and Paolo...

Avi/Paolo, any comments?--Den

On Sun, Aug 24, 2014 at 11:54 AM, Dennis Chen kernel.org@gmail.com wrote:
 This patch is used to construct the eptp in vmx mode with values
 readed from MSR according to the intel x86 software developer's
 manual.

 Signed-off-by: Dennis Chen kernel.org@gmail.com
 ---
  arch/x86/include/asm/vmx.h |1 +
  arch/x86/kvm/vmx.c |   21 +
  2 files changed, 18 insertions(+), 4 deletions(-)

 diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
 index bcbfade..bf82a77 100644
 --- a/arch/x86/include/asm/vmx.h
 +++ b/arch/x86/include/asm/vmx.h
 @@ -417,6 +417,7 @@ enum vmcs_field {
  #define VMX_EPT_GAW_EPTP_SHIFT3
  #define VMX_EPT_AD_ENABLE_BIT(1ull  6)
  #define VMX_EPT_DEFAULT_MT0x6ull
 +#define VMX_EPT_UC_MT0x0ull
  #define VMX_EPT_READABLE_MASK0x1ull
  #define VMX_EPT_WRITABLE_MASK0x2ull
  #define VMX_EPT_EXECUTABLE_MASK0x4ull
 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
 index bfe11cf..7add5ce 100644
 --- a/arch/x86/kvm/vmx.c
 +++ b/arch/x86/kvm/vmx.c
 @@ -3477,11 +3477,24 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu,
 unsigned long cr0)

  static u64 construct_eptp(unsigned long root_hpa)
  {
 -u64 eptp;
 +u64 eptp, pwl;
 +
 +if (cpu_has_vmx_ept_4levels())
 +pwl = VMX_EPT_DEFAULT_GAW  VMX_EPT_GAW_EPTP_SHIFT;
 +else {
 +WARN(1, Unsupported page-walk length of 4.\n);
 +BUG();
 +}
 +
 +if (cpu_has_vmx_eptp_writeback())
 +eptp = VMX_EPT_DEFAULT_MT | pwl;
 +else if (cpu_has_vmx_eptp_uncacheable())
 +eptp = VMX_EPT_UC_MT | pwl;
 +else {
 +WARN(1, Unsupported memory type config in vmx eptp.\n);
 +BUG();
 +}

 -/* TODO write the value reading from MSR */
 -eptp = VMX_EPT_DEFAULT_MT |
 -VMX_EPT_DEFAULT_GAW  VMX_EPT_GAW_EPTP_SHIFT;
  if (enable_ept_ad_bits)
  eptp |= VMX_EPT_AD_ENABLE_BIT;
  eptp |= (root_hpa  PAGE_MASK);
 --
 1.7.9.5
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] race condition fixing in sysfs_create_dir

2013-07-30 Thread Dennis Chen


On 07/26/2013 09:38 PM, Tejun Heo wrote:

Hello,

On Fri, Jul 26, 2013 at 05:59:00PM +0800, Dennis Chen wrote:

On 07/26/2013 05:49 PM, Dennis Chen wrote:


The patch is trying its best to avoid creating a dir under a parent dir which 
is removing from
the system:
 PATH0 (create a dir under 'PARENT/...') PATH1 (remove the 
'PARENT/...')
  sysfs_create_dir() { sysfs_remove_dir() {
  ...  ...
  if (kobj->parent)
spin_lock(_assoc_lock);
 parent_sd = kobj->parent->sd;  <- kobj->sd = NULL;
  else 
spin_unlock(_assoc_lock);
 parent_sd = _root;
Suppose PATH1 enter the critical section first, then PATH0 begin to execute before 
kobj->sd
has been reset to NULL, possibly PATH0 will get a non-NULL parent_sd since lack 
of the
sysfs_assoc_lock protection in PATH0. In this case, PATH0 think it has a valid 
parent_sd which
can be freed by PATH1 in the followed, refer to the comments in the patch. 
Maybe we need
to figure out a perfect solution to solve the race condition, although the 
codes in question are
in slow path...

I don't think sysfs is supposed to handle multiple actors trying to
populate and destroy the directory at the same time at all, so this
seems kinda moot.  Do you have a case where this actually matters?

Thanks.


hello,Tejun. Nice. But seems I still have different opinion :). If you look at 
the 'sysfs_do_create_link_sd()'
code, you will find a comment "target->sd can go away beneath us but is 
protected with sysfs_assoc_lock.
Fetch target_sd from it", don't you think the sysfs_create_dir is the same as 
the sysfs_do_create_link_sd()
essentially? if the answer is yes meaning the parent dir can go away when its 
sub-dir is creating by sysfs_create_dir,
then the similar action should be taken as sysfs_create_link does. right?

 


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] race condition fixing in sysfs_create_dir

2013-07-30 Thread Dennis Chen


On 07/26/2013 09:38 PM, Tejun Heo wrote:

Hello,

On Fri, Jul 26, 2013 at 05:59:00PM +0800, Dennis Chen wrote:

On 07/26/2013 05:49 PM, Dennis Chen wrote:


The patch is trying its best to avoid creating a dir under a parent dir which 
is removing from
the system:
 PATH0 (create a dir under 'PARENT/...') PATH1 (remove the 
'PARENT/...')
  sysfs_create_dir() { sysfs_remove_dir() {
  ...  ...
  if (kobj-parent)
spin_lock(sysfs_assoc_lock);
 parent_sd = kobj-parent-sd;  - kobj-sd = NULL;
  else 
spin_unlock(sysfs_assoc_lock);
 parent_sd = sysfs_root;
Suppose PATH1 enter the critical section first, then PATH0 begin to execute before 
kobj-sd
has been reset to NULL, possibly PATH0 will get a non-NULL parent_sd since lack 
of the
sysfs_assoc_lock protection in PATH0. In this case, PATH0 think it has a valid 
parent_sd which
can be freed by PATH1 in the followed, refer to the comments in the patch. 
Maybe we need
to figure out a perfect solution to solve the race condition, although the 
codes in question are
in slow path...

I don't think sysfs is supposed to handle multiple actors trying to
populate and destroy the directory at the same time at all, so this
seems kinda moot.  Do you have a case where this actually matters?

Thanks.


hello,Tejun. Nice. But seems I still have different opinion :). If you look at 
the 'sysfs_do_create_link_sd()'
code, you will find a comment target-sd can go away beneath us but is 
protected with sysfs_assoc_lock.
Fetch target_sd from it, don't you think the sysfs_create_dir is the same as 
the sysfs_do_create_link_sd()
essentially? if the answer is yes meaning the parent dir can go away when its 
sub-dir is creating by sysfs_create_dir,
then the similar action should be taken as sysfs_create_link does. right?

 


--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] race condition fixing in sysfs_create_dir

2013-07-26 Thread Dennis Chen


On 07/26/2013 05:49 PM, Dennis Chen wrote:


The patch is trying its best to avoid creating a dir under a parent dir which 
is removing from
the system:
 PATH0 (create a dir under 'PARENT/...') PATH1 (remove the 
'PARENT/...')
  sysfs_create_dir() { sysfs_remove_dir() {
  ...  ...
  if (kobj->parent)
spin_lock(_assoc_lock);
 parent_sd = kobj->parent->sd;  <- kobj->sd = NULL;
  else 
spin_unlock(_assoc_lock);
 parent_sd = _root;
Suppose PATH1 enter the critical section first, then PATH0 begin to execute before 
kobj->sd
has been reset to NULL, possibly PATH0 will get a non-NULL parent_sd since lack 
of the
sysfs_assoc_lock protection in PATH0. In this case, PATH0 think it has a valid 
parent_sd which
can be freed by PATH1 in the followed, refer to the comments in the patch. 
Maybe we need
to figure out a perfect solution to solve the race condition, although the 
codes in question are
in slow path...
Signed-off-by: Dennis Chen 
Cc: Greg Kroah-Hartman 
Cc: Tejun Heo 


should be Tejun Heo 


Cc: Wang Cong 
---
fs/sysfs/dir.c |   11 ++-
1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index e068e74..114073d 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -746,13 +746,22 @@ int sysfs_create_dir(struct kobject * kobj)
 BUG_ON(!kobj);
+   spin_lock(_assoc_lock);
 if (kobj->parent)
 parent_sd = kobj->parent->sd;
 else
 parent_sd = _root;
-   if (!parent_sd)
+   if (!parent_sd) {
+   spin_unlock(_assoc_lock);
 return -ENOENT;
+   }
+   spin_unlock(_assoc_lock);
+   /* TODO: although the sysfs is in a slow path, but in the operation
+   * followed, we still have a window to let the sysfs_remove_dir to
+   * free the memory space pointered by parent_sd till we inc its ref
+   * count in __sysfs_add_one()
+   */
 if (sysfs_ns_type(parent_sd))
 ns = kobj->ktype->namespace(kobj);


Re CC Tejun whose email addess <@suse.de> is obsolete :)

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] race condition fixing in sysfs_create_dir

2013-07-26 Thread Dennis Chen


The patch is trying its best to avoid creating a dir under a parent dir which 
is removing from
the system:

PATH0 (create a dir under 'PARENT/...') PATH1 (remove the 
'PARENT/...')
 sysfs_create_dir() { sysfs_remove_dir() {
 ...  ...
 if (kobj->parent)
spin_lock(_assoc_lock);
parent_sd = kobj->parent->sd;  <- kobj->sd = NULL;
 else 
spin_unlock(_assoc_lock);
parent_sd = _root;

Suppose PATH1 enter the critical section first, then PATH0 begin to execute before 
kobj->sd
has been reset to NULL, possibly PATH0 will get a non-NULL parent_sd since lack 
of the
sysfs_assoc_lock protection in PATH0. In this case, PATH0 think it has a valid 
parent_sd which
can be freed by PATH1 in the followed, refer to the comments in the patch. 
Maybe we need
to figure out a perfect solution to solve the race condition, although the 
codes in question are
in slow path...

Signed-off-by: Dennis Chen 
Cc: Greg Kroah-Hartman 
Cc: Tejun Heo 
Cc: Wang Cong 
---
fs/sysfs/dir.c |   11 ++-
1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index e068e74..114073d 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -746,13 +746,22 @@ int sysfs_create_dir(struct kobject * kobj)
BUG_ON(!kobj);

+   spin_lock(_assoc_lock);
if (kobj->parent)
parent_sd = kobj->parent->sd;
else
parent_sd = _root;

-   if (!parent_sd)
+   if (!parent_sd) {
+   spin_unlock(_assoc_lock);
return -ENOENT;
+   }
+   spin_unlock(_assoc_lock);
+   /* TODO: although the sysfs is in a slow path, but in the operation
+   * followed, we still have a window to let the sysfs_remove_dir to
+   * free the memory space pointered by parent_sd till we inc its ref
+   * count in __sysfs_add_one()
+   */

if (sysfs_ns_type(parent_sd))
ns = kobj->ktype->namespace(kobj);

--

1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH]Fix kernel NULL function pointer dereference in vfs_kern_mount()

2013-07-26 Thread Dennis Chen


On 07/23/2013 04:12 PM, Dennis Chen wrote:


This patch is try to fix a potential NULL function pointer dereference in the 
exported
vfs_kern_mount() function which can be triggered by following kernel module 
code piece:
static struct vfsmount *npfs_mnt;
static struct file_system_type np_fs_type = {
 .name= "npfs",
 .kill_sb  = npfs_kill_sb,
 .fs_flags = FS_USERNS_MOUNT,
};
static int fsmod_init(void)
{
 int err = -ENOMEM;
 err = register_filesystem(_fs_type);
 if (!err) {
 npfs_mnt = kern_mount(_fs_type);
 if (IS_ERR(npfs_mnt)) {
 printk(KERN_ERR "npfs: could not mount!\n");
 unregister_filesystem(_fs_type);
 return PTR_ERR(npfs_mnt);
 }
 }
 return err;
}
I happened to forget to implement a (*mount)() function in the np_fs_type 
instance which
is buggy definitely, but as an exported function to the outside, kernel should 
not suppose
the caller will always take the right action. In this scenario, I have to 
reboot my machine
to clean the loaded kernel module.
Signed-off-by: Dennis Chen 
Cc: Alexander Viro 
---
fs/filesystems.c |5 +
fs/namespace.c   |2 ++
fs/super.c   |   10 ++
3 files changed, 17 insertions(+)
diff --git a/fs/filesystems.c b/fs/filesystems.c
index 92567d9..6d6b162 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -71,6 +71,11 @@ int register_filesystem(struct file_system_type * fs)
 int res = 0;
 struct file_system_type ** p;
  
+   if (!fs)

+   return -ENODEV;
+   if (!fs->name)
+   return -EINVAL;
+
 BUG_ON(strchr(fs->name, '.'));
 if (fs->next)
 return -EBUSY;
diff --git a/fs/namespace.c b/fs/namespace.c
index 7b1ca9b..ad17692 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2758,6 +2758,8 @@ void put_mnt_ns(struct mnt_namespace *ns)
  struct vfsmount *kern_mount_data(struct file_system_type *type, void *data)
  {
 struct vfsmount *mnt;
+   if (!type)
+   return ERR_PTR(-ENODEV);
 mnt = vfs_kern_mount(type, MS_KERNMOUNT, type->name, data);
 if (!IS_ERR(mnt)) {
 /*
diff --git a/fs/super.c b/fs/super.c
index 68307c0..d995a19 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1101,11 +1101,21 @@ mount_fs(struct file_system_type *type, int flags, 
const char *name, void *data)
 goto out_free_secdata;
 }
  
+   if (!type->mount) {

+   error = -EINVAL;
+   printk(KERN_ERR "No 'mount' method defined in %s\n", 
type->name);
+   goto out_free_secdata;
+   }
 root = type->mount(type, flags, name, data);
 if (IS_ERR(root)) {
 error = PTR_ERR(root);
 goto out_free_secdata;
 }
+   if (!root) {
+   error = -EINVAL;
+   goto out_free_secdata;
+   }
+
 sb = root->d_sb;
 BUG_ON(!sb);
 WARN_ON(!sb->s_bdi);


Any feedback for this patch? If we don't need it meaning the kernel module 
developer
should take the responsibility to make sure his/her codes will not crash 
something,
but in my opinion, as exported kernel function, it should do its best to avoid 
put the
kernel in a unstable state, while not transfer the responsibility to the user, 
right?

den

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH]Fix kernel NULL function pointer dereference in vfs_kern_mount()

2013-07-26 Thread Dennis Chen


On 07/23/2013 04:12 PM, Dennis Chen wrote:


This patch is try to fix a potential NULL function pointer dereference in the 
exported
vfs_kern_mount() function which can be triggered by following kernel module 
code piece:
static struct vfsmount *npfs_mnt;
static struct file_system_type np_fs_type = {
 .name= npfs,
 .kill_sb  = npfs_kill_sb,
 .fs_flags = FS_USERNS_MOUNT,
};
static int fsmod_init(void)
{
 int err = -ENOMEM;
 err = register_filesystem(np_fs_type);
 if (!err) {
 npfs_mnt = kern_mount(np_fs_type);
 if (IS_ERR(npfs_mnt)) {
 printk(KERN_ERR npfs: could not mount!\n);
 unregister_filesystem(np_fs_type);
 return PTR_ERR(npfs_mnt);
 }
 }
 return err;
}
I happened to forget to implement a (*mount)() function in the np_fs_type 
instance which
is buggy definitely, but as an exported function to the outside, kernel should 
not suppose
the caller will always take the right action. In this scenario, I have to 
reboot my machine
to clean the loaded kernel module.
Signed-off-by: Dennis Chen xsc...@tnsoft.com.cn
Cc: Alexander Viro v...@zeniv.linux.org.uk
---
fs/filesystems.c |5 +
fs/namespace.c   |2 ++
fs/super.c   |   10 ++
3 files changed, 17 insertions(+)
diff --git a/fs/filesystems.c b/fs/filesystems.c
index 92567d9..6d6b162 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -71,6 +71,11 @@ int register_filesystem(struct file_system_type * fs)
 int res = 0;
 struct file_system_type ** p;
  
+   if (!fs)

+   return -ENODEV;
+   if (!fs-name)
+   return -EINVAL;
+
 BUG_ON(strchr(fs-name, '.'));
 if (fs-next)
 return -EBUSY;
diff --git a/fs/namespace.c b/fs/namespace.c
index 7b1ca9b..ad17692 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2758,6 +2758,8 @@ void put_mnt_ns(struct mnt_namespace *ns)
  struct vfsmount *kern_mount_data(struct file_system_type *type, void *data)
  {
 struct vfsmount *mnt;
+   if (!type)
+   return ERR_PTR(-ENODEV);
 mnt = vfs_kern_mount(type, MS_KERNMOUNT, type-name, data);
 if (!IS_ERR(mnt)) {
 /*
diff --git a/fs/super.c b/fs/super.c
index 68307c0..d995a19 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1101,11 +1101,21 @@ mount_fs(struct file_system_type *type, int flags, 
const char *name, void *data)
 goto out_free_secdata;
 }
  
+   if (!type-mount) {

+   error = -EINVAL;
+   printk(KERN_ERR No 'mount' method defined in %s\n, 
type-name);
+   goto out_free_secdata;
+   }
 root = type-mount(type, flags, name, data);
 if (IS_ERR(root)) {
 error = PTR_ERR(root);
 goto out_free_secdata;
 }
+   if (!root) {
+   error = -EINVAL;
+   goto out_free_secdata;
+   }
+
 sb = root-d_sb;
 BUG_ON(!sb);
 WARN_ON(!sb-s_bdi);


Any feedback for this patch? If we don't need it meaning the kernel module 
developer
should take the responsibility to make sure his/her codes will not crash 
something,
but in my opinion, as exported kernel function, it should do its best to avoid 
put the
kernel in a unstable state, while not transfer the responsibility to the user, 
right?

den

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] race condition fixing in sysfs_create_dir

2013-07-26 Thread Dennis Chen


The patch is trying its best to avoid creating a dir under a parent dir which 
is removing from
the system:

PATH0 (create a dir under 'PARENT/...') PATH1 (remove the 
'PARENT/...')
 sysfs_create_dir() { sysfs_remove_dir() {
 ...  ...
 if (kobj-parent)
spin_lock(sysfs_assoc_lock);
parent_sd = kobj-parent-sd;  - kobj-sd = NULL;
 else 
spin_unlock(sysfs_assoc_lock);
parent_sd = sysfs_root;

Suppose PATH1 enter the critical section first, then PATH0 begin to execute before 
kobj-sd
has been reset to NULL, possibly PATH0 will get a non-NULL parent_sd since lack 
of the
sysfs_assoc_lock protection in PATH0. In this case, PATH0 think it has a valid 
parent_sd which
can be freed by PATH1 in the followed, refer to the comments in the patch. 
Maybe we need
to figure out a perfect solution to solve the race condition, although the 
codes in question are
in slow path...

Signed-off-by: Dennis Chen xsc...@tnsoft.com.cn
Cc: Greg Kroah-Hartman gre...@linuxfoundation.org
Cc: Tejun Heo te...@suse.de
Cc: Wang Cong xiyou.wangc...@gmail.com
---
fs/sysfs/dir.c |   11 ++-
1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index e068e74..114073d 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -746,13 +746,22 @@ int sysfs_create_dir(struct kobject * kobj)
BUG_ON(!kobj);

+   spin_lock(sysfs_assoc_lock);
if (kobj-parent)
parent_sd = kobj-parent-sd;
else
parent_sd = sysfs_root;

-   if (!parent_sd)
+   if (!parent_sd) {
+   spin_unlock(sysfs_assoc_lock);
return -ENOENT;
+   }
+   spin_unlock(sysfs_assoc_lock);
+   /* TODO: although the sysfs is in a slow path, but in the operation
+   * followed, we still have a window to let the sysfs_remove_dir to
+   * free the memory space pointered by parent_sd till we inc its ref
+   * count in __sysfs_add_one()
+   */

if (sysfs_ns_type(parent_sd))
ns = kobj-ktype-namespace(kobj);

--

1.7.10.4

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] race condition fixing in sysfs_create_dir

2013-07-26 Thread Dennis Chen


On 07/26/2013 05:49 PM, Dennis Chen wrote:


The patch is trying its best to avoid creating a dir under a parent dir which 
is removing from
the system:
 PATH0 (create a dir under 'PARENT/...') PATH1 (remove the 
'PARENT/...')
  sysfs_create_dir() { sysfs_remove_dir() {
  ...  ...
  if (kobj-parent)
spin_lock(sysfs_assoc_lock);
 parent_sd = kobj-parent-sd;  - kobj-sd = NULL;
  else 
spin_unlock(sysfs_assoc_lock);
 parent_sd = sysfs_root;
Suppose PATH1 enter the critical section first, then PATH0 begin to execute before 
kobj-sd
has been reset to NULL, possibly PATH0 will get a non-NULL parent_sd since lack 
of the
sysfs_assoc_lock protection in PATH0. In this case, PATH0 think it has a valid 
parent_sd which
can be freed by PATH1 in the followed, refer to the comments in the patch. 
Maybe we need
to figure out a perfect solution to solve the race condition, although the 
codes in question are
in slow path...
Signed-off-by: Dennis Chen xsc...@tnsoft.com.cn
Cc: Greg Kroah-Hartman gre...@linuxfoundation.org
Cc: Tejun Heo te...@suse.de


should be Tejun Heo t...@kernel.org


Cc: Wang Cong xiyou.wangc...@gmail.com
---
fs/sysfs/dir.c |   11 ++-
1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index e068e74..114073d 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -746,13 +746,22 @@ int sysfs_create_dir(struct kobject * kobj)
 BUG_ON(!kobj);
+   spin_lock(sysfs_assoc_lock);
 if (kobj-parent)
 parent_sd = kobj-parent-sd;
 else
 parent_sd = sysfs_root;
-   if (!parent_sd)
+   if (!parent_sd) {
+   spin_unlock(sysfs_assoc_lock);
 return -ENOENT;
+   }
+   spin_unlock(sysfs_assoc_lock);
+   /* TODO: although the sysfs is in a slow path, but in the operation
+   * followed, we still have a window to let the sysfs_remove_dir to
+   * free the memory space pointered by parent_sd till we inc its ref
+   * count in __sysfs_add_one()
+   */
 if (sysfs_ns_type(parent_sd))
 ns = kobj-ktype-namespace(kobj);


Re CC Tejun whose email addess @suse.de is obsolete :)

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH]Fix kernel NULL function pointer dereference in vfs_kern_mount()

2013-07-23 Thread Dennis Chen


This patch is try to fix a potential NULL function pointer dereference in the 
exported
vfs_kern_mount() function which can be triggered by following kernel module 
code piece:

static struct vfsmount *npfs_mnt;
static struct file_system_type np_fs_type = {
.name   = "npfs",
.kill_sb= npfs_kill_sb,
.fs_flags   = FS_USERNS_MOUNT,
};

static int fsmod_init(void)
{
int err = -ENOMEM;

err = register_filesystem(_fs_type);
if (!err) {
npfs_mnt = kern_mount(_fs_type);
if (IS_ERR(npfs_mnt)) {
printk(KERN_ERR "npfs: could not mount!\n");
unregister_filesystem(_fs_type);
return PTR_ERR(npfs_mnt);
}
}

return err;
}

I happened to forget to implement a (*mount)() function in the np_fs_type 
instance which
is buggy definitely, but as an exported function to the outside, kernel should 
not suppose
the caller will always take the right action. In this scenario, I have to 
reboot my machine
to clean the loaded kernel module.

Signed-off-by: Dennis Chen 
Cc: Alexander Viro 
---
fs/filesystems.c |5 +
fs/namespace.c   |2 ++
fs/super.c   |   10 ++
3 files changed, 17 insertions(+)

diff --git a/fs/filesystems.c b/fs/filesystems.c
index 92567d9..6d6b162 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -71,6 +71,11 @@ int register_filesystem(struct file_system_type * fs)
int res = 0;
struct file_system_type ** p;
 
+   if (!fs)

+   return -ENODEV;
+   if (!fs->name)
+   return -EINVAL;
+
BUG_ON(strchr(fs->name, '.'));
if (fs->next)
return -EBUSY;
diff --git a/fs/namespace.c b/fs/namespace.c
index 7b1ca9b..ad17692 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2758,6 +2758,8 @@ void put_mnt_ns(struct mnt_namespace *ns)
 struct vfsmount *kern_mount_data(struct file_system_type *type, void *data)
 {
struct vfsmount *mnt;
+   if (!type)
+   return ERR_PTR(-ENODEV);
mnt = vfs_kern_mount(type, MS_KERNMOUNT, type->name, data);
if (!IS_ERR(mnt)) {
/*
diff --git a/fs/super.c b/fs/super.c
index 68307c0..d995a19 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1101,11 +1101,21 @@ mount_fs(struct file_system_type *type, int flags, 
const char *name, void *data)
goto out_free_secdata;
}
 
+   if (!type->mount) {

+   error = -EINVAL;
+   printk(KERN_ERR "No 'mount' method defined in %s\n", 
type->name);
+   goto out_free_secdata;
+   }
root = type->mount(type, flags, name, data);
if (IS_ERR(root)) {
error = PTR_ERR(root);
goto out_free_secdata;
}
+   if (!root) {
+   error = -EINVAL;
+   goto out_free_secdata;
+   }
+
sb = root->d_sb;
BUG_ON(!sb);
WARN_ON(!sb->s_bdi);

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH]Fix kernel NULL function pointer dereference in vfs_kern_mount()

2013-07-23 Thread Dennis Chen


This patch is try to fix a potential NULL function pointer dereference in the 
exported
vfs_kern_mount() function which can be triggered by following kernel module 
code piece:

static struct vfsmount *npfs_mnt;
static struct file_system_type np_fs_type = {
.name   = npfs,
.kill_sb= npfs_kill_sb,
.fs_flags   = FS_USERNS_MOUNT,
};

static int fsmod_init(void)
{
int err = -ENOMEM;

err = register_filesystem(np_fs_type);
if (!err) {
npfs_mnt = kern_mount(np_fs_type);
if (IS_ERR(npfs_mnt)) {
printk(KERN_ERR npfs: could not mount!\n);
unregister_filesystem(np_fs_type);
return PTR_ERR(npfs_mnt);
}
}

return err;
}

I happened to forget to implement a (*mount)() function in the np_fs_type 
instance which
is buggy definitely, but as an exported function to the outside, kernel should 
not suppose
the caller will always take the right action. In this scenario, I have to 
reboot my machine
to clean the loaded kernel module.

Signed-off-by: Dennis Chen xsc...@tnsoft.com.cn
Cc: Alexander Viro v...@zeniv.linux.org.uk
---
fs/filesystems.c |5 +
fs/namespace.c   |2 ++
fs/super.c   |   10 ++
3 files changed, 17 insertions(+)

diff --git a/fs/filesystems.c b/fs/filesystems.c
index 92567d9..6d6b162 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -71,6 +71,11 @@ int register_filesystem(struct file_system_type * fs)
int res = 0;
struct file_system_type ** p;
 
+   if (!fs)

+   return -ENODEV;
+   if (!fs-name)
+   return -EINVAL;
+
BUG_ON(strchr(fs-name, '.'));
if (fs-next)
return -EBUSY;
diff --git a/fs/namespace.c b/fs/namespace.c
index 7b1ca9b..ad17692 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2758,6 +2758,8 @@ void put_mnt_ns(struct mnt_namespace *ns)
 struct vfsmount *kern_mount_data(struct file_system_type *type, void *data)
 {
struct vfsmount *mnt;
+   if (!type)
+   return ERR_PTR(-ENODEV);
mnt = vfs_kern_mount(type, MS_KERNMOUNT, type-name, data);
if (!IS_ERR(mnt)) {
/*
diff --git a/fs/super.c b/fs/super.c
index 68307c0..d995a19 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1101,11 +1101,21 @@ mount_fs(struct file_system_type *type, int flags, 
const char *name, void *data)
goto out_free_secdata;
}
 
+   if (!type-mount) {

+   error = -EINVAL;
+   printk(KERN_ERR No 'mount' method defined in %s\n, 
type-name);
+   goto out_free_secdata;
+   }
root = type-mount(type, flags, name, data);
if (IS_ERR(root)) {
error = PTR_ERR(root);
goto out_free_secdata;
}
+   if (!root) {
+   error = -EINVAL;
+   goto out_free_secdata;
+   }
+
sb = root-d_sb;
BUG_ON(!sb);
WARN_ON(!sb-s_bdi);

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: CPU utilization between physical CPU and virtual CPU in KVM

2012-10-15 Thread Dennis Chen

Any body can be help about this or  a little bit clues?  Thanks!

On Mon, Oct 8, 2012 at 3:01 PM, Dennis Chen  wrote:
> Hi All,
>
> I am confused by the following observed scenario:
>
> In my 4-CPU (KVM supported, 2 core with 2 thread for each) host
> machine box, I create only one VM with 3-vCPU through virsh/libvirt
> tools and also I pin this VM process to the physical processor 3. I
> guess the CPU utilization for the processor 3 will not exceed 100%,
> then I create 3 process (dead loop-- while(1);) and bind each of them
> to vCPU[0-2] respectively, through the "top -c" command in VM
> environment, I can see the CPU utilization for each of the vCPU is
> about 100%, but interesting, I found that the CPU utilization of
> processor 3 in the host machine is about 300% with "toc -c" command.
> why does a single process bound to a CPU can get ~300% cpu bandwidth
> in this case, does the kernel scheduler dispatch the idle cycle
> capacity of the CPUs to the virtual CPU of the VM, other word, the
> scheduler knows the vCPU info in the VM process?
>
> For the same case, if I create another 4 new dead-loop processes and
> bind them to the physical CPU[0-3] equally, then I find the vCPU0/1 in
> VM will not be 100%, eg. 32%,  (I think the scheduler in the guest OS
> doesn't know it's running in a virtual environment, so the utilization
> of the vCPU will not change to adapt to the physical processor
> utilization, but it did, why?
>
> -org-gnu
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: CPU utilization between physical CPU and virtual CPU in KVM

2012-10-15 Thread Dennis Chen

Any body can be help about this or  a little bit clues?  Thanks!

On Mon, Oct 8, 2012 at 3:01 PM, Dennis Chen kernel.org@gmail.com wrote:
 Hi All,

 I am confused by the following observed scenario:

 In my 4-CPU (KVM supported, 2 core with 2 thread for each) host
 machine box, I create only one VM with 3-vCPU through virsh/libvirt
 tools and also I pin this VM process to the physical processor 3. I
 guess the CPU utilization for the processor 3 will not exceed 100%,
 then I create 3 process (dead loop-- while(1);) and bind each of them
 to vCPU[0-2] respectively, through the top -c command in VM
 environment, I can see the CPU utilization for each of the vCPU is
 about 100%, but interesting, I found that the CPU utilization of
 processor 3 in the host machine is about 300% with toc -c command.
 why does a single process bound to a CPU can get ~300% cpu bandwidth
 in this case, does the kernel scheduler dispatch the idle cycle
 capacity of the CPUs to the virtual CPU of the VM, other word, the
 scheduler knows the vCPU info in the VM process?

 For the same case, if I create another 4 new dead-loop processes and
 bind them to the physical CPU[0-3] equally, then I find the vCPU0/1 in
 VM will not be 100%, eg. 32%,  (I think the scheduler in the guest OS
 doesn't know it's running in a virtual environment, so the utilization
 of the vCPU will not change to adapt to the physical processor
 utilization, but it did, why?

 -org-gnu
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

CPU utilization between physical CPU and virtual CPU in KVM

2012-10-08 Thread Dennis Chen

Hi All,

I am confused by the following observed scenario:

In my 4-CPU (KVM supported, 2 core with 2 thread for each) host
machine box, I create only one VM with 3-vCPU through virsh/libvirt
tools and also I pin this VM process to the physical processor 3. I
guess the CPU utilization for the processor 3 will not exceed 100%,
then I create 3 process (dead loop-- while(1);) and bind each of them
to vCPU[0-2] respectively, through the "top -c" command in VM
environment, I can see the CPU utilization for each of the vCPU is
about 100%, but interesting, I found that the CPU utilization of
processor 3 in the host machine is about 300% with "toc -c" command.
why does a single process bound to a CPU can get ~300% cpu bandwidth
in this case, does the kernel scheduler dispatch the idle cycle
capacity of the CPUs to the virtual CPU of the VM, other word, the
scheduler knows the vCPU info in the VM process?

For the same case, if I create another 4 new dead-loop processes and
bind them to the physical CPU[0-3] equally, then I find the vCPU0/1 in
VM will not be 100%, eg. 32%,  (I think the scheduler in the guest OS
doesn't know it's running in a virtual environment, so the utilization
of the vCPU will not change to adapt to the physical processor
utilization, but it did, why?

-org-gnu
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

CPU utilization between physical CPU and virtual CPU in KVM

2012-10-08 Thread Dennis Chen

Hi All,

I am confused by the following observed scenario:

In my 4-CPU (KVM supported, 2 core with 2 thread for each) host
machine box, I create only one VM with 3-vCPU through virsh/libvirt
tools and also I pin this VM process to the physical processor 3. I
guess the CPU utilization for the processor 3 will not exceed 100%,
then I create 3 process (dead loop-- while(1);) and bind each of them
to vCPU[0-2] respectively, through the top -c command in VM
environment, I can see the CPU utilization for each of the vCPU is
about 100%, but interesting, I found that the CPU utilization of
processor 3 in the host machine is about 300% with toc -c command.
why does a single process bound to a CPU can get ~300% cpu bandwidth
in this case, does the kernel scheduler dispatch the idle cycle
capacity of the CPUs to the virtual CPU of the VM, other word, the
scheduler knows the vCPU info in the VM process?

For the same case, if I create another 4 new dead-loop processes and
bind them to the physical CPU[0-3] equally, then I find the vCPU0/1 in
VM will not be 100%, eg. 32%,  (I think the scheduler in the guest OS
doesn't know it's running in a virtual environment, so the utilization
of the vCPU will not change to adapt to the physical processor
utilization, but it did, why?

-org-gnu
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

78 matches

Mail list logo