from:"Benjamin Poirier"

[PATCH] menuconfig: Replace CIRCLEQ by list_head-style lists.

2012-10-20 Thread Benjamin Poirier

From: Benjamin Poirier bpoir...@suse.de

sys/queue.h and CIRCLEQ in particular have proven to cause portability
problems (reported on Debian Sarge, Cygwin and FreeBSD)

Reported-by: Tetsuo Handa penguin-ker...@i-love.sakura.ne.jp
Signed-off-by: Benjamin Poirier bpoir...@suse.de
---
 scripts/kconfig/expr.h  |5 +--
 scripts/kconfig/list.h  |   90 +++
 scripts/kconfig/lkc_proto.h |4 +-
 scripts/kconfig/mconf.c |6 +--
 scripts/kconfig/menu.c  |   14 ---
 5 files changed, 105 insertions(+), 14 deletions(-)
 create mode 100644 scripts/kconfig/list.h

diff --git a/scripts/kconfig/expr.h b/scripts/kconfig/expr.h
index bd2e098..cdd4860 100644
--- a/scripts/kconfig/expr.h
+++ b/scripts/kconfig/expr.h
@@ -12,7 +12,7 @@ extern C {
 
 #include assert.h
 #include stdio.h
-#include sys/queue.h
+#include list.h
 #ifndef __cplusplus
 #include stdbool.h
 #endif
@@ -175,12 +175,11 @@ struct menu {
 #define MENU_ROOT  0x0002
 
 struct jump_key {
-   CIRCLEQ_ENTRY(jump_key) entries;
+   struct list_head entries;
size_t offset;
struct menu *target;
int index;
 };
-CIRCLEQ_HEAD(jk_head, jump_key);
 
 #define JUMP_NB9
 
diff --git a/scripts/kconfig/list.h b/scripts/kconfig/list.h
new file mode 100644
index 000..934bdba
--- /dev/null
+++ b/scripts/kconfig/list.h
@@ -0,0 +1,90 @@
+#ifndef LIST_H
+#define LIST_H
+
+/*
+ * Copied from include/linux/...
+ */
+
+#define offsetof(TYPE, MEMBER) ((size_t) ((TYPE *)0)-MEMBER)
+
+/**
+ * container_of - cast a member of a structure out to the containing structure
+ * @ptr:the pointer to the member.
+ * @type:   the type of the container struct this is embedded in.
+ * @member: the name of the member within the struct.
+ *
+ */
+#define container_of(ptr, type, member) ({  \
+   const typeof( ((type *)0)-member ) *__mptr = (ptr);\
+   (type *)( (char *)__mptr - offsetof(type,member) );})
+
+
+struct list_head {
+   struct list_head *next, *prev;
+};
+
+
+#define LIST_HEAD_INIT(name) { (name), (name) }
+
+#define LIST_HEAD(name) \
+   struct list_head name = LIST_HEAD_INIT(name)
+
+/**
+ * list_entry - get the struct for this entry
+ * @ptr:   the struct list_head pointer.
+ * @type:  the type of the struct this is embedded in.
+ * @member:the name of the list_struct within the struct.
+ */
+#define list_entry(ptr, type, member) \
+   container_of(ptr, type, member)
+
+/**
+ * list_for_each_entry -   iterate over list of given type
+ * @pos:   the type * to use as a loop cursor.
+ * @head:  the head for your list.
+ * @member:the name of the list_struct within the struct.
+ */
+#define list_for_each_entry(pos, head, member) \
+   for (pos = list_entry((head)-next, typeof(*pos), member);  \
+pos-member != (head);\
+pos = list_entry(pos-member.next, typeof(*pos), member))
+
+#endif
+
+/**
+ * list_empty - tests whether a list is empty
+ * @head: the list to test.
+ */
+static inline int list_empty(const struct list_head *head)
+{
+   return head-next == head;
+}
+
+/*
+ * Insert a new entry between two known consecutive entries.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static inline void __list_add(struct list_head *new,
+ struct list_head *prev,
+ struct list_head *next)
+{
+   next-prev = new;
+   new-next = next;
+   new-prev = prev;
+   prev-next = new;
+}
+
+/**
+ * list_add_tail - add a new entry
+ * @new: new entry to be added
+ * @head: list head to add it before
+ *
+ * Insert a new entry before the specified head.
+ * This is useful for implementing queues.
+ */
+static inline void list_add_tail(struct list_head *new, struct list_head *head)
+{
+   __list_add(new, head-prev, head);
+}
diff --git a/scripts/kconfig/lkc_proto.h b/scripts/kconfig/lkc_proto.h
index 1d1c085..ef1a738 100644
--- a/scripts/kconfig/lkc_proto.h
+++ b/scripts/kconfig/lkc_proto.h
@@ -21,9 +21,9 @@ P(menu_get_root_menu,struct menu *,(struct menu *menu));
 P(menu_get_parent_menu,struct menu *,(struct menu *menu));
 P(menu_has_help,bool,(struct menu *menu));
 P(menu_get_help,const char *,(struct menu *menu));
-P(get_symbol_str, void, (struct gstr *r, struct symbol *sym, struct jk_head
+P(get_symbol_str, void, (struct gstr *r, struct symbol *sym, struct list_head
 *head));
-P(get_relations_str, struct gstr, (struct symbol **sym_arr, struct jk_head
+P(get_relations_str, struct gstr, (struct symbol **sym_arr, struct list_head
   *head));
 P(menu_get_ext_help,void,(struct menu *menu, struct gstr *help));
 
diff --git a/scripts/kconfig/mconf.c b/scripts/kconfig/mconf.c
index 48f6744..53975cf 100644
--- a/scripts/kconfig/mconf.c
+++ b

[PATCH v2] menuconfig: Replace CIRCLEQ by list_head-style lists.

2012-10-21 Thread Benjamin Poirier

From: Benjamin Poirier bpoir...@suse.de

sys/queue.h and CIRCLEQ in particular have proven to cause portability
problems (reported on Debian Sarge, Cygwin and FreeBSD)

Reported-by: Tetsuo Handa penguin-ker...@i-love.sakura.ne.jp
Tested-by: Tetsuo Handa penguin-ker...@i-love.sakura.ne.jp
Tested-by: Yaakov Selkowitz yselkow...@users.sourceforge.net
Signed-off-by: Benjamin Poirier bpoir...@suse.de
Signed-off-by: Yann E. MORIN yann.morin.1...@free.fr
---

Changes v1->v2:
* integrate the patch/suggestion from Yann to fix problems related to the
  usage of list.h with xconfig/c++
* new keyword
* offsetof redefinition
* silly #endif placement mistake

Thanks to Tetsuo, Yann and Yaakov for thorough testing.

 scripts/kconfig/expr.h  |5 +--
 scripts/kconfig/list.h  |   91 +++
 scripts/kconfig/lkc_proto.h |4 +-
 scripts/kconfig/mconf.c |6 +--
 scripts/kconfig/menu.c  |   14 ---
 5 files changed, 106 insertions(+), 14 deletions(-)
 create mode 100644 scripts/kconfig/list.h

diff --git a/scripts/kconfig/expr.h b/scripts/kconfig/expr.h
index bd2e098..cdd4860 100644
--- a/scripts/kconfig/expr.h
+++ b/scripts/kconfig/expr.h
@@ -12,7 +12,7 @@ extern "C" {
 
 #include <assert.h>
 #include <stdio.h>
-#include <sys/queue.h>
+#include "list.h"
 #ifndef __cplusplus
 #include <stdbool.h>
 #endif
@@ -175,12 +175,11 @@ struct menu {
 #define MENU_ROOT  0x0002
 
 struct jump_key {
-   CIRCLEQ_ENTRY(jump_key) entries;
+   struct list_head entries;
size_t offset;
struct menu *target;
int index;
 };
-CIRCLEQ_HEAD(jk_head, jump_key);
 
 #define JUMP_NB 9
 
diff --git a/scripts/kconfig/list.h b/scripts/kconfig/list.h
new file mode 100644
index 000..0ae730b
--- /dev/null
+++ b/scripts/kconfig/list.h
@@ -0,0 +1,91 @@
+#ifndef LIST_H
+#define LIST_H
+
+/*
+ * Copied from include/linux/...
+ */
+
+#undef offsetof
+#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
+
+/**
+ * container_of - cast a member of a structure out to the containing structure
+ * @ptr:the pointer to the member.
+ * @type:   the type of the container struct this is embedded in.
+ * @member: the name of the member within the struct.
+ *
+ */
+#define container_of(ptr, type, member) ({  \
+   const typeof( ((type *)0)->member ) *__mptr = (ptr);\
+   (type *)( (char *)__mptr - offsetof(type,member) );})
+
+
+struct list_head {
+   struct list_head *next, *prev;
+};
+
+
+#define LIST_HEAD_INIT(name) { &(name), &(name) }
+
+#define LIST_HEAD(name) \
+   struct list_head name = LIST_HEAD_INIT(name)
+
+/**
+ * list_entry - get the struct for this entry
+ * @ptr:   the struct list_head pointer.
+ * @type:  the type of the struct this is embedded in.
+ * @member:the name of the list_struct within the struct.
+ */
+#define list_entry(ptr, type, member) \
+   container_of(ptr, type, member)
+
+/**
+ * list_for_each_entry -   iterate over list of given type
+ * @pos:   the type * to use as a loop cursor.
+ * @head:  the head for your list.
+ * @member:the name of the list_struct within the struct.
+ */
+#define list_for_each_entry(pos, head, member) \
+   for (pos = list_entry((head)->next, typeof(*pos), member);  \
+        &pos->member != (head);\
+        pos = list_entry(pos->member.next, typeof(*pos), member))
+
+/**
+ * list_empty - tests whether a list is empty
+ * @head: the list to test.
+ */
+static inline int list_empty(const struct list_head *head)
+{
+   return head->next == head;
+}
+
+/*
+ * Insert a new entry between two known consecutive entries.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static inline void __list_add(struct list_head *_new,
+ struct list_head *prev,
+ struct list_head *next)
+{
+   next->prev = _new;
+   _new->next = next;
+   _new->prev = prev;
+   prev->next = _new;
+}
+
+/**
+ * list_add_tail - add a new entry
+ * @new: new entry to be added
+ * @head: list head to add it before
+ *
+ * Insert a new entry before the specified head.
+ * This is useful for implementing queues.
+ */
+static inline void list_add_tail(struct list_head *_new, struct list_head *head)
+{
+   __list_add(_new, head->prev, head);
+}
+
+#endif
diff --git a/scripts/kconfig/lkc_proto.h b/scripts/kconfig/lkc_proto.h
index 1d1c085..ef1a738 100644
--- a/scripts/kconfig/lkc_proto.h
+++ b/scripts/kconfig/lkc_proto.h
@@ -21,9 +21,9 @@ P(menu_get_root_menu,struct menu *,(struct menu *menu));
 P(menu_get_parent_menu,struct menu *,(struct menu *menu));
 P(menu_has_help,bool,(struct menu *menu));
 P(menu_get_help,const char *,(struct menu *menu));
-P(get_symbol_str, void, (struct gstr *r, struct symbol *sym, struct jk_head
+P(get_symbol_str, void, (struct gstr *r

[PATCH 1/2] menuconfig: Fix memory leak introduced by jump keys feature

2013-04-15 Thread Benjamin Poirier

Fixes the memory leak of struct jump_key allocated in get_prompt_str()

Signed-off-by: Benjamin Poirier bpoir...@suse.de
---
 scripts/kconfig/list.h  |   13 +
 scripts/kconfig/mconf.c |3 +++
 2 files changed, 16 insertions(+)

diff --git a/scripts/kconfig/list.h b/scripts/kconfig/list.h
index 0ae730b..b87206c 100644
--- a/scripts/kconfig/list.h
+++ b/scripts/kconfig/list.h
@@ -51,6 +51,19 @@ struct list_head {
 pos = list_entry(pos-member.next, typeof(*pos), member))
 
 /**
+ * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry
+ * @pos:   the type * to use as a loop cursor.
+ * @n: another type * to use as temporary storage
+ * @head:  the head for your list.
+ * @member:the name of the list_struct within the struct.
+ */
+#define list_for_each_entry_safe(pos, n, head, member) \
+   for (pos = list_entry((head)->next, typeof(*pos), member),  \
+   n = list_entry(pos->member.next, typeof(*pos), member); \
+        &pos->member != (head);\
+        pos = n, n = list_entry(n->member.next, typeof(*n), member))
+
+/**
  * list_empty - tests whether a list is empty
  * @head: the list to test.
  */
diff --git a/scripts/kconfig/mconf.c b/scripts/kconfig/mconf.c
index 566288a..c5418d6 100644
--- a/scripts/kconfig/mconf.c
+++ b/scripts/kconfig/mconf.c
@@ -389,6 +389,7 @@ again:
.targets = targets,
.keys = keys,
};
+   struct jump_key *pos, *tmp;
 
res = get_relations_str(sym_arr, &head);
dres = show_textbox_ext(_("Search Results"), (char *)
@@ -402,6 +403,8 @@ again:
again = true;
}
str_free(res);
+   list_for_each_entry_safe(pos, tmp, &head, entries)
+   free(pos);
} while (again);
free(sym_arr);
str_free(title);
-- 
1.7.10.4

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 2/2] menuconfig: Add breadcrumbs navigation aid

2013-04-15 Thread Benjamin Poirier

Displays a trail of the menu entries used to get to the current menu.

Signed-off-by: Benjamin Poirier bpoir...@suse.de
---
 scripts/kconfig/list.h|   27 ++
 scripts/kconfig/lxdialog/dialog.h |7 
 scripts/kconfig/lxdialog/util.c   |   48 -
 scripts/kconfig/mconf.c   |   71 -
 4 files changed, 151 insertions(+), 2 deletions(-)

diff --git a/scripts/kconfig/list.h b/scripts/kconfig/list.h
index b87206c..ea1d581 100644
--- a/scripts/kconfig/list.h
+++ b/scripts/kconfig/list.h
@@ -101,4 +101,31 @@ static inline void list_add_tail(struct list_head *_new, 
struct list_head *head)
__list_add(_new, head-prev, head);
 }
 
+/*
+ * Delete a list entry by making the prev/next entries
+ * point to each other.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static inline void __list_del(struct list_head *prev, struct list_head *next)
+{
+   next->prev = prev;
+   prev->next = next;
+}
+
+#define LIST_POISON1  ((void *) 0x00100100)
+#define LIST_POISON2  ((void *) 0x00200200)
+/**
+ * list_del - deletes entry from list.
+ * @entry: the element to delete from the list.
+ * Note: list_empty() on entry does not return true after this, the entry is
+ * in an undefined state.
+ */
+static inline void list_del(struct list_head *entry)
+{
+   __list_del(entry->prev, entry->next);
+   entry->next = LIST_POISON1;
+   entry->prev = LIST_POISON2;
+}
 #endif
diff --git a/scripts/kconfig/lxdialog/dialog.h 
b/scripts/kconfig/lxdialog/dialog.h
index 307022a..10993370 100644
--- a/scripts/kconfig/lxdialog/dialog.h
+++ b/scripts/kconfig/lxdialog/dialog.h
@@ -106,8 +106,14 @@ struct dialog_color {
int hl; /* highlight this item */
 };
 
+struct subtitle_list {
+   struct subtitle_list *next;
+   const char *text;
+};
+
 struct dialog_info {
const char *backtitle;
+   struct subtitle_list *subtitles;
struct dialog_color screen;
struct dialog_color shadow;
struct dialog_color dialog;
@@ -196,6 +202,7 @@ int on_key_resize(void);
 
 int init_dialog(const char *backtitle);
 void set_dialog_backtitle(const char *backtitle);
+void set_dialog_subtitles(struct subtitle_list *subtitles);
 void end_dialog(int x, int y);
 void attr_clear(WINDOW * win, int height, int width, chtype attr);
 void dialog_clear(void);
diff --git a/scripts/kconfig/lxdialog/util.c b/scripts/kconfig/lxdialog/util.c
index 109d531..9a41d78 100644
--- a/scripts/kconfig/lxdialog/util.c
+++ b/scripts/kconfig/lxdialog/util.c
@@ -257,13 +257,54 @@ void dialog_clear(void)
attr_clear(stdscr, LINES, COLS, dlg.screen.atr);
/* Display background title if it exists ... - SLH */
if (dlg.backtitle != NULL) {
-   int i;
+   int i, len = 0;
+   struct subtitle_list *pos;
 
wattrset(stdscr, dlg.screen.atr);
mvwaddstr(stdscr, 0, 1, (char *)dlg.backtitle);
wmove(stdscr, 1, 1);
for (i = 1; i  COLS - 1; i++)
waddch(stdscr, ACS_HLINE);
+
+   for (pos = dlg.subtitles; pos != NULL; pos = pos-next) {
+   /* 3 is for the arrow and spaces */
+   len += strlen(pos-text) + 3;
+   }
+
+   if (len) {
+   int skip = 0;
+
+   wmove(stdscr, 1, 1);
+   if (len  COLS - 2) {
+   const char *ellipsis = [...] ;
+   waddstr(stdscr, ellipsis);
+   skip = len - (COLS - 2 - strlen(ellipsis));
+   }
+
+   for (pos = dlg.subtitles; pos != NULL; pos =
+pos-next) {
+   if (skip == 0)
+   waddch(stdscr, ACS_RARROW);
+   else
+   skip--;
+
+   if (skip == 0)
+   waddch(stdscr, ' ');
+   else
+   skip--;
+
+   if (skip  strlen(pos-text)) {
+   waddstr(stdscr, pos-text + skip);
+   skip = 0;
+   } else
+   skip -= strlen(pos-text);
+
+   if (skip == 0)
+   waddch(stdscr, ' ');
+   else
+   skip--;
+   }
+   }
}
wnoutrefresh(stdscr);
 }
@@ -302,6 +343,11 @@ void set_dialog_backtitle(const char *backtitle)
dlg.backtitle = backtitle;
 }
 
+void set_dialog_subtitles(struct

[PATCH v3] menuconfig: Add breadcrumbs navigation aid

2013-04-16 Thread Benjamin Poirier

Displays a trail of the menu entries used to get to the current menu.

Signed-off-by: Benjamin Poirier bpoir...@suse.de
Tested-by: Yann E. MORIN yann.morin.1...@free.fr
[yann.morin.1...@free.fr: small, trivial code re-ordering]
Signed-off-by: Yann E. MORIN yann.morin.1...@free.fr
---

Indeed Yann, we can even remove one wmove() and one level of indentation in
dialog_clear().

I'm not sure who's pulling from who and where patches are going, but this
would supersede what's in yem-kconfig-for-next now.


 scripts/kconfig/list.h|   27 ++
 scripts/kconfig/lxdialog/dialog.h |7 
 scripts/kconfig/lxdialog/util.c   |   45 +--
 scripts/kconfig/mconf.c   |   71 -
 4 files changed, 147 insertions(+), 3 deletions(-)

diff --git a/scripts/kconfig/list.h b/scripts/kconfig/list.h
index 0ae730b..2f02a3b 100644
--- a/scripts/kconfig/list.h
+++ b/scripts/kconfig/list.h
@@ -88,4 +88,31 @@ static inline void list_add_tail(struct list_head *_new, 
struct list_head *head)
__list_add(_new, head-prev, head);
 }
 
+/*
+ * Delete a list entry by making the prev/next entries
+ * point to each other.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static inline void __list_del(struct list_head *prev, struct list_head *next)
+{
+   next->prev = prev;
+   prev->next = next;
+}
+
+#define LIST_POISON1  ((void *) 0x00100100)
+#define LIST_POISON2  ((void *) 0x00200200)
+/**
+ * list_del - deletes entry from list.
+ * @entry: the element to delete from the list.
+ * Note: list_empty() on entry does not return true after this, the entry is
+ * in an undefined state.
+ */
+static inline void list_del(struct list_head *entry)
+{
+   __list_del(entry->prev, entry->next);
+   entry->next = LIST_POISON1;
+   entry->prev = LIST_POISON2;
+}
 #endif
diff --git a/scripts/kconfig/lxdialog/dialog.h 
b/scripts/kconfig/lxdialog/dialog.h
index 307022a..10993370 100644
--- a/scripts/kconfig/lxdialog/dialog.h
+++ b/scripts/kconfig/lxdialog/dialog.h
@@ -106,8 +106,14 @@ struct dialog_color {
int hl; /* highlight this item */
 };
 
+struct subtitle_list {
+   struct subtitle_list *next;
+   const char *text;
+};
+
 struct dialog_info {
const char *backtitle;
+   struct subtitle_list *subtitles;
struct dialog_color screen;
struct dialog_color shadow;
struct dialog_color dialog;
@@ -196,6 +202,7 @@ int on_key_resize(void);
 
 int init_dialog(const char *backtitle);
 void set_dialog_backtitle(const char *backtitle);
+void set_dialog_subtitles(struct subtitle_list *subtitles);
 void end_dialog(int x, int y);
 void attr_clear(WINDOW * win, int height, int width, chtype attr);
 void dialog_clear(void);
diff --git a/scripts/kconfig/lxdialog/util.c b/scripts/kconfig/lxdialog/util.c
index 109d531..a0e97c2 100644
--- a/scripts/kconfig/lxdialog/util.c
+++ b/scripts/kconfig/lxdialog/util.c
@@ -257,12 +257,48 @@ void dialog_clear(void)
attr_clear(stdscr, LINES, COLS, dlg.screen.atr);
/* Display background title if it exists ... - SLH */
if (dlg.backtitle != NULL) {
-   int i;
+   int i, len = 0, skip = 0;
+   struct subtitle_list *pos;
 
wattrset(stdscr, dlg.screen.atr);
mvwaddstr(stdscr, 0, 1, (char *)dlg.backtitle);
+
+   for (pos = dlg.subtitles; pos != NULL; pos = pos->next) {
+   /* 3 is for the arrow and spaces */
+   len += strlen(pos->text) + 3;
+   }
+
wmove(stdscr, 1, 1);
-   for (i = 1; i < COLS - 1; i++)
+   if (len > COLS - 2) {
+   const char *ellipsis = "[...] ";
+   waddstr(stdscr, ellipsis);
+   skip = len - (COLS - 2 - strlen(ellipsis));
+   }
+
+   for (pos = dlg.subtitles; pos != NULL; pos = pos->next) {
+   if (skip == 0)
+   waddch(stdscr, ACS_RARROW);
+   else
+   skip--;
+
+   if (skip == 0)
+   waddch(stdscr, ' ');
+   else
+   skip--;
+
+   if (skip < strlen(pos->text)) {
+   waddstr(stdscr, pos->text + skip);
+   skip = 0;
+   } else
+   skip -= strlen(pos->text);
+
+   if (skip == 0)
+   waddch(stdscr, ' ');
+   else
+   skip--;
+   }
+
+   for (i = len + 1; i < COLS - 1; i++)
waddch(stdscr, ACS_HLINE);
}
wnoutrefresh(stdscr);
@@ -302,6 +338,11 @@ void set_dialog_backtitle(const

[PATCH 0/6] menuconfig: jump to search results

2012-07-30 Thread Benjamin Poirier

Hello,

This patch series adds jump to keys (similar to the cscope interface) to the
search results of make menuconfig so that we can go directly to the menu
entry for a config option after searching for it.

Patches 1-4 implement the basic functionality.
Patches 5-6 are an optional improvement.

Enjoy,
-Benjamin

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 1/6] menuconfig: Remove superfluous conditional

2012-07-30 Thread Benjamin Poirier

Because end_reached is set to 0 before the loop, the test !end_reached is
always true and can be removed. This structure was perhaps copied from the
similar one in back_lines().

Signed-off-by: Benjamin Poirier bpoir...@suse.de
---
 scripts/kconfig/lxdialog/textbox.c |8 +++-
 1 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/scripts/kconfig/lxdialog/textbox.c 
b/scripts/kconfig/lxdialog/textbox.c
index 4e5de60..264a2b9 100644
--- a/scripts/kconfig/lxdialog/textbox.c
+++ b/scripts/kconfig/lxdialog/textbox.c
@@ -357,10 +357,8 @@ static char *get_line(void)
end_reached = 0;
while (*page != '\n') {
if (*page == '\0') {
-   if (!end_reached) {
-   end_reached = 1;
-   break;
-   }
+   end_reached = 1;
+   break;
} else if (i < MAX_LEN)
line[i++] = *(page++);
else {
@@ -373,7 +371,7 @@ static char *get_line(void)
if (i <= MAX_LEN)
line[i] = '\0';
if (!end_reached)
-   page++; /* move pass '\n' */
+   page++; /* move past '\n' */
 
return line;
 }
-- 
1.7.7

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 2/6] menuconfig: Extend dialog_textbox so that it can exit on arbitrary keypresses

2012-07-30 Thread Benjamin Poirier

Signed-off-by: Benjamin Poirier bpoir...@suse.de
---
 scripts/kconfig/lxdialog/dialog.h  |3 ++-
 scripts/kconfig/lxdialog/textbox.c |   31 +--
 scripts/kconfig/mconf.c|   12 ++--
 3 files changed, 33 insertions(+), 13 deletions(-)

diff --git a/scripts/kconfig/lxdialog/dialog.h 
b/scripts/kconfig/lxdialog/dialog.h
index b5211fc..014c029 100644
--- a/scripts/kconfig/lxdialog/dialog.h
+++ b/scripts/kconfig/lxdialog/dialog.h
@@ -209,7 +209,8 @@ int first_alpha(const char *string, const char *exempt);
 int dialog_yesno(const char *title, const char *prompt, int height, int width);
 int dialog_msgbox(const char *title, const char *prompt, int height,
  int width, int pause);
-int dialog_textbox(const char *title, const char *file, int height, int width);
+int dialog_textbox(const char *title, const char *file, int height, int width,
+  int *keys);
 int dialog_menu(const char *title, const char *prompt,
const void *selected, int *s_scroll);
 int dialog_checklist(const char *title, const char *prompt, int height,
diff --git a/scripts/kconfig/lxdialog/textbox.c 
b/scripts/kconfig/lxdialog/textbox.c
index 264a2b9..eb4ee92 100644
--- a/scripts/kconfig/lxdialog/textbox.c
+++ b/scripts/kconfig/lxdialog/textbox.c
@@ -47,14 +47,16 @@ static void refresh_text_box(WINDOW *dialog, WINDOW *box, 
int boxh, int boxw,
 
 /*
  * Display text from a file in a dialog box.
+ *
+ * keys is a null-terminated array
  */
-int dialog_textbox(const char *title, const char *tbuf,
-  int initial_height, int initial_width)
+int dialog_textbox(const char *title, const char *tbuf, int initial_height,
+  int initial_width, int *keys)
 {
int i, x, y, cur_x, cur_y, key = 0;
int height, width, boxh, boxw;
-   int passed_end;
WINDOW *dialog, *box;
+   bool done = false;
 
begin_reached = 1;
end_reached = 0;
@@ -122,7 +124,7 @@ do_resize:
attr_clear(box, boxh, boxw, dlg.dialog.atr);
refresh_text_box(dialog, box, boxh, boxw, cur_y, cur_x);
 
-   while ((key != KEY_ESC) && (key != '\n')) {
+   while (!done) {
key = wgetch(dialog);
switch (key) {
case 'E':   /* Exit */
@@ -130,9 +132,9 @@ do_resize:
case 'X':
case 'x':
case 'q':
-   delwin(box);
-   delwin(dialog);
-   return 0;
+   case '\n':
+   done = true;
+   break;
case 'g':   /* First page */
case KEY_HOME:
if (!begin_reached) {
@@ -156,6 +158,8 @@ do_resize:
case 'k':
case KEY_UP:
if (!begin_reached) {
+   int passed_end = 0;
+
back_lines(page_length + 1);
 
/* We don't call print_page() here but use
@@ -169,7 +173,6 @@ do_resize:
wscrl(box, -1); /* Scroll box region down one 
line */
scrollok(box, FALSE);
page_length = 0;
-   passed_end = 0;
for (i = 0; i  boxh; i++) {
if (!i) {
/* print first line of page */
@@ -252,7 +255,8 @@ do_resize:
 cur_y, cur_x);
break;
case KEY_ESC:
-   key = on_key_esc(dialog);
+   if (on_key_esc(dialog) == KEY_ESC)
+   done = true;
break;
case KEY_RESIZE:
back_lines(height);
@@ -260,11 +264,18 @@ do_resize:
delwin(dialog);
on_key_resize();
goto do_resize;
+   default:
+   for (i = 0; keys[i]; i++) {
+   if (key == keys[i]) {
+   done = true;
+   break;
+   }
+   }
}
}
delwin(box);
delwin(dialog);
-   return key; /* ESC pressed */
+   return key;
 }
 
 /*
diff --git a/scripts/kconfig/mconf.c b/scripts/kconfig/mconf.c
index f584a28..116e5da 100644
--- a/scripts/kconfig/mconf.c
+++ b/scripts/kconfig/mconf.c
@@ -280,6 +280,8 @@ static void conf_choice(struct menu *menu);
 static void conf_string(struct menu *menu);
 static void conf_load(void);
 static void conf_save(void);
+static int show_textbox_ext(const char *title, const char *text, int r, int c,
+   int *keys);
 static

[PATCH 4/6] menuconfig: Add jump keys to search results

2012-07-30 Thread Benjamin Poirier

This patch makes it possible to jump directly to the menu for a configuration entry after
having searched for it with '/'. If this menu is not currently accessible we
jump to the nearest accessible parent instead. After exiting this menu, the
user is returned to the search results where he may jump further in or
elsewhere.

Signed-off-by: Benjamin Poirier bpoir...@suse.de
---
 scripts/kconfig/expr.h  |2 +
 scripts/kconfig/lkc_proto.h |6 +++-
 scripts/kconfig/mconf.c |   64 +--
 scripts/kconfig/menu.c  |   55 +---
 scripts/kconfig/nconf.c |2 +-
 5 files changed, 94 insertions(+), 35 deletions(-)

diff --git a/scripts/kconfig/expr.h b/scripts/kconfig/expr.h
index d4ecce8..52f4246 100644
--- a/scripts/kconfig/expr.h
+++ b/scripts/kconfig/expr.h
@@ -173,6 +173,8 @@ struct menu {
 #define MENU_CHANGED   0x0001
 #define MENU_ROOT  0x0002
 
+#define JUMP_NB9
+
 extern struct file *file_list;
 extern struct file *current_file;
 struct file *lookup_file(const char *name);
diff --git a/scripts/kconfig/lkc_proto.h b/scripts/kconfig/lkc_proto.h
index 47fe9c3..946c2cb3 100644
--- a/scripts/kconfig/lkc_proto.h
+++ b/scripts/kconfig/lkc_proto.h
@@ -21,8 +21,10 @@ P(menu_get_root_menu,struct menu *,(struct menu *menu));
 P(menu_get_parent_menu,struct menu *,(struct menu *menu));
 P(menu_has_help,bool,(struct menu *menu));
 P(menu_get_help,const char *,(struct menu *menu));
-P(get_symbol_str, void, (struct gstr *r, struct symbol *sym));
-P(get_relations_str, struct gstr, (struct symbol **sym_arr));
+P(get_symbol_str, int, (struct gstr *r, struct symbol *sym, struct menu
+   **jumps, int jump_nb));
+P(get_relations_str, struct gstr, (struct symbol **sym_arr, struct menu
+  **jumps));
 P(menu_get_ext_help,void,(struct menu *menu, struct gstr *help));
 
 /* symbol.c */
diff --git a/scripts/kconfig/mconf.c b/scripts/kconfig/mconf.c
index c57cc6a..bf75753 100644
--- a/scripts/kconfig/mconf.c
+++ b/scripts/kconfig/mconf.c
@@ -236,16 +236,19 @@ search_help[] = N_(
Result:\n
-\n
Symbol: FOO [=m]\n
+   Type  : tristate\n
Prompt: Foo bus is used to drive the bar HW\n
-   Defined at drivers/pci/Kconfig:47\n
-   Depends on: X86_LOCAL_APIC  X86_IO_APIC || IA64\n
-   Location:\n
- - Bus options (PCI, PCMCIA, EISA, ISA)\n
-   - PCI support (PCI [=y])\n
- - PCI access mode (choice [=y])\n
-   Selects: LIBCRC32\n
-   Selected by: BAR\n
+ Defined at drivers/pci/Kconfig:47\n
+ Depends on: X86_LOCAL_APIC  X86_IO_APIC || IA64\n
+ Location:\n
+   - Bus options (PCI, PCMCIA, EISA, ISA)\n
+ - PCI support (PCI [=y])\n
+   (1) - PCI access mode (choice [=y])\n
+ Selects: LIBCRC32\n
+ Selected by: BAR\n
-\n
+   o The line 'Type:' shows the type of the configuration option for\n
+ this symbol (boolean, tristate, string, ...)\n
o The line 'Prompt:' shows the text used in the menu structure for\n
  this symbol\n
o The 'Defined at' line tell at what file / line number the symbol\n
@@ -254,8 +257,12 @@ search_help[] = N_(
  this symbol to be visible in the menu (selectable)\n
o The 'Location:' lines tell where in the menu structure this symbol\n
  is located\n
-   A location followed by a [=y] indicate that this is a selectable\n
-   menu item - and current value is displayed inside brackets.\n
+   A location followed by a [=y] indicates that this is a\n
+   selectable menu item - and the current value is displayed inside\n
+   brackets.\n
+   Press the key in the (#) prefix to jump directly to that\n
+   location. You will be returned to the current search results\n
+   after exiting this new menu\n
o The 'Selects:' line tell what symbol will be automatically\n
  selected if this symbol is selected (y or m)\n
o The 'Selected by' line tell what symbol has selected this symbol\n
@@ -275,7 +282,7 @@ static int single_menu_mode;
 static int show_all_options;
 static int saved_x, saved_y;
 
-static void conf(struct menu *menu);
+static void conf(struct menu *menu, struct menu *active_menu);
 static void conf_choice(struct menu *menu);
 static void conf_string(struct menu *menu);
 static void conf_load(void);
@@ -309,7 +316,9 @@ static void search_conf(void)
struct symbol **sym_arr;
struct gstr res;
char *dialog_input;
-   int dres;
+   int dres, vscroll = 0, hscroll = 0;
+   bool again;
+
 again:
dialog_clear();
dres = dialog_inputbox(_(Search Configuration Parameter),
@@ -332,10 +341,24 @@ again

[PATCH 6/6] menuconfig: Assign jump keys per-page instead of globally

2012-07-30 Thread Benjamin Poirier

At the moment, keys 1-9 are assigned to the first 9 search results. This patch
makes them assigned to the first 9 results per-page instead. We are much less
likely to run out of keys that way.

Signed-off-by: Benjamin Poirier bpoir...@suse.de
---
 scripts/kconfig/expr.h |9 
 scripts/kconfig/lkc_proto.h|8 ++--
 scripts/kconfig/lxdialog/dialog.h  |9 +++-
 scripts/kconfig/lxdialog/textbox.c |   65 
 scripts/kconfig/mconf.c|   73 ---
 scripts/kconfig/menu.c |   54 ++-
 6 files changed, 147 insertions(+), 71 deletions(-)

diff --git a/scripts/kconfig/expr.h b/scripts/kconfig/expr.h
index 52f4246..bd2e098 100644
--- a/scripts/kconfig/expr.h
+++ b/scripts/kconfig/expr.h
@@ -12,6 +12,7 @@ extern C {
 
 #include assert.h
 #include stdio.h
+#include sys/queue.h
 #ifndef __cplusplus
 #include stdbool.h
 #endif
@@ -173,6 +174,14 @@ struct menu {
 #define MENU_CHANGED   0x0001
 #define MENU_ROOT  0x0002
 
+struct jump_key {
+   CIRCLEQ_ENTRY(jump_key) entries;
+   size_t offset;
+   struct menu *target;
+   int index;
+};
+CIRCLEQ_HEAD(jk_head, jump_key);
+
 #define JUMP_NB9
 
 extern struct file *file_list;
diff --git a/scripts/kconfig/lkc_proto.h b/scripts/kconfig/lkc_proto.h
index 946c2cb3..1d1c085 100644
--- a/scripts/kconfig/lkc_proto.h
+++ b/scripts/kconfig/lkc_proto.h
@@ -21,10 +21,10 @@ P(menu_get_root_menu,struct menu *,(struct menu *menu));
 P(menu_get_parent_menu,struct menu *,(struct menu *menu));
 P(menu_has_help,bool,(struct menu *menu));
 P(menu_get_help,const char *,(struct menu *menu));
-P(get_symbol_str, int, (struct gstr *r, struct symbol *sym, struct menu
-   **jumps, int jump_nb));
-P(get_relations_str, struct gstr, (struct symbol **sym_arr, struct menu
-  **jumps));
+P(get_symbol_str, void, (struct gstr *r, struct symbol *sym, struct jk_head
+*head));
+P(get_relations_str, struct gstr, (struct symbol **sym_arr, struct jk_head
+  *head));
 P(menu_get_ext_help,void,(struct menu *menu, struct gstr *help));
 
 /* symbol.c */
diff --git a/scripts/kconfig/lxdialog/dialog.h 
b/scripts/kconfig/lxdialog/dialog.h
index 8e7f43b..28e8877 100644
--- a/scripts/kconfig/lxdialog/dialog.h
+++ b/scripts/kconfig/lxdialog/dialog.h
@@ -209,8 +209,13 @@ int first_alpha(const char *string, const char *exempt);
 int dialog_yesno(const char *title, const char *prompt, int height, int width);
 int dialog_msgbox(const char *title, const char *prompt, int height,
  int width, int pause);
-int dialog_textbox(const char *title, const char *file, int height, int width,
-  int *keys, int *_vscroll, int *_hscroll);
+
+
+typedef void (*update_text_fn)(char* buf, size_t start, size_t end, void
+  *_data);
+int dialog_textbox(const char *title, char *tbuf, int initial_height,
+  int initial_width, int *keys, int *_vscroll, int *_hscroll,
+  update_text_fn update_text, void *data);
 int dialog_menu(const char *title, const char *prompt,
const void *selected, int *s_scroll);
 int dialog_checklist(const char *title, const char *prompt, int height,
diff --git a/scripts/kconfig/lxdialog/textbox.c 
b/scripts/kconfig/lxdialog/textbox.c
index 3b3c5c4..fcca719e 100644
--- a/scripts/kconfig/lxdialog/textbox.c
+++ b/scripts/kconfig/lxdialog/textbox.c
@@ -22,23 +22,25 @@
 #include dialog.h
 
 static void back_lines(int n);
-static void print_page(WINDOW * win, int height, int width);
+static void print_page(WINDOW * win, int height, int width, update_text_fn
+  update_text, void *data);
 static void print_line(WINDOW * win, int row, int width);
 static char *get_line(void);
 static void print_position(WINDOW * win);
 
 static int hscroll;
 static int begin_reached, end_reached, page_length;
-static const char *buf;
-static const char *page;
+static char *buf;
+static char *page;
 
 /*
  * refresh window content
  */
 static void refresh_text_box(WINDOW *dialog, WINDOW *box, int boxh, int boxw,
- int cur_y, int cur_x)
+int cur_y, int cur_x, update_text_fn update_text,
+void *data)
 {
-   print_page(box, boxh, boxw);
+   print_page(box, boxh, boxw, update_text, data);
print_position(dialog);
wmove(dialog, cur_y, cur_x);/* Restore cursor position */
wrefresh(dialog);
@@ -49,9 +51,11 @@ static void refresh_text_box(WINDOW *dialog, WINDOW *box, 
int boxh, int boxw,
  * Display text from a file in a dialog box.
  *
  * keys is a null-terminated array
+ * update_text() may not add or remove any '\n' or '\0' in tbuf
  */
-int dialog_textbox(const char *title, const char

[PATCH 5/6] menuconfig: Do not open code textbox scroll up/down

2012-07-30 Thread Benjamin Poirier

We don't need to explicitly use ncurses' scroll(). ncurses performs
vertical-motion optimization at wrefresh() time.

Using strace I confirmed that with the following patch curses still sends only
the new line of text to the terminal when scrolling up/down one line at a
time.

Signed-off-by: Benjamin Poirier bpoir...@suse.de
---
 scripts/kconfig/lxdialog/textbox.c |   55 +++
 1 files changed, 11 insertions(+), 44 deletions(-)

diff --git a/scripts/kconfig/lxdialog/textbox.c 
b/scripts/kconfig/lxdialog/textbox.c
index 506a095..3b3c5c4 100644
--- a/scripts/kconfig/lxdialog/textbox.c
+++ b/scripts/kconfig/lxdialog/textbox.c
@@ -166,40 +166,12 @@ do_resize:
case 'K':   /* Previous line */
case 'k':
case KEY_UP:
-   if (!begin_reached) {
-   int passed_end = 0;
-
-   back_lines(page_length + 1);
-
-   /* We don't call print_page() here but use
-* scrolling to ensure faster screen update.
-* However, 'end_reached' and 'page_length'
-* should still be updated, and 'page' should
-* point to start of next page. This is done
-* by calling get_line() in the following
-* 'for' loop. */
-   scrollok(box, TRUE);
-   wscrl(box, -1); /* Scroll box region down one 
line */
-   scrollok(box, FALSE);
-   page_length = 0;
-   for (i = 0; i  boxh; i++) {
-   if (!i) {
-   /* print first line of page */
-   print_line(box, 0, boxw);
-   wnoutrefresh(box);
-   } else
-   /* Called to update 
'end_reached' and 'page' */
-   get_line();
-   if (!passed_end)
-   page_length++;
-   if (end_reached  !passed_end)
-   passed_end = 1;
-   }
+   if (begin_reached)
+   break;
 
-   print_position(dialog);
-   wmove(dialog, cur_y, cur_x);/* Restore 
cursor position */
-   wrefresh(dialog);
-   }
+   back_lines(page_length + 1);
+   refresh_text_box(dialog, box, boxh, boxw, cur_y,
+cur_x);
break;
case 'B':   /* Previous page */
case 'b':
@@ -214,17 +186,12 @@ do_resize:
case 'J':   /* Next line */
case 'j':
case KEY_DOWN:
-   if (!end_reached) {
-   begin_reached = 0;
-   scrollok(box, TRUE);
-   scroll(box);/* Scroll box region up one 
line */
-   scrollok(box, FALSE);
-   print_line(box, boxh - 1, boxw);
-   wnoutrefresh(box);
-   print_position(dialog);
-   wmove(dialog, cur_y, cur_x);/* Restore 
cursor position */
-   wrefresh(dialog);
-   }
+   if (end_reached)
+   break;
+
+   back_lines(page_length - 1);
+   refresh_text_box(dialog, box, boxh, boxw, cur_y,
+cur_x);
break;
case KEY_NPAGE: /* Next page */
case ' ':
-- 
1.7.7

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 3/6] menuconfig: Extend dialog_textbox so that it can return to a scrolled position

2012-07-30 Thread Benjamin Poirier

Signed-off-by: Benjamin Poirier bpoir...@suse.de
---
 scripts/kconfig/lxdialog/dialog.h  |2 +-
 scripts/kconfig/lxdialog/textbox.c |   24 +++-
 scripts/kconfig/mconf.c|8 
 3 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/scripts/kconfig/lxdialog/dialog.h 
b/scripts/kconfig/lxdialog/dialog.h
index 014c029..8e7f43b 100644
--- a/scripts/kconfig/lxdialog/dialog.h
+++ b/scripts/kconfig/lxdialog/dialog.h
@@ -210,7 +210,7 @@ int dialog_yesno(const char *title, const char *prompt, int 
height, int width);
 int dialog_msgbox(const char *title, const char *prompt, int height,
  int width, int pause);
 int dialog_textbox(const char *title, const char *file, int height, int width,
-  int *keys);
+  int *keys, int *_vscroll, int *_hscroll);
 int dialog_menu(const char *title, const char *prompt,
const void *selected, int *s_scroll);
 int dialog_checklist(const char *title, const char *prompt, int height,
diff --git a/scripts/kconfig/lxdialog/textbox.c 
b/scripts/kconfig/lxdialog/textbox.c
index eb4ee92..506a095 100644
--- a/scripts/kconfig/lxdialog/textbox.c
+++ b/scripts/kconfig/lxdialog/textbox.c
@@ -51,7 +51,7 @@ static void refresh_text_box(WINDOW *dialog, WINDOW *box, int 
boxh, int boxw,
  * keys is a null-terminated array
  */
 int dialog_textbox(const char *title, const char *tbuf, int initial_height,
-  int initial_width, int *keys)
+  int initial_width, int *keys, int *_vscroll, int *_hscroll)
 {
int i, x, y, cur_x, cur_y, key = 0;
int height, width, boxh, boxw;
@@ -65,6 +65,15 @@ int dialog_textbox(const char *title, const char *tbuf, int 
initial_height,
buf = tbuf;
page = buf; /* page is pointer to start of page to be displayed */
 
+   if (_vscroll  *_vscroll) {
+   begin_reached = 0;
+
+   for (i = 0; i  *_vscroll; i++)
+   get_line();
+   }
+   if (_hscroll)
+   hscroll = *_hscroll;
+
 do_resize:
getmaxyx(stdscr, height, width);
if (height  8 || width  8)
@@ -275,6 +284,19 @@ do_resize:
}
delwin(box);
delwin(dialog);
+   if (_vscroll) {
+   const char *s;
+
+   s = buf;
+   *_vscroll = 0;
+   back_lines(page_length);
+   while (s  page  (s = strchr(s, '\n'))) {
+   (*_vscroll)++;
+   s++;
+   }
+   }
+   if (_hscroll)
+   *_hscroll = hscroll;
return key;
 }
 
diff --git a/scripts/kconfig/mconf.c b/scripts/kconfig/mconf.c
index 116e5da..c57cc6a 100644
--- a/scripts/kconfig/mconf.c
+++ b/scripts/kconfig/mconf.c
@@ -281,7 +281,7 @@ static void conf_string(struct menu *menu);
 static void conf_load(void);
 static void conf_save(void);
 static int show_textbox_ext(const char *title, const char *text, int r, int c,
-   int *keys);
+   int *keys, int *vscroll, int *hscroll);
 static void show_textbox(const char *title, const char *text, int r, int c);
 static void show_helptext(const char *title, const char *text);
 static void show_help(struct menu *menu);
@@ -622,15 +622,15 @@ static void conf(struct menu *menu)
 }
 
 static int show_textbox_ext(const char *title, const char *text, int r, int c,
-   int *keys)
+   int *keys, int *vscroll, int *hscroll)
 {
dialog_clear();
-   return dialog_textbox(title, text, r, c, keys);
+   return dialog_textbox(title, text, r, c, keys, vscroll, hscroll);
 }
 
 static void show_textbox(const char *title, const char *text, int r, int c)
 {
-   show_textbox_ext(title, text, r, c, (int []) {0});
+   show_textbox_ext(title, text, r, c, (int []) {0}, NULL, NULL);
 }
 
 static void show_helptext(const char *title, const char *text)
-- 
1.7.7

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 0/6] menuconfig: jump to search results

2012-07-30 Thread Benjamin Poirier

On 2012/07/30 21:58, Borislav Petkov wrote:
 On Mon, Jul 30, 2012 at 03:22:04PM -0400, Benjamin Poirier wrote:
  Hello,
  
  This patch series adds jump to keys (similar to the cscope interface) to 
  the
  search results of make menuconfig so that we can go directly to the menu
  entry for a config option after searching for it.
  
  Patches 1-4 implement the basic functionality.
  Patches 5-6 are an optional improvement.
 
 Generally,
 
 a nice idea, I like it.
 
 A couple of notes though:
 
 * patches 2/6 and 3/6 don't have commit messages and it would be good if
 they had, even though it is obvious what the changes in there do.
 
 * this patchset must be against some kbuild tree because it doesn't
 apply against 3.5:

It's against linux-next, which already contains some patches for
menuconfig.
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 0/6] menuconfig: jump to search results

2012-07-31 Thread Benjamin Poirier

On 2012/07/31 09:57, Borislav Petkov wrote:
 On Mon, Jul 30, 2012 at 04:22:12PM -0400, Benjamin Poirier wrote:
  It's against linux-next, which already contains some patches for
  menuconfig.
 
 Ok, applying against the linux-next from today succeeds. And they seem
 to work, I can search for CONFIG_* stuff and then go straight to the
 results.
 
 A couple more observations:
 
 * The numbering in the search results looks kinda strange and
 non-intuitive IMHO. Would it be possible to use the ncurses cursor to
 iterate over the search results like one does in the normal menus? I.e.,
 the horizontal bar which highlights the item with the background color
 or whatever - can it be moved up and down with the arrow keys through
 the search results too?

Further patches are welcome ;) It's not a trivial difference though.
The horizontal bar is not a curses concept, it's implemented via the
menubox lxdialog whereas search results are currently presented via a
textbox lxdialog.

 
 * when searching for DELAY for example, I get results without numbers
 to which I can't jump to:
 
 ...
   │ Symbol: DEFAULT_IO_DELAY_TYPE [=0]
   │ Type  : integer
   │
   │
   │ Symbol: IO_DELAY_TYPE_0X80 [=0]
   │ Type  : integer
 ...
 
 maybe because they don't have ---help--- section?

Because they don't have a prompt - they don't appear in any menu at all.
In that case there's nowhere to jump to. That's why those symbols don't
get a number.

 
 In those cases, you probably want to hide those from the search results?
 
 Other than that it looks good and I'd say we need more menuconfig
 functionality like that which eases/speeds up working with that thing :).
 
 Thanks.
 
 -- 
 Regards/Gruss,
 Boris.
 --
 To unsubscribe from this list: send the line unsubscribe linux-kernel in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 Please read the FAQ at  http://www.tux.org/lkml/
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v2 0/6] menuconfig: jump to search results

2012-08-23 Thread Benjamin Poirier

This patch series adds jump to keys (similar to the cscope interface) to the
search results of make menuconfig so that we can go directly to the menu
entry for a config option after searching for it.

Patches 1-4 implement the basic functionality.
Patches 5-6 are an optional improvement.

Changes v1-v2:
* add a short description to patches 2 & 3
* correct checkpatch style warnings on patch 6

It should be targeted for v3.7

Enjoy,
-Benjamin

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v2 1/6] menuconfig: Remove superfluous conditional

2012-08-23 Thread Benjamin Poirier

Because end_reached is set to 0 before the loop, the test !end_reached is
always true and can be removed. This structure was perhaps copied from the
similar one in back_lines().

Signed-off-by: Benjamin Poirier bpoir...@suse.de
---
 scripts/kconfig/lxdialog/textbox.c |8 +++-
 1 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/scripts/kconfig/lxdialog/textbox.c 
b/scripts/kconfig/lxdialog/textbox.c
index 4e5de60..264a2b9 100644
--- a/scripts/kconfig/lxdialog/textbox.c
+++ b/scripts/kconfig/lxdialog/textbox.c
@@ -357,10 +357,8 @@ static char *get_line(void)
end_reached = 0;
while (*page != '\n') {
if (*page == '\0') {
-   if (!end_reached) {
-   end_reached = 1;
-   break;
-   }
+   end_reached = 1;
+   break;
} else if (i  MAX_LEN)
line[i++] = *(page++);
else {
@@ -373,7 +371,7 @@ static char *get_line(void)
if (i = MAX_LEN)
line[i] = '\0';
if (!end_reached)
-   page++; /* move pass '\n' */
+   page++; /* move past '\n' */
 
return line;
 }
-- 
1.7.7

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v2 4/6] menuconfig: Add jump keys to search results

2012-08-23 Thread Benjamin Poirier

Make it possible to jump directly to the menu for a configuration entry after
having searched for it with '/'. If this menu is not currently accessible we
jump to the nearest accessible parent instead. After exiting this menu, the
user is returned to the search results where he may jump further in or
elsewhere.

Signed-off-by: Benjamin Poirier bpoir...@suse.de
---
 scripts/kconfig/expr.h  |2 +
 scripts/kconfig/lkc_proto.h |6 +++-
 scripts/kconfig/mconf.c |   64 +--
 scripts/kconfig/menu.c  |   55 +---
 scripts/kconfig/nconf.c |2 +-
 5 files changed, 94 insertions(+), 35 deletions(-)

diff --git a/scripts/kconfig/expr.h b/scripts/kconfig/expr.h
index d4ecce8..52f4246 100644
--- a/scripts/kconfig/expr.h
+++ b/scripts/kconfig/expr.h
@@ -173,6 +173,8 @@ struct menu {
 #define MENU_CHANGED   0x0001
 #define MENU_ROOT  0x0002
 
+#define JUMP_NB9
+
 extern struct file *file_list;
 extern struct file *current_file;
 struct file *lookup_file(const char *name);
diff --git a/scripts/kconfig/lkc_proto.h b/scripts/kconfig/lkc_proto.h
index 47fe9c3..946c2cb3 100644
--- a/scripts/kconfig/lkc_proto.h
+++ b/scripts/kconfig/lkc_proto.h
@@ -21,8 +21,10 @@ P(menu_get_root_menu,struct menu *,(struct menu *menu));
 P(menu_get_parent_menu,struct menu *,(struct menu *menu));
 P(menu_has_help,bool,(struct menu *menu));
 P(menu_get_help,const char *,(struct menu *menu));
-P(get_symbol_str, void, (struct gstr *r, struct symbol *sym));
-P(get_relations_str, struct gstr, (struct symbol **sym_arr));
+P(get_symbol_str, int, (struct gstr *r, struct symbol *sym, struct menu
+   **jumps, int jump_nb));
+P(get_relations_str, struct gstr, (struct symbol **sym_arr, struct menu
+  **jumps));
 P(menu_get_ext_help,void,(struct menu *menu, struct gstr *help));
 
 /* symbol.c */
diff --git a/scripts/kconfig/mconf.c b/scripts/kconfig/mconf.c
index c57cc6a..b389e47 100644
--- a/scripts/kconfig/mconf.c
+++ b/scripts/kconfig/mconf.c
@@ -236,16 +236,19 @@ search_help[] = N_(
Result:\n
-\n
Symbol: FOO [=m]\n
+   Type  : tristate\n
Prompt: Foo bus is used to drive the bar HW\n
-   Defined at drivers/pci/Kconfig:47\n
-   Depends on: X86_LOCAL_APIC  X86_IO_APIC || IA64\n
-   Location:\n
- - Bus options (PCI, PCMCIA, EISA, ISA)\n
-   - PCI support (PCI [=y])\n
- - PCI access mode (choice [=y])\n
-   Selects: LIBCRC32\n
-   Selected by: BAR\n
+ Defined at drivers/pci/Kconfig:47\n
+ Depends on: X86_LOCAL_APIC  X86_IO_APIC || IA64\n
+ Location:\n
+   - Bus options (PCI, PCMCIA, EISA, ISA)\n
+ - PCI support (PCI [=y])\n
+   (1) - PCI access mode (choice [=y])\n
+ Selects: LIBCRC32\n
+ Selected by: BAR\n
-\n
+   o The line 'Type:' shows the type of the configuration option for\n
+ this symbol (boolean, tristate, string, ...)\n
o The line 'Prompt:' shows the text used in the menu structure for\n
  this symbol\n
o The 'Defined at' line tell at what file / line number the symbol\n
@@ -254,8 +257,12 @@ search_help[] = N_(
  this symbol to be visible in the menu (selectable)\n
o The 'Location:' lines tell where in the menu structure this symbol\n
  is located\n
-   A location followed by a [=y] indicate that this is a selectable\n
-   menu item - and current value is displayed inside brackets.\n
+   A location followed by a [=y] indicates that this is a\n
+   selectable menu item - and the current value is displayed inside\n
+   brackets.\n
+   Press the key in the (#) prefix to jump directly to that\n
+   location. You will be returned to the current search results\n
+   after exiting this new menu.\n
o The 'Selects:' line tell what symbol will be automatically\n
  selected if this symbol is selected (y or m)\n
o The 'Selected by' line tell what symbol has selected this symbol\n
@@ -275,7 +282,7 @@ static int single_menu_mode;
 static int show_all_options;
 static int saved_x, saved_y;
 
-static void conf(struct menu *menu);
+static void conf(struct menu *menu, struct menu *active_menu);
 static void conf_choice(struct menu *menu);
 static void conf_string(struct menu *menu);
 static void conf_load(void);
@@ -309,7 +316,9 @@ static void search_conf(void)
struct symbol **sym_arr;
struct gstr res;
char *dialog_input;
-   int dres;
+   int dres, vscroll = 0, hscroll = 0;
+   bool again;
+
 again:
dialog_clear();
dres = dialog_inputbox(_(Search Configuration Parameter),
@@ -332,10 +341,24 @@ again

[PATCH v2 5/6] menuconfig: Do not open code textbox scroll up/down

2012-08-23 Thread Benjamin Poirier

We don't need to explicitly use ncurses' scroll(). ncurses performs
vertical-motion optimization at wrefresh() time.

Using strace I confirmed that with the following patch curses still sends only
the new line of text to the terminal when scrolling up/down one line at a
time.

Signed-off-by: Benjamin Poirier bpoir...@suse.de
---
 scripts/kconfig/lxdialog/textbox.c |   55 +++
 1 files changed, 11 insertions(+), 44 deletions(-)

diff --git a/scripts/kconfig/lxdialog/textbox.c 
b/scripts/kconfig/lxdialog/textbox.c
index 506a095..3b3c5c4 100644
--- a/scripts/kconfig/lxdialog/textbox.c
+++ b/scripts/kconfig/lxdialog/textbox.c
@@ -166,40 +166,12 @@ do_resize:
case 'K':   /* Previous line */
case 'k':
case KEY_UP:
-   if (!begin_reached) {
-   int passed_end = 0;
-
-   back_lines(page_length + 1);
-
-   /* We don't call print_page() here but use
-* scrolling to ensure faster screen update.
-* However, 'end_reached' and 'page_length'
-* should still be updated, and 'page' should
-* point to start of next page. This is done
-* by calling get_line() in the following
-* 'for' loop. */
-   scrollok(box, TRUE);
-   wscrl(box, -1); /* Scroll box region down one 
line */
-   scrollok(box, FALSE);
-   page_length = 0;
-   for (i = 0; i  boxh; i++) {
-   if (!i) {
-   /* print first line of page */
-   print_line(box, 0, boxw);
-   wnoutrefresh(box);
-   } else
-   /* Called to update 
'end_reached' and 'page' */
-   get_line();
-   if (!passed_end)
-   page_length++;
-   if (end_reached  !passed_end)
-   passed_end = 1;
-   }
+   if (begin_reached)
+   break;
 
-   print_position(dialog);
-   wmove(dialog, cur_y, cur_x);/* Restore 
cursor position */
-   wrefresh(dialog);
-   }
+   back_lines(page_length + 1);
+   refresh_text_box(dialog, box, boxh, boxw, cur_y,
+cur_x);
break;
case 'B':   /* Previous page */
case 'b':
@@ -214,17 +186,12 @@ do_resize:
case 'J':   /* Next line */
case 'j':
case KEY_DOWN:
-   if (!end_reached) {
-   begin_reached = 0;
-   scrollok(box, TRUE);
-   scroll(box);/* Scroll box region up one 
line */
-   scrollok(box, FALSE);
-   print_line(box, boxh - 1, boxw);
-   wnoutrefresh(box);
-   print_position(dialog);
-   wmove(dialog, cur_y, cur_x);/* Restore 
cursor position */
-   wrefresh(dialog);
-   }
+   if (end_reached)
+   break;
+
+   back_lines(page_length - 1);
+   refresh_text_box(dialog, box, boxh, boxw, cur_y,
+cur_x);
break;
case KEY_NPAGE: /* Next page */
case ' ':
-- 
1.7.7

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v2 6/6] menuconfig: Assign jump keys per-page instead of globally

2012-08-23 Thread Benjamin Poirier

At the moment, keys 1-9 are assigned to the first 9 search results. This patch
makes them assigned to the first 9 results per-page instead. We are much less
likely to run out of keys that way.

Signed-off-by: Benjamin Poirier bpoir...@suse.de
---
 scripts/kconfig/expr.h |9 
 scripts/kconfig/lkc_proto.h|8 ++--
 scripts/kconfig/lxdialog/dialog.h  |9 +++-
 scripts/kconfig/lxdialog/textbox.c |   67 
 scripts/kconfig/mconf.c|   73 ---
 scripts/kconfig/menu.c |   54 ++-
 6 files changed, 148 insertions(+), 72 deletions(-)

diff --git a/scripts/kconfig/expr.h b/scripts/kconfig/expr.h
index 52f4246..bd2e098 100644
--- a/scripts/kconfig/expr.h
+++ b/scripts/kconfig/expr.h
@@ -12,6 +12,7 @@ extern C {
 
 #include assert.h
 #include stdio.h
+#include sys/queue.h
 #ifndef __cplusplus
 #include stdbool.h
 #endif
@@ -173,6 +174,14 @@ struct menu {
 #define MENU_CHANGED   0x0001
 #define MENU_ROOT  0x0002
 
+struct jump_key {
+   CIRCLEQ_ENTRY(jump_key) entries;
+   size_t offset;
+   struct menu *target;
+   int index;
+};
+CIRCLEQ_HEAD(jk_head, jump_key);
+
 #define JUMP_NB9
 
 extern struct file *file_list;
diff --git a/scripts/kconfig/lkc_proto.h b/scripts/kconfig/lkc_proto.h
index 946c2cb3..1d1c085 100644
--- a/scripts/kconfig/lkc_proto.h
+++ b/scripts/kconfig/lkc_proto.h
@@ -21,10 +21,10 @@ P(menu_get_root_menu,struct menu *,(struct menu *menu));
 P(menu_get_parent_menu,struct menu *,(struct menu *menu));
 P(menu_has_help,bool,(struct menu *menu));
 P(menu_get_help,const char *,(struct menu *menu));
-P(get_symbol_str, int, (struct gstr *r, struct symbol *sym, struct menu
-   **jumps, int jump_nb));
-P(get_relations_str, struct gstr, (struct symbol **sym_arr, struct menu
-  **jumps));
+P(get_symbol_str, void, (struct gstr *r, struct symbol *sym, struct jk_head
+*head));
+P(get_relations_str, struct gstr, (struct symbol **sym_arr, struct jk_head
+  *head));
 P(menu_get_ext_help,void,(struct menu *menu, struct gstr *help));
 
 /* symbol.c */
diff --git a/scripts/kconfig/lxdialog/dialog.h 
b/scripts/kconfig/lxdialog/dialog.h
index 8e7f43b..2a15c86 100644
--- a/scripts/kconfig/lxdialog/dialog.h
+++ b/scripts/kconfig/lxdialog/dialog.h
@@ -209,8 +209,13 @@ int first_alpha(const char *string, const char *exempt);
 int dialog_yesno(const char *title, const char *prompt, int height, int width);
 int dialog_msgbox(const char *title, const char *prompt, int height,
  int width, int pause);
-int dialog_textbox(const char *title, const char *file, int height, int width,
-  int *keys, int *_vscroll, int *_hscroll);
+
+
+typedef void (*update_text_fn)(char *buf, size_t start, size_t end, void
+  *_data);
+int dialog_textbox(const char *title, char *tbuf, int initial_height,
+  int initial_width, int *keys, int *_vscroll, int *_hscroll,
+  update_text_fn update_text, void *data);
 int dialog_menu(const char *title, const char *prompt,
const void *selected, int *s_scroll);
 int dialog_checklist(const char *title, const char *prompt, int height,
diff --git a/scripts/kconfig/lxdialog/textbox.c 
b/scripts/kconfig/lxdialog/textbox.c
index 3b3c5c4..a48bb93 100644
--- a/scripts/kconfig/lxdialog/textbox.c
+++ b/scripts/kconfig/lxdialog/textbox.c
@@ -22,23 +22,25 @@
 #include dialog.h
 
 static void back_lines(int n);
-static void print_page(WINDOW * win, int height, int width);
-static void print_line(WINDOW * win, int row, int width);
+static void print_page(WINDOW *win, int height, int width, update_text_fn
+  update_text, void *data);
+static void print_line(WINDOW *win, int row, int width);
 static char *get_line(void);
 static void print_position(WINDOW * win);
 
 static int hscroll;
 static int begin_reached, end_reached, page_length;
-static const char *buf;
-static const char *page;
+static char *buf;
+static char *page;
 
 /*
  * refresh window content
  */
 static void refresh_text_box(WINDOW *dialog, WINDOW *box, int boxh, int boxw,
- int cur_y, int cur_x)
+int cur_y, int cur_x, update_text_fn update_text,
+void *data)
 {
-   print_page(box, boxh, boxw);
+   print_page(box, boxh, boxw, update_text, data);
print_position(dialog);
wmove(dialog, cur_y, cur_x);/* Restore cursor position */
wrefresh(dialog);
@@ -49,9 +51,11 @@ static void refresh_text_box(WINDOW *dialog, WINDOW *box, 
int boxh, int boxw,
  * Display text from a file in a dialog box.
  *
  * keys is a null-terminated array
+ * update_text() may not add or remove any '\n' or '\0' in tbuf

[PATCH v2 3/6] menuconfig: Extend dialog_textbox so that it can return to a scrolled position

2012-08-23 Thread Benjamin Poirier

We can now display other UI elements (menus) on top of a textbox and then
seemingly come back to it in the same state it was left.

Signed-off-by: Benjamin Poirier bpoir...@suse.de
---
 scripts/kconfig/lxdialog/dialog.h  |2 +-
 scripts/kconfig/lxdialog/textbox.c |   24 +++-
 scripts/kconfig/mconf.c|8 
 3 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/scripts/kconfig/lxdialog/dialog.h 
b/scripts/kconfig/lxdialog/dialog.h
index 014c029..8e7f43b 100644
--- a/scripts/kconfig/lxdialog/dialog.h
+++ b/scripts/kconfig/lxdialog/dialog.h
@@ -210,7 +210,7 @@ int dialog_yesno(const char *title, const char *prompt, int 
height, int width);
 int dialog_msgbox(const char *title, const char *prompt, int height,
  int width, int pause);
 int dialog_textbox(const char *title, const char *file, int height, int width,
-  int *keys);
+  int *keys, int *_vscroll, int *_hscroll);
 int dialog_menu(const char *title, const char *prompt,
const void *selected, int *s_scroll);
 int dialog_checklist(const char *title, const char *prompt, int height,
diff --git a/scripts/kconfig/lxdialog/textbox.c 
b/scripts/kconfig/lxdialog/textbox.c
index eb4ee92..506a095 100644
--- a/scripts/kconfig/lxdialog/textbox.c
+++ b/scripts/kconfig/lxdialog/textbox.c
@@ -51,7 +51,7 @@ static void refresh_text_box(WINDOW *dialog, WINDOW *box, int 
boxh, int boxw,
  * keys is a null-terminated array
  */
 int dialog_textbox(const char *title, const char *tbuf, int initial_height,
-  int initial_width, int *keys)
+  int initial_width, int *keys, int *_vscroll, int *_hscroll)
 {
int i, x, y, cur_x, cur_y, key = 0;
int height, width, boxh, boxw;
@@ -65,6 +65,15 @@ int dialog_textbox(const char *title, const char *tbuf, int 
initial_height,
buf = tbuf;
page = buf; /* page is pointer to start of page to be displayed */
 
+   if (_vscroll  *_vscroll) {
+   begin_reached = 0;
+
+   for (i = 0; i  *_vscroll; i++)
+   get_line();
+   }
+   if (_hscroll)
+   hscroll = *_hscroll;
+
 do_resize:
getmaxyx(stdscr, height, width);
if (height  8 || width  8)
@@ -275,6 +284,19 @@ do_resize:
}
delwin(box);
delwin(dialog);
+   if (_vscroll) {
+   const char *s;
+
+   s = buf;
+   *_vscroll = 0;
+   back_lines(page_length);
+   while (s  page  (s = strchr(s, '\n'))) {
+   (*_vscroll)++;
+   s++;
+   }
+   }
+   if (_hscroll)
+   *_hscroll = hscroll;
return key;
 }
 
diff --git a/scripts/kconfig/mconf.c b/scripts/kconfig/mconf.c
index 116e5da..c57cc6a 100644
--- a/scripts/kconfig/mconf.c
+++ b/scripts/kconfig/mconf.c
@@ -281,7 +281,7 @@ static void conf_string(struct menu *menu);
 static void conf_load(void);
 static void conf_save(void);
 static int show_textbox_ext(const char *title, const char *text, int r, int c,
-   int *keys);
+   int *keys, int *vscroll, int *hscroll);
 static void show_textbox(const char *title, const char *text, int r, int c);
 static void show_helptext(const char *title, const char *text);
 static void show_help(struct menu *menu);
@@ -622,15 +622,15 @@ static void conf(struct menu *menu)
 }
 
 static int show_textbox_ext(const char *title, const char *text, int r, int c,
-   int *keys)
+   int *keys, int *vscroll, int *hscroll)
 {
dialog_clear();
-   return dialog_textbox(title, text, r, c, keys);
+   return dialog_textbox(title, text, r, c, keys, vscroll, hscroll);
 }
 
 static void show_textbox(const char *title, const char *text, int r, int c)
 {
-   show_textbox_ext(title, text, r, c, (int []) {0});
+   show_textbox_ext(title, text, r, c, (int []) {0}, NULL, NULL);
 }
 
 static void show_helptext(const char *title, const char *text)
-- 
1.7.7

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v2 2/6] menuconfig: Extend dialog_textbox so that it can exit on arbitrary keypresses

2012-08-23 Thread Benjamin Poirier

The caller will be able to perform actions based on hotkeys in the displayed
text.

Signed-off-by: Benjamin Poirier bpoir...@suse.de
---
 scripts/kconfig/lxdialog/dialog.h  |3 ++-
 scripts/kconfig/lxdialog/textbox.c |   31 +--
 scripts/kconfig/mconf.c|   12 ++--
 3 files changed, 33 insertions(+), 13 deletions(-)

diff --git a/scripts/kconfig/lxdialog/dialog.h 
b/scripts/kconfig/lxdialog/dialog.h
index b5211fc..014c029 100644
--- a/scripts/kconfig/lxdialog/dialog.h
+++ b/scripts/kconfig/lxdialog/dialog.h
@@ -209,7 +209,8 @@ int first_alpha(const char *string, const char *exempt);
 int dialog_yesno(const char *title, const char *prompt, int height, int width);
 int dialog_msgbox(const char *title, const char *prompt, int height,
  int width, int pause);
-int dialog_textbox(const char *title, const char *file, int height, int width);
+int dialog_textbox(const char *title, const char *file, int height, int width,
+  int *keys);
 int dialog_menu(const char *title, const char *prompt,
const void *selected, int *s_scroll);
 int dialog_checklist(const char *title, const char *prompt, int height,
diff --git a/scripts/kconfig/lxdialog/textbox.c 
b/scripts/kconfig/lxdialog/textbox.c
index 264a2b9..eb4ee92 100644
--- a/scripts/kconfig/lxdialog/textbox.c
+++ b/scripts/kconfig/lxdialog/textbox.c
@@ -47,14 +47,16 @@ static void refresh_text_box(WINDOW *dialog, WINDOW *box, 
int boxh, int boxw,
 
 /*
  * Display text from a file in a dialog box.
+ *
+ * keys is a null-terminated array
  */
-int dialog_textbox(const char *title, const char *tbuf,
-  int initial_height, int initial_width)
+int dialog_textbox(const char *title, const char *tbuf, int initial_height,
+  int initial_width, int *keys)
 {
int i, x, y, cur_x, cur_y, key = 0;
int height, width, boxh, boxw;
-   int passed_end;
WINDOW *dialog, *box;
+   bool done = false;
 
begin_reached = 1;
end_reached = 0;
@@ -122,7 +124,7 @@ do_resize:
attr_clear(box, boxh, boxw, dlg.dialog.atr);
refresh_text_box(dialog, box, boxh, boxw, cur_y, cur_x);
 
-   while ((key != KEY_ESC)  (key != '\n')) {
+   while (!done) {
key = wgetch(dialog);
switch (key) {
case 'E':   /* Exit */
@@ -130,9 +132,9 @@ do_resize:
case 'X':
case 'x':
case 'q':
-   delwin(box);
-   delwin(dialog);
-   return 0;
+   case '\n':
+   done = true;
+   break;
case 'g':   /* First page */
case KEY_HOME:
if (!begin_reached) {
@@ -156,6 +158,8 @@ do_resize:
case 'k':
case KEY_UP:
if (!begin_reached) {
+   int passed_end = 0;
+
back_lines(page_length + 1);
 
/* We don't call print_page() here but use
@@ -169,7 +173,6 @@ do_resize:
wscrl(box, -1); /* Scroll box region down one 
line */
scrollok(box, FALSE);
page_length = 0;
-   passed_end = 0;
for (i = 0; i  boxh; i++) {
if (!i) {
/* print first line of page */
@@ -252,7 +255,8 @@ do_resize:
 cur_y, cur_x);
break;
case KEY_ESC:
-   key = on_key_esc(dialog);
+   if (on_key_esc(dialog) == KEY_ESC)
+   done = true;
break;
case KEY_RESIZE:
back_lines(height);
@@ -260,11 +264,18 @@ do_resize:
delwin(dialog);
on_key_resize();
goto do_resize;
+   default:
+   for (i = 0; keys[i]; i++) {
+   if (key == keys[i]) {
+   done = true;
+   break;
+   }
+   }
}
}
delwin(box);
delwin(dialog);
-   return key; /* ESC pressed */
+   return key;
 }
 
 /*
diff --git a/scripts/kconfig/mconf.c b/scripts/kconfig/mconf.c
index f584a28..116e5da 100644
--- a/scripts/kconfig/mconf.c
+++ b/scripts/kconfig/mconf.c
@@ -280,6 +280,8 @@ static void conf_choice(struct menu *menu);
 static void conf_string(struct menu *menu);
 static void conf_load(void);
 static void conf_save(void);
+static int show_textbox_ext(const char *title

Re: [PATCH v2 0/6] menuconfig: jump to search results

2012-08-24 Thread Benjamin Poirier

On 2012/08/24 17:49, Borislav Petkov wrote:
 On Thu, Aug 23, 2012 at 02:55:02PM -0400, Benjamin Poirier wrote:
  This patch series adds jump to keys (similar to the cscope interface) to 
  the
  search results of make menuconfig so that we can go directly to the menu
  entry for a config option after searching for it.
  
  Patches 1-4 implement the basic functionality.
  Patches 5-6 are an optional improvement.
  
  Changes v1->v2:
  * add a short description to patches 2 & 3
  * correct checkpatch style warnings on patch 6
  
  It should be targeted for v3.7
 
 Ok, did a quick test on -rc3 here. Is it a feature that if I do a
 search, go to the result, then do a search again and go to another
 result from the second search... and do that a couple of times, clicking
 on Exit at the lower part of the menu returns me back to all the
 searches I had done? Much like a stack.
 
 I mean, I don't mind it that way, it is workable - I'm just asking :).

That's how it's meant, yes. As stated in the log for patch 4:
menuconfig: Add jump keys to search results
[...] After exiting this menu, the user is returned to the
search results where he may jump further in or elsewhere.
... and after exiting the search results the user is returned to the
menu where he was before performing the search. That's the current
behavior.

A likely alternative would be that jumping to a menu entry would exit
the search results, forget the current position in the menus and go to
the new one as if the user had navigated to it from the root menu.

The current mconf.c code structure lends itself much better to the
stacked way of doing things. Each navigation to a deeper menu
corresponds to a nested conf() call. To do it the second way we'd have
to add logic to exit these nested conf() calls and redo new ones without
user intervention or change how it's handled altogether.

It seemed to me that choosing one approach over the other was debatable
UI-wise but that the first approach was definitely simpler code-wise.
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 0/6] menuconfig: jump to search results

2012-08-02 Thread Benjamin Poirier

On 2012/08/02 15:32, Borislav Petkov wrote:
 On Tue, Jul 31, 2012 at 09:17:51AM -0400, Benjamin Poirier wrote:
   * when searching for DELAY for example, I get results without numbers
   to which I can't jump to:
   
   ...
 │ Symbol: DEFAULT_IO_DELAY_TYPE [=0]
 │ Type  : integer
 │
 │
 │ Symbol: IO_DELAY_TYPE_0X80 [=0]
 │ Type  : integer
   ...
   
   maybe because they don't have ---help--- section?
  
  Because they don't have a prompt - they don't appear in any menu at all.
  In that case there's nowhere to jump to. That's why those symbols don't
  get a number.
 
 Right, so how about you don't show them at all then?

It's a search for CONFIG_ symbols not menus. Even though the user
doesn't have direct control over some symbols, it's still useful to know
they exist and (possibly) what enables them. For example, searching for
LOCKDEP tells me I can enable DEBUG_LOCK_ALLOC to get it.

Besides, this patchset to add the jump keys didn't change that behavior at
all. You're gonna get the same search results with the current menuconfig.
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] menuconfig: make keys u/d move to the previous/next dialog page

2012-07-24 Thread Benjamin Poirier

... just like less(1) for example.

Signed-off-by: Benjamin Poirier bpoir...@suse.de
---
 scripts/kconfig/lxdialog/textbox.c |2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/scripts/kconfig/lxdialog/textbox.c 
b/scripts/kconfig/lxdialog/textbox.c
index 154c2dd..805200f 100644
--- a/scripts/kconfig/lxdialog/textbox.c
+++ b/scripts/kconfig/lxdialog/textbox.c
@@ -190,6 +190,7 @@ do_resize:
break;
case 'B':   /* Previous page */
case 'b':
+   case 'u':
case KEY_PPAGE:
if (begin_reached)
break;
@@ -214,6 +215,7 @@ do_resize:
break;
case KEY_NPAGE: /* Next page */
case ' ':
+   case 'd':
if (end_reached)
break;
 
-- 
1.7.7

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v2 2/2] nconf: add u, d command keys in scroll windows

2012-07-24 Thread Benjamin Poirier

They function just like they do in less(1).
Also correct some discrepancy between the help text and the code wrt
function keys.

Signed-off-by: Benjamin Poirier bpoir...@suse.de
---
 scripts/kconfig/nconf.c |6 +++---
 scripts/kconfig/nconf.gui.c |8 +---
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/scripts/kconfig/nconf.c b/scripts/kconfig/nconf.c
index 8c0eb65..8087e4d 100644
--- a/scripts/kconfig/nconf.c
+++ b/scripts/kconfig/nconf.c
@@ -83,10 +83,10 @@ static const char nconf_readme[] = N_(
 Text Box(Help Window)\n
 \n
 o  Use the cursor keys to scroll up/down/left/right.  The VI editor\n
-   keys h,j,k,l function here as do SPACE BAR for those\n
-   who are familiar with less and lynx.\n
+   keys h,j,k,l function here as do u, d and SPACE BAR for\n
+   those who are familiar with less and lynx.\n
 \n
-o  Press Enter, F1, F5, F7 or Esc to exit.\n
+o  Press Enter, F1, F5, F9, q or Esc to exit.\n
 \n
 \n
 Alternate Configuration Files\n
diff --git a/scripts/kconfig/nconf.gui.c b/scripts/kconfig/nconf.gui.c
index 3b18dd8..379003c 100644
--- a/scripts/kconfig/nconf.gui.c
+++ b/scripts/kconfig/nconf.gui.c
@@ -604,9 +604,11 @@ void show_scroll_win(WINDOW *main_window,
switch (res) {
case KEY_NPAGE:
case ' ':
+   case 'd':
start_y += text_lines-2;
break;
case KEY_PPAGE:
+   case 'u':
start_y -= text_lines+2;
break;
case KEY_HOME:
@@ -632,10 +634,10 @@ void show_scroll_win(WINDOW *main_window,
start_x++;
break;
}
-   if (res == 10 || res == 27 || res == 'q'
-   || res == KEY_F(F_BACK) || res == KEY_F(F_EXIT)) {
+   if (res == 10 || res == 27 || res == 'q' ||
+   res == KEY_F(F_HELP) || res == KEY_F(F_BACK) ||
+   res == KEY_F(F_EXIT))
break;
-   }
if (start_y  0)
start_y = 0;
if (start_y = total_lines-text_lines)
-- 
1.7.7

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v2 1/2] menuconfig: add u, d, q command keys in text boxes

2012-07-24 Thread Benjamin Poirier

They function just like they do in less(1).

Signed-off-by: Benjamin Poirier bpoir...@suse.de
---
 scripts/kconfig/lxdialog/textbox.c |3 +++
 scripts/kconfig/mconf.c|6 +++---
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/scripts/kconfig/lxdialog/textbox.c 
b/scripts/kconfig/lxdialog/textbox.c
index 154c2dd..4e5de60 100644
--- a/scripts/kconfig/lxdialog/textbox.c
+++ b/scripts/kconfig/lxdialog/textbox.c
@@ -129,6 +129,7 @@ do_resize:
case 'e':
case 'X':
case 'x':
+   case 'q':
delwin(box);
delwin(dialog);
return 0;
@@ -190,6 +191,7 @@ do_resize:
break;
case 'B':   /* Previous page */
case 'b':
+   case 'u':
case KEY_PPAGE:
if (begin_reached)
break;
@@ -214,6 +216,7 @@ do_resize:
break;
case KEY_NPAGE: /* Next page */
case ' ':
+   case 'd':
if (end_reached)
break;
 
diff --git a/scripts/kconfig/mconf.c b/scripts/kconfig/mconf.c
index f606738..f584a28 100644
--- a/scripts/kconfig/mconf.c
+++ b/scripts/kconfig/mconf.c
@@ -105,10 +105,10 @@ static const char mconf_readme[] = N_(
 Text Box(Help Window)\n
 \n
 o  Use the cursor keys to scroll up/down/left/right.  The VI editor\n
-   keys h,j,k,l function here as do SPACE BAR and B for those\n
-   who are familiar with less and lynx.\n
+   keys h,j,k,l function here as do u, d, SPACE BAR and B for \n
+   those who are familiar with less and lynx.\n
 \n
-o  Press E, X, Enter or EscEsc to exit.\n
+o  Press E, X, q, Enter or EscEsc to exit.\n
 \n
 \n
 Alternate Configuration Files\n
-- 
1.7.7

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH]kconfig/menu.c: fix uninitialized variable warning

2013-09-19 Thread Benjamin Poirier

On 2013/09/19 12:58, Madhavan Srinivasan wrote:
 In file included from scripts/kconfig/zconf.tab.c:2537:0:
 scripts/kconfig/menu.c: In function ‘get_symbol_str’:
 scripts/kconfig/menu.c:586:18: warning: ‘jump’ may be used uninitialized in
 this function [-Wmaybe-uninitialized]
   ^

from gcc(1):
These warnings are made optional because GCC is not smart
enough to see all the reasons why the code might be correct
despite appearing to have an error.

I do not see this warning with gcc 4.7.2 or 4.8.1. Which version are you
using?

The code does:

static void get_prompt_str(struct gstr *r, struct property *prop,
[...]
if (head && location) {
jump = xmalloc(sizeof(struct jump_key));
[... different basic block ...]
if (head && location && menu == location)
jump->offset = r->len - 1;

If the second if is true, then the first one was true as well, and
jump is initialized.

  jump-offset = r-len - 1;
   ^
 scripts/kconfig/menu.c:547:19: note: ‘jump’ was declared here
   struct jump_key *jump;
^
 Signed-off-by: Madhavan Srinivasan ma...@linux.vnet.ibm.com
 ---
  scripts/kconfig/menu.c |2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)
 
 diff --git a/scripts/kconfig/menu.c b/scripts/kconfig/menu.c
 index c1d5320..23b1827 100644
 --- a/scripts/kconfig/menu.c
 +++ b/scripts/kconfig/menu.c
 @@ -544,7 +544,7 @@ static void get_prompt_str(struct gstr *r, struct 
 property *prop,
  {
   int i, j;
   struct menu *submenu[8], *menu, *location = NULL;
 - struct jump_key *jump;
 + struct jump_key *jump = NULL;
 
   str_printf(r, _(Prompt: %s\n), _(prop-text));
   menu = prop-menu-parent;
 --
 1.7.10.4
 
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH]kconfig/menu.c: fix uninitialized variable warning

2013-09-19 Thread Benjamin Poirier

On 2013/09/19 19:27, Yann E. MORIN wrote:
 Benjamin, Madhavan, All,
 
 On 2013-09-19 11:22 -0400, Benjamin Poirier spake thusly:
  On 2013/09/19 12:58, Madhavan Srinivasan wrote:
   In file included from scripts/kconfig/zconf.tab.c:2537:0:
   scripts/kconfig/menu.c: In function ‘get_symbol_str’:
   scripts/kconfig/menu.c:586:18: warning: ‘jump’ may be used uninitialized 
   in
   this function [-Wmaybe-uninitialized]
 ^
  
  from gcc(1):
  These warnings are made optional because GCC is not smart
  enough to see all the reasons why the code might be correct
  despite appearing to have an error.
  
  I do not see this warning with gcc 4.7.2 or 4.8.1. Which version are you
  using?
  
  The code does:
  
  static void get_prompt_str(struct gstr *r, struct property *prop,
  [...]
  if (head && location) {
  jump = xmalloc(sizeof(struct jump_key));
  [... different basic block ...]
  if (head && location && menu == location)
  jump->offset = r->len - 1;
  
  If the second if is true, then the first one was true as well, and
  jump is initialized.
 
 Exactly what I was going to say.
 
 However:
   - I believe we should strive for a warning-free code whenever possible
   - while still getting help from gcc to pinpoint potential issues.
 
 Clearly, gcc is wrong here. Setting jump to NULL will cause fault if we
 try to dereference it. Since this should never happen given the code as
 it currently is, I'm slightly in favour of acking this patch.
 
 Any other reason not to apply it?

Did you manage to get that warning as well? I didn't. If it's only from
old versions of gcc I'd feel better leaving the warning there than
masking it, in case a future code change does really introduce a use
before initialize.

 
jump-offset = r-len - 1;
 ^
   scripts/kconfig/menu.c:547:19: note: ‘jump’ was declared here
 struct jump_key *jump;
  ^
   Signed-off-by: Madhavan Srinivasan ma...@linux.vnet.ibm.com
   ---
scripts/kconfig/menu.c |2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
   
   diff --git a/scripts/kconfig/menu.c b/scripts/kconfig/menu.c
   index c1d5320..23b1827 100644
   --- a/scripts/kconfig/menu.c
   +++ b/scripts/kconfig/menu.c
   @@ -544,7 +544,7 @@ static void get_prompt_str(struct gstr *r, struct 
   property *prop,
{
 int i, j;
 struct menu *submenu[8], *menu, *location = NULL;
   - struct jump_key *jump;
   + struct jump_key *jump = NULL;
   
 str_printf(r, _(Prompt: %s\n), _(prop-text));
 menu = prop-menu-parent;
   --
   1.7.10.4
   
 
 Regards,
 Yann E. MORIN.
 
 -- 
 .-..--..
 |  Yann E. MORIN  | Real-Time Embedded | /\ ASCII RIBBON | Erics' 
 conspiracy: |
 | +33 662 376 056 | Software  Designer | \ / CAMPAIGN |  ___  
  |
 | +33 223 225 172 `.---:  X  AGAINST  |  \e/  There is no 
  |
 | http://ymorin.is-a-geek.org/ | _/*\_ | / \ HTML MAIL|   v   conspiracy. 
  |
 '--^---^--^'
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] netiucv: Hold rtnl between name allocation and device registration.

2013-06-13 Thread Benjamin Poirier

fixes a race condition between concurrent initializations of netiucv devices
that try to use the same name.

sysfs: cannot create duplicate filename '/devices/iucv/netiucv2'
[...]
Call Trace:
([002edea4] sysfs_add_one+0xb0/0xdc)
 [002eecd4] create_dir+0x80/0xfc
 [002eee38] sysfs_create_dir+0xe8/0x118
 [003835a8] kobject_add_internal+0x120/0x2d0
 [003839d6] kobject_add+0x62/0x9c
 [003d9564] device_add+0xcc/0x510
 [03e00212c7b4] netiucv_register_device+0xc0/0x1ec [netiucv]

Signed-off-by: Benjamin Poirier bpoir...@suse.de
Tested-by: Ursula Braun bra...@de.ibm.com
---
 drivers/s390/net/netiucv.c |6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/s390/net/netiucv.c b/drivers/s390/net/netiucv.c
index 4ffa66c..9ca3996 100644
--- a/drivers/s390/net/netiucv.c
+++ b/drivers/s390/net/netiucv.c
@@ -2040,6 +2040,7 @@ static struct net_device *netiucv_init_netdevice(char 
*username, char *userdata)
   netiucv_setup_netdevice);
if (!dev)
return NULL;
+   rtnl_lock();
if (dev_alloc_name(dev, dev-name)  0)
goto out_netdev;
 
@@ -2061,6 +2062,7 @@ static struct net_device *netiucv_init_netdevice(char 
*username, char *userdata)
 out_fsm:
kfree_fsm(privptr-fsm);
 out_netdev:
+   rtnl_unlock();
free_netdev(dev);
return NULL;
 }
@@ -2100,6 +2102,7 @@ static ssize_t conn_write(struct device_driver *drv,
 
rc = netiucv_register_device(dev);
if (rc) {
+   rtnl_unlock();
IUCV_DBF_TEXT_(setup, 2,
ret %d from netiucv_register_device\n, rc);
goto out_free_ndev;
@@ -2109,7 +2112,8 @@ static ssize_t conn_write(struct device_driver *drv,
priv = netdev_priv(dev);
SET_NETDEV_DEV(dev, priv-dev);
 
-   rc = register_netdev(dev);
+   rc = register_netdevice(dev);
+   rtnl_unlock();
if (rc)
goto out_unreg;
 
-- 
1.7.10.4

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v3 1/3] unix/dgram: peek beyond 0-sized skbs

2013-04-29 Thread Benjamin Poirier

77c1090 net: fix infinite loop in __skb_recv_datagram() (v3.8) introduced a
regression:
After that commit, recv can no longer peek beyond a 0-sized skb in the queue.
__skb_recv_datagram() instead stops at the first skb with len == 0 and results
in the system call failing with -EFAULT via skb_copy_datagram_iovec().

When peeking at an offset with 0-sized skb(s), each one of those is received
only once, in sequence. The offset starts moving forward again after receiving
datagrams with len > 0.

Signed-off-by: Benjamin Poirier bpoir...@suse.de

---

* v1 fix the case when SO_PEEK_OFF is used to set sk_peek_off beyond a
  0-sized skb

* v2 also fix the situation when sk_peek_off must advance to and beyond a
  0-sized skb

 net/core/datagram.c |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/core/datagram.c b/net/core/datagram.c
index 368f9c3..99c4f52 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -187,7 +187,8 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, 
unsigned int flags,
skb_queue_walk(queue, skb) {
*peeked = skb-peeked;
if (flags  MSG_PEEK) {
-   if (*off = skb-len  skb-len) {
+   if (*off = skb-len  (skb-len || *off ||
+skb-peeked)) {
*off -= skb-len;
continue;
}
-- 
1.7.10.4

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v3 3/3] unix/stream: fix peeking with an offset larger than data in queue

2013-04-29 Thread Benjamin Poirier

Currently, peeking on a unix stream socket with an offset larger than len of
the data in the sk receive queue returns immediately with bogus data.

This patch fixes this so that the behavior is the same as peeking with no
offset on an empty queue: the caller blocks.

Signed-off-by: Benjamin Poirier bpoir...@suse.de
---
 net/unix/af_unix.c |   25 -
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 2db702d..1a02af0 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1859,10 +1859,10 @@ out:
 }
 
 /*
- * Sleep until data has arrive. But check for races..
+ * Sleep until more data has arrived. But check for races..
  */
-
-static long unix_stream_data_wait(struct sock *sk, long timeo)
+static long unix_stream_data_wait(struct sock *sk, long timeo,
+ struct sk_buff *last)
 {
DEFINE_WAIT(wait);
 
@@ -1871,7 +1871,7 @@ static long unix_stream_data_wait(struct sock *sk, long 
timeo)
for (;;) {
prepare_to_wait(sk_sleep(sk), wait, TASK_INTERRUPTIBLE);
 
-   if (!skb_queue_empty(sk-sk_receive_queue) ||
+   if (skb_peek_tail(sk-sk_receive_queue) != last ||
sk-sk_err ||
(sk-sk_shutdown  RCV_SHUTDOWN) ||
signal_pending(current) ||
@@ -1890,8 +1890,6 @@ static long unix_stream_data_wait(struct sock *sk, long 
timeo)
return timeo;
 }
 
-
-
 static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
   struct msghdr *msg, size_t size,
   int flags)
@@ -1936,14 +1934,12 @@ static int unix_stream_recvmsg(struct kiocb *iocb, 
struct socket *sock,
goto out;
}
 
-   skip = sk_peek_offset(sk, flags);
-
do {
int chunk;
-   struct sk_buff *skb;
+   struct sk_buff *skb, *last;
 
unix_state_lock(sk);
-   skb = skb_peek(sk-sk_receive_queue);
+   last = skb = skb_peek(sk-sk_receive_queue);
 again:
if (skb == NULL) {
unix_sk(sk)-recursion_level = 0;
@@ -1966,7 +1962,7 @@ again:
break;
mutex_unlock(u-readlock);
 
-   timeo = unix_stream_data_wait(sk, timeo);
+   timeo = unix_stream_data_wait(sk, timeo, last);
 
if (signal_pending(current)
||  mutex_lock_interruptible(u-readlock)) {
@@ -1980,10 +1976,13 @@ again:
break;
}
 
-   if (skip = skb-len) {
+   skip = sk_peek_offset(sk, flags);
+   while (skip = skb-len) {
skip -= skb-len;
+   last = skb;
skb = skb_peek_next(skb, sk-sk_receive_queue);
-   goto again;
+   if (!skb)
+   goto again;
}
 
unix_state_unlock(sk);
-- 
1.7.10.4

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v3 2/3] unix/dgram: fix peeking with an offset larger than data in queue

2013-04-29 Thread Benjamin Poirier

Currently, peeking on a unix datagram socket with an offset larger than len of
the data in the sk receive queue returns immediately with bogus data. That's
because *off is not reset between each skb_queue_walk().

This patch fixes this so that the behavior is the same as peeking with no
offset on an empty queue: the caller blocks.

Signed-off-by: Benjamin Poirier bpoir...@suse.de

---

v2: address review feedback from Eric Dumazet

v3: address review feedback from Cong Wang

 net/core/datagram.c |   21 +
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/net/core/datagram.c b/net/core/datagram.c
index 99c4f52..b5d48ac 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -78,9 +78,10 @@ static int receiver_wake_function(wait_queue_t *wait, 
unsigned int mode, int syn
return autoremove_wake_function(wait, mode, sync, key);
 }
 /*
- * Wait for a packet..
+ * Wait for the last received packet to be different from skb
  */
-static int wait_for_packet(struct sock *sk, int *err, long *timeo_p)
+static int wait_for_more_packets(struct sock *sk, int *err, long *timeo_p,
+const struct sk_buff *skb)
 {
int error;
DEFINE_WAIT_FUNC(wait, receiver_wake_function);
@@ -92,7 +93,7 @@ static int wait_for_packet(struct sock *sk, int *err, long 
*timeo_p)
if (error)
goto out_err;
 
-   if (!skb_queue_empty(sk-sk_receive_queue))
+   if (sk-sk_receive_queue.prev != skb)
goto out;
 
/* Socket shut down? */
@@ -131,9 +132,9 @@ out_noerr:
  * __skb_recv_datagram - Receive a datagram skbuff
  * @sk: socket
  * @flags: MSG_ flags
+ * @peeked: returns non-zero if this packet has been seen before
  * @off: an offset in bytes to peek skb from. Returns an offset
  *   within an skb where data actually starts
- * @peeked: returns non-zero if this packet has been seen before
  * @err: error code returned
  *
  * Get a datagram skbuff, understands the peeking, nonblocking wakeups
@@ -161,7 +162,7 @@ out_noerr:
 struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
int *peeked, int *off, int *err)
 {
-   struct sk_buff *skb;
+   struct sk_buff *skb, *last;
long timeo;
/*
 * Caller is allowed not to check sk-sk_err before skb_recv_datagram()
@@ -182,14 +183,17 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, 
unsigned int flags,
 */
unsigned long cpu_flags;
struct sk_buff_head *queue = sk-sk_receive_queue;
+   int _off = *off;
 
+   last = (struct sk_buff *)queue;
spin_lock_irqsave(queue-lock, cpu_flags);
skb_queue_walk(queue, skb) {
+   last = skb;
*peeked = skb-peeked;
if (flags  MSG_PEEK) {
-   if (*off = skb-len  (skb-len || *off ||
+   if (_off = skb-len  (skb-len || _off ||
 skb-peeked)) {
-   *off -= skb-len;
+   _off -= skb-len;
continue;
}
skb-peeked = 1;
@@ -198,6 +202,7 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, 
unsigned int flags,
__skb_unlink(skb, queue);
 
spin_unlock_irqrestore(queue-lock, cpu_flags);
+   *off = _off;
return skb;
}
spin_unlock_irqrestore(queue-lock, cpu_flags);
@@ -207,7 +212,7 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, 
unsigned int flags,
if (!timeo)
goto no_packet;
 
-   } while (!wait_for_packet(sk, err, timeo));
+   } while (!wait_for_more_packets(sk, err, timeo, last));
 
return NULL;
 
-- 
1.7.10.4

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH net 1/3] unix/dgram: peek beyond 0-sized skbs

2013-04-25 Thread Benjamin Poirier

77c1090 net: fix infinite loop in __skb_recv_datagram() (v3.8) introduced a
regression:
After that commit, recv can no longer peek beyond a 0-sized skb in the queue.
__skb_recv_datagram() instead stops at the first skb with len == 0 and results
in the system call failing with -EFAULT via skb_copy_datagram_iovec().

Signed-off-by: Benjamin Poirier bpoir...@suse.de
---
 net/core/datagram.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/datagram.c b/net/core/datagram.c
index 368f9c3..02398ae 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -187,7 +187,7 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, 
unsigned int flags,
skb_queue_walk(queue, skb) {
*peeked = skb-peeked;
if (flags  MSG_PEEK) {
-   if (*off = skb-len  skb-len) {
+   if (*off = skb-len  (skb-len || *off)) {
*off -= skb-len;
continue;
}
-- 
1.7.10.4

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH net 3/3] unix/stream: fix peeking with an offset larger than data in queue

2013-04-25 Thread Benjamin Poirier

Currently, peeking on a unix stream socket with an offset larger than len of
the data in the sk receive queue returns immediately with bogus data.

This patch fixes this so that the behavior is the same as peeking with no
offset on an empty queue: the caller blocks.

Signed-off-by: Benjamin Poirier bpoir...@suse.de
---
 net/unix/af_unix.c |   25 -
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 2db702d..1a02af0 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1859,10 +1859,10 @@ out:
 }
 
 /*
- * Sleep until data has arrive. But check for races..
+ * Sleep until more data has arrived. But check for races..
  */
-
-static long unix_stream_data_wait(struct sock *sk, long timeo)
+static long unix_stream_data_wait(struct sock *sk, long timeo,
+ struct sk_buff *last)
 {
DEFINE_WAIT(wait);
 
@@ -1871,7 +1871,7 @@ static long unix_stream_data_wait(struct sock *sk, long 
timeo)
for (;;) {
prepare_to_wait(sk_sleep(sk), wait, TASK_INTERRUPTIBLE);
 
-   if (!skb_queue_empty(sk-sk_receive_queue) ||
+   if (skb_peek_tail(sk-sk_receive_queue) != last ||
sk-sk_err ||
(sk-sk_shutdown  RCV_SHUTDOWN) ||
signal_pending(current) ||
@@ -1890,8 +1890,6 @@ static long unix_stream_data_wait(struct sock *sk, long 
timeo)
return timeo;
 }
 
-
-
 static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
   struct msghdr *msg, size_t size,
   int flags)
@@ -1936,14 +1934,12 @@ static int unix_stream_recvmsg(struct kiocb *iocb, 
struct socket *sock,
goto out;
}
 
-   skip = sk_peek_offset(sk, flags);
-
do {
int chunk;
-   struct sk_buff *skb;
+   struct sk_buff *skb, *last;
 
unix_state_lock(sk);
-   skb = skb_peek(sk-sk_receive_queue);
+   last = skb = skb_peek(sk-sk_receive_queue);
 again:
if (skb == NULL) {
unix_sk(sk)-recursion_level = 0;
@@ -1966,7 +1962,7 @@ again:
break;
mutex_unlock(u-readlock);
 
-   timeo = unix_stream_data_wait(sk, timeo);
+   timeo = unix_stream_data_wait(sk, timeo, last);
 
if (signal_pending(current)
||  mutex_lock_interruptible(u-readlock)) {
@@ -1980,10 +1976,13 @@ again:
break;
}
 
-   if (skip = skb-len) {
+   skip = sk_peek_offset(sk, flags);
+   while (skip = skb-len) {
skip -= skb-len;
+   last = skb;
skb = skb_peek_next(skb, sk-sk_receive_queue);
-   goto again;
+   if (!skb)
+   goto again;
}
 
unix_state_unlock(sk);
-- 
1.7.10.4

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH net 2/3] unix/dgram: fix peeking with an offset larger than data in queue

2013-04-25 Thread Benjamin Poirier

Currently, peeking on a unix datagram socket with an offset larger than len of
the data in the sk receive queue returns immediately with bogus data. That's
because *off is not reset between each skb_queue_walk().

This patch fixes this so that the behavior is the same as peeking with no
offset on an empty queue: the caller blocks.

Signed-off-by: Benjamin Poirier bpoir...@suse.de
---
 net/core/datagram.c |   25 +++--
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/net/core/datagram.c b/net/core/datagram.c
index 02398ae..6c502b5 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -78,9 +78,10 @@ static int receiver_wake_function(wait_queue_t *wait, 
unsigned int mode, int syn
return autoremove_wake_function(wait, mode, sync, key);
 }
 /*
- * Wait for a packet..
+ * Wait for the last received packet to be different from skb
  */
-static int wait_for_packet(struct sock *sk, int *err, long *timeo_p)
+static int wait_for_more_packets(struct sock *sk, int *err, long *timeo_p,
+struct sk_buff *skb)
 {
int error;
DEFINE_WAIT_FUNC(wait, receiver_wake_function);
@@ -92,7 +93,7 @@ static int wait_for_packet(struct sock *sk, int *err, long 
*timeo_p)
if (error)
goto out_err;
 
-   if (!skb_queue_empty(sk-sk_receive_queue))
+   if ((struct sk_buff *)sk-sk_receive_queue.prev != skb)
goto out;
 
/* Socket shut down? */
@@ -131,9 +132,9 @@ out_noerr:
  * __skb_recv_datagram - Receive a datagram skbuff
  * @sk: socket
  * @flags: MSG_ flags
- * @off: an offset in bytes to peek skb from. Returns an offset
- *   within an skb where data actually starts
  * @peeked: returns non-zero if this packet has been seen before
+ * @_off: an offset in bytes to peek skb from. Returns an offset
+ *within an skb where data actually starts
  * @err: error code returned
  *
  * Get a datagram skbuff, understands the peeking, nonblocking wakeups
@@ -159,9 +160,9 @@ out_noerr:
  * the standard around please.
  */
 struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
-   int *peeked, int *off, int *err)
+   int *peeked, int *_off, int *err)
 {
-   struct sk_buff *skb;
+   struct sk_buff *skb, *last;
long timeo;
/*
 * Caller is allowed not to check sk-sk_err before skb_recv_datagram()
@@ -182,13 +183,16 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, 
unsigned int flags,
 */
unsigned long cpu_flags;
struct sk_buff_head *queue = sk-sk_receive_queue;
+   int off = *_off;
 
spin_lock_irqsave(queue-lock, cpu_flags);
+   last = (struct sk_buff *)queue;
skb_queue_walk(queue, skb) {
+   last = skb;
*peeked = skb-peeked;
if (flags  MSG_PEEK) {
-   if (*off = skb-len  (skb-len || *off)) {
-   *off -= skb-len;
+   if (off = skb-len  (skb-len || off)) {
+   off -= skb-len;
continue;
}
skb-peeked = 1;
@@ -197,6 +201,7 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, 
unsigned int flags,
__skb_unlink(skb, queue);
 
spin_unlock_irqrestore(queue-lock, cpu_flags);
+   *_off = off;
return skb;
}
spin_unlock_irqrestore(queue-lock, cpu_flags);
@@ -206,7 +211,7 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, 
unsigned int flags,
if (!timeo)
goto no_packet;
 
-   } while (!wait_for_packet(sk, err, timeo));
+   } while (!wait_for_more_packets(sk, err, timeo, last));
 
return NULL;
 
-- 
1.7.10.4

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH net 1/3] unix/dgram: peek beyond 0-sized skbs

2013-04-26 Thread Benjamin Poirier

On 2013/04/25 11:48, Eric Dumazet wrote:
 On Thu, 2013-04-25 at 09:47 -0400, Benjamin Poirier wrote:
  77c1090 net: fix infinite loop in __skb_recv_datagram() (v3.8) introduced 
  a
  regression:
  After that commit, recv can no longer peek beyond a 0-sized skb in the 
  queue.
  __skb_recv_datagram() instead stops at the first skb with len == 0 and 
  results
  in the system call failing with -EFAULT via skb_copy_datagram_iovec().
 
 
 if MSG_PEEK is not used, what happens here ?

I'm not sure what your question is aiming at, but if MSG_PEEK isn't used,
there's no difference with regards to this patch. It's all in the
if (flags & MSG_PEEK) block.

More generally, without MSG_PEEK, a sequence of
send(..., len=10, ...); send(len=0); send(len=20)
results in
recv()=10; recv()=0; recv()=20; recv()= /* blocks */

With flags=MSG_PEEK, a sequence of
send(len=10); send(len=0); send(len=20)
resulted (without any patch) in
setsockopt(..., SO_PEEK_OFF - 0);
recv()=10; recv()=0; recv()=0; recv()=0; ...
and with v2 of the patch, results in
setsockopt(..., SO_PEEK_OFF - 0);
recv()=10; recv()=0; recv()=20; recv()= /* blocks */

We could also have the following sequence
setsockopt(..., SO_PEEK_OFF - 10);
recv()=0; recv()=20; recv()= /* blocks */
or
setsockopt(..., SO_PEEK_OFF - 5);
recv()=5; recv()=0; recv()=20; recv()= /* blocks */
or the unfortunate
setsockopt(..., SO_PEEK_OFF - 0);
recv()=10; recv()=0; recv()=20;
setsockopt(..., SO_PEEK_OFF - 0);
recv()=10; ; recv()=20; recv()= /* blocks */

That last one could be changed by resetting the skb->peeked flag for all
buffers in the queue during sock_setsockopt SO_PEEK_OFF, if you think it's
better that way.

 
 It doesn't look right to me that we return -EFAULT if skb-len is 0,
 EFAULT is reserved to faulting (ie reading/writing at least one byte)

That's what happens when skb_copy_datagram_iovec() is asked to copy > 0 bytes
out of a skb with len == 0.

Perhaps skb_copy_datagram_iovec() should be changed to use EINVAL in that case
but we can avoid that kind of call altogether by fixing the problem with
MSG_PEEK.

 
 How are we telling the user message had 0 byte, but its not EOF ?
 

We aren't, but what's EOF on a datagram socket?

Thank you for the review.


Subject: [PATCH net v2 1/3] unix/dgram: peek beyond 0-sized skbs

77c1090 net: fix infinite loop in __skb_recv_datagram() (v3.8) introduced a
regression:
After that commit, recv can no longer peek beyond a 0-sized skb in the queue.
__skb_recv_datagram() instead stops at the first skb with len == 0 and results
in the system call failing with -EFAULT via skb_copy_datagram_iovec().

When peeking at an offset with 0-sized skb(s), each one of those is received
only once, in sequence. The offset starts moving forward again after receiving
datagrams with len > 0.

Signed-off-by: Benjamin Poirier bpoir...@suse.de
---

* v2 also fix the situation when sk_peek_off must advance to and beyond a
  0-sized skb

* v1 fix the case when SO_PEEK_OFF is used to set sk_peek_off beyond a
  0-sized skb

 net/core/datagram.c |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/core/datagram.c b/net/core/datagram.c
index 368f9c3..99c4f52 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -187,7 +187,8 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, 
unsigned int flags,
skb_queue_walk(queue, skb) {
*peeked = skb-peeked;
if (flags  MSG_PEEK) {
-   if (*off = skb-len  skb-len) {
+   if (*off = skb-len  (skb-len || *off ||
+skb-peeked)) {
*off -= skb-len;
continue;
}
-- 
1.7.10.4

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH net v2 3/3] unix/stream: fix peeking with an offset larger than data in queue

2013-04-26 Thread Benjamin Poirier

Currently, peeking on a unix stream socket with an offset larger than len of
the data in the sk receive queue returns immediately with bogus data.

This patch fixes this so that the behavior is the same as peeking with no
offset on an empty queue: the caller blocks.

Signed-off-by: Benjamin Poirier bpoir...@suse.de
---
 net/unix/af_unix.c |   25 -
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 2db702d..1a02af0 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1859,10 +1859,10 @@ out:
 }
 
 /*
- * Sleep until data has arrive. But check for races..
+ * Sleep until more data has arrived. But check for races..
  */
-
-static long unix_stream_data_wait(struct sock *sk, long timeo)
+static long unix_stream_data_wait(struct sock *sk, long timeo,
+ struct sk_buff *last)
 {
DEFINE_WAIT(wait);
 
@@ -1871,7 +1871,7 @@ static long unix_stream_data_wait(struct sock *sk, long 
timeo)
for (;;) {
prepare_to_wait(sk_sleep(sk), wait, TASK_INTERRUPTIBLE);
 
-   if (!skb_queue_empty(sk-sk_receive_queue) ||
+   if (skb_peek_tail(sk-sk_receive_queue) != last ||
sk-sk_err ||
(sk-sk_shutdown  RCV_SHUTDOWN) ||
signal_pending(current) ||
@@ -1890,8 +1890,6 @@ static long unix_stream_data_wait(struct sock *sk, long 
timeo)
return timeo;
 }
 
-
-
 static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
   struct msghdr *msg, size_t size,
   int flags)
@@ -1936,14 +1934,12 @@ static int unix_stream_recvmsg(struct kiocb *iocb, 
struct socket *sock,
goto out;
}
 
-   skip = sk_peek_offset(sk, flags);
-
do {
int chunk;
-   struct sk_buff *skb;
+   struct sk_buff *skb, *last;
 
unix_state_lock(sk);
-   skb = skb_peek(sk-sk_receive_queue);
+   last = skb = skb_peek(sk-sk_receive_queue);
 again:
if (skb == NULL) {
unix_sk(sk)-recursion_level = 0;
@@ -1966,7 +1962,7 @@ again:
break;
mutex_unlock(u-readlock);
 
-   timeo = unix_stream_data_wait(sk, timeo);
+   timeo = unix_stream_data_wait(sk, timeo, last);
 
if (signal_pending(current)
||  mutex_lock_interruptible(u-readlock)) {
@@ -1980,10 +1976,13 @@ again:
break;
}
 
-   if (skip = skb-len) {
+   skip = sk_peek_offset(sk, flags);
+   while (skip = skb-len) {
skip -= skb-len;
+   last = skb;
skb = skb_peek_next(skb, sk-sk_receive_queue);
-   goto again;
+   if (!skb)
+   goto again;
}
 
unix_state_unlock(sk);
-- 
1.7.10.4

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH net v2 2/3] unix/dgram: fix peeking with an offset larger than data in queue

2013-04-26 Thread Benjamin Poirier

Currently, peeking on a unix datagram socket with an offset larger than len of
the data in the sk receive queue returns immediately with bogus data. That's
because *off is not reset between each skb_queue_walk().

This patch fixes this so that the behavior is the same as peeking with no
offset on an empty queue: the caller blocks.

Signed-off-by: Benjamin Poirier bpoir...@suse.de
---

v2: address review feedback

 net/core/datagram.c |   27 ---
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/net/core/datagram.c b/net/core/datagram.c
index 99c4f52..1985c9a 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -78,9 +78,10 @@ static int receiver_wake_function(wait_queue_t *wait, 
unsigned int mode, int syn
return autoremove_wake_function(wait, mode, sync, key);
 }
 /*
- * Wait for a packet..
+ * Wait for the last received packet to be different from skb
  */
-static int wait_for_packet(struct sock *sk, int *err, long *timeo_p)
+static int wait_for_more_packets(struct sock *sk, int *err, long *timeo_p,
+const struct sk_buff *skb)
 {
int error;
DEFINE_WAIT_FUNC(wait, receiver_wake_function);
@@ -92,7 +93,7 @@ static int wait_for_packet(struct sock *sk, int *err, long 
*timeo_p)
if (error)
goto out_err;
 
-   if (!skb_queue_empty(sk-sk_receive_queue))
+   if (sk-sk_receive_queue.prev != skb)
goto out;
 
/* Socket shut down? */
@@ -131,9 +132,9 @@ out_noerr:
  * __skb_recv_datagram - Receive a datagram skbuff
  * @sk: socket
  * @flags: MSG_ flags
- * @off: an offset in bytes to peek skb from. Returns an offset
- *   within an skb where data actually starts
  * @peeked: returns non-zero if this packet has been seen before
+ * @_off: an offset in bytes to peek skb from. Returns an offset
+ *within an skb where data actually starts
  * @err: error code returned
  *
  * Get a datagram skbuff, understands the peeking, nonblocking wakeups
@@ -159,9 +160,9 @@ out_noerr:
  * the standard around please.
  */
 struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
-   int *peeked, int *off, int *err)
+   int *peeked, int *_off, int *err)
 {
-   struct sk_buff *skb;
+   struct sk_buff *skb, *last;
long timeo;
/*
 * Caller is allowed not to check sk-sk_err before skb_recv_datagram()
@@ -182,14 +183,17 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, 
unsigned int flags,
 */
unsigned long cpu_flags;
struct sk_buff_head *queue = sk-sk_receive_queue;
+   int off = *_off;
 
+   last = (struct sk_buff *)queue;
spin_lock_irqsave(queue-lock, cpu_flags);
skb_queue_walk(queue, skb) {
+   last = skb;
*peeked = skb-peeked;
if (flags  MSG_PEEK) {
-   if (*off = skb-len  (skb-len || *off ||
-skb-peeked)) {
-   *off -= skb-len;
+   if (off = skb-len  (skb-len || off ||
+   skb-peeked)) {
+   off -= skb-len;
continue;
}
skb-peeked = 1;
@@ -198,6 +202,7 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, 
unsigned int flags,
__skb_unlink(skb, queue);
 
spin_unlock_irqrestore(queue-lock, cpu_flags);
+   *_off = off;
return skb;
}
spin_unlock_irqrestore(queue-lock, cpu_flags);
@@ -207,7 +212,7 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, 
unsigned int flags,
if (!timeo)
goto no_packet;
 
-   } while (!wait_for_packet(sk, err, timeo));
+   } while (!wait_for_more_packets(sk, err, timeo, last));
 
return NULL;
 
-- 
1.7.10.4

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v2 5/5] kconfig: fix bug in search results string: use strlen(gstr->s), not gstr->len

2013-10-07 Thread Benjamin Poirier

On 2013/10/03 19:25, Martin Walch wrote:
 From: Martin Walch walch.mar...@web.de
 Date: Thu, 3 Oct 2013 18:35:16 +0200
 Subject: [PATCH v2 5/5] kconfig: fix bug in search results string: use 
 strlen(gstr-s), not gstr-len
 

Good catch, thank you.

Acked-by: Benjamin Poirier bpoir...@suse.de
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH]kconfig/menu.c: fix uninitialized variable warning

2013-09-20 Thread Benjamin Poirier

On 2013/09/20 08:05, Madhavan Srinivasan wrote:
 On Friday 20 September 2013 01:34 AM, Yann E. MORIN wrote:
  Benjamin, All,
  
  On 2013-09-19 15:13 -0400, Benjamin Poirier spake thusly:
  On 2013/09/19 19:27, Yann E. MORIN wrote:
  Benjamin, Madhavan, All,
 
  On 2013-09-19 11:22 -0400, Benjamin Poirier spake thusly:
  On 2013/09/19 12:58, Madhavan Srinivasan wrote:
  In file included from scripts/kconfig/zconf.tab.c:2537:0:
  scripts/kconfig/menu.c: In function ‘get_symbol_str’:
  scripts/kconfig/menu.c:586:18: warning: ‘jump’ may be used 
  uninitialized in
  this function [-Wmaybe-uninitialized]
[...]
 
  I do not see this warning with gcc 4.7.2 or 4.8.1. Which version are you
  using?
 
[...]
  
  No, I was not able to reproduce it with either gcc-4.4.7, gcc-4.6.4 or
  gcc-4.7.3 on my Ubuntu 13.04. Neither gcc-4.5 nor 4.8 are packaged, so
  I could not test them.
  
  Anyway, -Wmaybe-uninitialized is new with 4.7. So, the warning can't
  happen with gcc < 4.7.
  
  Madhavan, what gcc version are you using?
 gcc i have is gcc version 4.8.0 20130526

In that case, I don't have a strong opinion whether the patch should be
merged or not, but if it is merged, I think a comment should be added to
the code or the changelog stating that this is to work around a
deficiency observed in gcc 4.8.0.
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] net: Do not enable tx-nocache-copy by default

2014-01-06 Thread Benjamin Poirier

There are many cases where this feature does not improve performance or even
reduces it. See the following discussion for example perf numbers:
http://thread.gmane.org/gmane.linux.network/298345

CC: Tom Herbert therb...@google.com
Signed-off-by: Benjamin Poirier bpoir...@suse.de
---
 net/core/dev.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 153ee2f..2e242583 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5783,13 +5783,8 @@ int register_netdevice(struct net_device *dev)
dev-features |= NETIF_F_SOFT_FEATURES;
dev-wanted_features = dev-features  dev-hw_features;
 
-   /* Turn on no cache copy if HW is doing checksum */
if (!(dev-flags  IFF_LOOPBACK)) {
dev-hw_features |= NETIF_F_NOCACHE_COPY;
-   if (dev-features  NETIF_F_ALL_CSUM) {
-   dev-wanted_features |= NETIF_F_NOCACHE_COPY;
-   dev-features |= NETIF_F_NOCACHE_COPY;
-   }
}
 
/* Make NETIF_F_HIGHDMA inheritable to VLAN devices.
-- 
1.8.4

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v2] net: Do not enable tx-nocache-copy by default

2014-01-07 Thread Benjamin Poirier

 elapsed

CC: Tom Herbert therb...@google.com
Signed-off-by: Benjamin Poirier bpoir...@suse.de
---
 net/core/dev.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 4fc1722..0e82e77 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5831,13 +5831,8 @@ int register_netdevice(struct net_device *dev)
dev-features |= NETIF_F_SOFT_FEATURES;
dev-wanted_features = dev-features  dev-hw_features;
 
-   /* Turn on no cache copy if HW is doing checksum */
if (!(dev-flags  IFF_LOOPBACK)) {
dev-hw_features |= NETIF_F_NOCACHE_COPY;
-   if (dev-features  NETIF_F_ALL_CSUM) {
-   dev-wanted_features |= NETIF_F_NOCACHE_COPY;
-   dev-features |= NETIF_F_NOCACHE_COPY;
-   }
}
 
/* Make NETIF_F_HIGHDMA inheritable to VLAN devices.
-- 
1.8.4

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v2 2/4] driver core: enable drivers to use deferred probe from init

2014-07-29 Thread Benjamin Poirier

On 2014/07/29 21:07, Tetsuo Handa wrote:
 Luis R. Rodriguez wrote:
  On Mon, Jul 28, 2014 at 5:35 PM, Greg KH gre...@linuxfoundation.org wrote:
   On Mon, Jul 28, 2014 at 05:26:34PM -0700, Luis R. Rodriguez wrote:
   To ignore SIGKILL ?
  
   Sorry, I thought this was a userspace change that caused this.
  
   As it's a kernel change, well, maybe that patch should be reverted...
  
  That's certainly viable. Oleg?
 
 I don't want to revert that patch.

I agree that 786235ee should not be reverted to fix the problem of
modules that receive sigkill from udev while they are initializing. In
fact, while it may fix the case that was reported with mptsas, it would
not fix cxgb4 because there are other code paths that check for pending
signals and that abort (ex. pci_vpd_pci22_wait()).

Reverting 786235ee effectively works around the problem by making
modprobe unkillable. The proper solution would be to make sure that udev
does not send sigkill to modprobe in the first place, either by making
the timeout longer or by making the module probe faster.

If you have other reasons for reverting 786235ee, then it's a different
story.
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v2 3/3] tg3: Fix tx_pending checks for tg3_tso_bug

2014-08-26 Thread Benjamin Poirier

On 2014/08/22 16:17, Prashant Sreedharan wrote:
 Benjamin, thanks for the patch. Broadcom QA will be testing the changes.
 Couple of comments below.
  segs = skb_gso_segment(skb, tp-dev-features 
  ~(NETIF_F_TSO | NETIF_F_TSO6));
  -   if (IS_ERR(segs) || !segs)
  +   if (IS_ERR_OR_NULL(segs))
  goto tg3_tso_bug_end;
   
  do {
  +   unsigned int desc_cnt = skb_shinfo(segs)-nr_frags + 1;
  +
  nskb = segs;
  segs = segs-next;
  nskb-next = NULL;
  -   tg3_start_xmit(nskb, tp-dev);
  +
  +   if (tg3_tx_avail(tnapi) = segs_remaining - 1 + desc_cnt 
  +   skb_linearize(nskb)) {
  +   nskb-next = segs;
  +   segs = nskb;
  +   do {
  +   nskb = segs-next;
  +
  +   dev_kfree_skb_any(segs);
  +   segs = nskb;
  +   } while (segs);
 
 If skb_linearize() fails need to increment the tp-tx_dropped count

Sorry for the delay, while testing this error path I noticed a potential
problem. There should be an additional check here to stop the queue with
the default threshold. Otherwise, the netdev_err message at the start of
__tg3_start_xmit() could be triggered when the next frame is
transmitted. That is because the previous calls to __tg3_start_xmit() in
tg3_tso_bug() may have been using a stop_thresh=segs_remaining that is <
MAX_SKB_FRAGS + 1.

 
  +   goto tg3_tso_bug_end;
  +   }
  +   segs_remaining--;
  +   if (segs_remaining)
  +   __tg3_start_xmit(nskb, tp-dev, segs_remaining);
 
 To clarify passing segs_remaining will make sure the queue is never
 stopped correct ?

It makes sure the queue is not stopped before we are finished submitting
all gso segments.

This is what's alluded to in this part of the commit message:
This puts us in the exceptional situation that a single skb that
triggers tg3_tso_bug() may require the entire tx ring. [...]
Likewise, usually the tx queue is stopped as soon as an skb with
max frags may overrun it. Since the skbs submitted from
tg3_tso_bug() use a controlled number of descriptors, the tx
queue stop threshold may be lowered.

 
  +   else
  +   tg3_start_xmit(nskb, tp-dev);
  } while (segs);
   
 
 
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH net v3 1/4] tg3: Limit minimum tx queue wakeup threshold

2014-08-26 Thread Benjamin Poirier

tx_pending may be set by the user (via ethtool -G) to a low enough value that
TG3_TX_WAKEUP_THRESH becomes smaller than MAX_SKB_FRAGS + 1. This may cause
the tx queue to be woken when there are in fact not enough descriptors to
handle an skb with max frags. This in turn causes tg3_start_xmit() to return
NETDEV_TX_BUSY and print error messages. Fix the problem by putting a limit to
how low TG3_TX_WAKEUP_THRESH can go.

Signed-off-by: Benjamin Poirier bpoir...@suse.de
---

I noticed the problem in a 3.0 kernel when setting `ethtool eth0 -G tx 50` and
running a netperf TCP_STREAM test. The console fills up with
[10597.596155] tg3 :06:00.0: eth0: BUG! Tx Ring full when queue awake!
The problem in tg3 remains in current kernels though it does not reproduce as
easily since 5640f76 net: use a per task frag allocator (v3.7-rc1). I
reproduced on current kernels by using the fail_page_alloc fault injection
mechanism to force the creation of skbs with many order-0 frags. Note that the
following script may also trigger another bug (NETDEV WATCHDOG), which is
fixed in the next patch.

$ cat /tmp/doit.sh

F=/sys/kernel/debug/fail_page_alloc

echo -1  $F/times
echo 0  $F/verbose
echo 0  $F/ignore-gfp-wait
echo 1  $F/task-filter
echo 100  $F/probability

netperf -H 192.168.9.30 -l100 -t omni -- -d send 

n=$!

sleep 0.3
echo 1  /proc/$n/make-it-fail
sleep 10

kill $n
---
 drivers/net/ethernet/broadcom/tg3.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/tg3.c 
b/drivers/net/ethernet/broadcom/tg3.c
index 3ac5d23..b11c0fd 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -202,7 +202,8 @@ static inline void _tg3_flag_clear(enum TG3_FLAGS flag, 
unsigned long *bits)
 #endif
 
 /* minimum number of free TX descriptors required to wake up TX process */
-#define TG3_TX_WAKEUP_THRESH(tnapi)((tnapi)-tx_pending / 4)
+#define TG3_TX_WAKEUP_THRESH(tnapi)max_t(u32, (tnapi)-tx_pending / 4, \
+ MAX_SKB_FRAGS + 1)
 #define TG3_TX_BD_DMA_MAX_2K   2048
 #define TG3_TX_BD_DMA_MAX_4K   4096
 
-- 
1.8.4.5

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH net v3 4/4] tg3: Fix tx_pending checks for tg3_tso_bug

2014-08-26 Thread Benjamin Poirier

In tg3_set_ringparam(), the tx_pending test to cover the cases where
tg3_tso_bug() is entered has two problems
1) the check is only done for certain hardware whereas the workaround
is now used more broadly. IOW, the check may not be performed when it
is needed.
2) the check is too optimistic.

For example, with a 5761 (SHORT_DMA_BUG), tg3_set_ringparam() skips over the
tx_pending <= (MAX_SKB_FRAGS * 3) check because TSO_BUG is false. Even if it
did do the check, with a full sized skb, frag_cnt_est = 135 but the check is
for <= MAX_SKB_FRAGS * 3 (= 17 * 3 = 51). So the check is insufficient. This
leads to the following situation: by setting, e.g., tx_pending = 100, there can
be an skb that triggers tg3_tso_bug() and that is large enough to cause
tg3_tso_bug() to stop the queue even when it is empty. We then end up with a
netdev watchdog transmit timeout.

Given that 1) some of the conditions tested for in tg3_tx_frag_set() apply
regardless of the chipset flags and that 2) it is difficult to estimate ahead
of time the max possible number of frames that a large skb may be split into
by gso, we instead take the approach of adjusting dev->gso_max_segs according
to the requested tx_pending size.

This puts us in the exceptional situation that a single skb that triggers
tg3_tso_bug() may require the entire tx ring. Usually the tx queue is woken up
when at least a quarter of it is available (TG3_TX_WAKEUP_THRESH) but that
would be insufficient now. To avoid useless wakeups, the tx queue wake up
threshold is made dynamic. Likewise, usually the tx queue is stopped as soon
as an skb with max frags may overrun it. Since the skbs submitted from
tg3_tso_bug() use a controlled number of descriptors, the tx queue stop
threshold may be lowered.

Signed-off-by: Benjamin Poirier bpoir...@suse.de
---

Changes v1-v2
* in tg3_set_ringparam(), reduce gso_max_segs further to budget 3 descriptors
  per gso seg instead of only 1 as in v1
* in tg3_tso_bug(), check that this estimation (3 desc/seg) holds, otherwise
  linearize some skbs as needed
* in tg3_start_xmit(), make the queue stop threshold a parameter, for the
  reason explained in the commit description

Changes v2-v3
* use tg3_maybe_stop_txq() instead of repeatedly open coding it
* add the requested tp->tx_dropped++ stat increase in tg3_tso_bug() if
  skb_linearize() fails and we must abort
* in the same code block, add an additional check to stop the queue with the
  default threshold. Otherwise, the netdev_err message at the start of
  __tg3_start_xmit() could be triggered when the next frame is transmitted.
  That is because the previous calls to __tg3_start_xmit() in tg3_tso_bug()
  may have been using a stop_thresh=segs_remaining that is < MAX_SKB_FRAGS +
  1.

For v3, I repeated the same rr latency test I had done for v2. Once again, I
did not measure a significant impact.

* without patches
rr values: 6297.2 6851.71 6928.61 6907.2 6682.71 6808.54 6920.69 
6906.56 6890.48 6891.39
sample size: 10
mean: 6808.509
standard deviation: 194.1019
quantiles: 6297.2 6819.332 6890.935 6907.04 6928.61
6800±200

 Performance counter stats for 'netperf -H 192.168.9.30 -l60 -T 0,0 -t omni -- 
-d rr' (10 runs):

 480672.401297 task-clock#8.001 CPUs utilized   
 ( +-  0.01% ) [100.00%]
   840,080 context-switches  #0.002 M/sec   
 ( +-  0.88% ) [100.00%]
   598 CPU-migrations#0.000 M/sec   
 ( +- 10.27% ) [100.00%]
   552 page-faults   #0.000 M/sec   
 ( +- 81.33% )
   275,174,355,207 cycles#0.572 GHz 
 ( +-  7.02% ) [15.38%]
   791,022,327,544 stalled-cycles-frontend   #  287.46% frontend cycles idle
 ( +-  4.74% ) [24.88%]
   686,658,715,636 stalled-cycles-backend#  249.54% backend  cycles idle
 ( +-  4.93% ) [34.88%]
   114,236,655,920 instructions  #0.42  insns per cycle
 #6.92  stalled cycles per insn 
 ( +-  5.02% ) [44.88%]
25,562,621,872 branches  #   53.181 M/sec   
 ( +-  5.23% ) [50.00%]
   200,879,548 branch-misses #0.79% of all branches 
 ( +-  0.85% ) [50.00%]
27,266,292,729 L1-dcache-loads   #   56.725 M/sec   
 ( +-  4.94% ) [50.00%]
   360,072,063 L1-dcache-load-misses #1.32% of all L1-dcache hits   
 ( +-  0.39% ) [49.88%]
85,199,150 LLC-loads #0.177 M/sec   
 ( +-  1.20% ) [40.00%]
27,617 LLC-load-misses   #0.03% of all LL-cache hits
 ( +- 49.75% ) [ 5.00%]

  60.078218016 seconds time elapsed 
 ( +-  0.01% )

* with patches
rr values: 6849.21 6872.63 6848.53 6889.81 6889.85 6873.16 6831.34 
6918.74 6878.89 6908.51
sample

[PATCH net v3 2/4] tg3: Fix tx_pending check for MAX_SKB_FRAGS

2014-08-26 Thread Benjamin Poirier

The rest of the driver assumes at least one free descriptor in the tx ring.
Therefore, since an skb with max frags takes up (MAX_SKB_FRAGS + 1)
descriptors, tx_pending must be > (MAX_SKB_FRAGS + 1).

Signed-off-by: Benjamin Poirier bpoir...@suse.de
---

Changes v1-v2
Moved ahead in the series from 3/3 to 2/3, no functional change

I reproduced this bug using the same approach explained in patch 1.
The bug reproduces with tx_pending = 18
---
 drivers/net/ethernet/broadcom/tg3.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/tg3.c 
b/drivers/net/ethernet/broadcom/tg3.c
index b11c0fd..0cecd6d 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -12319,7 +12319,7 @@ static int tg3_set_ringparam(struct net_device *dev, 
struct ethtool_ringparam *e
if ((ering-rx_pending  tp-rx_std_ring_mask) ||
(ering-rx_jumbo_pending  tp-rx_jmb_ring_mask) ||
(ering-tx_pending  TG3_TX_RING_SIZE - 1) ||
-   (ering-tx_pending = MAX_SKB_FRAGS) ||
+   (ering-tx_pending = MAX_SKB_FRAGS + 1) ||
(tg3_flag(tp, TSO_BUG) 
 (ering-tx_pending = (MAX_SKB_FRAGS * 3
return -EINVAL;
-- 
1.8.4.5

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH net v3 3/4] tg3: Move tx queue stop logic to its own function

2014-08-26 Thread Benjamin Poirier

It is duplicated. Also, the first instance in tg3_start_xmit() is racy.
Consider:

tg3_start_xmit()
    if budget <= ...
                                tg3_tx()
                                    (free up the entire ring)
                                    tx_cons =
                                    smp_mb
                                    if queue_stopped and tx_avail, NO
        if !queue_stopped
            stop queue
        return NETDEV_TX_BUSY

... tx queue stopped forever

Signed-off-by: Benjamin Poirier bpoir...@suse.de
---
Changes v2-v3
* new patch to avoid repeatedly open coding this block in the next patch.
---
 drivers/net/ethernet/broadcom/tg3.c | 69 ++---
 1 file changed, 34 insertions(+), 35 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/tg3.c 
b/drivers/net/ethernet/broadcom/tg3.c
index 0cecd6d..5d39554 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -7831,6 +7831,29 @@ static int tigon3_dma_hwbug_workaround(struct tg3_napi 
*tnapi,
 
 static netdev_tx_t tg3_start_xmit(struct sk_buff *, struct net_device *);
 
+static inline bool tg3_maybe_stop_txq(struct tg3_napi *tnapi,
+ struct netdev_queue *txq,
+ u32 stop_thresh, u32 wakeup_thresh)
+{
+   bool stopped = false;
+
+   if (unlikely(tg3_tx_avail(tnapi) = stop_thresh)) {
+   if (!netif_tx_queue_stopped(txq)) {
+   stopped = true;
+   netif_tx_stop_queue(txq);
+   BUG_ON(wakeup_thresh = tnapi-tx_pending);
+   }
+   /* netif_tx_stop_queue() must be done before checking tx index
+* in tg3_tx_avail(), because in tg3_tx(), we update tx index
+* before checking for netif_tx_queue_stopped().
+*/
+   smp_mb();
+   if (tg3_tx_avail(tnapi)  wakeup_thresh)
+   netif_tx_wake_queue(txq);
+   }
+   return stopped;
+}
+
 /* Use GSO to workaround all TSO packets that meet HW bug conditions
  * indicated in tg3_tx_frag_set()
  */
@@ -7841,20 +7864,9 @@ static int tg3_tso_bug(struct tg3 *tp, struct tg3_napi 
*tnapi,
u32 frag_cnt_est = skb_shinfo(skb)-gso_segs * 3;
 
/* Estimate the number of fragments in the worst case */
-   if (unlikely(tg3_tx_avail(tnapi) = frag_cnt_est)) {
-   netif_tx_stop_queue(txq);
-
-   /* netif_tx_stop_queue() must be done before checking
-* checking tx index in tg3_tx_avail() below, because in
-* tg3_tx(), we update tx index before checking for
-* netif_tx_queue_stopped().
-*/
-   smp_mb();
-   if (tg3_tx_avail(tnapi) = frag_cnt_est)
-   return NETDEV_TX_BUSY;
-
-   netif_tx_wake_queue(txq);
-   }
+   tg3_maybe_stop_txq(tnapi, txq, frag_cnt_est, frag_cnt_est);
+   if (netif_tx_queue_stopped(txq))
+   return NETDEV_TX_BUSY;
 
segs = skb_gso_segment(skb, tp-dev-features 
~(NETIF_F_TSO | NETIF_F_TSO6));
@@ -7902,16 +7914,13 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, 
struct net_device *dev)
 * interrupt.  Furthermore, IRQ processing runs lockless so we have
 * no IRQ context deadlocks to worry about either.  Rejoice!
 */
-   if (unlikely(budget = (skb_shinfo(skb)-nr_frags + 1))) {
-   if (!netif_tx_queue_stopped(txq)) {
-   netif_tx_stop_queue(txq);
-
-   /* This is a hard error, log it. */
-   netdev_err(dev,
-  BUG! Tx Ring full when queue awake!\n);
-   }
-   return NETDEV_TX_BUSY;
+   if (tg3_maybe_stop_txq(tnapi, txq, skb_shinfo(skb)-nr_frags + 1,
+  TG3_TX_WAKEUP_THRESH(tnapi))) {
+   /* This is a hard error, log it. */
+   netdev_err(dev, BUG! Tx Ring full when queue awake!\n);
}
+   if (netif_tx_queue_stopped(txq))
+   return NETDEV_TX_BUSY;
 
entry = tnapi-tx_prod;
base_flags = 0;
@@ -8087,18 +8096,8 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, 
struct net_device *dev)
tw32_tx_mbox(tnapi-prodmbox, entry);
 
tnapi-tx_prod = entry;
-   if (unlikely(tg3_tx_avail(tnapi) = (MAX_SKB_FRAGS + 1))) {
-   netif_tx_stop_queue(txq);
-
-   /* netif_tx_stop_queue() must be done before checking
-* checking tx index in tg3_tx_avail() below, because in
-* tg3_tx(), we update tx index before checking for
-* netif_tx_queue_stopped().
-*/
-   smp_mb();
-   if (tg3_tx_avail(tnapi)  TG3_TX_WAKEUP_THRESH

Re: [PATCH 2/3] tg3: Fix tx_pending checks for tg3_tso_bug

2014-08-19 Thread Benjamin Poirier

On 2014/08/19 11:52, Benjamin Poirier wrote:
 + trace_printk(stopping queue, %d = %d\n,
 +  tg3_tx_avail(tnapi), skb_shinfo(skb)-gso_segs);
   netif_tx_stop_queue(txq);
 + trace_printk(stopped queue\n);

err, I'll resubmit without the trace_printk. Please review for other
issues nevertheless.
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 2/3] tg3: Fix tx_pending checks for tg3_tso_bug

2014-08-20 Thread Benjamin Poirier

On 2014/08/19 16:10, Michael Chan wrote:
 On Tue, 2014-08-19 at 11:52 -0700, Benjamin Poirier wrote: 
  @@ -7838,11 +7838,14 @@ static int tg3_tso_bug(struct tg3 *tp, struct 
  tg3_napi *tnapi,
 struct netdev_queue *txq, struct sk_buff *skb)
   {
  struct sk_buff *segs, *nskb;
  -   u32 frag_cnt_est = skb_shinfo(skb)-gso_segs * 3;
   
  -   /* Estimate the number of fragments in the worst case */
  -   if (unlikely(tg3_tx_avail(tnapi) = frag_cnt_est)) {
  +   if (unlikely(tg3_tx_avail(tnapi) = skb_shinfo(skb)-gso_segs)) {
  +   trace_printk(stopping queue, %d = %d\n,
  +tg3_tx_avail(tnapi), 
  skb_shinfo(skb)-gso_segs);
  netif_tx_stop_queue(txq);
  +   trace_printk(stopped queue\n);
  +   tnapi-wakeup_thresh = skb_shinfo(skb)-gso_segs;
  +   BUG_ON(tnapi-wakeup_thresh = tnapi-tx_pending);
   
  /* netif_tx_stop_queue() must be done before checking
   * checking tx index in tg3_tx_avail() below, because in 
 
 I don't quite understand this logic and I must be missing something.
 gso_segs is the number of TCP segments the large packet will be broken
 up into.  If it exceeds dev-gso_max_segs, it means it exceeds
 hardware's capabilty and it will do GSO instead of TSO.  But in this
 case in tg3_tso_bug(), we are doing GSO and we may not have enough DMA
 descriptors to do GSO.  Each gso_seg typically requires 2 DMA
 descriptors.

You're right, I had wrongly assumed that the skbs coming out of
skb_gso_segment() were linear. I'll address that in v2 of the patch by masking
out NETIF_F_SG in tg3_tso_bug().

I noticed another issue that had not occurred to me: when tg3_tso_bug is
submitting a full gso segs sequence to tg3_start_xmit, the code at the end of
that function stops the queue before the end of the sequence because tx_avail
becomes smaller than (MAX_SKB_FRAGS + 1). The transmission actually proceeds
because tg3_tso_bug() does not honour the queue state but it seems rather
unsightly to me. I'm trying different solutions to this and will resubmit.
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v2 3/3] tg3: Fix tx_pending checks for tg3_tso_bug

2014-08-21 Thread Benjamin Poirier

In tg3_set_ringparam(), the tx_pending test to cover the cases where
tg3_tso_bug() is entered has two problems
1) the check is only done for certain hardware whereas the workaround
is now used more broadly. IOW, the check may not be performed when it
is needed.
2) the check is too optimistic.

For example, with a 5761 (SHORT_DMA_BUG), tg3_set_ringparam() skips over the
tx_pending <= (MAX_SKB_FRAGS * 3) check because TSO_BUG is false. Even if it
did do the check, with a full sized skb, frag_cnt_est = 135 but the check is
for <= MAX_SKB_FRAGS * 3 (= 17 * 3 = 51). So the check is insufficient. This
leads to the following situation: by setting, ex. tx_pending = 100, there can
be an skb that triggers tg3_tso_bug() and that is large enough to cause
tg3_tso_bug() to stop the queue even when it is empty. We then end up with a
netdev watchdog transmit timeout.

Given that 1) some of the conditions tested for in tg3_tx_frag_set() apply
regardless of the chipset flags and that 2) it is difficult to estimate ahead
of time the max possible number of frames that a large skb may be split into
by gso, we instead take the approach of adjusting dev->gso_max_segs according
to the requested tx_pending size.

This puts us in the exceptional situation that a single skb that triggers
tg3_tso_bug() may require the entire tx ring. Usually the tx queue is woken up
when at least a quarter of it is available (TG3_TX_WAKEUP_THRESH) but that
would be insufficient now. To avoid useless wakeups, the tx queue wake up
threshold is made dynamic. Likewise, usually the tx queue is stopped as soon
as an skb with max frags may overrun it. Since the skbs submitted from
tg3_tso_bug() use a controlled number of descriptors, the tx queue stop
threshold may be lowered.

Signed-off-by: Benjamin Poirier bpoir...@suse.de
---
Changes v1->v2
* in tg3_set_ringparam(), reduce gso_max_segs further to budget 3 descriptors
  per gso seg instead of only 1 as in v1
* in tg3_tso_bug(), check that this estimation (3 desc/seg) holds, otherwise
  linearize some skbs as needed
* in tg3_start_xmit(), make the queue stop threshold a parameter, for the
  reason explained in the commit description

I was concerned that this last change, because of the extra call in the
default xmit path, may impact performance so I performed an rr latency test
but I did not measure a significant impact. That test was with default mtu and
ring size.

# perf stat -r10 -ad netperf -H 192.168.9.30 -l60 -T 0,0 -t omni -- -d rr

* without patches
rr values: 7039.63 6865.03 6939.21 6919.31 6931.88 6932.74 6925.1 
6953.33 6868.43 6935.65
sample size: 10
mean: 6931.031
standard deviation: 48.10918
quantiles: 6865.03 6920.757 6932.31 6938.32 7039.63
6930±50

 Performance counter stats for 'netperf -H 192.168.9.30 -l60 -T 0,0 -t omni -- 
-d rr' (10 runs):

 480643.024723 task-clock#8.001 CPUs utilized   
 ( +-  0.00% ) [100.00%]
   855,136 context-switches  #0.002 M/sec   
 ( +-  0.23% ) [100.00%]
   521 CPU-migrations#0.000 M/sec   
 ( +-  6.49% ) [100.00%]
   104 page-faults   #0.000 M/sec   
 ( +-  2.73% )
   298,416,906,437 cycles#0.621 GHz 
 ( +-  4.08% ) [15.01%]
   812,072,320,370 stalled-cycles-frontend   #  272.13% frontend cycles idle
 ( +-  1.89% ) [25.01%]
   685,633,562,247 stalled-cycles-backend#  229.76% backend  cycles idle
 ( +-  2.50% ) [35.00%]
   117,665,891,888 instructions  #0.39  insns per cycle
 #6.90  stalled cycles per insn 
 ( +-  2.22% ) [45.00%]
26,158,399,505 branches  #   54.424 M/sec   
 ( +-  2.10% ) [50.00%]
   205,688,614 branch-misses #0.79% of all branches 
 ( +-  0.78% ) [50.00%]
27,882,474,171 L1-dcache-loads   #   58.011 M/sec   
 ( +-  1.98% ) [50.00%]
   369,911,372 L1-dcache-load-misses #1.33% of all L1-dcache hits   
 ( +-  0.62% ) [50.00%]
76,240,847 LLC-loads #0.159 M/sec   
 ( +-  1.04% ) [40.00%]
 3,220 LLC-load-misses   #0.00% of all LL-cache hits
 ( +- 19.49% ) [ 5.00%]

  60.074059340 seconds time elapsed 
 ( +-  0.00% )

* with patches
rr values: 6732.65 6920.1 6909.46 7032.41 6864.43 6897.6 6815.19 
6967.83 6849.23 6929.52
sample size: 10
mean: 6891.842
standard deviation: 82.91901
quantiles: 6732.65 6853.03 6903.53 6927.165 7032.41
6890±80

 Performance counter stats for 'netperf -H 192.168.9.30 -l60 -T 0,0 -t omni -- 
-d rr' (10 runs):

 480675.949728 task-clock#8.001 CPUs utilized   
 ( +-  0.01% ) [100.00%]
   850,461

[PATCH v2 1/3] tg3: Limit minimum tx queue wakeup threshold

2014-08-21 Thread Benjamin Poirier

tx_pending may be set by the user (via ethtool -G) to a low enough value that
TG3_TX_WAKEUP_THRESH becomes smaller than MAX_SKB_FRAGS + 1. This may cause
the tx queue to be waked when there are in fact not enough descriptors to
handle an skb with max frags. This in turn causes tg3_start_xmit() to return
NETDEV_TX_BUSY and print error messages. Fix the problem by putting a limit to
how low TG3_TX_WAKEUP_THRESH can go.

Signed-off-by: Benjamin Poirier bpoir...@suse.de
---

I noticed the problem in a 3.0 kernel when setting `ethtool eth0 -G tx 50` and
running a netperf TCP_STREAM test. The console fills up with 
[10597.596155] tg3 :06:00.0: eth0: BUG! Tx Ring full when queue awake!
The problem in tg3 remains in current kernels though it does not reproduce as
easily since 5640f76 net: use a per task frag allocator (v3.7-rc1). I
reproduced on current kernels by using the fail_page_alloc fault injection
mechanism to force the creation of skbs with many order-0 frags. Note that the
following script may also trigger another bug (NETDEV WATCHDOG), which is
fixed in the next patch.

$ cat /tmp/doit.sh
#!/bin/bash
#
# Reproducer: use the fail_page_alloc fault-injection mechanism to force the
# creation of skbs with many order-0 frags while netperf is sending.
# Needs root and a kernel with fail_page_alloc available under debugfs.

F=/sys/kernel/debug/fail_page_alloc

# Configure fault injection (see the kernel fault-injection documentation
# for the exact meaning of each knob).
echo -1 > $F/times            # no limit on the number of injected failures
echo 0 > $F/verbose           # do not log each injected failure
echo 0 > $F/ignore-gfp-wait   # also fail allocations that are allowed to wait
echo 1 > $F/task-filter       # only affect tasks flagged via make-it-fail
echo 100 > $F/probability     # fail every eligible allocation

# Start the traffic generator in the background so that its PID can be
# captured with $! and flagged for fault injection below.
netperf -H 192.168.9.30 -l100 -t omni -- -d send &

n=$!

# Give netperf a moment to start before turning on failures for it.
sleep 0.3
echo 1 > /proc/$n/make-it-fail
sleep 10

kill $n

---
 drivers/net/ethernet/broadcom/tg3.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/tg3.c 
b/drivers/net/ethernet/broadcom/tg3.c
index 3ac5d23..b11c0fd 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -202,7 +202,8 @@ static inline void _tg3_flag_clear(enum TG3_FLAGS flag, 
unsigned long *bits)
 #endif
 
 /* minimum number of free TX descriptors required to wake up TX process */
-#define TG3_TX_WAKEUP_THRESH(tnapi)((tnapi)-tx_pending / 4)
+#define TG3_TX_WAKEUP_THRESH(tnapi)max_t(u32, (tnapi)-tx_pending / 4, \
+ MAX_SKB_FRAGS + 1)
 #define TG3_TX_BD_DMA_MAX_2K   2048
 #define TG3_TX_BD_DMA_MAX_4K   4096
 
-- 
1.8.4.5

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v2 2/3] tg3: Fix tx_pending check for MAX_SKB_FRAGS

2014-08-21 Thread Benjamin Poirier

The rest of the driver assumes at least one free descriptor in the tx ring.
Therefore, since an skb with max frags takes up (MAX_SKB_FRAGS + 1)
descriptors, tx_pending must be > (MAX_SKB_FRAGS + 1).

Signed-off-by: Benjamin Poirier bpoir...@suse.de
---
Changes v1->v2
Moved ahead in the series from 3/3 to 2/3, no functional change

I reproduced this bug using the same approach explained in patch 1.
The bug reproduces with tx_pending = 18

---
 drivers/net/ethernet/broadcom/tg3.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/tg3.c 
b/drivers/net/ethernet/broadcom/tg3.c
index b11c0fd..0cecd6d 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -12319,7 +12319,7 @@ static int tg3_set_ringparam(struct net_device *dev, 
struct ethtool_ringparam *e
if ((ering-rx_pending  tp-rx_std_ring_mask) ||
(ering-rx_jumbo_pending  tp-rx_jmb_ring_mask) ||
(ering-tx_pending  TG3_TX_RING_SIZE - 1) ||
-   (ering-tx_pending = MAX_SKB_FRAGS) ||
+   (ering-tx_pending = MAX_SKB_FRAGS + 1) ||
(tg3_flag(tp, TSO_BUG) 
 (ering-tx_pending = (MAX_SKB_FRAGS * 3
return -EINVAL;
-- 
1.8.4.5

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 2/3] tg3: Fix tx_pending checks for tg3_tso_bug

2014-08-21 Thread Benjamin Poirier

On 2014/08/21 02:51, Michael Chan wrote:
 On Wed, 2014-08-20 at 18:23 -0700, Benjamin Poirier wrote: 
  On 2014/08/19 16:10, Michael Chan wrote:
   On Tue, 2014-08-19 at 11:52 -0700, Benjamin Poirier wrote: 
@@ -7838,11 +7838,14 @@ static int tg3_tso_bug(struct tg3 *tp, struct 
tg3_napi *tnapi,
   struct netdev_queue *txq, struct sk_buff *skb)
 {
struct sk_buff *segs, *nskb;
-   u32 frag_cnt_est = skb_shinfo(skb)-gso_segs * 3;
 
-   /* Estimate the number of fragments in the worst case */
-   if (unlikely(tg3_tx_avail(tnapi) = frag_cnt_est)) {
+   if (unlikely(tg3_tx_avail(tnapi) = skb_shinfo(skb)-gso_segs)) 
{
+   trace_printk(stopping queue, %d = %d\n,
+tg3_tx_avail(tnapi), 
skb_shinfo(skb)-gso_segs);
netif_tx_stop_queue(txq);
+   trace_printk(stopped queue\n);
+   tnapi-wakeup_thresh = skb_shinfo(skb)-gso_segs;
+   BUG_ON(tnapi-wakeup_thresh = tnapi-tx_pending);
 
/* netif_tx_stop_queue() must be done before checking
 * checking tx index in tg3_tx_avail() below, because 
in 
   
   I don't quite understand this logic and I must be missing something.
   gso_segs is the number of TCP segments the large packet will be broken
   up into.  If it exceeds dev-gso_max_segs, it means it exceeds
   hardware's capabilty and it will do GSO instead of TSO.  But in this
   case in tg3_tso_bug(), we are doing GSO and we may not have enough DMA
   descriptors to do GSO.  Each gso_seg typically requires 2 DMA
   descriptors.
  
  You're right, I had wrongly assumed that the skbs coming out of
  skb_gso_segment() were linear. I'll address that in v2 of the patch by 
  masking
  out NETIF_F_SG in tg3_tso_bug().
  
 
 While masking out NETF_F_SG will work, it will also disable checksum
 offload for the whole device momentarily.
 
  I noticed another issue that had not occurred to me: when tg3_tso_bug is
  submitting a full gso segs sequence to tg3_start_xmit, the code at the end 
  of
  that function stops the queue before the end of the sequence because 
  tx_avail
  becomes smaller than (MAX_SKB_FRAGS + 1). The transmission actually proceeds
  because tg3_tso_bug() does not honour the queue state but it seems rather
  unsightly to me.
 
 That's why the number of DMA descriptors that we estimate has to be
 accurate.  It's unfortunate that the various tg3 chips require so many
 different workarounds.  The objective is to keep TSO and checksum
 enabled and workaround the occasional packets using GSO.

Ah, now I understand the reason for the * 3 in
u32 frag_cnt_est = skb_shinfo(skb)->gso_segs * 3;

/* Estimate the number of fragments in the worst case */
but that is not really the worst case. It's not forbidden to have more than
two frags per skb output from skb_gso_segment(). I've kept this estimation
approach but I've added code to validate the estimation or else linearize the
skb.

 
 I believe that the boundary error conditions that you brought up can be
 addressed by enforcing some limits on the tx ring size and by reducing
 gso_max_size/gso_max_segs when necessary (for example when MTU and/or
 ring size is set very small).
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 1/3] tg3: Limit minimum tx queue wakeup threshold

2014-08-21 Thread Benjamin Poirier

On 2014/08/19 15:00, Michael Chan wrote:
 On Tue, 2014-08-19 at 11:52 -0700, Benjamin Poirier wrote: 
  diff --git a/drivers/net/ethernet/broadcom/tg3.c 
  b/drivers/net/ethernet/broadcom/tg3.c
  index 3ac5d23..b11c0fd 100644
  --- a/drivers/net/ethernet/broadcom/tg3.c
  +++ b/drivers/net/ethernet/broadcom/tg3.c
  @@ -202,7 +202,8 @@ static inline void _tg3_flag_clear(enum TG3_FLAGS flag, 
  unsigned long *bits)
   #endif
   
   /* minimum number of free TX descriptors required to wake up TX process */
  -#define TG3_TX_WAKEUP_THRESH(tnapi)((tnapi)-tx_pending / 4)
  +#define TG3_TX_WAKEUP_THRESH(tnapi)max_t(u32, (tnapi)-tx_pending / 4, 
  \
  + MAX_SKB_FRAGS + 1)
 
 I think we should precompute this and store it in something like
 tp-tx_wake_thresh.

I've tried this by adding the following patch at the end of the v2
series but I did not measure a significant latency improvement. Was
there another reason for the change?

Here are the performance results. The first set of numbers are the same
as those found in patch v2 3/3.

# perf stat -r10 -ad netperf -H 192.168.9.30 -l60 -T 0,0 -t omni -- -d rr

* with patches 1-3
rr values: 6732.65 6920.1 6909.46 7032.41 6864.43 6897.6 6815.19 
6967.83 6849.23 6929.52
sample size: 10
mean: 6891.842
standard deviation: 82.91901
quantiles: 6732.65 6853.03 6903.53 6927.165 7032.41
6890±80

 Performance counter stats for 'netperf -H 192.168.9.30 -l60 -T 0,0 -t omni -- 
-d rr' (10 runs):

 480675.949728 task-clock#8.001 CPUs utilized   
 ( +-  0.01% ) [100.00%]
   850,461 context-switches  #0.002 M/sec   
 ( +-  0.37% ) [100.00%]
   564 CPU-migrations#0.000 M/sec   
 ( +-  5.67% ) [100.00%]
   417 page-faults   #0.000 M/sec   
 ( +- 76.04% )
   287,019,442,295 cycles#0.597 GHz 
 ( +-  7.16% ) [15.01%]
   828,198,830,689 stalled-cycles-frontend   #  288.55% frontend cycles idle
 ( +-  3.01% ) [25.01%]
   718,230,307,166 stalled-cycles-backend#  250.24% backend  cycles idle
 ( +-  3.53% ) [35.00%]
   117,976,598,188 instructions  #0.41  insns per cycle
 #7.02  stalled cycles per insn 
 ( +-  4.06% ) [45.00%]
26,715,853,108 branches  #   55.580 M/sec   
 ( +-  3.77% ) [50.00%]
   198,787,673 branch-misses #0.74% of all branches 
 ( +-  0.86% ) [50.00%]
28,416,922,166 L1-dcache-loads   #   59.119 M/sec   
 ( +-  3.54% ) [50.00%]
   367,613,007 L1-dcache-load-misses #1.29% of all L1-dcache hits   
 ( +-  0.47% ) [50.00%]
75,260,575 LLC-loads #0.157 M/sec   
 ( +-  2.24% ) [40.00%]
 5,777 LLC-load-misses   #0.01% of all LL-cache hits
 ( +- 36.03% ) [ 5.00%]

  60.077898757 seconds time elapsed 
 ( +-  0.01% )

* with patches 1-3 + tx_wake_thresh_def
rr values: 6636.87 6874.05 6916.29 6961.68 6941.3 6841.44 6829.05 
6806.55 6846.04 6958.39
sample size: 10
mean: 6861.166
standard deviation: 96.67967
quantiles: 6636.87 6832.148 6860.045 6935.048 6961.68
6900±100

 Performance counter stats for 'netperf -H 192.168.9.30 -l60 -T 0,0 -t omni -- 
-d rr' (10 runs):

 480688.653656 task-clock#8.001 CPUs utilized   
 ( +-  0.01% ) [100.00%]
   846,980 context-switches  #0.002 M/sec   
 ( +-  0.40% ) [100.00%]
   524 CPU-migrations#0.000 M/sec   
 ( +- 11.82% ) [100.00%]
   420 page-faults   #0.000 M/sec   
 ( +- 75.31% )
   275,602,421,981 cycles#0.573 GHz 
 ( +-  3.23% ) [15.01%]
   806,335,406,844 stalled-cycles-frontend   #  292.57% frontend cycles idle
 ( +-  2.16% ) [25.01%]
   640,757,376,054 stalled-cycles-backend#  232.49% backend  cycles idle
 ( +-  2.46% ) [35.00%]
   113,241,018,220 instructions  #0.41  insns per cycle
 #7.12  stalled cycles per insn 
 ( +-  1.93% ) [45.00%]
25,479,064,973 branches  #   53.005 M/sec   
 ( +-  1.96% ) [50.00%]
   205,483,191 branch-misses #0.81% of all branches 
 ( +-  0.75% ) [50.00%]
27,209,883,125 L1-dcache-loads   #   56.606 M/sec   
 ( +-  1.87% ) [50.00%]
   361,721,478 L1-dcache-load-misses #1.33% of all L1-dcache hits   
 ( +-  0.51% ) [50.00%]
80,669,260 LLC-loads #0.168 M/sec

Re: [PATCH 1/3] tg3: Limit minimum tx queue wakeup threshold

2014-08-21 Thread Benjamin Poirier

On 2014/08/21 15:32, Michael Chan wrote:
 On Thu, 2014-08-21 at 15:04 -0700, Benjamin Poirier wrote: 
  On 2014/08/19 15:00, Michael Chan wrote:
   On Tue, 2014-08-19 at 11:52 -0700, Benjamin Poirier wrote: 
diff --git a/drivers/net/ethernet/broadcom/tg3.c 
b/drivers/net/ethernet/broadcom/tg3.c
index 3ac5d23..b11c0fd 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -202,7 +202,8 @@ static inline void _tg3_flag_clear(enum TG3_FLAGS 
flag, unsigned long *bits)
 #endif
 
 /* minimum number of free TX descriptors required to wake up TX 
process */
-#define TG3_TX_WAKEUP_THRESH(tnapi)((tnapi)-tx_pending / 
4)
+#define TG3_TX_WAKEUP_THRESH(tnapi)max_t(u32, (tnapi)-tx_pending 
/ 4, \
+ MAX_SKB_FRAGS + 1)
   
   I think we should precompute this and store it in something like
   tp-tx_wake_thresh.
  
  I've tried this by adding the following patch at the end of the v2
  series but I did not measure a significant latency improvement. Was
  there another reason for the change? 
 
 Just performance.  The wake up threshold is checked in the tx fast path
 in both start_xmit() and tg3_tx().  I would optimize such code for speed

I don't see what you mean. The code in those two functions that used to
invoke TG3_TX_WAKEUP_THRESH is wrapped in unlikely() conditions. You
can't tell me that's the fast path ;) It's only checked when the queue
is stopped.

Moreover, the patches I've sent already add tg3_napi.wakeup_thresh. It
is over those patches that I've made the measurements.

 as much as possible.  In the current code, it was just a right shift
 operation.  Now, with max_t() added, I think I prefer having it
 pre-computed.  The performance difference may not be measurable, but I
 think the compiled code size may be smaller too.

Maybe in certain areas, but not overall:

with v2 patches 1-3
   textdata bss dec hex filename
 1494951247   0  150742   24cd6 drivers/net/ethernet/broadcom/tg3.o
with v2 patches 1-3 + tx_wake_thresh_def
   textdata bss dec hex filename
 1495241247   0  150771   24cf3 drivers/net/ethernet/broadcom/tg3.o

I really don't see a gain.

 
 --
 To unsubscribe from this list: send the line unsubscribe linux-kernel in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 Please read the FAQ at  http://www.tux.org/lkml/
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 3/3] tg3: Fix tx_pending check for MAX_SKB_FRAGS

2014-08-19 Thread Benjamin Poirier

The rest of the driver assumes at least one free descriptor in the tx ring.
Therefore, since an skb with max frags takes up (MAX_SKB_FRAGS + 1)
descriptors, tx_pending must be > (MAX_SKB_FRAGS + 1).

Signed-off-by: Benjamin Poirier bpoir...@suse.de
---

Currently, it is possible to set tx_pending = MAX_SKB_FRAGS + 1. This leads to
a netdev watchdog tx timeout. Depending on whether the previous patches in
this series are applied or not, the timeout happens as soon as tx_pending is
updated or after an skb with max frags is submitted for transmission.

I reproduced this bug using the same approach explained in patch 1.
The bug reproduces with tx_pending = 18

---
 drivers/net/ethernet/broadcom/tg3.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/tg3.c 
b/drivers/net/ethernet/broadcom/tg3.c
index 7022f6d..27e2701 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -12328,7 +12328,7 @@ static int tg3_set_ringparam(struct net_device *dev, 
struct ethtool_ringparam *e
if ((ering-rx_pending  tp-rx_std_ring_mask) ||
(ering-rx_jumbo_pending  tp-rx_jmb_ring_mask) ||
(ering-tx_pending  TG3_TX_RING_SIZE - 1) ||
-   (ering-tx_pending = MAX_SKB_FRAGS))
+   (ering-tx_pending = MAX_SKB_FRAGS + 1))
return -EINVAL;
 
if (netif_running(dev)) {
-- 
1.8.4.5

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 1/3] tg3: Limit minimum tx queue wakeup threshold

2014-08-19 Thread Benjamin Poirier

tx_pending may be set by the user (via ethtool -G) to a low enough value that
TG3_TX_WAKEUP_THRESH becomes smaller than MAX_SKB_FRAGS + 1. This may cause
the tx queue to be waked when there are in fact not enough descriptors to
handle an skb with max frags. This in turn causes tg3_start_xmit() to return
NETDEV_TX_BUSY and print error messages. Fix the problem by putting a limit to
how low TG3_TX_WAKEUP_THRESH can go.

Signed-off-by: Benjamin Poirier bpoir...@suse.de
---

I noticed the problem in a 3.0 kernel when setting `ethtool eth0 -G tx 50` and
running a netperf TCP_STREAM test. The console fills up with 
[10597.596155] tg3 :06:00.0: eth0: BUG! Tx Ring full when queue awake!
The problem in tg3 remains in current kernels though it does not reproduce as
easily since 5640f76 net: use a per task frag allocator (v3.7-rc1). I
reproduced on current kernels by using the fail_page_alloc fault injection
mechanism to force the creation of skbs with many order-0 frags. Note that the
following script may also trigger another bug (NETDEV WATCHDOG), which is
fixed in the next patch.

$ cat /tmp/doit.sh
#!/bin/bash
#
# Reproducer: use the fail_page_alloc fault-injection mechanism to force the
# creation of skbs with many order-0 frags while netperf is sending.
# Needs root and a kernel with fail_page_alloc available under debugfs.

F=/sys/kernel/debug/fail_page_alloc

# Configure fault injection (see the kernel fault-injection documentation
# for the exact meaning of each knob).
echo -1 > $F/times            # no limit on the number of injected failures
echo 0 > $F/verbose           # do not log each injected failure
echo 0 > $F/ignore-gfp-wait   # also fail allocations that are allowed to wait
echo 1 > $F/task-filter       # only affect tasks flagged via make-it-fail
echo 100 > $F/probability     # fail every eligible allocation

# Start the traffic generator in the background so that its PID can be
# captured with $! and flagged for fault injection below.
netperf -H 192.168.9.30 -l100 -t omni -- -d send &

n=$!

# Give netperf a moment to start before turning on failures for it.
sleep 0.3
echo 1 > /proc/$n/make-it-fail
sleep 10

kill $n

---
 drivers/net/ethernet/broadcom/tg3.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/tg3.c 
b/drivers/net/ethernet/broadcom/tg3.c
index 3ac5d23..b11c0fd 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -202,7 +202,8 @@ static inline void _tg3_flag_clear(enum TG3_FLAGS flag, 
unsigned long *bits)
 #endif
 
 /* minimum number of free TX descriptors required to wake up TX process */
-#define TG3_TX_WAKEUP_THRESH(tnapi)((tnapi)-tx_pending / 4)
+#define TG3_TX_WAKEUP_THRESH(tnapi)max_t(u32, (tnapi)-tx_pending / 4, \
+ MAX_SKB_FRAGS + 1)
 #define TG3_TX_BD_DMA_MAX_2K   2048
 #define TG3_TX_BD_DMA_MAX_4K   4096
 
-- 
1.8.4.5

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 2/3] tg3: Fix tx_pending checks for tg3_tso_bug

2014-08-19 Thread Benjamin Poirier

In tg3_set_ringparam(), the tx_pending test to cover the cases where
tg3_tso_bug() is entered has two problems
1) the check is only done for certain hardware whereas the workaround
is now used more broadly. IOW, the check may not be performed when it
is needed.
2) the check is too optimistic.

For example, with a 5761 (SHORT_DMA_BUG), tg3_set_ringparam() skips over the
tx_pending <= (MAX_SKB_FRAGS * 3) check because TSO_BUG is false. Even if it
did do the check, with a full sized skb, frag_cnt_est = 135 but the check is
for <= MAX_SKB_FRAGS * 3 (= 17 * 3 = 51). So the check is insufficient. This
leads to the following situation: by setting, ex. tx_pending = 100, there can
be an skb that triggers tg3_tso_bug() and that is large enough to cause
tg3_tso_bug() to stop the queue even when it is empty. We then end up with a
netdev watchdog transmit timeout.

Given that 1) some of the conditions tested for in tg3_tx_frag_set() apply
regardless of the chipset flags and that 2) it is difficult to estimate ahead
of time the max possible number of frames that a large skb may be split into
by gso, we instead take the approach of adjusting dev->gso_max_segs according
to the requested tx_pending size.

This puts us in the exceptional situation that a single skb that triggers
tg3_tso_bug() may require the entire tx ring. Usually the tx queue is woken up
when at least a quarter of it is available (TG3_TX_WAKEUP_THRESH) but that
would be insufficient now. To avoid useless wakeups, the tx queue wake up
threshold is made dynamic.

Signed-off-by: Benjamin Poirier bpoir...@suse.de
---

I reproduced this bug using the same approach explained in patch 1.
The bug reproduces with tx_pending = 135

---
 drivers/net/ethernet/broadcom/tg3.c | 31 ---
 drivers/net/ethernet/broadcom/tg3.h |  1 +
 2 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/tg3.c 
b/drivers/net/ethernet/broadcom/tg3.c
index b11c0fd..7022f6d 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -6609,10 +6609,10 @@ static void tg3_tx(struct tg3_napi *tnapi)
smp_mb();
 
if (unlikely(netif_tx_queue_stopped(txq) 
-(tg3_tx_avail(tnapi)  TG3_TX_WAKEUP_THRESH(tnapi {
+(tg3_tx_avail(tnapi)  tnapi-wakeup_thresh))) {
__netif_tx_lock(txq, smp_processor_id());
if (netif_tx_queue_stopped(txq) 
-   (tg3_tx_avail(tnapi)  TG3_TX_WAKEUP_THRESH(tnapi)))
+   (tg3_tx_avail(tnapi)  tnapi-wakeup_thresh))
netif_tx_wake_queue(txq);
__netif_tx_unlock(txq);
}
@@ -7838,11 +7838,14 @@ static int tg3_tso_bug(struct tg3 *tp, struct tg3_napi 
*tnapi,
   struct netdev_queue *txq, struct sk_buff *skb)
 {
struct sk_buff *segs, *nskb;
-   u32 frag_cnt_est = skb_shinfo(skb)-gso_segs * 3;
 
-   /* Estimate the number of fragments in the worst case */
-   if (unlikely(tg3_tx_avail(tnapi) = frag_cnt_est)) {
+   if (unlikely(tg3_tx_avail(tnapi) = skb_shinfo(skb)-gso_segs)) {
+   trace_printk(stopping queue, %d = %d\n,
+tg3_tx_avail(tnapi), skb_shinfo(skb)-gso_segs);
netif_tx_stop_queue(txq);
+   trace_printk(stopped queue\n);
+   tnapi-wakeup_thresh = skb_shinfo(skb)-gso_segs;
+   BUG_ON(tnapi-wakeup_thresh = tnapi-tx_pending);
 
/* netif_tx_stop_queue() must be done before checking
 * checking tx index in tg3_tx_avail() below, because in
@@ -7850,7 +7853,7 @@ static int tg3_tso_bug(struct tg3 *tp, struct tg3_napi 
*tnapi,
 * netif_tx_queue_stopped().
 */
smp_mb();
-   if (tg3_tx_avail(tnapi) = frag_cnt_est)
+   if (tg3_tx_avail(tnapi) = tnapi-wakeup_thresh)
return NETDEV_TX_BUSY;
 
netif_tx_wake_queue(txq);
@@ -7905,12 +7908,17 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, 
struct net_device *dev)
if (unlikely(budget = (skb_shinfo(skb)-nr_frags + 1))) {
if (!netif_tx_queue_stopped(txq)) {
netif_tx_stop_queue(txq);
+   tnapi-wakeup_thresh = TG3_TX_WAKEUP_THRESH(tnapi);
 
/* This is a hard error, log it. */
netdev_err(dev,
   BUG! Tx Ring full when queue awake!\n);
}
-   return NETDEV_TX_BUSY;
+   smp_mb();
+   if (tg3_tx_avail(tnapi) = tnapi-wakeup_thresh)
+   return NETDEV_TX_BUSY;
+
+   netif_tx_wake_queue(txq);
}
 
entry = tnapi-tx_prod;
@@ -8089,6 +8097,7 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, 
struct net_device *dev)
tnapi-tx_prod = entry;
if (unlikely

Re: Regarding tx-nocache-copy in the Sheevaplug

2014-10-15 Thread Benjamin Poirier

On 2014/10/13 12:52, Lluís Batlle i Rossell wrote:
> Hello,
>
> on the 7th of January 2014 this patch was applied:
> https://lkml.org/lkml/2014/1/7/307
>
> [PATCH v2] net: Do not enable tx-nocache-copy by default
>
> In the Sheevaplug (ARM Feroceon 88FR131 from Marvell) this made packets to be
> sent corrupted. I think this machine has something special about the cache.
>
> Enabling back this tx-nocache-copy (as it used to be before the patch) the
> transfers work fine again. I think that most people, encountering this problem,
> completely disable the tx offload instead of enabling back this setting.
>
> Is this an ARM kernel problem regarding this platform?

This is odd, only x86 defines ARCH_HAS_NOCACHE_UACCESS. On arm,
skb_do_copy_data_nocache() should end up using __copy_from_user()
regardless of tx-nocache-copy.
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: Regarding tx-nocache-copy in the Sheevaplug

2014-10-16 Thread Benjamin Poirier

On 2014/10/15 15:45, Eric Dumazet wrote:
> On Wed, 2014-10-15 at 14:57 -0700, Benjamin Poirier wrote:
> > On 2014/10/13 12:52, Lluís Batlle i Rossell wrote:
> > > Hello,
> > >
> > > on the 7th of January 2014 this patch was applied:
> > > https://lkml.org/lkml/2014/1/7/307
> > >
> > > [PATCH v2] net: Do not enable tx-nocache-copy by default
> > >
> > > In the Sheevaplug (ARM Feroceon 88FR131 from Marvell) this made packets
> > > to be sent corrupted. I think this machine has something special about
> > > the cache.
> > >
> > > Enabling back this tx-nocache-copy (as it used to be before the patch) the
> > > transfers work fine again. I think that most people, encountering this
> > > problem, completely disable the tx offload instead of enabling back this
> > > setting.
> > >
> > > Is this an ARM kernel problem regarding this platform?
> >
> > This is odd, only x86 defines ARCH_HAS_NOCACHE_UACCESS. On arm,
> > skb_do_copy_data_nocache() should end up using __copy_from_user()
> > regardless of tx-nocache-copy.
>
> kmap_atomic()/kunmap_atomic() is missing, so we lack
> __cpuc_flush_dcache_area() operations.
>

You lost me there.
1) I don't see the link
2) It seems kmap_atomic and so on are there:
$ grep kmap_atomic System.map-3.16-2-kirkwood
c0014838 T kmap_atomic
c001491c T kmap_atomic_pfn
c00149a4 T kmap_atomic_to_page

MACH_KIRKWOOD selects CPU_FEROCEON which has
__cpuc_flush_dcache_area ->
cpu_cache.flush_kern_dcache_area ->
feroceon_flush_kern_dcache_area
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: Regarding tx-nocache-copy in the Sheevaplug

2014-10-17 Thread Benjamin Poirier

On 2014/10/16 19:46, Lluís Batlle i Rossell wrote:
[...]
 
> Hello all,
>
> it seems I was a bit wrong - although enabling back tx-nocache-copy makes the
> tx-errors happen much less often (ssh complaining about HMAC), they still
> happen. It seems that something was introduced in some recent kernels that
> broke the tx offload.
>
> I have no idea what it can be, but since 2.6 until at least 3.10 the network
> driver worked fine with tx offload in this sheevaplug board.

It's not the most pleasant alternative but if you can be sure enough
whether the problem is occurring or not, you could try bisecting,
possibly limiting the bisection to mv643xx

$ git bisect start v3.16.3 v3.10 -- drivers/net/ethernet/marvell/mv643xx_eth.c
Bisecting: 16 revisions left to test after this (roughly 4 steps)

The problem might be outside of the driver though.
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH net v6 0/4] tg3: tx_pending fixes

2014-09-04 Thread Benjamin Poirier


Extra info regarding patch 4:
This version of the series calls gso_segment() without NETIF_F_SG. This avoids
the need for desc_cnt_est in tg3_tso_bug() as in previous versions of this
patch series. Since Michael had previously raised concerns about gso_segment
without SG, I ran some netperf throughput tests. I used a small patch to force
tg3_tso_bug() to be called even when it is not needed [1].

root@linux-y64m:~# perf stat -r10 -ad netperf -H 192.168.9.30 -l60 -T 0,0 -t 
omni -- -d send

* original tg3_tso_bug() (ie. without patch 4/4)
  781±2 10^6bits/s
  6.60 cycle/bit
* gso_segment() without SG (current series)
  801.0±0.9 10^6bits/s
  5.79 cycle/bit
* gso_segment() with SG (alternate patch 4/4 [2])
  783±2 10^6bits/s
  7.25 cycle/bit

(For reference, with the original tg3_tso_bug() implementation but without
forcing it to be called, the throughput I get is 822±1 10^6bits/s @ 3.82
cycle/bit with 0 invocations of tg3_tso_bug)

[1] fault injection patch

---
 drivers/net/ethernet/broadcom/tg3.c | 27 +++
 1 file changed, 27 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/tg3.c 
b/drivers/net/ethernet/broadcom/tg3.c
index cb77ae9..f9144dc 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -47,6 +47,7 @@
 #include linux/ssb/ssb_driver_gige.h
 #include linux/hwmon.h
 #include linux/hwmon-sysfs.h
+#include linux/debugfs.h
 
 #include net/checksum.h
 #include net/ip.h
@@ -468,6 +469,27 @@ static const struct {
 #define TG3_NUM_TEST   ARRAY_SIZE(ethtool_test_keys)
 
 
+/* debugging stuff */
+static u32 tg3_do_mangle;
+static struct dentry *tg3_mangle_debugfs;
+
+static int __init tg3_mod_init(void)
+{
+   tg3_mangle_debugfs = debugfs_create_u32(tg3_do_mangle, S_IRUGO |
+   S_IWUSR, NULL,
+   tg3_do_mangle);
+
+   return 0;
+}
+module_init(tg3_mod_init);
+
+static void __exit tg3_mod_exit(void)
+{
+   debugfs_remove(tg3_mangle_debugfs);
+}
+module_exit(tg3_mod_exit);
+/* --- */
+
 static void tg3_write32(struct tg3 *tp, u32 off, u32 val)
 {
writel(val, tp-regs + off);
@@ -8048,6 +8070,11 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, 
struct net_device *dev)
would_hit_hwbug = 1;
break;
}
+
+   if (tg3_do_mangle  0) {
+   would_hit_hwbug = 4;
+   break;
+   }
}
}
 
-- 

[2] alternate patch 4

call gso_segment with SG (without removing it, actually)

---
 drivers/net/ethernet/broadcom/tg3.c | 80 +++--
 1 file changed, 59 insertions(+), 21 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/tg3.c 
b/drivers/net/ethernet/broadcom/tg3.c
index ee93b51..1ecb393 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -205,6 +205,9 @@ static inline void _tg3_flag_clear(enum TG3_FLAGS flag, 
unsigned long *bits)
 /* minimum number of free TX descriptors required to wake up TX process */
 #define TG3_TX_WAKEUP_THRESH(tnapi)max_t(u32, (tnapi)-tx_pending / 4, \
  MAX_SKB_FRAGS + 1)
+/* estimate a certain number of descriptors per gso segment */
+#define TG3_TX_DESC_PER_SEG(seg_nb)((seg_nb) * 3)
+
 #define TG3_TX_BD_DMA_MAX_2K   2048
 #define TG3_TX_BD_DMA_MAX_4K   4096
 
@@ -7852,6 +7855,8 @@ static int tigon3_dma_hwbug_workaround(struct tg3_napi 
*tnapi,
 }
 
 static netdev_tx_t tg3_start_xmit(struct sk_buff *, struct net_device *);
+static netdev_tx_t __tg3_start_xmit(struct sk_buff *, struct net_device *,
+   u32);
 
 /* Returns true if the queue has been stopped. Note that it may have been
  * restarted since.
@@ -7888,27 +7893,56 @@ static inline bool tg3_maybe_stop_txq(struct tg3_napi 
*tnapi,
 static int tg3_tso_bug(struct tg3 *tp, struct tg3_napi *tnapi,
   struct netdev_queue *txq, struct sk_buff *skb)
 {
-   struct sk_buff *segs, *nskb;
-   u32 frag_cnt_est = skb_shinfo(skb)-gso_segs * 3;
+   unsigned int segs_remaining = skb_shinfo(skb)-gso_segs;
+   u32 desc_cnt_est = TG3_TX_DESC_PER_SEG(segs_remaining);
 
-   /* Estimate the number of fragments in the worst case */
-   tg3_maybe_stop_txq(tnapi, txq, frag_cnt_est, frag_cnt_est);
-   if (netif_tx_queue_stopped(txq))
-   return NETDEV_TX_BUSY;
+   if (unlikely(tg3_tx_avail(tnapi) = desc_cnt_est)) {
+   if (!skb_is_nonlinear(skb) || skb_linearize(skb))
+   goto tg3_tso_bug_drop;
+   tg3_start_xmit(skb, tp-dev);
+   } else {
+   struct sk_buff *segs, *nskb;
 
-   segs = skb_gso_segment(skb, tp-dev-features 
-   ~(NETIF_F_TSO |

[PATCH net v6 1/4] tg3: Limit minimum tx queue wakeup threshold

2014-09-04 Thread Benjamin Poirier

tx_pending may be set by the user (via ethtool -G) to a low enough value that
TG3_TX_WAKEUP_THRESH becomes smaller than MAX_SKB_FRAGS + 1. This may cause
the tx queue to be waked when there are in fact not enough descriptors to
handle an skb with max frags. This in turn causes tg3_start_xmit() to return
NETDEV_TX_BUSY and print error messages. Fix the problem by putting a limit to
how low TG3_TX_WAKEUP_THRESH can go.

Signed-off-by: Benjamin Poirier bpoir...@suse.de

---

I noticed the problem in a 3.0 kernel when setting `ethtool eth0 -G tx 50` and
running a netperf TCP_STREAM test. The console fills up with
[10597.596155] tg3 0000:06:00.0: eth0: BUG! Tx Ring full when queue awake!
The problem in tg3 remains in current kernels though it does not reproduce as
easily since 5640f76 "net: use a per task frag allocator" (v3.7-rc1). I
reproduced on current kernels by using the fail_page_alloc fault injection
mechanism to force the creation of skbs with many order-0 frags. Note that the
following script may also trigger another bug (NETDEV WATCHDOG), which is
fixed in the next patch.

$ cat /tmp/doit.sh

F=/sys/kernel/debug/fail_page_alloc

echo -1 > $F/times
echo 0 > $F/verbose
echo 0 > $F/ignore-gfp-wait
echo 1 > $F/task-filter
echo 100 > $F/probability

netperf -H 192.168.9.30 -l100 -t omni -- -d send &

n=$!

sleep 0.3
echo 1 > /proc/$n/make-it-fail
sleep 10

kill $n
---
 drivers/net/ethernet/broadcom/tg3.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/tg3.c 
b/drivers/net/ethernet/broadcom/tg3.c
index cb77ae9..81b3a57 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -202,7 +202,8 @@ static inline void _tg3_flag_clear(enum TG3_FLAGS flag, 
unsigned long *bits)
 #endif
 
 /* minimum number of free TX descriptors required to wake up TX process */
-#define TG3_TX_WAKEUP_THRESH(tnapi)((tnapi)-tx_pending / 4)
+#define TG3_TX_WAKEUP_THRESH(tnapi)max_t(u32, (tnapi)-tx_pending / 4, \
+ MAX_SKB_FRAGS + 1)
 #define TG3_TX_BD_DMA_MAX_2K   2048
 #define TG3_TX_BD_DMA_MAX_4K   4096
 
-- 
1.8.4.5

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH net v6 3/4] tg3: Move tx queue stop logic to its own function

2014-09-04 Thread Benjamin Poirier

It is duplicated. Also, the first instance in tg3_start_xmit() is racy.
Consider:

tg3_start_xmit()
	if budget <= ...
				tg3_tx()
					(free up the entire ring)
					tx_cons =
					smp_mb
					if queue_stopped and tx_avail, NO
		if !queue_stopped
			stop queue
		return NETDEV_TX_BUSY

... tx queue stopped forever

Signed-off-by: Benjamin Poirier bpoir...@suse.de

---

Changes v2->v3
* new patch to avoid repeatedly open coding this block in the next patch.

Changes v3->v4
* added a comment to clarify the return value, as suggested
* replaced the BUG_ON with netdev_err(). No need to be so dramatic, this
  situation will trigger a netdev watchdog anyways.
---
 drivers/net/ethernet/broadcom/tg3.c | 75 -
 1 file changed, 40 insertions(+), 35 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/tg3.c 
b/drivers/net/ethernet/broadcom/tg3.c
index c5061c3..6e6b07c 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -7831,6 +7831,35 @@ static int tigon3_dma_hwbug_workaround(struct tg3_napi 
*tnapi,
 
 static netdev_tx_t tg3_start_xmit(struct sk_buff *, struct net_device *);
 
+/* Returns true if the queue has been stopped. Note that it may have been
+ * restarted since.
+ */
+static inline bool tg3_maybe_stop_txq(struct tg3_napi *tnapi,
+ struct netdev_queue *txq,
+ u32 stop_thresh, u32 wakeup_thresh)
+{
+   bool stopped = false;
+
+   if (unlikely(tg3_tx_avail(tnapi) = stop_thresh)) {
+   if (!netif_tx_queue_stopped(txq)) {
+   stopped = true;
+   netif_tx_stop_queue(txq);
+   if (wakeup_thresh = tnapi-tx_pending)
+   netdev_err(tnapi-tp-dev,
+  BUG! wakeup_thresh too large (%u = 
%u)\n,
+  wakeup_thresh, tnapi-tx_pending);
+   }
+   /* netif_tx_stop_queue() must be done before checking tx index
+* in tg3_tx_avail(), because in tg3_tx(), we update tx index
+* before checking for netif_tx_queue_stopped().
+*/
+   smp_mb();
+   if (tg3_tx_avail(tnapi)  wakeup_thresh)
+   netif_tx_wake_queue(txq);
+   }
+   return stopped;
+}
+
 /* Use GSO to workaround all TSO packets that meet HW bug conditions
  * indicated in tg3_tx_frag_set()
  */
@@ -7841,20 +7870,9 @@ static int tg3_tso_bug(struct tg3 *tp, struct tg3_napi 
*tnapi,
u32 frag_cnt_est = skb_shinfo(skb)-gso_segs * 3;
 
/* Estimate the number of fragments in the worst case */
-   if (unlikely(tg3_tx_avail(tnapi) = frag_cnt_est)) {
-   netif_tx_stop_queue(txq);
-
-   /* netif_tx_stop_queue() must be done before checking
-* checking tx index in tg3_tx_avail() below, because in
-* tg3_tx(), we update tx index before checking for
-* netif_tx_queue_stopped().
-*/
-   smp_mb();
-   if (tg3_tx_avail(tnapi) = frag_cnt_est)
-   return NETDEV_TX_BUSY;
-
-   netif_tx_wake_queue(txq);
-   }
+   tg3_maybe_stop_txq(tnapi, txq, frag_cnt_est, frag_cnt_est);
+   if (netif_tx_queue_stopped(txq))
+   return NETDEV_TX_BUSY;
 
segs = skb_gso_segment(skb, tp-dev-features 
~(NETIF_F_TSO | NETIF_F_TSO6));
@@ -7902,16 +7920,13 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, 
struct net_device *dev)
 * interrupt.  Furthermore, IRQ processing runs lockless so we have
 * no IRQ context deadlocks to worry about either.  Rejoice!
 */
-   if (unlikely(budget = (skb_shinfo(skb)-nr_frags + 1))) {
-   if (!netif_tx_queue_stopped(txq)) {
-   netif_tx_stop_queue(txq);
-
-   /* This is a hard error, log it. */
-   netdev_err(dev,
-  BUG! Tx Ring full when queue awake!\n);
-   }
-   return NETDEV_TX_BUSY;
+   if (tg3_maybe_stop_txq(tnapi, txq, skb_shinfo(skb)-nr_frags + 1,
+  TG3_TX_WAKEUP_THRESH(tnapi))) {
+   /* This is a hard error, log it. */
+   netdev_err(dev, BUG! Tx Ring full when queue awake!\n);
}
+   if (netif_tx_queue_stopped(txq))
+   return NETDEV_TX_BUSY;
 
entry = tnapi-tx_prod;
base_flags = 0;
@@ -8087,18 +8102,8 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, 
struct net_device *dev)
tw32_tx_mbox(tnapi

[PATCH net v6 2/4] tg3: Fix tx_pending check for MAX_SKB_FRAGS

2014-09-04 Thread Benjamin Poirier

The rest of the driver assumes at least one free descriptor in the tx ring.
Therefore, since an skb with max frags takes up (MAX_SKB_FRAGS + 1)
descriptors, tx_pending must be > (MAX_SKB_FRAGS + 1).

Signed-off-by: Benjamin Poirier bpoir...@suse.de

---

Changes v1->v2
Moved ahead in the series from 3/3 to 2/3, no functional change

I reproduced this bug using the same approach explained in patch 1.
The bug reproduces with tx_pending = 18
---
 drivers/net/ethernet/broadcom/tg3.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/tg3.c 
b/drivers/net/ethernet/broadcom/tg3.c
index 81b3a57..c5061c3 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -12331,7 +12331,7 @@ static int tg3_set_ringparam(struct net_device *dev, 
struct ethtool_ringparam *e
if ((ering-rx_pending  tp-rx_std_ring_mask) ||
(ering-rx_jumbo_pending  tp-rx_jmb_ring_mask) ||
(ering-tx_pending  TG3_TX_RING_SIZE - 1) ||
-   (ering-tx_pending = MAX_SKB_FRAGS) ||
+   (ering-tx_pending = MAX_SKB_FRAGS + 1) ||
(tg3_flag(tp, TSO_BUG) 
 (ering-tx_pending = (MAX_SKB_FRAGS * 3
return -EINVAL;
-- 
1.8.4.5

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH net v6 4/4] tg3: Fix tx_pending checks for tg3_tso_bug

2014-09-04 Thread Benjamin Poirier

In tg3_set_ringparam(), the tx_pending test to cover the cases where
tg3_tso_bug() is entered has two problems
1) the check is only done for certain hardware whereas the workaround
is now used more broadly. IOW, the check may not be performed when it
is needed.
2) the check is too optimistic.

For example, with a 5761 (SHORT_DMA_BUG), tg3_set_ringparam() skips over the
tx_pending <= (MAX_SKB_FRAGS * 3) check because TSO_BUG is false. Even if it
did do the check, with a full sized skb, frag_cnt_est = 135 but the check is
for <= MAX_SKB_FRAGS * 3 (= 17 * 3 = 51). So the check is insufficient. This
leads to the following situation: by setting, ex. tx_pending = 100, there can
be an skb that triggers tg3_tso_bug() and that is large enough to cause
tg3_tso_bug() to stop the queue even when it is empty. We then end up with a
netdev watchdog transmit timeout.

Given that 1) some of the conditions tested for in tg3_tx_frag_set() apply
regardless of the chipset flags and that 2) it is difficult to estimate ahead
of time the max possible number of frames that a large skb may be split into
by gso, this patch changes tg3_set_ringparam() to ignore the requirements of
tg3_tso_bug(). Those requirements are instead checked in tg3_tso_bug() itself
and if there is not a sufficient number of descriptors available in the tx
queue, the skb is linearized.

This patch also removes the current scheme in tg3_tso_bug() where the number
of descriptors required to transmit an skb is estimated. Instead,
gso_segment() is called without _SG which yields predictable, linear skbs.

Signed-off-by: Benjamin Poirier bpoir...@suse.de

---

Changes v1->v2
* in tg3_set_ringparam(), reduce gso_max_segs further to budget 3 descriptors
  per gso seg instead of only 1 as in v1
* in tg3_tso_bug(), check that this estimation (3 desc/seg) holds, otherwise
  linearize some skbs as needed
* in tg3_start_xmit(), make the queue stop threshold a parameter, for the
  reason explained in the commit description

Changes v2->v3
* use tg3_maybe_stop_txq() instead of repeatedly open coding it
* add the requested tp->tx_dropped++ stat increase in tg3_tso_bug() if
  skb_linearize() fails and we must abort
* in the same code block, add an additional check to stop the queue with the
  default threshold. Otherwise, the netdev_err message at the start of
  __tg3_start_xmit() could be triggered when the next frame is transmitted.
  That is because the previous calls to __tg3_start_xmit() in tg3_tso_bug()
  may have been using a stop_thresh=segs_remaining that is < MAX_SKB_FRAGS +
  1.

Changes v3->v4
* in tg3_set_ringparam(), make sure that wakeup_thresh does not end up being
  >= tx_pending. Identified by Prashant.

Changes v4->v5
* in tg3_set_ringparam(), use TG3_TX_WAKEUP_THRESH() and tp->txq_cnt instead
  of tp->irq_max. Identified by Prashant.

Changes v5->v6
* avoid changing gso_max_segs and making the tx queue wakeup threshold
  dynamic. Instead of stopping the queue when there are not enough descriptors
  available, the skb is linearized.

I reproduced this bug using the same approach explained in patch 1.
The bug reproduces with tx_pending = 135
---
 drivers/net/ethernet/broadcom/tg3.c | 59 -
 1 file changed, 38 insertions(+), 21 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/tg3.c 
b/drivers/net/ethernet/broadcom/tg3.c
index 6e6b07c..a9787a1 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -7830,6 +7830,8 @@ static int tigon3_dma_hwbug_workaround(struct tg3_napi 
*tnapi,
 }
 
 static netdev_tx_t tg3_start_xmit(struct sk_buff *, struct net_device *);
+static netdev_tx_t __tg3_start_xmit(struct sk_buff *, struct net_device *,
+   u32);
 
 /* Returns true if the queue has been stopped. Note that it may have been
  * restarted since.
@@ -7866,27 +7868,38 @@ static inline bool tg3_maybe_stop_txq(struct tg3_napi 
*tnapi,
 static int tg3_tso_bug(struct tg3 *tp, struct tg3_napi *tnapi,
   struct netdev_queue *txq, struct sk_buff *skb)
 {
-   struct sk_buff *segs, *nskb;
-   u32 frag_cnt_est = skb_shinfo(skb)-gso_segs * 3;
+   unsigned int segs_remaining = skb_shinfo(skb)-gso_segs;
 
-   /* Estimate the number of fragments in the worst case */
-   tg3_maybe_stop_txq(tnapi, txq, frag_cnt_est, frag_cnt_est);
-   if (netif_tx_queue_stopped(txq))
-   return NETDEV_TX_BUSY;
+   if (unlikely(tg3_tx_avail(tnapi) = segs_remaining)) {
+   if (!skb_is_nonlinear(skb) || skb_linearize(skb))
+   goto tg3_tso_bug_drop;
+   tg3_start_xmit(skb, tp-dev);
+   } else {
+   struct sk_buff *segs, *nskb;
 
-   segs = skb_gso_segment(skb, tp-dev-features 
-   ~(NETIF_F_TSO | NETIF_F_TSO6));
-   if (IS_ERR(segs) || !segs)
-   goto tg3_tso_bug_end;
+   segs = skb_gso_segment(skb, tp-dev-features

[PATCH net v4 1/4] tg3: Limit minimum tx queue wakeup threshold

2014-08-27 Thread Benjamin Poirier

tx_pending may be set by the user (via ethtool -G) to a low enough value that
TG3_TX_WAKEUP_THRESH becomes smaller than MAX_SKB_FRAGS + 1. This may cause
the tx queue to be waked when there are in fact not enough descriptors to
handle an skb with max frags. This in turn causes tg3_start_xmit() to return
NETDEV_TX_BUSY and print error messages. Fix the problem by putting a limit to
how low TG3_TX_WAKEUP_THRESH can go.

Signed-off-by: Benjamin Poirier bpoir...@suse.de
---

I noticed the problem in a 3.0 kernel when setting `ethtool eth0 -G tx 50` and
running a netperf TCP_STREAM test. The console fills up with
[10597.596155] tg3 0000:06:00.0: eth0: BUG! Tx Ring full when queue awake!
The problem in tg3 remains in current kernels though it does not reproduce as
easily since 5640f76 "net: use a per task frag allocator" (v3.7-rc1). I
reproduced on current kernels by using the fail_page_alloc fault injection
mechanism to force the creation of skbs with many order-0 frags. Note that the
following script may also trigger another bug (NETDEV WATCHDOG), which is
fixed in the next patch.

$ cat /tmp/doit.sh

F=/sys/kernel/debug/fail_page_alloc

echo -1 > $F/times
echo 0 > $F/verbose
echo 0 > $F/ignore-gfp-wait
echo 1 > $F/task-filter
echo 100 > $F/probability

netperf -H 192.168.9.30 -l100 -t omni -- -d send &

n=$!

sleep 0.3
echo 1 > /proc/$n/make-it-fail
sleep 10

kill $n
---
 drivers/net/ethernet/broadcom/tg3.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/tg3.c 
b/drivers/net/ethernet/broadcom/tg3.c
index 3ac5d23..b11c0fd 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -202,7 +202,8 @@ static inline void _tg3_flag_clear(enum TG3_FLAGS flag, 
unsigned long *bits)
 #endif
 
 /* minimum number of free TX descriptors required to wake up TX process */
-#define TG3_TX_WAKEUP_THRESH(tnapi)((tnapi)-tx_pending / 4)
+#define TG3_TX_WAKEUP_THRESH(tnapi)max_t(u32, (tnapi)-tx_pending / 4, \
+ MAX_SKB_FRAGS + 1)
 #define TG3_TX_BD_DMA_MAX_2K   2048
 #define TG3_TX_BD_DMA_MAX_4K   4096
 
-- 
1.8.4.5

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH net v4 2/4] tg3: Fix tx_pending check for MAX_SKB_FRAGS

2014-08-27 Thread Benjamin Poirier

The rest of the driver assumes at least one free descriptor in the tx ring.
Therefore, since an skb with max frags takes up (MAX_SKB_FRAGS + 1)
descriptors, tx_pending must be > (MAX_SKB_FRAGS + 1).

Signed-off-by: Benjamin Poirier bpoir...@suse.de
---

Changes v1->v2
Moved ahead in the series from 3/3 to 2/3, no functional change

I reproduced this bug using the same approach explained in patch 1.
The bug reproduces with tx_pending = 18
---
 drivers/net/ethernet/broadcom/tg3.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/tg3.c 
b/drivers/net/ethernet/broadcom/tg3.c
index b11c0fd..0cecd6d 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -12319,7 +12319,7 @@ static int tg3_set_ringparam(struct net_device *dev, 
struct ethtool_ringparam *e
if ((ering-rx_pending  tp-rx_std_ring_mask) ||
(ering-rx_jumbo_pending  tp-rx_jmb_ring_mask) ||
(ering-tx_pending  TG3_TX_RING_SIZE - 1) ||
-   (ering-tx_pending = MAX_SKB_FRAGS) ||
+   (ering-tx_pending = MAX_SKB_FRAGS + 1) ||
(tg3_flag(tp, TSO_BUG) 
 (ering-tx_pending = (MAX_SKB_FRAGS * 3
return -EINVAL;
-- 
1.8.4.5

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH net v4 4/4] tg3: Fix tx_pending checks for tg3_tso_bug

2014-08-27 Thread Benjamin Poirier

In tg3_set_ringparam(), the tx_pending test to cover the cases where
tg3_tso_bug() is entered has two problems
1) the check is only done for certain hardware whereas the workaround
is now used more broadly. IOW, the check may not be performed when it
is needed.
2) the check is too optimistic.

For example, with a 5761 (SHORT_DMA_BUG), tg3_set_ringparam() skips over the
tx_pending <= (MAX_SKB_FRAGS * 3) check because TSO_BUG is false. Even if it
did do the check, with a full sized skb, frag_cnt_est = 135 but the check is
for <= MAX_SKB_FRAGS * 3 (= 17 * 3 = 51). So the check is insufficient. This
leads to the following situation: by setting, ex. tx_pending = 100, there can
be an skb that triggers tg3_tso_bug() and that is large enough to cause
tg3_tso_bug() to stop the queue even when it is empty. We then end up with a
netdev watchdog transmit timeout.

Given that 1) some of the conditions tested for in tg3_tx_frag_set() apply
regardless of the chipset flags and that 2) it is difficult to estimate ahead
of time the max possible number of frames that a large skb may be split into
by gso, we instead take the approach of adjusting dev->gso_max_segs according
to the requested tx_pending size.

This puts us in the exceptional situation that a single skb that triggers
tg3_tso_bug() may require the entire tx ring. Usually the tx queue is woken up
when at least a quarter of it is available (TG3_TX_WAKEUP_THRESH) but that
would be insufficient now. To avoid useless wakeups, the tx queue wake up
threshold is made dynamic. Likewise, usually the tx queue is stopped as soon
as an skb with max frags may overrun it. Since the skbs submitted from
tg3_tso_bug() use a controlled number of descriptors, the tx queue stop
threshold may be lowered.

Signed-off-by: Benjamin Poirier bpoir...@suse.de
---

Changes v1->v2
* in tg3_set_ringparam(), reduce gso_max_segs further to budget 3 descriptors
  per gso seg instead of only 1 as in v1
* in tg3_tso_bug(), check that this estimation (3 desc/seg) holds, otherwise
  linearize some skbs as needed
* in tg3_start_xmit(), make the queue stop threshold a parameter, for the
  reason explained in the commit description

Changes v2->v3
* use tg3_maybe_stop_txq() instead of repeatedly open coding it
* add the requested tp->tx_dropped++ stat increase in tg3_tso_bug() if
  skb_linearize() fails and we must abort
* in the same code block, add an additional check to stop the queue with the
  default threshold. Otherwise, the netdev_err message at the start of
  __tg3_start_xmit() could be triggered when the next frame is transmitted.
  That is because the previous calls to __tg3_start_xmit() in tg3_tso_bug()
  may have been using a stop_thresh=segs_remaining that is < MAX_SKB_FRAGS +
  1.

Changes v3->v4
* in tg3_set_ringparam(), make sure that wakeup_thresh does not end up being
  >= tx_pending. Identified by Prashant.

I reproduced this bug using the same approach explained in patch 1.
The bug reproduces with tx_pending = 135
---
 drivers/net/ethernet/broadcom/tg3.c | 70 +
 drivers/net/ethernet/broadcom/tg3.h |  1 +
 2 files changed, 57 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/tg3.c 
b/drivers/net/ethernet/broadcom/tg3.c
index f706a1e..05cb940 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -204,6 +204,10 @@ static inline void _tg3_flag_clear(enum TG3_FLAGS flag, 
unsigned long *bits)
 /* minimum number of free TX descriptors required to wake up TX process */
 #define TG3_TX_WAKEUP_THRESH(tnapi)max_t(u32, (tnapi)-tx_pending / 4, \
  MAX_SKB_FRAGS + 1)
+/* estimate a certain number of descriptors per gso segment */
+#define TG3_TX_DESC_PER_SEG(seg_nb)((seg_nb) * 3)
+#define TG3_TX_SEG_PER_DESC(desc_nb)   ((desc_nb) / 3)
+
 #define TG3_TX_BD_DMA_MAX_2K   2048
 #define TG3_TX_BD_DMA_MAX_4K   4096
 
@@ -6609,10 +6613,10 @@ static void tg3_tx(struct tg3_napi *tnapi)
smp_mb();
 
if (unlikely(netif_tx_queue_stopped(txq) 
-(tg3_tx_avail(tnapi)  TG3_TX_WAKEUP_THRESH(tnapi {
+(tg3_tx_avail(tnapi)  tnapi-wakeup_thresh))) {
__netif_tx_lock(txq, smp_processor_id());
if (netif_tx_queue_stopped(txq) 
-   (tg3_tx_avail(tnapi)  TG3_TX_WAKEUP_THRESH(tnapi)))
+   (tg3_tx_avail(tnapi)  tnapi-wakeup_thresh))
netif_tx_wake_queue(txq);
__netif_tx_unlock(txq);
}
@@ -7830,6 +7834,8 @@ static int tigon3_dma_hwbug_workaround(struct tg3_napi 
*tnapi,
 }
 
 static netdev_tx_t tg3_start_xmit(struct sk_buff *, struct net_device *);
+static netdev_tx_t __tg3_start_xmit(struct sk_buff *, struct net_device *,
+   u32);
 
 /* Returns true if the queue has been stopped. Note that it may have been
  * restarted since.
@@ -7844,6 +7850,7 @@ static inline bool

[PATCH net v4 3/4] tg3: Move tx queue stop logic to its own function

2014-08-27 Thread Benjamin Poirier

It is duplicated. Also, the first instance in tg3_start_xmit() is racy.
Consider:

tg3_start_xmit()
	if budget <= ...
				tg3_tx()
					(free up the entire ring)
					tx_cons =
					smp_mb
					if queue_stopped and tx_avail, NO
		if !queue_stopped
			stop queue
		return NETDEV_TX_BUSY

... tx queue stopped forever

Signed-off-by: Benjamin Poirier bpoir...@suse.de
---
Changes v2->v3
* new patch to avoid repeatedly open coding this block in the next patch.

Changes v3->v4
* added a comment to clarify the return value, as suggested
* replaced the BUG_ON with netdev_err(). No need to be so dramatic, this
  situation will trigger a netdev watchdog anyways.
---
 drivers/net/ethernet/broadcom/tg3.c | 75 -
 1 file changed, 40 insertions(+), 35 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/tg3.c 
b/drivers/net/ethernet/broadcom/tg3.c
index 0cecd6d..f706a1e 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -7831,6 +7831,35 @@ static int tigon3_dma_hwbug_workaround(struct tg3_napi 
*tnapi,
 
 static netdev_tx_t tg3_start_xmit(struct sk_buff *, struct net_device *);
 
+/* Returns true if the queue has been stopped. Note that it may have been
+ * restarted since.
+ */
+static inline bool tg3_maybe_stop_txq(struct tg3_napi *tnapi,
+ struct netdev_queue *txq,
+ u32 stop_thresh, u32 wakeup_thresh)
+{
+   bool stopped = false;
+
+   if (unlikely(tg3_tx_avail(tnapi) = stop_thresh)) {
+   if (!netif_tx_queue_stopped(txq)) {
+   stopped = true;
+   netif_tx_stop_queue(txq);
+   if (wakeup_thresh = tnapi-tx_pending)
+   netdev_err(tnapi-tp-dev,
+  BUG! wakeup_thresh too large (%u = 
%u)\n,
+  wakeup_thresh, tnapi-tx_pending);
+   }
+   /* netif_tx_stop_queue() must be done before checking tx index
+* in tg3_tx_avail(), because in tg3_tx(), we update tx index
+* before checking for netif_tx_queue_stopped().
+*/
+   smp_mb();
+   if (tg3_tx_avail(tnapi)  wakeup_thresh)
+   netif_tx_wake_queue(txq);
+   }
+   return stopped;
+}
+
 /* Use GSO to workaround all TSO packets that meet HW bug conditions
  * indicated in tg3_tx_frag_set()
  */
@@ -7841,20 +7870,9 @@ static int tg3_tso_bug(struct tg3 *tp, struct tg3_napi 
*tnapi,
u32 frag_cnt_est = skb_shinfo(skb)-gso_segs * 3;
 
/* Estimate the number of fragments in the worst case */
-   if (unlikely(tg3_tx_avail(tnapi) = frag_cnt_est)) {
-   netif_tx_stop_queue(txq);
-
-   /* netif_tx_stop_queue() must be done before checking
-* checking tx index in tg3_tx_avail() below, because in
-* tg3_tx(), we update tx index before checking for
-* netif_tx_queue_stopped().
-*/
-   smp_mb();
-   if (tg3_tx_avail(tnapi) = frag_cnt_est)
-   return NETDEV_TX_BUSY;
-
-   netif_tx_wake_queue(txq);
-   }
+   tg3_maybe_stop_txq(tnapi, txq, frag_cnt_est, frag_cnt_est);
+   if (netif_tx_queue_stopped(txq))
+   return NETDEV_TX_BUSY;
 
segs = skb_gso_segment(skb, tp-dev-features 
~(NETIF_F_TSO | NETIF_F_TSO6));
@@ -7902,16 +7920,13 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, 
struct net_device *dev)
 * interrupt.  Furthermore, IRQ processing runs lockless so we have
 * no IRQ context deadlocks to worry about either.  Rejoice!
 */
-   if (unlikely(budget = (skb_shinfo(skb)-nr_frags + 1))) {
-   if (!netif_tx_queue_stopped(txq)) {
-   netif_tx_stop_queue(txq);
-
-   /* This is a hard error, log it. */
-   netdev_err(dev,
-  BUG! Tx Ring full when queue awake!\n);
-   }
-   return NETDEV_TX_BUSY;
+   if (tg3_maybe_stop_txq(tnapi, txq, skb_shinfo(skb)-nr_frags + 1,
+  TG3_TX_WAKEUP_THRESH(tnapi))) {
+   /* This is a hard error, log it. */
+   netdev_err(dev, BUG! Tx Ring full when queue awake!\n);
}
+   if (netif_tx_queue_stopped(txq))
+   return NETDEV_TX_BUSY;
 
entry = tnapi-tx_prod;
base_flags = 0;
@@ -8087,18 +8102,8 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, 
struct net_device *dev)
tw32_tx_mbox(tnapi-prodmbox

Re: [PATCH net v4 4/4] tg3: Fix tx_pending checks for tg3_tso_bug

2014-08-29 Thread Benjamin Poirier

On 2014/08/28 20:24, Prashant Sreedharan wrote:
   
  -   for (i = 0; i  tp-irq_max; i++)
  -   tp-napi[i].tx_pending = ering-tx_pending;
  +   dev-gso_max_segs = TG3_TX_SEG_PER_DESC(ering-tx_pending - 1);
  +   for (i = 0; i  tp-irq_max; i++) {
  +   struct tg3_napi *tnapi = tp-napi[i];
  +
  +   tnapi-tx_pending = ering-tx_pending;
  +   if (netif_tx_queue_stopped(netdev_get_tx_queue(dev, i)) 
 
 Need to limit the number of TX queues to tp->txq_cnt instead of
 tp->irq_max as txq_cnt can be less than irq_max.
 
 netif_set_real_num_tx_queues(tp-dev, tp-txq_cnt);

Thanks for your careful review, I'll resubmit shortly.

 
  +   tnapi-wakeup_thresh = ering-tx_pending)
  +   tnapi-wakeup_thresh = MAX_SKB_FRAGS + 1;
  +   }
   
 
 
 
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH net v5 2/4] tg3: Fix tx_pending check for MAX_SKB_FRAGS

2014-08-29 Thread Benjamin Poirier

The rest of the driver assumes at least one free descriptor in the tx ring.
Therefore, since an skb with max frags takes up (MAX_SKB_FRAGS + 1)
descriptors, tx_pending must be > (MAX_SKB_FRAGS + 1).

Signed-off-by: Benjamin Poirier bpoir...@suse.de
---

Changes v1-v2
Moved ahead in the series from 3/3 to 2/3, no functional change

I reproduced this bug using the same approach explained in patch 1.
The bug reproduces with tx_pending = 18
---
 drivers/net/ethernet/broadcom/tg3.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/tg3.c 
b/drivers/net/ethernet/broadcom/tg3.c
index b11c0fd..0cecd6d 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -12319,7 +12319,7 @@ static int tg3_set_ringparam(struct net_device *dev, 
struct ethtool_ringparam *e
if ((ering-rx_pending  tp-rx_std_ring_mask) ||
(ering-rx_jumbo_pending  tp-rx_jmb_ring_mask) ||
(ering-tx_pending  TG3_TX_RING_SIZE - 1) ||
-   (ering-tx_pending = MAX_SKB_FRAGS) ||
+   (ering-tx_pending = MAX_SKB_FRAGS + 1) ||
(tg3_flag(tp, TSO_BUG) 
 (ering-tx_pending = (MAX_SKB_FRAGS * 3
return -EINVAL;
-- 
1.8.4.5

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH net v5 1/4] tg3: Limit minimum tx queue wakeup threshold

2014-08-29 Thread Benjamin Poirier

tx_pending may be set by the user (via ethtool -G) to a low enough value that
TG3_TX_WAKEUP_THRESH becomes smaller than MAX_SKB_FRAGS + 1. This may cause
the tx queue to be waked when there are in fact not enough descriptors to
handle an skb with max frags. This in turn causes tg3_start_xmit() to return
NETDEV_TX_BUSY and print error messages. Fix the problem by putting a limit to
how low TG3_TX_WAKEUP_THRESH can go.

Signed-off-by: Benjamin Poirier bpoir...@suse.de
---

I noticed the problem in a 3.0 kernel when setting `ethtool eth0 -G tx 50` and
running a netperf TCP_STREAM test. The console fills up with
[10597.596155] tg3 :06:00.0: eth0: BUG! Tx Ring full when queue awake!
The problem in tg3 remains in current kernels though it does not reproduce as
easily since 5640f76 net: use a per task frag allocator (v3.7-rc1). I
reproduced on current kernels by using the fail_page_alloc fault injection
mechanism to force the creation of skbs with many order-0 frags. Note that the
following script may also trigger another bug (NETDEV WATCHDOG), which is
fixed in the next patch.

$ cat /tmp/doit.sh

F=/sys/kernel/debug/fail_page_alloc

echo -1  $F/times
echo 0  $F/verbose
echo 0  $F/ignore-gfp-wait
echo 1  $F/task-filter
echo 100  $F/probability

netperf -H 192.168.9.30 -l100 -t omni -- -d send 

n=$!

sleep 0.3
echo 1  /proc/$n/make-it-fail
sleep 10

kill $n
---
 drivers/net/ethernet/broadcom/tg3.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/tg3.c 
b/drivers/net/ethernet/broadcom/tg3.c
index 3ac5d23..b11c0fd 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -202,7 +202,8 @@ static inline void _tg3_flag_clear(enum TG3_FLAGS flag, 
unsigned long *bits)
 #endif
 
 /* minimum number of free TX descriptors required to wake up TX process */
-#define TG3_TX_WAKEUP_THRESH(tnapi)((tnapi)-tx_pending / 4)
+#define TG3_TX_WAKEUP_THRESH(tnapi)max_t(u32, (tnapi)-tx_pending / 4, \
+ MAX_SKB_FRAGS + 1)
 #define TG3_TX_BD_DMA_MAX_2K   2048
 #define TG3_TX_BD_DMA_MAX_4K   4096
 
-- 
1.8.4.5

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH net v5 4/4] tg3: Fix tx_pending checks for tg3_tso_bug

2014-08-29 Thread Benjamin Poirier

In tg3_set_ringparam(), the tx_pending test to cover the cases where
tg3_tso_bug() is entered has two problems
1) the check is only done for certain hardware whereas the workaround
is now used more broadly. IOW, the check may not be performed when it
is needed.
2) the check is too optimistic.

For example, with a 5761 (SHORT_DMA_BUG), tg3_set_ringparam() skips over the
tx_pending <= (MAX_SKB_FRAGS * 3) check because TSO_BUG is false. Even if it
did do the check, with a full sized skb, frag_cnt_est = 135 but the check is
for <= MAX_SKB_FRAGS * 3 (= 17 * 3 = 51). So the check is insufficient. This
leads to the following situation: by setting, ex. tx_pending = 100, there can
be an skb that triggers tg3_tso_bug() and that is large enough to cause
tg3_tso_bug() to stop the queue even when it is empty. We then end up with a
netdev watchdog transmit timeout.

Given that 1) some of the conditions tested for in tg3_tx_frag_set() apply
regardless of the chipset flags and that 2) it is difficult to estimate ahead
of time the max possible number of frames that a large skb may be split into
by gso, we instead take the approach of adjusting dev->gso_max_segs according
to the requested tx_pending size.

This puts us in the exceptional situation that a single skb that triggers
tg3_tso_bug() may require the entire tx ring. Usually the tx queue is woken up
when at least a quarter of it is available (TG3_TX_WAKEUP_THRESH) but that
would be insufficient now. To avoid useless wakeups, the tx queue wake up
threshold is made dynamic. Likewise, usually the tx queue is stopped as soon
as an skb with max frags may overrun it. Since the skbs submitted from
tg3_tso_bug() use a controlled number of descriptors, the tx queue stop
threshold may be lowered.

Signed-off-by: Benjamin Poirier bpoir...@suse.de
---

Changes v1-v2
* in tg3_set_ringparam(), reduce gso_max_segs further to budget 3 descriptors
  per gso seg instead of only 1 as in v1
* in tg3_tso_bug(), check that this estimation (3 desc/seg) holds, otherwise
  linearize some skbs as needed
* in tg3_start_xmit(), make the queue stop threshold a parameter, for the
  reason explained in the commit description

Changes v2-v3
* use tg3_maybe_stop_txq() instead of repeatedly open coding it
* add the requested tp->tx_dropped++ stat increase in tg3_tso_bug() if
  skb_linearize() fails and we must abort
* in the same code block, add an additional check to stop the queue with the
  default threshold. Otherwise, the netdev_err message at the start of
  __tg3_start_xmit() could be triggered when the next frame is transmitted.
  That is because the previous calls to __tg3_start_xmit() in tg3_tso_bug()
  may have been using a stop_thresh=segs_remaining that is < MAX_SKB_FRAGS +
  1.

Changes v3-v4
* in tg3_set_ringparam(), make sure that wakeup_thresh does not end up being
  >= tx_pending. Identified by Prashant.

Changes v4-v5
* in tg3_set_ringparam(), use TG3_TX_WAKEUP_THRESH() and tp->txq_cnt instead
  of tp->irq_max. Identified by Prashant.

I reproduced this bug using the same approach explained in patch 1.
The bug reproduces with tx_pending = 135
---
 drivers/net/ethernet/broadcom/tg3.c | 69 +
 drivers/net/ethernet/broadcom/tg3.h |  1 +
 2 files changed, 56 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/tg3.c 
b/drivers/net/ethernet/broadcom/tg3.c
index f706a1e..43feb18 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -204,6 +204,10 @@ static inline void _tg3_flag_clear(enum TG3_FLAGS flag, 
unsigned long *bits)
 /* minimum number of free TX descriptors required to wake up TX process */
 #define TG3_TX_WAKEUP_THRESH(tnapi)max_t(u32, (tnapi)-tx_pending / 4, \
  MAX_SKB_FRAGS + 1)
+/* estimate a certain number of descriptors per gso segment */
+#define TG3_TX_DESC_PER_SEG(seg_nb)((seg_nb) * 3)
+#define TG3_TX_SEG_PER_DESC(desc_nb)   ((desc_nb) / 3)
+
 #define TG3_TX_BD_DMA_MAX_2K   2048
 #define TG3_TX_BD_DMA_MAX_4K   4096
 
@@ -6609,10 +6613,10 @@ static void tg3_tx(struct tg3_napi *tnapi)
smp_mb();
 
if (unlikely(netif_tx_queue_stopped(txq) 
-(tg3_tx_avail(tnapi)  TG3_TX_WAKEUP_THRESH(tnapi {
+(tg3_tx_avail(tnapi)  tnapi-wakeup_thresh))) {
__netif_tx_lock(txq, smp_processor_id());
if (netif_tx_queue_stopped(txq) 
-   (tg3_tx_avail(tnapi)  TG3_TX_WAKEUP_THRESH(tnapi)))
+   (tg3_tx_avail(tnapi)  tnapi-wakeup_thresh))
netif_tx_wake_queue(txq);
__netif_tx_unlock(txq);
}
@@ -7830,6 +7834,8 @@ static int tigon3_dma_hwbug_workaround(struct tg3_napi 
*tnapi,
 }
 
 static netdev_tx_t tg3_start_xmit(struct sk_buff *, struct net_device *);
+static netdev_tx_t __tg3_start_xmit(struct sk_buff *, struct net_device *,
+   u32

[PATCH net v5 3/4] tg3: Move tx queue stop logic to its own function

2014-08-29 Thread Benjamin Poirier

It is duplicated. Also, the first instance in tg3_start_xmit() is racy.
Consider:

tg3_start_xmit()
if budget = ...
tg3_tx()
(free up the entire ring)
tx_cons =
smp_mb
if queue_stopped and tx_avail, NO
if !queue_stopped
stop queue
return NETDEV_TX_BUSY

... tx queue stopped forever

Signed-off-by: Benjamin Poirier bpoir...@suse.de
---
Changes v2-v3
* new patch to avoid repeatedly open coding this block in the next patch.

Changes v3-v4
* added a comment to clarify the return value, as suggested
* replaced the BUG_ON with netdev_err(). No need to be so dramatic, this
  situation will trigger a netdev watchdog anyways.
---
 drivers/net/ethernet/broadcom/tg3.c | 75 -
 1 file changed, 40 insertions(+), 35 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/tg3.c 
b/drivers/net/ethernet/broadcom/tg3.c
index 0cecd6d..f706a1e 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -7831,6 +7831,35 @@ static int tigon3_dma_hwbug_workaround(struct tg3_napi 
*tnapi,
 
 static netdev_tx_t tg3_start_xmit(struct sk_buff *, struct net_device *);
 
+/* Returns true if the queue has been stopped. Note that it may have been
+ * restarted since.
+ */
+static inline bool tg3_maybe_stop_txq(struct tg3_napi *tnapi,
+ struct netdev_queue *txq,
+ u32 stop_thresh, u32 wakeup_thresh)
+{
+   bool stopped = false;
+
+   if (unlikely(tg3_tx_avail(tnapi) = stop_thresh)) {
+   if (!netif_tx_queue_stopped(txq)) {
+   stopped = true;
+   netif_tx_stop_queue(txq);
+   if (wakeup_thresh = tnapi-tx_pending)
+   netdev_err(tnapi-tp-dev,
+  BUG! wakeup_thresh too large (%u = 
%u)\n,
+  wakeup_thresh, tnapi-tx_pending);
+   }
+   /* netif_tx_stop_queue() must be done before checking tx index
+* in tg3_tx_avail(), because in tg3_tx(), we update tx index
+* before checking for netif_tx_queue_stopped().
+*/
+   smp_mb();
+   if (tg3_tx_avail(tnapi)  wakeup_thresh)
+   netif_tx_wake_queue(txq);
+   }
+   return stopped;
+}
+
 /* Use GSO to workaround all TSO packets that meet HW bug conditions
  * indicated in tg3_tx_frag_set()
  */
@@ -7841,20 +7870,9 @@ static int tg3_tso_bug(struct tg3 *tp, struct tg3_napi 
*tnapi,
u32 frag_cnt_est = skb_shinfo(skb)-gso_segs * 3;
 
/* Estimate the number of fragments in the worst case */
-   if (unlikely(tg3_tx_avail(tnapi) = frag_cnt_est)) {
-   netif_tx_stop_queue(txq);
-
-   /* netif_tx_stop_queue() must be done before checking
-* checking tx index in tg3_tx_avail() below, because in
-* tg3_tx(), we update tx index before checking for
-* netif_tx_queue_stopped().
-*/
-   smp_mb();
-   if (tg3_tx_avail(tnapi) = frag_cnt_est)
-   return NETDEV_TX_BUSY;
-
-   netif_tx_wake_queue(txq);
-   }
+   tg3_maybe_stop_txq(tnapi, txq, frag_cnt_est, frag_cnt_est);
+   if (netif_tx_queue_stopped(txq))
+   return NETDEV_TX_BUSY;
 
segs = skb_gso_segment(skb, tp-dev-features 
~(NETIF_F_TSO | NETIF_F_TSO6));
@@ -7902,16 +7920,13 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, 
struct net_device *dev)
 * interrupt.  Furthermore, IRQ processing runs lockless so we have
 * no IRQ context deadlocks to worry about either.  Rejoice!
 */
-   if (unlikely(budget = (skb_shinfo(skb)-nr_frags + 1))) {
-   if (!netif_tx_queue_stopped(txq)) {
-   netif_tx_stop_queue(txq);
-
-   /* This is a hard error, log it. */
-   netdev_err(dev,
-  BUG! Tx Ring full when queue awake!\n);
-   }
-   return NETDEV_TX_BUSY;
+   if (tg3_maybe_stop_txq(tnapi, txq, skb_shinfo(skb)-nr_frags + 1,
+  TG3_TX_WAKEUP_THRESH(tnapi))) {
+   /* This is a hard error, log it. */
+   netdev_err(dev, BUG! Tx Ring full when queue awake!\n);
}
+   if (netif_tx_queue_stopped(txq))
+   return NETDEV_TX_BUSY;
 
entry = tnapi-tx_prod;
base_flags = 0;
@@ -8087,18 +8102,8 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, 
struct net_device *dev)
tw32_tx_mbox(tnapi-prodmbox

[PATCH] netdevice: Add missing parentheses in macro

2015-01-13 Thread Benjamin Poirier

For example, one could conceivably call
for_each_netdev_in_bond_rcu(condition ? bond1 : bond2, slave)
and get an unexpected result.

Signed-off-by: Benjamin Poirier bpoir...@suse.de
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7f794db..52fd8e8 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2085,7 +2085,7 @@ extern rwlock_t   dev_base_lock;  
/* Device list lock */
list_for_each_entry_continue_rcu(d, (net)-dev_base_head, dev_list)
 #define for_each_netdev_in_bond_rcu(bond, slave)   \
for_each_netdev_rcu(init_net, slave)   \
-   if (netdev_master_upper_dev_get_rcu(slave) == bond)
+   if (netdev_master_upper_dev_get_rcu(slave) == (bond))
 #define net_device_entry(lh)   list_entry(lh, struct net_device, dev_list)
 
 static inline struct net_device *next_net_device(struct net_device *dev)
-- 
2.2.0

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] mlx4: Fix tx ring affinity_mask creation

2015-04-13 Thread Benjamin Poirier

On 2015/04/12 10:03, Ido Shamay wrote:
 Hi Benjamin,
 
 On 4/10/2015 7:27 PM, Benjamin Poirier wrote:
 By default, the number of tx queues is limited by the number of online cpus 
 in
 mlx4_en_get_profile(). However, this limit no longer holds after the ethtool
 .set_channels method has been called. In that situation, the driver may 
 access
 invalid bits of certain cpumask variables when queue_index  nr_cpu_ids.
 
 I must say I don't see the above issue with the current code.
 Whatever is the modified value of priv-num_tx_rings_p_up, it will set XPS
 only on queues which have
 been set with CPU affinity mask (no access to invalid bits).

The problem is not with the call to netif_set_xps_queue() it is with the
calls to cpu_online() and cpumask_set_cpu().

For example, if the user calls `ethtool -L ethX tx 32`, queue_index in
mlx4_en_create_tx_ring() can be up to 255. Depending on CONFIG_NR_CPUS
and CONFIG_CPUMASK_OFFSTACK this may result in calls to cpu_online() and
cpumask_set_cpu() with cpu >= nr_cpumask_bits which is an invalid usage
of the cpumask api. The driver will potentially read or write beyond the
end of the bitmap. With CONFIG_CPUMASK_OFFSTACK=y and
CONFIG_DEBUG_PER_CPU_MAPS=y, the aforementioned ethtool call on a system
with 32 cpus triggers the warning in cpumask_check(). 

 
 It's true that when priv-num_tx_rings_p_up  nr_cpus. not all queues will
 be set with XPS.
 This is because the code tries to preserve 1:1 mapping of queues to cores,
 to avoid a double mapping
 of queues to cores.
 I guess it's ok to break the 1:1 mapping in this condition, but the commit
 message should say that instead
 of invalid bits. Please fix me if I'm wrong.
 
 Signed-off-by: Benjamin Poirier bpoir...@suse.de
 ---
   drivers/net/ethernet/mellanox/mlx4/en_tx.c | 8 +---
   1 file changed, 5 insertions(+), 3 deletions(-)
 
 diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c 
 b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
 index 55f9f5c..8c234ec 100644
 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
 +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
 @@ -143,8 +143,10 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
  ring-hwtstamp_tx_type = priv-hwtstamp_config.tx_type;
  ring-queue_index = queue_index;
 -if (queue_index  priv-num_tx_rings_p_up  cpu_online(queue_index))
 -cpumask_set_cpu(queue_index, ring-affinity_mask);
 +if (queue_index  priv-num_tx_rings_p_up)
 +cpumask_set_cpu_local_first(queue_index,
 +priv-mdev-dev-numa_node,
 +ring-affinity_mask);
 Moving from cpumask_set_cpu to cpumask_set_cpu_local_first is great, but
 should come in a different commit, since
 the behavior of the XPS is changed here (xps_cpus[tx_ring[queue_index]] !=
 queue_index from now).
 Commit should state of this behavior change.
 Thanks a lot Benjamin.
  *pring = ring;
  return 0;
 @@ -213,7 +215,7 @@ int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv,
  err = mlx4_qp_to_ready(mdev-dev, ring-wqres.mtt, ring-context,
 ring-qp, ring-qp_state);
 -if (!user_prio  cpu_online(ring-queue_index))
 +if (!cpumask_empty(ring-affinity_mask))
  netif_set_xps_queue(priv-dev, ring-affinity_mask,
  ring-queue_index);
 
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH RESEND] mlx4: Fix tx ring affinity_mask creation

2015-04-28 Thread Benjamin Poirier

By default, the number of tx queues is limited by the number of online cpus
in mlx4_en_get_profile(). However, this limit no longer holds after the
ethtool .set_channels method has been called. In that situation, the driver
may access invalid bits of certain cpumask variables when queue_index >=
nr_cpu_ids.

Signed-off-by: Benjamin Poirier bpoir...@suse.de
Acked-by: Ido Shamay i...@mellanox.com
Fixes: d03a68f (net/mlx4_en: Configure the XPS queue mapping on driver load)
---
 drivers/net/ethernet/mellanox/mlx4/en_tx.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c 
b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 1783705..f7bf312 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -143,8 +143,10 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
ring-hwtstamp_tx_type = priv-hwtstamp_config.tx_type;
ring-queue_index = queue_index;
 
-   if (queue_index  priv-num_tx_rings_p_up  cpu_online(queue_index))
-   cpumask_set_cpu(queue_index, ring-affinity_mask);
+   if (queue_index  priv-num_tx_rings_p_up)
+   cpumask_set_cpu_local_first(queue_index,
+   priv-mdev-dev-numa_node,
+   ring-affinity_mask);
 
*pring = ring;
return 0;
@@ -213,7 +215,7 @@ int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv,
 
err = mlx4_qp_to_ready(mdev-dev, ring-wqres.mtt, ring-context,
   ring-qp, ring-qp_state);
-   if (!user_prio  cpu_online(ring-queue_index))
+   if (!cpumask_empty(ring-affinity_mask))
netif_set_xps_queue(priv-dev, ring-affinity_mask,
ring-queue_index);
 
-- 
2.3.5

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] mlx4: Fix tx ring affinity_mask creation

2015-04-27 Thread Benjamin Poirier

On 2015/04/13 17:22, Benjamin Poirier wrote:
 On 2015/04/12 10:03, Ido Shamay wrote:
  Hi Benjamin,
  
  On 4/10/2015 7:27 PM, Benjamin Poirier wrote:
  By default, the number of tx queues is limited by the number of online 
  cpus in
  mlx4_en_get_profile(). However, this limit no longer holds after the 
  ethtool
  .set_channels method has been called. In that situation, the driver may 
  access
  invalid bits of certain cpumask variables when queue_index  nr_cpu_ids.
  
  I must say I don't see the above issue with the current code.
  Whatever is the modified value of priv-num_tx_rings_p_up, it will set XPS
  only on queues which have
  been set with CPU affinity mask (no access to invalid bits).
 
 The problem is not with the call to netif_set_xps_queue() it is with the
 calls to cpu_online() and cpumask_set_cpu().
 
 For example, if the user calls `ethtool -L ethX tx 32`, queue_index in
 mlx4_en_create_tx_ring() can be up to 255. Depending on CONFIG_NR_CPUS
 and CONFIG_CPUMASK_OFFSTACK this may result in calls to cpu_online() and
 cpumask_set_cpu() with cpu = nr_cpumask_bits which is an invalid usage
 of the cpumask api. The driver will potentially read or write beyond the
 end of the bitmap. With CONFIG_CPUMASK_OFFSTACK=y and
 CONFIG_DEBUG_PER_CPU_MAPS=y, the aforementioned ethtool call on a system
 with 32 cpus triggers the warning in cpumask_check(). 
 

Mellanox, can you please
ack the patch as submitted, or
clarify what changes you'd like to see given my reply above, or
submit a fix of your own for this problem

Thanks,
-Benjamin

  
  It's true that when priv-num_tx_rings_p_up  nr_cpus. not all queues will
  be set with XPS.
  This is because the code tries to preserve 1:1 mapping of queues to cores,
  to avoid a double mapping
  of queues to cores.
  I guess it's ok to break the 1:1 mapping in this condition, but the commit
  message should say that instead
  of invalid bits. Please fix me if I'm wrong.
  
  Signed-off-by: Benjamin Poirier bpoir...@suse.de
  ---
drivers/net/ethernet/mellanox/mlx4/en_tx.c | 8 +---
1 file changed, 5 insertions(+), 3 deletions(-)
  
  diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c 
  b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
  index 55f9f5c..8c234ec 100644
  --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
  +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
  @@ -143,8 +143,10 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
 ring-hwtstamp_tx_type = priv-hwtstamp_config.tx_type;
 ring-queue_index = queue_index;
  -  if (queue_index  priv-num_tx_rings_p_up  cpu_online(queue_index))
  -  cpumask_set_cpu(queue_index, ring-affinity_mask);
  +  if (queue_index  priv-num_tx_rings_p_up)
  +  cpumask_set_cpu_local_first(queue_index,
  +  priv-mdev-dev-numa_node,
  +  ring-affinity_mask);
  Moving from cpumask_set_cpu to cpumask_set_cpu_local_first is great, but
  should come in a different commit, since
  the behavior of the XPS is changed here (xps_cpus[tx_ring[queue_index]] !=
  queue_index from now).
  Commit should state of this behavior change.
  Thanks a lot Benjamin.
 *pring = ring;
 return 0;
  @@ -213,7 +215,7 @@ int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv,
 err = mlx4_qp_to_ready(mdev-dev, ring-wqres.mtt, ring-context,
ring-qp, ring-qp_state);
  -  if (!user_prio  cpu_online(ring-queue_index))
  +  if (!cpumask_empty(ring-affinity_mask))
 netif_set_xps_queue(priv-dev, ring-affinity_mask,
 ring-queue_index);
  
 --
 To unsubscribe from this list: send the line unsubscribe linux-kernel in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 Please read the FAQ at  http://www.tux.org/lkml/
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] mlx4_en: Use correct loop cursor in error path.

2015-04-29 Thread Benjamin Poirier

Signed-off-by: Benjamin Poirier bpoir...@suse.de
Fixes: 9e311e7 (net/mlx4_en: Use affinity hint)
---
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c 
b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 0f1afc0..bf173d7 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -1721,7 +1721,7 @@ mac_err:
 cq_err:
while (rx_index--) {
mlx4_en_deactivate_cq(priv, priv-rx_cq[rx_index]);
-   mlx4_en_free_affinity_hint(priv, i);
+   mlx4_en_free_affinity_hint(priv, rx_index);
}
for (i = 0; i  priv-rx_ring_num; i++)
mlx4_en_deactivate_rx_ring(priv, priv-rx_ring[i]);
-- 
2.3.5

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] mlx4_core: Fix fallback from MSI-X to INTx

2015-05-22 Thread Benjamin Poirier

The test in mlx4_load_one() to remove MLX4_FLAG_MSI_X expects mlx4_NOP() to
fail with -EBUSY. It is also necessary to avoid the reset since the device
is not fully reinitialized before calling mlx4_start_hca() a second time.

Note that this will also affect mlx4_test_interrupts(), the only other user
of MLX4_CMD_NOP.

Fixes: f5aef5a (net/mlx4_core: Activate reset flow upon fatal command cases)
Signed-off-by: Benjamin Poirier bpoir...@suse.de
---
 drivers/net/ethernet/mellanox/mlx4/cmd.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c 
b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index 4f7dc04..529ef05 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -714,8 +714,13 @@ static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 
in_param, u64 *out_param,
 msecs_to_jiffies(timeout))) {
mlx4_warn(dev, command 0x%x timed out (go bit not cleared)\n,
  op);
-   err = -EIO;
-   goto out_reset;
+   if (op == MLX4_CMD_NOP) {
+   err = -EBUSY;
+   goto out;
+   } else {
+   err = -EIO;
+   goto out_reset;
+   }
}
 
err = context-result;
-- 
2.3.5

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 0/2] e1000e msi-x fixes

2015-10-22 Thread Benjamin Poirier

Hi,

For this series:


Benjamin Poirier (2):
  e1000e: remove unreachable code
  e1000e: Fix msi-x interrupt automask

 drivers/net/ethernet/intel/e1000e/netdev.c | 18 ++
 1 file changed, 6 insertions(+), 12 deletions(-)


The first patch is a cleanup but the second one is the real deal. Please
consider reading the description for that patch before proceeding. I
believe that the following simple tracing statements are helpful in
detecting the problem fixed by the second patch.

diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c 
b/drivers/net/ethernet/intel/e1000e/netdev.c
index 8881256..707a525 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -1952,6 +1952,9 @@ static irqreturn_t e1000_intr_msix_rx(int __always_unused 
irq, void *data)
struct net_device *netdev = data;
struct e1000_adapter *adapter = netdev_priv(netdev);
struct e1000_ring *rx_ring = adapter->rx_ring;
+   struct e1000_hw *hw = >hw;
+
+   trace_printk("%s: rxq0 irq ims 0x%08x\n", netdev->name, er32(IMS));
 
/* Write the ITR value calculated at the end of the
 * previous interrupt.
@@ -1966,6 +1969,7 @@ static irqreturn_t e1000_intr_msix_rx(int __always_unused 
irq, void *data)
adapter->total_rx_bytes = 0;
adapter->total_rx_packets = 0;
__napi_schedule(>napi);
+   trace_printk("%s: scheduling napi\n", netdev->name);
}
return IRQ_HANDLED;
 }
@@ -2672,6 +2676,8 @@ static int e1000e_poll(struct napi_struct *napi, int 
weight)
struct net_device *poll_dev = adapter->netdev;
int tx_cleaned = 1, work_done = 0;
 
+   trace_printk("%s: poll starting ims 0x%08x\n", poll_dev->name,
+er32(IMS));
adapter = netdev_priv(poll_dev);
 
if (!adapter->msix_entries ||
@@ -2689,6 +2695,8 @@ static int e1000e_poll(struct napi_struct *napi, int 
weight)
e1000_set_itr(adapter);
napi_complete_done(napi, work_done);
if (!test_bit(__E1000_DOWN, >state)) {
+   trace_printk("%s: will enable rxq0 irq\n",
+poll_dev->name);
if (adapter->msix_entries)
ew32(IMS, adapter->rx_ring->ims_val);
else

 8< 

With that patch but without the patches in this series we can see that rx irqs
occur at unexpected times:

  -0 [000] .Ns.  1986.887517: e1000e_poll: eth1: will enable 
rxq0 irq
  -0 [000] d.h.  1986.896654: e1000_intr_msix_rx: eth1: rxq0 
irq ims 0x0154
  -0 [000] d.h.  1986.896657: e1000_intr_msix_rx: eth1: 
scheduling napi
  -0 [000] d.H.  1986.896662: e1000_intr_msix_rx: eth1: rxq0 
irq ims 0x0154
  -0 [000] ..s.  1986.896667: e1000e_poll: eth1: poll 
starting ims 0x0154
Warning: many interrupts (2) before napi
  -0 [000] ..s.  1986.896685: e1000e_poll: eth1: will enable 
rxq0 irq

  -0 [000] d.h.  1990.688870: e1000_intr_msix_rx: eth1: 
scheduling napi
  -0 [000] ..s.  1990.688875: e1000e_poll: eth1: poll 
starting ims 0x0154
  -0 [000] dNH.  1990.688913: e1000_intr_msix_rx: eth1: rxq0 
irq ims 0x0154
Warning: interrupt inside napi
  -0 [000] .Ns.  1990.688916: e1000e_poll: eth1: will enable 
rxq0 irq
  -0 [000] d.h.  1990.729688: e1000_intr_msix_rx: eth1: rxq0 
irq ims 0x0154

Here's a typical sequence after applying the patches in this series. Notice
that ims is changed. Another printk at the end of e1000e_poll would show it to
be 0x0154.

  -0 [000] d.h.  3896.134376: e1000_intr_msix_rx: eth1: rxq0 
irq ims 0x0144
  -0 [000] d.h.  3896.134379: e1000_intr_msix_rx: eth1: 
scheduling napi
  -0 [000] ..s.  3896.134384: e1000e_poll: eth1: poll 
starting ims 0x0144
  -0 [000] ..s.  3896.134398: e1000e_poll: eth1: will enable 
rxq0 irq

Finally, here's the script I used to generate the warnings above:

#!/usr/bin/python3

import sys
import re
import pprint


class NaE(Exception):
"Not an Event"
pass

class Event:
def __init__(self, line):
# sample events:
#  -0 [000] d.h.  2025.256536: e1000_intr_msix_rx: eth1: rxq0 
irq ims 0x0154
#  -0 [000] d.h.  2025.256539: e1000_intr_msix_rx: eth1: 
scheduling napi
#  -0 [000] ..s.  2025.256544: e1000e_poll: eth1: poll 
starting ims 0x0154
#  -0 [000] ..s.  2025.256558: e1000e_poll: eth1: will enable 
rxq0 irq
retval = re.match(" +.*)>?-(?P[0-9]+) +\[(?P.*)\] 
(?P[^ ]+) +(?P[0-9.]+): (?P[^:]+): (?P[^:]+): 
(?P.*)", line)
if retval:
self.

[PATCH 2/2] e1000e: Fix msi-x interrupt automask

2015-10-22 Thread Benjamin Poirier

Since the introduction of 82574 support in e1000e, the driver has worked on
the assumption that msi-x interrupt generation is automatically disabled
after each irq. As it turns out, this is not the case. Currently, rx
interrupts can fire multiple times before and during napi processing. This
can be a problem for users because frames that arrive in a certain window
(after adapter->clean_rx() but before napi_complete_done() has cleared
NAPI_STATE_SCHED) generate an interrupt which does not lead to
napi_schedule(). These frames sit in the rx queue until another frame
arrives (a tcp retransmit for example).

While the EIAC and CTRL_EXT registers are properly configured for irq
automask, the modification of IAM in e1000_configure_msix() is what
prevents automask from working as intended.

This patch removes that erroneous write and fixes interrupt rearming for tx
and "other" interrupts. Since e1000_msix_other() reads ICR, all interrupts
must be rearmed in that function.

Reported-by: Frank Steiner <steiner-...@bio.ifi.lmu.de>
Signed-off-by: Benjamin Poirier <bpoir...@suse.com>
---
 drivers/net/ethernet/intel/e1000e/netdev.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c 
b/drivers/net/ethernet/intel/e1000e/netdev.c
index a228167..8881256 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -1921,7 +1921,8 @@ static irqreturn_t e1000_msix_other(int __always_unused 
irq, void *data)
 
 no_link_interrupt:
if (!test_bit(__E1000_DOWN, &adapter->state))
-   ew32(IMS, E1000_IMS_LSC | E1000_IMS_OTHER);
+   ew32(IMS, adapter->eiac_mask | E1000_IMS_OTHER |
+E1000_IMS_LSC);
 
return IRQ_HANDLED;
 }
@@ -1940,6 +1941,9 @@ static irqreturn_t e1000_intr_msix_tx(int __always_unused 
irq, void *data)
/* Ring was not completely cleaned, so fire another interrupt */
ew32(ICS, tx_ring->ims_val);
 
+   if (!test_bit(__E1000_DOWN, &adapter->state))
+   ew32(IMS, E1000_IMS_TXQ0);
+
return IRQ_HANDLED;
 }
 
@@ -2027,11 +2031,7 @@ static void e1000_configure_msix(struct e1000_adapter 
*adapter)
 
/* enable MSI-X PBA support */
ctrl_ext = er32(CTRL_EXT);
-   ctrl_ext |= E1000_CTRL_EXT_PBA_CLR;
-
-   /* Auto-Mask Other interrupts upon ICR read */
-   ew32(IAM, ~E1000_EIAC_MASK_82574 | E1000_IMS_OTHER);
-   ctrl_ext |= E1000_CTRL_EXT_EIAME;
+   ctrl_ext |= E1000_CTRL_EXT_PBA_CLR | E1000_CTRL_EXT_EIAME;
ew32(CTRL_EXT, ctrl_ext);
e1e_flush();
 }
-- 
2.5.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 1/2] e1000e: remove unreachable code

2015-10-22 Thread Benjamin Poirier

msi-x interrupts are not shared so there's no need to check if the
interrupt was really from this adapter.

Signed-off-by: Benjamin Poirier <bpoir...@suse.com>
---
 drivers/net/ethernet/intel/e1000e/netdev.c | 6 --
 1 file changed, 6 deletions(-)

diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c 
b/drivers/net/ethernet/intel/e1000e/netdev.c
index 0a854a4..a228167 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -1907,12 +1907,6 @@ static irqreturn_t e1000_msix_other(int __always_unused 
irq, void *data)
struct e1000_hw *hw = &adapter->hw;
u32 icr = er32(ICR);
 
-   if (!(icr & E1000_ICR_INT_ASSERTED)) {
-   if (!test_bit(__E1000_DOWN, &adapter->state))
-   ew32(IMS, E1000_IMS_OTHER);
-   return IRQ_NONE;
-   }
-
if (icr & adapter->eiac_mask)
ew32(ICS, (icr & adapter->eiac_mask));
 
-- 
2.5.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v3 3/4] e1000e: Do not write lsc to ics in msi-x mode

2015-11-09 Thread Benjamin Poirier

In msi-x mode, there is no handler for the lsc interrupt so there is no
point in writing that to ics now that we always assume Other interrupts
are caused by lsc.

Reviewed-by: Jasna Hodzic <jhod...@ucdavis.edu>
Signed-off-by: Benjamin Poirier <bpoir...@suse.com>
---
 drivers/net/ethernet/intel/e1000e/defines.h |  3 ++-
 drivers/net/ethernet/intel/e1000e/netdev.c  | 27 ---
 2 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/intel/e1000e/defines.h 
b/drivers/net/ethernet/intel/e1000e/defines.h
index 133d407..f7c7804 100644
--- a/drivers/net/ethernet/intel/e1000e/defines.h
+++ b/drivers/net/ethernet/intel/e1000e/defines.h
@@ -441,12 +441,13 @@
 #define E1000_IMS_RXQ1  E1000_ICR_RXQ1  /* Rx Queue 1 Interrupt */
 #define E1000_IMS_TXQ0  E1000_ICR_TXQ0  /* Tx Queue 0 Interrupt */
 #define E1000_IMS_TXQ1  E1000_ICR_TXQ1  /* Tx Queue 1 Interrupt */
-#define E1000_IMS_OTHER E1000_ICR_OTHER /* Other Interrupts */
+#define E1000_IMS_OTHER E1000_ICR_OTHER /* Other Interrupt */
 
 /* Interrupt Cause Set */
 #define E1000_ICS_LSC   E1000_ICR_LSC   /* Link Status Change */
 #define E1000_ICS_RXSEQ E1000_ICR_RXSEQ /* Rx sequence error */
 #define E1000_ICS_RXDMT0E1000_ICR_RXDMT0/* Rx desc min. threshold */
+#define E1000_ICS_OTHER E1000_ICR_OTHER /* Other Interrupt */
 
 /* Transmit Descriptor Control */
 #define E1000_TXDCTL_PTHRESH 0x003F /* TXDCTL Prefetch Threshold */
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c 
b/drivers/net/ethernet/intel/e1000e/netdev.c
index a73e323..ed7cc8e 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -4130,10 +4130,23 @@ void e1000e_reset(struct e1000_adapter *adapter)
 
 }
 
-int e1000e_up(struct e1000_adapter *adapter)
+/**
+ * e1000e_trigger_lsc - trigger an lsc interrupt
+ *
+ * Fire a link status change interrupt to start the watchdog.
+ **/
+static void e1000e_trigger_lsc(struct e1000_adapter *adapter)
 {
struct e1000_hw *hw = &adapter->hw;
 
+   if (adapter->msix_entries)
+   ew32(ICS, E1000_ICS_OTHER);
+   else
+   ew32(ICS, E1000_ICS_LSC);
+}
+
+int e1000e_up(struct e1000_adapter *adapter)
+{
/* hardware has been reset, we need to reload some things */
e1000_configure(adapter);
 
@@ -4145,11 +4158,7 @@ int e1000e_up(struct e1000_adapter *adapter)
 
netif_start_queue(adapter->netdev);
 
-   /* fire a link change interrupt to start the watchdog */
-   if (adapter->msix_entries)
-   ew32(ICS, E1000_ICS_LSC | E1000_ICR_OTHER);
-   else
-   ew32(ICS, E1000_ICS_LSC);
+   e1000e_trigger_lsc(adapter);
 
return 0;
 }
@@ -4576,11 +4585,7 @@ static int e1000_open(struct net_device *netdev)
hw->mac.get_link_status = true;
pm_runtime_put(&pdev->dev);
 
-   /* fire a link status change interrupt to start the watchdog */
-   if (adapter->msix_entries)
-   ew32(ICS, E1000_ICS_LSC | E1000_ICR_OTHER);
-   else
-   ew32(ICS, E1000_ICS_LSC);
+   e1000e_trigger_lsc(adapter);
 
return 0;
 
-- 
2.6.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v3 4/4] e1000e: Fix msi-x interrupt automask

2015-11-09 Thread Benjamin Poirier

Since the introduction of 82574 support in e1000e, the driver has worked
on the assumption that msi-x interrupt generation is automatically
disabled after each irq. As it turns out, this is not the case.
Currently, rx interrupts can fire multiple times before and during napi
processing. This can be a problem for users because frames that arrive
in a certain window (after adapter->clean_rx() but before
napi_complete_done() has cleared NAPI_STATE_SCHED) generate an interrupt
which does not lead to napi_schedule(). These frames sit in the rx queue
until another frame arrives (a tcp retransmit for example).

While the EIAC and CTRL_EXT registers are properly configured for irq
automask, the modification of IAM in e1000_configure_msix() is what
prevents automask from working as intended.

This patch removes that erroneous write and fixes interrupt rearming for
tx interrupts. It also clears IAME from CTRL_EXT. This is not strictly
necessary for operation of the driver but it is to avoid disruption from
potential programs that access the registers directly, like `ethregs -c`.

Reported-by: Frank Steiner <steiner-...@bio.ifi.lmu.de>
Signed-off-by: Benjamin Poirier <bpoir...@suse.com>
---
 drivers/net/ethernet/intel/e1000e/netdev.c | 11 +--
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c 
b/drivers/net/ethernet/intel/e1000e/netdev.c
index ed7cc8e..2a22ed7 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -1931,6 +1931,9 @@ static irqreturn_t e1000_intr_msix_tx(int __always_unused 
irq, void *data)
/* Ring was not completely cleaned, so fire another interrupt */
ew32(ICS, tx_ring->ims_val);
 
+   if (!test_bit(__E1000_DOWN, &adapter->state))
+   ew32(IMS, adapter->tx_ring->ims_val);
+
return IRQ_HANDLED;
 }
 
@@ -2018,12 +2021,8 @@ static void e1000_configure_msix(struct e1000_adapter 
*adapter)
ew32(IVAR, ivar);
 
/* enable MSI-X PBA support */
-   ctrl_ext = er32(CTRL_EXT);
-   ctrl_ext |= E1000_CTRL_EXT_PBA_CLR;
-
-   /* Auto-Mask Other interrupts upon ICR read */
-   ew32(IAM, ~E1000_EIAC_MASK_82574 | E1000_IMS_OTHER);
-   ctrl_ext |= E1000_CTRL_EXT_EIAME;
+   ctrl_ext = er32(CTRL_EXT) & ~E1000_CTRL_EXT_IAME;
+   ctrl_ext |= E1000_CTRL_EXT_PBA_CLR | E1000_CTRL_EXT_EIAME;
ew32(CTRL_EXT, ctrl_ext);
e1e_flush();
 }
-- 
2.6.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v3 2/4] e1000e: Do not read icr in Other interrupt

2015-11-09 Thread Benjamin Poirier

Remove the icr read in the Other interrupt handler and use eiac to
autoclear the Other bit from icr and ims. This allows us to avoid
interference with rx and tx interrupts in the Other interrupt handler.

The information read from icr is not needed. IMS is configured such that
the only interrupt cause that can trigger the Other interrupt is Link
Status Change.

Signed-off-by: Benjamin Poirier <bpoir...@suse.com>

---
I noticed an 8-16% improvement in netperf rr tests after applying this
patch. This is a little surprising since this patch touches the handling
of Other interrupts, which do not occur during such a test. Some
profiling was not very insightful but the improvement seems related to
writing Other to EIAC.
---
 drivers/net/ethernet/intel/e1000e/netdev.c | 22 +++---
 1 file changed, 7 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c 
b/drivers/net/ethernet/intel/e1000e/netdev.c
index a228167..a73e323 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -1905,24 +1905,15 @@ static irqreturn_t e1000_msix_other(int __always_unused 
irq, void *data)
struct net_device *netdev = data;
struct e1000_adapter *adapter = netdev_priv(netdev);
struct e1000_hw *hw = &adapter->hw;
-   u32 icr = er32(ICR);
 
-   if (icr & adapter->eiac_mask)
-   ew32(ICS, (icr & adapter->eiac_mask));
+   hw->mac.get_link_status = true;
 
-   if (icr & E1000_ICR_OTHER) {
-   if (!(icr & E1000_ICR_LSC))
-   goto no_link_interrupt;
-   hw->mac.get_link_status = true;
-   /* guard against interrupt when we're going down */
-   if (!test_bit(__E1000_DOWN, &adapter->state))
-   mod_timer(&adapter->watchdog_timer, jiffies + 1);
+   /* guard against interrupt when we're going down */
+   if (!test_bit(__E1000_DOWN, &adapter->state)) {
+   mod_timer(&adapter->watchdog_timer, jiffies + 1);
+   ew32(IMS, E1000_IMS_OTHER);
}
 
-no_link_interrupt:
-   if (!test_bit(__E1000_DOWN, &adapter->state))
-   ew32(IMS, E1000_IMS_LSC | E1000_IMS_OTHER);
-
return IRQ_HANDLED;
 }
 
@@ -2019,6 +2010,7 @@ static void e1000_configure_msix(struct e1000_adapter 
*adapter)
   hw->hw_addr + E1000_EITR_82574(vector));
else
writel(1, hw->hw_addr + E1000_EITR_82574(vector));
+   adapter->eiac_mask |= E1000_IMS_OTHER;
 
/* Cause Tx interrupts on every write back */
ivar |= (1 << 31);
@@ -2247,7 +2239,7 @@ static void e1000_irq_enable(struct e1000_adapter 
*adapter)
 
if (adapter->msix_entries) {
ew32(EIAC_82574, adapter->eiac_mask & E1000_EIAC_MASK_82574);
-   ew32(IMS, adapter->eiac_mask | E1000_IMS_OTHER | E1000_IMS_LSC);
+   ew32(IMS, adapter->eiac_mask | E1000_IMS_LSC);
} else if ((hw->mac.type == e1000_pch_lpt) ||
   (hw->mac.type == e1000_pch_spt)) {
ew32(IMS, IMS_ENABLE_MASK | E1000_IMS_ECCER);
-- 
2.6.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v3 0/4] e1000e msi-x fixes

2015-11-09 Thread Benjamin Poirier

Hi,

For this series:


Benjamin Poirier (4):
  e1000e: Remove unreachable code
  e1000e: Do not read icr in Other interrupt
  e1000e: Do not write lsc to ics in msi-x mode
  e1000e: Fix msi-x interrupt automask

 drivers/net/ethernet/intel/e1000e/defines.h |  3 +-
 drivers/net/ethernet/intel/e1000e/netdev.c  | 66 -
 2 files changed, 30 insertions(+), 39 deletions(-)

Changes in v3:
Preserve LSC in IMS, LSC events are not delivered otherwise.
Disable CTRL_EXT.IAME to prevent IMC write on ICR read from external
program.

Changes in v2:
Address review comments from Alexander Duyck: extend cleanup of Other
interrupt handler and use tx_ring->ims_val.


The first three patches clean up handling of Other interrupts and the
last patch fixes tx and rx interrupts. Please consider reading the
description for that patch before proceeding. I believe that the
following simple tracing statements are helpful in detecting the problem
fixed by the last patch.

diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c 
b/drivers/net/ethernet/intel/e1000e/netdev.c
index a09d1e4..29b8c6e 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -1942,6 +1942,9 @@ static irqreturn_t e1000_intr_msix_rx(int __always_unused 
irq, void *data)
struct net_device *netdev = data;
struct e1000_adapter *adapter = netdev_priv(netdev);
struct e1000_ring *rx_ring = adapter->rx_ring;
+   struct e1000_hw *hw = &adapter->hw;
+
+   trace_printk("%s: rxq0 irq ims 0x%08x\n", netdev->name, er32(IMS));
 
/* Write the ITR value calculated at the end of the
 * previous interrupt.
@@ -1956,6 +1959,7 @@ static irqreturn_t e1000_intr_msix_rx(int __always_unused 
irq, void *data)
adapter->total_rx_bytes = 0;
adapter->total_rx_packets = 0;
__napi_schedule(&adapter->napi);
+   trace_printk("%s: scheduling napi\n", netdev->name);
}
return IRQ_HANDLED;
 }
@@ -2663,6 +2667,8 @@ static int e1000e_poll(struct napi_struct *napi, int 
weight)
struct net_device *poll_dev = adapter->netdev;
int tx_cleaned = 1, work_done = 0;
 
+   trace_printk("%s: poll starting ims 0x%08x\n", poll_dev->name,
+er32(IMS));
adapter = netdev_priv(poll_dev);
 
if (!adapter->msix_entries ||
@@ -2680,6 +2686,8 @@ static int e1000e_poll(struct napi_struct *napi, int 
weight)
e1000_set_itr(adapter);
napi_complete_done(napi, work_done);
if (!test_bit(__E1000_DOWN, &adapter->state)) {
+   trace_printk("%s: will enable rxq0 irq\n",
+poll_dev->name);
if (adapter->msix_entries)
ew32(IMS, adapter->rx_ring->ims_val);
else

 8< 

With that patch but without the patches in this series we can see that rx irqs
occur at unexpected times:

  -0 [000] .Ns.  1986.887517: e1000e_poll: eth1: will enable 
rxq0 irq
  -0 [000] d.h.  1986.896654: e1000_intr_msix_rx: eth1: rxq0 
irq ims 0x0154
  -0 [000] d.h.  1986.896657: e1000_intr_msix_rx: eth1: 
scheduling napi
  -0 [000] d.H.  1986.896662: e1000_intr_msix_rx: eth1: rxq0 
irq ims 0x0154
  -0 [000] ..s.  1986.896667: e1000e_poll: eth1: poll 
starting ims 0x0154
Warning: many interrupts (2) before napi
  -0 [000] ..s.  1986.896685: e1000e_poll: eth1: will enable 
rxq0 irq

  -0 [000] d.h.  1990.688870: e1000_intr_msix_rx: eth1: 
scheduling napi
  -0 [000] ..s.  1990.688875: e1000e_poll: eth1: poll 
starting ims 0x0154
  -0 [000] dNH.  1990.688913: e1000_intr_msix_rx: eth1: rxq0 
irq ims 0x0154
Warning: interrupt inside napi
  -0 [000] .Ns.  1990.688916: e1000e_poll: eth1: will enable 
rxq0 irq
  -0 [000] d.h.  1990.729688: e1000_intr_msix_rx: eth1: rxq0 
irq ims 0x0154

Here's a typical sequence after applying the patches in this series. Notice
that ims is changed. Another printk at the end of e1000e_poll would show it to
be 0x0154.

  -0 [000] d.h. 23547.977917: e1000_intr_msix_rx: eth1: rxq0 
irq ims 0x0144
  -0 [000] d.h. 23547.977922: e1000_intr_msix_rx: eth1: 
scheduling napi
  -0 [000] ..s. 23547.977928: e1000e_poll: eth1: poll 
starting ims 0x0144
  -0 [000] ..s. 23547.977961: e1000e_poll: eth1: will enable 
rxq0 irq

Finally, here's the script I used to generate the warnings above:

#!/usr/bin/python3

import sys
import re
import pprint


class NaE(Exception):
"Not an Event"
pass

class Event:
def __init__(self, line):
# sample events:
#  -0 [000] d.h.  2025.256536: e1000_intr_

[PATCH v3 1/4] e1000e: Remove unreachable code

2015-11-09 Thread Benjamin Poirier

msi-x interrupts are not shared so there's no need to check if the
interrupt was really from this adapter.

Signed-off-by: Benjamin Poirier <bpoir...@suse.com>
---
 drivers/net/ethernet/intel/e1000e/netdev.c | 6 --
 1 file changed, 6 deletions(-)

diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c 
b/drivers/net/ethernet/intel/e1000e/netdev.c
index 0a854a4..a228167 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -1907,12 +1907,6 @@ static irqreturn_t e1000_msix_other(int __always_unused 
irq, void *data)
struct e1000_hw *hw = &adapter->hw;
u32 icr = er32(ICR);
 
-   if (!(icr & E1000_ICR_INT_ASSERTED)) {
-   if (!test_bit(__E1000_DOWN, &adapter->state))
-   ew32(IMS, E1000_IMS_OTHER);
-   return IRQ_NONE;
-   }
-
if (icr & adapter->eiac_mask)
ew32(ICS, (icr & adapter->eiac_mask));
 
-- 
2.6.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v2 2/4] e1000e: Do not read icr in Other interrupt

2015-11-04 Thread Benjamin Poirier

On 2015/10/30 12:19, Alexander Duyck wrote:
> On 10/30/2015 10:31 AM, Benjamin Poirier wrote:
> >Using eiac instead of reading icr allows us to avoid interference with
> >rx and tx interrupts in the Other interrupt handler.
> >
> >According to the 82574 datasheet section 10.2.4.1, interrupt causes that
> >trigger the Other interrupt are
> >1) Link Status Change.
> >2) Receiver Overrun.
> >3) MDIO Access Complete.
> >4) Small Receive Packet Detected.
> >5) Receive ACK Frame Detected.
> >6) Manageability Event Detected.
> >
> >Causes 3, 4, 5 are related to features which are not enabled by the
> >driver. Always assume that cause 1 is what triggered the Other interrupt
> >and set get_link_status. Cause 2 and 6 should be rare enough that the
> >extra cost of needlessly re-reading the link status is negligible.
> >
> >Signed-off-by: Benjamin Poirier <bpoir...@suse.com>
> 
> You might want to instead use a write of LSC to the ICR instead of just
> using auto-clear and not enabling LSC.  My concern is that you might no
> longer be getting link status change events at all.  An easy test is to just
> unplug/plug the cable a few times, or run "ethtool -r" on the link partner
> if connected back to back.  You should see messages appear in the dmesg log
> indicating that the link state changed.
> 
> In addition you should probably clear the IAME bit in the CTRL_EXT register
> so that you don't risk masking the interrupts on the ICR read or write.

Thanks, your concern about not getting LSC events was right. After more
experimentation I noticed that in order for the Other interrupt to be
raised for each of these six conditions, the IMS bit for that condition
must also be set. I've restored setting LSC in IMS. OTOH, I don't see a
need to clear LSC from ICR. Even without an ICR read or write-to-clear
to clear the LSC bit, Other interrupts are raised to signal LSC events.

I'll wait for net-next to reopen and send v3.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v2 4/4] e1000e: Fix msi-x interrupt automask

2015-10-30 Thread Benjamin Poirier

Since the introduction of 82574 support in e1000e, the driver has worked
on the assumption that msi-x interrupt generation is automatically
disabled after each irq. As it turns out, this is not the case.
Currently, rx interrupts can fire multiple times before and during napi
processing. This can be a problem for users because frames that arrive
in a certain window (after adapter->clean_rx() but before
napi_complete_done() has cleared NAPI_STATE_SCHED) generate an interrupt
which does not lead to napi_schedule(). These frames sit in the rx queue
until another frame arrives (a tcp retransmit for example).

While the EIAC and CTRL_EXT registers are properly configured for irq
automask, the modification of IAM in e1000_configure_msix() is what
prevents automask from working as intended.

This patch removes that erroneous write and fixes interrupt rearming for
tx interrupts.

Reported-by: Frank Steiner <steiner-...@bio.ifi.lmu.de>
Signed-off-by: Benjamin Poirier <bpoir...@suse.com>
---
 drivers/net/ethernet/intel/e1000e/netdev.c | 9 -
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c 
b/drivers/net/ethernet/intel/e1000e/netdev.c
index 639fbe8..b5549d1 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -1932,6 +1932,9 @@ static irqreturn_t e1000_intr_msix_tx(int __always_unused 
irq, void *data)
/* Ring was not completely cleaned, so fire another interrupt */
ew32(ICS, tx_ring->ims_val);
 
+   if (!test_bit(__E1000_DOWN, &adapter->state))
+   ew32(IMS, adapter->tx_ring->ims_val);
+
return IRQ_HANDLED;
 }
 
@@ -2020,11 +2023,7 @@ static void e1000_configure_msix(struct e1000_adapter 
*adapter)
 
/* enable MSI-X PBA support */
ctrl_ext = er32(CTRL_EXT);
-   ctrl_ext |= E1000_CTRL_EXT_PBA_CLR;
-
-   /* Auto-Mask Other interrupts upon ICR read */
-   ew32(IAM, ~E1000_EIAC_MASK_82574 | E1000_IMS_OTHER);
-   ctrl_ext |= E1000_CTRL_EXT_EIAME;
+   ctrl_ext |= E1000_CTRL_EXT_PBA_CLR | E1000_CTRL_EXT_EIAME;
ew32(CTRL_EXT, ctrl_ext);
e1e_flush();
 }
-- 
2.6.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v2 0/4] e1000e msi-x fixes

2015-10-30 Thread Benjamin Poirier

Hi,

For this series:


Benjamin Poirier (4):
  e1000e: Remove unreachable code
  e1000e: Do not read icr in Other interrupt
  e1000e: Do not write lsc to ics in msi-x mode
  e1000e: Fix msi-x interrupt automask

 drivers/net/ethernet/intel/e1000e/defines.h |  3 +-
 drivers/net/ethernet/intel/e1000e/netdev.c  | 65 +
 2 files changed, 30 insertions(+), 38 deletions(-)

Changes in v2:
Address review comments from Alexander Duyck: extend cleanup of Other
interrupt handler and use tx_ring->ims_val.


The first three patches clean up handling of Other interrupts and the
last patch fixes tx and rx interrupts. Please consider reading the
description for that patch before proceeding. I believe that the
following simple tracing statements are helpful in detecting the problem
fixed by the last patch.

diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c 
b/drivers/net/ethernet/intel/e1000e/netdev.c
index 8881256..707a525 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -1952,6 +1952,9 @@ static irqreturn_t e1000_intr_msix_rx(int __always_unused 
irq, void *data)
struct net_device *netdev = data;
struct e1000_adapter *adapter = netdev_priv(netdev);
struct e1000_ring *rx_ring = adapter->rx_ring;
+   struct e1000_hw *hw = &adapter->hw;
+
+   trace_printk("%s: rxq0 irq ims 0x%08x\n", netdev->name, er32(IMS));
 
/* Write the ITR value calculated at the end of the
 * previous interrupt.
@@ -1966,6 +1969,7 @@ static irqreturn_t e1000_intr_msix_rx(int __always_unused 
irq, void *data)
adapter->total_rx_bytes = 0;
adapter->total_rx_packets = 0;
__napi_schedule(&adapter->napi);
+   trace_printk("%s: scheduling napi\n", netdev->name);
}
return IRQ_HANDLED;
 }
@@ -2672,6 +2676,8 @@ static int e1000e_poll(struct napi_struct *napi, int 
weight)
struct net_device *poll_dev = adapter->netdev;
int tx_cleaned = 1, work_done = 0;
 
+   trace_printk("%s: poll starting ims 0x%08x\n", poll_dev->name,
+er32(IMS));
adapter = netdev_priv(poll_dev);
 
if (!adapter->msix_entries ||
@@ -2689,6 +2695,8 @@ static int e1000e_poll(struct napi_struct *napi, int 
weight)
e1000_set_itr(adapter);
napi_complete_done(napi, work_done);
if (!test_bit(__E1000_DOWN, &adapter->state)) {
+   trace_printk("%s: will enable rxq0 irq\n",
+poll_dev->name);
if (adapter->msix_entries)
ew32(IMS, adapter->rx_ring->ims_val);
else

 8< 

With that patch but without the patches in this series we can see that rx irqs
occur at unexpected times:

  -0 [000] .Ns.  1986.887517: e1000e_poll: eth1: will enable 
rxq0 irq
  -0 [000] d.h.  1986.896654: e1000_intr_msix_rx: eth1: rxq0 
irq ims 0x0154
  -0 [000] d.h.  1986.896657: e1000_intr_msix_rx: eth1: 
scheduling napi
  -0 [000] d.H.  1986.896662: e1000_intr_msix_rx: eth1: rxq0 
irq ims 0x0154
  -0 [000] ..s.  1986.896667: e1000e_poll: eth1: poll 
starting ims 0x0154
Warning: many interrupts (2) before napi
  -0 [000] ..s.  1986.896685: e1000e_poll: eth1: will enable 
rxq0 irq

  -0 [000] d.h.  1990.688870: e1000_intr_msix_rx: eth1: 
scheduling napi
  -0 [000] ..s.  1990.688875: e1000e_poll: eth1: poll 
starting ims 0x0154
  -0 [000] dNH.  1990.688913: e1000_intr_msix_rx: eth1: rxq0 
irq ims 0x0154
Warning: interrupt inside napi
  -0 [000] .Ns.  1990.688916: e1000e_poll: eth1: will enable 
rxq0 irq
  -0 [000] d.h.  1990.729688: e1000_intr_msix_rx: eth1: rxq0 
irq ims 0x0154

Here's a typical sequence after applying the patches in this series. Notice
that ims is changed. Another printk at the end of e1000e_poll would show it to
be 0x0150.

  -0 [000] d.h. 672874.016104: e1000_intr_msix_rx: eth1: rxq0 
irq ims 0x0140
  -0 [000] d.h. 672874.016107: e1000_intr_msix_rx: eth1: 
scheduling napi
  -0 [000] ..s. 672874.016112: e1000e_poll: eth1: poll 
starting ims 0x0140
  -0 [000] ..s. 672874.016126: e1000e_poll: eth1: will enable 
rxq0 irq

Finally, here's the script I used to generate the warnings above:

#!/usr/bin/python3

import sys
import re
import pprint


class NaE(Exception):
"Not an Event"
pass

class Event:
def __init__(self, line):
# sample events:
#  -0 [000] d.h.  2025.256536: e1000_intr_msix_rx: eth1: rxq0 
irq ims 0x0154
#  -0 [000] d.h.  2025.256539: e1000_intr_msix_rx: eth1: 
scheduling napi
#  -0

[PATCH v2 2/4] e1000e: Do not read icr in Other interrupt

2015-10-30 Thread Benjamin Poirier

Using eiac instead of reading icr allows us to avoid interference with
rx and tx interrupts in the Other interrupt handler.

According to the 82574 datasheet section 10.2.4.1, interrupt causes that
trigger the Other interrupt are
1) Link Status Change.
2) Receiver Overrun.
3) MDIO Access Complete.
4) Small Receive Packet Detected.
5) Receive ACK Frame Detected.
6) Manageability Event Detected.

Causes 3, 4, 5 are related to features which are not enabled by the
driver. Always assume that cause 1 is what triggered the Other interrupt
and set get_link_status. Cause 2 and 6 should be rare enough that the
extra cost of needlessly re-reading the link status is negligible.

Signed-off-by: Benjamin Poirier <bpoir...@suse.com>
---
 drivers/net/ethernet/intel/e1000e/netdev.c | 23 ---
 1 file changed, 8 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c 
b/drivers/net/ethernet/intel/e1000e/netdev.c
index a228167..602fcc9 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -1905,24 +1905,16 @@ static irqreturn_t e1000_msix_other(int __always_unused 
irq, void *data)
struct net_device *netdev = data;
struct e1000_adapter *adapter = netdev_priv(netdev);
struct e1000_hw *hw = &adapter->hw;
-   u32 icr = er32(ICR);
 
-   if (icr & adapter->eiac_mask)
-   ew32(ICS, (icr & adapter->eiac_mask));
+   /* Assume that the Other interrupt was triggered by LSC */
+   hw->mac.get_link_status = true;
 
-   if (icr & E1000_ICR_OTHER) {
-   if (!(icr & E1000_ICR_LSC))
-   goto no_link_interrupt;
-   hw->mac.get_link_status = true;
-   /* guard against interrupt when we're going down */
-   if (!test_bit(__E1000_DOWN, &adapter->state))
-   mod_timer(&adapter->watchdog_timer, jiffies + 1);
+   /* guard against interrupt when we're going down */
+   if (!test_bit(__E1000_DOWN, &adapter->state)) {
+   mod_timer(&adapter->watchdog_timer, jiffies + 1);
+   ew32(IMS, E1000_IMS_OTHER);
}
 
-no_link_interrupt:
-   if (!test_bit(__E1000_DOWN, &adapter->state))
-   ew32(IMS, E1000_IMS_LSC | E1000_IMS_OTHER);
-
return IRQ_HANDLED;
 }
 
@@ -2019,6 +2011,7 @@ static void e1000_configure_msix(struct e1000_adapter 
*adapter)
   hw->hw_addr + E1000_EITR_82574(vector));
else
writel(1, hw->hw_addr + E1000_EITR_82574(vector));
+   adapter->eiac_mask |= E1000_IMS_OTHER;
 
/* Cause Tx interrupts on every write back */
ivar |= (1 << 31);
@@ -2247,7 +2240,7 @@ static void e1000_irq_enable(struct e1000_adapter 
*adapter)
 
if (adapter->msix_entries) {
ew32(EIAC_82574, adapter->eiac_mask & E1000_EIAC_MASK_82574);
-   ew32(IMS, adapter->eiac_mask | E1000_IMS_OTHER | E1000_IMS_LSC);
+   ew32(IMS, adapter->eiac_mask);
} else if ((hw->mac.type == e1000_pch_lpt) ||
   (hw->mac.type == e1000_pch_spt)) {
ew32(IMS, IMS_ENABLE_MASK | E1000_IMS_ECCER);
-- 
2.6.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v2 3/4] e1000e: Do not write lsc to ics in msi-x mode

2015-10-30 Thread Benjamin Poirier

In msi-x mode, there is no handler for the lsc interrupt so there is no
point in writing that to ics now that we always assume Other interrupts
are caused by lsc.

Reviewed-by: Jasna Hodzic <jhod...@ucdavis.edu>
Signed-off-by: Benjamin Poirier <bpoir...@suse.com>
---
 drivers/net/ethernet/intel/e1000e/defines.h |  3 ++-
 drivers/net/ethernet/intel/e1000e/netdev.c  | 27 ---
 2 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/intel/e1000e/defines.h 
b/drivers/net/ethernet/intel/e1000e/defines.h
index 133d407..f7c7804 100644
--- a/drivers/net/ethernet/intel/e1000e/defines.h
+++ b/drivers/net/ethernet/intel/e1000e/defines.h
@@ -441,12 +441,13 @@
 #define E1000_IMS_RXQ1  E1000_ICR_RXQ1  /* Rx Queue 1 Interrupt */
 #define E1000_IMS_TXQ0  E1000_ICR_TXQ0  /* Tx Queue 0 Interrupt */
 #define E1000_IMS_TXQ1  E1000_ICR_TXQ1  /* Tx Queue 1 Interrupt */
-#define E1000_IMS_OTHER E1000_ICR_OTHER /* Other Interrupts */
+#define E1000_IMS_OTHER E1000_ICR_OTHER /* Other Interrupt */
 
 /* Interrupt Cause Set */
 #define E1000_ICS_LSC   E1000_ICR_LSC   /* Link Status Change */
 #define E1000_ICS_RXSEQ E1000_ICR_RXSEQ /* Rx sequence error */
 #define E1000_ICS_RXDMT0 E1000_ICR_RXDMT0 /* Rx desc min. threshold */
+#define E1000_ICS_OTHER E1000_ICR_OTHER /* Other Interrupt */
 
 /* Transmit Descriptor Control */
 #define E1000_TXDCTL_PTHRESH 0x003F /* TXDCTL Prefetch Threshold */
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c 
b/drivers/net/ethernet/intel/e1000e/netdev.c
index 602fcc9..639fbe8 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -4131,10 +4131,23 @@ void e1000e_reset(struct e1000_adapter *adapter)
 
 }
 
-int e1000e_up(struct e1000_adapter *adapter)
+/**
+ * e1000e_trigger_lsc - trigger a lsc interrupt
+ *
+ * Fire a link status change interrupt to start the watchdog.
+ **/
+static void e1000e_trigger_lsc(struct e1000_adapter *adapter)
 {
struct e1000_hw *hw = &adapter->hw;
 
+   if (adapter->msix_entries)
+   ew32(ICS, E1000_ICS_OTHER);
+   else
+   ew32(ICS, E1000_ICS_LSC);
+}
+
+int e1000e_up(struct e1000_adapter *adapter)
+{
/* hardware has been reset, we need to reload some things */
e1000_configure(adapter);
 
@@ -4146,11 +4159,7 @@ int e1000e_up(struct e1000_adapter *adapter)
 
netif_start_queue(adapter->netdev);
 
-   /* fire a link change interrupt to start the watchdog */
-   if (adapter->msix_entries)
-   ew32(ICS, E1000_ICS_LSC | E1000_ICR_OTHER);
-   else
-   ew32(ICS, E1000_ICS_LSC);
+   e1000e_trigger_lsc(adapter);
 
return 0;
 }
@@ -4577,11 +4586,7 @@ static int e1000_open(struct net_device *netdev)
hw->mac.get_link_status = true;
pm_runtime_put(&pdev->dev);
 
-   /* fire a link status change interrupt to start the watchdog */
-   if (adapter->msix_entries)
-   ew32(ICS, E1000_ICS_LSC | E1000_ICR_OTHER);
-   else
-   ew32(ICS, E1000_ICS_LSC);
+   e1000e_trigger_lsc(adapter);
 
return 0;
 
-- 
2.6.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v2 1/4] e1000e: Remove unreachable code

2015-10-30 Thread Benjamin Poirier

msi-x interrupts are not shared so there's no need to check if the
interrupt was really from this adapter.

Signed-off-by: Benjamin Poirier <bpoir...@suse.com>
---
 drivers/net/ethernet/intel/e1000e/netdev.c | 6 --
 1 file changed, 6 deletions(-)

diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c 
b/drivers/net/ethernet/intel/e1000e/netdev.c
index 0a854a4..a228167 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -1907,12 +1907,6 @@ static irqreturn_t e1000_msix_other(int __always_unused 
irq, void *data)
struct e1000_hw *hw = &adapter->hw;
u32 icr = er32(ICR);
 
-   if (!(icr & E1000_ICR_INT_ASSERTED)) {
-   if (!test_bit(__E1000_DOWN, &adapter->state))
-   ew32(IMS, E1000_IMS_OTHER);
-   return IRQ_NONE;
-   }
-
if (icr & adapter->eiac_mask)
ew32(ICS, (icr & adapter->eiac_mask));
 
-- 
2.6.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH net-next] net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)

2016-06-16 Thread Benjamin Poirier

On 2016/06/13 11:46, Netanel Belgazal wrote:
[...]
> +
> +static int ena_set_coalesce(struct net_device *net_dev,
> + struct ethtool_coalesce *coalesce)
> +{
> + struct ena_adapter *adapter = netdev_priv(net_dev);
> + struct ena_com_dev *ena_dev = adapter->ena_dev;
> + int rc;
> +
> + if (!ena_com_interrupt_moderation_supported(ena_dev)) {
> + /* the devie doesn't support interrupt moderation */
> + return -EOPNOTSUPP;
> + }
> +
> + /* Note, adaptive coalescing settings are updated through sysfs */

I believe the usual approach is to use ethtool for these kinds of
settings, extending the interface if necessary.

> + if (coalesce->rx_coalesce_usecs_irq ||
> + coalesce->rx_max_coalesced_frames ||
> + coalesce->rx_max_coalesced_frames_irq ||
> + coalesce->tx_coalesce_usecs_irq ||
> + coalesce->tx_max_coalesced_frames ||
> + coalesce->tx_max_coalesced_frames_irq ||
> + coalesce->stats_block_coalesce_usecs ||
> + coalesce->use_adaptive_tx_coalesce ||
> + coalesce->pkt_rate_low ||
> + coalesce->rx_coalesce_usecs_low ||
> + coalesce->rx_max_coalesced_frames_low ||
> + coalesce->tx_coalesce_usecs_low ||
> + coalesce->tx_max_coalesced_frames_low ||
> + coalesce->pkt_rate_high ||
> + coalesce->rx_coalesce_usecs_high ||
> + coalesce->rx_max_coalesced_frames_high ||
> + coalesce->tx_coalesce_usecs_high ||
> + coalesce->tx_max_coalesced_frames_high ||
> + coalesce->rate_sample_interval)
> + return -EINVAL;
> +

[...]

> +
> +static ssize_t ena_store_small_copy_len(struct device *dev,
> + struct device_attribute *attr,
> + const char *buf, size_t len)
> +{
> + struct ena_adapter *adapter = dev_get_drvdata(dev);
> + unsigned long small_copy_len;
> + struct ena_ring *rx_ring;
> + int err, i;
> +
> + err = kstrtoul(buf, 10, &small_copy_len);
> + if (err < 0)
> + return err;
> +
> + err = ena_validate_small_copy_len(adapter, small_copy_len);
> + if (err)
> + return err;
> +
> + rtnl_lock();
> + adapter->small_copy_len = small_copy_len;
> +
> + for (i = 0; i < adapter->num_queues; i++) {
> + rx_ring = &adapter->rx_ring[i];
> + rx_ring->rx_small_copy_len = small_copy_len;
> + }
> + rtnl_unlock();
> +
> + return len;
> +}
> +
> +static ssize_t ena_show_small_copy_len(struct device *dev,
> +struct device_attribute *attr, char *buf)
> +{
> + struct ena_adapter *adapter = dev_get_drvdata(dev);
> +
> + return sprintf(buf, "%d\n", adapter->small_copy_len);
> +}
> +
> +static DEVICE_ATTR(small_copy_len, S_IRUGO | S_IWUSR, 
> ena_show_small_copy_len,
> +ena_store_small_copy_len);

This is what many other drivers call (rx_)copybreak. Perhaps it's time
to add it to ethtool as well?

[PATCH] ipv6: Annotate change of locking mechanism for np->opt

2016-02-17 Thread Benjamin Poirier

follows up commit 45f6fad84cc3 ("ipv6: add complete rcu protection around
np->opt") which added mixed rcu/refcount protection to np->opt.

Given the current implementation of rcu_pointer_handoff(), this has no
effect at runtime.

Signed-off-by: Benjamin Poirier <bpoir...@suse.com>
---
 include/net/ipv6.h | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 6570f37..f3c9857 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -259,8 +259,12 @@ static inline struct ipv6_txoptions *txopt_get(const 
struct ipv6_pinfo *np)
 
rcu_read_lock();
opt = rcu_dereference(np->opt);
-   if (opt && !atomic_inc_not_zero(&opt->refcnt))
-   opt = NULL;
+   if (opt) {
+   if (!atomic_inc_not_zero(&opt->refcnt))
+   opt = NULL;
+   else
+   opt = rcu_pointer_handoff(opt);
+   }
rcu_read_unlock();
return opt;
 }
-- 
2.7.0

Re: [PATCH 1/2] localmodconfig: Fix parsing of Kconfig "source" statements

2016-04-10 Thread Benjamin Poirier

On 2016/04/08 14:29, Steven Rostedt wrote:
> On Sat,  2 Apr 2016 10:55:21 -0700
> Benjamin Poirier <bpoir...@suse.com> wrote:
> 
> > The parameter of Kconfig "source" statements does not need to be quoted.
> > The current regex causes many kconfig files to be skipped and hence,
> > dependencies to be missed.
> > 
> > Also fix the whitespace repeat count.
> > 
> > Signed-off-by: Benjamin Poirier <bpoir...@suse.com>
> 
> Thanks for sending this. I'll apply it. Should this be marked for
> stable? And if so, how far back?

The first problem dates back to the introduction of streamline_config.pl in
dcc6024 kconfig: add streamline_config.pl to scripts (v2.6.32-rc1)
The second problem started with
19e91b6 modsign: Allow external signing key to be specified (v4.3-rc1)

However, I'm not sure that adding the patch to stable is warranted. I
considered this with regards to the problem fixed by patch 1/2.

First, I searched for cases where dependency info that's currently missing
would lead to symbols being erroneously deactivated but I did not find
any. Since these cases are not so obvious and are quite rare in general, it's
possible that I missed one but at this stage the problem is theoretical.

Second, even if there is such a case, I'm not sure that the problem is
"critical". streamline_config.pl may output an "invalid" config because
it misses some dependencies but the config will be fixed by the
invocation of silentoldconfig that comes right after in the
localmodconfig makefile rule. It might not be the config the user
wanted, but it will be valid.

However, while looking for such a case, I've noticed a few other issues in
streamline_config.pl and I'll send more patches shortly.

> 
> -- Steve
> 
> > ---
> >  scripts/kconfig/streamline_config.pl | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> > 
> > diff --git a/scripts/kconfig/streamline_config.pl 
> > b/scripts/kconfig/streamline_config.pl
> > index f3d3fb4..7036ae3 100755
> > --- a/scripts/kconfig/streamline_config.pl
> > +++ b/scripts/kconfig/streamline_config.pl
> > @@ -188,7 +188,7 @@ sub read_kconfig {
> > $cont = 0;
> >  
> > # collect any Kconfig sources
> > -   if (/^source\s*"(.*)"/) {
> > +   if (/^source\s+"?([^"]+)/) {
> > my $kconfig = $1;
> > # prevent reading twice.
> > if (!defined($read_kconfigs{$kconfig})) {
> 
>

[PATCH 3/4] localmodconfig: Add missing $ to reference a variable

2016-04-10 Thread Benjamin Poirier

That is clearly what the original intention was. This does not change the
output .config but it prevents some useless processing.

! eq "m" is changed to the simpler eq "y"; symbols with values other than
m|y are not included in %orig_configs.

Signed-off-by: Benjamin Poirier <bpoir...@suse.com>
---
 scripts/kconfig/streamline_config.pl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/kconfig/streamline_config.pl 
b/scripts/kconfig/streamline_config.pl
index f06972a..bbc160c 100755
--- a/scripts/kconfig/streamline_config.pl
+++ b/scripts/kconfig/streamline_config.pl
@@ -454,7 +454,7 @@ sub parse_config_depends
$p =~ s/^[^$valid]*[$valid]+//;
 
# We only need to process if the depend config is a module
-   if (!defined($orig_configs{$conf}) || !$orig_configs{conf} eq "m") {
+   if (!defined($orig_configs{$conf}) || $orig_configs{$conf} eq "y") {
next;
}
 
-- 
2.7.4

1 2 3 4 >

1 - 100 of 384 matches

Mail list logo