[PATCH RESEND net-next] tracing: remove holes in events

2021-03-12 Thread Tony Lu
There are some holes in the event definitions, so space is wasted. Based
on the analysis results from pahole and the event format files, 22 events
have more than one hole. To keep the change small and fix the worst cases,
5 events are picked and fixed in this patch according to the following rules.

Rules:

  - try not to affect reading habit and understanding of the fields;
  - can be completely fixed (all holes are removed);

NOTES:

  - changing the order of event fields breaks API compatibility;
programs should parse and determine the real data order, instead of
hard-coding the order of fields;
  - reduce holes as much as possible when adding / modifying events;

Summary (#event_name #before -> #after):

 1. net_dev_start_xmit
5 holes (10 bytes) -> 0

 2. net_dev_rx_verbose_template
6 holes (17 bytes) -> 0

 3. tcp_probe
3 holes (8 bytes) -> 0

 4. qdisc_dequeue
2 holes (8 bytes) -> 0

 5. rpc_xdr_alignment
2 holes (8 bytes) -> 0

Link: https://www.spinics.net/lists/netdev/msg726308.html
Link: https://www.spinics.net/lists/netdev/msg726451.html
Cc: David Miller 
Cc: Eric Dumazet 
Cc: Steven Rostedt 
Cc: Ingo Molnar 
Suggested-by: Steven Rostedt 
Signed-off-by: Tony Lu 
---
 include/trace/events/net.h| 42 +--
 include/trace/events/qdisc.h  |  4 ++--
 include/trace/events/sunrpc.h |  4 ++--
 include/trace/events/tcp.h|  2 +-
 4 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/include/trace/events/net.h b/include/trace/events/net.h
index 2399073c3afc..b1db7ab88d4b 100644
--- a/include/trace/events/net.h
+++ b/include/trace/events/net.h
@@ -20,18 +20,18 @@ TRACE_EVENT(net_dev_start_xmit,
TP_STRUCT__entry(
__string(   name,   dev->name   )
__field(u16,queue_mapping   )
+   __field(u16,protocol)
__field(const void *,   skbaddr )
+   __field(u8, ip_summed   )
__field(bool,   vlan_tagged )
__field(u16,vlan_proto  )
__field(u16,vlan_tci)
-   __field(u16,protocol)
-   __field(u8, ip_summed   )
+   __field(bool,   transport_offset_valid)
+   __field(u8, tx_flags)
__field(unsigned int,   len )
__field(unsigned int,   data_len)
__field(int,network_offset  )
-   __field(bool,   transport_offset_valid)
__field(int,transport_offset)
-   __field(u8, tx_flags)
__field(u16,gso_size)
__field(u16,gso_segs)
__field(u16,gso_type)
@@ -40,19 +40,19 @@ TRACE_EVENT(net_dev_start_xmit,
TP_fast_assign(
__assign_str(name, dev->name);
__entry->queue_mapping = skb->queue_mapping;
+   __entry->protocol = ntohs(skb->protocol);
__entry->skbaddr = skb;
+   __entry->ip_summed = skb->ip_summed;
__entry->vlan_tagged = skb_vlan_tag_present(skb);
__entry->vlan_proto = ntohs(skb->vlan_proto);
__entry->vlan_tci = skb_vlan_tag_get(skb);
-   __entry->protocol = ntohs(skb->protocol);
-   __entry->ip_summed = skb->ip_summed;
+   __entry->transport_offset_valid =
+   skb_transport_header_was_set(skb);
+   __entry->tx_flags = skb_shinfo(skb)->tx_flags;
__entry->len = skb->len;
__entry->data_len = skb->data_len;
__entry->network_offset = skb_network_offset(skb);
-   __entry->transport_offset_valid =
-   skb_transport_header_was_set(skb);
__entry->transport_offset = skb_transport_offset(skb);
-   __entry->tx_flags = skb_shinfo(skb)->tx_flags;
__entry->gso_size = skb_shinfo(skb)->gso_size;
__entry->gso_segs = skb_shinfo(skb)->gso_segs;
__entry->gso_type = skb_shinfo(skb)->gso_type;
@@ -170,23 +170,23 @@ DECLARE_EVENT_CLASS(net_dev_rx_verbose_template,
TP_STRUCT__entry(
__string(   name,   skb->dev->name  )
__field(unsigned i

Re: [PATCH] tracing: remove holes in events

2021-03-11 Thread Tony Lu
On Thu, Mar 11, 2021 at 08:56:19AM -0500, Steven Rostedt wrote:
> On Thu, 11 Mar 2021 17:44:15 +0800
> Tony Lu  wrote:
> 
> > ---
> >  include/trace/events/net.h| 42 +--
> >  include/trace/events/qdisc.h  |  4 ++--
> >  include/trace/events/sunrpc.h |  4 ++--
> >  include/trace/events/tcp.h|  2 +-
> >  4 files changed, 26 insertions(+), 26 deletions(-)
> 
> 
> If all the above are owned by networking, then this patch needs to go
> through the networking tree.

Thanks, I will resend it.


Cheers,
Tony Lu

> 
> -- Steve


[PATCH] tracing: remove holes in events

2021-03-11 Thread Tony Lu
There are some holes in the event definitions, so space is wasted. Based
on the analysis results from pahole and the event format files, 22 events
have more than one hole. To keep the change small and fix the worst cases,
5 events are picked and fixed in this patch according to the following rules.

Rules:

  - try not to affect reading habit and understanding of the fields;
  - can be completely fixed (all holes are removed);

NOTES:

  - changing the order of event fields breaks API compatibility;
programs should parse and determine the real data order at runtime,
instead of hard-coding the order of fields;
  - reduce holes as much as possible when adding / modifying events;

Summary (#event_name #before -> #after):

 1. net_dev_start_xmit
5 holes (10 bytes) -> 0

 2. net_dev_rx_verbose_template
6 holes (17 bytes) -> 0

 3. tcp_probe
3 holes (8 bytes) -> 0

 4. qdisc_dequeue
2 holes (8 bytes) -> 0

 5. rpc_xdr_alignment
2 holes (8 bytes) -> 0

Link: https://www.spinics.net/lists/netdev/msg726308.html
Link: https://www.spinics.net/lists/netdev/msg726451.html
Suggested-by: Steven Rostedt 
Signed-off-by: Tony Lu 
---
 include/trace/events/net.h| 42 +--
 include/trace/events/qdisc.h  |  4 ++--
 include/trace/events/sunrpc.h |  4 ++--
 include/trace/events/tcp.h|  2 +-
 4 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/include/trace/events/net.h b/include/trace/events/net.h
index 2399073c3afc..b1db7ab88d4b 100644
--- a/include/trace/events/net.h
+++ b/include/trace/events/net.h
@@ -20,18 +20,18 @@ TRACE_EVENT(net_dev_start_xmit,
TP_STRUCT__entry(
__string(   name,   dev->name   )
__field(u16,queue_mapping   )
+   __field(u16,protocol)
__field(const void *,   skbaddr )
+   __field(u8, ip_summed   )
__field(bool,   vlan_tagged )
__field(u16,vlan_proto  )
__field(u16,vlan_tci)
-   __field(u16,protocol)
-   __field(u8, ip_summed   )
+   __field(bool,   transport_offset_valid)
+   __field(u8, tx_flags)
__field(unsigned int,   len )
__field(unsigned int,   data_len)
__field(int,network_offset  )
-   __field(bool,   transport_offset_valid)
__field(int,transport_offset)
-   __field(u8, tx_flags)
__field(u16,gso_size)
__field(u16,gso_segs)
__field(u16,gso_type)
@@ -40,19 +40,19 @@ TRACE_EVENT(net_dev_start_xmit,
TP_fast_assign(
__assign_str(name, dev->name);
__entry->queue_mapping = skb->queue_mapping;
+   __entry->protocol = ntohs(skb->protocol);
__entry->skbaddr = skb;
+   __entry->ip_summed = skb->ip_summed;
__entry->vlan_tagged = skb_vlan_tag_present(skb);
__entry->vlan_proto = ntohs(skb->vlan_proto);
__entry->vlan_tci = skb_vlan_tag_get(skb);
-   __entry->protocol = ntohs(skb->protocol);
-   __entry->ip_summed = skb->ip_summed;
+   __entry->transport_offset_valid =
+   skb_transport_header_was_set(skb);
+   __entry->tx_flags = skb_shinfo(skb)->tx_flags;
__entry->len = skb->len;
__entry->data_len = skb->data_len;
__entry->network_offset = skb_network_offset(skb);
-   __entry->transport_offset_valid =
-   skb_transport_header_was_set(skb);
__entry->transport_offset = skb_transport_offset(skb);
-   __entry->tx_flags = skb_shinfo(skb)->tx_flags;
__entry->gso_size = skb_shinfo(skb)->gso_size;
__entry->gso_segs = skb_shinfo(skb)->gso_segs;
__entry->gso_type = skb_shinfo(skb)->gso_type;
@@ -170,23 +170,23 @@ DECLARE_EVENT_CLASS(net_dev_rx_verbose_template,
TP_STRUCT__entry(
__string(   name,   skb->dev->name  )
__field(unsigned int,   napi_id )
-   

Re: [PATCH] net: add net namespace inode for all net_dev events

2021-03-10 Thread Tony Lu
On Wed, Mar 10, 2021 at 11:31:12AM -0500, Steven Rostedt wrote:
> On Wed, 10 Mar 2021 17:03:40 +0800
> Tony Lu  wrote:
> 
> > I use pahole to read vmlinux.o directly with defconfig and
> > CONFIG_DEBUG_INFO enabled, the result shows 22 structs prefixed with
> > trace_event_raw_ that have at least one hole.
> 
> I was thinking of pahole too ;-)
> 
> But the information can also be captured from the format files with simple
> scripts as well. And perhaps be more tuned to see if there's actually a fix
> for them, and ignore reporting it if there is no fix, as all trace events
> are 4 byte aligned, and if we are off by one, sometimes it doesn't matter.

I am going to send a patch to fix this issue later. Many events have
more than one hole, so I am trying to pick the events that really need
to be fixed.


Cheers,
Tony Lu

> 
> -- Steve


Re: [PATCH] net: add net namespace inode for all net_dev events

2021-03-10 Thread Tony Lu
On Wed, Mar 10, 2021 at 09:22:34AM +0000, Lorenz Bauer wrote:
> On Tue, 9 Mar 2021 at 20:12, Eric Dumazet  wrote:
> >
> > On 3/9/21 5:43 AM, Tony Lu wrote:
> > > There are lots of net namespaces on the host runs containers like k8s.
> > > It is very common to see the same interface names among different net
> > > namespaces, such as eth0. It is not possible to distinguish them without
> > > net namespace inode.
> > >
> > > This adds net namespace inode for all net_dev events, help us
> > > distinguish between different net devices.
> > >
> > > Output:
> > >   -0   [006] ..s.   133.306989: net_dev_xmit: 
> > > net_inum=4026531992 dev=eth0 skbaddr=11a87c68 len=54 rc=0
> > >
> > > Signed-off-by: Tony Lu 
> > > ---
> > >
> >
> > There was a proposal from Lorenz to use netns cookies (SO_NETNS_COOKIE) 
> > instead.
> >
> > They have a guarantee of being not reused.
> >
> > After 3d368ab87cf6681f9 ("net: initialize net->net_cookie at netns setup")
> > net->net_cookie is directly available.
> 
> The patch set is at
> https://lore.kernel.org/bpf/20210219154330.93615-1-...@cloudflare.com/
> but I decided to abandon it. I can work around my issue by comparing
> the netns inode of two processes, which is "good enough" for now.

Without the patch set, it is impossible to get net_cookie from
userspace, except from a BPF program. AFAIK, the netns inode has been
widely used to distinguish different netns, and it is easy to use with
docker (/proc/${container_pid}/ns/net). It would be better to provide a
unified approach to do so.


Cheers,
Tony Lu

> 
> -- 
> Lorenz Bauer  |  Systems Engineer
> 6th Floor, County Hall/The Riverside Building, SE1 7PB, UK
> 
> www.cloudflare.com


Re: [PATCH] net: add net namespace inode for all net_dev events

2021-03-10 Thread Tony Lu
On Tue, Mar 09, 2021 at 09:12:45PM +0100, Eric Dumazet wrote:
> 
> 
> On 3/9/21 5:43 AM, Tony Lu wrote:
> > There are lots of net namespaces on the host runs containers like k8s.
> > It is very common to see the same interface names among different net
> > namespaces, such as eth0. It is not possible to distinguish them without
> > net namespace inode.
> > 
> > This adds net namespace inode for all net_dev events, help us
> > distinguish between different net devices.
> > 
> > Output:
> >   -0   [006] ..s.   133.306989: net_dev_xmit: net_inum=4026531992 
> > dev=eth0 skbaddr=11a87c68 len=54 rc=0
> > 
> > Signed-off-by: Tony Lu 
> > ---
> >
> 
> There was a proposal from Lorenz to use netns cookies (SO_NETNS_COOKIE) 
> instead.
> 
> They have a guarantee of being not reused.
> 
> After 3d368ab87cf6681f9 ("net: initialize net->net_cookie at netns setup")
> net->net_cookie is directly available.

It looks better to identify ns with net_cookie rather than inode, and
get the value with NS_GET_COOKIE. I will switch net_inum to net_cookie
in the next patch.


Cheers,
Tony Lu

> 


Re: [PATCH] net: add net namespace inode for all net_dev events

2021-03-10 Thread Tony Lu
On Tue, Mar 09, 2021 at 12:40:11PM -0500, Steven Rostedt wrote:
> On Tue,  9 Mar 2021 12:43:50 +0800
> Tony Lu  wrote:
> 
> > There are lots of net namespaces on the host runs containers like k8s.
> > It is very common to see the same interface names among different net
> > namespaces, such as eth0. It is not possible to distinguish them without
> > net namespace inode.
> > 
> > This adds net namespace inode for all net_dev events, help us
> > distinguish between different net devices.
> > 
> > Output:
> >   -0   [006] ..s.   133.306989: net_dev_xmit: net_inum=4026531992 
> > dev=eth0 skbaddr=11a87c68 len=54 rc=0
> > 
> > Signed-off-by: Tony Lu 
> > ---
> >  include/trace/events/net.h | 35 +--
> >  1 file changed, 25 insertions(+), 10 deletions(-)
> > 
> > diff --git a/include/trace/events/net.h b/include/trace/events/net.h
> > index 2399073c3afc..a52f90d83411 100644
> > --- a/include/trace/events/net.h
> > +++ b/include/trace/events/net.h
> > @@ -35,6 +35,7 @@ TRACE_EVENT(net_dev_start_xmit,
> > __field(u16,gso_size)
> > __field(u16,gso_segs)
> > __field(u16,gso_type)
> > +   __field(unsigned int,   net_inum)
> > ),
> 
> This patch made me take a look at the net_dev_start_xmit trace event, and I
> see it's very "holy". That is, it has lots of holes in the structure.
> 
>   TP_STRUCT__entry(
>   __string(   name,   dev->name   )
>   __field(u16,queue_mapping   )
>   __field(const void *,   skbaddr )
>   __field(bool,   vlan_tagged )
>   __field(u16,vlan_proto  )
>   __field(u16,vlan_tci)
>   __field(u16,protocol)
>   __field(u8, ip_summed   )
>   __field(unsigned int,   len )
>   __field(unsigned int,   data_len)
>   __field(int,network_offset  )
>   __field(bool,   transport_offset_valid)
>   __field(int,transport_offset)
>   __field(u8, tx_flags)
>   __field(u16,gso_size)
>   __field(u16,gso_segs)
>   __field(u16,gso_type)
>   __field(unsigned int,   net_inum)
>   ),
> 
> If you look at /sys/kernel/tracing/events/net/net_dev_start_xmit/format
> 
> name: net_dev_start_xmit
> ID: 1581
> format:
>   field:unsigned short common_type;   offset:0;   size:2; 
> signed:0;
>   field:unsigned char common_flags;   offset:2;   size:1; 
> signed:0;
>   field:unsigned char common_preempt_count;   offset:3;   size:1; 
> signed:0;
>   field:int common_pid;   offset:4;   size:4; signed:1;
> 
>   field:__data_loc char[] name;   offset:8;   size:4; signed:1;
>   field:u16 queue_mapping;offset:12;  size:2; signed:0;
>   field:const void * skbaddr; offset:16;  size:8; signed:0;
> 
> Notice, queue_mapping is 2 bytes at offset 12 (ends at offset 14), but
> skbaddr starts at offset 16. That means there's two bytes wasted.
> 
>   field:bool vlan_tagged; offset:24;  size:1; signed:0;
>   field:u16 vlan_proto;   offset:26;  size:2; signed:0;
> 
> Another byte missing above (24 + 1 != 26)
> 
>   field:u16 vlan_tci; offset:28;  size:2; signed:0;
>   field:u16 protocol; offset:30;  size:2; signed:0;
>   field:u8 ip_summed; offset:32;  size:1; signed:0;
>   field:unsigned int len; offset:36;  size:4; signed:0;
> 
> Again another three bytes missing (32 + 1 != 36)
> 
>   field:unsigned int data_len;offset:40;  size:4; signed:0;
>   field:int network_offset;   offset:44;  size:4; signed:1;
>   field:bool transport_offset_valid;  offset:48;  size:1; 
> signed:0;
>   field:int transport_offset; offset:52;  size:4; signed:1;
> 
> Again, another 3 bytes missing (48 + 1 != 52)
> 
>   field:u8 tx_flags;

[PATCH] net: add net namespace inode for all net_dev events

2021-03-08 Thread Tony Lu
There are lots of net namespaces on a host that runs containers, e.g. k8s.
It is very common to see the same interface names among different net
namespaces, such as eth0. It is not possible to distinguish them without
the net namespace inode.

This adds the net namespace inode to all net_dev events, helping us
distinguish between different net devices.

Output:
  -0   [006] ..s.   133.306989: net_dev_xmit: net_inum=4026531992 
dev=eth0 skbaddr=11a87c68 len=54 rc=0

Signed-off-by: Tony Lu 
---
 include/trace/events/net.h | 35 +--
 1 file changed, 25 insertions(+), 10 deletions(-)

diff --git a/include/trace/events/net.h b/include/trace/events/net.h
index 2399073c3afc..a52f90d83411 100644
--- a/include/trace/events/net.h
+++ b/include/trace/events/net.h
@@ -35,6 +35,7 @@ TRACE_EVENT(net_dev_start_xmit,
__field(u16,gso_size)
__field(u16,gso_segs)
__field(u16,gso_type)
+   __field(unsigned int,   net_inum)
),
 
TP_fast_assign(
@@ -56,10 +57,12 @@ TRACE_EVENT(net_dev_start_xmit,
__entry->gso_size = skb_shinfo(skb)->gso_size;
__entry->gso_segs = skb_shinfo(skb)->gso_segs;
__entry->gso_type = skb_shinfo(skb)->gso_type;
+   __entry->net_inum = dev_net(skb->dev)->ns.inum;
),
 
-   TP_printk("dev=%s queue_mapping=%u skbaddr=%p vlan_tagged=%d 
vlan_proto=0x%04x vlan_tci=0x%04x protocol=0x%04x ip_summed=%d len=%u 
data_len=%u network_offset=%d transport_offset_valid=%d transport_offset=%d 
tx_flags=%d gso_size=%d gso_segs=%d gso_type=%#x",
- __get_str(name), __entry->queue_mapping, __entry->skbaddr,
+   TP_printk("net_inum=%u dev=%s queue_mapping=%u skbaddr=%p 
vlan_tagged=%d vlan_proto=0x%04x vlan_tci=0x%04x protocol=0x%04x ip_summed=%d 
len=%u data_len=%u network_offset=%d transport_offset_valid=%d 
transport_offset=%d tx_flags=%d gso_size=%d gso_segs=%d gso_type=%#x",
+ __entry->net_inum, __get_str(name), __entry->queue_mapping,
+ __entry->skbaddr,
  __entry->vlan_tagged, __entry->vlan_proto, __entry->vlan_tci,
  __entry->protocol, __entry->ip_summed, __entry->len,
  __entry->data_len,
@@ -82,6 +85,7 @@ TRACE_EVENT(net_dev_xmit,
__field(unsigned int,   len )
__field(int,rc  )
__string(   name,   dev->name   )
+   __field(unsigned int,   net_inum)
),
 
TP_fast_assign(
@@ -89,10 +93,12 @@ TRACE_EVENT(net_dev_xmit,
__entry->len = skb_len;
__entry->rc = rc;
__assign_str(name, dev->name);
+   __entry->net_inum = dev_net(skb->dev)->ns.inum;
),
 
-   TP_printk("dev=%s skbaddr=%p len=%u rc=%d",
-   __get_str(name), __entry->skbaddr, __entry->len, __entry->rc)
+   TP_printk("net_inum=%u dev=%s skbaddr=%p len=%u rc=%d",
+   __entry->net_inum, __get_str(name), __entry->skbaddr,
+   __entry->len, __entry->rc)
 );
 
 TRACE_EVENT(net_dev_xmit_timeout,
@@ -106,16 +112,19 @@ TRACE_EVENT(net_dev_xmit_timeout,
__string(   name,   dev->name   )
__string(   driver, netdev_drivername(dev))
__field(int,queue_index )
+   __field(unsigned int,   net_inum)
),
 
TP_fast_assign(
__assign_str(name, dev->name);
__assign_str(driver, netdev_drivername(dev));
__entry->queue_index = queue_index;
+   __entry->net_inum = dev_net(dev)->ns.inum;
),
 
-   TP_printk("dev=%s driver=%s queue=%d",
-   __get_str(name), __get_str(driver), __entry->queue_index)
+   TP_printk("net_inum=%u dev=%s driver=%s queue=%d",
+   __entry->net_inum, __get_str(name), __get_str(driver),
+   __entry->queue_index)
 );
 
 DECLARE_EVENT_CLASS(net_dev_template,
@@ -128,16 +137,19 @@ DECLARE_EVENT_CLASS(net_dev_template,
__field(void *, skbaddr )
__field(unsigned int,   len )
__string(   name,   skb->dev->name  )
+   __field(unsigned int,   net_inum)
),
 
TP_fast_assign(
__entry->skbaddr = skb;
__entry->len = skb->len;
__assign_str(name, skb->dev->name);
+

[PATCH net-next 1/3] udp: introduce a new tracepoint for udp_send_skb

2019-05-29 Thread Tony Lu
This introduces a new tracepoint, trace_udp_send, which traces UDP
packets that are about to be sent to the IP layer.

This exposes src and dst IP addresses and ports of the connection. We
could use kprobe or tcpdump to do similar things, however using tracepoint
makes it easier to use and to integrate into perf or ebpf.

Signed-off-by: Tony Lu 
---
 include/trace/events/udp.h | 81 ++
 net/ipv4/udp.c |  1 +
 net/ipv6/udp.c |  2 +
 3 files changed, 84 insertions(+)

diff --git a/include/trace/events/udp.h b/include/trace/events/udp.h
index 336fe272889f..f2c26780e2a9 100644
--- a/include/trace/events/udp.h
+++ b/include/trace/events/udp.h
@@ -7,6 +7,38 @@
 
 #include 
 #include 
+#include 
+#include 
+#include 
+
+#define TP_STORE_V4MAPPED(__entry, saddr, daddr)   \
+   do {\
+   struct in6_addr *pin6;  \
+   \
+   pin6 = (struct in6_addr *)__entry->saddr_v6;\
+   ipv6_addr_set_v4mapped(saddr, pin6);\
+   pin6 = (struct in6_addr *)__entry->daddr_v6;\
+   ipv6_addr_set_v4mapped(daddr, pin6);\
+   } while (0)
+
+#if IS_ENABLED(CONFIG_IPV6)
+#define TP_STORE_ADDRS(__entry, saddr, daddr, saddr6, daddr6)  \
+   do {\
+   if (sk->sk_family == AF_INET6) {\
+   struct in6_addr *pin6;  \
+   \
+   pin6 = (struct in6_addr *)__entry->saddr_v6;\
+   *pin6 = saddr6; \
+   pin6 = (struct in6_addr *)__entry->daddr_v6;\
+   *pin6 = daddr6; \
+   } else {\
+   TP_STORE_V4MAPPED(__entry, saddr, daddr);   \
+   }   \
+   } while (0)
+#else
+#define TP_STORE_ADDRS(__entry, saddr, daddr, saddr6, daddr6)  \
+   TP_STORE_V4MAPPED(__entry, saddr, daddr)
+#endif
 
 TRACE_EVENT(udp_fail_queue_rcv_skb,
 
@@ -27,6 +59,55 @@ TRACE_EVENT(udp_fail_queue_rcv_skb,
TP_printk("rc=%d port=%hu", __entry->rc, __entry->lport)
 );
 
+DECLARE_EVENT_CLASS(udp_event_sk_skb,
+
+   TP_PROTO(const struct sock *sk, const struct sk_buff *skb),
+
+   TP_ARGS(sk, skb),
+
+   TP_STRUCT__entry(
+   __field(const void *, skbaddr)
+   __field(const void *, skaddr)
+   __field(__u16, sport)
+   __field(__u16, dport)
+   __array(__u8, saddr, 4)
+   __array(__u8, daddr, 4)
+   __array(__u8, saddr_v6, 16)
+   __array(__u8, daddr_v6, 16)
+   ),
+
+   TP_fast_assign(
+   struct inet_sock *inet = inet_sk(sk);
+   __be32 *p32;
+
+   __entry->skbaddr = skb;
+   __entry->skaddr = sk;
+
+   __entry->sport = ntohs(inet->inet_sport);
+   __entry->dport = ntohs(inet->inet_dport);
+
+   p32 = (__be32 *) __entry->saddr;
+   *p32 = inet->inet_saddr;
+
+   p32 = (__be32 *) __entry->daddr;
+   *p32 =  inet->inet_daddr;
+
+   TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr,
+ sk->sk_v6_rcv_saddr, sk->sk_v6_daddr);
+   ),
+
+   TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c 
daddrv6=%pI6c",
+ __entry->sport, __entry->dport, __entry->saddr,
+ __entry->daddr, __entry->saddr_v6, __entry->daddr_v6)
+);
+
+DEFINE_EVENT(udp_event_sk_skb, udp_send,
+
+   TP_PROTO(const struct sock *sk, const struct sk_buff *skb),
+
+   TP_ARGS(sk, skb)
+);
+
 #endif /* _TRACE_UDP_H */
 
 /* This part must be outside protection */
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 8fb250ed53d4..3ff6fea9debe 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -898,6 +898,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 
*fl4,
uh->check = CSUM_MANGLED_0;
 
 send:
+   trace_udp_send(sk, skb);
err = ip_send_skb(sock_net(sk), skb);
if (err) {
if (err == -ENOBUFS && !inet->recverr) {
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 07fa579dfb96..3a26990d5dc8 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -56,6 +56,7 @@
 #include 
 #include 
 #include 
+#include 
 #include "udp_impl.h"
 
 static bool udp6_lib_exact_dif_match(st

[PATCH net-next 0/3] introduce two new tracepoints for udp

2019-05-29 Thread Tony Lu
This series introduces two new tracepoints trace_udp_send and
trace_udp_queue_rcv, and removes redundant new line from
tcp_event_sk_skb.

Tony Lu (3):
  udp: introduce a new tracepoint for udp_send_skb
  udp: introduce a new tracepoint for udp_queue_rcv_skb
  tcp: remove redundant new line from tcp_event_sk_skb

 include/trace/events/tcp.h |  2 +-
 include/trace/events/udp.h | 88 ++
 net/ipv4/udp.c |  2 +
 net/ipv6/udp.c |  3 ++
 4 files changed, 94 insertions(+), 1 deletion(-)

-- 
2.21.0



[PATCH net-next 2/3] udp: introduce a new tracepoint for udp_queue_rcv_skb

2019-05-29 Thread Tony Lu
This introduces a new tracepoint, trace_udp_queue_rcv, which traces UDP
packets that are about to be queued on the socket receive queue.

Signed-off-by: Tony Lu 
---
 include/trace/events/udp.h | 7 +++
 net/ipv4/udp.c | 1 +
 net/ipv6/udp.c | 1 +
 3 files changed, 9 insertions(+)

diff --git a/include/trace/events/udp.h b/include/trace/events/udp.h
index f2c26780e2a9..37daea5f7cb1 100644
--- a/include/trace/events/udp.h
+++ b/include/trace/events/udp.h
@@ -108,6 +108,13 @@ DEFINE_EVENT(udp_event_sk_skb, udp_send,
TP_ARGS(sk, skb)
 );
 
+DEFINE_EVENT(udp_event_sk_skb, udp_queue_rcv,
+
+   TP_PROTO(const struct sock *sk, const struct sk_buff *skb),
+
+   TP_ARGS(sk, skb)
+);
+
 #endif /* _TRACE_UDP_H */
 
 /* This part must be outside protection */
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 3ff6fea9debe..262d76559bd5 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2238,6 +2238,7 @@ static int udp_unicast_rcv_skb(struct sock *sk, struct 
sk_buff *skb,
skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check,
 inet_compute_pseudo);
 
+   trace_udp_queue_rcv(sk, skb);
ret = udp_queue_rcv_skb(sk, skb);
 
/* a return value > 0 means to resubmit the input, but
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 3a26990d5dc8..49473c5d3c4b 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -842,6 +842,7 @@ static int udp6_unicast_rcv_skb(struct sock *sk, struct 
sk_buff *skb,
skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check,
 ip6_compute_pseudo);
 
+   trace_udp_queue_rcv(sk, skb);
ret = udpv6_queue_rcv_skb(sk, skb);
 
/* a return value > 0 means to resubmit the input */
-- 
2.21.0



[PATCH net-next 3/3] tcp: remove redundant new line from tcp_event_sk_skb

2019-05-29 Thread Tony Lu
This removes '\n' from the trace event class tcp_event_sk_skb to avoid a
redundant blank line and make the output compact.

Signed-off-by: Tony Lu 
---
 include/trace/events/tcp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
index 2bc9960a31aa..cf97f6339acb 100644
--- a/include/trace/events/tcp.h
+++ b/include/trace/events/tcp.h
@@ -86,7 +86,7 @@ DECLARE_EVENT_CLASS(tcp_event_sk_skb,
  sk->sk_v6_rcv_saddr, sk->sk_v6_daddr);
),
 
-   TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c 
daddrv6=%pI6c state=%s\n",
+   TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c 
daddrv6=%pI6c state=%s",
  __entry->sport, __entry->dport, __entry->saddr, 
__entry->daddr,
  __entry->saddr_v6, __entry->daddr_v6,
  show_tcp_state_name(__entry->state))
-- 
2.21.0



RE: [PATCH v2] flush_icache_range: Export symbol to fix build errors

2014-08-20 Thread Tony Lu
>-Original Message-
>Fix building errors occuring due to a missing export of flush_icache_range()
>in
>
>kisskb.ellerman.id.au/kisskb/buildresult/11677809/
>
>ERROR: "flush_icache_range" [drivers/misc/lkdtm.ko] undefined!
>
>Signed-off-by: Pranith Kumar 
>Reported-by: Geert Uytterhoeven 
>CC: Andrew Morton 
>---
> arch/arc/mm/cache_arc700.c | 1 +
> arch/hexagon/mm/cache.c| 1 +
> arch/sh/mm/cache.c | 1 +
> arch/tile/kernel/smp.c | 1 +
> arch/xtensa/kernel/smp.c   | 1 +
> 5 files changed, 5 insertions(+)
>

For Tile,

Acked-by: Zhigang Lu 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


RE: [PATCH v2] flush_icache_range: Export symbol to fix build errors

2014-08-20 Thread Tony Lu
-Original Message-
Fix building errors occuring due to a missing export of flush_icache_range()
in

kisskb.ellerman.id.au/kisskb/buildresult/11677809/

ERROR: flush_icache_range [drivers/misc/lkdtm.ko] undefined!

Signed-off-by: Pranith Kumar bobby.pr...@gmail.com
Reported-by: Geert Uytterhoeven ge...@linux-m68k.org
CC: Andrew Morton a...@linux-foundation.org
---
 arch/arc/mm/cache_arc700.c | 1 +
 arch/hexagon/mm/cache.c| 1 +
 arch/sh/mm/cache.c | 1 +
 arch/tile/kernel/smp.c | 1 +
 arch/xtensa/kernel/smp.c   | 1 +
 5 files changed, 5 insertions(+)


For Tile,

Acked-by: Zhigang Lu z...@tilera.com
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


RE: [BUG] perf: can not resolve symbols for forked threads

2014-06-24 Thread Tony Lu
Thanks for your reply.

>-Original Message-
>From: Peter Zijlstra [mailto:pet...@infradead.org]
>Sent: Monday, June 23, 2014 5:13 PM
>To: Tony Lu
>Cc: Paul Mackerras; Ingo Molnar; Arnaldo Carvalho de Melo;
>linux-kernel@vger.kernel.org; Chris Metcalf
>Subject: Re: [BUG] perf: can not resolve symbols for forked threads
>
>On Thu, Jun 19, 2014 at 07:39:42AM +, Tony Lu wrote:
>> Hi
>>
>> I got the below output that shows perf can not resolve symbols for
>> forked threads. I did a system-wide collection from all CPUs after the
>> application hello run.
>
>There's no fork() in... :-)

Yes, but perf regards pthread_created threads as forked threads, and a 
PERF_RECORD_FORK event will be delivered when a thread is forked or cloned.

>
>> #include 
>> #include 
>> #include 
>> #include 
>> #define NUM_THREADS 5
>>
>> void foo(void)
>> {
>>   long i = 10;
>>   while (i--) {
>> ;
>>   }
>> }
>>
>> void *PrintHello(void *threadid)
>> {
>>long tid;
>>tid = (long)threadid;
>>printf("Hello World! It's me, thread #%ld!\n", tid);
>>foo();
>>pthread_exit(NULL);
>> }
>>
>> int main(int argc, char *argv[])
>> {
>>pthread_t threads[NUM_THREADS];
>>int rc;
>>long t;
>>for(t=0;t>  printf("In main: creating thread %ld\n", t);
>>  rc = pthread_create([t], NULL, PrintHello, (void *)t);
>>  if (rc){
>>printf("ERROR; return code from pthread_create() is %d\n", rc);
>>exit(-1);
>>}
>>  }
>>
>>/* Last thing that main() should do */
>>pthread_exit(NULL);
>> }
>
>That pthread_exit() is the problem; this results in:
>
>29456 pts/23   Zl 0:00  |   \_ [hello] 
>
>You want to wait for the threads to complete using pthread_join().
>
>I suspect the defunct state hides the process.

Yes, using pthread_join() works around this problem.
Does that mean the parent thread cannot exit before the child threads? It 
still seems like a perf bug.

-Thanks
-Tony
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


RE: [BUG] perf: can not resolve symbols for forked threads

2014-06-24 Thread Tony Lu
Thanks for your reply.

-Original Message-
From: Peter Zijlstra [mailto:pet...@infradead.org]
Sent: Monday, June 23, 2014 5:13 PM
To: Tony Lu
Cc: Paul Mackerras; Ingo Molnar; Arnaldo Carvalho de Melo;
linux-kernel@vger.kernel.org; Chris Metcalf
Subject: Re: [BUG] perf: can not resolve symbols for forked threads

On Thu, Jun 19, 2014 at 07:39:42AM +, Tony Lu wrote:
 Hi

 I got the below output that shows perf can not resolve symbols for
 forked threads. I did a system-wide collection from all CPUs after the
 application hello run.

There's no fork() in... :-)

Yes, but perf regards pthread_created threads as forked threads, and a 
PERF_RECORD_FORK event will be delivered when a thread is forked or cloned.


 #include pthread.h
 #include stdio.h
 #include stdlib.h
 #include unistd.h
 #define NUM_THREADS 5

 void foo(void)
 {
   long i = 10;
   while (i--) {
 ;
   }
 }

 void *PrintHello(void *threadid)
 {
long tid;
tid = (long)threadid;
printf(Hello World! It's me, thread #%ld!\n, tid);
foo();
pthread_exit(NULL);
 }

 int main(int argc, char *argv[])
 {
pthread_t threads[NUM_THREADS];
int rc;
long t;
for(t=0;tNUM_THREADS;t++){
  printf(In main: creating thread %ld\n, t);
  rc = pthread_create(threads[t], NULL, PrintHello, (void *)t);
  if (rc){
printf(ERROR; return code from pthread_create() is %d\n, rc);
exit(-1);
}
  }

/* Last thing that main() should do */
pthread_exit(NULL);
 }

That pthread_exit() is the problem; this results in:

29456 pts/23   Zl 0:00  |   \_ [hello] defunct

You want to wait for the threads to complete using pthread_join().

I suspect the defunct state hides the process.

Yes, using pthread_join() works around this problem.
Does that mean the parent thread cannot exit before the child threads? It 
still seems like a perf bug.

-Thanks
-Tony
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[BUG] perf: can not resolve symbols for forked threads

2014-06-19 Thread Tony Lu
Hi

I got the output below, which shows that perf cannot resolve symbols for forked 
threads. I did a system-wide collection from all CPUs after starting the 
hello application.

-bash-4.1$ ./perf --version
perf version 3.16.rc1.ge99cfa2
-bash-4.1$ ./hello & ./perf record -a sleep 5
-bash-4.1$ ./perf report
# Overhead  Command   Shared Object Symbol
#   ...  ..  .
#
54.77%hello  [unknown]   [.] 0x00400610   
28.59%hello  [unknown]   [.] 0x00400615   
16.53%hello  [unknown]   [.] 0x0040061f   
 0.01%hello  [kernel.kallsyms]   [k] __rcu_process_callbacks  
 0.01% perf  [kernel.kallsyms]   [k] copy_user_generic_string 
 0.01%sleep  [kernel.kallsyms]   [k] filemap_fault


/* 
FILE: hello.c
** DESCRIPTION:
** A "hello world" Pthreads program to trigger this bug.
***/
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#define NUM_THREADS 5

void foo(void)
{
  long i = 10;
  while (i--) {
;
  }
}

void *PrintHello(void *threadid)
{
   long tid;
   tid = (long)threadid;
   printf("Hello World! It's me, thread #%ld!\n", tid);
   foo();
   pthread_exit(NULL);
}

int main(int argc, char *argv[])
{
   pthread_t threads[NUM_THREADS];
   int rc;
   long t;
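   for(t=0;t<NUM_THREADS;t++){
     printf("In main: creating thread %ld\n", t);
     rc = pthread_create(&threads[t], NULL, PrintHello, (void *)t);
     if (rc){
       printf("ERROR; return code from pthread_create() is %d\n", rc);
       exit(-1);
       }
     }

   /* Last thing that main() should do */
   pthread_exit(NULL);
}

Thanks
-Tony
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html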
Please read the FAQ at  http://www.tux.org/lkml/


[BUG] perf: can not resolve symbols for forked threads

2014-06-19 Thread Tony Lu
Hi

I got the below output that shows perf can not resolve symbols for forked 
threads. I did a system-wide collection from all CPUs after the application 
hello run.

-bash-4.1$ ./perf --version
perf version 3.16.rc1.ge99cfa2
-bash-4.1$ ./hello & ./perf record -a sleep 5
-bash-4.1$ ./perf report
# Overhead  Command   Shared Object Symbol
#   ...  ..  .
#
54.77%hello  [unknown]   [.] 0x00400610   
28.59%hello  [unknown]   [.] 0x00400615   
16.53%hello  [unknown]   [.] 0x0040061f   
 0.01%hello  [kernel.kallsyms]   [k] __rcu_process_callbacks  
 0.01% perf  [kernel.kallsyms]   [k] copy_user_generic_string 
 0.01%sleep  [kernel.kallsyms]   [k] filemap_fault


/* 
FILE: hello.c
** DESCRIPTION:
** A "hello world" Pthreads program to trigger this bug.
***/
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#define NUM_THREADS 5

void foo(void)
{
  long i = 10;
  while (i--) {
;
  }
}

void *PrintHello(void *threadid)
{
   long tid;
   tid = (long)threadid;
   printf("Hello World! It's me, thread #%ld!\n", tid);
   foo();
   pthread_exit(NULL);
}

int main(int argc, char *argv[])
{
   pthread_t threads[NUM_THREADS];
   int rc;
   long t;
   for(t=0;t<NUM_THREADS;t++){
 printf("In main: creating thread %ld\n", t);
 rc = pthread_create(&threads[t], NULL, PrintHello, (void *)t);
 if (rc){
   printf("ERROR; return code from pthread_create() is %d\n", rc);
   exit(-1);
   }
 }

   /* Last thing that main() should do */
   pthread_exit(NULL);
}

Thanks
-Tony
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


RE: [PATCH 1/4] tile: Add support for handling PMC hardware

2014-03-06 Thread Tony Lu
>-Original Message-
>From: Peter Zijlstra [mailto:pet...@infradead.org]
>Sent: Thursday, March 06, 2014 5:11 PM
>To: Tony Lu
>Cc: Paul Mackerras; Ingo Molnar; Arnaldo Carvalho de Melo; Chris Metcalf;
>linux-kernel@vger.kernel.org
>Subject: Re: [PATCH 1/4] tile: Add support for handling PMC hardware
>
>On Thu, Mar 06, 2014 at 06:13:48AM +, Tony Lu wrote:
>
>The changelogs are all very sparse. But in general the stuff looks to
>have the right shape.
>
>It looks like you do software NMIs with interrupt priority levels; which
>is a perfectly fine way, other archs do the same.
>
>One thing I didn't spot is a tile irq_work implementation; perf likes to
>have one.

We do not have tile irq_work for now. It seems like perf works fine even 
without the arch irq_work. Anyway, we will investigate it, and submit it as a 
separate patch when it gets ready.

Thanks
-Tony

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


RE: [PATCH 3/4] tile/perf: Support perf_events on tilegx and tilepro

2014-03-06 Thread Tony Lu
>-Original Message-
>From: Peter Zijlstra [mailto:pet...@infradead.org]
>Sent: Thursday, March 06, 2014 5:07 PM
>To: Tony Lu
>Cc: Paul Mackerras; Ingo Molnar; Arnaldo Carvalho de Melo; Chris Metcalf;
>linux-kernel@vger.kernel.org
>Subject: Re: [PATCH 3/4] tile/perf: Support perf_events on tilegx and tilepro
>
>On Thu, Mar 06, 2014 at 06:26:11AM +, Tony Lu wrote:
>> +static const int tile_cache_event_map[PERF_COUNT_HW_CACHE_MAX]
>> +[PERF_COUNT_HW_CACHE_OP_MAX]
>> +[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
>> +[C(L1D)] = {
>> +   [C(OP_READ)] = {
>> +   [C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
>> +   [C(RESULT_MISS)] = 0x21, /* RD_MISS */
>> +   },
>> +   [C(OP_WRITE)] = {
>> +   [C(RESULT_ACCESS)] = 0x22, /* WR_MISS */
>> +   [C(RESULT_MISS)] = TILE_OP_UNSUPP,
>> +   },
>
>If that's the wr_miss event as the comment says, it should probably be
>listed one down.

Yes, you are right. Thanks for pointing it out. It should be:
+   [C(OP_WRITE)] = {
+   [C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
+   [C(RESULT_MISS)] = 0x22, /* WR_MISS */
+   },

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


RE: [PATCH 3/4] tile/perf: Support perf_events on tilegx and tilepro

2014-03-06 Thread Tony Lu
>-Original Message-
>From: Peter Zijlstra [mailto:pet...@infradead.org]
>Sent: Thursday, March 06, 2014 5:07 PM
>To: Tony Lu
>Cc: Paul Mackerras; Ingo Molnar; Arnaldo Carvalho de Melo; Chris Metcalf;
>linux-kernel@vger.kernel.org
>Subject: Re: [PATCH 3/4] tile/perf: Support perf_events on tilegx and tilepro
>
>On Thu, Mar 06, 2014 at 06:26:11AM +, Tony Lu wrote:
>> +static const int tile_cache_event_map[PERF_COUNT_HW_CACHE_MAX]
>> +[PERF_COUNT_HW_CACHE_OP_MAX]
>> +[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
>> +[C(L1D)] = {
>> +   [C(OP_READ)] = {
>> +   [C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
>> +   [C(RESULT_MISS)] = 0x21, /* RD_MISS */
>> +   },
>> +   [C(OP_WRITE)] = {
>> +   [C(RESULT_ACCESS)] = 0x22, /* WR_MISS */
>> +   [C(RESULT_MISS)] = TILE_OP_UNSUPP,
>> +   },
>
>If that's the wr_miss event as the comment says, it should probably be
>listed one down.

Yes, you are right. Thanks for pointing it out. It should be:
+   [C(OP_WRITE)] = {
+   [C(RESULT_ACCESS)] = TILE_OP_UNSUPP,
+   [C(RESULT_MISS)] = 0x22, /* WR_MISS */
+   },

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


RE: [PATCH 1/4] tile: Add support for handling PMC hardware

2014-03-06 Thread Tony Lu
>-Original Message-
>From: Peter Zijlstra [mailto:pet...@infradead.org]
>Sent: Thursday, March 06, 2014 5:11 PM
>To: Tony Lu
>Cc: Paul Mackerras; Ingo Molnar; Arnaldo Carvalho de Melo; Chris Metcalf;
>linux-kernel@vger.kernel.org
>Subject: Re: [PATCH 1/4] tile: Add support for handling PMC hardware
>
>On Thu, Mar 06, 2014 at 06:13:48AM +, Tony Lu wrote:
>
>The changelogs are all very sparse. But in general the stuff looks to
>have the right shape.
>
>It looks like you do software NMIs with interrupt priority levels; which
>is a perfectly fine way, other archs do the same.
>
>One thing I didn't spot is a tile irq_work implementation; perf likes to
>have one.

We do not have tile irq_work for now. It seems like perf works fine even 
without the arch irq_work. Anyway, we will investigate it, and submit it as a 
separate patch when it gets ready.

Thanks
-Tony

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 4/4] perf tools: Allow building for tile

2014-03-05 Thread Tony Lu
Tested by building perf:
- Cross-compiled for tile on x86_64
- Built natively on tile

Signed-off-by: Zhigang Lu <z...@tilera.com>
Signed-off-by: Chris Metcalf <cmetc...@tilera.com>
---
 tools/perf/config/Makefile.arch | 3 ++-
 tools/perf/perf.h   | 8 
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/tools/perf/config/Makefile.arch b/tools/perf/config/Makefile.arch
index fef8ae9..4b06719 100644
--- a/tools/perf/config/Makefile.arch
+++ b/tools/perf/config/Makefile.arch
@@ -5,7 +5,8 @@ ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/ -e 
s/sun4u/sparc64/ \
   -e s/arm.*/arm/ -e s/sa110/arm/ \
   -e s/s390x/s390/ -e s/parisc64/parisc/ \
   -e s/ppc.*/powerpc/ -e s/mips.*/mips/ \
-  -e s/sh[234].*/sh/ -e s/aarch64.*/arm64/ )
+  -e s/sh[234].*/sh/ -e s/aarch64.*/arm64/ \
+  -e s/tile.*/tile/ )
 
 # Additional ARCH settings for x86
 ifeq ($(ARCH),i386)
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index e84fa26..75caf68 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -139,6 +139,14 @@
 #define CPUINFO_PROC   "core ID"
 #endif
 
+#ifdef __tile__
+#define mb()   asm volatile ("mf" ::: "memory")
+#define wmb()  asm volatile ("mf" ::: "memory")
+#define rmb()  asm volatile ("mf" ::: "memory")
+#define cpu_relax()    asm volatile ("mfspr zero, PASS" ::: "memory")
+#define CPUINFO_PROC   "model name"
+#endif
+
 #define barrier() asm volatile ("" ::: "memory")
 
 #ifndef cpu_relax
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 3/4] tile/perf: Support perf_events on tilegx and tilepro

2014-03-05 Thread Tony Lu
Add perf support for tile architecture.

Signed-off-by: Zhigang Lu <z...@tilera.com>
Signed-off-by: Chris Metcalf <cmetc...@tilera.com>
---
 arch/tile/Kconfig  |2 +
 arch/tile/include/asm/perf_event.h |   22 +
 arch/tile/kernel/Makefile  |1 +
 arch/tile/kernel/irq.c |   18 +
 arch/tile/kernel/perf_event.c  | 1005 
 5 files changed, 1048 insertions(+)
 create mode 100644 arch/tile/include/asm/perf_event.h
 create mode 100644 arch/tile/kernel/perf_event.c

diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 3067b15..31c8c62 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -3,6 +3,8 @@

 config TILE
def_bool y
+   select HAVE_PERF_EVENTS
+   select USE_PMC if PERF_EVENTS
select HAVE_DMA_ATTRS
select HAVE_DMA_API_DEBUG
select HAVE_KVM if !TILEGX
diff --git a/arch/tile/include/asm/perf_event.h 
b/arch/tile/include/asm/perf_event.h
new file mode 100644
index 000..59c5b16
--- /dev/null
+++ b/arch/tile/include/asm/perf_event.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright 2014 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_PERF_EVENT_H
+#define _ASM_TILE_PERF_EVENT_H
+
+#include <linux/percpu.h>
+DECLARE_PER_CPU(u64, perf_irqs);
+
+unsigned long handle_syscall_link_address(void);
+#endif /* _ASM_TILE_PERF_EVENT_H */
diff --git a/arch/tile/kernel/Makefile b/arch/tile/kernel/Makefile
index 71d8353..21f77bf 100644
--- a/arch/tile/kernel/Makefile
+++ b/arch/tile/kernel/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_PCI) += pci_gx.o
 else
 obj-$(CONFIG_PCI)  += pci.o
 endif
+obj-$(CONFIG_PERF_EVENTS)  += perf_event.o
 obj-$(CONFIG_USE_PMC)  += pmc.o
 obj-$(CONFIG_TILE_USB) += usb.o
 obj-$(CONFIG_TILE_HVGLUE_TRACE)+= hvglue_trace.o
diff --git a/arch/tile/kernel/irq.c b/arch/tile/kernel/irq.c
index 0586fdb..906a76b 100644
--- a/arch/tile/kernel/irq.c
+++ b/arch/tile/kernel/irq.c
@@ -21,6 +21,7 @@
 #include <hv/drv_pcie_rc_intf.h>
 #include <arch/spr_def.h>
 #include <asm/traps.h>
+#include <linux/perf_event.h>

 /* Bit-flag stored in irq_desc->chip_data to indicate HW-cleared irqs. */
 #define IS_HW_CLEARED 1
@@ -261,6 +262,23 @@ void ack_bad_irq(unsigned int irq)
 }

 /*
+ * /proc/interrupts printing:
+ */
+int arch_show_interrupts(struct seq_file *p, int prec)
+{
+#ifdef CONFIG_PERF_EVENTS
+   int i;
+
+   seq_printf(p, "%*s: ", prec, "PMI");
+
+   for_each_online_cpu(i)
+   seq_printf(p, "%10llu ", per_cpu(perf_irqs, i));
+   seq_puts(p, "  perf_events\n");
+#endif
+   return 0;
+}
+
+/*
  * Generic, controller-independent functions:
  */

diff --git a/arch/tile/kernel/perf_event.c b/arch/tile/kernel/perf_event.c
new file mode 100644
index 000..53ddb5d
--- /dev/null
+++ b/arch/tile/kernel/perf_event.c
@@ -0,0 +1,1005 @@
+/*
+ * Copyright 2014 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ *
+ * Perf_events support for Tile processor.
+ *
+ * This code is based upon the x86 perf event
+ * code, which is:
+ *
+ *  Copyright (C) 2008 Thomas Gleixner <t...@linutronix.de>
+ *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
+ *  Copyright (C) 2009 Jaswinder Singh Rajput
+ *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
+ *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra 
+ *  Copyright (C) 2009 Intel Corporation, 
+ *  Copyright (C) 2009 Google, Inc., Stephane Eranian
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define TILE_MAX_COUNTERS  4
+
+#define PERF_COUNT_0_IDX   0
+#define PERF_COUNT_1_IDX   1
+#define AUX_PERF_COUNT_0_IDX   2
+#define AUX_PERF_COUNT_1_IDX   3
+
+struct cpu_hw_events {
+   int n_events;
+   struct perf_event   *events[TILE_MAX_COUNTERS]; /* counter order */
+   struct perf_event   *event_list[TILE_MAX_COUNTERS]; /* enabled
+   order */
+   int 

[PATCH 2/4] tile: Enable NMIs on return from handle_nmi() without errors

2014-03-05 Thread Tony Lu
NMI interrupts mask ALL interrupts before calling the handler,
so we need to unmask NMIs according to the value handle_nmi() returns.
If it returns zero, the NMIs should be re-enabled; if it returns
a non-zero error, the NMIs should be disabled.

Signed-off-by: Zhigang Lu <z...@tilera.com>
Signed-off-by: Chris Metcalf <cmetc...@tilera.com>
---
 arch/tile/kernel/intvec_32.S | 11 +++
 arch/tile/kernel/intvec_64.S | 11 ++-
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S
index 605ffbd..cdbda45 100644
--- a/arch/tile/kernel/intvec_32.S
+++ b/arch/tile/kernel/intvec_32.S
@@ -946,6 +946,13 @@ STD_ENTRY(interrupt_return)
bzt r30, .Lrestore_regs
 3:
 
+   /* We are relying on INT_PERF_COUNT at 33, and AUX_PERF_COUNT at 48 */
+   {
+moveli r0, lo16(1 << (INT_PERF_COUNT - 32))
+bz r31, .Lrestore_regs
+   }
+   auli    r0, r0, ha16(1 << (INT_AUX_PERF_COUNT - 32))
+   mtspr   SPR_INTERRUPT_MASK_RESET_K_1, r0
 
/*
 * We now commit to returning from this interrupt, since we will be
@@ -1171,6 +1178,10 @@ handle_nmi:
 PTREGS_PTR(r0, PTREGS_OFFSET_BASE)
}
FEEDBACK_REENTER(handle_nmi)
+   {
+movei  r30, 1
+seq    r31, r0, zero
+   }
j   interrupt_return
STD_ENDPROC(handle_nmi)
 
diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S
index 8f892a5..5b67efc 100644
--- a/arch/tile/kernel/intvec_64.S
+++ b/arch/tile/kernel/intvec_64.S
@@ -971,6 +971,15 @@ STD_ENTRY(interrupt_return)
beqzt   r30, .Lrestore_regs
 3:
 
+#if INT_PERF_COUNT + 1 != INT_AUX_PERF_COUNT
+# error Bad interrupt assumption
+#endif
+   {
+movei  r0, 3   /* two adjacent bits for the PERF_COUNT mask */
+beqz   r31, .Lrestore_regs
+   }
+   shli    r0, r0, INT_PERF_COUNT
+   mtspr   SPR_INTERRUPT_MASK_RESET_K, r0
 
/*
 * We now commit to returning from this interrupt, since we will be
@@ -1187,7 +1196,7 @@ handle_nmi:
FEEDBACK_REENTER(handle_nmi)
{
 movei  r30, 1
-move   r31, r0
+cmpeq  r31, r0, zero
}
j   interrupt_return
STD_ENDPROC(handle_nmi)
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/4] tile: Add support for handling PMC hardware

2014-03-05 Thread Tony Lu
The PMC module is used by perf_events, oprofile and watchdogs.

Signed-off-by: Zhigang Lu <z...@tilera.com>
Signed-off-by: Chris Metcalf <cmetc...@tilera.com>
---
 arch/tile/Kconfig|   4 ++
 arch/tile/include/asm/pmc.h  |  64 +++
 arch/tile/kernel/Makefile|   1 +
 arch/tile/kernel/intvec_32.S |  13 ++---
 arch/tile/kernel/intvec_64.S |  13 ++---
 arch/tile/kernel/pmc.c   | 121 +++
 6 files changed, 204 insertions(+), 12 deletions(-)
 create mode 100644 arch/tile/include/asm/pmc.h
 create mode 100644 arch/tile/kernel/pmc.c

diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index b3692ce..3067b15 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -66,6 +66,10 @@ config HUGETLB_SUPER_PAGES
 config GENERIC_TIME_VSYSCALL
def_bool y
 
+# Enable PMC if PERF_EVENTS, OPROFILE, or WATCHPOINTS are enabled.
+config USE_PMC
+   bool
+
 # FIXME: tilegx can implement a more efficient rwsem.
 config RWSEM_GENERIC_SPINLOCK
def_bool y
diff --git a/arch/tile/include/asm/pmc.h b/arch/tile/include/asm/pmc.h
new file mode 100644
index 000..7ae3956
--- /dev/null
+++ b/arch/tile/include/asm/pmc.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2014 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_PMC_H
+#define _ASM_TILE_PMC_H
+
+#include <linux/ptrace.h>
+
+#define TILE_BASE_COUNTERS 2
+
+/* Bitfields below are derived from SPR PERF_COUNT_CTL*/
+#ifndef __tilegx__
+/* PERF_COUNT_CTL on TILEPro */
+#define TILE_CTL_EXCL_USER (1 << 7) /* exclude user level */
+#define TILE_CTL_EXCL_KERNEL   (1 << 8) /* exclude kernel level */
+#define TILE_CTL_EXCL_HV   (1 << 9) /* exclude hypervisor level */
+
+#define TILE_SEL_MASK  0x7f/* 7 bits for event SEL,
+   COUNT_0_SEL */
+#define TILE_PLM_MASK  0x780   /* 4 bits priv level msks,
+   COUNT_0_MASK*/
+#define TILE_EVENT_MASK        (TILE_SEL_MASK | TILE_PLM_MASK)
+
+#else /* __tilegx__*/
+/* PERF_COUNT_CTL on TILEGx*/
+#define TILE_CTL_EXCL_USER (1 << 10) /* exclude user level */
+#define TILE_CTL_EXCL_KERNEL   (1 << 11) /* exclude kernel level */
+#define TILE_CTL_EXCL_HV   (1 << 12) /* exclude hypervisor level */
+
+#define TILE_SEL_MASK  0x3f/* 6 bits for event SEL,
+   COUNT_0_SEL*/
+#define TILE_BOX_MASK  0x1c0   /* 3 bits box msks,
+   COUNT_0_BOX */
+#define TILE_PLM_MASK  0x3c00  /* 4 bits priv level msks,
+   COUNT_0_MASK */
+#define TILE_EVENT_MASK        (TILE_SEL_MASK | TILE_BOX_MASK | TILE_PLM_MASK)
+#endif /* __tilegx__*/
+
+/* Takes register and fault number.  Returns error to disable the interrupt. */
+typedef int (*perf_irq_t)(struct pt_regs *, int);
+
+int userspace_perf_handler(struct pt_regs *regs, int fault);
+
+perf_irq_t reserve_pmc_hardware(perf_irq_t new_perf_irq);
+void release_pmc_hardware(void);
+
+unsigned long pmc_get_overflow(void);
+void pmc_ack_overflow(unsigned long status);
+
+void unmask_pmc_interrupts(void);
+void mask_pmc_interrupts(void);
+
+#endif /* _ASM_TILE_PMC_H */
diff --git a/arch/tile/kernel/Makefile b/arch/tile/kernel/Makefile
index 27a2bf3..71d8353 100644
--- a/arch/tile/kernel/Makefile
+++ b/arch/tile/kernel/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_PCI) += pci_gx.o
 else
 obj-$(CONFIG_PCI)  += pci.o
 endif
+obj-$(CONFIG_USE_PMC)  += pmc.o
 obj-$(CONFIG_TILE_USB) += usb.o
 obj-$(CONFIG_TILE_HVGLUE_TRACE)+= hvglue_trace.o
 obj-$(CONFIG_FUNCTION_TRACER)  += ftrace.o mcount_64.o
diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S
index 2cbe6d5..605ffbd 100644
--- a/arch/tile/kernel/intvec_32.S
+++ b/arch/tile/kernel/intvec_32.S
@@ -313,13 +313,13 @@ intvec_\vecname:
 movei  r3, 0
}
.else
-   .ifc \c_routine, op_handle_perf_interrupt
+   .ifc \c_routine, handle_perf_interrupt
{
 mfspr  r2, PERF_COUNT_STS
 movei  r3, -1   /* not used, but set for consistency */
}
.else
-   .ifc \c_routine, op_handle_aux_perf_interrupt
+   .ifc \c_routine, handle_perf_interrupt
{
 mfspr  r2, AUX_PERF_COUNT_STS
 movei  r3, -1   /* not used, but set for consistency */
@@ -1835,8 +1835,9 @@ int_unalign:
 /* Include .intrpt array of interrupt vectors */
.section ".intrpt", "ax"
 
-#define 

[PATCH 0/4] tile: Add perf_events support for tile

2014-03-05 Thread Tony Lu
From: Zhigang Lu <z...@tilera.com>

This patchset adds perf support for tile architecture. The code is based on x86 
perf event code, and it has been tested on tile for 2 years.

Zhigang Lu (4):
  tile: Add support for handling PMC hardware
  tile: Enable NMIs on return from handle_nmi() without errors
  tile/perf: Support perf_events on tilegx and tilepro
  perf tools:  Allow building for tile

 arch/tile/Kconfig  |6 +
 arch/tile/include/asm/perf_event.h |   22 +
 arch/tile/include/asm/pmc.h|   64 +++
 arch/tile/kernel/Makefile  |2 +
 arch/tile/kernel/intvec_32.S   |   24 +-
 arch/tile/kernel/intvec_64.S   |   24 +-
 arch/tile/kernel/irq.c |   18 +
 arch/tile/kernel/perf_event.c  | 1005 
 arch/tile/kernel/pmc.c |  121 +
 tools/perf/config/Makefile.arch|3 +-
 tools/perf/perf.h  |8 +
 11 files changed, 1283 insertions(+), 14 deletions(-)
 create mode 100644 arch/tile/include/asm/perf_event.h
 create mode 100644 arch/tile/include/asm/pmc.h
 create mode 100644 arch/tile/kernel/perf_event.c
 create mode 100644 arch/tile/kernel/pmc.c

--
1.8.3.1
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 0/4] tile: Add perf_events support for tile

2014-03-05 Thread Tony Lu
From: Zhigang Lu <z...@tilera.com>

This patchset adds perf support for tile architecture. The code is based on x86 
perf event code, and it has been tested on tile for 2 years.

Zhigang Lu (4):
  tile: Add support for handling PMC hardware
  tile: Enable NMIs on return from handle_nmi() without errors
  tile/perf: Support perf_events on tilegx and tilepro
  perf tools:  Allow building for tile

 arch/tile/Kconfig  |6 +
 arch/tile/include/asm/perf_event.h |   22 +
 arch/tile/include/asm/pmc.h|   64 +++
 arch/tile/kernel/Makefile  |2 +
 arch/tile/kernel/intvec_32.S   |   24 +-
 arch/tile/kernel/intvec_64.S   |   24 +-
 arch/tile/kernel/irq.c |   18 +
 arch/tile/kernel/perf_event.c  | 1005 
 arch/tile/kernel/pmc.c |  121 +
 tools/perf/config/Makefile.arch|3 +-
 tools/perf/perf.h  |8 +
 11 files changed, 1283 insertions(+), 14 deletions(-)
 create mode 100644 arch/tile/include/asm/perf_event.h
 create mode 100644 arch/tile/include/asm/pmc.h
 create mode 100644 arch/tile/kernel/perf_event.c
 create mode 100644 arch/tile/kernel/pmc.c

--
1.8.3.1
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/4] tile: Add support for handling PMC hardware

2014-03-05 Thread Tony Lu
The PMC module is used by perf_events, oprofile and watchdogs.

Signed-off-by: Zhigang Lu <z...@tilera.com>
Signed-off-by: Chris Metcalf <cmetc...@tilera.com>
---
 arch/tile/Kconfig|   4 ++
 arch/tile/include/asm/pmc.h  |  64 +++
 arch/tile/kernel/Makefile|   1 +
 arch/tile/kernel/intvec_32.S |  13 ++---
 arch/tile/kernel/intvec_64.S |  13 ++---
 arch/tile/kernel/pmc.c   | 121 +++
 6 files changed, 204 insertions(+), 12 deletions(-)
 create mode 100644 arch/tile/include/asm/pmc.h
 create mode 100644 arch/tile/kernel/pmc.c

diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index b3692ce..3067b15 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -66,6 +66,10 @@ config HUGETLB_SUPER_PAGES
 config GENERIC_TIME_VSYSCALL
def_bool y
 
+# Enable PMC if PERF_EVENTS, OPROFILE, or WATCHPOINTS are enabled.
+config USE_PMC
+   bool
+
 # FIXME: tilegx can implement a more efficient rwsem.
 config RWSEM_GENERIC_SPINLOCK
def_bool y
diff --git a/arch/tile/include/asm/pmc.h b/arch/tile/include/asm/pmc.h
new file mode 100644
index 000..7ae3956
--- /dev/null
+++ b/arch/tile/include/asm/pmc.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2014 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_PMC_H
+#define _ASM_TILE_PMC_H
+
+#include <linux/ptrace.h>
+
+#define TILE_BASE_COUNTERS 2
+
+/* Bitfields below are derived from SPR PERF_COUNT_CTL*/
+#ifndef __tilegx__
+/* PERF_COUNT_CTL on TILEPro */
+#define TILE_CTL_EXCL_USER (1 << 7) /* exclude user level */
+#define TILE_CTL_EXCL_KERNEL   (1 << 8) /* exclude kernel level */
+#define TILE_CTL_EXCL_HV   (1 << 9) /* exclude hypervisor level */
+
+#define TILE_SEL_MASK  0x7f/* 7 bits for event SEL,
+   COUNT_0_SEL */
+#define TILE_PLM_MASK  0x780   /* 4 bits priv level msks,
+   COUNT_0_MASK*/
+#define TILE_EVENT_MASK        (TILE_SEL_MASK | TILE_PLM_MASK)
+
+#else /* __tilegx__*/
+/* PERF_COUNT_CTL on TILEGx*/
+#define TILE_CTL_EXCL_USER (1 << 10) /* exclude user level */
+#define TILE_CTL_EXCL_KERNEL   (1 << 11) /* exclude kernel level */
+#define TILE_CTL_EXCL_HV   (1 << 12) /* exclude hypervisor level */
+
+#define TILE_SEL_MASK  0x3f/* 6 bits for event SEL,
+   COUNT_0_SEL*/
+#define TILE_BOX_MASK  0x1c0   /* 3 bits box msks,
+   COUNT_0_BOX */
+#define TILE_PLM_MASK  0x3c00  /* 4 bits priv level msks,
+   COUNT_0_MASK */
+#define TILE_EVENT_MASK        (TILE_SEL_MASK | TILE_BOX_MASK | TILE_PLM_MASK)
+#endif /* __tilegx__*/
+
+/* Takes register and fault number.  Returns error to disable the interrupt. */
+typedef int (*perf_irq_t)(struct pt_regs *, int);
+
+int userspace_perf_handler(struct pt_regs *regs, int fault);
+
+perf_irq_t reserve_pmc_hardware(perf_irq_t new_perf_irq);
+void release_pmc_hardware(void);
+
+unsigned long pmc_get_overflow(void);
+void pmc_ack_overflow(unsigned long status);
+
+void unmask_pmc_interrupts(void);
+void mask_pmc_interrupts(void);
+
+#endif /* _ASM_TILE_PMC_H */
diff --git a/arch/tile/kernel/Makefile b/arch/tile/kernel/Makefile
index 27a2bf3..71d8353 100644
--- a/arch/tile/kernel/Makefile
+++ b/arch/tile/kernel/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_PCI) += pci_gx.o
 else
 obj-$(CONFIG_PCI)  += pci.o
 endif
+obj-$(CONFIG_USE_PMC)  += pmc.o
 obj-$(CONFIG_TILE_USB) += usb.o
 obj-$(CONFIG_TILE_HVGLUE_TRACE)+= hvglue_trace.o
 obj-$(CONFIG_FUNCTION_TRACER)  += ftrace.o mcount_64.o
diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S
index 2cbe6d5..605ffbd 100644
--- a/arch/tile/kernel/intvec_32.S
+++ b/arch/tile/kernel/intvec_32.S
@@ -313,13 +313,13 @@ intvec_\vecname:
 movei  r3, 0
}
.else
-   .ifc \c_routine, op_handle_perf_interrupt
+   .ifc \c_routine, handle_perf_interrupt
{
 mfspr  r2, PERF_COUNT_STS
 movei  r3, -1   /* not used, but set for consistency */
}
.else
-   .ifc \c_routine, op_handle_aux_perf_interrupt
+   .ifc \c_routine, handle_perf_interrupt
{
 mfspr  r2, AUX_PERF_COUNT_STS
 movei  r3, -1   /* not used, but set for consistency */
@@ -1835,8 +1835,9 @@ int_unalign:
 /* Include .intrpt array of interrupt vectors */

[PATCH 2/4] tile: Enable NMIs on return from handle_nmi() without errors

2014-03-05 Thread Tony Lu
NMI interrupts mask ALL interrupts before calling the handler,
so we need to unmask NMIs according to the value handle_nmi() returns.
If it returns zero, the NMIs should be re-enabled; if it returns
a non-zero error, the NMIs should be disabled.

Signed-off-by: Zhigang Lu <z...@tilera.com>
Signed-off-by: Chris Metcalf <cmetc...@tilera.com>
---
 arch/tile/kernel/intvec_32.S | 11 +++
 arch/tile/kernel/intvec_64.S | 11 ++-
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S
index 605ffbd..cdbda45 100644
--- a/arch/tile/kernel/intvec_32.S
+++ b/arch/tile/kernel/intvec_32.S
@@ -946,6 +946,13 @@ STD_ENTRY(interrupt_return)
bzt r30, .Lrestore_regs
 3:
 
+   /* We are relying on INT_PERF_COUNT at 33, and AUX_PERF_COUNT at 48 */
+   {
+moveli r0, lo16(1 << (INT_PERF_COUNT - 32))
+bz r31, .Lrestore_regs
+   }
+   auli    r0, r0, ha16(1 << (INT_AUX_PERF_COUNT - 32))
+   mtspr   SPR_INTERRUPT_MASK_RESET_K_1, r0
 
/*
 * We now commit to returning from this interrupt, since we will be
@@ -1171,6 +1178,10 @@ handle_nmi:
 PTREGS_PTR(r0, PTREGS_OFFSET_BASE)
}
FEEDBACK_REENTER(handle_nmi)
+   {
+movei  r30, 1
+seq    r31, r0, zero
+   }
j   interrupt_return
STD_ENDPROC(handle_nmi)
 
diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S
index 8f892a5..5b67efc 100644
--- a/arch/tile/kernel/intvec_64.S
+++ b/arch/tile/kernel/intvec_64.S
@@ -971,6 +971,15 @@ STD_ENTRY(interrupt_return)
beqzt   r30, .Lrestore_regs
 3:
 
+#if INT_PERF_COUNT + 1 != INT_AUX_PERF_COUNT
+# error Bad interrupt assumption
+#endif
+   {
+movei  r0, 3   /* two adjacent bits for the PERF_COUNT mask */
+beqz   r31, .Lrestore_regs
+   }
+   shli    r0, r0, INT_PERF_COUNT
+   mtspr   SPR_INTERRUPT_MASK_RESET_K, r0
 
/*
 * We now commit to returning from this interrupt, since we will be
@@ -1187,7 +1196,7 @@ handle_nmi:
FEEDBACK_REENTER(handle_nmi)
{
 movei  r30, 1
-move   r31, r0
+cmpeq  r31, r0, zero
}
j   interrupt_return
STD_ENDPROC(handle_nmi)
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 3/4] tile/perf: Support perf_events on tilegx and tilepro

2014-03-05 Thread Tony Lu
Add perf support for tile architecture.

Signed-off-by: Zhigang Lu <z...@tilera.com>
Signed-off-by: Chris Metcalf <cmetc...@tilera.com>
---
 arch/tile/Kconfig  |2 +
 arch/tile/include/asm/perf_event.h |   22 +
 arch/tile/kernel/Makefile  |1 +
 arch/tile/kernel/irq.c |   18 +
 arch/tile/kernel/perf_event.c  | 1005 
 5 files changed, 1048 insertions(+)
 create mode 100644 arch/tile/include/asm/perf_event.h
 create mode 100644 arch/tile/kernel/perf_event.c

diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 3067b15..31c8c62 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -3,6 +3,8 @@

 config TILE
def_bool y
+   select HAVE_PERF_EVENTS
+   select USE_PMC if PERF_EVENTS
select HAVE_DMA_ATTRS
select HAVE_DMA_API_DEBUG
select HAVE_KVM if !TILEGX
diff --git a/arch/tile/include/asm/perf_event.h 
b/arch/tile/include/asm/perf_event.h
new file mode 100644
index 000..59c5b16
--- /dev/null
+++ b/arch/tile/include/asm/perf_event.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright 2014 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_PERF_EVENT_H
+#define _ASM_TILE_PERF_EVENT_H
+
+#include <linux/percpu.h>
+DECLARE_PER_CPU(u64, perf_irqs);
+
+unsigned long handle_syscall_link_address(void);
+#endif /* _ASM_TILE_PERF_EVENT_H */
diff --git a/arch/tile/kernel/Makefile b/arch/tile/kernel/Makefile
index 71d8353..21f77bf 100644
--- a/arch/tile/kernel/Makefile
+++ b/arch/tile/kernel/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_PCI) += pci_gx.o
 else
 obj-$(CONFIG_PCI)  += pci.o
 endif
+obj-$(CONFIG_PERF_EVENTS)  += perf_event.o
 obj-$(CONFIG_USE_PMC)  += pmc.o
 obj-$(CONFIG_TILE_USB) += usb.o
 obj-$(CONFIG_TILE_HVGLUE_TRACE)+= hvglue_trace.o
diff --git a/arch/tile/kernel/irq.c b/arch/tile/kernel/irq.c
index 0586fdb..906a76b 100644
--- a/arch/tile/kernel/irq.c
+++ b/arch/tile/kernel/irq.c
@@ -21,6 +21,7 @@
 #include <hv/drv_pcie_rc_intf.h>
 #include <arch/spr_def.h>
 #include <asm/traps.h>
+#include <linux/perf_event.h>

 /* Bit-flag stored in irq_desc->chip_data to indicate HW-cleared irqs. */
 #define IS_HW_CLEARED 1
@@ -261,6 +262,23 @@ void ack_bad_irq(unsigned int irq)
 }

 /*
+ * /proc/interrupts printing:
+ */
+int arch_show_interrupts(struct seq_file *p, int prec)
+{
+#ifdef CONFIG_PERF_EVENTS
+   int i;
+
+   seq_printf(p, "%*s: ", prec, "PMI");
+
+   for_each_online_cpu(i)
+   seq_printf(p, "%10llu ", per_cpu(perf_irqs, i));
+   seq_puts(p, "  perf_events\n");
+#endif
+   return 0;
+}
+
+/*
  * Generic, controller-independent functions:
  */

diff --git a/arch/tile/kernel/perf_event.c b/arch/tile/kernel/perf_event.c
new file mode 100644
index 0000000..53ddb5d
--- /dev/null
+++ b/arch/tile/kernel/perf_event.c
@@ -0,0 +1,1005 @@
+/*
+ * Copyright 2014 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ *
+ * Perf_events support for Tile processor.
+ *
+ * This code is based upon the x86 perf event
+ * code, which is:
+ *
+ *  Copyright (C) 2008 Thomas Gleixner <t...@linutronix.de>
+ *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
+ *  Copyright (C) 2009 Jaswinder Singh Rajput
+ *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
+ *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijl...@redhat.com>
+ *  Copyright (C) 2009 Intel Corporation, <markus.t.metz...@intel.com>
+ *  Copyright (C) 2009 Google, Inc., Stephane Eranian
+ */
+
+#include <linux/kprobes.h>
+#include <linux/kernel.h>
+#include <linux/kdebug.h>
+#include <linux/mutex.h>
+#include <linux/bitmap.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/perf_event.h>
+#include <linux/atomic.h>
+#include <asm/traps.h>
+#include <asm/stack.h>
+#include <asm/pmc.h>
+#include <hv/hypervisor.h>
+
+#define TILE_MAX_COUNTERS  4
+
+#define PERF_COUNT_0_IDX   0
+#define PERF_COUNT_1_IDX   1
+#define AUX_PERF_COUNT_0_IDX   2
+#define AUX_PERF_COUNT_1_IDX   3
+

[PATCH 4/4] perf tools: Allow building for tile

2014-03-05 Thread Tony Lu
Tested by building perf:
- Cross-compiled for tile on x86_64
- Built natively on tile

Signed-off-by: Zhigang Lu <z...@tilera.com>
Signed-off-by: Chris Metcalf <cmetc...@tilera.com>
---
 tools/perf/config/Makefile.arch | 3 ++-
 tools/perf/perf.h   | 8 
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/tools/perf/config/Makefile.arch b/tools/perf/config/Makefile.arch
index fef8ae9..4b06719 100644
--- a/tools/perf/config/Makefile.arch
+++ b/tools/perf/config/Makefile.arch
@@ -5,7 +5,8 @@ ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/ -e 
s/sun4u/sparc64/ \
   -e s/arm.*/arm/ -e s/sa110/arm/ \
   -e s/s390x/s390/ -e s/parisc64/parisc/ \
   -e s/ppc.*/powerpc/ -e s/mips.*/mips/ \
-  -e s/sh[234].*/sh/ -e s/aarch64.*/arm64/ )
+  -e s/sh[234].*/sh/ -e s/aarch64.*/arm64/ \
+  -e s/tile.*/tile/ )
 
 # Additional ARCH settings for x86
 ifeq ($(ARCH),i386)
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index e84fa26..75caf68 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -139,6 +139,14 @@
 #define CPUINFO_PROC   "core ID"
 #endif
 
+#ifdef __tile__
+#define mb()   asm volatile ("mf" ::: "memory")
+#define wmb()  asm volatile ("mf" ::: "memory")
+#define rmb()  asm volatile ("mf" ::: "memory")
+#define cpu_relax() asm volatile ("mfspr zero, PASS" ::: "memory")
+#define CPUINFO_PROC "model name"
+#endif
+
 #define barrier() asm volatile ("" ::: "memory")
 
 #ifndef cpu_relax
-- 
1.8.3.1

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


RE: [PATCH] perf tools: Fix the logic of thread__fork

2014-02-10 Thread Tony Lu
Thanks, I see. I was using the mainline tree, but it was just updated to 
3.13-rc8.

>-Original Message-
>From: Arnaldo Carvalho de Melo [mailto:arnaldo.m...@gmail.com] On Behalf Of
>Arnaldo Carvalho de Melo
>Sent: Monday, February 10, 2014 8:39 PM
>To: Tony Lu
>Cc: Peter Zijlstra; Paul Mackerras; Ingo Molnar; linux-kernel@vger.kernel.org;
>Chris Metcalf
>Subject: Re: [PATCH] perf tools: Fix the logic of thread__fork
>
>Em Mon, Feb 10, 2014 at 04:21:23AM +, Tony Lu escreveu:
>> Revert the logic of checking the return value of thread__set_comm().
>>
>> If thread__set_comm() returns zero without errors, we should not return
>> immediately, instead we should finish the rest of the clone work.
>> Otherwise, perf report would fail to resolve symbols sampled in forked
>> threads.
>
>You should mention against which tree your patch is supposed to be
>applied, in this case a fix was made in december:
>
>[acme@ssdandy linux]$ git show 8d00be815c05ed0f0202f606bab4e54f98fd3b30
>commit 8d00be815c05ed0f0202f606bab4e54f98fd3b30
>Author: David Ahern 
>Date:   Tue Dec 10 21:35:38 2013 -0700
>
>perf tools: Fix inverted error verification bug in thread__fork
>
>Commit 1902efe7f for the new comm infra added the wrong check for return
>code on thread__set_comm. err == 0 is normal, so don't return at that
>point unless err != 0.
>
>Signed-off-by: David Ahern 
>Cc: Frederic Weisbecker 
>Link:
>http://lkml.kernel.org/r/1386736538-23525-1-git-send-email-dsah...@gmail.co
>m
>Signed-off-by: Arnaldo Carvalho de Melo 
>
>> Signed-off-by: Zhigang Lu 
>> ---
>>  tools/perf/util/thread.c |2 +-
>>  1 files changed, 1 insertions(+), 1 deletions(-)
>>
>> diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
>> index 49eaf1d..e394861 100644
>> --- a/tools/perf/util/thread.c
>> +++ b/tools/perf/util/thread.c
>> @@ -126,7 +126,7 @@ int thread__fork(struct thread *thread, struct thread
>*parent, u64 timestamp)
>>  if (!comm)
>>  return -ENOMEM;
>>  err = thread__set_comm(thread, comm, timestamp);
>> -if (!err)
>> +if (err)
>>  return err;
>>  thread->comm_set = true;
>>  }
>> --
>> 1.7.1
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


RE: [PATCH] perf tools: Fix the logic of thread__fork

2014-02-10 Thread Tony Lu
Thanks, I see. I was using the mainline tree, but it was just updated to 
3.13-rc8.

>-Original Message-
>From: Arnaldo Carvalho de Melo [mailto:arnaldo.m...@gmail.com] On Behalf Of
>Arnaldo Carvalho de Melo
>Sent: Monday, February 10, 2014 8:39 PM
>To: Tony Lu
>Cc: Peter Zijlstra; Paul Mackerras; Ingo Molnar; linux-kernel@vger.kernel.org;
>Chris Metcalf
>Subject: Re: [PATCH] perf tools: Fix the logic of thread__fork
>
>Em Mon, Feb 10, 2014 at 04:21:23AM +0000, Tony Lu escreveu:
>> Revert the logic of checking the return value of thread__set_comm().
>>
>> If thread__set_comm() returns zero without errors, we should not return
>> immediately, instead we should finish the rest of the clone work.
>> Otherwise, perf report would fail to resolve symbols sampled in forked
>> threads.
>
>You should mention against which tree your patch is supposed to be
>applied, in this case a fix was made in december:
>
>[acme@ssdandy linux]$ git show 8d00be815c05ed0f0202f606bab4e54f98fd3b30
>commit 8d00be815c05ed0f0202f606bab4e54f98fd3b30
>Author: David Ahern <dsah...@gmail.com>
>Date:   Tue Dec 10 21:35:38 2013 -0700
>
>perf tools: Fix inverted error verification bug in thread__fork
>
>Commit 1902efe7f for the new comm infra added the wrong check for return
>code on thread__set_comm. err == 0 is normal, so don't return at that
>point unless err != 0.
>
>Signed-off-by: David Ahern <dsah...@gmail.com>
>Cc: Frederic Weisbecker <fweis...@gmail.com>
>Link:
>http://lkml.kernel.org/r/1386736538-23525-1-git-send-email-dsah...@gmail.co
>m
>Signed-off-by: Arnaldo Carvalho de Melo <a...@redhat.com>
>
>> Signed-off-by: Zhigang Lu <z...@tilera.com>
>> ---
>>  tools/perf/util/thread.c |2 +-
>>  1 files changed, 1 insertions(+), 1 deletions(-)
>>
>> diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
>> index 49eaf1d..e394861 100644
>> --- a/tools/perf/util/thread.c
>> +++ b/tools/perf/util/thread.c
>> @@ -126,7 +126,7 @@ int thread__fork(struct thread *thread, struct thread
>*parent, u64 timestamp)
>>  if (!comm)
>>  return -ENOMEM;
>>  err = thread__set_comm(thread, comm, timestamp);
>> -if (!err)
>> +if (err)
>>  return err;
>>  thread->comm_set = true;
>>  }
>> --
>> 1.7.1
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] perf tools: Fix the logic of thread__fork

2014-02-09 Thread Tony Lu
Revert the logic of checking the return value of thread__set_comm().

If thread__set_comm() returns zero without errors, we should not return
immediately, instead we should finish the rest of the clone work.
Otherwise, perf report would fail to resolve symbols sampled in forked
threads.

Signed-off-by: Zhigang Lu <z...@tilera.com>
---
 tools/perf/util/thread.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 49eaf1d..e394861 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -126,7 +126,7 @@ int thread__fork(struct thread *thread, struct thread 
*parent, u64 timestamp)
if (!comm)
return -ENOMEM;
err = thread__set_comm(thread, comm, timestamp);
-   if (!err)
+   if (err)
return err;
thread->comm_set = true;
}
-- 
1.7.1
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] perf tools: Fix the logic of thread__fork

2014-02-09 Thread Tony Lu
Revert the logic of checking the return value of thread__set_comm().

If thread__set_comm() returns zero without errors, we should not return
immediately, instead we should finish the rest of the clone work.
Otherwise, perf report would fail to resolve symbols sampled in forked
threads.

Signed-off-by: Zhigang Lu <z...@tilera.com>
---
 tools/perf/util/thread.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 49eaf1d..e394861 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -126,7 +126,7 @@ int thread__fork(struct thread *thread, struct thread 
*parent, u64 timestamp)
if (!comm)
return -ENOMEM;
err = thread__set_comm(thread, comm, timestamp);
-   if (!err)
+   if (err)
return err;
thread->comm_set = true;
}
-- 
1.7.1
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] ftrace: default to tilegx if ARCH=tile is specified

2013-12-05 Thread Tony Lu
This matches the existing behavior in arch/tile/Makefile for defconfig.

Reported-by: fengguang...@intel.com
Signed-off-by: Tony Lu <z...@tilera.com>
Signed-off-by: Chris Metcalf <cmetc...@tilera.com>
---
Chris Metcalf will push this change via the tile tree.

 scripts/recordmcount.pl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index d0da66396f62..91280b82da08 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -364,7 +364,8 @@ if ($arch eq "x86_64") {
 } elsif ($arch eq "blackfin") {
 $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\s__mcount\$";
 $mcount_adjust = -4;
-} elsif ($arch eq "tilegx") {
+} elsif ($arch eq "tilegx" || $arch eq "tile") {
+# Default to the newer TILE-Gx architecture if only "tile" is given.
 $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\s__mcount\$";
 $type = ".quad";
 $alignment = 8;
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] ftrace: default to tilegx if ARCH=tile is specified

2013-12-05 Thread Tony Lu
This matches the existing behavior in arch/tile/Makefile for defconfig.

Reported-by: fengguang...@intel.com
Signed-off-by: Tony Lu <z...@tilera.com>
Signed-off-by: Chris Metcalf <cmetc...@tilera.com>
---
Chris Metcalf will push this change via the tile tree.

 scripts/recordmcount.pl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index d0da66396f62..91280b82da08 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -364,7 +364,8 @@ if ($arch eq "x86_64") {
 } elsif ($arch eq "blackfin") {
 $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\s__mcount\$";
 $mcount_adjust = -4;
-} elsif ($arch eq "tilegx") {
+} elsif ($arch eq "tilegx" || $arch eq "tile") {
+# Default to the newer TILE-Gx architecture if only "tile" is given.
 $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\s__mcount\$";
 $type = ".quad";
 $alignment = 8;
-- 
1.8.3.1

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2] tile: support kprobes on tilegx

2013-08-13 Thread Tony Lu
This change includes support for Kprobes, Jprobes and Return Probes.

Signed-off-by: Tony Lu <z...@tilera.com>
Signed-off-by: Chris Metcalf <cmetc...@tilera.com>
---
v2: implement Masami Hiramatsu's suggestion to add an insn_has_control()
check to disallow placing probes on instructions that modify control flow.
We can improve this in a later change if it seems useful.

 arch/tile/Kconfig|   2 +
 arch/tile/include/asm/Kbuild |   1 -
 arch/tile/include/asm/kdebug.h   |  28 ++
 arch/tile/include/asm/kprobes.h  |  79 
 arch/tile/include/asm/ptrace.h   |   1 +
 arch/tile/include/uapi/arch/opcode_tilegx.h  |   1 +
 arch/tile/include/uapi/arch/opcode_tilepro.h |   1 +
 arch/tile/kernel/Makefile|   1 +
 arch/tile/kernel/kprobes.c   | 528 +++
 arch/tile/kernel/smp.c   |  14 +-
 arch/tile/kernel/traps.c |  42 +++
 arch/tile/kernel/vmlinux.lds.S   |   1 +
 arch/tile/mm/fault.c |  12 +
 samples/kprobes/kprobe_example.c |   9 +
 14 files changed, 716 insertions(+), 4 deletions(-)
 create mode 100644 arch/tile/include/asm/kdebug.h
 create mode 100644 arch/tile/include/asm/kprobes.h
 create mode 100644 arch/tile/kernel/kprobes.c

diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index e1600be..ecff467 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -125,6 +125,8 @@ config TILEGX
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_DYNAMIC_FTRACE
select HAVE_FTRACE_MCOUNT_RECORD
+   select HAVE_KPROBES
+   select HAVE_KRETPROBES
 
 config TILEPRO
def_bool !TILEGX
diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild
index b17b9b8..4c0b3c2 100644
--- a/arch/tile/include/asm/Kbuild
+++ b/arch/tile/include/asm/Kbuild
@@ -15,7 +15,6 @@ generic-y += ioctl.h
 generic-y += ioctls.h
 generic-y += ipcbuf.h
 generic-y += irq_regs.h
-generic-y += kdebug.h
 generic-y += local.h
 generic-y += msgbuf.h
 generic-y += mutex.h
diff --git a/arch/tile/include/asm/kdebug.h b/arch/tile/include/asm/kdebug.h
new file mode 100644
index 0000000..5bbbfa9
--- /dev/null
+++ b/arch/tile/include/asm/kdebug.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_KDEBUG_H
+#define _ASM_TILE_KDEBUG_H
+
+#include <linux/notifier.h>
+
+enum die_val {
+   DIE_OOPS = 1,
+   DIE_BREAK,
+   DIE_SSTEPBP,
+   DIE_PAGE_FAULT,
+   DIE_COMPILED_BPT
+};
+
+#endif /* _ASM_TILE_KDEBUG_H */
diff --git a/arch/tile/include/asm/kprobes.h b/arch/tile/include/asm/kprobes.h
new file mode 100644
index 0000000..d8f9a83
--- /dev/null
+++ b/arch/tile/include/asm/kprobes.h
@@ -0,0 +1,79 @@
+/*
+ * arch/tile/include/asm/kprobes.h
+ *
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_KPROBES_H
+#define _ASM_TILE_KPROBES_H
+
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/percpu.h>
+
+#include <arch/opcode.h>
+
+#define __ARCH_WANT_KPROBES_INSN_SLOT
+#define MAX_INSN_SIZE  2
+
+#define kretprobe_blacklist_size 0
+
+typedef tile_bundle_bits kprobe_opcode_t;
+
+#define flush_insn_slot(p) \
+   flush_icache_range((unsigned long)p->addr,  \
+  (unsigned long)p->addr + \
+  (MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
+
+struct kprobe;
+
+/* Architecture specific copy of original instruction. */
+struct arch_specific_insn {
+   kprobe_opcode_t *insn;
+};
+
+struct prev_kprobe {
+   struct kprobe *kp;
+   unsigned long status;
+   unsigned long saved_pc;
+};
+
+#define MAX_JPROBES_STACK_SIZE 128
+#define MAX_JPROBES_STACK_ADDR \
+   (((unsigned long)current_thread_info()) + THREAD_SIZE - 32 \
+   - sizeof(struct pt_regs))
+
+#define MIN_JPROBES_STACK_SIZE(ADDR)   \
+   ((

[PATCH v2] tile: support ASLR fully

2013-08-13 Thread Tony Lu
With this change, tile Linux now supports address-space layout
randomization for shared objects, stack, heap and vdso.

Signed-off-by: Tony Lu <z...@tilera.com>
Signed-off-by: Chris Metcalf <cmetc...@tilera.com>
---
v2: implement Jiri Kosina's suggestion to copy the math in
mmap_rnd() for x86.

 arch/tile/include/asm/elf.h |  4 
 arch/tile/mm/mmap.c | 24 ++--
 2 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/arch/tile/include/asm/elf.h b/arch/tile/include/asm/elf.h
index 31d854f..e1da88e 100644
--- a/arch/tile/include/asm/elf.h
+++ b/arch/tile/include/asm/elf.h
@@ -137,6 +137,10 @@ do { \
NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_BASE); \
 } while (0)
 
+struct mm_struct;
+extern unsigned long arch_randomize_brk(struct mm_struct *mm);
+#define arch_randomize_brk arch_randomize_brk
+
 #ifdef CONFIG_COMPAT
 
 #define COMPAT_ELF_PLATFORM "tilegx-m32"
diff --git a/arch/tile/mm/mmap.c b/arch/tile/mm/mmap.c
index d67d91e..4b99344 100644
--- a/arch/tile/mm/mmap.c
+++ b/arch/tile/mm/mmap.c
@@ -58,16 +58,36 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 #else
int is_32bit = 0;
 #endif
+   unsigned long random_factor = 0UL;
+
+   /*
+*  8 bits of randomness in 32bit mmaps, 24 address space bits
+* 12 bits of randomness in 64bit mmaps, 28 address space bits
+*/
+   if (current->flags & PF_RANDOMIZE) {
+   if (is_32bit)
+   random_factor = get_random_int() % (1<<8);
+   else
+   random_factor = get_random_int() % (1<<12);
+   
+   random_factor <<= PAGE_SHIFT;
+   }
 
/*
 * Use standard layout if the expected stack growth is unlimited
 * or we are running native 64 bits.
 */
-   if (!is_32bit || rlimit(RLIMIT_STACK) == RLIM_INFINITY) {
-   mm->mmap_base = TASK_UNMAPPED_BASE;
+   if (rlimit(RLIMIT_STACK) == RLIM_INFINITY) {
+   mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
mm->get_unmapped_area = arch_get_unmapped_area;
} else {
mm->mmap_base = mmap_base(mm);
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
}
 }
+
+unsigned long arch_randomize_brk(struct mm_struct *mm)
+{
+   unsigned long range_end = mm->brk + 0x02000000;
+   return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
+}
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2] tile: support ASLR fully

2013-08-13 Thread Tony Lu
With this change, tile Linux now supports address-space layout
randomization for shared objects, stack, heap and vdso.

Signed-off-by: Tony Lu <z...@tilera.com>
Signed-off-by: Chris Metcalf <cmetc...@tilera.com>
---
v2: implement Jiri Kosina's suggestion to copy the math in
mmap_rnd() for x86.

 arch/tile/include/asm/elf.h |  4 
 arch/tile/mm/mmap.c | 24 ++--
 2 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/arch/tile/include/asm/elf.h b/arch/tile/include/asm/elf.h
index 31d854f..e1da88e 100644
--- a/arch/tile/include/asm/elf.h
+++ b/arch/tile/include/asm/elf.h
@@ -137,6 +137,10 @@ do { \
NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_BASE); \
 } while (0)
 
+struct mm_struct;
+extern unsigned long arch_randomize_brk(struct mm_struct *mm);
+#define arch_randomize_brk arch_randomize_brk
+
 #ifdef CONFIG_COMPAT
 
 #define COMPAT_ELF_PLATFORM "tilegx-m32"
diff --git a/arch/tile/mm/mmap.c b/arch/tile/mm/mmap.c
index d67d91e..4b99344 100644
--- a/arch/tile/mm/mmap.c
+++ b/arch/tile/mm/mmap.c
@@ -58,16 +58,36 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 #else
int is_32bit = 0;
 #endif
+   unsigned long random_factor = 0UL;
+
+   /*
+*  8 bits of randomness in 32bit mmaps, 24 address space bits
+* 12 bits of randomness in 64bit mmaps, 28 address space bits
+*/
+   if (current->flags & PF_RANDOMIZE) {
+   if (is_32bit)
+   random_factor = get_random_int() % (1<<8);
+   else
+   random_factor = get_random_int() % (1<<12);
+   
+   random_factor <<= PAGE_SHIFT;
+   }
 
/*
 * Use standard layout if the expected stack growth is unlimited
 * or we are running native 64 bits.
 */
-   if (!is_32bit || rlimit(RLIMIT_STACK) == RLIM_INFINITY) {
-   mm->mmap_base = TASK_UNMAPPED_BASE;
+   if (rlimit(RLIMIT_STACK) == RLIM_INFINITY) {
+   mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
mm->get_unmapped_area = arch_get_unmapped_area;
} else {
mm->mmap_base = mmap_base(mm);
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
}
 }
+
+unsigned long arch_randomize_brk(struct mm_struct *mm)
+{
+   unsigned long range_end = mm->brk + 0x02000000;
+   return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
+}
-- 
1.8.3.1

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2] tile: support kprobes on tilegx

2013-08-13 Thread Tony Lu
This change includes support for Kprobes, Jprobes and Return Probes.

Signed-off-by: Tony Lu <z...@tilera.com>
Signed-off-by: Chris Metcalf <cmetc...@tilera.com>
---
v2: implement Masami Hiramatsu's suggestion to add an insn_has_control()
check to disallow placing probes on instructions that modify control flow.
We can improve this in a later change if it seems useful.

 arch/tile/Kconfig|   2 +
 arch/tile/include/asm/Kbuild |   1 -
 arch/tile/include/asm/kdebug.h   |  28 ++
 arch/tile/include/asm/kprobes.h  |  79 
 arch/tile/include/asm/ptrace.h   |   1 +
 arch/tile/include/uapi/arch/opcode_tilegx.h  |   1 +
 arch/tile/include/uapi/arch/opcode_tilepro.h |   1 +
 arch/tile/kernel/Makefile|   1 +
 arch/tile/kernel/kprobes.c   | 528 +++
 arch/tile/kernel/smp.c   |  14 +-
 arch/tile/kernel/traps.c |  42 +++
 arch/tile/kernel/vmlinux.lds.S   |   1 +
 arch/tile/mm/fault.c |  12 +
 samples/kprobes/kprobe_example.c |   9 +
 14 files changed, 716 insertions(+), 4 deletions(-)
 create mode 100644 arch/tile/include/asm/kdebug.h
 create mode 100644 arch/tile/include/asm/kprobes.h
 create mode 100644 arch/tile/kernel/kprobes.c

diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index e1600be..ecff467 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -125,6 +125,8 @@ config TILEGX
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_DYNAMIC_FTRACE
select HAVE_FTRACE_MCOUNT_RECORD
+   select HAVE_KPROBES
+   select HAVE_KRETPROBES
 
 config TILEPRO
def_bool !TILEGX
diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild
index b17b9b8..4c0b3c2 100644
--- a/arch/tile/include/asm/Kbuild
+++ b/arch/tile/include/asm/Kbuild
@@ -15,7 +15,6 @@ generic-y += ioctl.h
 generic-y += ioctls.h
 generic-y += ipcbuf.h
 generic-y += irq_regs.h
-generic-y += kdebug.h
 generic-y += local.h
 generic-y += msgbuf.h
 generic-y += mutex.h
diff --git a/arch/tile/include/asm/kdebug.h b/arch/tile/include/asm/kdebug.h
new file mode 100644
index 0000000..5bbbfa9
--- /dev/null
+++ b/arch/tile/include/asm/kdebug.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_KDEBUG_H
+#define _ASM_TILE_KDEBUG_H
+
+#include <linux/notifier.h>
+
+enum die_val {
+   DIE_OOPS = 1,
+   DIE_BREAK,
+   DIE_SSTEPBP,
+   DIE_PAGE_FAULT,
+   DIE_COMPILED_BPT
+};
+
+#endif /* _ASM_TILE_KDEBUG_H */
diff --git a/arch/tile/include/asm/kprobes.h b/arch/tile/include/asm/kprobes.h
new file mode 100644
index 0000000..d8f9a83
--- /dev/null
+++ b/arch/tile/include/asm/kprobes.h
@@ -0,0 +1,79 @@
+/*
+ * arch/tile/include/asm/kprobes.h
+ *
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_KPROBES_H
+#define _ASM_TILE_KPROBES_H
+
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/percpu.h>
+
+#include <arch/opcode.h>
+
+#define __ARCH_WANT_KPROBES_INSN_SLOT
+#define MAX_INSN_SIZE  2
+
+#define kretprobe_blacklist_size 0
+
+typedef tile_bundle_bits kprobe_opcode_t;
+
+#define flush_insn_slot(p) \
+   flush_icache_range((unsigned long)p->addr,  \
+  (unsigned long)p->addr + \
+  (MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
+
+struct kprobe;
+
+/* Architecture specific copy of original instruction. */
+struct arch_specific_insn {
+   kprobe_opcode_t *insn;
+};
+
+struct prev_kprobe {
+   struct kprobe *kp;
+   unsigned long status;
+   unsigned long saved_pc;
+};
+
+#define MAX_JPROBES_STACK_SIZE 128
+#define MAX_JPROBES_STACK_ADDR \
+   (((unsigned long)current_thread_info()) + THREAD_SIZE - 32 \
+   - sizeof(struct pt_regs

[PATCH] tile: support ASLR fully

2013-08-09 Thread Tony Lu
With this change, tile Linux now supports address-space layout
randomization for shared objects, stack, heap and vdso.

Signed-off-by: Tony Lu <z...@tilera.com>
Signed-off-by: Chris Metcalf <cmetc...@tilera.com>
---
 arch/tile/include/asm/elf.h |  4 
 arch/tile/mm/mmap.c | 20 ++--
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/arch/tile/include/asm/elf.h b/arch/tile/include/asm/elf.h
index 31d854f..e1da88e 100644
--- a/arch/tile/include/asm/elf.h
+++ b/arch/tile/include/asm/elf.h
@@ -137,6 +137,10 @@ do { \
NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_BASE); \
 } while (0)
 
+struct mm_struct;
+extern unsigned long arch_randomize_brk(struct mm_struct *mm);
+#define arch_randomize_brk arch_randomize_brk
+
 #ifdef CONFIG_COMPAT
 
 #define COMPAT_ELF_PLATFORM "tilegx-m32"
diff --git a/arch/tile/mm/mmap.c b/arch/tile/mm/mmap.c
index d67d91e..b3686ce 100644
--- a/arch/tile/mm/mmap.c
+++ b/arch/tile/mm/mmap.c
@@ -58,16 +58,32 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 #else
int is_32bit = 0;
 #endif
+   unsigned long random_factor = 0UL;
+
+   if (current->flags & PF_RANDOMIZE) {
+   random_factor = get_random_int();
+   random_factor = random_factor << PAGE_SHIFT;
+   if (is_32bit)
+   random_factor &= 0xff0000ul;
+   else
+   random_factor &= 0xfff0000ul;
+   }
 
/*
 * Use standard layout if the expected stack growth is unlimited
 * or we are running native 64 bits.
 */
-   if (!is_32bit || rlimit(RLIMIT_STACK) == RLIM_INFINITY) {
-   mm->mmap_base = TASK_UNMAPPED_BASE;
+   if (rlimit(RLIMIT_STACK) == RLIM_INFINITY) {
+   mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
mm->get_unmapped_area = arch_get_unmapped_area;
} else {
mm->mmap_base = mmap_base(mm);
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
}
 }
+
+unsigned long arch_randomize_brk(struct mm_struct *mm)
+{
+   unsigned long range_end = mm->brk + 0x02000000;
+   return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
+}
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] tile: support ftrace on tilegx

2013-08-09 Thread Tony Lu
This commit adds support for static ftrace, graph function support,
and dynamic tracer support.

Signed-off-by: Tony Lu <z...@tilera.com>
Signed-off-by: Chris Metcalf <cmetc...@tilera.com>
---
I assume it makes sense for this to be pushed via the tile tree;
it's currently queued in tile-next.

 arch/tile/Kconfig  |   7 +-
 arch/tile/include/asm/ftrace.h |  22 +++-
 arch/tile/kernel/Makefile  |   6 +
 arch/tile/kernel/ftrace.c  | 246 +
 arch/tile/kernel/mcount_64.S   | 224 +
 arch/tile/kernel/vmlinux.lds.S |   1 +
 arch/tile/lib/exports.c|   6 +
 scripts/recordmcount.pl|   4 +
 8 files changed, 514 insertions(+), 2 deletions(-)
 create mode 100644 arch/tile/kernel/ftrace.c
 create mode 100644 arch/tile/kernel/mcount_64.S

diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index e76180e..e1600be 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -119,7 +119,12 @@ config HVC_TILE
def_bool y
 
 config TILEGX
-   bool "Building with TILE-Gx (64-bit) compiler and toolchain"
+   bool "Building for TILE-Gx (64-bit) processor"
+   select HAVE_FUNCTION_TRACER
+   select HAVE_FUNCTION_TRACE_MCOUNT_TEST
+   select HAVE_FUNCTION_GRAPH_TRACER
+   select HAVE_DYNAMIC_FTRACE
+   select HAVE_FTRACE_MCOUNT_RECORD
 
 config TILEPRO
def_bool !TILEGX
diff --git a/arch/tile/include/asm/ftrace.h b/arch/tile/include/asm/ftrace.h
index 461459b..13a9bb81 100644
--- a/arch/tile/include/asm/ftrace.h
+++ b/arch/tile/include/asm/ftrace.h
@@ -15,6 +15,26 @@
 #ifndef _ASM_TILE_FTRACE_H
 #define _ASM_TILE_FTRACE_H
 
-/* empty */
+#ifdef CONFIG_FUNCTION_TRACER
+
+#define MCOUNT_ADDR ((unsigned long)(__mcount))
+#define MCOUNT_INSN_SIZE 8 /* sizeof mcount call */
+
+#ifndef __ASSEMBLY__
+extern void __mcount(void);
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+static inline unsigned long ftrace_call_adjust(unsigned long addr)
+{
+   return addr;
+}
+
+struct dyn_arch_ftrace {
+};
+#endif /*  CONFIG_DYNAMIC_FTRACE */
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* CONFIG_FUNCTION_TRACER */
 
 #endif /* _ASM_TILE_FTRACE_H */
diff --git a/arch/tile/kernel/Makefile b/arch/tile/kernel/Makefile
index c4a957a..2e6eaa1 100644
--- a/arch/tile/kernel/Makefile
+++ b/arch/tile/kernel/Makefile
@@ -9,6 +9,11 @@ obj-y := backtrace.o entry.o hvglue.o irq.o messaging.o \
sysfs.o time.o traps.o unaligned.o vdso.o \
intvec_$(BITS).o regs_$(BITS).o tile-desc_$(BITS).o
 
+ifdef CONFIG_FUNCTION_TRACER
+CFLAGS_REMOVE_ftrace.o = -pg
+CFLAGS_REMOVE_early_printk.o = -pg
+endif
+
 obj-$(CONFIG_HARDWALL) += hardwall.o
 obj-$(CONFIG_COMPAT)   += compat.o compat_signal.o
 obj-$(CONFIG_SMP)  += smpboot.o smp.o tlb.o
@@ -22,5 +27,6 @@ obj-$(CONFIG_PCI) += pci.o
 endif
 obj-$(CONFIG_TILE_USB) += usb.o
 obj-$(CONFIG_TILE_HVGLUE_TRACE)+= hvglue_trace.o
+obj-$(CONFIG_FUNCTION_TRACER)  += ftrace.o mcount_64.o
 
 obj-y  += vdso/
diff --git a/arch/tile/kernel/ftrace.c b/arch/tile/kernel/ftrace.c
new file mode 100644
index 000..f1c4520
--- /dev/null
+++ b/arch/tile/kernel/ftrace.c
@@ -0,0 +1,246 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * TILE-Gx specific ftrace support
+ */
+
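+#include <linux/ftrace.h>
+#include <linux/uaccess.h>
+
+#include <asm/cacheflush.h>
+#include <asm/ftrace.h>
+#include <asm/sections.h>
+
+#include <arch/opcode.h>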
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+static inline tilegx_bundle_bits NOP(void)
+{
+   return create_UnaryOpcodeExtension_X0(FNOP_UNARY_OPCODE_X0) |
+   create_RRROpcodeExtension_X0(UNARY_RRR_0_OPCODE_X0) |
+   create_Opcode_X0(RRR_0_OPCODE_X0) |
+   create_UnaryOpcodeExtension_X1(NOP_UNARY_OPCODE_X1) |
+   create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) |
+   create_Opcode_X1(RRR_0_OPCODE_X1);
+}
+
+static int machine_stopped __read_mostly;
+
+int ftrace_arch_code_modify_prepare(void)
+{
+   machine_stopped = 1;
+   return 0;
+}
+
+int ftrace_arch_code_modify_post_process(void)
+{
+   flush_icache_range(0, CHIP_L1I_CACHE_SIZE());
+   machine_stopped = 0;
+   return 0;
+}
+
+/*
+ * Put { move r10, lr; jal ftrace_caller } in a bundle, this lets dynamic
+ * tracer just add one cycle overhead to every kernel function when disabled.
+ */
+static unsigned long ftrace_gen_branch(unsigned long pc, unsigned long addr,
+  bool link)
+{
+   tilegx_bundle_bits 

[PATCH] tile: support kprobes on tilegx

2013-08-09 Thread Tony Lu
This change includes support for Kprobes, Jprobes and Return Probes.

Signed-off-by: Tony Lu <z...@tilera.com>
Signed-off-by: Chris Metcalf <cmetc...@tilera.com>
---
I assume it makes sense for this to be pushed via the tile tree;
it's currently queued in tile-next.

 arch/tile/Kconfig|   2 +
 arch/tile/include/asm/Kbuild |   1 -
 arch/tile/include/asm/kdebug.h   |  28 ++
 arch/tile/include/asm/kprobes.h  |  79 +
 arch/tile/include/asm/ptrace.h   |   1 +
 arch/tile/include/uapi/arch/opcode_tilegx.h  |   1 +
 arch/tile/include/uapi/arch/opcode_tilepro.h |   1 +
 arch/tile/kernel/Makefile|   1 +
 arch/tile/kernel/kprobes.c   | 476 +++
 arch/tile/kernel/smp.c   |  14 +-
 arch/tile/kernel/traps.c |  42 +++
 arch/tile/kernel/vmlinux.lds.S   |   1 +
 arch/tile/mm/fault.c |  12 +
 samples/kprobes/kprobe_example.c |   9 +
 14 files changed, 664 insertions(+), 4 deletions(-)
 create mode 100644 arch/tile/include/asm/kdebug.h
 create mode 100644 arch/tile/include/asm/kprobes.h
 create mode 100644 arch/tile/kernel/kprobes.c

diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index e1600be..ecff467 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -125,6 +125,8 @@ config TILEGX
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_DYNAMIC_FTRACE
select HAVE_FTRACE_MCOUNT_RECORD
+   select HAVE_KPROBES
+   select HAVE_KRETPROBES
 
 config TILEPRO
def_bool !TILEGX
diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild
index b17b9b8..4c0b3c2 100644
--- a/arch/tile/include/asm/Kbuild
+++ b/arch/tile/include/asm/Kbuild
@@ -15,7 +15,6 @@ generic-y += ioctl.h
 generic-y += ioctls.h
 generic-y += ipcbuf.h
 generic-y += irq_regs.h
-generic-y += kdebug.h
 generic-y += local.h
 generic-y += msgbuf.h
 generic-y += mutex.h
diff --git a/arch/tile/include/asm/kdebug.h b/arch/tile/include/asm/kdebug.h
new file mode 100644
index 000..5bbbfa9
--- /dev/null
+++ b/arch/tile/include/asm/kdebug.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_KDEBUG_H
+#define _ASM_TILE_KDEBUG_H
+
+#include <linux/notifier.h>
+
+enum die_val {
+   DIE_OOPS = 1,
+   DIE_BREAK,
+   DIE_SSTEPBP,
+   DIE_PAGE_FAULT,
+   DIE_COMPILED_BPT
+};
+
+#endif /* _ASM_TILE_KDEBUG_H */
diff --git a/arch/tile/include/asm/kprobes.h b/arch/tile/include/asm/kprobes.h
new file mode 100644
index 000..d8f9a83
--- /dev/null
+++ b/arch/tile/include/asm/kprobes.h
@@ -0,0 +1,79 @@
+/*
+ * arch/tile/include/asm/kprobes.h
+ *
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_KPROBES_H
+#define _ASM_TILE_KPROBES_H
+
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/percpu.h>
+
+#include <arch/opcode.h>
+
+#define __ARCH_WANT_KPROBES_INSN_SLOT
+#define MAX_INSN_SIZE  2
+
+#define kretprobe_blacklist_size 0
+
+typedef tile_bundle_bits kprobe_opcode_t;
+
+#define flush_insn_slot(p) \
+   flush_icache_range((unsigned long)p->addr,  \
+  (unsigned long)p->addr + \
+  (MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
+
+struct kprobe;
+
+/* Architecture specific copy of original instruction. */
+struct arch_specific_insn {
+   kprobe_opcode_t *insn;
+};
+
+struct prev_kprobe {
+   struct kprobe *kp;
+   unsigned long status;
+   unsigned long saved_pc;
+};
+
+#define MAX_JPROBES_STACK_SIZE 128
+#define MAX_JPROBES_STACK_ADDR \
+   (((unsigned long)current_thread_info()) + THREAD_SIZE - 32 \
+   - sizeof(struct pt_regs))
+
+#define MIN_JPROBES_STACK_SIZE(ADDR)   \
+   ADDR) + MAX_JPROBES_STACK_SIZE) > MAX_JPROBES_STACK_ADDR)   \
+   ? MAX_JPROBES_STACK_ADD

[PATCH] tile: support kprobes on tilegx

2013-08-09 Thread Tony Lu
This change includes support for Kprobes, Jprobes and Return Probes.

Signed-off-by: Tony Lu <z...@tilera.com>
Signed-off-by: Chris Metcalf <cmetc...@tilera.com>
---
I assume it makes sense for this to be pushed via the tile tree;
it's currently queued in tile-next.

 arch/tile/Kconfig|   2 +
 arch/tile/include/asm/Kbuild |   1 -
 arch/tile/include/asm/kdebug.h   |  28 ++
 arch/tile/include/asm/kprobes.h  |  79 +
 arch/tile/include/asm/ptrace.h   |   1 +
 arch/tile/include/uapi/arch/opcode_tilegx.h  |   1 +
 arch/tile/include/uapi/arch/opcode_tilepro.h |   1 +
 arch/tile/kernel/Makefile|   1 +
 arch/tile/kernel/kprobes.c   | 476 +++
 arch/tile/kernel/smp.c   |  14 +-
 arch/tile/kernel/traps.c |  42 +++
 arch/tile/kernel/vmlinux.lds.S   |   1 +
 arch/tile/mm/fault.c |  12 +
 samples/kprobes/kprobe_example.c |   9 +
 14 files changed, 664 insertions(+), 4 deletions(-)
 create mode 100644 arch/tile/include/asm/kdebug.h
 create mode 100644 arch/tile/include/asm/kprobes.h
 create mode 100644 arch/tile/kernel/kprobes.c

diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index e1600be..ecff467 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -125,6 +125,8 @@ config TILEGX
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_DYNAMIC_FTRACE
select HAVE_FTRACE_MCOUNT_RECORD
+   select HAVE_KPROBES
+   select HAVE_KRETPROBES
 
 config TILEPRO
def_bool !TILEGX
diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild
index b17b9b8..4c0b3c2 100644
--- a/arch/tile/include/asm/Kbuild
+++ b/arch/tile/include/asm/Kbuild
@@ -15,7 +15,6 @@ generic-y += ioctl.h
 generic-y += ioctls.h
 generic-y += ipcbuf.h
 generic-y += irq_regs.h
-generic-y += kdebug.h
 generic-y += local.h
 generic-y += msgbuf.h
 generic-y += mutex.h
diff --git a/arch/tile/include/asm/kdebug.h b/arch/tile/include/asm/kdebug.h
new file mode 100644
index 000..5bbbfa9
--- /dev/null
+++ b/arch/tile/include/asm/kdebug.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_KDEBUG_H
+#define _ASM_TILE_KDEBUG_H
+
+#include <linux/notifier.h>
+
+enum die_val {
+   DIE_OOPS = 1,
+   DIE_BREAK,
+   DIE_SSTEPBP,
+   DIE_PAGE_FAULT,
+   DIE_COMPILED_BPT
+};
+
+#endif /* _ASM_TILE_KDEBUG_H */
diff --git a/arch/tile/include/asm/kprobes.h b/arch/tile/include/asm/kprobes.h
new file mode 100644
index 000..d8f9a83
--- /dev/null
+++ b/arch/tile/include/asm/kprobes.h
@@ -0,0 +1,79 @@
+/*
+ * arch/tile/include/asm/kprobes.h
+ *
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_KPROBES_H
+#define _ASM_TILE_KPROBES_H
+
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/percpu.h>
+
+#include <arch/opcode.h>
+
+#define __ARCH_WANT_KPROBES_INSN_SLOT
+#define MAX_INSN_SIZE  2
+
+#define kretprobe_blacklist_size 0
+
+typedef tile_bundle_bits kprobe_opcode_t;
+
+#define flush_insn_slot(p) \
+   flush_icache_range((unsigned long)p->addr,  \
+  (unsigned long)p->addr + \
+  (MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
+
+struct kprobe;
+
+/* Architecture specific copy of original instruction. */
+struct arch_specific_insn {
+   kprobe_opcode_t *insn;
+};
+
+struct prev_kprobe {
+   struct kprobe *kp;
+   unsigned long status;
+   unsigned long saved_pc;
+};
+
+#define MAX_JPROBES_STACK_SIZE 128
+#define MAX_JPROBES_STACK_ADDR \
+   (((unsigned long)current_thread_info()) + THREAD_SIZE - 32 \
+   - sizeof(struct pt_regs))
+
+#define MIN_JPROBES_STACK_SIZE(ADDR)   \
+   ADDR

[PATCH] tile: support ftrace on tilegx

2013-08-09 Thread Tony Lu
This commit adds support for static ftrace, graph function support,
and dynamic tracer support.

Signed-off-by: Tony Lu <z...@tilera.com>
Signed-off-by: Chris Metcalf <cmetc...@tilera.com>
---
I assume it makes sense for this to be pushed via the tile tree;
it's currently queued in tile-next.

 arch/tile/Kconfig  |   7 +-
 arch/tile/include/asm/ftrace.h |  22 +++-
 arch/tile/kernel/Makefile  |   6 +
 arch/tile/kernel/ftrace.c  | 246 +
 arch/tile/kernel/mcount_64.S   | 224 +
 arch/tile/kernel/vmlinux.lds.S |   1 +
 arch/tile/lib/exports.c|   6 +
 scripts/recordmcount.pl|   4 +
 8 files changed, 514 insertions(+), 2 deletions(-)
 create mode 100644 arch/tile/kernel/ftrace.c
 create mode 100644 arch/tile/kernel/mcount_64.S

diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index e76180e..e1600be 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -119,7 +119,12 @@ config HVC_TILE
def_bool y
 
 config TILEGX
-   bool "Building with TILE-Gx (64-bit) compiler and toolchain"
+   bool "Building for TILE-Gx (64-bit) processor"
+   select HAVE_FUNCTION_TRACER
+   select HAVE_FUNCTION_TRACE_MCOUNT_TEST
+   select HAVE_FUNCTION_GRAPH_TRACER
+   select HAVE_DYNAMIC_FTRACE
+   select HAVE_FTRACE_MCOUNT_RECORD
 
 config TILEPRO
def_bool !TILEGX
diff --git a/arch/tile/include/asm/ftrace.h b/arch/tile/include/asm/ftrace.h
index 461459b..13a9bb81 100644
--- a/arch/tile/include/asm/ftrace.h
+++ b/arch/tile/include/asm/ftrace.h
@@ -15,6 +15,26 @@
 #ifndef _ASM_TILE_FTRACE_H
 #define _ASM_TILE_FTRACE_H
 
-/* empty */
+#ifdef CONFIG_FUNCTION_TRACER
+
+#define MCOUNT_ADDR ((unsigned long)(__mcount))
+#define MCOUNT_INSN_SIZE 8 /* sizeof mcount call */
+
+#ifndef __ASSEMBLY__
+extern void __mcount(void);
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+static inline unsigned long ftrace_call_adjust(unsigned long addr)
+{
+   return addr;
+}
+
+struct dyn_arch_ftrace {
+};
+#endif /*  CONFIG_DYNAMIC_FTRACE */
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* CONFIG_FUNCTION_TRACER */
 
 #endif /* _ASM_TILE_FTRACE_H */
diff --git a/arch/tile/kernel/Makefile b/arch/tile/kernel/Makefile
index c4a957a..2e6eaa1 100644
--- a/arch/tile/kernel/Makefile
+++ b/arch/tile/kernel/Makefile
@@ -9,6 +9,11 @@ obj-y := backtrace.o entry.o hvglue.o irq.o messaging.o \
sysfs.o time.o traps.o unaligned.o vdso.o \
intvec_$(BITS).o regs_$(BITS).o tile-desc_$(BITS).o
 
+ifdef CONFIG_FUNCTION_TRACER
+CFLAGS_REMOVE_ftrace.o = -pg
+CFLAGS_REMOVE_early_printk.o = -pg
+endif
+
 obj-$(CONFIG_HARDWALL) += hardwall.o
 obj-$(CONFIG_COMPAT)   += compat.o compat_signal.o
 obj-$(CONFIG_SMP)  += smpboot.o smp.o tlb.o
@@ -22,5 +27,6 @@ obj-$(CONFIG_PCI) += pci.o
 endif
 obj-$(CONFIG_TILE_USB) += usb.o
 obj-$(CONFIG_TILE_HVGLUE_TRACE)+= hvglue_trace.o
+obj-$(CONFIG_FUNCTION_TRACER)  += ftrace.o mcount_64.o
 
 obj-y  += vdso/
diff --git a/arch/tile/kernel/ftrace.c b/arch/tile/kernel/ftrace.c
new file mode 100644
index 000..f1c4520
--- /dev/null
+++ b/arch/tile/kernel/ftrace.c
@@ -0,0 +1,246 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * TILE-Gx specific ftrace support
+ */
+
+#include <linux/ftrace.h>
+#include <linux/uaccess.h>
+
+#include <asm/cacheflush.h>
+#include <asm/ftrace.h>
+#include <asm/sections.h>
+
+#include <arch/opcode.h>
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+static inline tilegx_bundle_bits NOP(void)
+{
+   return create_UnaryOpcodeExtension_X0(FNOP_UNARY_OPCODE_X0) |
+   create_RRROpcodeExtension_X0(UNARY_RRR_0_OPCODE_X0) |
+   create_Opcode_X0(RRR_0_OPCODE_X0) |
+   create_UnaryOpcodeExtension_X1(NOP_UNARY_OPCODE_X1) |
+   create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) |
+   create_Opcode_X1(RRR_0_OPCODE_X1);
+}
+
+static int machine_stopped __read_mostly;
+
+int ftrace_arch_code_modify_prepare(void)
+{
+   machine_stopped = 1;
+   return 0;
+}
+
+int ftrace_arch_code_modify_post_process(void)
+{
+   flush_icache_range(0, CHIP_L1I_CACHE_SIZE());
+   machine_stopped = 0;
+   return 0;
+}
+
+/*
+ * Put { move r10, lr; jal ftrace_caller } in a bundle, this lets dynamic
+ * tracer just add one cycle overhead to every kernel function when disabled.
+ */
+static unsigned long ftrace_gen_branch(unsigned long pc, unsigned

[PATCH] tile: support ASLR fully

2013-08-09 Thread Tony Lu
With this change, tile Linux now supports address-space layout
randomization for shared objects, stack, heap and vdso.

Signed-off-by: Tony Lu <z...@tilera.com>
Signed-off-by: Chris Metcalf <cmetc...@tilera.com>
---
 arch/tile/include/asm/elf.h |  4 
 arch/tile/mm/mmap.c | 20 ++--
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/arch/tile/include/asm/elf.h b/arch/tile/include/asm/elf.h
index 31d854f..e1da88e 100644
--- a/arch/tile/include/asm/elf.h
+++ b/arch/tile/include/asm/elf.h
@@ -137,6 +137,10 @@ do { \
NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_BASE); \
 } while (0)
 
+struct mm_struct;
+extern unsigned long arch_randomize_brk(struct mm_struct *mm);
+#define arch_randomize_brk arch_randomize_brk
+
 #ifdef CONFIG_COMPAT
 
 #define COMPAT_ELF_PLATFORM tilegx-m32
diff --git a/arch/tile/mm/mmap.c b/arch/tile/mm/mmap.c
index d67d91e..b3686ce 100644
--- a/arch/tile/mm/mmap.c
+++ b/arch/tile/mm/mmap.c
@@ -58,16 +58,32 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 #else
int is_32bit = 0;
 #endif
+   unsigned long random_factor = 0UL;
+
+   if (current->flags & PF_RANDOMIZE) {
+   random_factor = get_random_int();
+   random_factor = random_factor << PAGE_SHIFT;
+   if (is_32bit)
+   random_factor &= 0xfful;
+   else
+   random_factor &= 0xffful;
+   }
 
/*
 * Use standard layout if the expected stack growth is unlimited
 * or we are running native 64 bits.
 */
-   if (!is_32bit || rlimit(RLIMIT_STACK) == RLIM_INFINITY) {
-   mm->mmap_base = TASK_UNMAPPED_BASE;
+   if (rlimit(RLIMIT_STACK) == RLIM_INFINITY) {
+   mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
mm->get_unmapped_area = arch_get_unmapped_area;
} else {
mm->mmap_base = mmap_base(mm);
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
}
 }
+
+unsigned long arch_randomize_brk(struct mm_struct *mm)
+{
+   unsigned long range_end = mm->brk + 0x0200;
+   return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
+}
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


RE: [PATCH] xfs: Fix possible truncation of log data in xlog_bread_noalign()

2013-03-04 Thread Tony Lu
Thanks for following up.

My apologies: I just found that a change I made earlier is what causes this
problem. That change forces mkfs.xfs to format xfs partitions with a sector
size of at least 4096 bytes. I made it to work around a bug in earlier versions
of xfs, which used (struct page *)->private (a long) as a bitmap to represent
each block's state within a page (a page can be 64K or larger, in which case
128 bits or more are needed to represent each block's state within a page).

This is reproducible on 2.6.38.6 kernel on X86. But I do not get why this 
change makes the xfs log inconsistent during mount/cp/umount operations.

diff -dur xfsprogs-3.1.4.ori/include/xfs_alloc_btree.h 
xfsprogs-3.1.4/include/xfs_alloc_btree.h
--- xfsprogs-3.1.4.ori/include/xfs_alloc_btree.h2010-01-30 
03:46:13.0 +0800
+++ xfsprogs-3.1.4/include/xfs_alloc_btree.h2013-03-04 16:11:41.0 
+0800
@@ -59,7 +59,7 @@
 #define XFS_MAX_BLOCKSIZE_LOG  16  /* i.e. 65536 bytes */
 #define XFS_MIN_BLOCKSIZE  (1 << XFS_MIN_BLOCKSIZE_LOG)
 #define XFS_MAX_BLOCKSIZE  (1 << XFS_MAX_BLOCKSIZE_LOG)
-#define XFS_MIN_SECTORSIZE_LOG 9   /* i.e. 512 bytes */
+#define XFS_MIN_SECTORSIZE_LOG 12  /* i.e. 4096 bytes */
 #define XFS_MAX_SECTORSIZE_LOG 15  /* i.e. 32768 bytes */
 #define XFS_MIN_SECTORSIZE (1 << XFS_MIN_SECTORSIZE_LOG)
 #define XFS_MAX_SECTORSIZE (1 << XFS_MAX_SECTORSIZE_LOG)

Thanks
-Tony

>-Original Message-
>From: Mark Tinguely [mailto:tingu...@sgi.com]
>Sent: Saturday, March 02, 2013 4:24 AM
>To: Tony Lu
>Cc: Alex Elder; linux-kernel@vger.kernel.org; Chris Metcalf; x...@oss.sgi.com;
>Ben Myers; Dave Chinner; linux-fsde...@vger.kernel.org
>Subject: Re: [PATCH] xfs: Fix possible truncation of log data in
>xlog_bread_noalign()
>
>On 03/01/13 09:51, Mark Tinguely wrote:
>> On 02/26/13 01:28, Tony Lu wrote:
>>> I get a reliable way to reproduce this bug. The logprint and metadump
>>> are attached.
>>>
>>> Kernel version: 2.6.38.8
>>> Mkfs.xfs version: xfsprogs-3.1.1
>>> mkfs.xfs -s size=4096 /dev/sda1
>>>
>>> Run the following mount-cp-umount script to reproduce:
>>> #!/bin/sh
>>> device=/dev/sda1
>>> mount_point=/mnt
>>> times=10
>>>
>>> for ((num=1;num<=$times;num++))
>>> do
>>> echo "$num mount $device $mount_point"
>>> mount $device $mount_point
>>>
>>> echo "cp -rf /bin $mount_point/$num"
>>> cp -rf /bin $mount_point/$num
>>>
>>> echo "$num umount $device $mount_point"
>>> umount $mount_point
>>>
>>> #num=$(($num + 1))
>>> done
>>>
>>> After several times of mount/cp/umount, this xfs crashes, and the xfs
>>> partition can not be mounted any more. Here is the output of console.
>>> -sh-4.1# ./umount-test
>>> 1 mount /dev/sda1 /mnt
>>> XFS mounting filesystem sda1
>>> cp -rf /bin /mnt/1
>>> 1 umount /dev/sda1 /mnt
>>> 2 mount /dev/sda1 /mnt
>>> XFS mounting filesystem sda1
>>> cp -rf /bin /mnt/2
>>> 2 umount /dev/sda1 /mnt
>>> 3 mount /dev/sda1 /mnt
>>> XFS mounting filesystem sda1
>>> cp -rf /bin /mnt/3
>>> 3 umount /dev/sda1 /mnt
>>> 4 mount /dev/sda1 /mnt
>>> XFS mounting filesystem sda1
>>> cp -rf /bin /mnt/4
>>> 4 umount /dev/sda1 /mnt
>>> 5 mount /dev/sda1 /mnt
>>> XFS mounting filesystem sda1
>>> Starting XFS recovery on filesystem: sda1 (logdev: internal)
>>> Ending XFS recovery on filesystem: sda1 (logdev: internal)cp -rf /bin
>>> /mnt/5
>>> 5 umount /dev/sda1 /mnt
>>> 6 mount /dev/sda1 /mnt
>>>
>>> XFS mounting filesystem sda1
>>> Starting XFS recovery on filesystem: sda1 (logdev: internal)
>>> Ending XFS recovery on filesystem: sda1 (logdev: internal)Interrupt
>>> cp -rf /bin /mnt/6
>>> 6 umount /dev/sda1 /mnt
>>> 7 mount /dev/sda1 /mnt
>>>
>>> XFS mounting filesystem sda1
>>> cp -rf /bin /mnt/7
>>> 7 umount /dev/sda1 /mnt
>>> Interrupt
>>> 8 mount /dev/sda1 /mnt
>>> XFS mounting filesystem sda1
>>> Starting XFS recovery on filesystem: sda1 (logdev: internal)
>>> XFS: xlog_recover_process_data: bad clientid
>>> XFS: log mount/recovery failed: error 5
>>> XFS: log mount failed
>>>
>>> Thanks
>>> -Tony
>>
>> It works fine on a 2.6.32 machine I had sitting around - and I never
>> required log recovery.
>>
>> I think you need to answer Dave's question as to why 

RE: [PATCH] xfs: Fix possible truncation of log data in xlog_bread_noalign()

2013-03-04 Thread Tony Lu
Thanks for following up.

My apologies: I just found that a change I made earlier is what causes this
problem. That change forces mkfs.xfs to format xfs partitions with a sector
size of at least 4096 bytes. I made it to work around a bug in earlier versions
of xfs, which used (struct page *)->private (a long) as a bitmap to represent
each block's state within a page (a page can be 64K or larger, in which case
128 bits or more are needed to represent each block's state within a page).

This is reproducible on 2.6.38.6 kernel on X86. But I do not get why this 
change makes the xfs log inconsistent during mount/cp/umount operations.

diff -dur xfsprogs-3.1.4.ori/include/xfs_alloc_btree.h 
xfsprogs-3.1.4/include/xfs_alloc_btree.h
--- xfsprogs-3.1.4.ori/include/xfs_alloc_btree.h2010-01-30 
03:46:13.0 +0800
+++ xfsprogs-3.1.4/include/xfs_alloc_btree.h2013-03-04 16:11:41.0 
+0800
@@ -59,7 +59,7 @@
 #define XFS_MAX_BLOCKSIZE_LOG  16  /* i.e. 65536 bytes */
 #define XFS_MIN_BLOCKSIZE  (1 << XFS_MIN_BLOCKSIZE_LOG)
 #define XFS_MAX_BLOCKSIZE  (1 << XFS_MAX_BLOCKSIZE_LOG)
-#define XFS_MIN_SECTORSIZE_LOG 9   /* i.e. 512 bytes */
+#define XFS_MIN_SECTORSIZE_LOG 12  /* i.e. 4096 bytes */
 #define XFS_MAX_SECTORSIZE_LOG 15  /* i.e. 32768 bytes */
 #define XFS_MIN_SECTORSIZE (1 << XFS_MIN_SECTORSIZE_LOG)
 #define XFS_MAX_SECTORSIZE (1 << XFS_MAX_SECTORSIZE_LOG)

Thanks
-Tony

-Original Message-
From: Mark Tinguely [mailto:tingu...@sgi.com]
Sent: Saturday, March 02, 2013 4:24 AM
To: Tony Lu
Cc: Alex Elder; linux-kernel@vger.kernel.org; Chris Metcalf; x...@oss.sgi.com;
Ben Myers; Dave Chinner; linux-fsde...@vger.kernel.org
Subject: Re: [PATCH] xfs: Fix possible truncation of log data in
xlog_bread_noalign()

On 03/01/13 09:51, Mark Tinguely wrote:
 On 02/26/13 01:28, Tony Lu wrote:
 I get a reliable way to reproduce this bug. The logprint and metadump
 are attached.

 Kernel version: 2.6.38.8
 Mkfs.xfs version: xfsprogs-3.1.1
 mkfs.xfs -s size=4096 /dev/sda1

 Run the following mount-cp-umount script to reproduce:
 #!/bin/sh
 device=/dev/sda1
 mount_point=/mnt
 times=10

 for ((num=1;num<=$times;num++))
 do
 echo "$num mount $device $mount_point"
 mount $device $mount_point

 echo "cp -rf /bin $mount_point/$num"
 cp -rf /bin $mount_point/$num

 echo "$num umount $device $mount_point"
 umount $mount_point

 #num=$(($num + 1))
 done

 After several times of mount/cp/umount, this xfs crashes, and the xfs
 partition can not be mounted any more. Here is the output of console.
 -sh-4.1# ./umount-test
 1 mount /dev/sda1 /mnt
 XFS mounting filesystem sda1
 cp -rf /bin /mnt/1
 1 umount /dev/sda1 /mnt
 2 mount /dev/sda1 /mnt
 XFS mounting filesystem sda1
 cp -rf /bin /mnt/2
 2 umount /dev/sda1 /mnt
 3 mount /dev/sda1 /mnt
 XFS mounting filesystem sda1
 cp -rf /bin /mnt/3
 3 umount /dev/sda1 /mnt
 4 mount /dev/sda1 /mnt
 XFS mounting filesystem sda1
 cp -rf /bin /mnt/4
 4 umount /dev/sda1 /mnt
 5 mount /dev/sda1 /mnt
 XFS mounting filesystem sda1
 Starting XFS recovery on filesystem: sda1 (logdev: internal)
 Ending XFS recovery on filesystem: sda1 (logdev: internal)cp -rf /bin
 /mnt/5
 5 umount /dev/sda1 /mnt
 6 mount /dev/sda1 /mnt

 XFS mounting filesystem sda1
 Starting XFS recovery on filesystem: sda1 (logdev: internal)
 Ending XFS recovery on filesystem: sda1 (logdev: internal)Interrupt
 cp -rf /bin /mnt/6
 6 umount /dev/sda1 /mnt
 7 mount /dev/sda1 /mnt

 XFS mounting filesystem sda1
 cp -rf /bin /mnt/7
 7 umount /dev/sda1 /mnt
 Interrupt
 8 mount /dev/sda1 /mnt
 XFS mounting filesystem sda1
 Starting XFS recovery on filesystem: sda1 (logdev: internal)
 XFS: xlog_recover_process_data: bad clientid
 XFS: log mount/recovery failed: error 5
 XFS: log mount failed

 Thanks
 -Tony

 It works fine on a 2.6.32 machine I had sitting around - and I never
 required log recovery.

 I think you need to answer Dave's question as to why your unmounts
 are requiring recovery.

 Are there errors in the /var/log/messages?

 I downloaded the Linux 2.6.38.8 source and take a look if I can recreate
 the problem.

 --Mark.

I could not reproduce the problem on a vanilla install. XFS shutdown and
remounted cleanly running your script (several iterations looping set to
100).

I started fsstress on another XFS partition on the same disk to see if I
could force a shutdown race. With CONFIG_XFS_DEBUG=y, I could trigger
other ASSERTs on the fsstress partition so I never stayed up long enough
to cause a shutdown race.

Not wanting to patch that version of Linux/XFS, I am bailing here. If
you want to turn on the XFS debug it may point out why your filesystem
is not shutting down cleanly.

--Mark.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


RE: [PATCH] xfs: Fix possible truncation of log data in xlog_bread_noalign()

2013-02-23 Thread Tony Lu
>> For example, if xlog_bread_noalign() wants to read blocks from #1
>> to # 9, in which case the passed parameter blk_no is 1, and nbblks
>> is 8, sectBBsize is 8, after the round down and round up
>> operations, we get blk_no as 0, and nbblks as still 8. We
>> definitely lose the last block of the log data.
>
>Yes, I fully understand that. But I also understand how the log
>works and that this behaviour *should not happen*. That's why
>I'm asking questions about what the problem you are trying to fix.

I am not sure about this, since I saw many reads of non-sector-aligned blocks
even when successfully mounting good XFS partitions.
-sh-4.1# mount /dev/sda3 /home/
XFS (sda3): Mounting Filesystem
xlog_bread_noalign:blk_no=0,nbblks=1,l_sectBBsize=8
xlog_bread_noalign:blk_no=61447,nbblks=1,l_sectBBsize=8
xlog_bread_noalign:blk_no=0,nbblks=1,l_sectBBsize=8
...
xlog_bread_noalign:blk_no=8695,nbblks=1,l_sectBBsize=8
xlog_bread_noalign:blk_no=4600,nbblks=4096,l_sectBBsize=8
xlog_bread_noalign:blk_no=8184,nbblks=512,l_sectBBsize=8

There is also code in xlog_write_log_records() which handles
non-sector-aligned reads and writes.

/* We may need to do a read at the start to fill in part of
 * the buffer in the starting sector not covered by the first
 * write below.
 */
balign = round_down(start_block, sectbb);
if (balign != start_block) {
error = xlog_bread_noalign(log, start_block, 1, bp);
if (error)
goto out_put_bp;

j = start_block - balign;
}

>Ramdisks don't persist over a reboot, so you must have had some
>other way of reproducing the problem. Can you tell me how you
>reproduced it on a ramdisk? Better yet, send me a script that
>reproduces the problem?

I will try to reproduce it. Basically it is a loop of mounting, creating many
files, and unmounting.

Thanks
-Tony
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


RE: [PATCH] xfs: Fix possible truncation of log data in xlog_bread_noalign()

2013-02-23 Thread Tony Lu
>-Original Message-
>From: Ben Myers [mailto:b...@sgi.com]
>
>Hi Tony,
>
>On Fri, Feb 22, 2013 at 08:12:52AM +, Tony Lu wrote:
>> I encountered the following panic when using xfs partitions as rootfs, which
>> is due to the truncated log data read by xlog_bread_noalign(). We should
>> extend the buffer by one extra log sector to ensure there's enough space to
>> accommodate requested log data, which we indeed did in xlog_get_bp(), but we
>> forgot to do in xlog_bread_noalign().
>>
>> XFS mounting filesystem sda2
>> Starting XFS recovery on filesystem: sda2 (logdev: internal)
>> XFS: xlog_recover_process_data: bad clientid
>> XFS: log mount/recovery failed: error 5
>> XFS: log mount failedVFS: Cannot open root device "sda2" or unknown-block(8,)
>> Please append a correct "root=" boot option; here are the available partitio:
>> 0800   156290904 sda  driver: sd
>>   080131463271 sda1 ----
>>   080231463302 sda2 ----
>>   080331463302 sda3 ----
>>   0804   1 sda4 ----
>>   080510490413 sda5 ----
>>   080651407968 sda6 ----
>> Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(8,)
>>
>> Starting stack dump of tid 1, pid 1 (swapper) on cpu 35 at cycle 42273138234
>>   frame 0: 0xfff70016e5a0 dump_stack+0x0/0x20 (sp 0xfe03fbedfe88)
>>   frame 1: 0xfff7004af470 panic+0x150/0x3a0 (sp 0xfe03fbedfe88)
>>   frame 2: 0xfff700881e88 mount_block_root+0x2c0/0x4c8 (sp
>0xfe03fbe)
>>   frame 3: 0xfff700882390 prepare_namespace+0x250/0x358 (sp
>0xfe03fb)
>>   frame 4: 0xfff700880778 kernel_init+0x4c8/0x520 (sp
>0xfe03fbedffb0)
>>   frame 5: 0xfff70011ecb8 start_kernel_thread+0x18/0x20 (sp
>0xfe03fb)
>> Stack dump complete
>>
>> Signed-off-by: Zhigang Lu 
>> Reviewed-by: Chris Metcalf 
>
>Looks fine to me.  I'll pull it in after some testing.
>
>Do you happen to have a metadump of this filesystem?
>
>Reviewed-by: Ben Myers 

Sorry I did not keep the metadump of it. But I kept some debugging info when I 
debugged and fixed it a year ago.

Starting XFS recovery on filesystem: ram0 (logdev: internal)
xlog_bread_noalign--before round down/up: blk_no=0xf4d,nbblks=0x1
xlog_bread_noalign--after round down/up: blk_no=0xf4c,nbblks=0x4
xlog_bread_noalign--before round down/up: blk_no=0xf4d,nbblks=0x1
xlog_bread_noalign--after round down/up: blk_no=0xf4c,nbblks=0x4
xlog_bread_noalign--before round down/up: blk_no=0xf4e,nbblks=0x3f
xlog_bread_noalign--after round down/up: blk_no=0xf4c,nbblks=0x40
XFS: xlog_recover_process_data: bad clientid
Assertion failed: 0, file: 
/home/scratch/zlu/zlu-main/sys/linux/source/fs/xfs/xfs_log_recover.c, line: 2852
BUG: failure at 
/home/scratch/zlu/zlu-main/sys/linux/source/fs/xfs/support/debug.c:100/assfail()!
Kernel panic - not syncing: BUG!

Starting stack dump of tid 843, pid 843 (mount) on cpu 1 at cycle 345934778384
  frame 0: 0xfff7001380a0 dump_stack+0x0/0x20 (sp 0xfe43e55df7b0)
  frame 1: 0xfff7003b5470 panic+0x150/0x3a0 (sp 0xfe43e55df7b0)
  frame 2: 0xfff700824cf0 assfail+0x80/0x80 (sp 0xfe43e55df858)
  frame 3: 0xfff70037c7c0 xlog_recover_process_data+0x598/0x698 (sp 
0xfe43e55df868)
  frame 4: 0xfff7002c55e8 xlog_do_recovery_pass+0x810/0x908 (sp 
0xfe43e55df8e8)
  frame 5: 0xfff70068f0d8 xlog_do_log_recovery+0xc8/0x1d8 (sp 
0xfe43e55dfa48)
  frame 6: 0xfff70054cf60 xlog_do_recover+0x48/0x380 (sp 0xfe43e55dfa88)
  frame 7: 0xfff7006fdbf0 xlog_recover+0x138/0x170 (sp 0xfe43e55dfac0)
  frame 8: 0xfff7005b2d70 xfs_log_mount+0x150/0x2e8 (sp 0xfe43e55dfb00)
  frame 9: 0xfff700269830 xfs_mountfs+0x510/0xb20 (sp 0xfe43e55dfb38)
  frame 10: 0xfff700486930 xfs_fs_fill_super+0x2e0/0x3f0 (sp 
0xfe43e55dfba8)
  frame 11: 0xfff7000950c8 mount_bdev+0x168/0x2d0 (sp 0xfe43e55dfbe0)
  frame 12: 0xfff700071e08 vfs_kern_mount+0x110/0x408 (sp 
0xfe43e55dfc50)
  frame 13: 0xfff7000badf8 do_kern_mount+0x68/0x1e0 (sp 0xfe43e55dfc98)
  frame 14: 0xfff700046470 do_mount+0x200/0x878 (sp 0xfe43e55dfcd8)
  frame 15: 0xfff7000c8050 sys_mount+0xd0/0x1a0 (sp 0xfe43e55dfd60)
  frame 16: 0xfff7001a2c30 handle_syscall+0x280/0x340 (sp 
0xfe43e55dfdc0)
  
  frame 17: 0xd46688 libc-2.12.so[c2+1d] (sp 0x1ddf4b0)
  frame 18: 0x160 mount[155+2] (sp 0x1ddf4b0)
  frame 19: 0x1557dc0 mount[155+2] (sp 0x1ddf500)
  frame 20: 0x1558a80 mount[155+2

RE: [PATCH] xfs: Fix possible truncation of log data in xlog_bread_noalign()

2013-02-23 Thread Tony Lu
-Original Message-
From: Ben Myers [mailto:b...@sgi.com]

Hi Tony,

On Fri, Feb 22, 2013 at 08:12:52AM +, Tony Lu wrote:
 I encountered the following panic when using xfs partitions as rootfs, which
 is due to the truncated log data read by xlog_bread_noalign(). We should
 extend the buffer by one extra log sector to ensure there's enough space to
 accommodate requested log data, which we indeed did in xlog_get_bp(), but we
 forgot to do in xlog_bread_noalign().

 XFS mounting filesystem sda2
 Starting XFS recovery on filesystem: sda2 (logdev: internal)
 XFS: xlog_recover_process_data: bad clientid
 XFS: log mount/recovery failed: error 5
 XFS: log mount failedVFS: Cannot open root device sda2 or unknown-block(8,)
 Please append a correct root= boot option; here are the available partitio:
 0800   156290904 sda  driver: sd
   080131463271 sda1 ----
   080231463302 sda2 ----
   080331463302 sda3 ----
   0804   1 sda4 ----
   080510490413 sda5 ----
   080651407968 sda6 ----
 Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(8,)

 Starting stack dump of tid 1, pid 1 (swapper) on cpu 35 at cycle 42273138234
   frame 0: 0xfff70016e5a0 dump_stack+0x0/0x20 (sp 0xfe03fbedfe88)
   frame 1: 0xfff7004af470 panic+0x150/0x3a0 (sp 0xfe03fbedfe88)
   frame 2: 0xfff700881e88 mount_block_root+0x2c0/0x4c8 (sp
0xfe03fbe)
   frame 3: 0xfff700882390 prepare_namespace+0x250/0x358 (sp
0xfe03fb)
   frame 4: 0xfff700880778 kernel_init+0x4c8/0x520 (sp
0xfe03fbedffb0)
   frame 5: 0xfff70011ecb8 start_kernel_thread+0x18/0x20 (sp
0xfe03fb)
 Stack dump complete

 Signed-off-by: Zhigang Lu z...@tilera.com
 Reviewed-by: Chris Metcalf cmetc...@tilera.com

Looks fine to me.  I'll pull it in after some testing.

Do you happen to have a metadump of this filesystem?

Reviewed-by: Ben Myers b...@sgi.com

Sorry I did not keep the metadump of it. But I kept some debugging info when I 
debugged and fixed it a year ago.

Starting XFS recovery on filesystem: ram0 (logdev: internal)
xlog_bread_noalign--before round down/up: blk_no=0xf4d,nbblks=0x1
xlog_bread_noalign--after round down/up: blk_no=0xf4c,nbblks=0x4
xlog_bread_noalign--before round down/up: blk_no=0xf4d,nbblks=0x1
xlog_bread_noalign--after round down/up: blk_no=0xf4c,nbblks=0x4
xlog_bread_noalign--before round down/up: blk_no=0xf4e,nbblks=0x3f
xlog_bread_noalign--after round down/up: blk_no=0xf4c,nbblks=0x40
XFS: xlog_recover_process_data: bad clientid
Assertion failed: 0, file: 
/home/scratch/zlu/zlu-main/sys/linux/source/fs/xfs/xfs_log_recover.c, line: 2852
BUG: failure at 
/home/scratch/zlu/zlu-main/sys/linux/source/fs/xfs/support/debug.c:100/assfail()!
Kernel panic - not syncing: BUG!

Starting stack dump of tid 843, pid 843 (mount) on cpu 1 at cycle 345934778384
  frame 0: 0xfff7001380a0 dump_stack+0x0/0x20 (sp 0xfe43e55df7b0)
  frame 1: 0xfff7003b5470 panic+0x150/0x3a0 (sp 0xfe43e55df7b0)
  frame 2: 0xfff700824cf0 assfail+0x80/0x80 (sp 0xfe43e55df858)
  frame 3: 0xfff70037c7c0 xlog_recover_process_data+0x598/0x698 (sp 
0xfe43e55df868)
  frame 4: 0xfff7002c55e8 xlog_do_recovery_pass+0x810/0x908 (sp 
0xfe43e55df8e8)
  frame 5: 0xfff70068f0d8 xlog_do_log_recovery+0xc8/0x1d8 (sp 
0xfe43e55dfa48)
  frame 6: 0xfff70054cf60 xlog_do_recover+0x48/0x380 (sp 0xfe43e55dfa88)
  frame 7: 0xfff7006fdbf0 xlog_recover+0x138/0x170 (sp 0xfe43e55dfac0)
  frame 8: 0xfff7005b2d70 xfs_log_mount+0x150/0x2e8 (sp 0xfe43e55dfb00)
  frame 9: 0xfff700269830 xfs_mountfs+0x510/0xb20 (sp 0xfe43e55dfb38)
  frame 10: 0xfff700486930 xfs_fs_fill_super+0x2e0/0x3f0 (sp 
0xfe43e55dfba8)
  frame 11: 0xfff7000950c8 mount_bdev+0x168/0x2d0 (sp 0xfe43e55dfbe0)
  frame 12: 0xfff700071e08 vfs_kern_mount+0x110/0x408 (sp 
0xfe43e55dfc50)
  frame 13: 0xfff7000badf8 do_kern_mount+0x68/0x1e0 (sp 0xfe43e55dfc98)
  frame 14: 0xfff700046470 do_mount+0x200/0x878 (sp 0xfe43e55dfcd8)
  frame 15: 0xfff7000c8050 sys_mount+0xd0/0x1a0 (sp 0xfe43e55dfd60)
  frame 16: 0xfff7001a2c30 handle_syscall+0x280/0x340 (sp 
0xfe43e55dfdc0)
  syscall while in user mode
  frame 17: 0xd46688 libc-2.12.so[c2+1d] (sp 0x1ddf4b0)
  frame 18: 0x160 mount[155+2] (sp 0x1ddf4b0)
  frame 19: 0x1557dc0 mount[155+2] (sp 0x1ddf500)
  frame 20: 0x1558a80 mount[155+2] (sp 0x1ddf858)
  frame 21: 0x1559a60 mount[155+2] (sp 0x1ddf930)
  frame 22: 0xc3e5e8 libc-2.12.so[c2+1d] (sp 0x1ddfaf8)
Stack dump complete
Client requested halt.

Thanks
-Tony
--
To unsubscribe from this list: send

RE: [PATCH] xfs: Fix possible truncation of log data in xlog_bread_noalign()

2013-02-23 Thread Tony Lu
 For example, if xlog_bread_noalign() wants to read blocks from #1
 to # 9, in which case the passed parameter blk_no is 1, and nbblks
 is 8, sectBBsize is 8, after the round down and round up
 operations, we get blk_no as 0, and nbblks as still 8. We
 definitely lose the last block of the log data.

Yes, I fully understand that. But I also understand how the log
works and that this behaviour *should not happen*. That's why
I'm asking questions about what the problem you are trying to fix.

I am not sure about this, since I saw many reads of non-sector-aligned blocks
even when successfully mounting good XFS partitions.
-sh-4.1# mount /dev/sda3 /home/
XFS (sda3): Mounting Filesystem
xlog_bread_noalign:blk_no=0,nbblks=1,l_sectBBsize=8
xlog_bread_noalign:blk_no=61447,nbblks=1,l_sectBBsize=8
xlog_bread_noalign:blk_no=0,nbblks=1,l_sectBBsize=8
...
xlog_bread_noalign:blk_no=8695,nbblks=1,l_sectBBsize=8
xlog_bread_noalign:blk_no=4600,nbblks=4096,l_sectBBsize=8
xlog_bread_noalign:blk_no=8184,nbblks=512,l_sectBBsize=8

There is also code in xlog_write_log_records() which handles
non-sector-aligned reads and writes.

/* We may need to do a read at the start to fill in part of
 * the buffer in the starting sector not covered by the first
 * write below.
 */
balign = round_down(start_block, sectbb);
if (balign != start_block) {
error = xlog_bread_noalign(log, start_block, 1, bp);
if (error)
goto out_put_bp;

j = start_block - balign;
}

Ramdisks don't persist over a reboot, so you must have had some
other way of reproducing the problem. Can you tell me how you
reproduced it on a ramdisk? Better yet, send me a script that
reproduces the problem?

I will try to reproduce it. Basically it is a loop of mounting, creating many
files, and unmounting.

Thanks
-Tony
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


RE: [PATCH] xfs: Fix possible truncation of log data in xlog_bread_noalign()

2013-02-22 Thread Tony Lu
>From: Dave Chinner [mailto:da...@fromorbit.com]
>On Fri, Feb 22, 2013 at 08:12:52AM +, Tony Lu wrote:
>> I encountered the following panic when using xfs partitions as rootfs, which
>> is due to the truncated log data read by xlog_bread_noalign(). We should
>> extend the buffer by one extra log sector to ensure there's enough space to
>> accommodate requested log data, which we indeed did in xlog_get_bp(), but we
>> forgot to do in xlog_bread_noalign().
>
>We've never done that round up in xlog_bread_noalign(). It shouldn't
>be necessary as xlog_get_bp() and xlog_bread_noalign() are doing
>fundamentally different things. That is, xlog_get_bp() is ensuring
>the buffer is large enough for the upcoming IO that will be
>requested, while xlog_bread_noalign() is simply ensuring what it is
>passed is correctly aligned to device sector boundaries.

I set the sector size as 4096 when making the xfs filesystem.
-sh-4.1# mkfs.xfs -s size=4096 -f /dev/sda3

In this case, xlog_bread_noalign() needs to do such round up and round down 
frequently. And it is used to ensure what it is passed is aligned to the log 
sector size, but not the device sector boundaries.

Here is the debug info I added when mounting this xfs partition.
-sh-4.1# mount /dev/sda3 /home/
XFS (sda3): Mounting Filesystem
xlog_bread_noalign:blk_no=0,nbblks=1,l_sectBBsize=8
xlog_bread_noalign:blk_no=61447,nbblks=1,l_sectBBsize=8
xlog_bread_noalign:blk_no=0,nbblks=1,l_sectBBsize=8
...
xlog_bread_noalign:blk_no=8695,nbblks=1,l_sectBBsize=8
xlog_bread_noalign:blk_no=4600,nbblks=4096,l_sectBBsize=8
xlog_bread_noalign:blk_no=8184,nbblks=512,l_sectBBsize=8

>So, if you have to fudge an extra block for xlog_bread_noalign(),
>that implies that what xlog_bread_noalign() was passed was probably
>not correct. It also implies that you are using sector sizes larger
>than 512 bytes, because that's the only time this might matter. Put
>simply, this:

While debugging, I found that when it crashed, blk_no was not aligned to the log
sector size while nbblks was aligned to the log sector size, which makes sense.

For example, if xlog_bread_noalign() wants to read blocks from #1 to # 9, in 
which case the passed parameter blk_no is 1, and nbblks is 8, sectBBsize is 8, 
after the round down and round up operations, we get blk_no as 0, and nbblks as 
still 8. We definitely lose the last block of the log data.

>> XFS mounting filesystem sda2
>> Starting XFS recovery on filesystem: sda2 (logdev: internal)
>> XFS: xlog_recover_process_data: bad clientid
>> XFS: log mount/recovery failed: error 5
>> XFS: log mount failed
>
>Is not sufficient information for me to determine if you've correctly
>analysed the problem you were seeing and that this is the correct
>fix for it. I don't even know what kernel you are seeing this on, or
>how you are reproducing it.

I was using the 2.6.38.6 kernel, with xfs as the rootfs partition. After
untarring the rootfs files onto the xfs partition and trying to reboot from it,
the panic occasionally occurred.

>
>Note that I'm not saying the fix isn't necessary or correct, just
>that I cannot review it based on this commit message.  Given that this
>code is essentially unchanged in behaviour since the large sector
>size support was added in 2003(*), understanding how it is
>deficient is a critical part of the review process.
>
>Information you need to provide so I have a chance of reviewing
>whether it is correct or not:
>
>   - what kernel you saw this on,
>   - what the filesystem configuration was
>   - what workload reproduced this problem (a test case would
> be nice, and xfstest even better)
>   - the actual contents of the log that lead to the short read
> during recovery
>   - whether xfs_logprint was capable of parsing the log
> correctly
>   - where in the actual log recovery process the failure
> occurred (e.g. was it trying to recover transactions from
> a section of a wrapped log?)

I wish I could provide the corrupted log for you, but I probably cannot find
it any more, since I fixed this bug a year ago. I came across this fix recently
while cleaning up my code, so I thought I should contribute it back to the community.

>IOWs, please show your working so we can determine if this is the
>root cause of the problem you are seeing. :)
>
>(*)
>http://oss.sgi.com/cgi-bin/gitweb.cgi?p=archive/xfs-import.git;a=commitdiff
>;h=f14e527f411712f89178c31370b5d733ea1d0280
>
>FWIW, I think your change might need work - there's the possibility
>that it can round up the length beyond the end of the log if we ask
>to read up to the last sector of the log (i.e. blkno + blklen ==
>end of log) and then round up blklen by one sector
>
Good catch, you are rig

[PATCH] xfs: Fix possible truncation of log data in xlog_bread_noalign()

2013-02-22 Thread Tony Lu
I encountered the following panic when using xfs partitions as rootfs, which
is due to the truncated log data read by xlog_bread_noalign(). We should
extend the buffer by one extra log sector to ensure there's enough space to
accommodate requested log data, which we indeed did in xlog_get_bp(), but we
forgot to do in xlog_bread_noalign().

XFS mounting filesystem sda2
Starting XFS recovery on filesystem: sda2 (logdev: internal)
XFS: xlog_recover_process_data: bad clientid
XFS: log mount/recovery failed: error 5
XFS: log mount failedVFS: Cannot open root device "sda2" or unknown-block(8,)
Please append a correct "root=" boot option; here are the available partitio:
0800   156290904 sda  driver: sd
  080131463271 sda1 ----
  080231463302 sda2 ----
  080331463302 sda3 ----
  0804   1 sda4 ----
  080510490413 sda5 ----
  080651407968 sda6 ----
Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(8,)

Starting stack dump of tid 1, pid 1 (swapper) on cpu 35 at cycle 42273138234
  frame 0: 0xfff70016e5a0 dump_stack+0x0/0x20 (sp 0xfe03fbedfe88)
  frame 1: 0xfff7004af470 panic+0x150/0x3a0 (sp 0xfe03fbedfe88)
  frame 2: 0xfff700881e88 mount_block_root+0x2c0/0x4c8 (sp 0xfe03fbe)
  frame 3: 0xfff700882390 prepare_namespace+0x250/0x358 (sp 0xfe03fb)
  frame 4: 0xfff700880778 kernel_init+0x4c8/0x520 (sp 0xfe03fbedffb0)
  frame 5: 0xfff70011ecb8 start_kernel_thread+0x18/0x20 (sp 0xfe03fb)
Stack dump complete

Signed-off-by: Zhigang Lu 
Reviewed-by: Chris Metcalf 
---
 fs/xfs/xfs_log_recover.c |   15 +++
 1 file changed, 15 insertions(+)

diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 96fcbb8..64264a5 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -179,6 +179,21 @@ xlog_bread_noalign(
return EFSCORRUPTED;
}
 
+   /*
+* The blk_no may be a non-sector-aligned block offset, in
+* which case we round down the blk_no to be aligned with
+* the sector size, and if the nbblks is sector-aligned,
+* an I/O of the size nbblks could truncate the requested
+* log data.  If the requested size is only 1 basic block it
+* will never straddle a sector boundary, so this won't be
+* an issue.  Nor will this be a problem if the log I/O is
+* done in basic blocks (sector size 1).  But otherwise we
+* extend the buffer by one extra log sector to ensure
+* there's space to accommodate this possibility.
+*/
+   if (nbblks > 1 && log->l_sectBBsize > 1)
+   nbblks += log->l_sectBBsize;
+
blk_no = round_down(blk_no, log->l_sectBBsize);
nbblks = round_up(nbblks, log->l_sectBBsize);
 
-- 
1.7.10.3
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] xfs: Fix possible truncation of log data in xlog_bread_noalign()

2013-02-22 Thread Tony Lu
I encountered the following panic when using xfs partitions as rootfs, which
is due to the truncated log data read by xlog_bread_noalign(). We should
extend the buffer by one extra log sector to ensure there's enough space to
accommodate requested log data, which we indeed did in xlog_get_bp(), but we
forgot to do in xlog_bread_noalign().

XFS mounting filesystem sda2
Starting XFS recovery on filesystem: sda2 (logdev: internal)
XFS: xlog_recover_process_data: bad clientid
XFS: log mount/recovery failed: error 5
XFS: log mount failedVFS: Cannot open root device sda2 or unknown-block(8,)
Please append a correct root= boot option; here are the available partitio:
0800   156290904 sda  driver: sd
  080131463271 sda1 ----
  080231463302 sda2 ----
  080331463302 sda3 ----
  0804   1 sda4 ----
  080510490413 sda5 ----
  080651407968 sda6 ----
Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(8,)

Starting stack dump of tid 1, pid 1 (swapper) on cpu 35 at cycle 42273138234
  frame 0: 0xfff70016e5a0 dump_stack+0x0/0x20 (sp 0xfe03fbedfe88)
  frame 1: 0xfff7004af470 panic+0x150/0x3a0 (sp 0xfe03fbedfe88)
  frame 2: 0xfff700881e88 mount_block_root+0x2c0/0x4c8 (sp 0xfe03fbe)
  frame 3: 0xfff700882390 prepare_namespace+0x250/0x358 (sp 0xfe03fb)
  frame 4: 0xfff700880778 kernel_init+0x4c8/0x520 (sp 0xfe03fbedffb0)
  frame 5: 0xfff70011ecb8 start_kernel_thread+0x18/0x20 (sp 0xfe03fb)
Stack dump complete

Signed-off-by: Zhigang Lu z...@tilera.com
Reviewed-by: Chris Metcalf cmetc...@tilera.com
---
 fs/xfs/xfs_log_recover.c |   15 +++
 1 file changed, 15 insertions(+)

diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 96fcbb8..64264a5 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -179,6 +179,21 @@ xlog_bread_noalign(
return EFSCORRUPTED;
}
 
+   /*
+* The blk_no may be a non-sector-aligned block offset, in
+* which case we round down the blk_no to be aligned with
+* the sector size, and if the nbblks is sector-aligned,
+* an I/O of the size nbblks could truncate the requested
+* log data.  If the requested size is only 1 basic block it
+* will never straddle a sector boundary, so this won't be
+* an issue.  Nor will this be a problem if the log I/O is
+* done in basic blocks (sector size 1).  But otherwise we
+* extend the buffer by one extra log sector to ensure
+* there's space to accommodate this possibility.
+*/
+   if (nbblks > 1 && log->l_sectBBsize > 1)
+   nbblks += log->l_sectBBsize;
+
blk_no = round_down(blk_no, log->l_sectBBsize);
nbblks = round_up(nbblks, log->l_sectBBsize);
 
-- 
1.7.10.3
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


RE: [PATCH] xfs: Fix possible truncation of log data in xlog_bread_noalign()

2013-02-22 Thread Tony Lu
From: Dave Chinner [mailto:da...@fromorbit.com]
On Fri, Feb 22, 2013 at 08:12:52AM +, Tony Lu wrote:
 I encountered the following panic when using xfs partitions as rootfs, which
 is due to the truncated log data read by xlog_bread_noalign(). We should
 extend the buffer by one extra log sector to ensure there's enough space to
 accommodate requested log data, which we indeed did in xlog_get_bp(), but we
 forgot to do in xlog_bread_noalign().

We've never done that round up in xlog_bread_noalign(). It shouldn't
be necessary as xlog_get_bp() and xlog_bread_noalign() are doing
fundamentally different things. That is, xlog_get_bp() is ensuring
the buffer is large enough for the upcoming IO that will be
requested, while xlog_bread_noalign() is simply ensuring what it is
passed is correctly aligned to device sector boundaries.

I set the sector size as 4096 when making the xfs filesystem.
-sh-4.1# mkfs.xfs -s size=4096 -f /dev/sda3

In this case, xlog_bread_noalign() needs to do such round up and round down 
frequently. And it is used to ensure what it is passed is aligned to the log 
sector size, but not the device sector boundaries.

Here is the debug info I added when mounting this xfs partition.
-sh-4.1# mount /dev/sda3 /home/
XFS (sda3): Mounting Filesystem
xlog_bread_noalign:blk_no=0,nbblks=1,l_sectBBsize=8
xlog_bread_noalign:blk_no=61447,nbblks=1,l_sectBBsize=8
xlog_bread_noalign:blk_no=0,nbblks=1,l_sectBBsize=8
...
xlog_bread_noalign:blk_no=8695,nbblks=1,l_sectBBsize=8
xlog_bread_noalign:blk_no=4600,nbblks=4096,l_sectBBsize=8
xlog_bread_noalign:blk_no=8184,nbblks=512,l_sectBBsize=8

So, if you have to fudge an extra block for xlog_bread_noalign(),
that implies that what xlog_bread_noalign() was passed was probably
not correct. It also implies that you are using sector sizes larger
than 512 bytes, because that's the only time this might matter. Put
simply, this:

While debugging, I found that when it crashed, blk_no was not aligned to the log
sector size while nbblks was aligned to the log sector size, which makes sense.

For example, if xlog_bread_noalign() wants to read blocks from #1 to # 9, in 
which case the passed parameter blk_no is 1, and nbblks is 8, sectBBsize is 8, 
after the round down and round up operations, we get blk_no as 0, and nbblks as 
still 8. We definitely lose the last block of the log data.

 XFS mounting filesystem sda2
 Starting XFS recovery on filesystem: sda2 (logdev: internal)
 XFS: xlog_recover_process_data: bad clientid
 XFS: log mount/recovery failed: error 5
 XFS: log mount failed

Is not sufficient information for me to determine if you've correctly
analysed the problem you were seeing and that this is the correct
fix for it. I don't even know what kernel you are seeing this on, or
how you are reproducing it.

I was using the 2.6.38.6 kernel, with xfs as the rootfs partition. After
untarring the rootfs files onto the xfs partition and trying to reboot from it,
the panic occasionally occurred.


Note that I'm not saying the fix isn't necessary or correct, just
that I cannot review it based on this commit message.  Given that this
code is essentially unchanged in behaviour since the large sector
size support was added in 2003(*), understanding how it is
deficient is a critical part of the review process.

Information you need to provide so I have a chance of reviewing
whether it is correct or not:

   - what kernel you saw this on,
   - what the filesystem configuration was
   - what workload reproduced this problem (a test case would
 be nice, and xfstest even better)
   - the actual contents of the log that lead to the short read
 during recovery
   - whether xfs_logprint was capable of parsing the log
 correctly
   - where in the actual log recovery process the failure
 occurred (e.g. was it trying to recover transactions from
 a section of a wrapped log?)

I wish I could provide the corrupted log for you, but I probably cannot find
it any more, since I fixed this bug a year ago. I came across this fix recently
while cleaning up my code, so I thought I should contribute it back to the community.

IOWs, please show your working so we can determine if this is the
root cause of the problem you are seeing. :)

(*)
http://oss.sgi.com/cgi-bin/gitweb.cgi?p=archive/xfs-import.git;a=commitdiff
;h=f14e527f411712f89178c31370b5d733ea1d0280

FWIW, I think your change might need work - there's the possibility
that it can round up the length beyond the end of the log if we ask
to read up to the last sector of the log (i.e. blkno + blklen ==
end of log) and then round up blklen by one sector

Good catch, you are right on this. To avoid this possibility, I changed the
patch a little bit as follows.
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -171,6 +171,7 @@ xlog_bread_noalign(
struct xfs_buf  *bp)
 {
int error;
+   xfs_daddr_t orig_blk_no = blk_no

Fix perf DSOs' map address if .text is not the first section of vmlinux

2012-11-08 Thread Tony Lu
>From 1bacfabf8369764126758bbbea1d3963ac778cce Mon Sep 17 00:00:00 2001
From: Lu Zhigang 
Date: Thu, 8 Nov 2012 04:31:05 -0500
Subject: [PATCH 1/1] perf symbol: Don't assume .text section is the first
 section of vmlinux

The start address derived from /proc/kallsyms is the start address of the
kernel, not the start address of the kernel's .text section. If the .text
section is not at the beginning of vmlinux, perf will mess up the sections'
address range, thus failing to resolve the kernel symbols.

Verified on TILE architecture, whose kernel sections are as follows:
Sections:
Idx Name  Size  VMA   LMA   File off  Algn
  0 .intrpt1  3fe8  fff7    0001  2**3
  CONTENTS, ALLOC, LOAD, READONLY, CODE
  1 .text 008485a0  fff70002  0002  0002  2**6
  CONTENTS, ALLOC, LOAD, READONLY, CODE
  2 .init.text00047e88  fff70087  0087  0087  2**3
...

Signed-off-by: Lu Zhigang 
---
 tools/perf/util/symbol-elf.c |4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index db0cc92..7fc219b 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -645,6 +645,7 @@ int dso__load_sym(struct dso *dso, struct map *map,
Elf_Scn *sec, *sec_strndx;
Elf *elf;
int nr = 0;
+   u64 kernel_start = map->start;
 
dso->symtab_type = syms_ss->type;
 
@@ -746,6 +747,7 @@ int dso__load_sym(struct dso *dso, struct map *map,
goto new_symbol;
 
if (strcmp(section_name, ".text") == 0) {
+   map->start = kernel_start + shdr.sh_offset;
curr_map = map;
curr_dso = dso;
goto new_symbol;
@@ -759,7 +761,7 @@ int dso__load_sym(struct dso *dso, struct map *map,
u64 start = sym.st_value;
 
if (kmodule)
-   start += map->start + shdr.sh_offset;
+   start += kernel_start + shdr.sh_offset;
 
curr_dso = dso__new(dso_name);
if (curr_dso == NULL)
-- 
1.7.10.3
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Fix perf DSOs' map address if .text is not the first section of vmlinux

2012-11-08 Thread Tony Lu
From 1bacfabf8369764126758bbbea1d3963ac778cce Mon Sep 17 00:00:00 2001
From: Lu Zhigang z...@tilera.com
Date: Thu, 8 Nov 2012 04:31:05 -0500
Subject: [PATCH 1/1] perf symbol: Don't assume .text section is the first
 section of vmlinux

The start address derived from /proc/kallsyms is the start address of the
kernel, not the start address of the kernel's .text section. If the .text
section is not at the beginning of vmlinux, perf will mess up the sections'
address range, thus failing to resolve the kernel symbols.

Verified on TILE architecture, whose kernel sections are as follows:
Sections:
Idx Name  Size  VMA   LMA   File off  Algn
  0 .intrpt1  3fe8  fff7    0001  2**3
  CONTENTS, ALLOC, LOAD, READONLY, CODE
  1 .text 008485a0  fff70002  0002  0002  2**6
  CONTENTS, ALLOC, LOAD, READONLY, CODE
  2 .init.text00047e88  fff70087  0087  0087  2**3
...

Signed-off-by: Lu Zhigang z...@tilera.com
---
 tools/perf/util/symbol-elf.c |4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index db0cc92..7fc219b 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -645,6 +645,7 @@ int dso__load_sym(struct dso *dso, struct map *map,
Elf_Scn *sec, *sec_strndx;
Elf *elf;
int nr = 0;
+   u64 kernel_start = map->start;
 
dso->symtab_type = syms_ss->type;
 
@@ -746,6 +747,7 @@ int dso__load_sym(struct dso *dso, struct map *map,
goto new_symbol;
 
if (strcmp(section_name, ".text") == 0) {
+   map->start = kernel_start + shdr.sh_offset;
curr_map = map;
curr_dso = dso;
goto new_symbol;
@@ -759,7 +761,7 @@ int dso__load_sym(struct dso *dso, struct map *map,
u64 start = sym.st_value;
 
if (kmodule)
-   start += map->start + shdr.sh_offset;
+   start += kernel_start + shdr.sh_offset;
 
curr_dso = dso__new(dso_name);
if (curr_dso == NULL)
-- 
1.7.10.3
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/