commit feaf4fe8a8e4509540286899d02cd88f09c0d343
Author: Eric Paris <eparis@redhat.com>
Date:   Thu Aug 9 14:08:12 2012 -0400

    Network/Security: allocate security data when we allocate unicast sockets
    
    commit be9f4a44e7d41cee (ipv4: tcp: remove per net tcp_sock) added a
    regression because it did not properly initialize the new per cpu sockets.
    This was reported and bisected by John Stultz:
    
    [   69.272927] BUG: unable to handle kernel NULL pointer dereference at 0000000000000010
    [   69.273374] IP: [<ffffffff8132e7c4>] selinux_ip_postroute_compat+0xa4/0xe0
    [   69.273374] PGD 3a85b067 PUD 3f50b067 PMD 0
    [   69.273374] Oops: 0000 [#1] PREEMPT SMP
    [   69.273374] CPU 3
    [   69.273374] Pid: 2392, comm: hwclock Not tainted 3.6.0-rc1john+ #106 Bochs Bochs
    [   69.273374] RIP: 0010:[<ffffffff8132e7c4>]  [<ffffffff8132e7c4>] selinux_ip_postroute_compat+0xa4/0xe0
    [   69.273374] RSP: 0018:ffff88003f003720  EFLAGS: 00010246
    [   69.273374] RAX: 0000000000000000 RBX: ffff88003f5fa9d8 RCX: 0000000000000006
    [   69.273374] RDX: ffff88003f003740 RSI: ffff88003c6b256c RDI: ffff88003f5fa9d8
                                                                             [ OK ]
    [   69.273374] RBP: ffff88003f0037a0 R08: 0000000000000000 R09: ffff88003f1d0cc0
    [   69.273374] R10: 0000000000000003 R11: 0000000000000000 R12: 0000000000000000
    [   69.273374] R13: 0000000000000002 R14: ffff88003f0037c0 R15: 0000000000000004
    [   69.273374] FS:  00007fa398211700(0000) GS:ffff88003f000000(0000) knlGS:0000000000000000
    [   69.273374] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
    [   69.273374] CR2: 0000000000000010 CR3: 000000003b52a000 CR4: 00000000000006e0
    [   69.273374] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
    [   69.273374] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
    [   69.273374] Process hwclock (pid: 2392, threadinfo ffff88003a0ee000, task ffff88003fa82b80)
    [   69.273374] Stack:
    [   69.273374]  ffff88003c6b2558 0000000000000006 0000000000000000 0000160067d70002
    [   69.273374]  0f02000a0202000a 0000000000000000 0000000000000000 0000000000000000
    [   69.273374]  ffff88003f003802 ffff88003f003728 ffff88003f1d42d0 ffff88003d6c3560
    [   69.273374] Call Trace:
    [   69.273374]  <IRQ>
    [   69.273374]  [<ffffffff8132eaab>] selinux_ip_postroute+0x2ab/0x3e0
    [   69.273374]  [<ffffffff8132ec1c>] selinux_ipv4_postroute+0x1c/0x20
    [   69.273374]  [<ffffffff8198265c>] nf_iterate+0xac/0x140
    [   69.273374]  [<ffffffff819827a5>] nf_hook_slow+0xb5/0x210
    [   69.273374]  [<ffffffff8199cbba>] ip_output+0xaa/0x150
    [   69.273374]  [<ffffffff8199a9af>] ip_local_out+0x7f/0x110
    [   69.273374]  [<ffffffff8199d82e>] ip_send_skb+0xe/0x40
    [   69.273374]  [<ffffffff8199d88b>] ip_push_pending_frames+0x2b/0x30
    [   69.273374]  [<ffffffff8199dc97>] ip_send_unicast_reply+0x2c7/0x3c0
    [   69.273374]  [<ffffffff819bb215>] tcp_v4_send_reset+0x1f5/0x3f0
    [   69.273374]  [<ffffffff819bf04b>] tcp_v4_rcv+0x2bb/0x1080
    [   69.273374]  [<ffffffff81994d73>] ip_local_deliver_finish+0x133/0x4d0
    [   69.273374]  [<ffffffff819953e0>] ip_local_deliver+0x90/0xa0
    [   69.273374]  [<ffffffff819945b2>] ip_rcv_finish+0x262/0x8f0
    [   69.273374]  [<ffffffff81995742>] ip_rcv+0x352/0x3a0
    [   69.323844]  [<ffffffff81925244>] __netif_receive_skb+0xcb4/0x10e0
    [   69.323844]  [<ffffffff8192ba5d>] netif_receive_skb+0x18d/0x230
    [   69.323844]  [<ffffffff81746abc>] virtnet_poll+0x58c/0x7b0
    [   69.323844]  [<ffffffff8192cf59>] net_rx_action+0x289/0x550
    [   69.323844]  [<ffffffff8105846a>] __do_softirq+0x1da/0x560
    [   69.323844]  [<ffffffff81b5c2bc>] call_softirq+0x1c/0x30
    [   69.323844]  [<ffffffff81004d75>] do_softirq+0x105/0x1e0
    [   69.323844]  [<ffffffff81058bbe>] irq_exit+0x9e/0x100
    [   69.323844]  [<ffffffff81b5c9d3>] do_IRQ+0x63/0xd0
    [   69.323844]  [<ffffffff81b5a56f>] common_interrupt+0x6f/0x6f
    [   69.323844]  <EOI>
    [   69.323844]  [<ffffffff810993ad>] __might_sleep+0x1cd/0x280
    [   69.323844]  [<ffffffff81160e74>] might_fault+0x34/0xb0
    [   69.323844]  [<ffffffff8105657e>] sys_gettimeofday+0xbe/0xf0
    [   69.323844]  [<ffffffff81b5afe9>] system_call_fastpath+0x16/0x1b
    [   69.323844] Code: c0 45 31 c9 b1 01 ba 2a 00 00 00 e8 a7 89 ff ff 85 c0 b9 00 00 6f 00 74 0e 48 83 c4 70 89 c8 5b 41 5c 5d c3 0f 1f 00 0f b6 4d ef <41> 8b 7c 24 10 48 8d 55 c0 48 89 de e8 ab 6d 01 00 83 f8 01 19
    [   69.323844] RIP  [<ffffffff8132e7c4>] selinux_ip_postroute_compat+0xa4/0xe0
    [   69.323844]  RSP <ffff88003f003720>
    [   69.323844] CR2: 0000000000000010
    [   69.357489] ---[ end trace 0cd3e1a60dee6096 ]---
    [   69.358353] Kernel panic - not syncing: Fatal exception in interrupt
    
    The reason for the regresion is because of how the new sock is created.  The
    old socket was created using inet_ctl_sock_create() which uses all generic
    functions to establish the struct socket, struct sock, and do all of the
    allocation and initialization of the socket and its appropriate security data.
    
    aka:
    
    inet_ctl_sock_create
      sock_create_kern
        __sock_create
          pf->create (inet_create)
            sk_alloc
              sk_prot_alloc
                sec_sk_alloc()
    
    These new per_cpu skip all of that initialization and instead try to do it by
    hand.  Doing it by hand causes a second regression.  The __sock_create()
    function calls security_socket_post_create() which initializes the securty
    state on both the socket and the sock.  However here we don't set up the
    security structure.  I'd like to use security_socket_post_create() but it needs
    the socket and in this case we skipped straight to the struct sock.  Looks like
    this custom hackary is going to require a second patch which exposes the inards
    of security_socket_post_create() as security_sock_post_create() so we can do
    the labeling of this created this way.  But at least this one won't panic the
    kernel.
    
    Reported-by: John Stultz <johnstul@us.ibm.com>
    Bisected-by: John Stultz <johnstul@us.ibm.com>
    Signed-off-by: Eric Paris <eparis@redhat.com>
    Cc: Eric Dumazet <edumazet@google.com>
    Cc: Paul Moore <paul@paul-moore.com>
    Cc: "Serge E. Hallyn" <serge@hallyn.com>

diff --git a/include/linux/security.h b/include/linux/security.h
index 4e5a73c..1e0c5a7 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1601,7 +1601,7 @@ struct security_operations {
 	int (*socket_sock_rcv_skb) (struct sock *sk, struct sk_buff *skb);
 	int (*socket_getpeersec_stream) (struct socket *sock, char __user *optval, int __user *optlen, unsigned len);
 	int (*socket_getpeersec_dgram) (struct socket *sock, struct sk_buff *skb, u32 *secid);
-	int (*sk_alloc_security) (struct sock *sk, int family, gfp_t priority);
+	int (*sk_alloc_security) (struct sock *sk, int family, gfp_t priority, int numa_node);
 	void (*sk_free_security) (struct sock *sk);
 	void (*sk_clone_security) (const struct sock *sk, struct sock *newsk);
 	void (*sk_getsecid) (struct sock *sk, u32 *secid);
@@ -2539,7 +2539,7 @@ int security_sock_rcv_skb(struct sock *sk, struct sk_buff *skb);
 int security_socket_getpeersec_stream(struct socket *sock, char __user *optval,
 				      int __user *optlen, unsigned len);
 int security_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u32 *secid);
-int security_sk_alloc(struct sock *sk, int family, gfp_t priority);
+int security_sk_alloc(struct sock *sk, int family, gfp_t priority, int numa_node);
 void security_sk_free(struct sock *sk);
 void security_sk_clone(const struct sock *sk, struct sock *newsk);
 void security_sk_classify_flow(struct sock *sk, struct flowi *fl);
@@ -2667,7 +2667,7 @@ static inline int security_socket_getpeersec_dgram(struct socket *sock, struct s
 	return -ENOPROTOOPT;
 }
 
-static inline int security_sk_alloc(struct sock *sk, int family, gfp_t priority)
+static inline int security_sk_alloc(struct sock *sk, int family, gfp_t priority, int numa_node)
 {
 	return 0;
 }
diff --git a/include/net/ip.h b/include/net/ip.h
index bd5e444..340905d 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -105,7 +105,7 @@ extern void		ip_send_check(struct iphdr *ip);
 extern int		__ip_local_out(struct sk_buff *skb);
 extern int		ip_local_out(struct sk_buff *skb);
 extern int		ip_queue_xmit(struct sk_buff *skb, struct flowi *fl);
-extern void		ip_init(void);
+extern int		ip_init(void);
 extern int		ip_append_data(struct sock *sk, struct flowi4 *fl4,
 				       int getfrag(void *from, char *to, int offset, int len,
 						   int odd, struct sk_buff *skb),
diff --git a/net/core/sock.c b/net/core/sock.c
index 8f67ced8..2cab455 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1186,7 +1186,7 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
 	if (sk != NULL) {
 		kmemcheck_annotate_bitfield(sk, flags);
 
-		if (security_sk_alloc(sk, family, priority))
+		if (security_sk_alloc(sk, family, priority, numa_node_id()))
 			goto out_free;
 
 		if (!try_module_get(prot->owner))
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 6681ccf..3f79f37 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1720,7 +1720,8 @@ static int __init inet_init(void)
 	 *	Set the IP module up
 	 */
 
-	ip_init();
+	if (ip_init() < 0)
+		panic("Failed to initialize ip.\n");
 
 	tcp_v4_init();
 
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 76dde25..4a775b3 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1545,12 +1545,23 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
 	ip_rt_put(rt);
 }
 
-void __init ip_init(void)
+int __init ip_init(void)
 {
+	struct inet_sock *sock;
+	int rc, cpu;
+
 	ip_rt_init();
 	inet_initpeers();
 
 #if defined(CONFIG_IP_MULTICAST) && defined(CONFIG_PROC_FS)
 	igmp_mc_proc_init();
 #endif
+
+	for_each_possible_cpu(cpu) {
+		sock = &per_cpu(unicast_sock, cpu);
+		rc = security_sk_alloc(&sock->sk, PF_INET, GFP_KERNEL, cpu_to_node(cpu));
+		if (rc)
+			return rc;
+	}
+	return 0;
 }
diff --git a/security/capability.c b/security/capability.c
index 61095df..0525d28 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -650,7 +650,7 @@ static int cap_socket_getpeersec_dgram(struct socket *sock,
 	return -ENOPROTOOPT;
 }
 
-static int cap_sk_alloc_security(struct sock *sk, int family, gfp_t priority)
+static int cap_sk_alloc_security(struct sock *sk, int family, gfp_t priority, int numa_node)
 {
 	return 0;
 }
diff --git a/security/security.c b/security/security.c
index 860aeb3..02a7f76 100644
--- a/security/security.c
+++ b/security/security.c
@@ -1146,9 +1146,9 @@ int security_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u
 }
 EXPORT_SYMBOL(security_socket_getpeersec_dgram);
 
-int security_sk_alloc(struct sock *sk, int family, gfp_t priority)
+int security_sk_alloc(struct sock *sk, int family, gfp_t priority, int numa_node)
 {
-	return security_ops->sk_alloc_security(sk, family, priority);
+	return security_ops->sk_alloc_security(sk, family, priority, numa_node);
 }
 
 void security_sk_free(struct sock *sk)
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 6c77f63..bdcfd0c 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -4289,11 +4289,12 @@ out:
 	return 0;
 }
 
-static int selinux_sk_alloc_security(struct sock *sk, int family, gfp_t priority)
+static int selinux_sk_alloc_security(struct sock *sk, int family, gfp_t priority,
+				     int numa_node)
 {
 	struct sk_security_struct *sksec;
 
-	sksec = kzalloc(sizeof(*sksec), priority);
+	sksec = kmalloc_node(sizeof(*sksec), priority | __GFP_ZERO, numa_node);
 	if (!sksec)
 		return -ENOMEM;
 
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 8221514..b43ae5d 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -1754,12 +1754,14 @@ static void smack_task_to_inode(struct task_struct *p, struct inode *inode)
  *
  * Returns 0 on success, -ENOMEM is there's no memory
  */
-static int smack_sk_alloc_security(struct sock *sk, int family, gfp_t gfp_flags)
+static int smack_sk_alloc_security(struct sock *sk, int family, gfp_t gfp_flags,
+				   int numa_node)
 {
 	char *csp = smk_of_current();
 	struct socket_smack *ssp;
 
-	ssp = kzalloc(sizeof(struct socket_smack), gfp_flags);
+	ssp = kmalloc_node(sizeof(struct socket_smack), gfp_flags | __GFP_ZERO,
+			   numa_node);
 	if (ssp == NULL)
 		return -ENOMEM;
 
