From 68f097d1ee7f96fd6fa2fbcb2778b7638dcacdcd Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 14 Dec 2017 11:48:32 +0200 Subject: [PATCH 1/4] bpo-30416: Protect the optimizer during constant folding. It no longer spends much time doing complex calculations and no longer consumes much memory for creating large constants that will be dropped later. This fixes also bpo-21074. --- .../2017-12-14-11-48-19.bpo-30416.hlHo_9.rst | 4 + Python/ast_opt.c | 155 +++++++++++++++--- 2 files changed, 134 insertions(+), 25 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2017-12-14-11-48-19.bpo-30416.hlHo_9.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2017-12-14-11-48-19.bpo-30416.hlHo_9.rst b/Misc/NEWS.d/next/Core and Builtins/2017-12-14-11-48-19.bpo-30416.hlHo_9.rst new file mode 100644 index 00000000000000..532700e248164a --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2017-12-14-11-48-19.bpo-30416.hlHo_9.rst @@ -0,0 +1,4 @@ +The optimizer is now protected from spending much time doing complex +calculations and consuming much memory for creating large constants in +constant folding. Increased limits for constants that can be produced in +constant folding. diff --git a/Python/ast_opt.c b/Python/ast_opt.c index 80531cc6ac5a6f..d3b7103de07341 100644 --- a/Python/ast_opt.c +++ b/Python/ast_opt.c @@ -125,6 +125,132 @@ fold_unaryop(expr_ty node, PyArena *arena) return make_const(node, newval, arena); } +/* Check whether a collection doesn't containing too much items (including + subcollections). This protects from creating a constant that needs + too much time for calculating a hash. + "limit" is the maximal number of items. + Returns the negative number if the total number of items exceeds the + limit. Otherwise returns the limit minus the total number of items. +*/ + +static Py_ssize_t +check_complexity(PyObject *obj, Py_ssize_t limit) +{ + if (PyTuple_Check(obj)) { + Py_ssize_t i; + limit -= PyTuple_GET_SIZE(obj); + for (i = 0; limit >= 0 && i < PyTuple_GET_SIZE(obj); i++) { + limit = check_complexity(PyTuple_GET_ITEM(obj, i), limit); + } + return limit; + } + else if (PyFrozenSet_Check(obj)) { + Py_ssize_t i = 0; + PyObject *item; + Py_hash_t hash; + limit -= PySet_GET_SIZE(obj); + while (limit >= 0 && _PySet_NextEntry(obj, &i, &item, &hash)) { + limit = check_complexity(item, limit); + } + } + return limit; +} + +#define MAX_INT_SIZE 128 +#define MAX_COLLECTION_SIZE 256 +#define MAX_STR_SIZE 4096 +#define MAX_TOTAL_ITEMS 1000000 + +static PyObject * +safe_multiply(PyObject *v, PyObject *w) +{ + if (PyLong_Check(v) && PyLong_Check(w) && Py_SIZE(v) && Py_SIZE(w)) { + size_t vbits = _PyLong_NumBits(v); + size_t wbits = _PyLong_NumBits(w); + if (vbits == (size_t)-1 || wbits == (size_t)-1) { + return NULL; + } + if (vbits + wbits > MAX_INT_SIZE) { + return NULL; + } + } + else if (PyLong_Check(v) && (PyTuple_Check(w) || PyFrozenSet_Check(w))) { + Py_ssize_t size = PyTuple_Check(w) ? PyTuple_GET_SIZE(w) : + PySet_GET_SIZE(w); + if (size) { + long n = PyLong_AsLong(v); + if (n < 0 || n > MAX_COLLECTION_SIZE / size) { + return NULL; + } + if (n && check_complexity(w, MAX_TOTAL_ITEMS / n) < 0) { + return NULL; + } + } + } + else if (PyLong_Check(v) && (PyUnicode_Check(w) || PyBytes_Check(w))) { + Py_ssize_t size = PyUnicode_Check(w) ? PyUnicode_GET_LENGTH(w) : + PyBytes_GET_SIZE(w); + if (size) { + long n = PyLong_AsLong(v); + if (n < 0 || n > MAX_STR_SIZE / size) { + return NULL; + } + } + } + else if (PyLong_Check(w) && + (PyTuple_Check(v) || PyFrozenSet_Check(v) || + PyUnicode_Check(v) || PyBytes_Check(v))) + { + return safe_multiply(w, v); + } + + return PyNumber_Multiply(v, w); +} + +static PyObject * +safe_power(PyObject *v, PyObject *w) +{ + if (PyLong_Check(v) && PyLong_Check(w) && Py_SIZE(v) && Py_SIZE(w) > 0) { + size_t vbits = _PyLong_NumBits(v); + size_t wbits = PyLong_AsSize_t(w); + if (vbits == (size_t)-1 || wbits == (size_t)-1) { + return NULL; + } + if (vbits > MAX_INT_SIZE / wbits) { + return NULL; + } + } + + return PyNumber_Power(v, w, Py_None); +} + +static PyObject * +safe_lshift(PyObject *v, PyObject *w) +{ + if (PyLong_Check(v) && PyLong_Check(w) && Py_SIZE(v) && Py_SIZE(w)) { + size_t vbits = _PyLong_NumBits(v); + size_t wbits = PyLong_AsSize_t(w); + if (vbits == (size_t)-1 || wbits == (size_t)-1) { + return NULL; + } + if (vbits + wbits > MAX_INT_SIZE) { + return NULL; + } + } + + return PyNumber_Lshift(v, w); +} + +static PyObject * +safe_mod(PyObject *v, PyObject *w) +{ + if (PyUnicode_Check(v) || PyBytes_Check(v)) { + return NULL; + } + + return PyNumber_Remainder(v, w); +} + static int fold_binop(expr_ty node, PyArena *arena) { @@ -147,7 +273,7 @@ fold_binop(expr_ty node, PyArena *arena) newval = PyNumber_Subtract(lv, rv); break; case Mult: - newval = PyNumber_Multiply(lv, rv); + newval = safe_multiply(lv, rv); break; case Div: newval = PyNumber_TrueDivide(lv, rv); @@ -156,13 +282,13 @@ fold_binop(expr_ty node, PyArena *arena) newval = PyNumber_FloorDivide(lv, rv); break; case Mod: - newval = PyNumber_Remainder(lv, rv); + newval = safe_mod(lv, rv); break; case Pow: - newval = PyNumber_Power(lv, rv, Py_None); + newval = safe_power(lv, rv); break; case LShift: - newval = PyNumber_Lshift(lv, rv); + newval = safe_lshift(lv, rv); break; case RShift: newval = PyNumber_Rshift(lv, rv); @@ -180,27 +306,6 @@ fold_binop(expr_ty node, PyArena *arena) return 1; } - if (newval == NULL) { - if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) { - return 0; - } - PyErr_Clear(); - return 1; - } - - /* Avoid creating large constants. */ - Py_ssize_t size = PyObject_Size(newval); - if (size == -1) { - if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) { - Py_DECREF(newval); - return 0; - } - PyErr_Clear(); - } - else if (size > 20) { - Py_DECREF(newval); - return 1; - } return make_const(node, newval, arena); } From 5ded1400f3120dcebf2bdd7ba59807b61be7043a Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 14 Dec 2017 12:37:17 +0200 Subject: [PATCH 2/4] Fix tests. --- Lib/test/test_memoryio.py | 3 ++- Lib/test/test_peepholer.py | 9 ++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_memoryio.py b/Lib/test/test_memoryio.py index e16c57e9435864..cd2faba1791c77 100644 --- a/Lib/test/test_memoryio.py +++ b/Lib/test/test_memoryio.py @@ -759,7 +759,8 @@ def test_sizeof(self): check = self.check_sizeof self.assertEqual(object.__sizeof__(io.BytesIO()), basesize) check(io.BytesIO(), basesize ) - check(io.BytesIO(b'a' * 1000), basesize + sys.getsizeof(b'a' * 1000)) + n = 1000 # use a variable to prevent constant folding + check(io.BytesIO(b'a' * n), basesize + sys.getsizeof(b'a' * n)) # Various tests of copy-on-write behaviour for BytesIO. diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py index efc0afecf5abdb..0cc1e92907b52b 100644 --- a/Lib/test/test_peepholer.py +++ b/Lib/test/test_peepholer.py @@ -175,8 +175,15 @@ def test_folding_of_binops_on_constants(self): self.assertInBytecode(code, 'LOAD_CONST', 'b') # Verify that large sequences do not result from folding - code = compile('a="x"*1000', '', 'single') + code = compile('a="x"*10000', '', 'single') + self.assertInBytecode(code, 'LOAD_CONST', 10000) + self.assertNotIn("x"*10000, code.co_consts) + code = compile('a=1<<1000', '', 'single') self.assertInBytecode(code, 'LOAD_CONST', 1000) + self.assertNotIn(1<<1000, code.co_consts) + code = compile('a=2**1000', '', 'single') + self.assertInBytecode(code, 'LOAD_CONST', 1000) + self.assertNotIn(2**1000, code.co_consts) def test_binary_subscr_on_unicode(self): # valid code get optimized From 87b1b2b1cfe62123369e67b72a8c05bb131bcf9b Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 14 Dec 2017 17:24:36 +0200 Subject: [PATCH 3/4] Address review comments. --- Python/ast_opt.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Python/ast_opt.c b/Python/ast_opt.c index d3b7103de07341..bff1573e6669bd 100644 --- a/Python/ast_opt.c +++ b/Python/ast_opt.c @@ -156,10 +156,10 @@ check_complexity(PyObject *obj, Py_ssize_t limit) return limit; } -#define MAX_INT_SIZE 128 -#define MAX_COLLECTION_SIZE 256 -#define MAX_STR_SIZE 4096 -#define MAX_TOTAL_ITEMS 1000000 +#define MAX_INT_SIZE 128 /* bits */ +#define MAX_COLLECTION_SIZE 256 /* items */ +#define MAX_STR_SIZE 4096 /* characters */ +#define MAX_TOTAL_ITEMS 1000000 /* including nested collections */ static PyObject * safe_multiply(PyObject *v, PyObject *w) @@ -233,7 +233,7 @@ safe_lshift(PyObject *v, PyObject *w) if (vbits == (size_t)-1 || wbits == (size_t)-1) { return NULL; } - if (vbits + wbits > MAX_INT_SIZE) { + if (wbits > MAX_INT_SIZE || vbits > MAX_INT_SIZE - wbits) { return NULL; } } From 86326ee3a315ce4651dfb9f088e38fa0ba8aaa72 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 14 Dec 2017 17:59:02 +0200 Subject: [PATCH 4/4] Tweak limits. --- Python/ast_opt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/ast_opt.c b/Python/ast_opt.c index bff1573e6669bd..fa67c83de15850 100644 --- a/Python/ast_opt.c +++ b/Python/ast_opt.c @@ -159,7 +159,7 @@ check_complexity(PyObject *obj, Py_ssize_t limit) #define MAX_INT_SIZE 128 /* bits */ #define MAX_COLLECTION_SIZE 256 /* items */ #define MAX_STR_SIZE 4096 /* characters */ -#define MAX_TOTAL_ITEMS 1000000 /* including nested collections */ +#define MAX_TOTAL_ITEMS 1024 /* including nested collections */ static PyObject * safe_multiply(PyObject *v, PyObject *w)