Skip to content

GH-93964: Harden overflow checks before _PyBytes_Resize in compile.c #94044

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Strengthened compiler overflow checks to prevent crashes when compiling very large source files.
53 changes: 36 additions & 17 deletions Python/compile.c
Original file line number Diff line number Diff line change
Expand Up @@ -7500,16 +7500,42 @@ assemble_emit_exception_table_item(struct assembler *a, int value, int msb)
write_except_byte(a, (value&0x3f) | msb);
}

/* Make room for at least (logical_length+to_add)*unitsize in the
bytes object. Use exponential growth for O(1) amortized runtime.

Parameters:
bytes          - in/out pointer to the bytes object; it is handed to
_PyBytes_Resize, which may replace the object it points at.
logical_length - number of units already in use.
to_add         - number of units about to be appended.
unitsize       - size of one unit, in bytes.

Returns 1 on success and 0 on failure. PyErr_NoMemory() is called on
the two overflow paths; a _PyBytes_Resize failure is reported as a 0
return without setting anything further here.

NOTE(review): the buffer is doubled at most once per call, so this
presumably relies on to_add units being small compared with the
current allocation -- confirm against the callers. */
static int
bytes_make_room(PyObject **bytes, int logical_length,
int to_add, Py_ssize_t unitsize)
{
// Make sure we can successfully do the addition.
if (logical_length > INT_MAX - to_add) {
PyErr_NoMemory();
return 0;
}
// The existing logical buffer should always fit in a Py_ssize_t
assert(logical_length <= PY_SSIZE_T_MAX / unitsize);
Py_ssize_t b_len = PyBytes_GET_SIZE(*bytes);
// Compare in bytes. Equivalent to (logical_length + to_add) * unitsize >= b_len,
// so the buffer also grows when the request would fill it exactly.
if (unitsize * logical_length >= b_len - to_add * unitsize) {
// There's not enough room. Double it.
// Refuse when b_len * 2 below would not fit in a Py_ssize_t.
if (b_len > PY_SSIZE_T_MAX / 2) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This has the same issue as the backport.
We want to be able to index all code object structures with an int, so you'll need INT_MAX instead of PY_SSIZE_T_MAX and it should be an overflow error, not a memory error.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To clarify, we want a_bytecode to be able to have INT_MAX code units, not just INT_MAX bytes, right?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We probably want the smaller amount to avoid risk of overflow. So the length in bytes should be less than INT_MAX.

PyErr_NoMemory();
return 0;
}
if (_PyBytes_Resize(bytes, b_len * 2) < 0) {
return 0;
}
}
return 1;
}

/* See Objects/exception_handling_notes.txt for details of layout */
#define MAX_SIZE_OF_ENTRY 20

static int
assemble_emit_exception_table_entry(struct assembler *a, int start, int end, basicblock *handler)
{
Py_ssize_t len = PyBytes_GET_SIZE(a->a_except_table);
if (a->a_except_table_off + MAX_SIZE_OF_ENTRY >= len) {
if (_PyBytes_Resize(&a->a_except_table, len * 2) < 0)
return 0;
if (!bytes_make_room(&a->a_except_table, a->a_except_table_off,
MAX_SIZE_OF_ENTRY, 1)) {
return 0;
}
int size = end-start;
assert(end > start);
Expand Down Expand Up @@ -7650,12 +7676,9 @@ write_location_info_no_column(struct assembler* a, int length, int line_delta)
static int
write_location_info_entry(struct assembler* a, struct instr* i, int isize)
{
Py_ssize_t len = PyBytes_GET_SIZE(a->a_linetable);
if (a->a_location_off + THEORETICAL_MAX_ENTRY_SIZE >= len) {
assert(len > THEORETICAL_MAX_ENTRY_SIZE);
if (_PyBytes_Resize(&a->a_linetable, len*2) < 0) {
return 0;
}
if (!bytes_make_room(&a->a_linetable, a->a_location_off,
THEORETICAL_MAX_ENTRY_SIZE, 1)) {
return 0;
}
if (i->i_loc.lineno < 0) {
write_location_info_none(a, isize);
Expand Down Expand Up @@ -7710,15 +7733,11 @@ assemble_emit_location(struct assembler* a, struct instr* i)
static int
assemble_emit(struct assembler *a, struct instr *i)
{
Py_ssize_t len = PyBytes_GET_SIZE(a->a_bytecode);
_Py_CODEUNIT *code;

int size = instr_size(i);
if (a->a_offset + size >= len / (int)sizeof(_Py_CODEUNIT)) {
if (len > PY_SSIZE_T_MAX / 2)
return 0;
if (_PyBytes_Resize(&a->a_bytecode, len * 2) < 0)
return 0;
if (!bytes_make_room(&a->a_bytecode, a->a_offset,
size, sizeof(_Py_CODEUNIT))) {
return 0;
}
code = (_Py_CODEUNIT *)PyBytes_AS_STRING(a->a_bytecode) + a->a_offset;
a->a_offset += size;
Expand Down