Skip to content

Commit c2f1010

Browse files
committed
json: Optimize escaping string in Encoder
1 parent 4e294f6 commit c2f1010

File tree

1 file changed

+115
-33
lines changed

1 file changed

+115
-33
lines changed

Modules/_json.c

Lines changed: 115 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ typedef struct _PyEncoderObject {
5151
char sort_keys;
5252
char skipkeys;
5353
int allow_nan;
54-
PyCFunction fast_encode;
54+
int (*fast_encode)(PyUnicodeWriter *, PyObject*);
5555
} PyEncoderObject;
5656

5757
#define PyEncoderObject_CAST(op) ((PyEncoderObject *)(op))
@@ -102,8 +102,8 @@ static PyObject *
102102
_encoded_const(PyObject *obj);
103103
static void
104104
raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end);
105-
static PyObject *
106-
encoder_encode_string(PyEncoderObject *s, PyObject *obj);
105+
static int
106+
encoder_write_string(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *obj);
107107
static PyObject *
108108
encoder_encode_float(PyEncoderObject *s, PyObject *obj);
109109

@@ -303,6 +303,89 @@ escape_unicode(PyObject *pystr)
303303
return rval;
304304
}
305305

306+
// Take a PyUnicode pystr and write an ASCII-only escaped string to writer.
307+
static int
308+
write_escaped_ascii(PyUnicodeWriter *writer, PyObject *pystr)
309+
{
310+
Py_ssize_t i;
311+
Py_ssize_t input_chars;
312+
Py_ssize_t chars;
313+
Py_ssize_t copy_len = 0;
314+
const void *input;
315+
int kind;
316+
int ret;
317+
unsigned char buf[12];
318+
319+
input_chars = PyUnicode_GET_LENGTH(pystr);
320+
input = PyUnicode_DATA(pystr);
321+
kind = PyUnicode_KIND(pystr);
322+
323+
ret = PyUnicodeWriter_WriteChar(writer, '"');
324+
if (ret) return ret;
325+
326+
for (i = 0; i < input_chars; i++) {
327+
Py_UCS4 c = PyUnicode_READ(kind, input, i);
328+
if (S_CHAR(c)) {
329+
copy_len++;
330+
}
331+
else {
332+
ret = PyUnicodeWriter_WriteSubstring(writer, pystr, i-copy_len, i);
333+
if (ret) return ret;
334+
copy_len = 0;
335+
336+
chars = ascii_escape_unichar(c, buf, 0);
337+
ret = PyUnicodeWriter_WriteUTF8(writer, (const char*)buf, chars);
338+
if (ret) return ret;
339+
}
340+
}
341+
342+
ret = PyUnicodeWriter_WriteSubstring(writer, pystr, i-copy_len, i);
343+
if (ret) return ret;
344+
345+
return PyUnicodeWriter_WriteChar(writer, '"');
346+
}
347+
348+
// Take a PyUnicode pystr and write an escaped string to writer.
349+
static int
350+
write_escaped_unicode(PyUnicodeWriter *writer, PyObject *pystr)
351+
{
352+
Py_ssize_t i;
353+
Py_ssize_t input_chars;
354+
Py_ssize_t chars;
355+
Py_ssize_t copy_len = 0;
356+
const void *input;
357+
int kind;
358+
int ret;
359+
unsigned char buf[12];
360+
361+
input_chars = PyUnicode_GET_LENGTH(pystr);
362+
input = PyUnicode_DATA(pystr);
363+
kind = PyUnicode_KIND(pystr);
364+
365+
ret = PyUnicodeWriter_WriteChar(writer, '"');
366+
if (ret) return ret;
367+
368+
for (i = 0; i < input_chars; i++) {
369+
Py_UCS4 c = PyUnicode_READ(kind, input, i);
370+
if (c <= 0x1f || c == '\\' || c == '"') {
371+
ret = PyUnicodeWriter_WriteSubstring(writer, pystr, i-copy_len, i);
372+
if (ret) return ret;
373+
copy_len = 0;
374+
375+
chars = ascii_escape_unichar(c, buf, 0);
376+
ret = PyUnicodeWriter_WriteUTF8(writer, (const char*)buf, chars);
377+
if (ret) return ret;
378+
}
379+
else {
380+
copy_len++;
381+
}
382+
}
383+
384+
ret = PyUnicodeWriter_WriteSubstring(writer, pystr, i-copy_len, i);
385+
if (ret) return ret;
386+
return PyUnicodeWriter_WriteChar(writer, '"');
387+
}
388+
306389
static void
307390
raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end)
308391
{
@@ -1255,8 +1338,11 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
12551338

12561339
if (PyCFunction_Check(s->encoder)) {
12571340
PyCFunction f = PyCFunction_GetFunction(s->encoder);
1258-
if (f == py_encode_basestring_ascii || f == py_encode_basestring) {
1259-
s->fast_encode = f;
1341+
if (f == py_encode_basestring_ascii){
1342+
s->fast_encode = write_escaped_ascii;
1343+
}
1344+
else if (f == py_encode_basestring) {
1345+
s->fast_encode = write_escaped_unicode;
12601346
}
12611347
}
12621348

@@ -1437,33 +1523,35 @@ encoder_encode_float(PyEncoderObject *s, PyObject *obj)
14371523
return PyFloat_Type.tp_repr(obj);
14381524
}
14391525

1440-
static PyObject *
1441-
encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1526+
static int
1527+
_steal_accumulate(PyUnicodeWriter *writer, PyObject *stolen)
14421528
{
1443-
/* Return the JSON representation of a string */
1444-
PyObject *encoded;
1529+
/* Append stolen and then decrement its reference count */
1530+
int rval = PyUnicodeWriter_WriteStr(writer, stolen);
1531+
Py_DECREF(stolen);
1532+
return rval;
1533+
}
14451534

1535+
static int
1536+
encoder_write_string(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *obj)
1537+
{
14461538
if (s->fast_encode) {
1447-
return s->fast_encode(NULL, obj);
1539+
return s->fast_encode(writer, obj);
1540+
}
1541+
1542+
/* Return the JSON representation of a string */
1543+
PyObject *encoded = PyObject_CallOneArg(s->encoder, obj);
1544+
if (encoded == NULL) {
1545+
return -1;
14481546
}
1449-
encoded = PyObject_CallOneArg(s->encoder, obj);
14501547
if (encoded != NULL && !PyUnicode_Check(encoded)) {
14511548
PyErr_Format(PyExc_TypeError,
14521549
"encoder() must return a string, not %.80s",
14531550
Py_TYPE(encoded)->tp_name);
14541551
Py_DECREF(encoded);
1455-
return NULL;
1552+
return -1;
14561553
}
1457-
return encoded;
1458-
}
1459-
1460-
static int
1461-
_steal_accumulate(PyUnicodeWriter *writer, PyObject *stolen)
1462-
{
1463-
/* Append stolen and then decrement its reference count */
1464-
int rval = PyUnicodeWriter_WriteStr(writer, stolen);
1465-
Py_DECREF(stolen);
1466-
return rval;
1554+
return _steal_accumulate(writer, encoded);
14671555
}
14681556

14691557
static int
@@ -1485,10 +1573,7 @@ encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer,
14851573
return PyUnicodeWriter_WriteUTF8(writer, "false", 5);
14861574
}
14871575
else if (PyUnicode_Check(obj)) {
1488-
PyObject *encoded = encoder_encode_string(s, obj);
1489-
if (encoded == NULL)
1490-
return -1;
1491-
return _steal_accumulate(writer, encoded);
1576+
return encoder_write_string(s, writer, obj);
14921577
}
14931578
else if (PyLong_Check(obj)) {
14941579
if (PyLong_CheckExact(obj)) {
@@ -1577,7 +1662,7 @@ encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *firs
15771662
PyObject *item_separator)
15781663
{
15791664
PyObject *keystr = NULL;
1580-
PyObject *encoded;
1665+
int rv;
15811666

15821667
if (PyUnicode_Check(key)) {
15831668
keystr = Py_NewRef(key);
@@ -1617,15 +1702,12 @@ encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *firs
16171702
}
16181703
}
16191704

1620-
encoded = encoder_encode_string(s, keystr);
1705+
rv = encoder_write_string(s, writer, keystr);
16211706
Py_DECREF(keystr);
1622-
if (encoded == NULL) {
1623-
return -1;
1707+
if (rv != 0) {
1708+
return rv;
16241709
}
16251710

1626-
if (_steal_accumulate(writer, encoded) < 0) {
1627-
return -1;
1628-
}
16291711
if (PyUnicodeWriter_WriteStr(writer, s->key_separator) < 0) {
16301712
return -1;
16311713
}

0 commit comments

Comments
 (0)