@@ -51,7 +51,7 @@ typedef struct _PyEncoderObject {
51
51
char sort_keys ;
52
52
char skipkeys ;
53
53
int allow_nan ;
54
- PyCFunction fast_encode ;
54
+ int ( * fast_encode )( PyUnicodeWriter * , PyObject * ) ;
55
55
} PyEncoderObject ;
56
56
57
57
#define PyEncoderObject_CAST (op ) ((PyEncoderObject *)(op))
@@ -102,8 +102,8 @@ static PyObject *
102
102
_encoded_const (PyObject * obj );
103
103
static void
104
104
raise_errmsg (const char * msg , PyObject * s , Py_ssize_t end );
105
- static PyObject *
106
- encoder_encode_string (PyEncoderObject * s , PyObject * obj );
105
+ static int
106
+ encoder_write_string (PyEncoderObject * s , PyUnicodeWriter * writer , PyObject * obj );
107
107
static PyObject *
108
108
encoder_encode_float (PyEncoderObject * s , PyObject * obj );
109
109
@@ -303,6 +303,89 @@ escape_unicode(PyObject *pystr)
303
303
return rval ;
304
304
}
305
305
306
+ // Take a PyUnicode pystr and write an ASCII-only escaped string to writer.
307
+ static int
308
+ write_escaped_ascii (PyUnicodeWriter * writer , PyObject * pystr )
309
+ {
310
+ Py_ssize_t i ;
311
+ Py_ssize_t input_chars ;
312
+ Py_ssize_t chars ;
313
+ Py_ssize_t copy_len = 0 ;
314
+ const void * input ;
315
+ int kind ;
316
+ int ret ;
317
+ unsigned char buf [12 ];
318
+
319
+ input_chars = PyUnicode_GET_LENGTH (pystr );
320
+ input = PyUnicode_DATA (pystr );
321
+ kind = PyUnicode_KIND (pystr );
322
+
323
+ ret = PyUnicodeWriter_WriteChar (writer , '"' );
324
+ if (ret ) return ret ;
325
+
326
+ for (i = 0 ; i < input_chars ; i ++ ) {
327
+ Py_UCS4 c = PyUnicode_READ (kind , input , i );
328
+ if (S_CHAR (c )) {
329
+ copy_len ++ ;
330
+ }
331
+ else {
332
+ ret = PyUnicodeWriter_WriteSubstring (writer , pystr , i - copy_len , i );
333
+ if (ret ) return ret ;
334
+ copy_len = 0 ;
335
+
336
+ chars = ascii_escape_unichar (c , buf , 0 );
337
+ ret = PyUnicodeWriter_WriteUTF8 (writer , (const char * )buf , chars );
338
+ if (ret ) return ret ;
339
+ }
340
+ }
341
+
342
+ ret = PyUnicodeWriter_WriteSubstring (writer , pystr , i - copy_len , i );
343
+ if (ret ) return ret ;
344
+
345
+ return PyUnicodeWriter_WriteChar (writer , '"' );
346
+ }
347
+
348
+ // Take a PyUnicode pystr and write an escaped string to writer.
349
+ static int
350
+ write_escaped_unicode (PyUnicodeWriter * writer , PyObject * pystr )
351
+ {
352
+ Py_ssize_t i ;
353
+ Py_ssize_t input_chars ;
354
+ Py_ssize_t chars ;
355
+ Py_ssize_t copy_len = 0 ;
356
+ const void * input ;
357
+ int kind ;
358
+ int ret ;
359
+ unsigned char buf [12 ];
360
+
361
+ input_chars = PyUnicode_GET_LENGTH (pystr );
362
+ input = PyUnicode_DATA (pystr );
363
+ kind = PyUnicode_KIND (pystr );
364
+
365
+ ret = PyUnicodeWriter_WriteChar (writer , '"' );
366
+ if (ret ) return ret ;
367
+
368
+ for (i = 0 ; i < input_chars ; i ++ ) {
369
+ Py_UCS4 c = PyUnicode_READ (kind , input , i );
370
+ if (c <= 0x1f || c == '\\' || c == '"' ) {
371
+ ret = PyUnicodeWriter_WriteSubstring (writer , pystr , i - copy_len , i );
372
+ if (ret ) return ret ;
373
+ copy_len = 0 ;
374
+
375
+ chars = ascii_escape_unichar (c , buf , 0 );
376
+ ret = PyUnicodeWriter_WriteUTF8 (writer , (const char * )buf , chars );
377
+ if (ret ) return ret ;
378
+ }
379
+ else {
380
+ copy_len ++ ;
381
+ }
382
+ }
383
+
384
+ ret = PyUnicodeWriter_WriteSubstring (writer , pystr , i - copy_len , i );
385
+ if (ret ) return ret ;
386
+ return PyUnicodeWriter_WriteChar (writer , '"' );
387
+ }
388
+
306
389
static void
307
390
raise_errmsg (const char * msg , PyObject * s , Py_ssize_t end )
308
391
{
@@ -1255,8 +1338,11 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1255
1338
1256
1339
if (PyCFunction_Check (s -> encoder )) {
1257
1340
PyCFunction f = PyCFunction_GetFunction (s -> encoder );
1258
- if (f == py_encode_basestring_ascii || f == py_encode_basestring ) {
1259
- s -> fast_encode = f ;
1341
+ if (f == py_encode_basestring_ascii ){
1342
+ s -> fast_encode = write_escaped_ascii ;
1343
+ }
1344
+ else if (f == py_encode_basestring ) {
1345
+ s -> fast_encode = write_escaped_unicode ;
1260
1346
}
1261
1347
}
1262
1348
@@ -1437,33 +1523,35 @@ encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1437
1523
return PyFloat_Type .tp_repr (obj );
1438
1524
}
1439
1525
1440
- static PyObject *
1441
- encoder_encode_string ( PyEncoderObject * s , PyObject * obj )
1526
+ static int
1527
+ _steal_accumulate ( PyUnicodeWriter * writer , PyObject * stolen )
1442
1528
{
1443
- /* Return the JSON representation of a string */
1444
- PyObject * encoded ;
1529
+ /* Append stolen and then decrement its reference count */
1530
+ int rval = PyUnicodeWriter_WriteStr (writer , stolen );
1531
+ Py_DECREF (stolen );
1532
+ return rval ;
1533
+ }
1445
1534
1535
+ static int
1536
+ encoder_write_string (PyEncoderObject * s , PyUnicodeWriter * writer , PyObject * obj )
1537
+ {
1446
1538
if (s -> fast_encode ) {
1447
- return s -> fast_encode (NULL , obj );
1539
+ return s -> fast_encode (writer , obj );
1540
+ }
1541
+
1542
+ /* Return the JSON representation of a string */
1543
+ PyObject * encoded = PyObject_CallOneArg (s -> encoder , obj );
1544
+ if (encoded == NULL ) {
1545
+ return -1 ;
1448
1546
}
1449
- encoded = PyObject_CallOneArg (s -> encoder , obj );
1450
1547
if (encoded != NULL && !PyUnicode_Check (encoded )) {
1451
1548
PyErr_Format (PyExc_TypeError ,
1452
1549
"encoder() must return a string, not %.80s" ,
1453
1550
Py_TYPE (encoded )-> tp_name );
1454
1551
Py_DECREF (encoded );
1455
- return NULL ;
1552
+ return -1 ;
1456
1553
}
1457
- return encoded ;
1458
- }
1459
-
1460
- static int
1461
- _steal_accumulate (PyUnicodeWriter * writer , PyObject * stolen )
1462
- {
1463
- /* Append stolen and then decrement its reference count */
1464
- int rval = PyUnicodeWriter_WriteStr (writer , stolen );
1465
- Py_DECREF (stolen );
1466
- return rval ;
1554
+ return _steal_accumulate (writer , encoded );
1467
1555
}
1468
1556
1469
1557
static int
@@ -1485,10 +1573,7 @@ encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer,
1485
1573
return PyUnicodeWriter_WriteUTF8 (writer , "false" , 5 );
1486
1574
}
1487
1575
else if (PyUnicode_Check (obj )) {
1488
- PyObject * encoded = encoder_encode_string (s , obj );
1489
- if (encoded == NULL )
1490
- return -1 ;
1491
- return _steal_accumulate (writer , encoded );
1576
+ return encoder_write_string (s , writer , obj );
1492
1577
}
1493
1578
else if (PyLong_Check (obj )) {
1494
1579
if (PyLong_CheckExact (obj )) {
@@ -1577,7 +1662,7 @@ encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *firs
1577
1662
PyObject * item_separator )
1578
1663
{
1579
1664
PyObject * keystr = NULL ;
1580
- PyObject * encoded ;
1665
+ int rv ;
1581
1666
1582
1667
if (PyUnicode_Check (key )) {
1583
1668
keystr = Py_NewRef (key );
@@ -1617,15 +1702,12 @@ encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *firs
1617
1702
}
1618
1703
}
1619
1704
1620
- encoded = encoder_encode_string ( s , keystr );
1705
+ rv = encoder_write_string ( s , writer , keystr );
1621
1706
Py_DECREF (keystr );
1622
- if (encoded == NULL ) {
1623
- return -1 ;
1707
+ if (rv != 0 ) {
1708
+ return rv ;
1624
1709
}
1625
1710
1626
- if (_steal_accumulate (writer , encoded ) < 0 ) {
1627
- return -1 ;
1628
- }
1629
1711
if (PyUnicodeWriter_WriteStr (writer , s -> key_separator ) < 0 ) {
1630
1712
return -1 ;
1631
1713
}
0 commit comments