Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
:mod:`json` now encodes strings up to 2.2x faster if they consist solely of ASCII characters that don’t require escaping.
Original file line numberDiff line numberDiff line change
Expand Up@@ -51,7 +51,7 @@ typedef struct _PyEncoderObject {
char sort_keys;
char skipkeys;
int allow_nan;
PyCFunction fast_encode;
int (*fast_encode)(PyUnicodeWriter *, PyObject *);
} PyEncoderObject;

#define PyEncoderObject_CAST(op) ((PyEncoderObject *)(op))
Expand DownExpand Up@@ -102,8 +102,10 @@ static PyObject *
_encoded_const(PyObject *obj);
static void
raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end);
static PyObject *
encoder_encode_string(PyEncoderObject *s, PyObject *obj);
static int
_steal_accumulate(PyUnicodeWriter *writer, PyObject *stolen);
static int
encoder_write_string(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *obj);
static PyObject *
encoder_encode_float(PyEncoderObject *s, PyObject *obj);

Expand DownExpand Up@@ -146,22 +148,11 @@ ascii_escape_unichar(Py_UCS4 c, unsigned char *output, Py_ssize_t chars)
return chars;
}

static PyObject *
ascii_escape_unicode(PyObject *pystr)
static Py_ssize_t
ascii_escape_size(const void *input, int kind, Py_ssize_t input_chars)
{
/* Take a PyUnicode pystr and return a new ASCII-only escaped PyUnicode */
Py_ssize_t i;
Py_ssize_t input_chars;
Py_ssize_t output_size;
Py_ssize_t chars;
PyObject *rval;
const void *input;
Py_UCS1 *output;
int kind;

input_chars = PyUnicode_GET_LENGTH(pystr);
input = PyUnicode_DATA(pystr);
kind = PyUnicode_KIND(pystr);

/* Compute the output size */
for (i = 0, output_size = 2; i < input_chars; i++) {
Expand All@@ -181,11 +172,22 @@ ascii_escape_unicode(PyObject *pystr)
}
if (output_size > PY_SSIZE_T_MAX - d) {
PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
return NULL;
return -1;
}
output_size += d;
}

return output_size;
}

static PyObject *
ascii_escape_unicode_and_size(const void *input, int kind, Py_ssize_t input_chars, Py_ssize_t output_size)
{
Py_ssize_t i;
Py_ssize_t chars;
PyObject *rval;
Py_UCS1 *output;

rval = PyUnicode_New(output_size, 127);
if (rval == NULL) {
return NULL;
Expand All@@ -210,23 +212,66 @@ ascii_escape_unicode(PyObject *pystr)
}

static PyObject *
ascii_escape_unicode(PyObject *pystr)
{
    /* Take a PyUnicode pystr and return a new ASCII-only escaped PyUnicode.
       Returns NULL when the size computation or the construction of the
       result fails. */
    Py_ssize_t input_chars = PyUnicode_GET_LENGTH(pystr);
    const void *input = PyUnicode_DATA(pystr);
    int kind = PyUnicode_KIND(pystr);

    /* A negative size signals failure reported by ascii_escape_size()
       (it raises OverflowError when the escaped length would not fit). */
    Py_ssize_t output_size = ascii_escape_size(input, kind, input_chars);
    if (output_size < 0) {
        return NULL;
    }

    return ascii_escape_unicode_and_size(input, kind, input_chars, output_size);
}

static int
write_escaped_ascii(PyUnicodeWriter *writer, PyObject *pystr)
{
    /* Write the ASCII-only escaped form of the PyUnicode pystr to writer.
       Returns -1 on failure; otherwise returns the result of the final
       write to the writer. */
    Py_ssize_t input_chars = PyUnicode_GET_LENGTH(pystr);
    const void *input = PyUnicode_DATA(pystr);
    int kind = PyUnicode_KIND(pystr);

    /* A negative size signals failure reported by ascii_escape_size(). */
    Py_ssize_t output_size = ascii_escape_size(input, kind, input_chars);
    if (output_size < 0) {
        return -1;
    }

    if (output_size == input_chars + 2) {
        /* No need to escape anything: the computed size is just the input
           plus the two surrounding quotes, so write pystr as-is and skip
           building a temporary escaped string. */
        if (PyUnicodeWriter_WriteChar(writer, '"') < 0) {
            return -1;
        }
        if (PyUnicodeWriter_WriteStr(writer, pystr) < 0) {
            return -1;
        }
        return PyUnicodeWriter_WriteChar(writer, '"');
    }

    /* Slow path: build the escaped string, then hand it over to the writer. */
    PyObject *rval = ascii_escape_unicode_and_size(input, kind, input_chars, output_size);
    if (rval == NULL) {
        return -1;
    }

    return _steal_accumulate(writer, rval);
}

static Py_ssize_t
escape_size(const void *input, int kind, Py_ssize_t input_chars)
{
Py_ssize_t i;
Py_ssize_t output_size;

/* Compute the output size */
for (i = 0, output_size = 2; i < input_chars; i++) {
Py_UCS4 c = PyUnicode_READ(kind, input, i);
Expand All@@ -244,11 +289,21 @@ escape_unicode(PyObject *pystr)
}
if (output_size > PY_SSIZE_T_MAX - d) {
PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
return NULL;
return -1;
}
output_size += d;
}

return output_size;
}

static PyObject *
escape_unicode_and_size(const void *input, int kind, Py_UCS4 maxchar, Py_ssize_t input_chars, Py_ssize_t output_size)
{
Py_ssize_t i;
Py_ssize_t chars;
PyObject *rval;

rval = PyUnicode_New(output_size, maxchar);
if (rval == NULL)
return NULL;
Expand DownExpand Up@@ -303,6 +358,65 @@ escape_unicode(PyObject *pystr)
return rval;
}

static PyObject *
escape_unicode(PyObject *pystr)
{
    /* Produce a new escaped PyUnicode from the PyUnicode pystr.
       Returns NULL if either helper reports failure. */
    const Py_UCS4 max_char = PyUnicode_MAX_CHAR_VALUE(pystr);
    const Py_ssize_t length = PyUnicode_GET_LENGTH(pystr);
    const void *data = PyUnicode_DATA(pystr);
    const int kind = PyUnicode_KIND(pystr);

    /* escape_size() yields a negative value when it has raised an error. */
    const Py_ssize_t escaped_size = escape_size(data, kind, length);
    if (escaped_size >= 0) {
        return escape_unicode_and_size(data, kind, max_char, length, escaped_size);
    }
    return NULL;
}

static int
write_escaped_unicode(PyUnicodeWriter *writer, PyObject *pystr)
{
    /* Write the escaped form of the PyUnicode pystr to writer.
       Returns -1 on failure; otherwise returns the result of the final
       write to the writer. */
    const Py_UCS4 max_char = PyUnicode_MAX_CHAR_VALUE(pystr);
    const Py_ssize_t length = PyUnicode_GET_LENGTH(pystr);
    const void *data = PyUnicode_DATA(pystr);
    const int kind = PyUnicode_KIND(pystr);

    const Py_ssize_t escaped_size = escape_size(data, kind, length);
    if (escaped_size < 0) {
        return -1;
    }

    if (escaped_size != length + 2) {
        /* At least one character expands: build the escaped string and
           pass it on to the writer. */
        PyObject *escaped = escape_unicode_and_size(data, kind, max_char, length, escaped_size);
        if (escaped == NULL) {
            return -1;
        }
        return _steal_accumulate(writer, escaped);
    }

    /* Nothing needs escaping: emit quote, the string itself, quote. */
    if (PyUnicodeWriter_WriteChar(writer, '"') < 0
        || PyUnicodeWriter_WriteStr(writer, pystr) < 0) {
        return -1;
    }
    return PyUnicodeWriter_WriteChar(writer, '"');
}

static void
raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end)
{
Expand DownExpand Up@@ -1255,8 +1369,11 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)

if (PyCFunction_Check(s->encoder)) {
PyCFunction f = PyCFunction_GetFunction(s->encoder);
if (f == py_encode_basestring_ascii || f == py_encode_basestring) {
s->fast_encode = f;
if (f == py_encode_basestring_ascii) {
s->fast_encode = write_escaped_ascii;
}
else if (f == py_encode_basestring) {
s->fast_encode = write_escaped_unicode;
}
}

Expand DownExpand Up@@ -1437,24 +1554,27 @@ encoder_encode_float(PyEncoderObject *s, PyObject *obj)
return PyFloat_Type.tp_repr(obj);
}

static PyObject *
encoder_encode_string(PyEncoderObject *s, PyObject *obj)
static int
encoder_write_string(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *obj)
{
/* Return the JSON representation of a string */
PyObject *encoded;

if (s->fast_encode) {
return s->fast_encode(NULL, obj);
return s->fast_encode(writer, obj);
}
encoded = PyObject_CallOneArg(s->encoder, obj);
if (encoded != NULL && !PyUnicode_Check(encoded)) {
if (encoded == NULL) {
return -1;
}
if (!PyUnicode_Check(encoded)) {
PyErr_Format(PyExc_TypeError,
"encoder() must return a string, not %.80s",
Py_TYPE(encoded)->tp_name);
Py_DECREF(encoded);
return NULL;
return -1;
}
return encoded;
return _steal_accumulate(writer, encoded);
}

static int
Expand DownExpand Up@@ -1485,10 +1605,7 @@ encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer,
return PyUnicodeWriter_WriteASCII(writer, "false", 5);
}
else if (PyUnicode_Check(obj)) {
PyObject *encoded = encoder_encode_string(s, obj);
if (encoded == NULL)
return -1;
return _steal_accumulate(writer, encoded);
return encoder_write_string(s, writer, obj);
}
else if (PyLong_Check(obj)) {
if (PyLong_CheckExact(obj)) {
Expand DownExpand Up@@ -1577,7 +1694,7 @@ encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *firs
PyObject *item_separator)
{
PyObject *keystr = NULL;
PyObject *encoded;
int rv;

if (PyUnicode_Check(key)) {
keystr = Py_NewRef(key);
Expand DownExpand Up@@ -1617,13 +1734,10 @@ encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *firs
}
}

encoded = encoder_encode_string(s, keystr);
rv = encoder_write_string(s, writer, keystr);
Py_DECREF(keystr);
if (encoded == NULL) {
return -1;
}

if (_steal_accumulate(writer, encoded) < 0) {
if (rv < 0) {
return -1;
}
if (PyUnicodeWriter_WriteStr(writer, s->key_separator) < 0) {
Expand Down
Loading