|
|
|
|
|
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include <stdint.h>
#include <string.h>

#if __ARM_NEON
#include <arm_neon.h>
#elif __SSE2__
#include <emmintrin.h>
#endif
|
|
|
/*
 * Required length, in bytes, of the mask passed to apply_mask().
 * Must stay a power of two: apply_mask() indexes the mask with
 * `i & (MASK_LEN - 1)`.
 */
static const Py_ssize_t MASK_LEN = 4;
|
|
|
|
|
|
|
static int |
|
_PyBytesLike_AsStringAndSize(PyObject *obj, PyObject **tmp, char **buffer, Py_ssize_t *length) |
|
{ |
|
|
|
|
|
|
|
if (PyBytes_Check(obj)) |
|
{ |
|
*tmp = NULL; |
|
*buffer = PyBytes_AS_STRING(obj); |
|
*length = PyBytes_GET_SIZE(obj); |
|
} |
|
else if (PyByteArray_Check(obj)) |
|
{ |
|
*tmp = NULL; |
|
*buffer = PyByteArray_AS_STRING(obj); |
|
*length = PyByteArray_GET_SIZE(obj); |
|
} |
|
else if (PyMemoryView_Check(obj)) |
|
{ |
|
*tmp = PyMemoryView_GetContiguous(obj, PyBUF_READ, 'C'); |
|
if (*tmp == NULL) |
|
{ |
|
return -1; |
|
} |
|
Py_buffer *mv_buf; |
|
mv_buf = PyMemoryView_GET_BUFFER(*tmp); |
|
*buffer = mv_buf->buf; |
|
*length = mv_buf->len; |
|
} |
|
else |
|
{ |
|
PyErr_Format( |
|
PyExc_TypeError, |
|
"expected a bytes-like object, %.200s found", |
|
Py_TYPE(obj)->tp_name); |
|
return -1; |
|
} |
|
|
|
return 0; |
|
} |
|
|
|
|
|
|
|
/*
 * apply_mask(data, mask) -> bytes
 *
 * XOR every byte of `data` with the 4-byte `mask`, repeating the mask
 * cyclically, and return the result as a new bytes object.
 *
 * Arguments (positional or keyword):
 *   data -- bytes, bytearray or memoryview holding the payload.
 *   mask -- bytes, bytearray or memoryview of exactly MASK_LEN (4) bytes.
 *
 * Returns a new reference to a bytes object of the same length as `data`,
 * or NULL with an exception set:
 *   TypeError  -- an argument is not one of the supported types.
 *   ValueError -- `mask` does not contain exactly 4 bytes.
 *
 * The bulk of the work is done in wide chunks (16 bytes with NEON or SSE2,
 * 8 bytes otherwise); the remaining tail is processed byte by byte.
 */
static PyObject *
apply_mask(PyObject *self, PyObject *args, PyObject *kwds)
{
    /* Inputs are treated as immutable, which causes an extra memory copy. */

    static char *kwlist[] = {"data", "mask", NULL};
    PyObject *input_obj;
    PyObject *mask_obj;

    /* A pointer to a char * + length will be extracted from the data and
     * mask arguments, possibly via a temporary contiguous memoryview. */

    PyObject *input_tmp = NULL;
    char *input;
    Py_ssize_t input_len;
    PyObject *mask_tmp = NULL;
    char *mask;
    Py_ssize_t mask_len;

    /* Initialize a PyBytesObject then get a pointer to the underlying
     * char * in order to avoid an extra memory copy. */

    PyObject *result = NULL;
    char *output;

    /* The mask, read as one 32-bit word in native byte order. */
    uint32_t mask_32;

    /* Shared by the wide loop and the byte-wise tail loop. */
    Py_ssize_t i = 0;

    if (!PyArg_ParseTupleAndKeywords(
            args, kwds, "OO", kwlist, &input_obj, &mask_obj))
    {
        goto exit;
    }

    if (_PyBytesLike_AsStringAndSize(input_obj, &input_tmp, &input, &input_len) == -1)
    {
        goto exit;
    }

    if (_PyBytesLike_AsStringAndSize(mask_obj, &mask_tmp, &mask, &mask_len) == -1)
    {
        goto exit;
    }

    if (mask_len != MASK_LEN)
    {
        PyErr_SetString(PyExc_ValueError, "mask must contain 4 bytes");
        goto exit;
    }

    /* Create the output buffer uninitialized; every byte is written below. */
    result = PyBytes_FromStringAndSize(NULL, input_len);
    if (result == NULL)
    {
        goto exit;
    }

    /* Since we just created result, we don't need error checks. */
    output = PyBytes_AS_STRING(result);

    /* The mask buffer may have any alignment (e.g. a memoryview slice), so
     * it is copied into a uint32_t instead of being dereferenced through a
     * cast pointer, which would be undefined behaviour. mask_len == 4 was
     * checked above, so exactly the whole mask is copied. */
    memcpy(&mask_32, mask, sizeof mask_32);

    /* Wide loop. Because the chunk sizes are multiples of MASK_LEN, the
     * mask phase is unchanged when the tail loop takes over at index i. */
    {
#if __ARM_NEON

        /* Process 16 bytes per iteration with NEON. */
        Py_ssize_t input_len_128 = input_len & ~(Py_ssize_t)15;
        uint8x16_t mask_128 = vreinterpretq_u8_u32(vdupq_n_u32(mask_32));

        for (; i < input_len_128; i += 16)
        {
            uint8x16_t in_128 = vld1q_u8((uint8_t *)(input + i));
            uint8x16_t out_128 = veorq_u8(in_128, mask_128);
            vst1q_u8((uint8_t *)(output + i), out_128);
        }

#elif __SSE2__

        /* Process 16 bytes per iteration with SSE2, using the unaligned
         * load/store intrinsics since neither buffer is known to be
         * 16-byte aligned. */
        Py_ssize_t input_len_128 = input_len & ~(Py_ssize_t)15;
        __m128i mask_128 = _mm_set1_epi32((int)mask_32);

        for (; i < input_len_128; i += 16)
        {
            __m128i in_128 = _mm_loadu_si128((__m128i *)(input + i));
            __m128i out_128 = _mm_xor_si128(in_128, mask_128);
            _mm_storeu_si128((__m128i *)(output + i), out_128);
        }

#else

        /* Portable fallback: process 8 bytes per iteration. Both halves of
         * mask_64 are identical, so the result does not depend on byte
         * order. memcpy is used for the loads and stores because input and
         * output are not guaranteed to be 8-byte aligned. */
        Py_ssize_t input_len_64 = input_len & ~(Py_ssize_t)7;
        uint64_t mask_64 = ((uint64_t)mask_32 << 32) | (uint64_t)mask_32;

        for (; i < input_len_64; i += 8)
        {
            uint64_t chunk;

            memcpy(&chunk, input + i, sizeof chunk);
            chunk ^= mask_64;
            memcpy(output + i, &chunk, sizeof chunk);
        }

#endif
    }

    /* Byte-wise tail: handles whatever the wide loop left over. */
    for (; i < input_len; i++)
    {
        output[i] = input[i] ^ mask[i & (MASK_LEN - 1)];
    }

exit:
    /* Release the temporary memoryviews, if any were created. */
    Py_XDECREF(input_tmp);
    Py_XDECREF(mask_tmp);
    return result;
}
|
|
|
static PyMethodDef speedups_methods[] = { |
|
{ |
|
"apply_mask", |
|
(PyCFunction)apply_mask, |
|
METH_VARARGS | METH_KEYWORDS, |
|
"Apply masking to the data of a WebSocket message.", |
|
}, |
|
{NULL, NULL, 0, NULL}, |
|
}; |
|
|
|
static struct PyModuleDef speedups_module = { |
|
PyModuleDef_HEAD_INIT, |
|
"websocket.speedups", |
|
"C implementation of performance sensitive functions.", |
|
|
|
-1, |
|
speedups_methods, |
|
NULL, |
|
NULL, |
|
NULL, |
|
NULL |
|
}; |
|
|
|
PyMODINIT_FUNC |
|
PyInit_speedups(void) |
|
{ |
|
return PyModule_Create(&speedups_module); |
|
} |
|
|