jscript: Store concatenated strings as a rope string to avoid useless copying.
This commit is contained in:
parent
019a1af76e
commit
8541f94e2c
|
@ -16,13 +16,54 @@
|
|||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include "jscript.h"
|
||||
|
||||
#include "wine/debug.h"
|
||||
|
||||
/*
|
||||
* This is the length of a string that is considered to be long enough to be
|
||||
* worth the rope to avoid copy.
|
||||
* This probably could be tuned, but keep it low for a while to better test rope's code.
|
||||
*/
|
||||
#define JSSTR_SHORT_STRING_LENGTH 8
|
||||
|
||||
/*
|
||||
* This is the max rope depth. While faster to allocate, ropes may become slow at access.
|
||||
*/
|
||||
#define JSSTR_MAX_ROPE_DEPTH 100
|
||||
|
||||
const char *debugstr_jsstr(jsstr_t *str)
|
||||
{
|
||||
return debugstr_wn(jsstr_as_inline(str)->buf, jsstr_length(str));
|
||||
return jsstr_is_inline(str) ? debugstr_wn(jsstr_as_inline(str)->buf, jsstr_length(str))
|
||||
: jsstr_is_heap(str) ? debugstr_wn(jsstr_as_heap(str)->buf, jsstr_length(str))
|
||||
: wine_dbg_sprintf("%s...", debugstr_jsstr(jsstr_as_rope(str)->left));
|
||||
}
|
||||
|
||||
void jsstr_free(jsstr_t *str)
|
||||
{
|
||||
switch(jsstr_tag(str)) {
|
||||
case JSSTR_HEAP:
|
||||
heap_free(jsstr_as_heap(str)->buf);
|
||||
break;
|
||||
case JSSTR_ROPE: {
|
||||
jsstr_rope_t *rope = jsstr_as_rope(str);
|
||||
jsstr_release(rope->left);
|
||||
jsstr_release(rope->right);
|
||||
break;
|
||||
}
|
||||
case JSSTR_INLINE:
|
||||
break;
|
||||
}
|
||||
|
||||
heap_free(str);
|
||||
}
|
||||
|
||||
static inline void jsstr_init(jsstr_t *str, unsigned len, jsstr_tag_t tag)
|
||||
{
|
||||
str->length_flags = len << JSSTR_LENGTH_SHIFT | tag;
|
||||
str->ref = 1;
|
||||
}
|
||||
|
||||
WCHAR *jsstr_alloc_buf(unsigned len, jsstr_t **r)
|
||||
|
@ -36,8 +77,7 @@ WCHAR *jsstr_alloc_buf(unsigned len, jsstr_t **r)
|
|||
if(!ret)
|
||||
return NULL;
|
||||
|
||||
ret->str.length_flags = len << JSSTR_LENGTH_SHIFT;
|
||||
ret->str.ref = 1;
|
||||
jsstr_init(&ret->str, len, JSSTR_INLINE);
|
||||
ret->buf[len] = 0;
|
||||
*r = &ret->str;
|
||||
return ret->buf;
|
||||
|
@ -55,17 +95,116 @@ jsstr_t *jsstr_alloc_len(const WCHAR *buf, unsigned len)
|
|||
return ret;
|
||||
}
|
||||
|
||||
int jsstr_cmp(jsstr_t *str1, jsstr_t *str2)
|
||||
static void jsstr_rope_extract(jsstr_rope_t *str, unsigned off, unsigned len, WCHAR *buf)
|
||||
{
|
||||
unsigned left_len = jsstr_length(str->left);
|
||||
|
||||
if(left_len <= off) {
|
||||
jsstr_extract(str->right, off-left_len, len, buf);
|
||||
}else if(left_len >= len+off) {
|
||||
jsstr_extract(str->left, off, len, buf);
|
||||
}else {
|
||||
left_len -= off;
|
||||
jsstr_extract(str->left, off, left_len, buf);
|
||||
jsstr_extract(str->right, 0, len-left_len, buf+left_len);
|
||||
}
|
||||
}
|
||||
|
||||
void jsstr_extract(jsstr_t *str, unsigned off, unsigned len, WCHAR *buf)
|
||||
{
|
||||
switch(jsstr_tag(str)) {
|
||||
case JSSTR_INLINE:
|
||||
memcpy(buf, jsstr_as_inline(str)->buf+off, len*sizeof(WCHAR));
|
||||
return;
|
||||
case JSSTR_HEAP:
|
||||
memcpy(buf, jsstr_as_heap(str)->buf+off, len*sizeof(WCHAR));
|
||||
return;
|
||||
case JSSTR_ROPE:
|
||||
return jsstr_rope_extract(jsstr_as_rope(str), off, len, buf);
|
||||
}
|
||||
}
|
||||
|
||||
static int jsstr_cmp_str(jsstr_t *jsstr, const WCHAR *str, unsigned len)
|
||||
{
|
||||
int len1 = jsstr_length(str1);
|
||||
int len2 = jsstr_length(str2);
|
||||
int ret;
|
||||
|
||||
ret = memcmp(jsstr_as_inline(str1)->buf, jsstr_as_inline(str2)->buf, min(len1, len2)*sizeof(WCHAR));
|
||||
if(!ret)
|
||||
ret = len1 - len2;
|
||||
switch(jsstr_tag(jsstr)) {
|
||||
case JSSTR_INLINE:
|
||||
ret = memcmp(jsstr_as_inline(jsstr)->buf, str, len*sizeof(WCHAR));
|
||||
return ret || jsstr_length(jsstr) == len ? ret : 1;
|
||||
case JSSTR_HEAP:
|
||||
ret = memcmp(jsstr_as_heap(jsstr)->buf, str, len*sizeof(WCHAR));
|
||||
return ret || jsstr_length(jsstr) == len ? ret : 1;
|
||||
case JSSTR_ROPE: {
|
||||
jsstr_rope_t *rope = jsstr_as_rope(jsstr);
|
||||
unsigned left_len = jsstr_length(rope->left);
|
||||
|
||||
return ret;
|
||||
ret = jsstr_cmp_str(rope->left, str, min(len, left_len));
|
||||
if(ret || len <= left_len)
|
||||
return ret;
|
||||
return jsstr_cmp_str(rope->right, str+left_len, len-left_len);
|
||||
}
|
||||
}
|
||||
|
||||
assert(0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define TMP_BUF_SIZE 256
|
||||
|
||||
static int ropes_cmp(jsstr_rope_t *left, jsstr_rope_t *right)
|
||||
{
|
||||
WCHAR left_buf[TMP_BUF_SIZE], right_buf[TMP_BUF_SIZE];
|
||||
unsigned left_len = jsstr_length(&left->str);
|
||||
unsigned right_len = jsstr_length(&right->str);
|
||||
unsigned cmp_off = 0, cmp_size;
|
||||
int ret;
|
||||
|
||||
/* FIXME: We can avoid temporary buffers here. */
|
||||
while(cmp_off < min(left_len, right_len)) {
|
||||
cmp_size = min(left_len, right_len) - cmp_off;
|
||||
if(cmp_size > TMP_BUF_SIZE)
|
||||
cmp_size = TMP_BUF_SIZE;
|
||||
|
||||
jsstr_rope_extract(left, cmp_off, cmp_size, left_buf);
|
||||
jsstr_rope_extract(right, cmp_off, cmp_size, right_buf);
|
||||
ret = memcmp(left_buf, right_buf, cmp_size);
|
||||
if(ret)
|
||||
return ret;
|
||||
|
||||
cmp_off += cmp_size;
|
||||
}
|
||||
|
||||
return left_len - right_len;
|
||||
}
|
||||
|
||||
static inline const WCHAR *jsstr_try_flat(jsstr_t *str)
|
||||
{
|
||||
return jsstr_is_inline(str) ? jsstr_as_inline(str)->buf
|
||||
: jsstr_is_heap(str) ? jsstr_as_heap(str)->buf
|
||||
: NULL;
|
||||
}
|
||||
|
||||
int jsstr_cmp(jsstr_t *str1, jsstr_t *str2)
|
||||
{
|
||||
unsigned len1 = jsstr_length(str1);
|
||||
unsigned len2 = jsstr_length(str2);
|
||||
const WCHAR *str;
|
||||
int ret;
|
||||
|
||||
str = jsstr_try_flat(str2);
|
||||
if(str) {
|
||||
ret = jsstr_cmp_str(str1, str, min(len1, len2));
|
||||
return ret || len1 == len2 ? ret : -1;
|
||||
}
|
||||
|
||||
str = jsstr_try_flat(str1);
|
||||
if(str) {
|
||||
ret = jsstr_cmp_str(str2, str, min(len1, len2));
|
||||
return ret || len1 == len2 ? -ret : 1;
|
||||
}
|
||||
|
||||
return ropes_cmp(jsstr_as_rope(str1), jsstr_as_rope(str2));
|
||||
}
|
||||
|
||||
jsstr_t *jsstr_concat(jsstr_t *str1, jsstr_t *str2)
|
||||
|
@ -82,6 +221,31 @@ jsstr_t *jsstr_concat(jsstr_t *str1, jsstr_t *str2)
|
|||
if(!len2)
|
||||
return jsstr_addref(str1);
|
||||
|
||||
if(len1 + len2 >= JSSTR_SHORT_STRING_LENGTH) {
|
||||
unsigned depth, depth2;
|
||||
jsstr_rope_t *rope;
|
||||
|
||||
depth = jsstr_is_rope(str1) ? jsstr_as_rope(str1)->depth : 0;
|
||||
depth2 = jsstr_is_rope(str2) ? jsstr_as_rope(str2)->depth : 0;
|
||||
if(depth2 > depth)
|
||||
depth = depth2;
|
||||
|
||||
if(depth++ < JSSTR_MAX_ROPE_DEPTH) {
|
||||
if(len1+len2 > JSSTR_MAX_LENGTH)
|
||||
return NULL;
|
||||
|
||||
rope = heap_alloc(sizeof(*rope));
|
||||
if(!rope)
|
||||
return NULL;
|
||||
|
||||
jsstr_init(&rope->str, len1+len2, JSSTR_ROPE);
|
||||
rope->left = jsstr_addref(str1);
|
||||
rope->right = jsstr_addref(str2);
|
||||
rope->depth = depth;
|
||||
return &rope->str;
|
||||
}
|
||||
}
|
||||
|
||||
ptr = jsstr_alloc_buf(len1+len2, &ret);
|
||||
if(!ret)
|
||||
return NULL;
|
||||
|
@ -89,6 +253,28 @@ jsstr_t *jsstr_concat(jsstr_t *str1, jsstr_t *str2)
|
|||
jsstr_flush(str1, ptr);
|
||||
jsstr_flush(str2, ptr+len1);
|
||||
return ret;
|
||||
|
||||
}
|
||||
|
||||
C_ASSERT(sizeof(jsstr_heap_t) <= sizeof(jsstr_rope_t));
|
||||
|
||||
const WCHAR *jsstr_rope_flatten(jsstr_rope_t *str)
|
||||
{
|
||||
WCHAR *buf;
|
||||
|
||||
buf = heap_alloc((jsstr_length(&str->str)+1) * sizeof(WCHAR));
|
||||
if(!buf)
|
||||
return NULL;
|
||||
|
||||
jsstr_flush(str->left, buf);
|
||||
jsstr_flush(str->right, buf+jsstr_length(str->left));
|
||||
buf[jsstr_length(&str->str)] = 0;
|
||||
|
||||
/* Trasform to heap string */
|
||||
jsstr_release(str->left);
|
||||
jsstr_release(str->right);
|
||||
str->str.length_flags |= JSSTR_FLAG_FLAT;
|
||||
return jsstr_as_heap(&str->str)->buf = buf;
|
||||
}
|
||||
|
||||
static jsstr_t *empty_str, *nan_str, *undefined_str;
|
||||
|
|
|
@ -16,6 +16,23 @@
|
|||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
|
||||
*/
|
||||
|
||||
/*
|
||||
* This is a common header for all string representations. The exact layout of the string
|
||||
* representation may be:
|
||||
*
|
||||
* - inline string - string bytes directly follow string headers.
|
||||
* - heap string - a structure containing a pointer to buffer on the heap.
|
||||
* - roper string - a product of concatenation of two strings. Instead of copying whole
|
||||
* buffers, we may store just references to concatenated strings.
|
||||
*
|
||||
* String layout may change over life time of the string. Currently possible transformation
|
||||
* is when a rope string becomes a heap stream. That happens when we need a real, linear
|
||||
* zero-terminated buffer (a flat buffer). At this point the type of the string is changed
|
||||
* and the new buffer is stored in the string, so that subsequent operations requiring
|
||||
* a flat string won't need to flatten it again.
|
||||
*
|
||||
* In the future more layouts and transformations may be added.
|
||||
*/
|
||||
struct _jsstr_t {
|
||||
unsigned length_flags;
|
||||
unsigned ref;
|
||||
|
@ -25,18 +42,60 @@ struct _jsstr_t {
|
|||
#define JSSTR_MAX_LENGTH (1 << (32-JSSTR_LENGTH_SHIFT))
|
||||
#define JSSTR_FLAGS_MASK ((1 << JSSTR_LENGTH_SHIFT)-1)
|
||||
|
||||
#define JSSTR_FLAG_NULLBSTR 1
|
||||
#define JSSTR_FLAG_NULLBSTR 4
|
||||
|
||||
#define JSSTR_FLAG_LBIT 1
|
||||
#define JSSTR_FLAG_FLAT 2
|
||||
#define JSSTR_FLAG_TAG_MASK 3
|
||||
|
||||
typedef enum {
|
||||
JSSTR_INLINE = JSSTR_FLAG_FLAT,
|
||||
JSSTR_HEAP = JSSTR_FLAG_FLAT|JSSTR_FLAG_LBIT,
|
||||
JSSTR_ROPE = JSSTR_FLAG_LBIT
|
||||
} jsstr_tag_t;
|
||||
|
||||
static inline unsigned jsstr_length(jsstr_t *str)
|
||||
{
|
||||
return str->length_flags >> JSSTR_LENGTH_SHIFT;
|
||||
}
|
||||
|
||||
static inline jsstr_tag_t jsstr_tag(jsstr_t *str)
|
||||
{
|
||||
return str->length_flags & JSSTR_FLAG_TAG_MASK;
|
||||
}
|
||||
|
||||
static inline BOOL jsstr_is_inline(jsstr_t *str)
|
||||
{
|
||||
return jsstr_tag(str) == JSSTR_INLINE;
|
||||
}
|
||||
|
||||
static inline BOOL jsstr_is_heap(jsstr_t *str)
|
||||
{
|
||||
return jsstr_tag(str) == JSSTR_HEAP;
|
||||
}
|
||||
|
||||
static inline BOOL jsstr_is_rope(jsstr_t *str)
|
||||
{
|
||||
return jsstr_tag(str) == JSSTR_ROPE;
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
jsstr_t str;
|
||||
WCHAR buf[1];
|
||||
} jsstr_inline_t;
|
||||
|
||||
typedef struct {
|
||||
jsstr_t str;
|
||||
WCHAR *buf;
|
||||
} jsstr_heap_t;
|
||||
|
||||
typedef struct {
|
||||
jsstr_t str;
|
||||
jsstr_t *left;
|
||||
jsstr_t *right;
|
||||
unsigned depth;
|
||||
} jsstr_rope_t;
|
||||
|
||||
jsstr_t *jsstr_alloc_len(const WCHAR*,unsigned) DECLSPEC_HIDDEN;
|
||||
WCHAR *jsstr_alloc_buf(unsigned,jsstr_t**) DECLSPEC_HIDDEN;
|
||||
|
||||
|
@ -45,10 +104,16 @@ static inline jsstr_t *jsstr_alloc(const WCHAR *str)
|
|||
return jsstr_alloc_len(str, strlenW(str));
|
||||
}
|
||||
|
||||
void jsstr_free(jsstr_t*) DECLSPEC_HIDDEN;
|
||||
|
||||
static inline void jsstr_release(jsstr_t *str)
|
||||
{
|
||||
if(!--str->ref)
|
||||
heap_free(str);
|
||||
if(!--str->ref) {
|
||||
if(jsstr_is_inline(str))
|
||||
heap_free(str);
|
||||
else
|
||||
jsstr_free(str);
|
||||
}
|
||||
}
|
||||
|
||||
static inline jsstr_t *jsstr_addref(jsstr_t *str)
|
||||
|
@ -62,27 +127,39 @@ static inline jsstr_inline_t *jsstr_as_inline(jsstr_t *str)
|
|||
return CONTAINING_RECORD(str, jsstr_inline_t, str);
|
||||
}
|
||||
|
||||
/* This will be failable in the future. */
|
||||
static inline jsstr_heap_t *jsstr_as_heap(jsstr_t *str)
|
||||
{
|
||||
return CONTAINING_RECORD(str, jsstr_heap_t, str);
|
||||
}
|
||||
|
||||
static inline jsstr_rope_t *jsstr_as_rope(jsstr_t *str)
|
||||
{
|
||||
return CONTAINING_RECORD(str, jsstr_rope_t, str);
|
||||
}
|
||||
|
||||
const WCHAR *jsstr_rope_flatten(jsstr_rope_t*) DECLSPEC_HIDDEN;
|
||||
|
||||
static inline const WCHAR *jsstr_flatten(jsstr_t *str)
|
||||
{
|
||||
return jsstr_as_inline(str)->buf;
|
||||
return jsstr_is_inline(str) ? jsstr_as_inline(str)->buf
|
||||
: jsstr_is_heap(str) ? jsstr_as_heap(str)->buf
|
||||
: jsstr_rope_flatten(jsstr_as_rope(str));
|
||||
}
|
||||
|
||||
static inline BOOL jsstr_eq(jsstr_t *str1, jsstr_t *str2)
|
||||
{
|
||||
unsigned len = jsstr_length(str1);
|
||||
return len == jsstr_length(str2) && !memcmp(jsstr_as_inline(str1)->buf, jsstr_as_inline(str2)->buf, len*sizeof(WCHAR));
|
||||
}
|
||||
|
||||
static inline void jsstr_extract(jsstr_t *str, unsigned off, unsigned len, WCHAR *buf)
|
||||
{
|
||||
memcpy(buf, jsstr_as_inline(str)->buf+off, len*sizeof(WCHAR));
|
||||
}
|
||||
void jsstr_extract(jsstr_t*,unsigned,unsigned,WCHAR*) DECLSPEC_HIDDEN;
|
||||
|
||||
static inline unsigned jsstr_flush(jsstr_t *str, WCHAR *buf)
|
||||
{
|
||||
unsigned len = jsstr_length(str);
|
||||
memcpy(buf, jsstr_as_inline(str)->buf, len*sizeof(WCHAR));
|
||||
if(jsstr_is_inline(str)) {
|
||||
memcpy(buf, jsstr_as_inline(str)->buf, len*sizeof(WCHAR));
|
||||
}else if(jsstr_is_heap(str)) {
|
||||
memcpy(buf, jsstr_as_heap(str)->buf, len*sizeof(WCHAR));
|
||||
}else {
|
||||
jsstr_rope_t *rope = jsstr_as_rope(str);
|
||||
jsstr_flush(rope->left, buf);
|
||||
jsstr_flush(rope->right, buf+jsstr_length(rope->left));
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
|
@ -98,6 +175,12 @@ static inline jsstr_t *jsstr_substr(jsstr_t *str, unsigned off, unsigned len)
|
|||
}
|
||||
|
||||
int jsstr_cmp(jsstr_t*,jsstr_t*) DECLSPEC_HIDDEN;
|
||||
|
||||
static inline BOOL jsstr_eq(jsstr_t *left, jsstr_t *right)
|
||||
{
|
||||
return jsstr_length(left) == jsstr_length(right) && !jsstr_cmp(left, right);
|
||||
}
|
||||
|
||||
jsstr_t *jsstr_concat(jsstr_t*,jsstr_t*) DECLSPEC_HIDDEN;
|
||||
|
||||
jsstr_t *jsstr_nan(void) DECLSPEC_HIDDEN;
|
||||
|
|
Loading…
Reference in New Issue