jscript: Cleanup regular expressions engine API.

This commit is contained in:
Piotr Caban 2013-02-19 10:12:12 +01:00 committed by Alexandre Julliard
parent 4c6a1d1d92
commit 67c7cc5517
3 changed files with 128 additions and 109 deletions

View File

@ -28,7 +28,7 @@ WINE_DEFAULT_DEBUG_CHANNEL(jscript);
typedef struct { typedef struct {
jsdisp_t dispex; jsdisp_t dispex;
JSRegExp *jsregexp; regexp_t *jsregexp;
jsstr_t *str; jsstr_t *str;
INT last_index; INT last_index;
jsval_t last_index_val; jsval_t last_index_val;
@ -77,12 +77,15 @@ static HRESULT do_regexp_match_next(script_ctx_t *ctx, RegExpInstance *regexp, D
jsstr_t *str, const WCHAR **cp, match_result_t **parens, DWORD *parens_size, jsstr_t *str, const WCHAR **cp, match_result_t **parens, DWORD *parens_size,
DWORD *parens_cnt, match_result_t *ret) DWORD *parens_cnt, match_result_t *ret)
{ {
REMatchState *result; match_state_t *result;
DWORD matchlen;
HRESULT hres; HRESULT hres;
hres = MatchRegExpNext(regexp->jsregexp, str->str, jsstr_length(str), result = alloc_match_state(regexp->jsregexp, &ctx->tmp_heap, *cp);
cp, &ctx->tmp_heap, &result, &matchlen); if(!result)
return E_OUTOFMEMORY;
hres = regexp_execute(regexp->jsregexp, ctx, &ctx->tmp_heap,
str->str, jsstr_length(str), result);
if(FAILED(hres)) if(FAILED(hres))
return hres; return hres;
if(hres == S_FALSE) { if(hres == S_FALSE) {
@ -90,19 +93,20 @@ static HRESULT do_regexp_match_next(script_ctx_t *ctx, RegExpInstance *regexp, D
set_last_index(regexp, 0); set_last_index(regexp, 0);
return S_FALSE; return S_FALSE;
} }
*cp = result->cp;
if(parens) { if(parens) {
if(regexp->jsregexp->parenCount > *parens_size) { if(result->paren_count > *parens_size) {
match_result_t *new_parens; match_result_t *new_parens;
if(*parens) if(*parens)
new_parens = heap_realloc(*parens, sizeof(match_result_t)*regexp->jsregexp->parenCount); new_parens = heap_realloc(*parens, sizeof(match_result_t)*result->paren_count);
else else
new_parens = heap_alloc(sizeof(match_result_t)*regexp->jsregexp->parenCount); new_parens = heap_alloc(sizeof(match_result_t)*result->paren_count);
if(!new_parens) if(!new_parens)
return E_OUTOFMEMORY; return E_OUTOFMEMORY;
*parens_size = regexp->jsregexp->parenCount; *parens_size = result->paren_count;
*parens = new_parens; *parens = new_parens;
} }
} }
@ -115,9 +119,9 @@ static HRESULT do_regexp_match_next(script_ctx_t *ctx, RegExpInstance *regexp, D
if(parens) { if(parens) {
DWORD i; DWORD i;
*parens_cnt = regexp->jsregexp->parenCount; *parens_cnt = result->paren_count;
for(i=0; i < regexp->jsregexp->parenCount; i++) { for(i=0; i < result->paren_count; i++) {
if(result->parens[i].index == -1) { if(result->parens[i].index == -1) {
(*parens)[i].str = NULL; (*parens)[i].str = NULL;
(*parens)[i].len = 0; (*parens)[i].len = 0;
@ -129,7 +133,7 @@ static HRESULT do_regexp_match_next(script_ctx_t *ctx, RegExpInstance *regexp, D
} }
if(!(rem_flags & REM_NO_CTX_UPDATE)) { if(!(rem_flags & REM_NO_CTX_UPDATE)) {
DWORD i, n = min(sizeof(ctx->match_parens)/sizeof(ctx->match_parens[0]), regexp->jsregexp->parenCount); DWORD i, n = min(sizeof(ctx->match_parens)/sizeof(ctx->match_parens[0]), result->paren_count);
for(i=0; i < n; i++) { for(i=0; i < n; i++) {
if(result->parens[i].index == -1) { if(result->parens[i].index == -1) {
@ -145,13 +149,13 @@ static HRESULT do_regexp_match_next(script_ctx_t *ctx, RegExpInstance *regexp, D
memset(ctx->match_parens+n, 0, sizeof(ctx->match_parens) - n*sizeof(ctx->match_parens[0])); memset(ctx->match_parens+n, 0, sizeof(ctx->match_parens) - n*sizeof(ctx->match_parens[0]));
} }
ret->str = result->cp-matchlen; ret->str = result->cp - result->match_len;
ret->len = matchlen; ret->len = result->match_len;
set_last_index(regexp, result->cp-str->str); set_last_index(regexp, result->cp-str->str);
if(!(rem_flags & REM_NO_CTX_UPDATE)) { if(!(rem_flags & REM_NO_CTX_UPDATE)) {
ctx->last_match_index = ret->str-str->str; ctx->last_match_index = ret->str-str->str;
ctx->last_match_length = matchlen; ctx->last_match_length = result->match_len;
} }
return S_OK; return S_OK;
@ -518,7 +522,7 @@ static void RegExp_destructor(jsdisp_t *dispex)
RegExpInstance *This = (RegExpInstance*)dispex; RegExpInstance *This = (RegExpInstance*)dispex;
if(This->jsregexp) if(This->jsregexp)
js_DestroyRegExp(This->jsregexp); regexp_destroy(This->jsregexp);
jsval_release(This->last_index_val); jsval_release(This->last_index_val);
jsstr_release(This->str); jsstr_release(This->str);
heap_free(This); heap_free(This);
@ -598,10 +602,10 @@ HRESULT create_regexp(script_ctx_t *ctx, jsstr_t *src, DWORD flags, jsdisp_t **r
regexp->str = jsstr_addref(src); regexp->str = jsstr_addref(src);
regexp->last_index_val = jsval_number(0); regexp->last_index_val = jsval_number(0);
regexp->jsregexp = js_NewRegExp(ctx, &ctx->tmp_heap, regexp->str->str, regexp->jsregexp = regexp_new(ctx, &ctx->tmp_heap, regexp->str->str,
jsstr_length(regexp->str), flags, FALSE); jsstr_length(regexp->str), flags, FALSE);
if(!regexp->jsregexp) { if(FAILED(hres)) {
WARN("js_NewRegExp failed\n"); WARN("regexp_new failed\n");
jsdisp_release(&regexp->dispex); jsdisp_release(&regexp->dispex);
return E_FAIL; return E_FAIL;
} }

View File

@ -49,6 +49,30 @@ WINE_DEFAULT_DEBUG_CHANNEL(jscript);
#define JS_ReportOutOfMemory(a) #define JS_ReportOutOfMemory(a)
#define JS_COUNT_OPERATION(a,b) #define JS_COUNT_OPERATION(a,b)
typedef BYTE JSPackedBool;
/*
* This struct holds a bitmap representation of a character class from a
* regexp. A list of these is referenced by the classList field of the
* regexp_t struct (which also records the number of entries in classCount).
*
* A class starts out in its "source" form: u.src.startIndex is the offset in
* the original regexp source of the beginning of the class contents. The
* first use of the class converts the source representation into a bitmap;
* the matcher checks the converted flag and calls ProcessCharSet for entries
* that have not been converted yet.
*
* The two representations share storage through the union u, so the
* converted flag is what tells which member of u is currently meaningful.
*/
typedef struct RECharSet {
JSPackedBool converted; /* nonzero once the source form has been turned into a bitmap */
JSPackedBool sense; /* NOTE(review): presumably distinguishes [...] from negated [^...] - confirm */
WORD length; /* NOTE(review): presumably the extent of the bitmap (highest char covered) - confirm */
union {
BYTE *bits; /* bitmap form of the class, used after conversion */
struct {
size_t startIndex; /* offset of the class contents in the original regexp source */
size_t length; /* NOTE(review): presumably the length of the class contents in the source - confirm */
} src; /* source form of the class, used before conversion */
} u;
} RECharSet;
#define JSMSG_MIN_TOO_BIG 47 #define JSMSG_MIN_TOO_BIG 47
#define JSMSG_MAX_TOO_BIG 48 #define JSMSG_MAX_TOO_BIG 48
#define JSMSG_OUT_OF_ORDER 49 #define JSMSG_OUT_OF_ORDER 49
@ -209,7 +233,7 @@ typedef struct REBackTrackData {
typedef struct REGlobalData { typedef struct REGlobalData {
void *cx; void *cx;
JSRegExp *regexp; /* the RE in execution */ regexp_t *regexp; /* the RE in execution */
BOOL ok; /* runtime error (out_of_memory only?) */ BOOL ok; /* runtime error (out_of_memory only?) */
size_t start; /* offset to start at */ size_t start; /* offset to start at */
ptrdiff_t skipped; /* chars skipped anchoring this r.e. */ ptrdiff_t skipped; /* chars skipped anchoring this r.e. */
@ -285,7 +309,7 @@ typedef struct CompilerState {
} classCache[CLASS_CACHE_SIZE]; } classCache[CLASS_CACHE_SIZE];
WORD flags; WORD flags;
heap_pool_t *pool; /* It's faster to use one malloc'd pool heap_pool_t *pool; /* It's faster to use one malloc'd pool
than to malloc/free */ than to malloc/free */
} CompilerState; } CompilerState;
@ -448,7 +472,7 @@ SetForwardJumpOffset(jsbytecode *jump, jsbytecode *target)
* of recursion. * of recursion.
*/ */
static jsbytecode * static jsbytecode *
EmitREBytecode(CompilerState *state, JSRegExp *re, size_t treeDepth, EmitREBytecode(CompilerState *state, regexp_t *re, size_t treeDepth,
jsbytecode *pc, RENode *t) jsbytecode *pc, RENode *t)
{ {
EmitStateStackEntry *emitStateSP, *emitStateStack; EmitStateStackEntry *emitStateSP, *emitStateStack;
@ -1896,7 +1920,7 @@ out:
*/ */
static REBackTrackData * static REBackTrackData *
PushBackTrackState(REGlobalData *gData, REOp op, PushBackTrackState(REGlobalData *gData, REOp op,
jsbytecode *target, REMatchState *x, const WCHAR *cp, jsbytecode *target, match_state_t *x, const WCHAR *cp,
size_t parenIndex, size_t parenCount) size_t parenIndex, size_t parenCount)
{ {
size_t i; size_t i;
@ -1955,8 +1979,8 @@ PushBackTrackState(REGlobalData *gData, REOp op,
return result; return result;
} }
static inline REMatchState * static inline match_state_t *
FlatNIMatcher(REGlobalData *gData, REMatchState *x, const WCHAR *matchChars, FlatNIMatcher(REGlobalData *gData, match_state_t *x, const WCHAR *matchChars,
size_t length) size_t length)
{ {
size_t i; size_t i;
@ -1994,8 +2018,8 @@ FlatNIMatcher(REGlobalData *gData, REMatchState *x, const WCHAR *matchChars,
* 9. Let y be the State (f, cap). * 9. Let y be the State (f, cap).
* 10. Call c(y) and return its result. * 10. Call c(y) and return its result.
*/ */
static REMatchState * static match_state_t *
BackrefMatcher(REGlobalData *gData, REMatchState *x, size_t parenIndex) BackrefMatcher(REGlobalData *gData, match_state_t *x, size_t parenIndex)
{ {
size_t len, i; size_t len, i;
const WCHAR *parenContent; const WCHAR *parenContent;
@ -2300,11 +2324,11 @@ ReallocStateStack(REGlobalData *gData)
* true, then update the current state's cp. Always update startpc to the next * true, then update the current state's cp. Always update startpc to the next
* op. * op.
*/ */
static inline REMatchState * static inline match_state_t *
SimpleMatch(REGlobalData *gData, REMatchState *x, REOp op, SimpleMatch(REGlobalData *gData, match_state_t *x, REOp op,
jsbytecode **startpc, BOOL updatecp) jsbytecode **startpc, BOOL updatecp)
{ {
REMatchState *result = NULL; match_state_t *result = NULL;
WCHAR matchCh; WCHAR matchCh;
size_t parenIndex; size_t parenIndex;
size_t offset, length, index; size_t offset, length, index;
@ -2508,10 +2532,10 @@ SimpleMatch(REGlobalData *gData, REMatchState *x, REOp op,
return NULL; return NULL;
} }
static inline REMatchState * static inline match_state_t *
ExecuteREBytecode(REGlobalData *gData, REMatchState *x) ExecuteREBytecode(REGlobalData *gData, match_state_t *x)
{ {
REMatchState *result = NULL; match_state_t *result = NULL;
REBackTrackData *backTrackData; REBackTrackData *backTrackData;
jsbytecode *nextpc, *testpc; jsbytecode *nextpc, *testpc;
REOp nextop; REOp nextop;
@ -2919,7 +2943,7 @@ ExecuteREBytecode(REGlobalData *gData, REMatchState *x)
TRACE("{%d,%d}\n", curState->u.quantifier.min, curState->u.quantifier.max); TRACE("{%d,%d}\n", curState->u.quantifier.min, curState->u.quantifier.max);
#define PREPARE_REPEAT() \ #define PREPARE_REPEAT() \
do { \ do { \
curState->index = x->cp - gData->cpbegin; \ curState->index = x->cp - gData->cpbegin; \
curState->continue_op = REOP_MINIMALREPEAT; \ curState->continue_op = REOP_MINIMALREPEAT; \
curState->continue_pc = pc; \ curState->continue_pc = pc; \
pc += ARG_LEN; \ pc += ARG_LEN; \
@ -3046,9 +3070,9 @@ good:
return x; return x;
} }
static REMatchState *MatchRegExp(REGlobalData *gData, REMatchState *x) static match_state_t *MatchRegExp(REGlobalData *gData, match_state_t *x)
{ {
REMatchState *result; match_state_t *result;
const WCHAR *cp = x->cp; const WCHAR *cp = x->cp;
const WCHAR *cp2; const WCHAR *cp2;
UINT j; UINT j;
@ -3073,11 +3097,8 @@ static REMatchState *MatchRegExp(REGlobalData *gData, REMatchState *x)
return NULL; return NULL;
} }
#define MIN_BACKTRACK_LIMIT 400000 static HRESULT InitMatch(regexp_t *re, void *cx, heap_pool_t *pool, REGlobalData *gData)
static REMatchState *InitMatch(script_ctx_t *cx, REGlobalData *gData, JSRegExp *re, size_t length)
{ {
REMatchState *result;
UINT i; UINT i;
gData->backTrackStackSize = INITIAL_BACKTRACK; gData->backTrackStackSize = INITIAL_BACKTRACK;
@ -3097,65 +3118,67 @@ static REMatchState *InitMatch(script_ctx_t *cx, REGlobalData *gData, JSRegExp *
gData->stateStackTop = 0; gData->stateStackTop = 0;
gData->cx = cx; gData->cx = cx;
gData->pool = pool;
gData->regexp = re; gData->regexp = re;
gData->ok = TRUE; gData->ok = TRUE;
result = heap_pool_alloc(gData->pool, offsetof(REMatchState, parens) + re->parenCount * sizeof(RECapture));
if (!result)
goto bad;
for (i = 0; i < re->classCount; i++) { for (i = 0; i < re->classCount; i++) {
if (!re->classList[i].converted && if (!re->classList[i].converted &&
!ProcessCharSet(gData, &re->classList[i])) { !ProcessCharSet(gData, &re->classList[i])) {
return NULL; return E_FAIL;
} }
} }
return result; return S_OK;
bad: bad:
js_ReportOutOfScriptQuota(cx); js_ReportOutOfScriptQuota(cx);
gData->ok = FALSE; gData->ok = FALSE;
return NULL; return E_OUTOFMEMORY;
} }
HRESULT MatchRegExpNext(JSRegExp *jsregexp, const WCHAR *str, DWORD str_len, HRESULT regexp_execute(regexp_t *regexp, void *cx, heap_pool_t *pool,
const WCHAR **cp, heap_pool_t *pool, REMatchState **result, DWORD *matchlen) const WCHAR *str, DWORD str_len, match_state_t *result)
{ {
REMatchState *x, *res; match_state_t *res;
REGlobalData gData; REGlobalData gData;
heap_pool_t *mark = heap_pool_mark(pool);
const WCHAR *str_beg = result->cp;
HRESULT hres;
assert(result->cp != NULL);
gData.cpbegin = str; gData.cpbegin = str;
gData.cpend = str+str_len; gData.cpend = str+str_len;
gData.start = *cp-str; gData.start = result->cp-str;
gData.skipped = 0; gData.skipped = 0;
gData.pool = pool; gData.pool = pool;
x = InitMatch(NULL, &gData, jsregexp, gData.cpend - gData.cpbegin); hres = InitMatch(regexp, cx, pool, &gData);
if(!x) { if(FAILED(hres)) {
WARN("InitMatch failed\n"); WARN("InitMatch failed\n");
return E_FAIL; heap_pool_clear(mark);
return hres;
} }
x->cp = *cp; res = MatchRegExp(&gData, result);
res = MatchRegExp(&gData, x); heap_pool_clear(mark);
if(!gData.ok) { if(!gData.ok) {
WARN("MatchRegExp failed\n"); WARN("MatchRegExp failed\n");
return E_FAIL; return E_FAIL;
} }
*result = res;
if(!res) { if(!res) {
*matchlen = 0; result->match_len = 0;
return S_FALSE; return S_FALSE;
} }
*matchlen = (res->cp-*cp) - gData.skipped; result->match_len = (result->cp-str_beg) - gData.skipped;
*cp = res->cp; result->paren_count = regexp->parenCount;
return S_OK; return S_OK;
} }
void js_DestroyRegExp(JSRegExp *re) void regexp_destroy(regexp_t *re)
{ {
if (re->classList) { if (re->classList) {
UINT i; UINT i;
@ -3169,9 +3192,10 @@ void js_DestroyRegExp(JSRegExp *re)
heap_free(re); heap_free(re);
} }
JSRegExp* js_NewRegExp(void *cx, heap_pool_t *pool, const WCHAR *str, DWORD str_len, UINT flags, BOOL flat) regexp_t* regexp_new(void *cx, heap_pool_t *pool, const WCHAR *str,
DWORD str_len, WORD flags, BOOL flat)
{ {
JSRegExp *re; regexp_t *re;
heap_pool_t *mark; heap_pool_t *mark;
CompilerState state; CompilerState state;
size_t resize; size_t resize;
@ -3213,7 +3237,7 @@ JSRegExp* js_NewRegExp(void *cx, heap_pool_t *pool, const WCHAR *str, DWORD str_
if (!ParseRegExp(&state)) if (!ParseRegExp(&state))
goto out; goto out;
} }
resize = offsetof(JSRegExp, program) + state.progLength + 1; resize = offsetof(regexp_t, program) + state.progLength + 1;
re = heap_alloc(resize); re = heap_alloc(resize);
if (!re) if (!re)
goto out; goto out;
@ -3223,7 +3247,7 @@ JSRegExp* js_NewRegExp(void *cx, heap_pool_t *pool, const WCHAR *str, DWORD str_
if (re->classCount) { if (re->classCount) {
re->classList = heap_alloc(re->classCount * sizeof(RECharSet)); re->classList = heap_alloc(re->classCount * sizeof(RECharSet));
if (!re->classList) { if (!re->classList) {
js_DestroyRegExp(re); regexp_destroy(re);
re = NULL; re = NULL;
goto out; goto out;
} }
@ -3234,7 +3258,7 @@ JSRegExp* js_NewRegExp(void *cx, heap_pool_t *pool, const WCHAR *str, DWORD str_
} }
endPC = EmitREBytecode(&state, re, state.treeDepth, re->program, state.result); endPC = EmitREBytecode(&state, re, state.treeDepth, re->program, state.result);
if (!endPC) { if (!endPC) {
js_DestroyRegExp(re); regexp_destroy(re);
re = NULL; re = NULL;
goto out; goto out;
} }
@ -3245,9 +3269,9 @@ JSRegExp* js_NewRegExp(void *cx, heap_pool_t *pool, const WCHAR *str, DWORD str_
* besides re exist here. * besides re exist here.
*/ */
if ((size_t)(endPC - re->program) != state.progLength + 1) { if ((size_t)(endPC - re->program) != state.progLength + 1) {
JSRegExp *tmp; regexp_t *tmp;
assert((size_t)(endPC - re->program) < state.progLength + 1); assert((size_t)(endPC - re->program) < state.progLength + 1);
resize = offsetof(JSRegExp, program) + (endPC - re->program); resize = offsetof(regexp_t, program) + (endPC - re->program);
tmp = heap_realloc(re, resize); tmp = heap_realloc(re, resize);
if (tmp) if (tmp)
re = tmp; re = tmp;

View File

@ -41,50 +41,41 @@ typedef struct RECapture {
size_t length; /* length of capture */ size_t length; /* length of capture */
} RECapture; } RECapture;
typedef struct REMatchState { typedef struct match_state_t {
const WCHAR *cp; const WCHAR *cp;
RECapture parens[1]; /* first of 're->parenCount' captures, DWORD match_len;
allocated at end of this struct */
} REMatchState;
DWORD paren_count;
RECapture parens[1];
} match_state_t;
typedef BYTE JSPackedBool;
typedef BYTE jsbytecode; typedef BYTE jsbytecode;
/* typedef struct regexp_t {
* This struct holds a bitmap representation of a class from a regexp. WORD flags; /* flags, see jsapi.h's REG_* defines */
* There's a list of these referenced by the classList field in the JSRegExp size_t parenCount; /* number of parenthesized submatches */
* struct below. The initial state has startIndex set to the offset in the size_t classCount; /* count [...] bitmaps */
* original regexp source of the beginning of the class contents. The first struct RECharSet *classList; /* list of [...] bitmaps */
* use of the class converts the source representation into a bitmap. const WCHAR *source; /* locked source string, sans // */
* DWORD source_len;
*/ jsbytecode program[1]; /* regular expression bytecode */
typedef struct RECharSet { } regexp_t;
JSPackedBool converted;
JSPackedBool sense;
WORD length;
union {
BYTE *bits;
struct {
size_t startIndex;
size_t length;
} src;
} u;
} RECharSet;
typedef struct JSRegExp { regexp_t* regexp_new(void*, heap_pool_t*, const WCHAR*, DWORD, WORD, BOOL) DECLSPEC_HIDDEN;
WORD flags; /* flags, see jsapi.h's REG_* defines */ void regexp_destroy(regexp_t*) DECLSPEC_HIDDEN;
size_t parenCount; /* number of parenthesized submatches */ HRESULT regexp_execute(regexp_t*, void*, heap_pool_t*, const WCHAR*,
size_t classCount; /* count [...] bitmaps */ DWORD, match_state_t*) DECLSPEC_HIDDEN;
RECharSet *classList; /* list of [...] bitmaps */
const WCHAR *source; /* locked source string, sans // */
DWORD source_len;
jsbytecode program[1]; /* regular expression bytecode */
} JSRegExp;
JSRegExp* js_NewRegExp(void *cx, heap_pool_t *pool, const WCHAR *str, static inline match_state_t* alloc_match_state(regexp_t *regexp,
DWORD str_len, UINT flags, BOOL flat) DECLSPEC_HIDDEN; heap_pool_t *pool, const WCHAR *pos)
void js_DestroyRegExp(JSRegExp *re) DECLSPEC_HIDDEN; {
HRESULT MatchRegExpNext(JSRegExp *jsregexp, const WCHAR *str, size_t size = offsetof(match_state_t, parens) + regexp->parenCount*sizeof(RECapture);
DWORD str_len, const WCHAR **cp, heap_pool_t *pool, match_state_t *ret;
REMatchState **result, DWORD *matchlen) DECLSPEC_HIDDEN;
ret = pool ? heap_pool_alloc(pool, size) : heap_alloc(size);
if(!ret)
return NULL;
ret->cp = pos;
return ret;
}