diff --git a/dlls/jscript/jsregexp.c b/dlls/jscript/jsregexp.c index 92d1166a93a..e7327e2c77f 100644 --- a/dlls/jscript/jsregexp.c +++ b/dlls/jscript/jsregexp.c @@ -28,7 +28,7 @@ WINE_DEFAULT_DEBUG_CHANNEL(jscript); typedef struct { jsdisp_t dispex; - JSRegExp *jsregexp; + regexp_t *jsregexp; jsstr_t *str; INT last_index; jsval_t last_index_val; @@ -77,12 +77,15 @@ static HRESULT do_regexp_match_next(script_ctx_t *ctx, RegExpInstance *regexp, D jsstr_t *str, const WCHAR **cp, match_result_t **parens, DWORD *parens_size, DWORD *parens_cnt, match_result_t *ret) { - REMatchState *result; - DWORD matchlen; + match_state_t *result; HRESULT hres; - hres = MatchRegExpNext(regexp->jsregexp, str->str, jsstr_length(str), - cp, &ctx->tmp_heap, &result, &matchlen); + result = alloc_match_state(regexp->jsregexp, &ctx->tmp_heap, *cp); + if(!result) + return E_OUTOFMEMORY; + + hres = regexp_execute(regexp->jsregexp, ctx, &ctx->tmp_heap, + str->str, jsstr_length(str), result); if(FAILED(hres)) return hres; if(hres == S_FALSE) { @@ -90,19 +93,20 @@ static HRESULT do_regexp_match_next(script_ctx_t *ctx, RegExpInstance *regexp, D set_last_index(regexp, 0); return S_FALSE; } + *cp = result->cp; if(parens) { - if(regexp->jsregexp->parenCount > *parens_size) { + if(result->paren_count > *parens_size) { match_result_t *new_parens; if(*parens) - new_parens = heap_realloc(*parens, sizeof(match_result_t)*regexp->jsregexp->parenCount); + new_parens = heap_realloc(*parens, sizeof(match_result_t)*result->paren_count); else - new_parens = heap_alloc(sizeof(match_result_t)*regexp->jsregexp->parenCount); + new_parens = heap_alloc(sizeof(match_result_t)*result->paren_count); if(!new_parens) return E_OUTOFMEMORY; - *parens_size = regexp->jsregexp->parenCount; + *parens_size = result->paren_count; *parens = new_parens; } } @@ -115,9 +119,9 @@ static HRESULT do_regexp_match_next(script_ctx_t *ctx, RegExpInstance *regexp, D if(parens) { DWORD i; - *parens_cnt = regexp->jsregexp->parenCount; + 
*parens_cnt = result->paren_count; - for(i=0; i < regexp->jsregexp->parenCount; i++) { + for(i=0; i < result->paren_count; i++) { if(result->parens[i].index == -1) { (*parens)[i].str = NULL; (*parens)[i].len = 0; @@ -129,7 +133,7 @@ static HRESULT do_regexp_match_next(script_ctx_t *ctx, RegExpInstance *regexp, D } if(!(rem_flags & REM_NO_CTX_UPDATE)) { - DWORD i, n = min(sizeof(ctx->match_parens)/sizeof(ctx->match_parens[0]), regexp->jsregexp->parenCount); + DWORD i, n = min(sizeof(ctx->match_parens)/sizeof(ctx->match_parens[0]), result->paren_count); for(i=0; i < n; i++) { if(result->parens[i].index == -1) { @@ -145,13 +149,13 @@ static HRESULT do_regexp_match_next(script_ctx_t *ctx, RegExpInstance *regexp, D memset(ctx->match_parens+n, 0, sizeof(ctx->match_parens) - n*sizeof(ctx->match_parens[0])); } - ret->str = result->cp-matchlen; - ret->len = matchlen; + ret->str = result->cp - result->match_len; + ret->len = result->match_len; set_last_index(regexp, result->cp-str->str); if(!(rem_flags & REM_NO_CTX_UPDATE)) { ctx->last_match_index = ret->str-str->str; - ctx->last_match_length = matchlen; + ctx->last_match_length = result->match_len; } return S_OK; @@ -518,7 +522,7 @@ static void RegExp_destructor(jsdisp_t *dispex) RegExpInstance *This = (RegExpInstance*)dispex; if(This->jsregexp) - js_DestroyRegExp(This->jsregexp); + regexp_destroy(This->jsregexp); jsval_release(This->last_index_val); jsstr_release(This->str); heap_free(This); @@ -598,10 +602,10 @@ HRESULT create_regexp(script_ctx_t *ctx, jsstr_t *src, DWORD flags, jsdisp_t **r regexp->str = jsstr_addref(src); regexp->last_index_val = jsval_number(0); - regexp->jsregexp = js_NewRegExp(ctx, &ctx->tmp_heap, regexp->str->str, + regexp->jsregexp = regexp_new(ctx, &ctx->tmp_heap, regexp->str->str, jsstr_length(regexp->str), flags, FALSE); if(!regexp->jsregexp) { - WARN("js_NewRegExp failed\n"); + WARN("regexp_new failed\n"); jsdisp_release(&regexp->dispex); return E_FAIL; } diff --git 
a/dlls/jscript/regexp.c b/dlls/jscript/regexp.c index 8b4a1aad4f6..373be82a620 100644 --- a/dlls/jscript/regexp.c +++ b/dlls/jscript/regexp.c @@ -49,6 +49,30 @@ WINE_DEFAULT_DEBUG_CHANNEL(jscript); #define JS_ReportOutOfMemory(a) #define JS_COUNT_OPERATION(a,b) + +typedef BYTE JSPackedBool; + +/* + * This struct holds a bitmap representation of a class from a regexp. + * There's a list of these referenced by the classList field in the regexp_t + * struct below. The initial state has startIndex set to the offset in the + * original regexp source of the beginning of the class contents. The first + * use of the class converts the source representation into a bitmap. + * + */ +typedef struct RECharSet { + JSPackedBool converted; + JSPackedBool sense; + WORD length; + union { + BYTE *bits; + struct { + size_t startIndex; + size_t length; + } src; + } u; +} RECharSet; + #define JSMSG_MIN_TOO_BIG 47 #define JSMSG_MAX_TOO_BIG 48 #define JSMSG_OUT_OF_ORDER 49 @@ -209,7 +233,7 @@ typedef struct REBackTrackData { typedef struct REGlobalData { void *cx; - JSRegExp *regexp; /* the RE in execution */ + regexp_t *regexp; /* the RE in execution */ BOOL ok; /* runtime error (out_of_memory only?) */ size_t start; /* offset to start at */ ptrdiff_t skipped; /* chars skipped anchoring this r.e. */ @@ -285,7 +309,7 @@ typedef struct CompilerState { } classCache[CLASS_CACHE_SIZE]; WORD flags; - heap_pool_t *pool; /* It's faster to use one malloc'd pool + heap_pool_t *pool; /* It's faster to use one malloc'd pool than to malloc/free */ } CompilerState; @@ -448,7 +472,7 @@ SetForwardJumpOffset(jsbytecode *jump, jsbytecode *target) * of recursion. 
*/ static jsbytecode * -EmitREBytecode(CompilerState *state, JSRegExp *re, size_t treeDepth, +EmitREBytecode(CompilerState *state, regexp_t *re, size_t treeDepth, jsbytecode *pc, RENode *t) { EmitStateStackEntry *emitStateSP, *emitStateStack; @@ -1896,7 +1920,7 @@ out: */ static REBackTrackData * PushBackTrackState(REGlobalData *gData, REOp op, - jsbytecode *target, REMatchState *x, const WCHAR *cp, + jsbytecode *target, match_state_t *x, const WCHAR *cp, size_t parenIndex, size_t parenCount) { size_t i; @@ -1955,8 +1979,8 @@ PushBackTrackState(REGlobalData *gData, REOp op, return result; } -static inline REMatchState * -FlatNIMatcher(REGlobalData *gData, REMatchState *x, const WCHAR *matchChars, +static inline match_state_t * +FlatNIMatcher(REGlobalData *gData, match_state_t *x, const WCHAR *matchChars, size_t length) { size_t i; @@ -1994,8 +2018,8 @@ FlatNIMatcher(REGlobalData *gData, REMatchState *x, const WCHAR *matchChars, * 9. Let y be the State (f, cap). * 10. Call c(y) and return its result. */ -static REMatchState * -BackrefMatcher(REGlobalData *gData, REMatchState *x, size_t parenIndex) +static match_state_t * +BackrefMatcher(REGlobalData *gData, match_state_t *x, size_t parenIndex) { size_t len, i; const WCHAR *parenContent; @@ -2300,11 +2324,11 @@ ReallocStateStack(REGlobalData *gData) * true, then update the current state's cp. Always update startpc to the next * op. 
*/ -static inline REMatchState * -SimpleMatch(REGlobalData *gData, REMatchState *x, REOp op, +static inline match_state_t * +SimpleMatch(REGlobalData *gData, match_state_t *x, REOp op, jsbytecode **startpc, BOOL updatecp) { - REMatchState *result = NULL; + match_state_t *result = NULL; WCHAR matchCh; size_t parenIndex; size_t offset, length, index; @@ -2508,10 +2532,10 @@ SimpleMatch(REGlobalData *gData, REMatchState *x, REOp op, return NULL; } -static inline REMatchState * -ExecuteREBytecode(REGlobalData *gData, REMatchState *x) +static inline match_state_t * +ExecuteREBytecode(REGlobalData *gData, match_state_t *x) { - REMatchState *result = NULL; + match_state_t *result = NULL; REBackTrackData *backTrackData; jsbytecode *nextpc, *testpc; REOp nextop; @@ -2919,7 +2943,7 @@ ExecuteREBytecode(REGlobalData *gData, REMatchState *x) TRACE("{%d,%d}\n", curState->u.quantifier.min, curState->u.quantifier.max); #define PREPARE_REPEAT() \ do { \ - curState->index = x->cp - gData->cpbegin; \ + curState->index = x->cp - gData->cpbegin; \ curState->continue_op = REOP_MINIMALREPEAT; \ curState->continue_pc = pc; \ pc += ARG_LEN; \ @@ -3046,9 +3070,9 @@ good: return x; } -static REMatchState *MatchRegExp(REGlobalData *gData, REMatchState *x) +static match_state_t *MatchRegExp(REGlobalData *gData, match_state_t *x) { - REMatchState *result; + match_state_t *result; const WCHAR *cp = x->cp; const WCHAR *cp2; UINT j; @@ -3073,11 +3097,8 @@ static REMatchState *MatchRegExp(REGlobalData *gData, REMatchState *x) return NULL; } -#define MIN_BACKTRACK_LIMIT 400000 - -static REMatchState *InitMatch(script_ctx_t *cx, REGlobalData *gData, JSRegExp *re, size_t length) +static HRESULT InitMatch(regexp_t *re, void *cx, heap_pool_t *pool, REGlobalData *gData) { - REMatchState *result; UINT i; gData->backTrackStackSize = INITIAL_BACKTRACK; @@ -3097,65 +3118,67 @@ static REMatchState *InitMatch(script_ctx_t *cx, REGlobalData *gData, JSRegExp * gData->stateStackTop = 0; gData->cx = cx; + 
gData->pool = pool; gData->regexp = re; gData->ok = TRUE; - result = heap_pool_alloc(gData->pool, offsetof(REMatchState, parens) + re->parenCount * sizeof(RECapture)); - if (!result) - goto bad; - for (i = 0; i < re->classCount; i++) { if (!re->classList[i].converted && - !ProcessCharSet(gData, &re->classList[i])) { - return NULL; + !ProcessCharSet(gData, &re->classList[i])) { + return E_FAIL; } } - return result; + return S_OK; bad: js_ReportOutOfScriptQuota(cx); gData->ok = FALSE; - return NULL; + return E_OUTOFMEMORY; } -HRESULT MatchRegExpNext(JSRegExp *jsregexp, const WCHAR *str, DWORD str_len, - const WCHAR **cp, heap_pool_t *pool, REMatchState **result, DWORD *matchlen) +HRESULT regexp_execute(regexp_t *regexp, void *cx, heap_pool_t *pool, + const WCHAR *str, DWORD str_len, match_state_t *result) { - REMatchState *x, *res; + match_state_t *res; REGlobalData gData; + heap_pool_t *mark = heap_pool_mark(pool); + const WCHAR *str_beg = result->cp; + HRESULT hres; + + assert(result->cp != NULL); gData.cpbegin = str; gData.cpend = str+str_len; - gData.start = *cp-str; + gData.start = result->cp-str; gData.skipped = 0; gData.pool = pool; - x = InitMatch(NULL, &gData, jsregexp, gData.cpend - gData.cpbegin); - if(!x) { + hres = InitMatch(regexp, cx, pool, &gData); + if(FAILED(hres)) { WARN("InitMatch failed\n"); - return E_FAIL; + heap_pool_clear(mark); + return hres; } - x->cp = *cp; - res = MatchRegExp(&gData, x); + res = MatchRegExp(&gData, result); + heap_pool_clear(mark); if(!gData.ok) { WARN("MatchRegExp failed\n"); return E_FAIL; } - *result = res; if(!res) { - *matchlen = 0; + result->match_len = 0; return S_FALSE; } - *matchlen = (res->cp-*cp) - gData.skipped; - *cp = res->cp; + result->match_len = (result->cp-str_beg) - gData.skipped; + result->paren_count = regexp->parenCount; return S_OK; } -void js_DestroyRegExp(JSRegExp *re) +void regexp_destroy(regexp_t *re) { if (re->classList) { UINT i; @@ -3169,9 +3192,10 @@ void js_DestroyRegExp(JSRegExp *re) 
heap_free(re); } -JSRegExp* js_NewRegExp(void *cx, heap_pool_t *pool, const WCHAR *str, DWORD str_len, UINT flags, BOOL flat) +regexp_t* regexp_new(void *cx, heap_pool_t *pool, const WCHAR *str, + DWORD str_len, WORD flags, BOOL flat) { - JSRegExp *re; + regexp_t *re; heap_pool_t *mark; CompilerState state; size_t resize; @@ -3213,7 +3237,7 @@ JSRegExp* js_NewRegExp(void *cx, heap_pool_t *pool, const WCHAR *str, DWORD str_ if (!ParseRegExp(&state)) goto out; } - resize = offsetof(JSRegExp, program) + state.progLength + 1; + resize = offsetof(regexp_t, program) + state.progLength + 1; re = heap_alloc(resize); if (!re) goto out; @@ -3223,7 +3247,7 @@ JSRegExp* js_NewRegExp(void *cx, heap_pool_t *pool, const WCHAR *str, DWORD str_ if (re->classCount) { re->classList = heap_alloc(re->classCount * sizeof(RECharSet)); if (!re->classList) { - js_DestroyRegExp(re); + regexp_destroy(re); re = NULL; goto out; } @@ -3234,7 +3258,7 @@ JSRegExp* js_NewRegExp(void *cx, heap_pool_t *pool, const WCHAR *str, DWORD str_ } endPC = EmitREBytecode(&state, re, state.treeDepth, re->program, state.result); if (!endPC) { - js_DestroyRegExp(re); + regexp_destroy(re); re = NULL; goto out; } @@ -3245,9 +3269,9 @@ JSRegExp* js_NewRegExp(void *cx, heap_pool_t *pool, const WCHAR *str, DWORD str_ * besides re exist here. 
*/ if ((size_t)(endPC - re->program) != state.progLength + 1) { - JSRegExp *tmp; + regexp_t *tmp; assert((size_t)(endPC - re->program) < state.progLength + 1); - resize = offsetof(JSRegExp, program) + (endPC - re->program); + resize = offsetof(regexp_t, program) + (endPC - re->program); tmp = heap_realloc(re, resize); if (tmp) re = tmp; diff --git a/dlls/jscript/regexp.h b/dlls/jscript/regexp.h index a882878411d..de86aa215c6 100644 --- a/dlls/jscript/regexp.h +++ b/dlls/jscript/regexp.h @@ -41,50 +41,41 @@ typedef struct RECapture { size_t length; /* length of capture */ } RECapture; -typedef struct REMatchState { +typedef struct match_state_t { const WCHAR *cp; - RECapture parens[1]; /* first of 're->parenCount' captures, - allocated at end of this struct */ -} REMatchState; + DWORD match_len; + DWORD paren_count; + RECapture parens[1]; +} match_state_t; -typedef BYTE JSPackedBool; typedef BYTE jsbytecode; -/* - * This struct holds a bitmap representation of a class from a regexp. - * There's a list of these referenced by the classList field in the JSRegExp - * struct below. The initial state has startIndex set to the offset in the - * original regexp source of the beginning of the class contents. The first - * use of the class converts the source representation into a bitmap. - * - */ -typedef struct RECharSet { - JSPackedBool converted; - JSPackedBool sense; - WORD length; - union { - BYTE *bits; - struct { - size_t startIndex; - size_t length; - } src; - } u; -} RECharSet; +typedef struct regexp_t { + WORD flags; /* flags, see jsapi.h's REG_* defines */ + size_t parenCount; /* number of parenthesized submatches */ + size_t classCount; /* count [...] bitmaps */ + struct RECharSet *classList; /* list of [...] 
bitmaps */ + const WCHAR *source; /* locked source string, sans // */ + DWORD source_len; + jsbytecode program[1]; /* regular expression bytecode */ +} regexp_t; -typedef struct JSRegExp { - WORD flags; /* flags, see jsapi.h's REG_* defines */ - size_t parenCount; /* number of parenthesized submatches */ - size_t classCount; /* count [...] bitmaps */ - RECharSet *classList; /* list of [...] bitmaps */ - const WCHAR *source; /* locked source string, sans // */ - DWORD source_len; - jsbytecode program[1]; /* regular expression bytecode */ -} JSRegExp; +regexp_t* regexp_new(void*, heap_pool_t*, const WCHAR*, DWORD, WORD, BOOL) DECLSPEC_HIDDEN; +void regexp_destroy(regexp_t*) DECLSPEC_HIDDEN; +HRESULT regexp_execute(regexp_t*, void*, heap_pool_t*, const WCHAR*, + DWORD, match_state_t*) DECLSPEC_HIDDEN; -JSRegExp* js_NewRegExp(void *cx, heap_pool_t *pool, const WCHAR *str, - DWORD str_len, UINT flags, BOOL flat) DECLSPEC_HIDDEN; -void js_DestroyRegExp(JSRegExp *re) DECLSPEC_HIDDEN; -HRESULT MatchRegExpNext(JSRegExp *jsregexp, const WCHAR *str, - DWORD str_len, const WCHAR **cp, heap_pool_t *pool, - REMatchState **result, DWORD *matchlen) DECLSPEC_HIDDEN; +static inline match_state_t* alloc_match_state(regexp_t *regexp, + heap_pool_t *pool, const WCHAR *pos) +{ + size_t size = offsetof(match_state_t, parens) + regexp->parenCount*sizeof(RECapture); + match_state_t *ret; + + ret = pool ? heap_pool_alloc(pool, size) : heap_alloc(size); + if(!ret) + return NULL; + + ret->cp = pos; + return ret; +}