jscript: Cleanup regular expressions engine API.
This commit is contained in:
parent
4c6a1d1d92
commit
67c7cc5517
|
@ -28,7 +28,7 @@ WINE_DEFAULT_DEBUG_CHANNEL(jscript);
|
|||
typedef struct {
|
||||
jsdisp_t dispex;
|
||||
|
||||
JSRegExp *jsregexp;
|
||||
regexp_t *jsregexp;
|
||||
jsstr_t *str;
|
||||
INT last_index;
|
||||
jsval_t last_index_val;
|
||||
|
@ -77,12 +77,15 @@ static HRESULT do_regexp_match_next(script_ctx_t *ctx, RegExpInstance *regexp, D
|
|||
jsstr_t *str, const WCHAR **cp, match_result_t **parens, DWORD *parens_size,
|
||||
DWORD *parens_cnt, match_result_t *ret)
|
||||
{
|
||||
REMatchState *result;
|
||||
DWORD matchlen;
|
||||
match_state_t *result;
|
||||
HRESULT hres;
|
||||
|
||||
hres = MatchRegExpNext(regexp->jsregexp, str->str, jsstr_length(str),
|
||||
cp, &ctx->tmp_heap, &result, &matchlen);
|
||||
result = alloc_match_state(regexp->jsregexp, &ctx->tmp_heap, *cp);
|
||||
if(!result)
|
||||
return E_OUTOFMEMORY;
|
||||
|
||||
hres = regexp_execute(regexp->jsregexp, ctx, &ctx->tmp_heap,
|
||||
str->str, jsstr_length(str), result);
|
||||
if(FAILED(hres))
|
||||
return hres;
|
||||
if(hres == S_FALSE) {
|
||||
|
@ -90,19 +93,20 @@ static HRESULT do_regexp_match_next(script_ctx_t *ctx, RegExpInstance *regexp, D
|
|||
set_last_index(regexp, 0);
|
||||
return S_FALSE;
|
||||
}
|
||||
*cp = result->cp;
|
||||
|
||||
if(parens) {
|
||||
if(regexp->jsregexp->parenCount > *parens_size) {
|
||||
if(result->paren_count > *parens_size) {
|
||||
match_result_t *new_parens;
|
||||
|
||||
if(*parens)
|
||||
new_parens = heap_realloc(*parens, sizeof(match_result_t)*regexp->jsregexp->parenCount);
|
||||
new_parens = heap_realloc(*parens, sizeof(match_result_t)*result->paren_count);
|
||||
else
|
||||
new_parens = heap_alloc(sizeof(match_result_t)*regexp->jsregexp->parenCount);
|
||||
new_parens = heap_alloc(sizeof(match_result_t)*result->paren_count);
|
||||
if(!new_parens)
|
||||
return E_OUTOFMEMORY;
|
||||
|
||||
*parens_size = regexp->jsregexp->parenCount;
|
||||
*parens_size = result->paren_count;
|
||||
*parens = new_parens;
|
||||
}
|
||||
}
|
||||
|
@ -115,9 +119,9 @@ static HRESULT do_regexp_match_next(script_ctx_t *ctx, RegExpInstance *regexp, D
|
|||
if(parens) {
|
||||
DWORD i;
|
||||
|
||||
*parens_cnt = regexp->jsregexp->parenCount;
|
||||
*parens_cnt = result->paren_count;
|
||||
|
||||
for(i=0; i < regexp->jsregexp->parenCount; i++) {
|
||||
for(i=0; i < result->paren_count; i++) {
|
||||
if(result->parens[i].index == -1) {
|
||||
(*parens)[i].str = NULL;
|
||||
(*parens)[i].len = 0;
|
||||
|
@ -129,7 +133,7 @@ static HRESULT do_regexp_match_next(script_ctx_t *ctx, RegExpInstance *regexp, D
|
|||
}
|
||||
|
||||
if(!(rem_flags & REM_NO_CTX_UPDATE)) {
|
||||
DWORD i, n = min(sizeof(ctx->match_parens)/sizeof(ctx->match_parens[0]), regexp->jsregexp->parenCount);
|
||||
DWORD i, n = min(sizeof(ctx->match_parens)/sizeof(ctx->match_parens[0]), result->paren_count);
|
||||
|
||||
for(i=0; i < n; i++) {
|
||||
if(result->parens[i].index == -1) {
|
||||
|
@ -145,13 +149,13 @@ static HRESULT do_regexp_match_next(script_ctx_t *ctx, RegExpInstance *regexp, D
|
|||
memset(ctx->match_parens+n, 0, sizeof(ctx->match_parens) - n*sizeof(ctx->match_parens[0]));
|
||||
}
|
||||
|
||||
ret->str = result->cp-matchlen;
|
||||
ret->len = matchlen;
|
||||
ret->str = result->cp - result->match_len;
|
||||
ret->len = result->match_len;
|
||||
set_last_index(regexp, result->cp-str->str);
|
||||
|
||||
if(!(rem_flags & REM_NO_CTX_UPDATE)) {
|
||||
ctx->last_match_index = ret->str-str->str;
|
||||
ctx->last_match_length = matchlen;
|
||||
ctx->last_match_length = result->match_len;
|
||||
}
|
||||
|
||||
return S_OK;
|
||||
|
@ -518,7 +522,7 @@ static void RegExp_destructor(jsdisp_t *dispex)
|
|||
RegExpInstance *This = (RegExpInstance*)dispex;
|
||||
|
||||
if(This->jsregexp)
|
||||
js_DestroyRegExp(This->jsregexp);
|
||||
regexp_destroy(This->jsregexp);
|
||||
jsval_release(This->last_index_val);
|
||||
jsstr_release(This->str);
|
||||
heap_free(This);
|
||||
|
@ -598,10 +602,10 @@ HRESULT create_regexp(script_ctx_t *ctx, jsstr_t *src, DWORD flags, jsdisp_t **r
|
|||
regexp->str = jsstr_addref(src);
|
||||
regexp->last_index_val = jsval_number(0);
|
||||
|
||||
regexp->jsregexp = js_NewRegExp(ctx, &ctx->tmp_heap, regexp->str->str,
|
||||
regexp->jsregexp = regexp_new(ctx, &ctx->tmp_heap, regexp->str->str,
|
||||
jsstr_length(regexp->str), flags, FALSE);
|
||||
if(!regexp->jsregexp) {
|
||||
WARN("js_NewRegExp failed\n");
|
||||
if(FAILED(hres)) {
|
||||
WARN("regexp_new failed\n");
|
||||
jsdisp_release(&regexp->dispex);
|
||||
return E_FAIL;
|
||||
}
|
||||
|
|
|
@ -49,6 +49,30 @@ WINE_DEFAULT_DEBUG_CHANNEL(jscript);
|
|||
#define JS_ReportOutOfMemory(a)
|
||||
#define JS_COUNT_OPERATION(a,b)
|
||||
|
||||
|
||||
typedef BYTE JSPackedBool;
|
||||
|
||||
/*
|
||||
* This struct holds a bitmap representation of a class from a regexp.
|
||||
* There's a list of these referenced by the classList field in the regexp_t
|
||||
* struct below. The initial state has startIndex set to the offset in the
|
||||
* original regexp source of the beginning of the class contents. The first
|
||||
* use of the class converts the source representation into a bitmap.
|
||||
*
|
||||
*/
|
||||
typedef struct RECharSet {
|
||||
JSPackedBool converted;
|
||||
JSPackedBool sense;
|
||||
WORD length;
|
||||
union {
|
||||
BYTE *bits;
|
||||
struct {
|
||||
size_t startIndex;
|
||||
size_t length;
|
||||
} src;
|
||||
} u;
|
||||
} RECharSet;
|
||||
|
||||
#define JSMSG_MIN_TOO_BIG 47
|
||||
#define JSMSG_MAX_TOO_BIG 48
|
||||
#define JSMSG_OUT_OF_ORDER 49
|
||||
|
@ -209,7 +233,7 @@ typedef struct REBackTrackData {
|
|||
|
||||
typedef struct REGlobalData {
|
||||
void *cx;
|
||||
JSRegExp *regexp; /* the RE in execution */
|
||||
regexp_t *regexp; /* the RE in execution */
|
||||
BOOL ok; /* runtime error (out_of_memory only?) */
|
||||
size_t start; /* offset to start at */
|
||||
ptrdiff_t skipped; /* chars skipped anchoring this r.e. */
|
||||
|
@ -448,7 +472,7 @@ SetForwardJumpOffset(jsbytecode *jump, jsbytecode *target)
|
|||
* of recursion.
|
||||
*/
|
||||
static jsbytecode *
|
||||
EmitREBytecode(CompilerState *state, JSRegExp *re, size_t treeDepth,
|
||||
EmitREBytecode(CompilerState *state, regexp_t *re, size_t treeDepth,
|
||||
jsbytecode *pc, RENode *t)
|
||||
{
|
||||
EmitStateStackEntry *emitStateSP, *emitStateStack;
|
||||
|
@ -1896,7 +1920,7 @@ out:
|
|||
*/
|
||||
static REBackTrackData *
|
||||
PushBackTrackState(REGlobalData *gData, REOp op,
|
||||
jsbytecode *target, REMatchState *x, const WCHAR *cp,
|
||||
jsbytecode *target, match_state_t *x, const WCHAR *cp,
|
||||
size_t parenIndex, size_t parenCount)
|
||||
{
|
||||
size_t i;
|
||||
|
@ -1955,8 +1979,8 @@ PushBackTrackState(REGlobalData *gData, REOp op,
|
|||
return result;
|
||||
}
|
||||
|
||||
static inline REMatchState *
|
||||
FlatNIMatcher(REGlobalData *gData, REMatchState *x, const WCHAR *matchChars,
|
||||
static inline match_state_t *
|
||||
FlatNIMatcher(REGlobalData *gData, match_state_t *x, const WCHAR *matchChars,
|
||||
size_t length)
|
||||
{
|
||||
size_t i;
|
||||
|
@ -1994,8 +2018,8 @@ FlatNIMatcher(REGlobalData *gData, REMatchState *x, const WCHAR *matchChars,
|
|||
* 9. Let y be the State (f, cap).
|
||||
* 10. Call c(y) and return its result.
|
||||
*/
|
||||
static REMatchState *
|
||||
BackrefMatcher(REGlobalData *gData, REMatchState *x, size_t parenIndex)
|
||||
static match_state_t *
|
||||
BackrefMatcher(REGlobalData *gData, match_state_t *x, size_t parenIndex)
|
||||
{
|
||||
size_t len, i;
|
||||
const WCHAR *parenContent;
|
||||
|
@ -2300,11 +2324,11 @@ ReallocStateStack(REGlobalData *gData)
|
|||
* true, then update the current state's cp. Always update startpc to the next
|
||||
* op.
|
||||
*/
|
||||
static inline REMatchState *
|
||||
SimpleMatch(REGlobalData *gData, REMatchState *x, REOp op,
|
||||
static inline match_state_t *
|
||||
SimpleMatch(REGlobalData *gData, match_state_t *x, REOp op,
|
||||
jsbytecode **startpc, BOOL updatecp)
|
||||
{
|
||||
REMatchState *result = NULL;
|
||||
match_state_t *result = NULL;
|
||||
WCHAR matchCh;
|
||||
size_t parenIndex;
|
||||
size_t offset, length, index;
|
||||
|
@ -2508,10 +2532,10 @@ SimpleMatch(REGlobalData *gData, REMatchState *x, REOp op,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
static inline REMatchState *
|
||||
ExecuteREBytecode(REGlobalData *gData, REMatchState *x)
|
||||
static inline match_state_t *
|
||||
ExecuteREBytecode(REGlobalData *gData, match_state_t *x)
|
||||
{
|
||||
REMatchState *result = NULL;
|
||||
match_state_t *result = NULL;
|
||||
REBackTrackData *backTrackData;
|
||||
jsbytecode *nextpc, *testpc;
|
||||
REOp nextop;
|
||||
|
@ -3046,9 +3070,9 @@ good:
|
|||
return x;
|
||||
}
|
||||
|
||||
static REMatchState *MatchRegExp(REGlobalData *gData, REMatchState *x)
|
||||
static match_state_t *MatchRegExp(REGlobalData *gData, match_state_t *x)
|
||||
{
|
||||
REMatchState *result;
|
||||
match_state_t *result;
|
||||
const WCHAR *cp = x->cp;
|
||||
const WCHAR *cp2;
|
||||
UINT j;
|
||||
|
@ -3073,11 +3097,8 @@ static REMatchState *MatchRegExp(REGlobalData *gData, REMatchState *x)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
#define MIN_BACKTRACK_LIMIT 400000
|
||||
|
||||
static REMatchState *InitMatch(script_ctx_t *cx, REGlobalData *gData, JSRegExp *re, size_t length)
|
||||
static HRESULT InitMatch(regexp_t *re, void *cx, heap_pool_t *pool, REGlobalData *gData)
|
||||
{
|
||||
REMatchState *result;
|
||||
UINT i;
|
||||
|
||||
gData->backTrackStackSize = INITIAL_BACKTRACK;
|
||||
|
@ -3097,65 +3118,67 @@ static REMatchState *InitMatch(script_ctx_t *cx, REGlobalData *gData, JSRegExp *
|
|||
|
||||
gData->stateStackTop = 0;
|
||||
gData->cx = cx;
|
||||
gData->pool = pool;
|
||||
gData->regexp = re;
|
||||
gData->ok = TRUE;
|
||||
|
||||
result = heap_pool_alloc(gData->pool, offsetof(REMatchState, parens) + re->parenCount * sizeof(RECapture));
|
||||
if (!result)
|
||||
goto bad;
|
||||
|
||||
for (i = 0; i < re->classCount; i++) {
|
||||
if (!re->classList[i].converted &&
|
||||
!ProcessCharSet(gData, &re->classList[i])) {
|
||||
return NULL;
|
||||
return E_FAIL;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
return S_OK;
|
||||
|
||||
bad:
|
||||
js_ReportOutOfScriptQuota(cx);
|
||||
gData->ok = FALSE;
|
||||
return NULL;
|
||||
return E_OUTOFMEMORY;
|
||||
}
|
||||
|
||||
HRESULT MatchRegExpNext(JSRegExp *jsregexp, const WCHAR *str, DWORD str_len,
|
||||
const WCHAR **cp, heap_pool_t *pool, REMatchState **result, DWORD *matchlen)
|
||||
HRESULT regexp_execute(regexp_t *regexp, void *cx, heap_pool_t *pool,
|
||||
const WCHAR *str, DWORD str_len, match_state_t *result)
|
||||
{
|
||||
REMatchState *x, *res;
|
||||
match_state_t *res;
|
||||
REGlobalData gData;
|
||||
heap_pool_t *mark = heap_pool_mark(pool);
|
||||
const WCHAR *str_beg = result->cp;
|
||||
HRESULT hres;
|
||||
|
||||
assert(result->cp != NULL);
|
||||
|
||||
gData.cpbegin = str;
|
||||
gData.cpend = str+str_len;
|
||||
gData.start = *cp-str;
|
||||
gData.start = result->cp-str;
|
||||
gData.skipped = 0;
|
||||
gData.pool = pool;
|
||||
|
||||
x = InitMatch(NULL, &gData, jsregexp, gData.cpend - gData.cpbegin);
|
||||
if(!x) {
|
||||
hres = InitMatch(regexp, cx, pool, &gData);
|
||||
if(FAILED(hres)) {
|
||||
WARN("InitMatch failed\n");
|
||||
return E_FAIL;
|
||||
heap_pool_clear(mark);
|
||||
return hres;
|
||||
}
|
||||
|
||||
x->cp = *cp;
|
||||
res = MatchRegExp(&gData, x);
|
||||
res = MatchRegExp(&gData, result);
|
||||
heap_pool_clear(mark);
|
||||
if(!gData.ok) {
|
||||
WARN("MatchRegExp failed\n");
|
||||
return E_FAIL;
|
||||
}
|
||||
|
||||
*result = res;
|
||||
if(!res) {
|
||||
*matchlen = 0;
|
||||
result->match_len = 0;
|
||||
return S_FALSE;
|
||||
}
|
||||
|
||||
*matchlen = (res->cp-*cp) - gData.skipped;
|
||||
*cp = res->cp;
|
||||
result->match_len = (result->cp-str_beg) - gData.skipped;
|
||||
result->paren_count = regexp->parenCount;
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
void js_DestroyRegExp(JSRegExp *re)
|
||||
void regexp_destroy(regexp_t *re)
|
||||
{
|
||||
if (re->classList) {
|
||||
UINT i;
|
||||
|
@ -3169,9 +3192,10 @@ void js_DestroyRegExp(JSRegExp *re)
|
|||
heap_free(re);
|
||||
}
|
||||
|
||||
JSRegExp* js_NewRegExp(void *cx, heap_pool_t *pool, const WCHAR *str, DWORD str_len, UINT flags, BOOL flat)
|
||||
regexp_t* regexp_new(void *cx, heap_pool_t *pool, const WCHAR *str,
|
||||
DWORD str_len, WORD flags, BOOL flat)
|
||||
{
|
||||
JSRegExp *re;
|
||||
regexp_t *re;
|
||||
heap_pool_t *mark;
|
||||
CompilerState state;
|
||||
size_t resize;
|
||||
|
@ -3213,7 +3237,7 @@ JSRegExp* js_NewRegExp(void *cx, heap_pool_t *pool, const WCHAR *str, DWORD str_
|
|||
if (!ParseRegExp(&state))
|
||||
goto out;
|
||||
}
|
||||
resize = offsetof(JSRegExp, program) + state.progLength + 1;
|
||||
resize = offsetof(regexp_t, program) + state.progLength + 1;
|
||||
re = heap_alloc(resize);
|
||||
if (!re)
|
||||
goto out;
|
||||
|
@ -3223,7 +3247,7 @@ JSRegExp* js_NewRegExp(void *cx, heap_pool_t *pool, const WCHAR *str, DWORD str_
|
|||
if (re->classCount) {
|
||||
re->classList = heap_alloc(re->classCount * sizeof(RECharSet));
|
||||
if (!re->classList) {
|
||||
js_DestroyRegExp(re);
|
||||
regexp_destroy(re);
|
||||
re = NULL;
|
||||
goto out;
|
||||
}
|
||||
|
@ -3234,7 +3258,7 @@ JSRegExp* js_NewRegExp(void *cx, heap_pool_t *pool, const WCHAR *str, DWORD str_
|
|||
}
|
||||
endPC = EmitREBytecode(&state, re, state.treeDepth, re->program, state.result);
|
||||
if (!endPC) {
|
||||
js_DestroyRegExp(re);
|
||||
regexp_destroy(re);
|
||||
re = NULL;
|
||||
goto out;
|
||||
}
|
||||
|
@ -3245,9 +3269,9 @@ JSRegExp* js_NewRegExp(void *cx, heap_pool_t *pool, const WCHAR *str, DWORD str_
|
|||
* besides re exist here.
|
||||
*/
|
||||
if ((size_t)(endPC - re->program) != state.progLength + 1) {
|
||||
JSRegExp *tmp;
|
||||
regexp_t *tmp;
|
||||
assert((size_t)(endPC - re->program) < state.progLength + 1);
|
||||
resize = offsetof(JSRegExp, program) + (endPC - re->program);
|
||||
resize = offsetof(regexp_t, program) + (endPC - re->program);
|
||||
tmp = heap_realloc(re, resize);
|
||||
if (tmp)
|
||||
re = tmp;
|
||||
|
|
|
@ -41,50 +41,41 @@ typedef struct RECapture {
|
|||
size_t length; /* length of capture */
|
||||
} RECapture;
|
||||
|
||||
typedef struct REMatchState {
|
||||
typedef struct match_state_t {
|
||||
const WCHAR *cp;
|
||||
RECapture parens[1]; /* first of 're->parenCount' captures,
|
||||
allocated at end of this struct */
|
||||
} REMatchState;
|
||||
DWORD match_len;
|
||||
|
||||
DWORD paren_count;
|
||||
RECapture parens[1];
|
||||
} match_state_t;
|
||||
|
||||
typedef BYTE JSPackedBool;
|
||||
typedef BYTE jsbytecode;
|
||||
|
||||
/*
|
||||
* This struct holds a bitmap representation of a class from a regexp.
|
||||
* There's a list of these referenced by the classList field in the JSRegExp
|
||||
* struct below. The initial state has startIndex set to the offset in the
|
||||
* original regexp source of the beginning of the class contents. The first
|
||||
* use of the class converts the source representation into a bitmap.
|
||||
*
|
||||
*/
|
||||
typedef struct RECharSet {
|
||||
JSPackedBool converted;
|
||||
JSPackedBool sense;
|
||||
WORD length;
|
||||
union {
|
||||
BYTE *bits;
|
||||
struct {
|
||||
size_t startIndex;
|
||||
size_t length;
|
||||
} src;
|
||||
} u;
|
||||
} RECharSet;
|
||||
|
||||
typedef struct JSRegExp {
|
||||
typedef struct regexp_t {
|
||||
WORD flags; /* flags, see jsapi.h's REG_* defines */
|
||||
size_t parenCount; /* number of parenthesized submatches */
|
||||
size_t classCount; /* count [...] bitmaps */
|
||||
RECharSet *classList; /* list of [...] bitmaps */
|
||||
struct RECharSet *classList; /* list of [...] bitmaps */
|
||||
const WCHAR *source; /* locked source string, sans // */
|
||||
DWORD source_len;
|
||||
jsbytecode program[1]; /* regular expression bytecode */
|
||||
} JSRegExp;
|
||||
} regexp_t;
|
||||
|
||||
JSRegExp* js_NewRegExp(void *cx, heap_pool_t *pool, const WCHAR *str,
|
||||
DWORD str_len, UINT flags, BOOL flat) DECLSPEC_HIDDEN;
|
||||
void js_DestroyRegExp(JSRegExp *re) DECLSPEC_HIDDEN;
|
||||
HRESULT MatchRegExpNext(JSRegExp *jsregexp, const WCHAR *str,
|
||||
DWORD str_len, const WCHAR **cp, heap_pool_t *pool,
|
||||
REMatchState **result, DWORD *matchlen) DECLSPEC_HIDDEN;
|
||||
regexp_t* regexp_new(void*, heap_pool_t*, const WCHAR*, DWORD, WORD, BOOL) DECLSPEC_HIDDEN;
|
||||
void regexp_destroy(regexp_t*) DECLSPEC_HIDDEN;
|
||||
HRESULT regexp_execute(regexp_t*, void*, heap_pool_t*, const WCHAR*,
|
||||
DWORD, match_state_t*) DECLSPEC_HIDDEN;
|
||||
|
||||
static inline match_state_t* alloc_match_state(regexp_t *regexp,
|
||||
heap_pool_t *pool, const WCHAR *pos)
|
||||
{
|
||||
size_t size = offsetof(match_state_t, parens) + regexp->parenCount*sizeof(RECapture);
|
||||
match_state_t *ret;
|
||||
|
||||
ret = pool ? heap_pool_alloc(pool, size) : heap_alloc(size);
|
||||
if(!ret)
|
||||
return NULL;
|
||||
|
||||
ret->cp = pos;
|
||||
return ret;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue