riched20: Fix URL auto-detection to handle 'neutral' characters.

Signed-off-by: Huw Davies <huw@codeweavers.com>
Signed-off-by: Alexandre Julliard <julliard@winehq.org>
This commit is contained in:
Huw Davies 2016-06-01 12:35:37 +01:00 committed by Alexandre Julliard
parent 0bdf9e5817
commit ea1e44e4f6
2 changed files with 138 additions and 67 deletions

View File

@ -5005,10 +5005,24 @@ LRESULT WINAPI REExtendedRegisterClass(void)
return result;
}
static BOOL isurlspecial(WCHAR c)
static int wchar_comp( const void *key, const void *elem )
{
static const WCHAR special_chars[] = {'.','/','%','@','*','|','\\','+','#',0};
return strchrW( special_chars, c ) != NULL;
return *(const WCHAR *)key - *(const WCHAR *)elem;
}
/* Neutral characters end the url if the next non-neutral character is a space character,
   otherwise they are included in the url.

   Returns TRUE when c is one of the characters listed in neutral_chars,
   FALSE for everything else (including all alphanumerics). */
static BOOL isurlneutral( WCHAR c )
{
    /* NB this list is sorted: bsearch() below depends on the ordering, and the
       last entry doubles as the upper bound for the range shortcut. */
    static const WCHAR neutral_chars[] = {'!','\"','\'','(',')',',','-','.',':',';','<','>','?','[',']','{','}'};
    const unsigned int count = sizeof(neutral_chars) / sizeof(neutral_chars[0]);

    /* Some shortcuts.  The range test has to run before isalnum(): the
       <ctype.h> functions are only defined for arguments representable as
       unsigned char (or EOF), whereas c is a wide character that can be far
       larger.  After this test c is at most '}', so the isalnum() call is
       well defined (assuming WCHAR is an unsigned type, as in the Windows
       headers -- confirm if built against a different definition). */
    if (c > neutral_chars[count - 1]) return FALSE;
    if (isalnum( c )) return FALSE;

    return !!bsearch( &c, neutral_chars, count, sizeof(c), wchar_comp );
}
/**
@ -5024,87 +5038,67 @@ static BOOL ME_FindNextURLCandidate(ME_TextEditor *editor,
ME_Cursor *candidate_min,
ME_Cursor *candidate_max)
{
ME_Cursor cursor = *start;
BOOL foundColon = FALSE;
ME_Cursor cursor = *start, neutral_end;
BOOL candidateStarted = FALSE;
WCHAR lastAcceptedChar = '\0';
WCHAR c;
while (nChars > 0)
{
WCHAR *strStart = get_text( &cursor.pRun->member.run, 0 );
WCHAR *str = strStart + cursor.nOffset;
int nLen = cursor.pRun->member.run.len - cursor.nOffset;
nChars -= nLen;
WCHAR *str = get_text( &cursor.pRun->member.run, 0 );
int run_len = cursor.pRun->member.run.len;
if (~cursor.pRun->member.run.nFlags & MERF_ENDPARA)
nChars -= run_len - cursor.nOffset;
/* Find start of candidate */
if (!candidateStarted)
{
/* Find start of candidate */
if (!candidateStarted)
while (cursor.nOffset < run_len)
{
while (nLen)
c = str[cursor.nOffset];
if (!isspaceW( c ) && !isurlneutral( c ))
{
nLen--;
if (isalnumW(*str) || isurlspecial(*str))
{
cursor.nOffset = str - strStart;
*candidate_min = cursor;
candidateStarted = TRUE;
lastAcceptedChar = *str++;
break;
}
str++;
*candidate_min = cursor;
candidateStarted = TRUE;
neutral_end.pPara = NULL;
cursor.nOffset++;
break;
}
}
/* Find end of candidate */
if (candidateStarted) {
while (nLen)
{
nLen--;
if (*str == ':' && !foundColon) {
foundColon = TRUE;
} else if (!isalnumW(*str) && !isurlspecial(*str)) {
cursor.nOffset = str - strStart;
if (lastAcceptedChar == ':')
ME_MoveCursorChars(editor, &cursor, -1);
*candidate_max = cursor;
return TRUE;
}
lastAcceptedChar = *str++;
}
}
} else {
/* End of paragraph: skip it if before candidate span, or terminates
current active span */
if (candidateStarted) {
if (lastAcceptedChar == ':')
ME_MoveCursorChars(editor, &cursor, -1);
*candidate_max = cursor;
return TRUE;
cursor.nOffset++;
}
}
/* Reaching this point means no span was found, so get next span */
if (!ME_NextRun(&cursor.pPara, &cursor.pRun)) {
if (candidateStarted) {
/* There are no further runs, so take end of text as end of candidate */
cursor.nOffset = str - strStart;
if (lastAcceptedChar == ':')
ME_MoveCursorChars(editor, &cursor, -1);
*candidate_max = cursor;
return TRUE;
/* Find end of candidate */
if (candidateStarted)
{
while (cursor.nOffset < run_len)
{
c = str[cursor.nOffset];
if (isspaceW( c ))
goto done;
else if (isurlneutral( c ))
{
if (!neutral_end.pPara)
neutral_end = cursor;
}
else
neutral_end.pPara = NULL;
cursor.nOffset++;
}
*candidate_max = *candidate_min = cursor;
return FALSE;
}
cursor.nOffset = 0;
if (!ME_NextRun(&cursor.pPara, &cursor.pRun))
goto done;
}
if (candidateStarted) {
/* There are no further runs, so take end of text as end of candidate */
if (lastAcceptedChar == ':')
ME_MoveCursorChars(editor, &cursor, -1);
*candidate_max = cursor;
done:
if (candidateStarted)
{
if (neutral_end.pPara)
*candidate_max = neutral_end;
else
*candidate_max = cursor;
return TRUE;
}
*candidate_max = *candidate_min = cursor;

View File

@ -1781,6 +1781,7 @@ static void test_EM_AUTOURLDETECT(void)
"This is some text with #X on it",
"This is some text with @X on it",
"This is some text with \\X on it",
"This is some text with _X on it",
};
/* All of these cause the URL detection to be extended by one more byte,
thus demonstrating that the tested character is considered as part
@ -1794,6 +1795,33 @@ static void test_EM_AUTOURLDETECT(void)
"This is some text with X# on it",
"This is some text with X@ on it",
"This is some text with X\\ on it",
"This is some text with X_ on it",
};
/* These delims act as neutral breaks. Whether the url is ended
or not depends on the next non-neutral character. We'll test
with Y unchanged, in which case the url should include the
delimiter and the Y. We'll also test with the Y changed
to a space, in which case the url stops before the
delimiter. */
const char * templates_neutral_delim[] = {
"This is some text with X-Y on it",
"This is some text with X--Y on it",
"This is some text with X!Y on it",
"This is some text with X[Y on it",
"This is some text with X]Y on it",
"This is some text with X{Y on it",
"This is some text with X}Y on it",
"This is some text with X(Y on it",
"This is some text with X)Y on it",
"This is some text with X\"Y on it",
"This is some text with X;Y on it",
"This is some text with X:Y on it",
"This is some text with X'Y on it",
"This is some text with X?Y on it",
"This is some text with X<Y on it",
"This is some text with X>Y on it",
"This is some text with X.Y on it",
"This is some text with X,Y on it",
};
char buffer[1024];
@ -1974,6 +2002,55 @@ static void test_EM_AUTOURLDETECT(void)
}
}
for (j = 0; j < sizeof(templates_neutral_delim) / sizeof(const char *); j++) {
char * at_pos, * end_pos;
int at_offset;
int end_offset;
if (!urls[i].is_url) continue;
at_pos = strchr(templates_neutral_delim[j], 'X');
at_offset = at_pos - templates_neutral_delim[j];
memcpy(buffer, templates_neutral_delim[j], at_offset);
buffer[at_offset] = '\0';
strcat(buffer, urls[i].text);
strcat(buffer, templates_neutral_delim[j] + at_offset + 1);
end_pos = strchr(buffer, 'Y');
end_offset = end_pos - buffer;
SendMessageA(hwndRichEdit, EM_AUTOURLDETECT, TRUE, 0);
SendMessageA(hwndRichEdit, WM_SETTEXT, 0, (LPARAM)buffer);
/* This assumes no templates start with the URL itself, and that they
have at least two characters before the URL text */
ok(!check_CFE_LINK_selection(hwndRichEdit, 0, 1),
"CFE_LINK incorrectly set in (%d-%d), text: %s\n", 0, 1, buffer);
ok(!check_CFE_LINK_selection(hwndRichEdit, at_offset -2, at_offset -1),
"CFE_LINK incorrectly set in (%d-%d), text: %s\n", at_offset -2, at_offset -1, buffer);
ok(!check_CFE_LINK_selection(hwndRichEdit, at_offset -1, at_offset),
"CFE_LINK incorrectly set in (%d-%d), text: %s\n", at_offset -1, at_offset, buffer);
ok(check_CFE_LINK_selection(hwndRichEdit, at_offset, at_offset +1),
"CFE_LINK not set in (%d-%d), text: %s\n", at_offset, at_offset +1, buffer);
ok(check_CFE_LINK_selection(hwndRichEdit, end_offset -1, end_offset),
"CFE_LINK not set in (%d-%d), text: %s\n", end_offset -1, end_offset, buffer);
ok(check_CFE_LINK_selection(hwndRichEdit, end_offset, end_offset +1),
"CFE_LINK not set in (%d-%d), text: %s\n", end_offset, end_offset +1, buffer);
*end_pos = ' ';
SendMessageA(hwndRichEdit, EM_AUTOURLDETECT, TRUE, 0);
SendMessageA(hwndRichEdit, WM_SETTEXT, 0, (LPARAM)buffer);
ok(check_CFE_LINK_selection(hwndRichEdit, at_offset, at_offset +1),
"CFE_LINK not set in (%d-%d), text: %s\n", at_offset, at_offset +1, buffer);
ok(!check_CFE_LINK_selection(hwndRichEdit, end_offset -1, end_offset),
"CFE_LINK set in (%d-%d), text: %s\n", end_offset -1, end_offset, buffer);
ok(!check_CFE_LINK_selection(hwndRichEdit, end_offset, end_offset +1),
"CFE_LINK set in (%d-%d), text: %s\n", end_offset, end_offset +1, buffer);
}
DestroyWindow(hwndRichEdit);
hwndRichEdit = NULL;
}