riched20: Fix URL auto-detection to handle 'neutral' characters.

Signed-off-by: Huw Davies <huw@codeweavers.com> Signed-off-by: Alexandre Julliard <julliard@winehq.org>
2016-06-01 12:35:37 +01:00 · 2016-06-01 12:35:37 +01:00 · ea1e44e4f6
parent 0bdf9e5817
commit ea1e44e4f6
2 changed files with 138 additions and 67 deletions
--- a/dlls/riched20/editor.c
+++ b/dlls/riched20/editor.c
@ -5005,10 +5005,24 @@ LRESULT WINAPI REExtendedRegisterClass(void)
  return result;
 }

-static BOOL isurlspecial(WCHAR c)
+static int wchar_comp( const void *key, const void *elem )
 {
-  static const WCHAR special_chars[] = {'.','/','%','@','*','|','\\','+','#',0};
-  return strchrW( special_chars, c ) != NULL;
+    return *(const WCHAR *)key - *(const WCHAR *)elem; /* bsearch() comparator: <0, 0 or >0 as *key is below, equal to or above *elem */
+}
+
+/* neutral characters end the url if the next non-neutral character is a space character,
+   otherwise they are included in the url.  Returns TRUE if c is one of the neutral characters. */
+static BOOL isurlneutral( WCHAR c )
+{
+    /* NB this list is sorted (bsearch below relies on it) */
+    static const WCHAR neutral_chars[] = {'!','\"','\'','(',')',',','-','.',':',';','<','>','?','[',']','{','}'};
+
+    /* Some shortcuts.  Range check first so isalnum() never sees a value outside unsigned char range. */
+    if (c > neutral_chars[sizeof(neutral_chars) / sizeof(neutral_chars[0]) - 1]) return FALSE;
+    if (isalnum( c )) return FALSE;
+
+    return !!bsearch( &c, neutral_chars, sizeof(neutral_chars) / sizeof(neutral_chars[0]),
+                      sizeof(c), wchar_comp );
 }

 /**
@ -5024,87 +5038,67 @@ static BOOL ME_FindNextURLCandidate(ME_TextEditor *editor,
                                    ME_Cursor *candidate_min,
                                    ME_Cursor *candidate_max)
 {
-  ME_Cursor cursor = *start;
-  BOOL foundColon = FALSE;
+  ME_Cursor cursor = *start, neutral_end;
  BOOL candidateStarted = FALSE;
-  WCHAR lastAcceptedChar = '\0';
+  WCHAR c;

  while (nChars > 0)
  {
-    WCHAR *strStart = get_text( &cursor.pRun->member.run, 0 );
-    WCHAR *str = strStart + cursor.nOffset;
-    int nLen = cursor.pRun->member.run.len - cursor.nOffset;
-    nChars -= nLen;
+    WCHAR *str = get_text( &cursor.pRun->member.run, 0 );
+    int run_len = cursor.pRun->member.run.len;

-    if (~cursor.pRun->member.run.nFlags & MERF_ENDPARA)
+    nChars -= run_len - cursor.nOffset;
+
+    /* Find start of candidate */
+    if (!candidateStarted)
    {
-      /* Find start of candidate */
-      if (!candidateStarted)
+      while (cursor.nOffset < run_len)
      {
-        while (nLen)
+        c = str[cursor.nOffset];
+        if (!isspaceW( c ) && !isurlneutral( c ))
        {
-          nLen--;
-          if (isalnumW(*str) || isurlspecial(*str))
-          {
-            cursor.nOffset = str - strStart;
-            *candidate_min = cursor;
-            candidateStarted = TRUE;
-            lastAcceptedChar = *str++;
-            break;
-          }
-          str++;
+          *candidate_min = cursor;
+          candidateStarted = TRUE;
+          neutral_end.pPara = NULL;
+          cursor.nOffset++;
+          break;
        }
-      }
-
-      /* Find end of candidate */
-      if (candidateStarted) {
-        while (nLen)
-        {
-          nLen--;
-          if (*str == ':' && !foundColon) {
-            foundColon = TRUE;
-          } else if (!isalnumW(*str) && !isurlspecial(*str)) {
-            cursor.nOffset = str - strStart;
-            if (lastAcceptedChar == ':')
-              ME_MoveCursorChars(editor, &cursor, -1);
-            *candidate_max = cursor;
-            return TRUE;
-          }
-          lastAcceptedChar = *str++;
-        }
-      }
-    } else {
-      /* End of paragraph: skip it if before candidate span, or terminates
-         current active span */
-      if (candidateStarted) {
-        if (lastAcceptedChar == ':')
-          ME_MoveCursorChars(editor, &cursor, -1);
-        *candidate_max = cursor;
-        return TRUE;
+        cursor.nOffset++;
      }
    }

-    /* Reaching this point means no span was found, so get next span */
-    if (!ME_NextRun(&cursor.pPara, &cursor.pRun)) {
-      if (candidateStarted) {
-        /* There are no further runs, so take end of text as end of candidate */
-        cursor.nOffset = str - strStart;
-        if (lastAcceptedChar == ':')
-          ME_MoveCursorChars(editor, &cursor, -1);
-        *candidate_max = cursor;
-        return TRUE;
+    /* Find end of candidate */
+    if (candidateStarted)
+    {
+      while (cursor.nOffset < run_len)
+      {
+        c = str[cursor.nOffset];
+        if (isspaceW( c ))
+          goto done;
+        else if (isurlneutral( c ))
+        {
+          if (!neutral_end.pPara)
+            neutral_end = cursor;
+        }
+        else
+          neutral_end.pPara = NULL;
+
+        cursor.nOffset++;
      }
-      *candidate_max = *candidate_min = cursor;
-      return FALSE;
    }
+
    cursor.nOffset = 0;
+    if (!ME_NextRun(&cursor.pPara, &cursor.pRun))
+      goto done;
  }

-  if (candidateStarted) {
-    /* There are no further runs, so take end of text as end of candidate */
-    if (lastAcceptedChar == ':')
-      ME_MoveCursorChars(editor, &cursor, -1);
-    *candidate_max = cursor;
+done:
+  if (candidateStarted)
+  {
+    if (neutral_end.pPara)
+      *candidate_max = neutral_end;
+    else
+      *candidate_max = cursor;
    return TRUE;
  }
  *candidate_max = *candidate_min = cursor;
--- a/dlls/riched20/tests/editor.c
+++ b/dlls/riched20/tests/editor.c
@ -1781,6 +1781,7 @@ static void test_EM_AUTOURLDETECT(void)
    "This is some text with #X on it",
    "This is some text with @X on it",
    "This is some text with \\X on it",
+    "This is some text with _X on it",
  };
  /* All of these cause the URL detection to be extended by one more byte,
     thus demonstrating that the tested character is considered as part
@ -1794,6 +1795,33 @@ static void test_EM_AUTOURLDETECT(void)
    "This is some text with X# on it",
    "This is some text with X@ on it",
    "This is some text with X\\ on it",
+    "This is some text with X_ on it",
+  };
+  /* These delimiters act as neutral breaks.  Whether the url is ended
+     or not depends on the next non-neutral character.  We'll test
+     with Y unchanged, in which case the url should include the
+     delimiter and the Y.  We'll also test with the Y changed
+     to a space, in which case the url stops before the
+     delimiter. */
+  const char * templates_neutral_delim[] = {
+    "This is some text with X-Y on it",
+    "This is some text with X--Y on it",
+    "This is some text with X!Y on it",
+    "This is some text with X[Y on it",
+    "This is some text with X]Y on it",
+    "This is some text with X{Y on it",
+    "This is some text with X}Y on it",
+    "This is some text with X(Y on it",
+    "This is some text with X)Y on it",
+    "This is some text with X\"Y on it",
+    "This is some text with X;Y on it",
+    "This is some text with X:Y on it",
+    "This is some text with X'Y on it",
+    "This is some text with X?Y on it",
+    "This is some text with X<Y on it",
+    "This is some text with X>Y on it",
+    "This is some text with X.Y on it",
+    "This is some text with X,Y on it",
  };
  char buffer[1024];

@ -1974,6 +2002,55 @@ static void test_EM_AUTOURLDETECT(void)
      }
    }

+    for (j = 0; j < sizeof(templates_neutral_delim) / sizeof(const char *); j++) {
+      char * at_pos, * end_pos;
+      int at_offset;
+      int end_offset;
+
+      if (!urls[i].is_url) continue;
+
+      at_pos = strchr(templates_neutral_delim[j], 'X');
+      at_offset = at_pos - templates_neutral_delim[j];
+      memcpy(buffer, templates_neutral_delim[j], at_offset);
+      buffer[at_offset] = '\0';
+      strcat(buffer, urls[i].text);
+      strcat(buffer, templates_neutral_delim[j] + at_offset + 1);
+
+      end_pos = strchr(buffer, 'Y');
+      end_offset = end_pos - buffer;
+
+      SendMessageA(hwndRichEdit, EM_AUTOURLDETECT, TRUE, 0);
+      SendMessageA(hwndRichEdit, WM_SETTEXT, 0, (LPARAM)buffer);
+
+      /* This assumes no templates start with the URL itself, and that they
+         have at least two characters before the URL text */
+      ok(!check_CFE_LINK_selection(hwndRichEdit, 0, 1),
+         "CFE_LINK incorrectly set in (%d-%d), text: %s\n", 0, 1, buffer);
+      ok(!check_CFE_LINK_selection(hwndRichEdit, at_offset -2, at_offset -1),
+         "CFE_LINK incorrectly set in (%d-%d), text: %s\n", at_offset -2, at_offset -1, buffer);
+      ok(!check_CFE_LINK_selection(hwndRichEdit, at_offset -1, at_offset),
+         "CFE_LINK incorrectly set in (%d-%d), text: %s\n", at_offset -1, at_offset, buffer);
+
+      ok(check_CFE_LINK_selection(hwndRichEdit, at_offset, at_offset +1),
+         "CFE_LINK not set in (%d-%d), text: %s\n", at_offset, at_offset +1, buffer);
+      ok(check_CFE_LINK_selection(hwndRichEdit, end_offset -1, end_offset),
+         "CFE_LINK not set in (%d-%d), text: %s\n", end_offset -1, end_offset, buffer);
+      ok(check_CFE_LINK_selection(hwndRichEdit, end_offset, end_offset +1),
+         "CFE_LINK not set in (%d-%d), text: %s\n", end_offset, end_offset +1, buffer);
+
+      *end_pos = ' ';
+
+      SendMessageA(hwndRichEdit, EM_AUTOURLDETECT, TRUE, 0);
+      SendMessageA(hwndRichEdit, WM_SETTEXT, 0, (LPARAM)buffer);
+
+      ok(check_CFE_LINK_selection(hwndRichEdit, at_offset, at_offset +1),
+         "CFE_LINK not set in (%d-%d), text: %s\n", at_offset, at_offset +1, buffer);
+      ok(!check_CFE_LINK_selection(hwndRichEdit, end_offset -1, end_offset),
+         "CFE_LINK set in (%d-%d), text: %s\n", end_offset -1, end_offset, buffer);
+      ok(!check_CFE_LINK_selection(hwndRichEdit, end_offset, end_offset +1),
+         "CFE_LINK set in (%d-%d), text: %s\n", end_offset, end_offset +1, buffer);
+    }
+
    DestroyWindow(hwndRichEdit);
    hwndRichEdit = NULL;
  }