notepad: Improve encoding detection when opening files.
This commit is contained in:
parent
42729bc1c1
commit
8b6b7b2c39
|
@ -26,6 +26,7 @@
|
|||
#include <windows.h>
|
||||
#include <commdlg.h>
|
||||
#include <shlwapi.h>
|
||||
#include <winternl.h>
|
||||
|
||||
#include "main.h"
|
||||
#include "dialog.h"
|
||||
|
@ -37,6 +38,13 @@ static const WCHAR helpfileW[] = { 'n','o','t','e','p','a','d','.','h','l','p',0
|
|||
|
||||
static INT_PTR WINAPI DIALOG_PAGESETUP_DlgProc(HWND hDlg, UINT msg, WPARAM wParam, LPARAM lParam);
|
||||
|
||||
/* Swap bytes of WCHAR buffer (big-endian <-> little-endian). */
|
||||
static inline void byteswap_wide_string(LPWSTR str, UINT num)
|
||||
{
|
||||
UINT i;
|
||||
for (i = 0; i < num; i++) str[i] = RtlUshortByteSwap(str[i]);
|
||||
}
|
||||
|
||||
VOID ShowLastError(void)
|
||||
{
|
||||
DWORD error = GetLastError();
|
||||
|
@ -195,6 +203,43 @@ BOOL DoCloseFile(void)
|
|||
return(TRUE);
|
||||
}
|
||||
|
||||
static inline ENCODING detect_encoding_of_buffer(const void* buffer, int size)
|
||||
{
|
||||
static const char bom_utf8[] = { 0xef, 0xbb, 0xbf };
|
||||
if (size >= sizeof(bom_utf8) && !memcmp(buffer, bom_utf8, sizeof(bom_utf8)))
|
||||
return ENCODING_UTF8;
|
||||
else
|
||||
{
|
||||
int flags = IS_TEXT_UNICODE_SIGNATURE |
|
||||
IS_TEXT_UNICODE_REVERSE_SIGNATURE |
|
||||
IS_TEXT_UNICODE_ODD_LENGTH;
|
||||
IsTextUnicode(buffer, size, &flags);
|
||||
if (flags & IS_TEXT_UNICODE_SIGNATURE)
|
||||
return ENCODING_UTF16LE;
|
||||
else if (flags & IS_TEXT_UNICODE_REVERSE_SIGNATURE)
|
||||
return ENCODING_UTF16BE;
|
||||
else
|
||||
return ENCODING_ANSI;
|
||||
}
|
||||
}
|
||||
|
||||
/* Similar to SetWindowTextA, but uses a CP_UTF8 encoded input, not CP_ACP.
|
||||
* lpTextInUtf8 should be NUL-terminated and not include the BOM.
|
||||
*
|
||||
* Returns FALSE on failure, TRUE on success, like SetWindowTextA/W.
|
||||
*/
|
||||
static BOOL SetWindowTextUtf8(HWND hwnd, LPCSTR lpTextInUtf8)
|
||||
{
|
||||
BOOL ret;
|
||||
int lenW = MultiByteToWideChar(CP_UTF8, 0, lpTextInUtf8, -1, NULL, 0);
|
||||
LPWSTR textW = HeapAlloc(GetProcessHeap(), 0, lenW * sizeof(WCHAR));
|
||||
if (!textW)
|
||||
return FALSE;
|
||||
MultiByteToWideChar(CP_UTF8, 0, lpTextInUtf8, -1, textW, lenW);
|
||||
ret = SetWindowTextW(hwnd, textW);
|
||||
HeapFree(GetProcessHeap(), 0, textW);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void DoOpenFile(LPCWSTR szFileName)
|
||||
{
|
||||
|
@ -203,6 +248,8 @@ void DoOpenFile(LPCWSTR szFileName)
|
|||
LPSTR pTemp;
|
||||
DWORD size;
|
||||
DWORD dwNumRead;
|
||||
ENCODING enc;
|
||||
BOOL succeeded;
|
||||
WCHAR log[5];
|
||||
|
||||
/* Close any files and prompt to save changes */
|
||||
|
@ -224,9 +271,9 @@ void DoOpenFile(LPCWSTR szFileName)
|
|||
ShowLastError();
|
||||
return;
|
||||
}
|
||||
size++;
|
||||
|
||||
pTemp = HeapAlloc(GetProcessHeap(), 0, size);
|
||||
/* Extra memory for (WCHAR)'\0'-termination. */
|
||||
pTemp = HeapAlloc(GetProcessHeap(), 0, size+2);
|
||||
if (!pTemp)
|
||||
{
|
||||
CloseHandle(hFile);
|
||||
|
@ -243,12 +290,48 @@ void DoOpenFile(LPCWSTR szFileName)
|
|||
}
|
||||
|
||||
CloseHandle(hFile);
|
||||
pTemp[dwNumRead] = 0;
|
||||
|
||||
if((size -1) >= 2 && (BYTE)pTemp[0] == 0xff && (BYTE)pTemp[1] == 0xfe)
|
||||
SetWindowTextW(Globals.hEdit, (LPWSTR)pTemp + 1);
|
||||
else
|
||||
SetWindowTextA(Globals.hEdit, pTemp);
|
||||
size = dwNumRead;
|
||||
pTemp[size] = 0; /* make sure it's (char)'\0'-terminated */
|
||||
pTemp[size+1] = 0; /* make sure it's (WCHAR)'\0'-terminated */
|
||||
|
||||
enc = detect_encoding_of_buffer(pTemp, size);
|
||||
|
||||
/* SetWindowTextUtf8 and SetWindowTextA try to allocate memory, so we
|
||||
* check if they succeed.
|
||||
*/
|
||||
switch (enc)
|
||||
{
|
||||
case ENCODING_UTF16BE:
|
||||
byteswap_wide_string((WCHAR*) pTemp, size/sizeof(WCHAR));
|
||||
/* fall through */
|
||||
|
||||
case ENCODING_UTF16LE:
|
||||
if (size >= 2 && (BYTE)pTemp[0] == 0xff && (BYTE)pTemp[1] == 0xfe)
|
||||
succeeded = SetWindowTextW(Globals.hEdit, (LPWSTR)pTemp + 1);
|
||||
else
|
||||
succeeded = SetWindowTextW(Globals.hEdit, (LPWSTR)pTemp);
|
||||
break;
|
||||
|
||||
case ENCODING_UTF8:
|
||||
if (size >= 3 && (BYTE)pTemp[0] == 0xef && (BYTE)pTemp[1] == 0xbb &&
|
||||
(BYTE)pTemp[2] == 0xbf)
|
||||
succeeded = SetWindowTextUtf8(Globals.hEdit, pTemp+3);
|
||||
else
|
||||
succeeded = SetWindowTextUtf8(Globals.hEdit, pTemp);
|
||||
break;
|
||||
|
||||
default:
|
||||
succeeded = SetWindowTextA(Globals.hEdit, pTemp);
|
||||
break;
|
||||
}
|
||||
|
||||
if (!succeeded)
|
||||
{
|
||||
ShowLastError();
|
||||
HeapFree(GetProcessHeap(), 0, pTemp);
|
||||
return;
|
||||
}
|
||||
|
||||
HeapFree(GetProcessHeap(), 0, pTemp);
|
||||
|
||||
|
|
|
@ -25,6 +25,14 @@
|
|||
|
||||
#define MAX_STRING_LEN 255
|
||||
|
||||
typedef enum
|
||||
{
|
||||
ENCODING_ANSI,
|
||||
ENCODING_UTF16LE,
|
||||
ENCODING_UTF16BE,
|
||||
ENCODING_UTF8
|
||||
} ENCODING;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
HANDLE hInstance;
|
||||
|
|
Loading…
Reference in New Issue