hhctrl.ocx: Add HTML to Unicode decoding capability to the table of contents.
This commit is contained in:
parent
b527679d17
commit
9033b14438
|
@ -50,16 +50,6 @@ static void free_content_item(ContentItem *item)
|
|||
}
|
||||
}
|
||||
|
||||
/*
 * Convert a counted ANSI string into a wide string and hand it back through *param.
 *
 * The result is allocated with heap_alloc() with room for one extra WCHAR, which is
 * used to NUL-terminate the converted text.
 */
static void store_param(LPWSTR *param, const char *value, int len)
{
    /* First pass only measures how many wide characters the conversion needs */
    int wide_len = MultiByteToWideChar(CP_ACP, 0, value, len, NULL, 0);
    WCHAR *wide = heap_alloc((wide_len+1)*sizeof(WCHAR));

    MultiByteToWideChar(CP_ACP, 0, value, len, wide, wide_len);
    wide[wide_len] = 0;

    *param = wide;
}
|
||||
|
||||
static void parse_obj_node_param(ContentItem *item, ContentItem *hhc_root, const char *text)
|
||||
{
|
||||
const char *ptr;
|
||||
|
@ -99,11 +89,11 @@ static void parse_obj_node_param(ContentItem *item, ContentItem *hhc_root, const
|
|||
const char *local = strstr(ptr, "::")+2;
|
||||
int local_len = len-(local-ptr);
|
||||
|
||||
store_param(&item->local, local, local_len);
|
||||
item->local = decode_html(local, local_len);
|
||||
param = &merge;
|
||||
}
|
||||
|
||||
store_param(param, ptr, len);
|
||||
*param = decode_html(ptr, len);
|
||||
|
||||
if(param == &merge) {
|
||||
SetChmPath(&item->merge, hhc_root->merge.chm_file, merge);
|
||||
|
|
|
@ -50,6 +50,119 @@ static void ExpandContract(HHInfo *pHHInfo);
|
|||
|
||||
static const WCHAR szEmpty[] = {0};
|
||||
|
||||
/* One entry of the HTML entity lookup table */
struct html_encoded_symbol {
    const char *html_code;   /* entity name, without the leading '&' and trailing ';' */
    char        ansi_symbol; /* equivalent character in the ANSI code page */
};

/*
 * Table mapping the conversion between HTML encoded symbols and their ANSI code page equivalent.
 * Note: Add additional entries in strcmp() order, i.e. sorted by byte value so that upper case
 *       letters come before lower case ones (a binary search is used on this table).
 *
 * The table is read-only and private to this file, hence "static const".
 */
static const struct html_encoded_symbol html_encoded_symbols[] =
{
    {"AElig",  0xC6},
    {"Aacute", 0xC1},
    {"Acirc",  0xC2},
    {"Agrave", 0xC0},
    {"Aring",  0xC5},
    {"Atilde", 0xC3},
    {"Auml",   0xC4},
    {"Ccedil", 0xC7},
    {"ETH",    0xD0},
    {"Eacute", 0xC9},
    {"Ecirc",  0xCA},
    {"Egrave", 0xC8},
    {"Euml",   0xCB},
    {"Iacute", 0xCD},
    {"Icirc",  0xCE},
    {"Igrave", 0xCC},
    {"Iuml",   0xCF},
    {"Ntilde", 0xD1},
    {"Oacute", 0xD3},
    {"Ocirc",  0xD4},
    {"Ograve", 0xD2},
    {"Oslash", 0xD8},
    {"Otilde", 0xD5},
    {"Ouml",   0xD6},
    {"THORN",  0xDE},
    {"Uacute", 0xDA},
    {"Ucirc",  0xDB},
    {"Ugrave", 0xD9},
    {"Uuml",   0xDC},
    {"Yacute", 0xDD},
    {"aacute", 0xE1},
    {"acirc",  0xE2},
    {"acute",  0xB4},
    {"aelig",  0xE6},
    {"agrave", 0xE0},
    {"amp",    '&'},
    {"aring",  0xE5},
    {"atilde", 0xE3},
    {"auml",   0xE4},
    {"brvbar", 0xA6},
    {"ccedil", 0xE7},
    {"cedil",  0xB8},
    {"cent",   0xA2},
    {"copy",   0xA9},
    {"curren", 0xA4},
    {"deg",    0xB0},
    {"divide", 0xF7},
    {"eacute", 0xE9},
    {"ecirc",  0xEA},
    {"egrave", 0xE8},
    {"eth",    0xF0},
    {"euml",   0xEB},
    {"frac12", 0xBD},
    {"frac14", 0xBC},
    {"frac34", 0xBE},
    {"gt",     '>'},
    {"iacute", 0xED},
    {"icirc",  0xEE},
    {"iexcl",  0xA1},
    {"igrave", 0xEC},
    {"iquest", 0xBF},
    {"iuml",   0xEF},
    {"laquo",  0xAB},
    {"lt",     '<'},
    {"macr",   0xAF},
    {"micro",  0xB5},
    {"middot", 0xB7},
    {"nbsp",   ' '},
    {"not",    0xAC},
    {"ntilde", 0xF1},
    {"oacute", 0xF3},
    {"ocirc",  0xF4},
    {"ograve", 0xF2},
    {"ordf",   0xAA},
    {"ordm",   0xBA},
    {"oslash", 0xF8},
    {"otilde", 0xF5},
    {"ouml",   0xF6},
    {"para",   0xB6},
    {"plusmn", 0xB1},
    {"pound",  0xA3},
    {"quot",   '"'},
    {"raquo",  0xBB},
    {"reg",    0xAE},
    {"sect",   0xA7},
    {"shy",    0xAD},
    {"sup1",   0xB9},
    {"sup2",   0xB2},
    {"sup3",   0xB3},
    {"szlig",  0xDF},
    {"thorn",  0xFE},
    {"times",  0xD7},
    {"uacute", 0xFA},
    {"ucirc",  0xFB},
    {"ugrave", 0xF9},
    {"uml",    0xA8},
    {"uuml",   0xFC},
    {"yacute", 0xFD},
    {"yen",    0xA5},
    {"yuml",   0xFF}
};
|
||||
|
||||
/* Loads a string from the resource file */
|
||||
static LPWSTR HH_LoadString(DWORD dwID)
|
||||
{
|
||||
|
@ -1654,3 +1767,92 @@ HHInfo *CreateHelpViewer(LPCWSTR filename)
|
|||
|
||||
return info;
|
||||
}
|
||||
|
||||
/*
|
||||
* Search the table of HTML entities and return the corresponding ANSI symbol.
|
||||
*/
|
||||
static char find_html_symbol(const char *entity, int entity_len)
|
||||
{
|
||||
int max = sizeof(html_encoded_symbols)/sizeof(html_encoded_symbols[0])-1;
|
||||
int min = 0, dir;
|
||||
|
||||
while(min <= max)
|
||||
{
|
||||
int pos = (min+max)/2;
|
||||
const char *encoded_symbol = html_encoded_symbols[pos].html_code;
|
||||
dir = strncmp(encoded_symbol, entity, entity_len);
|
||||
if(dir == 0 && !encoded_symbol[entity_len]) return html_encoded_symbols[pos].ansi_symbol;
|
||||
if(dir < 0)
|
||||
min = pos+1;
|
||||
else
|
||||
max = pos-1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Decode a string containing HTML encoded characters into a unicode string.
|
||||
*/
|
||||
WCHAR *decode_html(const char *html_fragment, int html_fragment_len)
|
||||
{
|
||||
const char *h = html_fragment;
|
||||
char *amp, *sem, symbol, *tmp;
|
||||
int len, tmp_len = 0;
|
||||
WCHAR *unicode_text;
|
||||
|
||||
tmp = heap_alloc(html_fragment_len+1);
|
||||
while(1)
|
||||
{
|
||||
symbol = 0;
|
||||
amp = strchr(h, '&');
|
||||
if(!amp) break;
|
||||
len = amp-h;
|
||||
/* Copy the characters prior to the HTML encoded character */
|
||||
memcpy(&tmp[tmp_len], h, len);
|
||||
tmp_len += len;
|
||||
amp++; /* skip ampersand */
|
||||
sem = strchr(amp, ';');
|
||||
/* Require a semicolon after the ampersand */
|
||||
if(!sem)
|
||||
{
|
||||
h = amp;
|
||||
tmp[tmp_len++] = '&';
|
||||
continue;
|
||||
}
|
||||
/* Find the symbol either by using the ANSI character number (prefixed by the pound symbol)
|
||||
* or by searching the HTML entity table */
|
||||
len = sem-amp;
|
||||
if(amp[0] == '#')
|
||||
{
|
||||
char *endnum = NULL;
|
||||
int tmp;
|
||||
|
||||
tmp = (char) strtol(amp, &endnum, 10);
|
||||
if(endnum == sem)
|
||||
symbol = tmp;
|
||||
}
|
||||
else
|
||||
symbol = find_html_symbol(amp, len);
|
||||
if(!symbol)
|
||||
{
|
||||
FIXME("Failed to translate HTML encoded character '&%.*s;'.\n", len, amp);
|
||||
h = amp;
|
||||
tmp[tmp_len++] = '&';
|
||||
continue;
|
||||
}
|
||||
/* Insert the new symbol */
|
||||
h = sem+1;
|
||||
tmp[tmp_len++] = symbol;
|
||||
}
|
||||
/* Convert any remaining characters */
|
||||
len = html_fragment_len-(h-html_fragment);
|
||||
memcpy(&tmp[tmp_len], h, len);
|
||||
tmp_len += len;
|
||||
tmp[tmp_len++] = 0; /* NULL-terminate the string */
|
||||
|
||||
len = MultiByteToWideChar(CP_ACP, 0, tmp, tmp_len, NULL, 0);
|
||||
unicode_text = heap_alloc(len*sizeof(WCHAR));
|
||||
MultiByteToWideChar(CP_ACP, 0, tmp, tmp_len, unicode_text, len);
|
||||
heap_free(tmp);
|
||||
return unicode_text;
|
||||
}
|
||||
|
|
|
@ -193,6 +193,8 @@ void ReleaseSearch(HHInfo *info) DECLSPEC_HIDDEN;
|
|||
|
||||
LPCWSTR skip_schema(LPCWSTR url) DECLSPEC_HIDDEN;
|
||||
|
||||
/* Decodes a counted string containing HTML encoded characters into a newly allocated unicode string.
 * NOTE(review): the neighbouring prototypes carry DECLSPEC_HIDDEN and this one does not - confirm
 * whether it should be added here as well. */
WCHAR *decode_html(const char *html_fragment, int html_fragment_len);
|
||||
|
||||
/* memory allocation functions */
|
||||
|
||||
static inline void * __WINE_ALLOC_SIZE(1) heap_alloc(size_t len)
|
||||
|
|
Loading…
Reference in New Issue