diff --git a/tools/winedump/dump.c b/tools/winedump/dump.c index 4e46687a355..412880c4bd6 100644 --- a/tools/winedump/dump.c +++ b/tools/winedump/dump.c @@ -207,6 +207,34 @@ const char* get_guid_str(const GUID* guid) return str; } +const char *get_unicode_str( const WCHAR *str, int len ) +{ + char *buffer; + int i = 0; + + if (len == -1) len = strlenW( str ); + buffer = dump_want_n( len * 6 + 3); + buffer[i++] = '"'; + while (len-- > 0 && *str) + { + WCHAR c = *str++; + switch (c) + { + case '\n': strcpy( buffer + i, "\\n" ); i += 2; break; + case '\r': strcpy( buffer + i, "\\r" ); i += 2; break; + case '\t': strcpy( buffer + i, "\\t" ); i += 2; break; + case '"': strcpy( buffer + i, "\\\"" ); i += 2; break; + case '\\': strcpy( buffer + i, "\\\\" ); i += 2; break; + default: + if (c >= ' ' && c <= 126) buffer[i++] = c; + else i += sprintf( buffer + i, "\\u%04x",c); + } + } + buffer[i++] = '"'; + buffer[i] = 0; + return buffer; +} + const void* PRD(unsigned long prd, unsigned long len) { return (prd + len > dump_total_len) ? NULL : (const char*)dump_base + prd; diff --git a/tools/winedump/nls.c b/tools/winedump/nls.c index 8966ceada99..e69d11c42cb 100644 --- a/tools/winedump/nls.c +++ b/tools/winedump/nls.c @@ -35,7 +35,7 @@ static const void *read_data( unsigned int *pos, unsigned int size ) return ret; } -static unsigned short casemap( const unsigned short *table, unsigned int len, unsigned short ch ) +static unsigned short mapchar( const unsigned short *table, unsigned int len, unsigned short ch ) { unsigned int off = table[ch >> 8] + ((ch >> 4) & 0x0f); if (off >= len) return 0; @@ -44,9 +44,64 @@ static unsigned short casemap( const unsigned short *table, unsigned int len, un return ch + table[off]; } -static void dump_casemap(void) +static void dump_offset_table( const unsigned short *table, unsigned int len ) { int i, ch; + + for (i = 0; i < 0x10000; i++) + { + if (!(i % 16)) printf( "\n%04x:", i ); + ch = mapchar( table, len, i ); + if (ch == i) printf( " ...." ); + else printf( " %04x", ch ); + } +} + +struct ctype +{ + WORD c1, c2, c3; +}; + +static const char *get_ctype( const struct ctype *ctype ) +{ + static char buffer[100]; + static const char *c1[] = { "up ", "lo ", "dg ", "sp ", "pt ", "cl ", "bl ", "xd ", "al " }; + static const char *c2[] = { " ", "L ", "R ", "EN", "ES", "ET", + "AN", "CS", "B ", "S ", "WS", "ON" }; + static const char *c3[] = { "ns ", "di ", "vo ", "sy ", "ka ", "hi ", "hw ", "fw ", + "id ", "ks ", "lx ", "hi ", "lo ", " ", " ", "al " }; + int i; + strcpy( buffer, "| " ); + for (i = 0; i < ARRAY_SIZE(c1); i++) + strcat( buffer, (ctype->c1 & (1 << i)) ? c1[i] : "__ " ); + strcat( buffer, "| " ); + strcat( buffer, ctype->c2 < ARRAY_SIZE(c2) ? c2[ctype->c2] : "??" ); + strcat( buffer, " | " ); + for (i = 0; i < ARRAY_SIZE(c3); i++) + strcat( buffer, (ctype->c3 & (1 << i)) ? c3[i] : "__ " ); + strcat( buffer, "|" ); + return buffer; +} + +static void dump_ctype_table( const USHORT *ptr ) +{ + const struct ctype *ctypes = (const struct ctype *)(ptr + 2); + const BYTE *types = (const BYTE *)ptr + ptr[1] + 2; + int i, len = (ptr[1] - 2) / sizeof(*ctypes); + + printf( " CTYPE1 CTYPE2 CTYPE3\n" ); + for (i = 0; i < 0x10000; i++) + { + const BYTE *b = types + ((const WORD *)types)[i >> 8]; + b = types + ((const WORD *)b)[(i >> 4) & 0x0f] + (i & 0x0f); + if (*b < len) printf( "%04x %s\n", i, get_ctype( ctypes + *b )); + else printf( "%04x ??? %02x\n", i, *b ); + } + printf( "\n" ); +} + +static void dump_casemap(void) +{ unsigned int pos = 0, upper_len, lower_len; const unsigned short *header, *upper, *lower; @@ -62,21 +117,9 @@ static void dump_casemap(void) printf( "Magic: %04x\n", header[0] ); printf( "Upper-case table:\n" ); - for (i = 0; i < 0x10000; i++) - { - if (!(i % 16)) printf( "\n%04x:", i ); - ch = casemap( upper, upper_len, i ); - if (ch == i) printf( " ...." ); - else printf( " %04x", ch ); - } + dump_offset_table( upper, upper_len ); printf( "\n\nLower-case table:\n" ); - for (i = 0; i < 0x10000; i++) - { - if (!(i % 16)) printf( "\n%04x:", i ); - ch = casemap( lower, lower_len, i ); - if (ch == i) printf( " ...." ); - else printf( " %04x", ch ); - } + dump_offset_table( lower, lower_len ); printf( "\n\n" ); } @@ -346,6 +389,325 @@ static void dump_norm(void) printf( "\n" ); } + +struct sortguid +{ + GUID id; /* sort GUID */ + DWORD flags; /* flags */ + DWORD compr; /* offset to compression table */ + DWORD except; /* exception table offset in sortkey table */ + DWORD ling_except; /* exception table offset for linguistic casing */ + DWORD casemap; /* linguistic casemap table offset */ +}; + +#define FLAG_HAS_3_BYTE_WEIGHTS 0x01 +#define FLAG_REVERSEDIACRITICS 0x10 +#define FLAG_DOUBLECOMPRESSION 0x20 +#define FLAG_INVERSECASING 0x40 + +struct language_id +{ + DWORD offset; + WCHAR name[32]; +}; + +struct compression +{ + DWORD offset; + WCHAR minchar, maxchar; + WORD len[8]; +}; + +struct comprlang +{ + struct compression compr; + WCHAR name[32]; +}; + +static const char *get_sortkey( DWORD key ) +{ + static char buffer[16]; + if (!key) return "...."; + if ((WORD)key == 0x200) + sprintf( buffer, "expand %04x", key >> 16 ); + else + sprintf( buffer, "%u.%u.%u.%u", (BYTE)(key >> 8), (BYTE)key, (BYTE)(key >> 16), (BYTE)(key >> 24) ); + return buffer; +} + +static const void *dump_expansions( const DWORD *ptr ) +{ + DWORD i, count = *ptr++; + + printf( "\nExpansions: (count=%04x)\n\n", count ); + for (i = 0; i < count; i++) + { + const WCHAR *p = (const WCHAR *)(ptr + i); + printf( " %04x: %04x %04x\n", i, p[0], p[1] ); + } + return ptr + count; +} + +static void dump_exceptions( const DWORD *sortkeys, DWORD offset ) +{ + int i, j; + const DWORD *table = sortkeys + offset; + + for (i = 0; i < 0x100; i++) + { + if (table[i] == i * 0x100) continue; + for (j = 0; j < 0x100; j++) + { + if (sortkeys[table[i] + j] == sortkeys[i * 0x100 + j]) continue; + printf( " %04x: %s\n", i * 0x100 + j, get_sortkey( sortkeys[table[i] + j] )); + } + } +} + +static const void *dump_compression( const struct compression *compr, const WCHAR *table ) +{ + int i, j, k; + const WCHAR *p = table + compr->offset; + + printf( " min=%04x max=%04x counts=%u,%u,%u,%u,%u,%u,%u,%u\n", + compr->minchar, compr->maxchar, + compr->len[0], compr->len[1], compr->len[2], compr->len[3], + compr->len[4], compr->len[5], compr->len[6], compr->len[7] ); + for (i = 0; i < 8; i++) + { + for (j = 0; j < compr->len[i]; j++) + { + printf( " " ); + for (k = 0; k < i + 2; k++) printf( " %04x", *p++ ); + p = (const WCHAR *)(((ULONG_PTR)p + 3) & ~3); + printf( " -> %s\n", get_sortkey( *(const DWORD *)p )); + p += 2; + } + } + return p; +} + +static const void *dump_multiple_weights( const DWORD *ptr ) +{ + int i, count = *ptr++; + const WCHAR *p; + + printf( "\nMultiple weights: (count=%u)\n\n", count ); + p = (const WCHAR *)ptr; + for (i = 0; i < count; i++) + { + BYTE weight = p[i]; + BYTE count = p[i] >> 8; + printf( "%u - %u\n", weight, weight + count ); + } + return ptr + (count + 1) / 2; +} + +static void dump_sort( int old_version ) +{ + const struct + { + DWORD sortkeys; + DWORD casemaps; + DWORD ctypes; + DWORD sortids; + } *header; + + const struct compression *compr; + const struct sortguid *guids; + const struct comprlang *comprlangs; + const struct language_id *language_ids = NULL; + const WORD *casemaps, *map; + const DWORD *sortkeys, *ptr; + const WCHAR *p = NULL; + int i, j, size, len; + int nb_casemaps = 0, casemap_offsets[16]; + + if (!(header = PRD( 0, sizeof(*header) ))) return; + + if (!(sortkeys = PRD( header->sortkeys, header->casemaps - header->sortkeys ))) return; + printf( "\nSort keys:\n" ); + for (i = 0; i < 0x10000; i++) + { + if (!(i % 8)) printf( "\n%04x:", i ); + printf( " %16s", get_sortkey( sortkeys[i] )); + } + printf( "\n\n" ); + + size = (header->ctypes - header->casemaps) / sizeof(*casemaps); + if (!(casemaps = PRD( header->casemaps, size * sizeof(*casemaps) ))) return; + len = 0; + if (old_version) + { + ptr = (const DWORD *)casemaps; + len = *ptr++; + language_ids = (const struct language_id *)ptr; + casemaps = (const WORD *)(language_ids + len); + } + map = casemaps; + while (size) + { + const WORD *upper = map + 2; + const WORD *lower = map + 2 + map[1]; + const WORD *end = map + map[1] + 1 + map[map[1] + 1]; + + if (map[0] != 1) break; + printf( "\nCase mapping table %u:\n", nb_casemaps ); + casemap_offsets[nb_casemaps++] = map - casemaps; + for (j = 0; j < len; j++) + { + if (language_ids[j].offset != map - casemaps) continue; + printf( "Language: %s\n", get_unicode_str( language_ids[j].name, -1 )); + break; + } + printf( "\nUpper-case table:\n" ); + dump_offset_table( upper, lower - upper ); + printf( "\n\nLower-case table:\n" ); + dump_offset_table( lower, end - lower ); + printf( "\n\n" ); + size -= (end - map); + map = end; + } + + if (!(p = PRD( header->ctypes, header->sortids - header->ctypes ))) return; + printf( "\nCTYPE table:\n\n" ); + dump_ctype_table( p ); + + printf( "\nSort tables:\n\n" ); + size = (dump_total_len - header->sortids) / sizeof(*ptr); + if (!(ptr = PRD( header->sortids, size * sizeof(*ptr) ))) return; + + if (old_version) + { + len = *ptr++; + for (i = 0; i < len; i++, ptr += 2) printf( "NLS version: %08x %08x\n", ptr[0], ptr[1] ); + len = *ptr++; + for (i = 0; i < len; i++, ptr += 2) printf( "Defined version: %08x %08x\n", ptr[0], ptr[1] ); + len = *ptr++; + printf( "\nReversed diacritics:\n\n" ); + for (i = 0; i < len; i++) + { + const WCHAR *name = (const WCHAR *)ptr; + printf( "%s\n", get_unicode_str( name, -1 )); + ptr += 16; + } + len = *ptr++; + printf( "\nDouble compression:\n\n" ); + for (i = 0; i < len; i++) + { + const WCHAR *name = (const WCHAR *)ptr; + printf( "%s\n", get_unicode_str( name, -1 )); + ptr += 16; + } + ptr = dump_expansions( ptr ); + + printf( "\nCompressions:\n" ); + size = *ptr++; + comprlangs = (const struct comprlang *)ptr; + for (i = 0; i < size; i++) + { + printf( "\n %s\n", get_unicode_str( comprlangs[i].name, -1 )); + ptr = dump_compression( &comprlangs[i].compr, (const WCHAR *)(comprlangs + size) ); + } + + ptr = dump_multiple_weights( ptr ); + + size = *ptr++; + printf( "\nJamo sort:\n\n" ); + for (i = 0; i < size; i++, ptr += 2) + { + const struct jamo { BYTE val[5], off, len; } *jamo = (const struct jamo *)ptr; + printf( "%04x: %02x %02x %02x %02x %02x off=%02x len=%02x\n", 0x1100 + i, jamo->val[0], + jamo->val[1], jamo->val[2], jamo->val[3], jamo->val[4], + jamo->off, jamo->len ); + } + + size = *ptr++; + printf( "\nJamo second chars:\n\n" ); + for (i = 0; i < size; i++, ptr += 2) + { + const struct jamo { WORD ch; BYTE val[5], len; } *jamo = (const struct jamo *)ptr; + printf( "%02x: %04x: %02x %02x %02x %02x %02x len=%02x\n", i, jamo->ch, jamo->val[0], + jamo->val[1], jamo->val[2], jamo->val[3], jamo->val[4], jamo->len ); + } + + size = *ptr++; + printf( "\nExceptions:\n" ); + language_ids = (const struct language_id *)ptr; + for (i = 0; i < size; i++) + { + printf( "\n %08x %s\n", language_ids[i].offset, get_unicode_str( language_ids[i].name, -1 )); + dump_exceptions( sortkeys, language_ids[i].offset ); + } + } + else + { + int guid_count = ptr[1]; + printf( "NLS version: %08x\n\n", ptr[0] ); + printf( "Sort GUIDs:\n\n" ); + guids = (const struct sortguid *)(ptr + 2); + for (i = 0; i < guid_count; i++) + { + for (j = 0; j < nb_casemaps; j++) if (casemap_offsets[j] == guids[i].casemap) break; + printf( " %s flags=%08x compr=%08x casemap=%d\n", get_guid_str( &guids[i].id ), + guids[i].flags, guids[i].compr, j < nb_casemaps ? j : -1 ); + } + + ptr = dump_expansions( (const DWORD *)(guids + guid_count) ); + + size = *ptr++; + printf( "\nCompressions:\n" ); + compr = (const struct compression *)ptr; + for (i = 0; i < size; i++) + { + printf( "\n" ); + for (j = 0; j < guid_count; j++) + if (guids[j].compr == i) printf( " %s\n", get_guid_str( &guids[j].id )); + ptr = dump_compression( compr + i, (const WCHAR *)(compr + size) ); + } + + ptr = dump_multiple_weights( ptr ); + + size = *ptr++; + printf( "\nJamo sort:\n\n" ); + for (i = 0; i < size; i++) + { + static const WCHAR hangul_chars[] = + { + 0xa960, 0xa961, 0xa962, 0xa963, 0xa964, 0xa965, 0xa966, 0xa967, + 0xa968, 0xa969, 0xa96a, 0xa96b, 0xa96c, 0xa96d, 0xa96e, 0xa96f, + 0xa970, 0xa971, 0xa972, 0xa973, 0xa974, 0xa975, 0xa976, 0xa977, + 0xa978, 0xa979, 0xa97a, 0xa97b, 0xa97c, + 0xd7b0, 0xd7b1, 0xd7b2, 0xd7b3, 0xd7b4, 0xd7b5, 0xd7b6, 0xd7b7, + 0xd7b8, 0xd7b9, 0xd7ba, 0xd7bb, 0xd7bc, 0xd7bd, 0xd7be, 0xd7bf, + 0xd7c0, 0xd7c1, 0xd7c2, 0xd7c3, 0xd7c4, 0xd7c5, 0xd7c6, + 0xd7cb, 0xd7cc, 0xd7cd, 0xd7ce, 0xd7cf, + 0xd7d0, 0xd7d1, 0xd7d2, 0xd7d3, 0xd7d4, 0xd7d5, 0xd7d6, 0xd7d7, + 0xd7d8, 0xd7d9, 0xd7da, 0xd7db, 0xd7dc, 0xd7dd, 0xd7de, 0xd7df, + 0xd7e0, 0xd7e1, 0xd7e2, 0xd7e3, 0xd7e4, 0xd7e5, 0xd7e6, 0xd7e7, + 0xd7e8, 0xd7e9, 0xd7ea, 0xd7eb, 0xd7ec, 0xd7ed, 0xd7ee, 0xd7ef, + 0xd7f0, 0xd7f1, 0xd7f2, 0xd7f3, 0xd7f4, 0xd7f5, 0xd7f6, 0xd7f7, + 0xd7f8, 0xd7f9, 0xd7fa, 0xd7fb + }; + const BYTE *b = (const BYTE *)(ptr + 2 * i); + WCHAR wc = i < 0x100 ? 0x1100 + i : hangul_chars[i - 0x100]; + printf( "%04x: %02x %02x %02x %02x %02x\n", wc, b[0], b[1], b[2], b[3], b[4] ); + } + + printf( "\nExceptions:\n" ); + for (i = 0; i < guid_count; i++) + { + if (!guids[i].except) continue; + printf( "\n %s\n", get_guid_str( &guids[i].id )); + dump_exceptions( sortkeys, guids[i].except ); + if (!guids[i].ling_except) continue; + printf( "\n %s LINGUISTIC_CASING\n", get_guid_str( &guids[i].id )); + dump_exceptions( sortkeys, guids[i].ling_except ); + } + } + printf( "\n" ); +} + void nls_dump(void) { const char *name = strrchr( globals.input_name, '/' ); @@ -354,6 +716,8 @@ void nls_dump(void) if (!strcasecmp( name, "l_intl.nls" )) return dump_casemap(); if (!strncasecmp( name, "c_", 2 )) return dump_codepage(); if (!strncasecmp( name, "norm", 4 )) return dump_norm(); + if (!strcasecmp( name, "sortdefault.nls" )) return dump_sort( 0 ); + if (!strncasecmp( name, "sort", 4 )) return dump_sort( 1 ); fprintf( stderr, "Unrecognized file name '%s'\n", globals.input_name ); } diff --git a/tools/winedump/winedump.h b/tools/winedump/winedump.h index c2671539009..240f31d2f2d 100644 --- a/tools/winedump/winedump.h +++ b/tools/winedump/winedump.h @@ -229,6 +229,7 @@ const char* get_time_str( unsigned long ); unsigned int strlenW( const unsigned short *str ); void dump_unicode_str( const unsigned short *str, int len ); const char* get_guid_str(const GUID* guid); +const char* get_unicode_str( const WCHAR *str, int len ); const char* get_symbol_str(const char* symname); void print_fake_dll(void); void dump_file_header(const IMAGE_FILE_HEADER *);