diff --git a/ChangeLog b/ChangeLog index 232561387..54693a87a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,32 @@ +2005-11-29 Chia-I Wu + + Major update to distinguish between unsorted and overlapped segments + for cmap format 4. For overlapped but sorted segments, which is + previously considered unsorted, we still use binary search. + + * src/sfnt/ttcmap.h (struct TT_CMapRec_): Replace `unsorted' by + `flags'. + (TT_CMAP_FLAG_UNSORTED, TT_CMAP_FLAG_OVERLAPPED): New macros. + + * src/sfnt/ttcmap.c (OPT_CMAP4): Removed as it is always defined. + (struct TT_CMap4Rec_): Remove `old_charcode' and `table_length'. + (tt_cmap4_reset): Removed. + (tt_cmap4_init): Updated accordingly. + (tt_cmap4_next): Updated accordingly. + Take care of overlapped segments. + (tt_cmap4_validate): Make sure the subtable is large enough. + Do not check glyph_ids because some fonts set the length wrongly. + Also, when all segments have offset 0, glyph_ids is always invalid. It + does not cause any problem so far only because the check misses + equality. + Distinguish between unsorted and overlapped segments. + (tt_cmap4_char_map_linear, tt_cmap4_char_map_binary): New functions to + do "charcode => glyph index" by linear/binary search. + (tt_cmap4_char_index, tt_cmap4_char_next): Use + tt_cmap4_char_map_linear and tt_cmap4_char_map_binary. + (tt_face_build_cmaps): Treat the return value of validator as flags + for cmap. + 2005-11-29 Chia-I Wu * src/sfnt/ttcmap.c (struct TT_CMap12Rec_, tt_cmap12_init, diff --git a/src/sfnt/ttcmap.c b/src/sfnt/ttcmap.c index 411008f5d..2ff6f945e 100644 --- a/src/sfnt/ttcmap.c +++ b/src/sfnt/ttcmap.c @@ -625,18 +625,12 @@ #ifdef TT_CONFIG_CMAP_FORMAT_4 -#define OPT_CMAP4 - -#ifdef OPT_CMAP4 - typedef struct TT_CMap4Rec_ { TT_CMapRec cmap; - FT_UInt32 old_charcode; /* old charcode */ FT_UInt32 cur_charcode; /* current charcode */ FT_UInt cur_gindex; /* current glyph index */ - FT_UInt table_length; FT_UInt num_ranges; FT_UInt cur_range; FT_UInt cur_start; @@ -656,14 +650,9 @@ cmap->cmap.data = table; - p = table + 2; - cmap->table_length = FT_PEEK_USHORT( p ); - p = table + 6; cmap->num_ranges = FT_PEEK_USHORT( p ) >> 1; - cmap->cur_range = cmap->num_ranges; - cmap->old_charcode = 0xFFFFFFFFUL; - cmap->cur_charcode = 0; + cmap->cur_charcode = 0xFFFFFFFFUL; cmap->cur_gindex = 0; return SFNT_Err_Ok; @@ -707,21 +696,27 @@ range_index++; } - cmap->old_charcode = 0xFFFFFFFFUL; - cmap->cur_charcode = 0; - cmap->cur_gindex = 0; - cmap->cur_range = num_ranges; return -1; } + /* find the index of the charcode next to cmap->cur_charcode; */ + /* caller should call tt_cmap4_set_range with proper range */ + /* before calling this function */ + /* */ static void tt_cmap4_next( TT_CMap4 cmap ) { - FT_UInt charcode = cmap->cur_charcode + 1; + FT_UInt charcode; + + if ( cmap->cur_charcode >= 0xFFFFUL ) + goto Fail; - cmap->old_charcode = cmap->cur_charcode; + charcode = cmap->cur_charcode + 1; + + if ( charcode < cmap->cur_start ) + charcode = cmap->cur_start; for ( ;; ) { @@ -775,27 +770,16 @@ if ( tt_cmap4_set_range( cmap, cmap->cur_range + 1 ) < 0 ) break; - charcode = cmap->cur_start; + if ( charcode < cmap->cur_start ) + charcode = cmap->cur_start; } + + Fail: + cmap->cur_charcode = 0xFFFFFFFFUL; + cmap->cur_gindex = 0; } - static void - tt_cmap4_reset( TT_CMap4 cmap, - FT_UInt code, - FT_UInt range_index ) - { - if ( tt_cmap4_set_range( cmap, range_index ) >= 0 ) - { - cmap->cur_charcode = code; - tt_cmap4_next( cmap ); - } - } - -#endif /* OPT_CMAP4 */ - - - FT_CALLBACK_DEF( FT_Error ) tt_cmap4_validate( FT_Byte* table, FT_Validator valid ) @@ -807,11 +791,11 @@ FT_Error error = SFNT_Err_Ok; - /* in certain fonts, the `length' field is invalid and goes */ - /* out of bound. We try to correct this here... */ if ( length < 16 ) FT_INVALID_TOO_SHORT; + /* in certain fonts, the `length' field is invalid and goes */ + /* out of bound. We try to correct this here... */ if ( table + length > valid->limit ) { if ( valid->level >= FT_VALIDATE_TIGHT ) @@ -832,6 +816,9 @@ num_segs /= 2; + if ( length < 16 + num_segs * 2 * 4 ) + FT_INVALID_TOO_SHORT; + /* check the search parameters - even though we never use them */ /* */ if ( valid->level >= FT_VALIDATE_PARANOID ) @@ -863,9 +850,6 @@ offsets = deltas + num_segs * 2; glyph_ids = offsets + num_segs * 2; - if ( glyph_ids > table + length ) - FT_INVALID_TOO_SHORT; - /* check last segment, its end count must be FFFF */ if ( valid->level >= FT_VALIDATE_PARANOID ) { @@ -874,10 +858,9 @@ FT_INVALID_DATA; } - /* check that segments are sorted in increasing order and do not */ - /* overlap; check also the offsets */ { - FT_UInt start, end, last = 0, offset, n; + FT_UInt start, end, offset, n; + FT_UInt last_start = 0, last_end = 0; FT_Int delta; @@ -899,12 +882,20 @@ /* unfortunately, some popular Asian fonts present overlapping */ /* ranges in their charmaps */ /* */ - if ( n > 0 && start <= last ) + if ( start <= last_end && n > 0 ) { if ( valid->level >= FT_VALIDATE_TIGHT ) FT_INVALID_DATA; else - error = SFNT_Err_Invalid_CharMap_Format; + { + /* allow overlapping segments, provided their start points */ + /* and end points, respectively, are in ascending order. */ + /* */ + if ( last_start > start || last_end > end ) + error |= TT_CMAP_FLAG_UNSORTED; + else + error |= TT_CMAP_FLAG_OVERLAPPED; + } } if ( offset && offset != 0xFFFFU ) @@ -955,7 +946,8 @@ FT_INVALID_DATA; } - last = end; + last_start = start; + last_end = end; } } @@ -963,120 +955,301 @@ } + static FT_UInt + tt_cmap4_char_map_linear( TT_CMap cmap, + FT_UInt* pcharcode, + FT_Bool next ) + { + FT_UInt num_segs2, start, end, offset; + FT_Int delta; + FT_UInt i, num_segs; + FT_UInt32 charcode = *pcharcode; + FT_UInt gindex = 0; + FT_Byte* p; + + + p = cmap->data + 6; + num_segs2 = FT_PAD_FLOOR( TT_PEEK_USHORT( p ), 2 ); + + num_segs = num_segs2 >> 1; + + if ( !num_segs ) + return 0; + + if ( next ) + charcode++; + + /* linear search */ + for ( ; charcode <= 0xFFFFU; charcode++ ) + { + FT_Byte* q; + + + p = cmap->data + 14; /* ends table */ + q = cmap->data + 16 + num_segs2; /* starts table */ + + for ( i = 0; i < num_segs; i++ ) + { + end = TT_NEXT_USHORT( p ); + start = TT_NEXT_USHORT( q ); + + if ( charcode >= start && charcode <= end ) + { + p = q - 2 + num_segs2; + delta = TT_PEEK_SHORT( p ); + p += num_segs2; + offset = TT_PEEK_USHORT( p ); + + if ( offset == 0xFFFFU ) + continue; + + if ( offset ) + { + p += offset + ( charcode - start ) * 2; + gindex = TT_PEEK_USHORT( p ); + if ( gindex != 0 ) + gindex = (FT_UInt)( gindex + delta ) & 0xFFFFU; + } + else + gindex = (FT_UInt)( charcode + delta ) & 0xFFFFU; + + break; + } + } + + if ( !next || gindex ) + break; + } + + if ( next && gindex ) + *pcharcode = charcode; + + return gindex; + } + + + static FT_UInt + tt_cmap4_char_map_binary( TT_CMap cmap, + FT_UInt* pcharcode, + FT_Bool next ) + { + FT_UInt num_segs2, start, end, offset; + FT_Int delta; + FT_UInt max, min, mid, num_segs; + FT_UInt charcode = *pcharcode; + FT_UInt gindex = 0; + FT_Byte* p; + + + p = cmap->data + 6; + num_segs2 = FT_PAD_FLOOR( TT_PEEK_USHORT( p ), 2 ); + + num_segs = num_segs2 >> 1; + + if ( !num_segs ) + return 0; + + if ( next ) + charcode++; + + min = 0; + max = num_segs; + + /* binary search */ + while ( min < max ) + { + mid = ( min + max ) >> 1; + p = cmap->data + 14 + mid * 2; + end = TT_PEEK_USHORT( p ); + p += 2 + num_segs2; + start = TT_PEEK_USHORT( p ); + + if ( charcode < start ) + max = mid; + else if ( charcode > end ) + min = mid + 1; + else + { + p += num_segs2; + delta = TT_PEEK_SHORT( p ); + p += num_segs2; + offset = TT_PEEK_USHORT( p ); + + /* find the first segment containing `charcode' */ + if ( cmap->flags & TT_CMAP_FLAG_OVERLAPPED ) + { + FT_UInt i; + + + /* call the current segment `max' */ + max = mid; + + if ( offset == 0xFFFFU ) + mid = max + 1; + + /* find in segments before the current segment */ + for ( i = max ; i > 0; i-- ) + { + FT_UInt prev_end; + + + p = cmap->data + 14 + ( i - 1 ) * 2; + prev_end = TT_PEEK_USHORT( p ); + + if ( charcode > prev_end ) + break; + + end = prev_end; + p += 2 + num_segs2; + start = TT_PEEK_USHORT( p ); + p += num_segs2; + delta = TT_PEEK_SHORT( p ); + p += num_segs2; + offset = TT_PEEK_USHORT( p ); + + if ( offset != 0xFFFFU ) + mid = i - 1; + } + + /* no luck */ + if ( mid == max + 1 ) + { + if ( i != max ) + { + p = cmap->data + 14 + max * 2; + end = TT_PEEK_USHORT( p ); + p += 2 + num_segs2; + start = TT_PEEK_USHORT( p ); + p += num_segs2; + delta = TT_PEEK_SHORT( p ); + p += num_segs2; + offset = TT_PEEK_USHORT( p ); + } + + mid = max; + + /* find in segments after the current segment */ + for ( i = max + 1; i < num_segs; i++ ) + { + FT_UInt next_end, next_start; + + + p = cmap->data + 14 + i * 2; + next_end = TT_PEEK_USHORT( p ); + p += 2 + num_segs2; + next_start = TT_PEEK_USHORT( p ); + + if ( charcode < next_start ) + break; + + end = next_end; + start = next_start; + p += num_segs2; + delta = TT_PEEK_SHORT( p ); + p += num_segs2; + offset = TT_PEEK_USHORT( p ); + + if ( offset != 0xFFFFU ) + mid = i; + } + i--; + + /* still no luck */ + if ( mid == max ) + { + mid = i; + + break; + } + } + + /* end, start, delta and offset are for the i'th segment */ + if ( mid != i ) + { + p = cmap->data + 14 + mid * 2; + end = TT_PEEK_USHORT( p ); + p += 2 + num_segs2; + start = TT_PEEK_USHORT( p ); + p += num_segs2; + delta = TT_PEEK_SHORT( p ); + p += num_segs2; + offset = TT_PEEK_USHORT( p ); + } + } + else + { + if ( offset == 0xFFFFU ) + break; + } + + if ( offset ) + { + p += offset + ( charcode - start ) * 2; + gindex = TT_PEEK_USHORT( p ); + if ( gindex != 0 ) + gindex = (FT_UInt)( gindex + delta ) & 0xFFFFU; + } + else + gindex = (FT_UInt)( charcode + delta ) & 0xFFFFU; + + break; + } + } + + if ( next ) + { + TT_CMap4 cmap4 = (TT_CMap4)cmap; + + + /* if `charcode' is not in any segment, then `mid' is */ + /* the segment nearest to `charcode' */ + /* */ + + if ( charcode > end ) + { + mid++; + if ( mid == num_segs ) + return 0; + } + + if ( tt_cmap4_set_range( cmap4, mid ) ) + { + if ( gindex ) + *pcharcode = charcode; + } + else + { + cmap4->cur_charcode = charcode; + + if ( gindex ) + cmap4->cur_gindex = gindex; + else + { + cmap4->cur_charcode = charcode; + tt_cmap4_next( cmap4 ); + gindex = cmap4->cur_gindex; + } + + if ( gindex ) + *pcharcode = cmap4->cur_charcode; + } + } + + return gindex; + } + + FT_CALLBACK_DEF( FT_UInt ) tt_cmap4_char_index( TT_CMap cmap, FT_UInt32 char_code ) { - FT_Byte* table = cmap->data; - FT_UInt result = 0; + if ( char_code >= 0x10000U ) + return 0; - - if ( char_code < 0x10000UL ) - { - FT_UInt idx, num_segs2; - FT_Int delta; - FT_UInt code = (FT_UInt)char_code; - FT_Byte* p; - - p = table + 6; - num_segs2 = FT_PAD_FLOOR( TT_PEEK_USHORT( p ), 2 ); /* be paranoid! */ - - if ( !cmap->unsorted ) - { - /* Some fonts have more than 170 segments in their charmaps! */ - /* We changed this function to use a more efficient binary */ - /* search for improving performance */ - FT_UInt min = 0; - FT_UInt max = num_segs2 >> 1; - FT_UInt mid, start, end, offset; - - - while ( min < max ) - { - mid = ( min + max ) >> 1; - p = table + 14 + mid * 2; - end = TT_NEXT_USHORT( p ); - p += num_segs2; - start = TT_PEEK_USHORT( p); - - if ( code < start ) - max = mid; - else if ( code > end ) - min = mid + 1; - else - { - /* we found the segment */ - idx = code; - - p += num_segs2; - delta = TT_PEEK_SHORT( p ); - - p += num_segs2; - offset = TT_PEEK_USHORT( p ); - - if ( offset == 0xFFFFU ) - goto Exit; - - if ( offset != 0 ) - { - p += offset + 2 * ( idx - start ); - idx = TT_PEEK_USHORT( p ); - } - - if ( idx != 0 ) - result = (FT_UInt)( idx + delta ) & 0xFFFFU; - - goto Exit; - } - } - } - else - { - FT_UInt n; - FT_Byte* q; - - - p = table + 14; /* ends table */ - q = table + 16 + num_segs2; /* starts table */ - - - for ( n = 0; n < num_segs2; n += 2 ) - { - FT_UInt end = TT_NEXT_USHORT( p ); - FT_UInt start = TT_NEXT_USHORT( q ); - FT_UInt offset; - - - if ( code < start ) - break; - - if ( code <= end ) - { - idx = code; - - p = q + num_segs2 - 2; - delta = TT_PEEK_SHORT( p ); - p += num_segs2; - offset = TT_PEEK_USHORT( p ); - - if ( offset == 0xFFFFU ) - goto Exit; - - if ( offset != 0 ) - { - p += offset + 2 * ( idx - start ); - idx = TT_PEEK_USHORT( p ); - } - - if ( idx != 0 ) - result = (FT_UInt)( idx + delta ) & 0xFFFFU; - } - } - } - } - - Exit: - return result; + if ( cmap->flags & TT_CMAP_FLAG_UNSORTED ) + return tt_cmap4_char_map_linear( cmap, &char_code, 0 ); + else + return tt_cmap4_char_map_binary( cmap, &char_code, 0 ); } @@ -1084,206 +1257,31 @@ tt_cmap4_char_next( TT_CMap cmap, FT_UInt32 *pchar_code ) { - FT_Byte* table = cmap->data; - FT_UInt32 result = 0; - FT_UInt gindex = 0; - FT_UInt32 char_code = *pchar_code; - FT_Byte* p; - FT_UInt code, num_segs2; + FT_UInt gindex; - if ( char_code >= 0xFFFFUL ) - goto Exit; + if ( *pchar_code >= 0xFFFFU ) + return 0; -#ifdef OPT_CMAP4 - { - TT_CMap4 cmap4 = (TT_CMap4)cmap; - - - if ( char_code == cmap4->old_charcode ) - { - result = cmap4->cur_charcode; - gindex = cmap4->cur_gindex; - if ( result != 0 ) - { - tt_cmap4_next( cmap4 ); - goto Exit; - } - } - } -#endif /* OPT_CMAP4 */ - - code = (FT_UInt)char_code + 1; - p = table + 6; - num_segs2 = FT_PAD_FLOOR( TT_PEEK_USHORT( p ), 2 ); /* ensure even-ness */ - - if ( !cmap->unsorted ) - { - for (;;) - { - /* Some fonts have more than 170 segments in their charmaps! */ - /* We changed this function to use a more efficient binary */ - /* search */ - FT_UInt offset; - FT_Int delta; - FT_UInt min = 0; - FT_UInt max = num_segs2 >> 1; - FT_UInt mid, start, end; - FT_UInt hi; - - - /* we begin by finding the segment which end is - closer to our code point */ - hi = max + 1; - while ( min < max ) - { - mid = ( min + max ) >> 1; - p = table + 14 + mid * 2; - end = TT_PEEK_USHORT( p ); - - if ( end < code ) - min = mid + 1; - else - { - hi = mid; - max = mid; - } - } - - if ( hi > max ) - { - /* the point is behind the last segment; - we will exit right now */ - goto Exit; - } - - p = table + 14 + hi * 2; - end = TT_PEEK_USHORT( p ); - - p += 2 + num_segs2; - start = TT_PEEK_USHORT( p ); - - if ( code < start ) - code = start; - - p += num_segs2; - delta = TT_PEEK_USHORT( p ); - - p += num_segs2; - offset = TT_PEEK_USHORT( p ); - - if ( offset != 0 && offset != 0xFFFFU ) - { - /* parse the glyph ids array for non-zero index */ - p += offset + ( code - start ) * 2; - while ( code <= end ) - { - gindex = TT_NEXT_USHORT( p ); - if ( gindex != 0 ) - { - gindex = (FT_UInt)( gindex + delta ) & 0xFFFFU; - if ( gindex != 0 ) - { - result = code; -#ifdef OPT_CMAP4 - tt_cmap4_reset( (TT_CMap4)cmap, code, hi ); -#endif - goto Exit; - } - } - code++; - } - } - else if ( offset == 0xFFFFU ) - { - /* an offset of 0xFFFF means an empty segment in certain fonts! */ - code = end + 1; - } - else /* offset == 0 */ - { - gindex = (FT_UInt)( code + delta ) & 0xFFFFU; - if ( gindex != 0 ) - { - result = code; -#ifdef OPT_CMAP4 - tt_cmap4_reset( (TT_CMap4)cmap, code, hi ); -#endif - goto Exit; - } - code++; - } - } - } + if ( cmap->flags & TT_CMAP_FLAG_UNSORTED ) + gindex = tt_cmap4_char_map_linear( cmap, pchar_code, 1 ); else { - for ( ;; ) + TT_CMap4 cmap4 = (TT_CMap4)cmap; + + + /* no need to search */ + if ( *pchar_code == cmap4->cur_charcode ) { - FT_UInt offset, n; - FT_Int delta; - FT_Byte* q; - - - p = table + 14; /* ends table */ - q = table + 16 + num_segs2; /* starts table */ - - for ( n = 0; n < num_segs2; n += 2 ) - { - FT_UInt end = TT_NEXT_USHORT( p ); - FT_UInt start = TT_NEXT_USHORT( q ); - - - if ( code < start ) - code = start; - - if ( code <= end ) - { - p = q + num_segs2 - 2; - delta = TT_PEEK_SHORT( p ); - p += num_segs2; - offset = TT_PEEK_USHORT( p ); - - if ( offset != 0 && offset != 0xFFFFU ) - { - /* parse the glyph ids array for non-0 index */ - p += offset + ( code - start ) * 2; - while ( code <= end ) - { - gindex = TT_NEXT_USHORT( p ); - if ( gindex != 0 ) - { - gindex = (FT_UInt)( gindex + delta ) & 0xFFFFU; - if ( gindex != 0 ) - break; - } - code++; - } - } - else if ( offset == 0xFFFFU ) - { - /* an offset of 0xFFFF means an empty glyph in certain fonts! */ - code = end; - break; - } - else - gindex = (FT_UInt)( code + delta ) & 0xFFFFU; - - if ( gindex == 0 ) - break; - - result = code; - goto Exit; - } - } - /* loop to next trial charcode */ - if ( code >= 0xFFFFU ) - break; - - code++; + tt_cmap4_next( cmap4 ); + gindex = cmap4->cur_gindex; + if ( gindex ) + *pchar_code = cmap4->cur_charcode; } + else + gindex = tt_cmap4_char_map_binary( cmap, pchar_code, 1 ); } - Exit: - *pchar_code = result; return gindex; } @@ -1305,13 +1303,8 @@ const TT_CMap_ClassRec tt_cmap4_class_rec = { { -#ifdef OPT_CMAP4 sizeof ( TT_CMap4Rec ), (FT_CMap_InitFunc) tt_cmap4_init, -#else - sizeof ( TT_CMapRec ), - (FT_CMap_InitFunc) tt_cmap_init, -#endif (FT_CMap_DoneFunc) NULL, (FT_CMap_CharIndexFunc)tt_cmap4_char_index, (FT_CMap_CharNextFunc) tt_cmap4_char_next @@ -2300,13 +2293,15 @@ if ( valid.validator.error == 0 ) { - (void)FT_CMap_New( (FT_CMap_Class)clazz, cmap, &charmap, NULL ); + FT_CMap ttcmap; - /* it is simpler to directly set the `unsorted' flag instead */ - /* of adding a parameter to FT_CMap_New */ - ((TT_CMap)(face->root.charmaps - [face->root.num_charmaps - 1]))->unsorted = - FT_BOOL( error ); + + if ( !FT_CMap_New( (FT_CMap_Class)clazz, cmap, &charmap, &ttcmap ) ) + { + /* it is simpler to directly set `flags' than adding */ + /* a parameter to FT_CMap_New */ + ((TT_CMap)ttcmap)->flags = (FT_Int)error; + } } else { diff --git a/src/sfnt/ttcmap.h b/src/sfnt/ttcmap.h index 5f758a46d..fb03107a9 100644 --- a/src/sfnt/ttcmap.h +++ b/src/sfnt/ttcmap.h @@ -27,11 +27,15 @@ FT_BEGIN_HEADER + +#define TT_CMAP_FLAG_UNSORTED 1 +#define TT_CMAP_FLAG_OVERLAPPED 2 + typedef struct TT_CMapRec_ { FT_CMapRec cmap; FT_Byte* data; /* pointer to in-memory cmap table */ - FT_Bool unsorted; /* for format 4 only */ + FT_Int flags; /* for format 4 only */ } TT_CMapRec, *TT_CMap;