From bcf618b256c101e9f4a47af77541a559bb8bb44d Mon Sep 17 00:00:00 2001 From: Werner Lemberg Date: Mon, 2 Nov 2015 06:50:49 +0100 Subject: [PATCH] [ftfuzzer] Add support for multiple files (patch #8779). Currently, libFuzzer only supports mutation of a single file. We circumvent this problem by using an uncompressed tar archive as multiple-file input for the fuzzer. This patch enables tests of `FT_Attach_Stream' and AFM/PFM parsing; a constructed tarball should contain a font file as the first element, and files to be attached as further elements. * src/tools/ftfuzzer/ftfuzzer.cc: Include libarchive headers. (archive_read_entry_data, parse_data): New functions. (LLVMFuzzerTestOneInput): Updated. * src/tools/ftfuzzer/ftmutator.cc: New file, providing a custom mutator for libFuzzer that can mutate tarballs in a sensible way. --- ChangeLog | 19 ++ src/tools/ftfuzzer/ftfuzzer.cc | 119 ++++++++++++- src/tools/ftfuzzer/ftmutator.cc | 301 ++++++++++++++++++++++++++++++++ 3 files changed, 430 insertions(+), 9 deletions(-) create mode 100644 src/tools/ftfuzzer/ftmutator.cc diff --git a/ChangeLog b/ChangeLog index 44fc7defc..ee5e5e986 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,22 @@ +2015-11-02 Bungeman + + [ftfuzzer] Add support for multiple files (patch #8779). + + Currently, libFuzzer only supports mutation of a single file. We + circumvent this problem by using an uncompressed tar archive as + multiple-file input for the fuzzer. + + This patch enables tests of `FT_Attach_Stream' and AFM/PFM parsing; + a constructed tarball should contain a font file as the first + element, and files to be attached as further elements. + + * src/tools/ftfuzzer/ftfuzzer.cc: Include libarchive headers. + (archive_read_entry_data, parse_data): New functions. + (LLVMFuzzerTestOneInput): Updated. + + * src/tools/ftfuzzer/ftmutator.cc: New file, providing a custom + mutator for libFuzzer that can mutate tarballs in a sensible way. 
+
 2015-10-31  Werner Lemberg
 
 	[sfnt] Fix cmap 14 validation (#46346).
diff --git a/src/tools/ftfuzzer/ftfuzzer.cc b/src/tools/ftfuzzer/ftfuzzer.cc
index e5ab293f1..a232c682e 100644
--- a/src/tools/ftfuzzer/ftfuzzer.cc
+++ b/src/tools/ftfuzzer/ftfuzzer.cc
@@ -1,8 +1,11 @@
-// we use `unique_ptr' and `decltype', defined since C++11
+// we use `unique_ptr', `decltype', and other gimmicks defined since C++11
 #if __cplusplus < 201103L
 #  error "a C++11 compiler is needed"
 #endif
 
+#include <archive.h>
+#include <archive_entry.h>
+
 #include <assert.h>
 #include <stdint.h>
 
@@ -10,7 +13,7 @@
 #include <vector>
 
 
-using namespace std;
+  using namespace std;
 
 
 #include <ft2build.h>
@@ -34,6 +37,7 @@ using namespace std;
   static FT_Library  library;
   static int         InitResult;
 
+
   struct FT_Global {
     FT_Global() {
       InitResult = FT_Init_FreeType( &library );
@@ -46,6 +50,81 @@ using namespace std;
   FT_Global  global_ft;
 
 
+  static int
+  archive_read_entry_data( struct archive   *ar,
+                           vector<FT_Byte>  *vw )
+  {
+    int             r;
+    const FT_Byte*  buff;
+    size_t          size;
+    int64_t         offset;
+
+    for (;;)
+    {
+      r = archive_read_data_block( ar,
+                                   reinterpret_cast<const void**>( &buff ),
+                                   &size,
+                                   &offset );
+      if ( r == ARCHIVE_EOF )
+        return ARCHIVE_OK;
+      if ( r != ARCHIVE_OK )
+        return r;
+
+      vw->insert( vw->end(), buff, buff + size );
+    }
+  }
+
+
+  static vector<vector<FT_Byte>>
+  parse_data( const uint8_t*  data,
+              size_t          size )
+  {
+    struct archive_entry*    entry;
+    int                      r;
+    vector<vector<FT_Byte>>  files;
+
+    unique_ptr<struct  archive,
+               decltype ( archive_read_free )*>  a( archive_read_new(),
+                                                    archive_read_free );
+
+    // activate reading of uncompressed tar archives
+    archive_read_support_format_tar( a.get() );
+
+    // the need for `const_cast' was removed with libarchive commit be4d4dd
+    if ( !( r = archive_read_open_memory(
+                  a.get(),
+                  const_cast<void*>(static_cast<const void*>( data ) ),
+                  size ) ) )
+    {
+      unique_ptr<struct  archive,
+                 decltype ( archive_read_close )*>  a_open( a.get(),
+                                                            archive_read_close );
+
+      // read files contained in archive
+      for (;;)
+      {
+        r = archive_read_next_header( a_open.get(), &entry );
+        if ( r == ARCHIVE_EOF )
+          break;
+        if ( r != ARCHIVE_OK )
+          break;
+
+        vector<FT_Byte>  entry_data;
+        r = archive_read_entry_data( a.get(), &entry_data );
+        if ( r != ARCHIVE_OK )
+          break;
+
+        files.push_back( move( entry_data ) );
+      }
+    }
+
+    if ( files.size() == 0 )
+      files.emplace_back( data, data + size );
+
+    return files;
+  }
+
+
   static void
   setIntermediateAxis( FT_Face  face )
   {
@@ -74,6 +153,7 @@ using namespace std;
   }
 
 
+
   // the interface function to the libFuzzer library
   extern "C" int
   LLVMFuzzerTestOneInput( const uint8_t*  data, size_t  size_ )
@@ -83,7 +163,7 @@ using namespace std;
     if ( size_ < 1 )
       return 0;
 
-    long  size = (long)size_;
+    const vector<vector<FT_Byte>>&  files = parse_data( data, size_ );
 
     FT_Face   face;
     FT_Int32  load_flags = FT_LOAD_DEFAULT;
@@ -99,7 +179,11 @@ using namespace std;
     // more than a single font.
 
     // get number of faces
-    if ( FT_New_Memory_Face( library, data, size, -1, &face ) )
+    if ( FT_New_Memory_Face( library,
+                             files[0].data(),
+                             (FT_Long)files[0].size(),
+                             -1,
+                             &face ) )
       return 0;
     long  num_faces = face->num_faces;
     FT_Done_Face( face );
@@ -111,8 +195,8 @@ using namespace std;
     {
       // get number of instances
       if ( FT_New_Memory_Face( library,
-                               data,
-                               size,
+                               files[0].data(),
+                               (FT_Long)files[0].size(),
                                -( face_index + 1 ),
                                &face ) )
         continue;
@@ -125,12 +209,29 @@ using namespace std;
             instance_index++ )
       {
         if ( FT_New_Memory_Face( library,
-                                 data,
-                                 size,
+                                 files[0].data(),
+                                 (FT_Long)files[0].size(),
                                  ( instance_index << 16 ) + face_index,
                                  &face ) )
           continue;
 
+        // if we have more than a single input file coming from an archive,
+        // attach them (starting with the second file) using the order given
+        // in the archive
+        for ( size_t  files_index = 1;
+              files_index < files.size();
+              files_index++ )
+        {
+          FT_Open_Args  open_args = {};
+          open_args.flags       = FT_OPEN_MEMORY;
+          open_args.memory_base = files[files_index].data();
+          open_args.memory_size = (FT_Long)files[files_index].size();
+
+          // the last archive element will be eventually used as the
+          // attachment
+          FT_Attach_Stream( face, &open_args );
+        }
+
         // loop over all bitmap stroke sizes
         // and an arbitrary size for outlines
         for ( long
fixed_sizes_index = 0;
@@ -192,4 +293,4 @@ using namespace std;
   }
 
 
-/* END */
+// END
diff --git a/src/tools/ftfuzzer/ftmutator.cc b/src/tools/ftfuzzer/ftmutator.cc
new file mode 100644
index 000000000..1223ba27d
--- /dev/null
+++ b/src/tools/ftfuzzer/ftmutator.cc
@@ -0,0 +1,330 @@
+// A custom fuzzer mutator for FreeType.
+//
+// Since `tar' is not a valid format for input to FreeType, treat any input
+// that looks like `tar' as multiple files and mutate them separately.
+//
+// In the future, a variation of this may be used to guide mutation on a
+// logically higher level.
+
+// we use `unique_ptr', `decltype', and other gimmicks defined since C++11
+#if __cplusplus < 201103L
+#  error "a C++11 compiler is needed"
+#endif
+
+#include <cstdint>
+#include <cassert>
+#include <cstdio>
+#include <cstdlib>
+#include <cstddef>
+#include <cstring>
+#include <iostream>
+
+#include <memory>
+#include <vector>
+
+#include <archive.h>
+#include <archive_entry.h>
+
+#include "FuzzerInterface.h"
+
+
+  using namespace std;
+
+
+  // This function should be defined by `ftfuzzer.cc'.
+  extern "C" int
+  LLVMFuzzerTestOneInput( const uint8_t*  Data,
+                          size_t          Size );
+
+
+  // Do nothing if libarchive return code `r' is `ARCHIVE_OK'.  Otherwise
+  // print the error message recorded for archive `a' to stderr and exit
+  // with status 1.
+  static void
+  check_result( struct archive*  a,
+                int              r )
+  {
+    if ( r == ARCHIVE_OK )
+      return;
+
+    // `archive_error_string' returns NULL if no message was recorded,
+    // which must not be passed on as a C string
+    const char*  m = archive_error_string( a );
+
+    fprintf( stderr, "%s\n", m ? m : "unknown libarchive error" );
+    exit( 1 );
+  }
+
+
+  // Append all data blocks of the current entry of archive `ar' to `*vw'.
+  // Return `ARCHIVE_OK' at the end of the entry, or the libarchive code
+  // that stopped reading; data appended before the failure stays in `*vw'.
+  static int
+  archive_read_entry_data( struct archive   *ar,
+                           vector<uint8_t>  *vw )
+  {
+    int             r;
+    const uint8_t*  buff;
+    size_t          size;
+    int64_t         offset;
+
+    for (;;)
+    {
+      r = archive_read_data_block( ar,
+                                   reinterpret_cast<const void**>( &buff ),
+                                   &size,
+                                   &offset );
+      if ( r == ARCHIVE_EOF )
+        return ARCHIVE_OK;
+      if ( r != ARCHIVE_OK )
+        return r;
+
+      vw->insert( vw->end(), buff, buff + size );
+    }
+  }
+
+
+  // Read `data' (`size' bytes) as an uncompressed tar archive and return
+  // the contents of its non-empty entries, in archive order.  Reading
+  // stops at the first libarchive error; data read up to that point is
+  // kept.  The result is empty if no entry data could be read at all.
+  static vector<vector<uint8_t>>
+  parse_data( const uint8_t*  data,
+              size_t          size )
+  {
+    struct archive_entry*    entry;
+    int                      r;
+    vector<vector<uint8_t>>  files;
+
+    unique_ptr<struct  archive,
+               decltype ( archive_read_free )*>  a( archive_read_new(),
+                                                    archive_read_free );
+
+    // activate reading of uncompressed tar archives
+    archive_read_support_format_tar( a.get() );
+
+    // the need for `const_cast' was removed with libarchive commit be4d4dd
+    if ( !( r = archive_read_open_memory(
+                  a.get(),
+                  const_cast<void*>(static_cast<const void*>( data ) ),
+                  size ) ) )
+    {
+      unique_ptr<struct  archive,
+                 decltype ( archive_read_close )*>  a_open( a.get(),
+                                                            archive_read_close );
+
+      // read files contained in archive
+      for (;;)
+      {
+        r = archive_read_next_header( a_open.get(), &entry );
+        if ( r == ARCHIVE_EOF )
+          break;
+        if ( r != ARCHIVE_OK )
+          break;
+
+        vector<uint8_t>  entry_data;
+        r = archive_read_entry_data( a_open.get(), &entry_data );
+
+        // keep whatever could be read, even from a clipped entry, but
+        // skip entries without data
+        if ( !entry_data.empty() )
+          files.push_back( move( entry_data ) );
+
+        // don't continue with the next header after a failed read
+        if ( r != ARCHIVE_OK )
+          break;
+      }
+    }
+
+    return files;
+  }
+
+
+  class FTFuzzer
+  : public fuzzer::UserSuppliedFuzzer
+  {
+
+  public:
+    FTFuzzer( fuzzer::FuzzerRandomBase*  Rand )
+    : fuzzer::UserSuppliedFuzzer( Rand ) {}
+
+
+    int
+    TargetFunction( const uint8_t*  Data,
+                    size_t          Size )
+    {
+      return LLVMFuzzerTestOneInput( Data, Size );
+    }
+
+
+    // Custom mutator.  Input that parses as a tar archive is split into
+    // its files, each file is mutated separately, and the result is
+    // written back to `Data' as a ustar archive.  Everything else (as
+    // well as an archive reduced to a single file) is handed to the
+    // default mutator.  Returns the new size of the data in `Data'.
+    virtual size_t
+    Mutate( uint8_t*  Data,
+            size_t    Size,
+            size_t    MaxSize )
+    {
+      vector<vector<uint8_t>>  files = parse_data( Data, Size );
+
+      // If the file was not recognized as a tar file, treat it as non-tar.
+      if ( files.empty() )
+        return fuzzer::UserSuppliedFuzzer::Mutate( Data, Size, MaxSize );
+
+      // This is somewhat `white box' on tar.  The tar format uses 512 byte
+      // blocks.  One block as header for each file, two empty blocks of 0's
+      // at the end.  File data is padded to fill its last block.
+      size_t  used_blocks = files.size() + 2;
+      for ( const auto&  file : files )
+        used_blocks += ( file.size() + 511 ) / 512;
+
+      size_t  max_blocks = MaxSize / 512;
+
+      // If the input is big, it will need to be downsized.  If the original
+      // tar file was too big, it may have been clipped to fit.  In this
+      // case it may not be possible to properly write out the data, as
+      // there may not be enough space for the trailing two blocks.  Start
+      // dropping file data or files from the end.
+      for ( size_t  i = files.size();
+            i-- > 1 && used_blocks > max_blocks; )
+      {
+        size_t  blocks_to_free = used_blocks - max_blocks;
+        size_t  blocks_currently_used_by_file_data =
+                  ( files[i].size() + 511 ) / 512;
+
+        if ( blocks_currently_used_by_file_data >= blocks_to_free )
+        {
+          files[i].resize( ( blocks_currently_used_by_file_data -
+                             blocks_to_free ) * 512 );
+          used_blocks -= blocks_to_free;
+          continue;
+        }
+
+        files.pop_back();
+        used_blocks -= blocks_currently_used_by_file_data + 1;
+      }
+
+      // If we get down to one file, don't use tar.
+      if ( files.size() == 1 )
+      {
+        memcpy( Data, files[0].data(), files[0].size() );
+        return fuzzer::UserSuppliedFuzzer::Mutate( Data,
+                                                   files[0].size(),
+                                                   MaxSize );
+      }
+
+      size_t  free_blocks = max_blocks - used_blocks;
+
+      // Allow each file to use up as much of the currently available space
+      // it can.  If it uses or gives up blocks, add them or remove them
+      // from the pool.
+      for ( auto&&  file : files )
+      {
+        size_t  blocks_currently_used_by_file = ( file.size() + 511 ) / 512;
+        size_t  blocks_available = blocks_currently_used_by_file +
+                                     free_blocks;
+        size_t  max_size         = blocks_available * 512;
+        size_t  data_size        = file.size();
+
+        // An empty file without a spare block has no room for a mutation;
+        // don't hand a zero-sized buffer to the default mutator.
+        if ( max_size == 0 )
+          continue;
+
+        file.resize( max_size );
+        file.resize( fuzzer::UserSuppliedFuzzer::Mutate( file.data(),
+                                                         data_size,
+                                                         max_size ) );
+
+        size_t  blocks_now_used_by_file = ( file.size() + 511 ) / 512;
+        free_blocks = free_blocks +
+                        blocks_currently_used_by_file -
+                        blocks_now_used_by_file;
+      }
+
+      unique_ptr<struct  archive,
+                 decltype ( archive_write_free )*>  a( archive_write_new(),
+                                                       archive_write_free );
+
+      check_result( a.get(), archive_write_add_filter_none( a.get() ) );
+      check_result( a.get(), archive_write_set_format_ustar( a.get() ) );
+
+      // `used' may not be correct until after the archive is closed.
+      size_t  used = 0xbadbeef;
+      check_result( a.get(), archive_write_open_memory( a.get(),
+                                                        Data,
+                                                        MaxSize,
+                                                        &used ) );
+
+      int  file_index = 0;
+      for ( const auto&  file : files )
+      {
+        unique_ptr<struct  archive_entry,
+                   decltype ( archive_entry_free )*>
+          e( archive_entry_new2( a.get() ),
+             archive_entry_free );
+
+        char  name_buffer[100];
+        snprintf( name_buffer, sizeof ( name_buffer ),
+                  "file%d", file_index++ );
+
+        archive_entry_set_pathname( e.get(), name_buffer );
+        archive_entry_set_size( e.get(), file.size() );
+        archive_entry_set_filetype( e.get(), AE_IFREG );
+        archive_entry_set_perm( e.get(), 0644 );
+
+        check_result( a.get(),
+                      archive_write_header( a.get(), e.get() ) );
+
+        // a negative return value of `archive_write_data' signals an error
+        if ( archive_write_data( a.get(), file.data(), file.size() ) < 0 )
+          check_result( a.get(), ARCHIVE_FATAL );
+
+        check_result( a.get(),
+                      archive_write_finish_entry( a.get() ) );
+      }
+
+      // Close explicitly (instead of in a deleter) so that a failure is
+      // noticed; only then `used' holds the size of the archive.
+      check_result( a.get(), archive_write_close( a.get() ) );
+
+      return used;
+    }
+
+
+    // Cross `Data1' and `Data2', write up to `MaxOutSize' bytes into `Out',
+    // return the number of bytes written, which should be positive.
+    virtual size_t
+    CrossOver( const uint8_t*  Data1,
+               size_t          Size1,
+               const uint8_t*  Data2,
+               size_t          Size2,
+               uint8_t*        Out,
+               size_t          MaxOutSize )
+    {
+      return fuzzer::UserSuppliedFuzzer::CrossOver( Data1,
+                                                    Size1,
+                                                    Data2,
+                                                    Size2,
+                                                    Out,
+                                                    MaxOutSize );
+    }
+
+  }; // end of FTFuzzer class
+
+
+  // Run libFuzzer with the tar-aware mutator and pass on its exit status.
+  int
+  main( int     argc,
+        char*  *argv )
+  {
+    fuzzer::FuzzerRandomLibc  Rand( 0 );
+    FTFuzzer                  F( &Rand );
+
+    return fuzzer::FuzzerDriver( argc, argv, F );
+  }
+
+
+// END