From ca63097e901c1dcb70e8234ad9dfecbf19781707 Mon Sep 17 00:00:00 2001 From: Rodrigo Braz Monteiro Date: Sat, 15 Mar 2008 08:36:52 +0000 Subject: [PATCH] Fixed UTF-16 support on gorgonsub, and, incidentally, it's ~20% faster than UTF-8. Originally committed to SVN as r2063. --- aegilib/include/aegilib/exception.h | 10 +- aegilib/include/aegilib/fastbuffer.h | 2 +- aegilib/src/exception.cpp | 18 ++- aegilib/src/formats/format_ass_dialogue.cpp | 4 +- aegilib/src/formats/format_ass_style.cpp | 2 +- aegilib/src/text_file_reader.cpp | 151 ++++++++++---------- aegilib/src/text_file_reader.h | 3 +- aegilib/test/src/main.cpp | 44 +++--- 8 files changed, 124 insertions(+), 110 deletions(-) diff --git a/aegilib/include/aegilib/exception.h b/aegilib/include/aegilib/exception.h index de7ce948b..5f094c66b 100644 --- a/aegilib/include/aegilib/exception.h +++ b/aegilib/include/aegilib/exception.h @@ -56,13 +56,21 @@ namespace Gorgonsub { }; Exception(ExceptionList code); + Exception(ExceptionList code,const char* file,const long line); - String GetMessage() const { return GetMessage(code); } + String GetMessage() const { return wxString(what(),wxConvLocal); } int GetCode(); private: static String GetMessage(int code); + static String GetMessageFile(int code,const char *file,long line); ExceptionList code; }; }; + +#ifdef _MSC_VER +#define THROW_GORGON_EXCEPTION(code) throw Gorgonsub::Exception(code,__FILE__,__LINE__) +#else +#define THROW_GORGON_EXCEPTION(code) throw Gorgonsub::Exception(code) +#endif diff --git a/aegilib/include/aegilib/fastbuffer.h b/aegilib/include/aegilib/fastbuffer.h index f53fc263b..f729e8755 100644 --- a/aegilib/include/aegilib/fastbuffer.h +++ b/aegilib/include/aegilib/fastbuffer.h @@ -57,7 +57,7 @@ namespace Gorgonsub { // Shifts all the buffer left, destroying steps entries void ShiftLeft(size_t steps) { steps = Min(_size,steps); - memcpy(&buffer[0],&buffer[steps],_size-steps); + memcpy(&buffer[0],&buffer[steps],(_size-steps)*sizeof(T)); _size -= steps; } diff --git a/aegilib/src/exception.cpp b/aegilib/src/exception.cpp index fe2125db5..f7137d57f 100644 --- a/aegilib/src/exception.cpp +++ b/aegilib/src/exception.cpp @@ -37,14 +37,20 @@ using namespace Gorgonsub; -/////////////// -// Constructor +//////////////// +// Constructors Exception::Exception(ExceptionList _code) : std::exception(GetMessage(_code).mb_str(wxConvLocal)) { code = _code; } +Exception::Exception(ExceptionList _code,const char *file,const long line) +: std::exception(GetMessageFile(_code,file,line).mb_str(wxConvLocal)) +{ + code = _code; +} + ////////////////////// // Get message string @@ -65,6 +71,14 @@ String Exception::GetMessage(int code) } +/////////////////////////////////// +// Insert file and line on message +String Exception::GetMessageFile(int code,const char *file,long line) +{ + return GetMessage(code) + _T(" (") + wxString(file,wxConvLocal) + wxString::Format(_T(":%i)."),line); +} + + //////////// // Get code int Exception::GetCode() diff --git a/aegilib/src/formats/format_ass_dialogue.cpp b/aegilib/src/formats/format_ass_dialogue.cpp index 3b850bed1..14dcdde43 100644 --- a/aegilib/src/formats/format_ass_dialogue.cpp +++ b/aegilib/src/formats/format_ass_dialogue.cpp @@ -56,7 +56,9 @@ DialogueASS::DialogueASS(const String &data,int version) version++; if (version > 2) version = 0; } - if (!valid) throw Exception(Exception::Parse_Error); + if (!valid) { + THROW_GORGON_EXCEPTION(Exception::Parse_Error); + } } diff --git a/aegilib/src/formats/format_ass_style.cpp b/aegilib/src/formats/format_ass_style.cpp index 6572de185..0a63fb048 100644 --- a/aegilib/src/formats/format_ass_style.cpp +++ b/aegilib/src/formats/format_ass_style.cpp @@ -55,7 +55,7 @@ StyleASS::StyleASS(String data,int version) version++; if (version > 2) version = 0; } - if (!valid) throw Exception(Exception::Parse_Error); + if (!valid) THROW_GORGON_EXCEPTION(Exception::Parse_Error); } diff --git a/aegilib/src/text_file_reader.cpp b/aegilib/src/text_file_reader.cpp index 8b97f0aed..cbeb8d772 100644 --- a/aegilib/src/text_file_reader.cpp +++ b/aegilib/src/text_file_reader.cpp @@ -56,7 +56,6 @@ TextFileReader::TextFileReader(wxInputStream &stream,Gorgonsub::String enc,bool trim = _trim; threaded = prefetch && false; thread = NULL; - _buffer.Alloc(4096); // Set encoding encoding = enc.c_str(); @@ -101,6 +100,78 @@ void TextFileReader::SetEncodingConfiguration() else { conv = shared_ptr (new wxCSConv(encoding)); } + + // Allocate buffer + if (!Is16) buffer1.Alloc(4096); + else buffer2.Alloc(4096); +} + + +//////////////////// +// Helper functions +wxString GetString(char *read,shared_ptr conv) { return wxString(read,*conv); } +wxString GetString(wchar_t *read,shared_ptr conv) { (void)conv; return wxString(read); } +inline void Swap(wchar_t &a) { + char *c = (char*) &a; + char aux = c[0]; + c[0] = c[1]; + c[1] = aux; +} +inline void Swap(char &a) { (void) a; } + + +//////////////// +// Parse a line +template +void ParseLine(FastBuffer &_buffer,wxInputStream &file,wxString &stringBuffer,shared_ptr conv,bool swap) +{ + // Look for a new line + int newLinePos = -1; + T newLineChar = 0; + size_t size = _buffer.GetSize(); + + // Find first line break + if (size) _buffer.FindLineBreak(0,size,newLinePos,newLineChar); + + // If no line breaks were found, load more data into file + while (newLinePos == -1) { + // Read 2048 bytes + const size_t readBytes = 2048; + const size_t read = readBytes/sizeof(T); + size_t oldSize = _buffer.GetSize(); + T *ptr = _buffer.GetWritePtr(read); + file.Read(ptr,readBytes); + size_t lastRead = file.LastRead()/sizeof(T); + _buffer.AssumeSize(_buffer.GetSize()+lastRead-read); + + // Swap + if (swap) { + T* ptr2 = ptr; + for (size_t i=0;i= bufAlloc - 1) { - bufAlloc *= 2; - stringBuffer.Alloc(bufAlloc); - } - stringBuffer += ch; - len++; - } - - // Remove line breaks - len = stringBuffer.Length(); - for (size_t i=0;i(buffer2,file,stringBuffer,conv,swap); // Read ASCII/UTF-8 line from file - else { - // Look for a new line - int newLinePos = -1; - char newLineChar = 0; - size_t size = _buffer.GetSize(); - - // Find first line break - if (size) _buffer.FindLineBreak(0,size,newLinePos,newLineChar); - - // If no line breaks were found, load more data into file - while (newLinePos == -1) { - // Read 2048 bytes - const size_t read = 2048; - size_t oldSize = _buffer.GetSize(); - char *ptr = _buffer.GetWritePtr(read); - file.Read(ptr,read); - size_t lastRead = file.LastRead(); - _buffer.AssumeSize(_buffer.GetSize()+lastRead-read); - - // Find line break - _buffer.FindLineBreak(oldSize,lastRead+oldSize,newLinePos,newLineChar); - - // End of file, force a line break - if (file.Eof() && newLinePos == -1) newLinePos = (int) _buffer.GetSize(); - } - - // Found newline - if (newLinePos != -1) { - // Replace newline with null character and convert to proper charset - char *read = _buffer.GetMutableReadPtr(); - if (newLinePos) { - read[newLinePos] = 0; - stringBuffer = wxString(read,*conv); - } - - // Remove an extra character if the new is the complement of \n,\r (13^7=10, 10^7=13) - if (read[newLinePos+1] == (newLineChar ^ 7)) newLinePos++; - _buffer.ShiftLeft(newLinePos+1); - } - } + else ParseLine(buffer1,file,stringBuffer,conv,false); // Remove BOM size_t startPos = 0; @@ -208,7 +207,7 @@ bool TextFileReader::HasMoreLines() { if (cache.size()) return true; wxCriticalSectionLocker locker(mutex); - return (!file.Eof() || _buffer.GetSize()); + return (!file.Eof() || buffer1.GetSize() || buffer2.GetSize()); } diff --git a/aegilib/src/text_file_reader.h b/aegilib/src/text_file_reader.h index 91116eb55..54596cc56 100644 --- a/aegilib/src/text_file_reader.h +++ b/aegilib/src/text_file_reader.h @@ -52,7 +52,8 @@ namespace Gorgonsub { wxCriticalSection mutex; std::list cache; - FastBuffer _buffer; + FastBuffer buffer1; + FastBuffer buffer2; wxString encoding; wxInputStream &file; diff --git a/aegilib/test/src/main.cpp b/aegilib/test/src/main.cpp index f8e743ed8..601046a1a 100644 --- a/aegilib/test/src/main.cpp +++ b/aegilib/test/src/main.cpp @@ -60,10 +60,17 @@ int main() { // Load subtitles cout << "Loading file... "; timer.Start(); - control.LoadFile(L"subs_in.ass",L"UTF-8"); + control.LoadFile(L"subs_in.ass",L"UTF-16LE"); timer.Pause(); - cout << "Done in " << timer.Time() << "ms.\n"; - system("pause"); + cout << "Done in " << timer.Time() << " ms.\n"; + //system("pause"); + + // Save subtitles + cout << "Saving file... "; + timer.Start(); + control.SaveFile(L"subs_out.ass",L"UTF-8"); + timer.Pause(); + cout << "Done in " << timer.Time() << " ms.\n"; // Create line to be inserted cout << "Creating data... "; @@ -79,34 +86,17 @@ int main() { actions->RemoveLine(3,L"Events"); actions->Finish(); timer.Pause(); - cout << "Done in " << timer.Time() << "ms.\n"; - - // Save subtitles - cout << "Saving file... "; - //control.SaveFile(L"subs_out_mid1.ass",L"UTF-8"); - cout << "Done.\n"; + cout << "Done in " << timer.Time() << " ms.\n"; // Undo - cout << "Undoing... (can undo=" << (control.CanUndo()?"true":"false") << ") "; - control.Undo(); - cout << "Done.\n"; - - // Save subtitles - cout << "Saving file... "; - control.SaveFile(L"subs_out_mid2.ass",L"UTF-8"); - cout << "Done.\n"; - - // Redo - cout << "Undoing... (can redo=" << (control.CanRedo()?"true":"false") << ") "; - control.Redo(); - cout << "Done.\n"; - - // Save subtitles - cout << "Saving file... "; + cout << "Undoing and redoing 1000 times... "; timer.Start(); - //control.SaveFile(L"subs_out.ass",L"UTF-8"); + for (size_t i=0;i<1000;i++) { + control.Undo(); + control.Redo(); + } timer.Pause(); - cout << "Done in " << timer.Time() << "ms.\n"; + cout << "Done in " << timer.Time() << " ms.\n"; } catch (std::exception &e) {