From d6d3f8aecb0bb45ee2c92c42bf93a9c3e211c27c Mon Sep 17 00:00:00 2001 From: Rodrigo Braz Monteiro Date: Sat, 15 Mar 2008 03:24:38 +0000 Subject: [PATCH] Made gorgonsub's UTF-8/ASCII parser much faster, resulting in a 2x subtitles reading speedup for such files. Originally committed to SVN as r2060. --- aegilib/aegilib.vcproj | 4 ++ aegilib/include/aegilib/fastbuffer.h | 104 +++++++++++++++++++++++++++ aegilib/include/aegilib/utils.h | 3 +- aegilib/src/text_file_reader.cpp | 85 +++++++++++++--------- aegilib/src/text_file_reader.h | 3 + aegilib/src/utils.cpp | 30 ++++++++ 6 files changed, 196 insertions(+), 33 deletions(-) create mode 100644 aegilib/include/aegilib/fastbuffer.h diff --git a/aegilib/aegilib.vcproj b/aegilib/aegilib.vcproj index 5a96dd816..646b213f8 100644 --- a/aegilib/aegilib.vcproj +++ b/aegilib/aegilib.vcproj @@ -176,6 +176,10 @@ RelativePath=".\include\aegilib\exception.h" > + + diff --git a/aegilib/include/aegilib/fastbuffer.h b/aegilib/include/aegilib/fastbuffer.h new file mode 100644 index 000000000..83fa6b1f5 --- /dev/null +++ b/aegilib/include/aegilib/fastbuffer.h @@ -0,0 +1,104 @@ +// Copyright (c) 2005, Rodrigo Braz Monteiro +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of the Aegisub Group nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// ----------------------------------------------------------------------------- +// +// AEGISUB +// +// Website: http://aegisub.cellosoft.com +// Contact: mailto:zeratul@cellosoft.com +// + + +#pragma once +#include <vector> +#include "utils.h" + + +namespace Gorgonsub { + // Fast buffer class: a std::vector-backed array that tracks its logical size (_size) separately from the allocated storage + template <typename T> + class FastBuffer { + private: + std::vector<T> buffer; + size_t _size; + + public: + // Constructor: starts with a stored size of zero + FastBuffer() { _size = 0; } + + // Gets the stored size, in entries + size_t GetSize() const { return _size; } + + // Shifts the buffer contents left, destroying the first steps entries (steps is clamped to the stored size); memmove is used because the source and destination ranges overlap, and the move is skipped entirely when there is nothing to relocate + void ShiftLeft(size_t steps) { + steps = Min(_size,steps); + if (steps > 0 && steps < _size) memmove(&buffer[0],&buffer[steps],(_size-steps)*sizeof(T)); + _size -= steps; + } + + // Get a read pointer to the first entry + const T* GetReadPtr() const { return &buffer[0]; } + + // Get a non-const read pointer to the first entry + T* GetMutableReadPtr() { return &buffer[0]; } + + // Get a write pointer to a new area of the specified size, appended after the current contents (the stored size grows by size, and the storage is resized to keep 4 spare entries past it) + T* GetWritePtr(size_t size) { + size_t oldSize = _size; + _size += size; + if (buffer.size() < _size+4) buffer.resize(_size+4); + return &buffer[oldSize]; + } + + // Assume that the buffer has a certain size, discarding anything beyond it + void AssumeSize(size_t size) { + _size =
Min(size,_size); + } + + // Pre-Allocates memory + void Alloc(size_t size) { + buffer.resize(size); + } + + // Finds a line break + void FindLineBreak(size_t start,size_t end,int &pos,T &character) { + pos = -1; + character = 0; + T c1 = '\n'; + T c2 = '\r'; + for (size_t i=start;i= bufAlloc - 1) { bufAlloc *= 2; - wxbuffer.Alloc(bufAlloc); + stringBuffer.Alloc(bufAlloc); } - wxbuffer += ch; + stringBuffer += ch; len++; } + + // Remove line breaks + len = stringBuffer.Length(); + for (size_t i=0;i 0 && wxbuffer[0] == 0xFEFF) { - wxbuffer = wxbuffer.Mid(1); - } + size_t startPos = 0; + if (stringBuffer.Length() > 0 && stringBuffer[0] == 0xFEFF) startPos = 1; // Trim - if (trim) { - wxbuffer.Trim(true); - wxbuffer.Trim(false); - } - return Gorgonsub::String(wxbuffer.c_str()); + if (trim) return String(StringTrim(stringBuffer,startPos)); + return String(stringBuffer.c_str() + startPos); } @@ -186,7 +207,7 @@ bool TextFileReader::HasMoreLines() { if (cache.size()) return true; wxCriticalSectionLocker locker(mutex); - return (!file.Eof()); + return (!file.Eof() || _buffer.GetSize()); } diff --git a/aegilib/src/text_file_reader.h b/aegilib/src/text_file_reader.h index 5c9376e76..91116eb55 100644 --- a/aegilib/src/text_file_reader.h +++ b/aegilib/src/text_file_reader.h @@ -39,6 +39,7 @@ // Headers #include "Gorgonsub.h" +#include "fastbuffer.h" #include @@ -51,6 +52,8 @@ namespace Gorgonsub { wxCriticalSection mutex; std::list cache; + FastBuffer _buffer; + wxString encoding; wxInputStream &file; shared_ptr conv; diff --git a/aegilib/src/utils.cpp b/aegilib/src/utils.cpp index 7c506aef6..eb4e31d60 100644 --- a/aegilib/src/utils.cpp +++ b/aegilib/src/utils.cpp @@ -112,3 +112,33 @@ void Gorgonsub::WriteNumber(wxChar *&dst,wxChar *temp,int number,int pad,size_t pos++; } } + + +///////////////// +// Trim a string +const wxChar *Gorgonsub::StringTrim(wxString &str,size_t startPos) +{ + size_t len = str.Length(); + size_t start = startPos; + size_t end = len; + bool isStart = 
true; + bool isEnd = false; + wxChar cur; + for (size_t i=start;i