From c870465dee49d4db3b0c0c16a4e63292113d9a05 Mon Sep 17 00:00:00 2001 From: Rodrigo Braz Monteiro Date: Mon, 17 Mar 2008 01:32:26 +0000 Subject: [PATCH] Added a few mostly unimplemented classes, and wrote my own custom (experimental) UTF-8 writer, which made writing UTF-8 subtitles almost twice as fast. Originally committed to SVN as r2072. --- aegilib/aegilib.vcproj | 8 ++ aegilib/include/aegilib/action.h | 27 +++++- aegilib/include/aegilib/actionlist.h | 2 + aegilib/include/aegilib/deltacoder.h | 2 +- aegilib/include/aegilib/selection.h | 80 ++++++++++++++++ aegilib/include/aegilib/utils.h | 4 + aegilib/src/action.cpp | 8 +- aegilib/src/actionlist.cpp | 2 +- .../src/formats/format_ass_dialogue_delta.cpp | 4 +- .../src/formats/format_ass_dialogue_delta.h | 2 +- aegilib/src/selection.cpp | 95 +++++++++++++++++++ aegilib/src/text_file_writer.cpp | 20 +++- aegilib/src/text_file_writer.h | 1 + aegilib/src/utils.cpp | 83 ++++++++++++++++ aegilib/test/src/main.cpp | 20 ++-- 15 files changed, 332 insertions(+), 26 deletions(-) create mode 100644 aegilib/include/aegilib/selection.h create mode 100644 aegilib/src/selection.cpp diff --git a/aegilib/aegilib.vcproj b/aegilib/aegilib.vcproj index 001153252..133c9aa5c 100644 --- a/aegilib/aegilib.vcproj +++ b/aegilib/aegilib.vcproj @@ -394,6 +394,14 @@ RelativePath=".\include\aegilib\actionlist.h" > + + + + entry,int line,const String &section); - virtual ~ActionInsert() {} + ~ActionInsert() {} ActionPtr GetAntiAction(const Model &model) const; void Execute(Model &model); @@ -79,7 +79,7 @@ namespace Gorgonsub { public: ActionRemove(int line,const String &section); - virtual ~ActionRemove() {} + ~ActionRemove() {} ActionPtr GetAntiAction(const Model &model) const; void Execute(Model &model); @@ -92,11 +92,30 @@ namespace Gorgonsub { shared_ptr delta; const String section; int lineNumber; + bool noTextFields; public: - ActionModify(shared_ptr entry,int line,const String &section); + ActionModify(shared_ptr entry,int line,const String 
&section,bool noTextFields); ActionModify(shared_ptr delta,int line,const String &section); - virtual ~ActionModify() {} + ~ActionModify() {} + + ActionPtr GetAntiAction(const Model &model) const; + void Execute(Model &model); + }; + + // Modify several lines + class ActionModifyBatch : public Action { + private: + std::vector > entries; + std::vector > deltas; + std::vector lines; + const String section; + bool noTextFields; + + public: + ActionModifyBatch(shared_ptr entry,int line,const String &section,bool noTextFields); + ActionModifyBatch(shared_ptr delta,int line,const String &section); + ~ActionModifyBatch() {} ActionPtr GetAntiAction(const Model &model) const; void Execute(Model &model); diff --git a/aegilib/include/aegilib/actionlist.h b/aegilib/include/aegilib/actionlist.h index abc65a029..26e009ad8 100644 --- a/aegilib/include/aegilib/actionlist.h +++ b/aegilib/include/aegilib/actionlist.h @@ -38,6 +38,7 @@ #include "action.h" #include "gorgonstring.h" #include "section_entry.h" +#include "selection.h" namespace Gorgonsub { @@ -73,6 +74,7 @@ namespace Gorgonsub { void InsertLine(SectionEntryPtr line,int position=-1,const String section=L""); void RemoveLine(int position,const String section); SectionEntryPtr ModifyLine(int position,const String section); + SectionEntryPtr ModifyLines(Selection selection,const String section); }; typedef shared_ptr<ActionList> ActionListPtr; diff --git a/aegilib/include/aegilib/deltacoder.h b/aegilib/include/aegilib/deltacoder.h index d73a12568..16b511b76 100644 --- a/aegilib/include/aegilib/deltacoder.h +++ b/aegilib/include/aegilib/deltacoder.h @@ -45,7 +45,7 @@ namespace Gorgonsub { class DeltaCoder { public: virtual ~DeltaCoder() {} - virtual VoidPtr EncodeDelta(VoidPtr from,VoidPtr to) const = 0; + virtual VoidPtr EncodeDelta(VoidPtr from,VoidPtr to,bool withTextFields=true) const = 0; virtual VoidPtr EncodeReverseDelta(VoidPtr delta,VoidPtr object) const = 0; virtual void ApplyDelta(VoidPtr delta,VoidPtr object) const = 0; }; diff --git 
a/aegilib/include/aegilib/selection.h b/aegilib/include/aegilib/selection.h new file mode 100644 index 000000000..3cea70637 --- /dev/null +++ b/aegilib/include/aegilib/selection.h @@ -0,0 +1,80 @@ +// Copyright (c) 2008, Rodrigo Braz Monteiro +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of the Aegisub Group nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. 
+// +// ----------------------------------------------------------------------------- +// +// AEGISUB/GORGONSUB +// +// Website: http://www.aegisub.net +// Contact: mailto:amz@aegisub.net +// + +#pragma once +#include <vector> + +namespace Gorgonsub { + + // Range class + class Range { + private: + size_t start,end; + + public: + Range() : start(0), end(0) {} + Range(size_t _start,size_t _end) : start(_start), end(_end) {} + + size_t GetLine(size_t n) const; + size_t GetSize() const { return end-start; } + size_t GetStart() const { return start; } + size_t GetEnd() const { return end; } + }; + + // Selection class + class Selection { + private: + std::vector<Range> ranges; + size_t count; + + public: + Selection(); + + void AddLine(size_t line) { AddRange(Range(line,line+1)); } + void AddRange(const Range &range); + void RemoveLine(size_t line) { RemoveRange(Range(line,line+1)); } + void RemoveRange(const Range &range); + void AddSelection (const Selection &param); + void RemoveSelection (const Selection &param); + + size_t GetCount() const { return count; } + size_t GetRanges() const { return ranges.size(); } + size_t GetLine(size_t n) const; + size_t GetLineInRange(size_t n,size_t range) const { return ranges.at(range).GetLine(n); } + bool IsContiguous() const { return GetRanges() <= 1; } + + }; + +} diff --git a/aegilib/include/aegilib/utils.h b/aegilib/include/aegilib/utils.h index 378025e27..19ab0e73f 100644 --- a/aegilib/include/aegilib/utils.h +++ b/aegilib/include/aegilib/utils.h @@ -96,4 +96,8 @@ namespace Gorgonsub { const wxChar *StringPtrTrim(wxChar *str,size_t len,size_t start); const wxChar *StringTrim(wxString &str,size_t start); bool AsciiStringCompareNoCase(const wxString &str1,const wxChar *str2); + + // Unicode routines + size_t GetUTF8Len(const wchar_t *utf16); + size_t UTF16toUTF8(const wchar_t *utf16,char *utf8); } diff --git a/aegilib/src/action.cpp b/aegilib/src/action.cpp index 0ef58f2a1..86db76105 100644 --- a/aegilib/src/action.cpp +++ b/aegilib/src/action.cpp 
@@ -116,11 +116,11 @@ void ActionRemove::Execute(Model &model) //////////////// // Constructors -ActionModify::ActionModify(shared_ptr data,int line,const String &sName) -: entry(data), lineNumber(line), section(sName) {} +ActionModify::ActionModify(shared_ptr data,int line,const String &sName,bool _noTextFields) +: entry(data), lineNumber(line), section(sName), noTextFields(_noTextFields) {} ActionModify::ActionModify(shared_ptr _delta,int line,const String &sName) -: delta(_delta), lineNumber(line), section(sName) {} +: delta(_delta), lineNumber(line), section(sName), noTextFields(false) {} ///////////////////////////////// @@ -135,7 +135,7 @@ ActionPtr ActionModify::GetAntiAction(const Model &model) const DeltaCoderPtr deltaCoder = oldEntry->GetDeltaCoder(); if (deltaCoder) { VoidPtr _delta; - if (entry) _delta = deltaCoder->EncodeDelta(entry,oldEntry); + if (entry) _delta = deltaCoder->EncodeDelta(entry,oldEntry,!noTextFields); else _delta = deltaCoder->EncodeReverseDelta(delta,oldEntry); return ActionPtr(new ActionModify(_delta,lineNumber,section)); } diff --git a/aegilib/src/actionlist.cpp b/aegilib/src/actionlist.cpp index acc1f5283..370ab71c1 100644 --- a/aegilib/src/actionlist.cpp +++ b/aegilib/src/actionlist.cpp @@ -126,7 +126,7 @@ SectionEntryPtr ActionList::ModifyLine(int position,const String section) { SectionPtr sect = model.GetSection(section); SectionEntryPtr entry = sect->GetEntry(position)->Clone(); - ActionPtr action = ActionPtr (new ActionModify(entry,position,section)); + ActionPtr action = ActionPtr (new ActionModify(entry,position,section,false)); AddAction(action); return entry; } diff --git a/aegilib/src/formats/format_ass_dialogue_delta.cpp b/aegilib/src/formats/format_ass_dialogue_delta.cpp index 8a300b7c8..a259d6992 100644 --- a/aegilib/src/formats/format_ass_dialogue_delta.cpp +++ b/aegilib/src/formats/format_ass_dialogue_delta.cpp @@ -45,7 +45,7 @@ using namespace Gorgonsub; //////////////////////////////////// // Encode delta between 
two entries -VoidPtr DialogueASSDeltaCoder::EncodeDelta(VoidPtr _from,VoidPtr _to) const +VoidPtr DialogueASSDeltaCoder::EncodeDelta(VoidPtr _from,VoidPtr _to,bool withTextFields) const { // Cast pointers shared_ptr from = static_pointer_cast (_from); @@ -59,7 +59,7 @@ VoidPtr DialogueASSDeltaCoder::EncodeDelta(VoidPtr _from,VoidPtr _to) const if (from->time[1] != to->time[1]) mask |= 0x0008; for (size_t i=0;i<4;i++) { if (from->margin[i] != to->margin[i]) mask |= 0x0010 << i; - if (from->text[i] != to->text[i]) mask |= 0x0100 << i; + if (withTextFields && from->text[i] != to->text[i]) mask |= 0x0100 << i; } // Calculate final size and allocate diff --git a/aegilib/src/formats/format_ass_dialogue_delta.h b/aegilib/src/formats/format_ass_dialogue_delta.h index fce79c17f..51c3392da 100644 --- a/aegilib/src/formats/format_ass_dialogue_delta.h +++ b/aegilib/src/formats/format_ass_dialogue_delta.h @@ -45,7 +45,7 @@ namespace Gorgonsub { void GetDelta(int mask,char *dst,shared_ptr to) const; public: - VoidPtr EncodeDelta(VoidPtr from,VoidPtr to) const; + VoidPtr EncodeDelta(VoidPtr from,VoidPtr to,bool withTextFields=true) const; VoidPtr EncodeReverseDelta(VoidPtr from,VoidPtr to) const; void ApplyDelta(VoidPtr delta,VoidPtr object) const; }; diff --git a/aegilib/src/selection.cpp b/aegilib/src/selection.cpp new file mode 100644 index 000000000..fd918e517 --- /dev/null +++ b/aegilib/src/selection.cpp @@ -0,0 +1,95 @@ +// Copyright (c) 2008, Rodrigo Braz Monteiro +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of the Aegisub Group nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. 
+// +// ----------------------------------------------------------------------------- +// +// AEGISUB/GORGONSUB +// +// Website: http://www.aegisub.net +// Contact: mailto:amz@aegisub.net +// + +#include "selection.h" +using namespace Gorgonsub; + + +/////////////// +// Constructor +Selection::Selection() +: count(0) +{ +} + + +//////////////// +// Adds a range +void Selection::AddRange(const Range &range) +{ + // TODO + (void) range; +} + + +/////////////////// +// Removes a range +void Selection::RemoveRange(const Range &range) +{ + // TODO + (void) range; +} + + +////////////////// +// Get a specific line +size_t Selection::GetLine(size_t n) const +{ + // Find the nth line (cur = number of selected lines up to and including range i) + size_t cur = 0; + size_t len = ranges.size(); + for (size_t i=0;i<len;i++) { + cur += ranges[i].GetSize(); + if (cur > n) return ranges[i].GetLine(n-(cur-ranges[i].GetSize())); + } + return ~0UL; +} + + +//////////////////////////// +// Append another selection +void Selection::AddSelection (const Selection &param) +{ + (void) param; +} + + +////////////////////////////// +// Subtract another selection +void Selection::RemoveSelection (const Selection &param) +{ + (void) param; +} + diff --git a/aegilib/src/text_file_writer.cpp b/aegilib/src/text_file_writer.cpp index d1eb9d001..3a84fcf88 100644 --- a/aegilib/src/text_file_writer.cpp +++ b/aegilib/src/text_file_writer.cpp @@ -38,6 +38,7 @@ // Headers #include #include "text_file_writer.h" +#include "utils.h" using namespace Gorgonsub; @@ -83,11 +84,20 @@ void TextFileWriter::WriteLineToFile(Gorgonsub::String line,bool addLineBreak) { // 8-bit else { - wxCharBuffer buf = temp.mb_str(*conv); - if (!buf.data()) - return; - size_t len = strlen(buf.data()); - file.Write(buf.data(),(std::streamsize)len); + if (encoding == _T("UTF-8")) { + const wchar_t* src = temp.c_str(); + //size_t len = GetUTF8Len(src); + size_t len = temp.Length() * 3 + 1; // worst case: 3 bytes per UTF-16 unit, plus the terminator UTF16toUTF8 writes + if (buffer.size() < len) buffer.resize(len); + size_t toWrite = UTF16toUTF8(src,&buffer[0]); + file.Write(&buffer[0],(std::streamsize)toWrite); + } + else { + wxCharBuffer 
buf = temp.mb_str(*conv); + if (!buf.data()) return; + size_t len = strlen(buf.data()); + file.Write(buf.data(),(std::streamsize)len); + } } } diff --git a/aegilib/src/text_file_writer.h b/aegilib/src/text_file_writer.h index 9b30f63ba..d24d42651 100644 --- a/aegilib/src/text_file_writer.h +++ b/aegilib/src/text_file_writer.h @@ -44,6 +44,7 @@ namespace Gorgonsub { private: wxString encoding; wxOutputStream &file; + std::vector<char> buffer; shared_ptr conv; bool Is16; diff --git a/aegilib/src/utils.cpp b/aegilib/src/utils.cpp index 1ca413943..56d0c2225 100644 --- a/aegilib/src/utils.cpp +++ b/aegilib/src/utils.cpp @@ -200,3 +200,86 @@ bool Gorgonsub::AsciiStringCompareNoCase(const wxString &str1,const wxChar *str2 // Equal strings return true; } + + +/////////////////////////////////////////////// +// Get the UTF-8 length out of a UTF-16 string +size_t Gorgonsub::GetUTF8Len(const wchar_t *utf16) +{ + size_t len = 0; + wchar_t curChar = utf16[0]; + for (size_t i=0;curChar;i++) { + // 1 byte + if ((curChar & 0xFF80) == 0) len++; + + // Surrogate pair UTF-16, 4 bytes + else if ((curChar & 0xFC00) == 0xD800) { + len += 4; + i++; + } + + // 3 bytes + else if (curChar & 0xF800) len += 3; + + // 2 bytes + else if (curChar & 0xFF80) len += 2; + + // Get next (the loop increment has not run yet, so the next unit is at i+1) + curChar = utf16[i+1]; + } + + return len; +} + + +/////////////////////////// +// Convert UTF-16 to UTF-8 +size_t Gorgonsub::UTF16toUTF8(const wchar_t *utf16,char *utf8) +{ + wchar_t curChar = 1; + size_t value; + size_t written = 0; + for (size_t i=0;curChar;i++) { + // Get next + curChar = utf16[i]; + + // 1 byte + if ((curChar & 0xFF80) == 0) { + *utf8++ = char(curChar); + if (curChar == 0) break; + written++; + } + + // 2 bytes + else if ((curChar & 0xF800) == 0) { + *utf8++ = char(((curChar & 0x07C0) >> 6) | 0xC0); + *utf8++ = char((curChar & 0x003F) | 0x80); + written += 2; + } + + // Surrogate pair UTF-16 + else if ((curChar & 0xFC00) == 0xD800) { + // Read (a surrogate pair encodes the code point minus 0x10000) + value = 0x10000 + ((curChar - 0xD800) << 10); + i++; + value |= 
utf16[i] & 0x3FF; + + // Write + *utf8++ = char(((value & 0x1C0000) >> 18) | 0xF0); + *utf8++ = char(((value & 0x03F000) >> 12) | 0x80); + *utf8++ = char(((value & 0x000FC0) >> 6) | 0x80); + *utf8++ = char((value & 0x00003F) | 0x80); + written += 4; + } + + // 3 bytes + else if (curChar & 0xF800) { + *utf8++ = char(((curChar & 0xF000) >> 12) | 0xE0); + *utf8++ = char(((curChar & 0x0FC0) >> 6) | 0x80); + *utf8++ = char((curChar & 0x003F) | 0x80); + written += 3; + } + } + return written; +} + diff --git a/aegilib/test/src/main.cpp b/aegilib/test/src/main.cpp index 1624bb053..29d28d2f0 100644 --- a/aegilib/test/src/main.cpp +++ b/aegilib/test/src/main.cpp @@ -78,17 +78,11 @@ int main() line->SetText(L"Hi, testing insertion of lines!"); cout << "Done.\n"; - // Create action list - cout << "Processing actions... "; - timer.Start(); - ActionListPtr actions = control.CreateActionList(L"Insert line"); - //actions->InsertLine(line,2); - //actions->RemoveLine(3,L"Events"); + // Issue an action + ActionListPtr actions = control.CreateActionList(L"Test"); SectionEntryDialoguePtr diag = dynamic_pointer_cast (actions->ModifyLine(10,L"Events")); diag->SetText(L"Hay guise sup"); actions->Finish(); - timer.Pause(); - cout << "Done in " << timer.Time() << " ms.\n"; // Undo cout << "Undoing and redoing 1000 times... "; @@ -108,5 +102,15 @@ int main() cout << "\n\nException: " << e.what() << endl << endl; } + if (false) { + wchar_t myArray[] = { 0xD834, 0xDD1E, 0 }; + String str = wxString(myArray); + cout << "Length: " << str.Length() << ". Contents: " << str[0] << "," << str[1] << endl; + wxCharBuffer buf = str.mb_str(wxConvUTF8); + unsigned char *chr = (unsigned char *) buf.data(); + cout << "UTF-8 Length: " << strlen(buf) << ". Contents: " << (size_t)chr[0] << "," << (size_t)chr[1] << "," << (size_t)chr[2] << "," << (size_t)chr[3] << endl; + str = wxString(buf,wxConvUTF8); + cout << "Length: " << str.Length() << ". Contents: " << str[0] << "," << str[1] << endl; + } return true; }