From f278c35f3f27c107e4c49c36c2a195cdf1bbe252 Mon Sep 17 00:00:00 2001 From: Niels Martin Hansen Date: Fri, 25 Apr 2014 16:04:08 +0200 Subject: [PATCH] Dynamically use either inline_string escaping or uuencoding for extradata Since luabins generates binary data which grows up to 3x by escaping, it's more efficient to uuencode that instead. A marker is placed as the first character of the value field, either 'e' for inline_string escaped text, or 'u' for uuencoded binary data. The key is always inline_string escaped, as it will typically be human readable. --- libaegisub/include/libaegisub/ass/uuencode.h | 4 ++-- src/ass_parser.cpp | 18 ++++++++++++++++-- src/subtitle_format_ass.cpp | 14 +++++++++++++- 3 files changed, 31 insertions(+), 5 deletions(-) diff --git a/libaegisub/include/libaegisub/ass/uuencode.h b/libaegisub/include/libaegisub/ass/uuencode.h index fbc957c3d..b6b30e442 100644 --- a/libaegisub/include/libaegisub/ass/uuencode.h +++ b/libaegisub/include/libaegisub/ass/uuencode.h @@ -28,7 +28,7 @@ namespace agi { namespace ass { /// Encode a blob of data, using ASS's nonstandard variant template -std::string UUEncode(RandomAccessRange const& data) { +std::string UUEncode(RandomAccessRange const& data, bool insert_linebreaks=true) { using std::begin; using std::end; @@ -51,7 +51,7 @@ std::string UUEncode(RandomAccessRange const& data) { for (size_t i = 0; i < std::min(size - pos + 1, 4u); ++i) { ret += dst[i] + 33; - if (++written == 80 && pos + 3 < size) { + if (insert_linebreaks && ++written == 80 && pos + 3 < size) { written = 0; ret += "\r\n"; } diff --git a/src/ass_parser.cpp b/src/ass_parser.cpp index 478704eb5..4e2e9061b 100644 --- a/src/ass_parser.cpp +++ b/src/ass_parser.cpp @@ -22,6 +22,8 @@ #include "string_codec.h" #include "subtitle_format.h" +#include + #include #include #include @@ -115,13 +117,25 @@ void AssParser::ParseGraphicsLine(std::string const& data) { } void AssParser::ParseExtradataLine(std::string const &data) { - static const 
boost::regex matcher("Data:[[:space:]]*(\\d+),([^,]+),(.*)"); + static const boost::regex matcher("Data:[[:space:]]*(\\d+),([^,]+),(.)(.*)"); boost::match_results mr; if (boost::regex_match(data, mr, matcher)) { auto id = boost::lexical_cast(mr.str(1)); auto key = inline_string_decode(mr.str(2)); - auto value = inline_string_decode(mr.str(3)); + auto valuetype = mr.str(3); + auto value = mr.str(4); + if (valuetype == "e") { + // escaped/inline_string encoded + value = inline_string_decode(value); + } else if (valuetype == "u") { + // ass uuencoded + auto valuedata = agi::ass::UUDecode(value); + value = std::string(valuedata.begin(), valuedata.end()); + } else { + // unknown, error? + value = ""; + } // ensure next_extradata_id is always at least 1 more than the largest existing id target->next_extradata_id = std::max(id+1, target->next_extradata_id); diff --git a/src/subtitle_format_ass.cpp b/src/subtitle_format_ass.cpp index 3d5972a36..c5d1800df 100644 --- a/src/subtitle_format_ass.cpp +++ b/src/subtitle_format_ass.cpp @@ -27,6 +27,7 @@ #include "text_file_writer.h" #include "version.h" +#include #include DEFINE_SIMPLE_EXCEPTION(AssParseError, SubtitleFormatParseError, "subtitle_io/parse/ass") @@ -129,7 +130,18 @@ struct Writer { line += ","; line += inline_string_encode(edi.second.first); line += ","; - line += inline_string_encode(edi.second.second); + std::string encoded_data = inline_string_encode(edi.second.second); + if (4*edi.second.second.size() < 3*encoded_data.size()) { + // the inline_string encoding grew the data by more than uuencoding would + // so uuencode it instead + line += "u"; // marker for uuencoding + encoded_data = agi::ass::UUEncode(edi.second.second, false); + printf("did uuencoding, original size=%lu, encoded size=%lu\n", edi.second.second.size(), encoded_data.size()); + line += encoded_data; + } else { + line += "e"; // marker for inline_string encoding (escaping) + line += encoded_data; + } file.WriteLineToFile(line); } }