Move uuencoding logic to libaegisub and add tests

This commit is contained in:
Thomas Goyne 2013-02-07 09:36:47 -08:00
parent be8790942a
commit 73217fd0e9
11 changed files with 212 additions and 80 deletions

View File

@ -33,17 +33,30 @@
<ForcedIncludeFiles>lagi_pre.h</ForcedIncludeFiles>
</ClCompile>
</ItemDefinitionGroup>
<!-- Source files -->
<ItemGroup>
<ClInclude Include="$(SrcDir)lagi_pre.h" />
<ClInclude Include="$(SrcDir)config.h" />
<ClInclude Include="$(SrcDir)common\charset_6937.h" />
<ClInclude Include="$(SrcDir)common\option_visit.h" />
<ClInclude Include="$(SrcDir)common\parser.h" />
<ClInclude Include="$(SrcDir)config.h" />
<ClInclude Include="$(SrcDir)include\libaegisub\access.h" />
<ClInclude Include="$(SrcDir)include\libaegisub\ass\dialogue_parser.h" />
<ClInclude Include="$(SrcDir)include\libaegisub\ass\uuencode.h" />
<ClInclude Include="$(SrcDir)include\libaegisub\background_runner.h" />
<ClInclude Include="$(SrcDir)include\libaegisub\cajun\elements.h" />
<ClInclude Include="$(SrcDir)include\libaegisub\cajun\reader.h" />
<ClInclude Include="$(SrcDir)include\libaegisub\cajun\visitor.h" />
<ClInclude Include="$(SrcDir)include\libaegisub\cajun\writer.h" />
<ClInclude Include="$(SrcDir)include\libaegisub\calltip_provider.h" />
<ClInclude Include="$(SrcDir)include\libaegisub\charset.h" />
<ClInclude Include="$(SrcDir)include\libaegisub\charset_conv.h" />
<ClInclude Include="$(SrcDir)include\libaegisub\charset_conv_win.h" />
<ClInclude Include="$(SrcDir)include\libaegisub\color.h" />
<ClInclude Include="$(SrcDir)include\libaegisub\dispatch.h" />
<ClInclude Include="$(SrcDir)include\libaegisub\exception.h" />
<ClInclude Include="$(SrcDir)include\libaegisub\fs.h" />
<ClInclude Include="$(SrcDir)include\libaegisub\fs_fwd.h" />
<ClInclude Include="$(SrcDir)include\libaegisub\hotkey.h" />
<ClInclude Include="$(SrcDir)include\libaegisub\io.h" />
<ClInclude Include="$(SrcDir)include\libaegisub\json.h" />
@ -52,68 +65,58 @@
<ClInclude Include="$(SrcDir)include\libaegisub\line_wrap.h" />
<ClInclude Include="$(SrcDir)include\libaegisub\log.h" />
<ClInclude Include="$(SrcDir)include\libaegisub\mru.h" />
<ClInclude Include="$(SrcDir)include\libaegisub\of_type_adaptor.h" />
<ClInclude Include="$(SrcDir)include\libaegisub\option.h" />
<ClInclude Include="$(SrcDir)include\libaegisub\option_value.h" />
<ClInclude Include="$(SrcDir)include\libaegisub\path.h" />
<ClInclude Include="$(SrcDir)include\libaegisub\scoped_ptr.h" />
<ClInclude Include="$(SrcDir)include\libaegisub\signal.h" />
<ClInclude Include="$(SrcDir)include\libaegisub\spellchecker.h" />
<ClInclude Include="$(SrcDir)include\libaegisub\split.h" />
<ClInclude Include="$(SrcDir)include\libaegisub\thesaurus.h" />
<ClInclude Include="$(SrcDir)include\libaegisub\time.h" />
<ClInclude Include="$(SrcDir)include\libaegisub\util.h" />
<ClInclude Include="$(SrcDir)include\libaegisub\util_osx.h" />
<ClInclude Include="$(SrcDir)include\libaegisub\util_win.h" />
<ClInclude Include="$(SrcDir)include\libaegisub\vfr.h" />
<ClInclude Include="$(SrcDir)include\libaegisub\cajun\elements.h" />
<ClInclude Include="$(SrcDir)include\libaegisub\cajun\reader.h" />
<ClInclude Include="$(SrcDir)include\libaegisub\cajun\visitor.h" />
<ClInclude Include="$(SrcDir)include\libaegisub\cajun\writer.h" />
<ClInclude Include="$(SrcDir)common\parser.h" />
<ClInclude Include="$(SrcDir)include\libaegisub\ass\dialogue_parser.h" />
<ClInclude Include="$(SrcDir)include\libaegisub\background_runner.h" />
<ClInclude Include="$(SrcDir)include\libaegisub\color.h" />
<ClInclude Include="$(SrcDir)include\libaegisub\spellchecker.h" />
<ClInclude Include="$(SrcDir)include\libaegisub\calltip_provider.h" />
<ClInclude Include="$(SrcDir)include\libaegisub\fs.h" />
<ClInclude Include="$(SrcDir)include\libaegisub\of_type_adaptor.h" />
<ClInclude Include="$(SrcDir)include\libaegisub\path.h" />
<ClInclude Include="$(SrcDir)include\libaegisub\fs_fwd.h" />
<ClInclude Include="$(SrcDir)include\libaegisub\dispatch.h" />
<ClInclude Include="$(SrcDir)include\libaegisub\split.h" />
<ClInclude Include="$(SrcDir)include\libaegisub\time.h" />
<ClInclude Include="$(SrcDir)lagi_pre.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="$(SrcDir)windows\lagi_pre.cpp">
<PrecompiledHeader>Create</PrecompiledHeader>
<PrecompiledHeaderFile>lagi_pre.h</PrecompiledHeaderFile>
</ClCompile>
<ClCompile Include="$(SrcDir)ass\dialogue_parser.cpp" />
<ClCompile Include="$(SrcDir)ass\uuencode.cpp" />
<ClCompile Include="$(SrcDir)common\cajun\elements.cpp" />
<ClCompile Include="$(SrcDir)common\cajun\reader.cpp" />
<ClCompile Include="$(SrcDir)common\cajun\writer.cpp" />
<ClCompile Include="$(SrcDir)common\calltip_provider.cpp" />
<ClCompile Include="$(SrcDir)common\charset.cpp" />
<ClCompile Include="$(SrcDir)common\charset_6937.cpp" />
<ClCompile Include="$(SrcDir)common\charset_conv.cpp" />
<ClCompile Include="$(SrcDir)common\color.cpp" />
<ClCompile Include="$(SrcDir)common\dispatch.cpp" />
<ClCompile Include="$(SrcDir)common\fs.cpp" />
<ClCompile Include="$(SrcDir)common\hotkey.cpp" />
<ClCompile Include="$(SrcDir)common\io.cpp" />
<ClCompile Include="$(SrcDir)common\json.cpp" />
<ClCompile Include="$(SrcDir)common\keyframe.cpp" />
<ClCompile Include="$(SrcDir)common\log.cpp" />
<ClCompile Include="$(SrcDir)common\mru.cpp" />
<ClCompile Include="$(SrcDir)common\option.cpp" />
<ClCompile Include="$(SrcDir)common\option_visit.cpp" />
<ClCompile Include="$(SrcDir)common\parser.cpp" />
<ClCompile Include="$(SrcDir)common\path.cpp" />
<ClCompile Include="$(SrcDir)common\thesaurus.cpp" />
<ClCompile Include="$(SrcDir)common\util.cpp" />
<ClCompile Include="$(SrcDir)common\vfr.cpp" />
<ClCompile Include="$(SrcDir)windows\access.cpp" />
<ClCompile Include="$(SrcDir)windows\charset_conv_win.cpp" />
<ClCompile Include="$(SrcDir)windows\log_win.cpp" />
<ClCompile Include="$(SrcDir)windows\util_win.cpp" />
<ClCompile Include="$(SrcDir)ass\dialogue_parser.cpp" />
<ClCompile Include="$(SrcDir)common\color.cpp" />
<ClCompile Include="$(SrcDir)common\parser.cpp" />
<ClCompile Include="$(SrcDir)common\calltip_provider.cpp" />
<ClCompile Include="$(SrcDir)common\fs.cpp" />
<ClCompile Include="$(SrcDir)common\io.cpp" />
<ClCompile Include="$(SrcDir)common\path.cpp" />
<ClCompile Include="$(SrcDir)windows\fs.cpp" />
<ClCompile Include="$(SrcDir)windows\log_win.cpp" />
<ClCompile Include="$(SrcDir)windows\path_win.cpp" />
<ClCompile Include="$(SrcDir)common\dispatch.cpp" />
<ClCompile Include="$(SrcDir)windows\util_win.cpp" />
</ItemGroup>
<ItemGroup>
<None Include="$(SrcDir)include\libaegisub\charsets.def" />

View File

@ -152,6 +152,9 @@
<ClInclude Include="$(SrcDir)include\libaegisub\time.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="$(SrcDir)include\libaegisub\ass\uuencode.h">
<Filter>ASS</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="$(SrcDir)windows\lagi_pre.cpp">
@ -250,6 +253,9 @@
<ClCompile Include="$(SrcDir)common\dispatch.cpp">
<Filter>Source Files\Common</Filter>
</ClCompile>
<ClCompile Include="$(SrcDir)ass\uuencode.cpp">
<Filter>ASS</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<None Include="$(SrcDir)include\libaegisub\charsets.def">

View File

@ -56,6 +56,7 @@
<ClCompile Include="$(SrcDir)tests\syntax_highlight.cpp" />
<ClCompile Include="$(SrcDir)tests\thesaurus.cpp" />
<ClCompile Include="$(SrcDir)tests\util.cpp" />
<ClCompile Include="$(SrcDir)tests\uuencode.cpp" />
<ClCompile Include="$(SrcDir)tests\vfr.cpp" />
<ClCompile Include="$(SrcDir)tests\word_split.cpp">
<ExcludedFromBuild>true</ExcludedFromBuild>

View File

@ -77,6 +77,9 @@
<ClCompile Include="$(SrcDir)tests\word_split.cpp">
<Filter>Tests</Filter>
</ClCompile>
<ClCompile Include="$(SrcDir)tests\uuencode.cpp">
<Filter>Tests</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="$(SrcDir)support\main.h">

View File

@ -13,6 +13,7 @@ unix/path.o: CXXFLAGS += -DP_DATA=\"$(P_DATA)\" -DP_DOC=\"$(P_DOC)\" -DP_LOCALE=
SRC += \
ass/dialogue_parser.cpp \
ass/uuencode.cpp \
common/cajun/elements.cpp \
common/cajun/reader.cpp \
common/cajun/writer.cpp \

View File

@ -0,0 +1,79 @@
// Copyright (c) 2013, Thomas Goyne <plorkyeran@aegisub.org>
//
// Permission to use, copy, modify, and distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
//
// Aegisub Project http://www.aegisub.org/
#include "../config.h"
#include "libaegisub/ass/uuencode.h"
// Despite being called uuencoding by ass_specs.doc, the format is actually
// somewhat different from real uuencoding. Each 3-byte chunk is split into 4
// 6-bit pieces, then 33 is added to each piece. Output lines are wrapped
// after 80 encoded characters. A final chunk of only 1 or 2 bytes is padded
// with zero bytes before being split, and only the first 2 or 3 of its
// characters are written.
namespace agi { namespace ass {
/// Decode ASS-flavoured uuencoded text back into raw bytes.
///
/// The input is consumed in groups of up to four characters. Each character
/// has 33 subtracted from it and contributes six bits; a group of 2, 3 or 4
/// characters yields 1, 2 or 3 bytes respectively. A single leftover
/// character at the end of the input is ignored.
///
/// @param str Encoded text with all line breaks already removed
/// @return The decoded bytes
std::vector<char> UUDecode(std::string const& str) {
	const size_t length = str.size();

	std::vector<char> decoded;
	decoded.reserve(length * 3 / 4);

	size_t offset = 0;
	while (offset + 1 < length) {
		const size_t available = std::min<size_t>(length - offset, 4);

		// Remove the +33 bias; slots past the end of the input stay zero
		unsigned char sextet[4] = { 0, 0, 0, 0 };
		for (size_t i = 0; i != available; ++i)
			sextet[i] = static_cast<unsigned char>(str[offset + i] - 33);

		// The loop condition guarantees at least two characters, so the
		// first byte is always present
		decoded.push_back(static_cast<char>((sextet[0] << 2) | (sextet[1] >> 4)));
		if (available >= 3)
			decoded.push_back(static_cast<char>(((sextet[1] & 0xF) << 4) | (sextet[2] >> 2)));
		if (available == 4)
			decoded.push_back(static_cast<char>(((sextet[2] & 0x3) << 6) | sextet[3]));

		offset += 4;
	}

	return decoded;
}
/// Encode a blob of data using the ASS variant of uuencoding.
///
/// Every 3 input bytes become 4 characters in the range '!' (33) to '`' (96).
/// A trailing chunk of 1 or 2 bytes is zero-padded before being split and
/// produces only 2 or 3 characters. "\r\n" is inserted after each run of 80
/// encoded characters, except when those characters end the output.
///
/// @param data Raw bytes to encode; may be empty
/// @return The encoded text
std::string UUEncode(std::vector<char> const& data) {
	const size_t size = data.size();

	std::string ret;
	// 4 characters per 3 bytes (rounded up), plus room for one "\r\n" per
	// full output line; a full line of 80 characters holds 60 input bytes
	ret.reserve((size * 4 + 2) / 3 + size / 60 * 2);

	size_t written = 0;
	for (size_t pos = 0; pos < size; pos += 3) {
		const size_t chunk = std::min<size_t>(3u, size - pos);

		// Copy through unsigned char so bytes >= 0x80 are not sign-extended
		unsigned char src[3] = { 0, 0, 0 };
		for (size_t i = 0; i < chunk; ++i)
			src[i] = static_cast<unsigned char>(data[pos + i]);

		// The shifts promote to int; cast back explicitly because implicit
		// narrowing inside a braced initializer is ill-formed since C++11
		const unsigned char dst[4] = {
			static_cast<unsigned char>(src[0] >> 2),
			static_cast<unsigned char>(((src[0] & 0x3) << 4) | ((src[1] & 0xF0) >> 4)),
			static_cast<unsigned char>(((src[1] & 0xF) << 2) | ((src[2] & 0xC0) >> 6)),
			static_cast<unsigned char>(src[2] & 0x3F)
		};

		// A chunk of n bytes only needs n + 1 characters to carry its bits
		for (size_t i = 0; i < chunk + 1; ++i) {
			ret += static_cast<char>(dst[i] + 33);

			// Wrap only when more input follows, so the output never ends
			// with a line break
			if (++written == 80 && pos + 3 < size) {
				written = 0;
				ret += "\r\n";
			}
		}
	}

	return ret;
}
} }

View File

@ -0,0 +1,28 @@
// Copyright (c) 2013, Thomas Goyne <plorkyeran@aegisub.org>
//
// Permission to use, copy, modify, and distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
//
// Aegisub Project http://www.aegisub.org/
#include <string>
#include <vector>
namespace agi {
namespace ass {
/// Encode a blob of data, using ASS's nonstandard variant
///
/// @param data Raw bytes to encode
/// @return The encoded text, with "\r\n" inserted after every 80 encoded
///         characters when more data follows
std::string UUEncode(std::vector<char> const& data);
/// Decode an ASS uuencoded string which has had its newlines stripped
///
/// @param str Encoded text with all line breaks already removed
/// @return The decoded bytes
std::vector<char> UUDecode(std::string const& str);
}
}

View File

@ -36,14 +36,14 @@
#include "ass_attachment.h"
#include <libaegisub/ass/uuencode.h>
#include <libaegisub/io.h>
#include <boost/algorithm/string/predicate.hpp>
#include <fstream>
/// Construct an attachment entry with the given file name and entry group.
// NOTE(review): two member-initializer lists appear below (one starting with
// `data(...)`, one starting with `filename(name)`). This looks like the
// before/after lines of a diff hunk shown together -- confirm against the
// real source which one is current.
AssAttachment::AssAttachment(std::string const& name, AssEntryGroup group)
: data(new std::vector<char>)
, filename(name)
: filename(name)
, group(group)
{
}
@ -70,32 +70,9 @@ AssEntry *AssAttachment::Clone() const {
}
/// Build the text of this attachment entry: a "fontname: " or "filename: "
/// header line (chosen by whether group is ENTRY_FONT) followed by the
/// encoded attachment data.
// NOTE(review): as shown here the body contains both an inline encoding loop
// and a call to agi::ass::UUEncode, and `data` is dereferenced before the
// `if (data)` check. This looks like removed and added diff lines rendered
// together -- confirm against the real source before relying on it.
const std::string AssAttachment::GetEntryData() const {
size_t size = data->size();
size_t written = 0;
std::string entryData = (group == ENTRY_FONT ? "fontname: " : "filename: ") + filename + "\r\n";
entryData.reserve(size * 4 / 3 + size / 80 * 2 + entryData.size() + 2);
// Inline encoder: each group of up to 3 input bytes becomes up to 4
// characters, each offset by 33
for (size_t pos = 0; pos < size; pos += 3) {
unsigned char src[3] = { '\0', '\0', '\0' };
memcpy(src, &(*data)[pos], std::min<size_t>(3u, size - pos));
unsigned char dst[4];
dst[0] = src[0] >> 2;
dst[1] = ((src[0] & 0x3) << 4) | ((src[1] & 0xF0) >> 4);
dst[2] = ((src[1] & 0xF) << 2) | ((src[2] & 0xC0) >> 6);
dst[3] = src[2] & 0x3F;
for (size_t i = 0; i < std::min<size_t>(size - pos + 1, 4u); ++i) {
entryData += dst[i] + 33;
// Break the line after 80 characters, but only when more input remains
if (++written == 80 && pos + 3 < size) {
written = 0;
entryData += "\r\n";
}
}
}
// Library encoder: append the encoded form of the attachment, if any
if (data)
entryData += agi::ass::UUEncode(*data);
return entryData;
}
@ -115,28 +92,7 @@ std::string AssAttachment::GetFileName(bool raw) const {
}
/// Decode the encoded text accumulated in `buffer` into `data`, then empty
/// the buffer and release its storage.
// NOTE(review): as shown here the body contains both an inline decode loop
// that appends to *data and an assignment of `data` from agi::ass::UUDecode.
// This looks like removed and added diff lines rendered together -- confirm
// against the real source before relying on it.
void AssAttachment::Finish() {
unsigned char src[4];
unsigned char dst[3];
data->reserve(buffer.size() * 3 / 4);
for(size_t pos = 0; pos + 1 < buffer.size(); ) {
size_t read = std::min<size_t>(buffer.size() - pos, 4);
// Move up to 4 characters from buffer to src, removing the +33 offset
for (size_t i = 0; i < read; ++i)
src[i] = (unsigned char)buffer[pos++] - 33;
// Zero the slots for which no input character was available
for (size_t i = read; i < 4; ++i)
src[i] = 0;
// Convert the 4 six-bit values in src to 3 bytes in dst
dst[0] = (src[0] << 2) | (src[1] >> 4);
dst[1] = ((src[1] & 0xF) << 4) | (src[2] >> 2);
dst[2] = ((src[2] & 0x3) << 6) | (src[3]);
// Only read - 1 of the bytes carry data when the group was short
copy(dst, dst + read - 1, back_inserter(*data));
}
// Library decoder: replace data with the decoded contents of buffer
data = std::make_shared<std::vector<char>>(agi::ass::UUDecode(buffer));
buffer.clear();
buffer.shrink_to_fit();
}

View File

@ -45,7 +45,7 @@ class AssAttachment : public AssEntry {
std::shared_ptr<std::vector<char>> data;
/// Encoded data which has been read from the script but not yet decoded
std::vector<char> buffer;
std::string buffer;
/// Name of the attached file, with SSA font mangling if it is a ttf
std::string filename;
@ -58,7 +58,7 @@ public:
/// Add a line of data (without newline) read from a subtitle file to the
/// buffer waiting to be decoded
void AddData(std::string const& data) { buffer.insert(buffer.end(), data.begin(), data.end()); }
void AddData(std::string const& data) { buffer += data; }
/// Decode all data passed with AddData
void Finish();

View File

@ -35,6 +35,7 @@ SRC = \
tests/syntax_highlight.cpp \
tests/thesaurus.cpp \
tests/util.cpp \
tests/uuencode.cpp \
tests/vfr.cpp \
tests/word_split.cpp \
${GTEST_ROOT}/src/gtest-all.cc

View File

@ -0,0 +1,54 @@
// Copyright (c) 2013, Thomas Goyne <plorkyeran@aegisub.org>
//
// Permission to use, copy, modify, and distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
//
// Aegisub Project http://www.aegisub.org/
#include <libaegisub/ass/uuencode.h>
#include "main.h"
#include <boost/algorithm/string/replace.hpp>
using namespace agi::ass;
// Inputs of one, two and three bytes must encode to two, three and four
// characters respectively: a partial chunk is not padded out to four.
TEST(lagi_uuencode, short_blobs) {
	static const char *const expected[] = { "?!", "?(E", "?(F[" };

	std::vector<char> blob;
	for (size_t i = 0; i != 3; ++i) {
		// Grows the blob through the bytes 120, 121, 122
		blob.push_back(static_cast<char>(120 + i));
		EXPECT_STREQ(expected[i], UUEncode(blob).c_str());
	}
}
// Encoded strings of two, three and four characters must decode to one, two
// and three bytes respectively.
TEST(lagi_uuencode, short_strings) {
	static const char *const encoded[] = { "?!", "?(E", "?(F[" };

	std::vector<char> expected;
	for (size_t i = 0; i != 3; ++i) {
		// Grows the expected output through the bytes 120, 121, 122
		expected.push_back(static_cast<char>(120 + i));
		EXPECT_EQ(expected, UUDecode(encoded[i]));
	}
}
// Encoding a blob, removing the line breaks and decoding the result must give
// back the original bytes. Each iteration checks the current blob and then
// grows it by one rand() byte, so lengths 0 through 199 are covered.
TEST(lagi_uuencode, random_blobs_roundtrip) {
	std::vector<char> blob;
	for (size_t checked = 0; checked != 200; ++checked) {
		const std::string wrapped = UUEncode(blob);
		// UUDecode expects its input without the "\r\n" line wrapping
		const std::string joined = boost::replace_all_copy(wrapped, "\r\n", "");
		EXPECT_EQ(blob, UUDecode(joined));
		blob.push_back(rand());
	}
}