Optimize Extradata handling a bit

This commit is contained in:
Thomas Goyne 2014-09-06 09:16:44 -07:00
parent 4cf7cc819d
commit 58956b3ffc
6 changed files with 81 additions and 53 deletions

View File

@ -28,6 +28,8 @@
#include <boost/algorithm/string/predicate.hpp>
#include <boost/filesystem/path.hpp>
#include <cassert>
#include <unordered_map>
#include <unordered_set>
AssFile::AssFile() { }
@ -232,58 +234,79 @@ void AssFile::Sort(EntryList<AssDialogue> &lst, CompFunc comp, std::set<AssDialo
uint32_t AssFile::AddExtradata(std::string const& key, std::string const& value) {
for (auto const& data : Extradata) {
// perform brute-force deduplication by simple key and value comparison
if (key == data.second.first && value == data.second.second) {
return data.first;
if (key == data.key && value == data.value) {
return data.id;
}
}
// next_extradata_id must not exist
assert(Extradata.find(next_extradata_id) == Extradata.end());
Extradata[next_extradata_id] = {key, value};
Extradata.push_back(ExtradataEntry{next_extradata_id, key, value});
return next_extradata_id++; // return old value, then post-increment
}
std::map<std::string, std::string> AssFile::GetExtradata(std::vector<uint32_t> const& id_list) const {
// If multiple IDs have the same key name, the last ID wins
std::map<std::string, std::string> result;
for (auto id : id_list) {
auto it = Extradata.find(id);
if (it != Extradata.end())
result[it->second.first] = it->second.second;
namespace {
struct extradata_id_cmp {
bool operator()(ExtradataEntry const& e, uint32_t id) {
return e.id < id;
}
bool operator()(uint32_t id, ExtradataEntry const& e) {
return id < e.id;
}
};
template<typename ExtradataType, typename Func>
void enumerate_extradata(ExtradataType&& extradata, std::vector<uint32_t> const& id_list, Func&& f) {
auto begin = extradata.begin(), end = extradata.end();
for (auto id : id_list) {
auto it = lower_bound(begin, end, id, extradata_id_cmp{});
if (it != end) {
f(*it);
begin = it;
}
}
}
template<typename K, typename V>
using reference_map = std::unordered_map<std::reference_wrapper<const K>, V, std::hash<K>, std::equal_to<K>>;
}
std::vector<ExtradataEntry> AssFile::GetExtradata(std::vector<uint32_t> const& id_list) const {
std::vector<ExtradataEntry> result;
enumerate_extradata(Extradata, id_list, [&](ExtradataEntry const& e) {
result.push_back(e);
});
return result;
}
void AssFile::CleanExtradata() {
// Collect all IDs existing in the database
// Then remove all IDs found to be in use from this list
// Remaining is then all garbage IDs
std::vector<uint32_t> ids;
for (auto& it : Extradata)
ids.push_back(it.first);
if (ids.empty()) return;
if (Extradata.empty()) return;
// For each line, find which IDs it actually uses and remove them from the unused-list
std::unordered_set<uint32_t> ids_used;
for (auto& line : Events) {
if (line.ExtradataIds.get().empty()) continue;
// Find the ID for each unique key in the line
std::map<std::string, uint32_t> key_ids;
for (auto id : line.ExtradataIds.get()) {
auto ed_it = Extradata.find(id);
if (ed_it == Extradata.end())
continue;
key_ids[ed_it->second.first] = id;
reference_map<std::string, uint32_t> keys_used;
enumerate_extradata(Extradata, line.ExtradataIds.get(), [&](ExtradataEntry const& e) {
keys_used[e.key] = e.id;
});
for (auto const& used : keys_used)
ids_used.insert(used.second);
// If any keys were duplicated or missing, update the id list
if (keys_used.size() != line.ExtradataIds.get().size()) {
std::vector<uint32_t> ids;
ids.reserve(keys_used.size());
for (auto const& used : keys_used)
ids.push_back(used.second);
std::sort(begin(ids), end(ids));
line.ExtradataIds = std::move(ids);
}
// Update the line's ID list to only contain the actual ID for any duplicate keys
// Also mark found IDs as used in the cleaning list
std::vector<uint32_t> new_ids;
for (auto& keyid : key_ids) {
new_ids.push_back(keyid.second);
ids.erase(remove(begin(ids), end(ids), keyid.second), end(ids));
}
line.ExtradataIds = new_ids;
}
// The ids list should contain only unused IDs now
for (auto id : ids) {
Extradata.erase(id);
if (ids_used.size() != Extradata.size()) {
// Erase all no-longer-used extradata entries
Extradata.erase(std::remove_if(begin(Extradata), end(Extradata), [&](ExtradataEntry const& e) {
return !ids_used.count(e.id);
}), end(Extradata));
}
}

View File

@ -33,6 +33,7 @@
#include <libaegisub/signal.h>
#include <boost/intrusive/list.hpp>
#include <map>
#include <set>
#include <vector>
@ -45,7 +46,11 @@ class wxString;
template<typename T>
using EntryList = typename boost::intrusive::make_list<T, boost::intrusive::constant_time_size<false>, boost::intrusive::base_hook<AssEntryListHook>>::type;
using AegisubExtradataMap = std::map<uint32_t, std::pair<std::string, std::string>>;
struct ExtradataEntry {
uint32_t id;
std::string key;
std::string value;
};
struct AssFileCommit {
wxString const& message;
@ -83,7 +88,7 @@ public:
EntryList<AssStyle> Styles;
EntryList<AssDialogue> Events;
std::vector<AssAttachment> Attachments;
AegisubExtradataMap Extradata;
std::vector<ExtradataEntry> Extradata;
ProjectProperties Properties;
uint32_t next_extradata_id = 0;
@ -127,7 +132,7 @@ public:
/// @return ID of the created entry
uint32_t AddExtradata(std::string const& key, std::string const& value);
/// Fetch all extradata entries from a list of IDs
std::map<std::string, std::string> GetExtradata(std::vector<uint32_t> const& id_list) const;
std::vector<ExtradataEntry> GetExtradata(std::vector<uint32_t> const& id_list) const;
/// Remove unreferenced extradata entries
void CleanExtradata();
@ -199,4 +204,3 @@ public:
/// @param limit If non-empty, only lines in this set are sorted
static void Sort(EntryList<AssDialogue>& lst, CompFunc comp = CompStart, std::set<AssDialogue*> const& limit = std::set<AssDialogue*>());
};

View File

@ -219,7 +219,7 @@ void AssParser::ParseExtradataLine(std::string const &data) {
// ensure next_extradata_id is always at least 1 more than the largest existing id
target->next_extradata_id = std::max(id+1, target->next_extradata_id);
target->Extradata[id] = {key, value};
target->Extradata.push_back(ExtradataEntry{id, std::move(key), std::move(value)});
}
}

View File

@ -179,8 +179,8 @@ namespace Automation4 {
// create extradata table
lua_newtable(L);
for (auto const& ed : ass->GetExtradata(dia->ExtradataIds)) {
push_value(L, ed.first);
push_value(L, ed.second);
push_value(L, ed.key);
push_value(L, ed.value);
lua_settable(L, -3);
}
lua_setfield(L, -2, "extra");
@ -309,7 +309,8 @@ namespace Automation4 {
get_string_or_default(L, -2),
get_string_or_default(L, -1)));
});
dia->ExtradataIds = new_ids;
std::sort(begin(new_ids), end(new_ids));
dia->ExtradataIds = std::move(new_ids);
}
else {
error(L, "Found line with unknown class: %s", lclass.c_str());

View File

@ -57,7 +57,7 @@ struct SubsController::UndoInfo {
std::vector<AssStyle> styles;
std::vector<AssDialogueBase> events;
std::vector<AssAttachment> attachments;
AegisubExtradataMap extradata;
std::vector<ExtradataEntry> extradata;
mutable std::vector<int> selection;
int active_line_id = 0;

View File

@ -122,8 +122,8 @@ struct Writer {
file.WriteLineToFile(key + std::to_string(n));
}
void WriteExtradata(AegisubExtradataMap const& extradata) {
if (extradata.size() == 0)
void WriteExtradata(std::vector<ExtradataEntry> const& extradata) {
if (extradata.empty())
return;
group = AssEntryGroup::EXTRADATA;
@ -131,16 +131,16 @@ struct Writer {
file.WriteLineToFile("[Aegisub Extradata]");
for (auto const& edi : extradata) {
std::string line = "Data: ";
line += std::to_string(edi.first);
line += std::to_string(edi.id);
line += ",";
line += inline_string_encode(edi.second.first);
line += inline_string_encode(edi.key);
line += ",";
std::string encoded_data = inline_string_encode(edi.second.second);
if (4*edi.second.second.size() < 3*encoded_data.size()) {
std::string encoded_data = inline_string_encode(edi.value);
if (4*edi.value.size() < 3*encoded_data.size()) {
// the inline_string encoding grew the data by more than uuencoding would
// so base64 encode it instead
line += "u"; // marker for uuencoding
line += agi::ass::UUEncode(edi.second.second, false);
line += agi::ass::UUEncode(edi.value, false);
} else {
line += "e"; // marker for inline_string encoding (escaping)
line += encoded_data;