Rewrite LuaScriptReader, making it simpler and adding support for more charsets

Originally committed to SVN as r5619.
This commit is contained in:
Thomas Goyne 2011-09-28 19:45:16 +00:00
parent 432abc309b
commit 0b6ef72242
2 changed files with 64 additions and 157 deletions

View File

@ -1,31 +1,16 @@
// Copyright (c) 2006, 2007, Niels Martin Hansen // Copyright (c) 2011, Thomas Goyne <plorkyeran@aegisub.org>
// All rights reserved.
// //
// Redistribution and use in source and binary forms, with or without // Permission to use, copy, modify, and distribute this software for any
// modification, are permitted provided that the following conditions are met: // purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
// //
// * Redistributions of source code must retain the above copyright notice, // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// this list of conditions and the following disclaimer. // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// * Redistributions in binary form must reproduce the above copyright notice, // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// this list of conditions and the following disclaimer in the documentation // ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// and/or other materials provided with the distribution. // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// * Neither the name of the Aegisub Group nor the names of its contributors // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// may be used to endorse or promote products derived from this software // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Aegisub Project http://www.aegisub.org/
// //
// $Id$ // $Id$
@ -34,98 +19,50 @@
/// @ingroup scripting /// @ingroup scripting
/// ///
#include "config.h" #include "config.h"
#ifdef WITH_AUTO4_LUA #ifdef WITH_AUTO4_LUA
#include "auto4_lua_scriptreader.h" #include "auto4_lua_scriptreader.h"
#include <libaegisub/io.h>
#include <libaegisub/charset_conv.h>
#include "charset_detect.h"
#include "compat.h"
/// DOCME
namespace Automation4 { namespace Automation4 {
LuaScriptReader::LuaScriptReader(wxString const& filename)
: conv(new agi::charset::IconvWrapper(CharSetDetect::GetEncoding(filename).c_str(), "utf-8", false))
/// @brief // LuaScriptReader , file(agi::io::Open(STD_STR(filename)))
/// @param filename
///
LuaScriptReader::LuaScriptReader(const wxString &filename)
{ {
#ifdef WIN32
f = _wfopen(filename.wc_str(), L"rb");
#else
f = fopen(filename.fn_str(), "rb");
#endif
if (!f)
throw "Could not open script file";
first = true;
databuf = new char[bufsize];
} }
LuaScriptReader::~LuaScriptReader() { }
/// @brief DOCME const char *LuaScriptReader::Read(size_t *bytes_read) {
/// char in_buf[512];
LuaScriptReader::~LuaScriptReader() file->read(in_buf, sizeof(in_buf));
{
if (databuf) const char *in = in_buf;
delete[] databuf; char *out = buf;
fclose(f); size_t in_bytes = file->gcount();
size_t out_bytes = sizeof(buf);
conv->Convert(&in, &in_bytes, &out, &out_bytes);
*bytes_read = out - buf;
// Skip the bom
if (*bytes_read >= 3 && buf[0] == -17 && buf[1] == -69 && buf[2] == -65) {
*bytes_read -= 3;
return buf + 3;
}
return buf;
} }
const char* LuaScriptReader::reader_func(lua_State *, void *data, size_t *size) {
/// @brief DOCME return static_cast<LuaScriptReader*>(data)->Read(size);
/// @param L
/// @param data
/// @param size
///
const char* LuaScriptReader::reader_func(lua_State *L, void *data, size_t *size)
{
LuaScriptReader *self = (LuaScriptReader*)(data);
unsigned char *b = (unsigned char *)self->databuf;
FILE *f = self->f;
if (feof(f)) {
*size = 0;
return 0;
} }
if (self->first) {
// check if file is sensible and maybe skip bom
if ((*size = fread(b, 1, 4, f)) == 4) {
if (b[0] == 0xEF && b[1] == 0xBB && b[2] == 0xBF) {
// got an utf8 file with bom
// nothing further to do, already skipped the bom
fseek(f, -1, SEEK_CUR);
} else {
// oops, not utf8 with bom
// check if there is some other BOM in place and complain if there is...
if ((b[0] == 0xFF && b[1] == 0xFE && b[2] == 0x00 && b[3] == 0x00) || // utf32be
(b[0] == 0x00 && b[1] == 0x00 && b[2] == 0xFE && b[3] == 0xFF) || // utf32le
(b[0] == 0xFF && b[1] == 0xFE) || // utf16be
(b[0] == 0xFE && b[1] == 0xFF) || // utf16le
(b[0] == 0x2B && b[1] == 0x2F && b[2] == 0x76) || // utf7
(b[0] == 0x00 && b[2] == 0x00) || // looks like utf16be
(b[1] == 0x00 && b[3] == 0x00)) { // looks like utf16le
throw "The script file uses an unsupported character set. Only UTF-8 is supported.";
}
// assume utf8 without bom, and rewind file
fseek(f, 0, SEEK_SET);
}
} else {
// hmm, rather short file this...
// doesn't have a bom, assume it's just ascii/utf8 without bom
return self->databuf; // *size is already set
}
self->first = false;
}
*size = fread(b, 1, bufsize, f);
return self->databuf;
}
}; };
#endif // WITH_AUTO4_LUA #endif // WITH_AUTO4_LUA

View File

@ -1,31 +1,16 @@
// Copyright (c) 2006, 2007, Niels Martin Hansen // Copyright (c) 2011, Thomas Goyne <plorkyeran@aegisub.org>
// All rights reserved.
// //
// Redistribution and use in source and binary forms, with or without // Permission to use, copy, modify, and distribute this software for any
// modification, are permitted provided that the following conditions are met: // purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
// //
// * Redistributions of source code must retain the above copyright notice, // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// this list of conditions and the following disclaimer. // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// * Redistributions in binary form must reproduce the above copyright notice, // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// this list of conditions and the following disclaimer in the documentation // ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// and/or other materials provided with the distribution. // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// * Neither the name of the Aegisub Group nor the names of its contributors // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// may be used to endorse or promote products derived from this software // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Aegisub Project http://www.aegisub.org/
// //
// $Id$ // $Id$
@ -34,42 +19,27 @@
/// @ingroup scripting /// @ingroup scripting
/// ///
#ifndef AGI_PRE
#include <wx/string.h> #include <istream>
#ifdef __WINDOWS__
#include "../../contrib/lua51/src/lua.h"
#else
#include "lua.hpp"
#endif #endif
#include <libaegisub/scoped_ptr.h>
struct lua_State;
namespace agi { namespace charset { class IconvWrapper; } }
class wxString;
/// DOCME
namespace Automation4 { namespace Automation4 {
class LuaScriptReader {
agi::scoped_ptr<agi::charset::IconvWrapper> conv;
agi::scoped_ptr<std::istream> file;
char buf[512];
const char *Read(size_t *bytes_read);
/// DOCME public:
struct LuaScriptReader { LuaScriptReader(wxString const& filename);
/// DOCME
FILE *f;
/// DOCME
bool first;
/// DOCME
char *databuf;
/// DOCME
static const size_t bufsize = 512;
LuaScriptReader(const wxString &filename);
~LuaScriptReader(); ~LuaScriptReader();
static const char* reader_func(lua_State *L, void *data, size_t *size);
static const char* reader_func(lua_State *, void *data, size_t *size);
}; };
}; };