// Copyright (c) 2010, Thomas Goyne // // Permission to use, copy, modify, and distribute this software for any // purpose with or without fee is hereby granted, provided that the above // copyright notice and this permission notice appear in all copies. // // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR // ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. /// @file line_iterator.h /// @brief An iterator over lines in a stream /// @ingroup libaegisub #pragma once #include #include #include #include #include #include namespace agi { /// @class line_iterator /// @brief An iterator over lines in a stream template class line_iterator final : public std::iterator { std::istream *stream; ///< Stream to iterator over OutputType value; ///< Value to return when this is dereference std::shared_ptr conv; int cr; ///< CR character in the source encoding int lf; ///< LF character in the source encoding size_t width; ///< width of LF character in the source encoding /// @brief Convert a string to the output type /// @param str Line read from the file /// /// line_iterator users can either ensure that operator>> is defined for /// their desired output type or simply provide a specialization of this /// method which does the conversion. inline bool convert(std::string &str); /// Called after construction for specializations that need to do things void init() { }; /// @brief Get the next line from the stream /// @param[out] str String to fill with the next line void getline(std::string &str); /// @brief Get the next value from the stream void next(); public: /// @brief Constructor /// @param stream The stream to read from. The calling code is responsible /// for ensuring that the stream remains valid for the /// lifetime of the iterator and that it get cleaned up. /// @param encoding Encoding of the text read from the stream line_iterator(std::istream &stream, std::string encoding = "utf-8") : stream(&stream) , cr('\r') , lf('\n') , width(1) { if (boost::to_lower_copy(encoding) != "utf-8") { agi::charset::IconvWrapper c("utf-8", encoding.c_str()); c.Convert("\r", 1, reinterpret_cast(&cr), sizeof(int)); c.Convert("\n", 1, reinterpret_cast(&lf), sizeof(int)); width = c.RequiredBufferSize("\n"); conv = std::make_shared(encoding.c_str(), "utf-8"); } init(); ++(*this); } /// @brief Invalid iterator constructor; use for end iterator line_iterator() : stream(nullptr) { } /// @brief Copy constructor /// @param that line_iterator to copy from line_iterator(line_iterator const&) = default; OutputType const& operator*() const { return value; } OutputType const* operator->() const { return &value; } line_iterator& operator++() { next(); return *this; } line_iterator operator++(int) { line_iterator tmp(*this); ++*this; return tmp; } bool operator==(line_iterator const& rgt) const { return stream == rgt.stream; } bool operator!=(line_iterator const& rgt) const { return !operator==(rgt); } // typedefs needed by some stl algorithms typedef OutputType* pointer; typedef OutputType& reference; typedef const OutputType* const_pointer; typedef const OutputType& const_reference; line_iterator& operator=(line_iterator that) { using std::swap; swap(*this, that); return *this; } void swap(line_iterator &that) throw() { using std::swap; swap(stream, that.stream); swap(value, that.value); swap(conv, that.conv); swap(lf, that.lf); swap(cr, that.cr); swap(width, that.width); } }; // Enable range-based for template line_iterator& begin(line_iterator& it) { return it; } template line_iterator end(line_iterator&) { return agi::line_iterator(); } template void line_iterator::getline(std::string &str) { union { int32_t chr; char buf[4]; } u; for (;;) { u.chr = 0; std::streamsize read = stream->rdbuf()->sgetn(u.buf, width); if (read < (std::streamsize)width) { for (int i = 0; i < read; i++) { str += u.buf[i]; } stream->setstate(std::ios::eofbit); return; } if (u.chr == cr) continue; if (u.chr == lf) return; for (int i = 0; i < read; i++) { str += u.buf[i]; } } } template void line_iterator::next() { if (!stream) return; if (!stream->good()) { stream = nullptr; return; } std::string str, cstr, *target; if (width == 1) { std::getline(*stream, str); if (str.size() && str.back() == '\r') str.pop_back(); } else { getline(str); } if (conv.get()) { conv->Convert(str, cstr); target = &cstr; } else { target = &str; } if (!convert(*target)) next(); } template<> inline void line_iterator::next() { if (!stream) return; if (!stream->good()) { stream = nullptr; return; } std::string cstr; std::string *target = conv ? &cstr : &value; if (width == 1) { std::getline(*stream, *target); if (target->size() && target->back() == '\r') target->pop_back(); } else getline(*target); if (conv.get()) { value.clear(); conv->Convert(*target, value); } } template inline bool line_iterator::convert(std::string &str) { std::istringstream ss(str); ss >> value; return !ss.fail(); } template void swap(agi::line_iterator &lft, agi::line_iterator &rgt) { lft.swap(rgt); } }