// Copyright (c) 2009, Karl Blomster
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
// * Neither the name of the Aegisub Group nor the names of its contributors
//   may be used to endorse or promote products derived from this software
//   without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Aegisub Project http://www.aegisub.org/

/// @file video_provider_yuv4mpeg.cpp
/// @brief Video provider reading YUV4MPEG files directly without depending on external libraries
/// @ingroup video_input
///

#include "include/aegisub/video_provider.h"

#include "utils.h"
#include "video_frame.h"

#include <libaegisub/file_mapping.h>
#include <libaegisub/log.h>
#include <libaegisub/make_unique.h>
#include <libaegisub/util.h>
#include <libaegisub/vfr.h>
#include <libaegisub/ycbcr_conv.h>

#include <boost/algorithm/string/case_conv.hpp>

#include <algorithm>
#include <cstdint>
#include <cstring>
#include <memory>
#include <string>
#include <vector>

/// the maximum allowed header length, in bytes
#define YUV4MPEG_HEADER_MAXLEN 128

namespace {
/// @class YUV4MPEGVideoProvider
/// @brief Implements reading of YUV4MPEG uncompressed video files
class YUV4MPEGVideoProvider final : public VideoProvider {
	/// Pixel formats
	enum Y4M_PixelFormat {
		Y4M_PIXFMT_NONE = -1,	/// not set/unknown

		/// 4:2:0 sampling variants.
		/// afaict the only difference between these three
		/// is the chroma sample location, and nobody cares about that.
		Y4M_PIXFMT_420JPEG,		/// 4:2:0, H/V centered, for JPEG/MPEG-1
		Y4M_PIXFMT_420MPEG2,	/// 4:2:0, H cosited, for MPEG-2
		Y4M_PIXFMT_420PALDV,	/// 4:2:0, alternating Cb/Cr, for PAL-DV

		Y4M_PIXFMT_411,			/// 4:1:1, H cosited
		Y4M_PIXFMT_422,			/// 4:2:2, H cosited
		Y4M_PIXFMT_444,			/// 4:4:4, i.e. no chroma subsampling
		Y4M_PIXFMT_444ALPHA,	/// 4:4:4 plus alpha channel

		Y4M_PIXFMT_MONO			/// luma only (grayscale)
	};

	/// Interlacing mode for an entire stream
	enum Y4M_InterlacingMode {
		Y4M_ILACE_NOTSET = -1,	/// undefined
		Y4M_ILACE_PROGRESSIVE,	/// progressive (no interlacing)

		Y4M_ILACE_TFF,			/// interlaced, top field first
		Y4M_ILACE_BFF,			/// interlaced, bottom field first

		Y4M_ILACE_MIXED,		/// mixed interlaced/progressive, possibly with RFF flags
		Y4M_ILACE_UNKNOWN		/// unknown interlacing mode (not the same as undefined)
	};

	/// Frame information flags
	enum Y4M_FrameFlags {
		Y4M_FFLAG_NOTSET	= -1,		/// undefined
		Y4M_FFLAG_NONE		= 0x0000,	/// no flags set

		/// field order/repeat field flags
		Y4M_FFLAG_R_TFF		= 0x0001,	/// top field first
		Y4M_FFLAG_R_TFF_R	= 0x0002,	/// top field first, and repeat that field
		Y4M_FFLAG_R_BFF		= 0x0004,	/// bottom field first
		Y4M_FFLAG_R_BFF_R	= 0x0008,	/// bottom field first, and repeat that field
		Y4M_FFLAG_R_P		= 0x0010,	/// progressive
		Y4M_FFLAG_R_P_R		= 0x0020,	/// progressive, and repeat frame once
		Y4M_FFLAG_R_P_RR	= 0x0040,	/// progressive, and repeat frame twice

		/// temporal sampling flags
		Y4M_FFLAG_T_P		= 0x0080,	/// progressive (fields sampled at the same time)
		Y4M_FFLAG_T_I		= 0x0100,	/// interlaced (fields sampled at different times)

		/// chroma subsampling flags
		Y4M_FFLAG_C_P		= 0x0200,	/// progressive (whole frame subsampled)
		Y4M_FFLAG_C_I		= 0x0400,	/// interlaced (fields subsampled independently)
		Y4M_FFLAG_C_UNKNOWN	= 0x0800	/// unknown (only allowed for non-4:2:0 sampling)
	};

	agi::read_file_mapping file;
	bool inited = false;	/// initialization state

	int w = 0, h = 0;		/// frame width/height
	int num_frames = -1;	/// length of file in frames
	int frame_sz;			/// size of each frame in bytes
	int luma_sz;			/// size of the luma plane of each frame, in bytes
	int chroma_sz;			/// size of one of the two chroma planes of each frame, in bytes

	Y4M_PixelFormat pixfmt = Y4M_PIXFMT_NONE;		/// colorspace/pixel format
	Y4M_InterlacingMode imode = Y4M_ILACE_NOTSET;	/// interlacing mode (for the entire stream)

	struct {
		int num = -1;	/// numerator
		int den = 1;	/// denominator
	} fps_rat;			/// framerate

	agi::vfr::Framerate fps;

	agi::ycbcr_converter conv{agi::ycbcr_matrix::bt601, agi::ycbcr_range::tv};

	/// a list of byte positions detailing where in the file
	/// the data of each frame begins
	std::vector<uint64_t> seek_table;

	void ParseFileHeader(const std::vector<std::string>& tags);
	Y4M_FrameFlags ParseFrameHeader(const std::vector<std::string>& tags);
	std::vector<std::string> ReadHeader(uint64_t &startpos);
	int IndexFile(uint64_t pos);

public:
	YUV4MPEGVideoProvider(agi::fs::path const& filename);

	void GetFrame(int n, VideoFrame &frame) override;
	void SetColorSpace(std::string const&) override { }

	int GetFrameCount() const override             { return num_frames; }
	int GetWidth() const override                  { return w; }
	int GetHeight() const override                 { return h; }
	double GetDAR() const override                 { return 0; }
	agi::vfr::Framerate GetFPS() const override    { return fps; }
	std::vector<int> GetKeyFrames() const override { return {}; }
	std::string GetColorSpace() const override     { return "TV.601"; }
	std::string GetDecoderName() const override    { return "YUV4MPEG"; }
	bool WantsCaching() const override             { return true; }
};
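// Informal sketch of the container layout this provider parses; the values
// below are illustrative, not taken from any particular file. A YUV4MPEG2
// stream is a plain-text stream header followed by frames, each of which is a
// plain-text FRAME header followed by raw planar YCbCr data (Y plane, then Cb,
// then Cr):
//
//   YUV4MPEG2 W640 H480 F30000:1001 Ip A1:1 C420jpeg\n
//   FRAME\n
//   <640*480 bytes of Y><320*240 bytes of Cb><320*240 bytes of Cr>
//   FRAME\n
//   ...
//
// For the 4:2:0 variants handled below, each frame's payload is therefore
//   luma_sz + 2 * chroma_sz = w*h + 2*(w*h/4) = w*h*3/2 bytes
// (460800 bytes for the 640x480 example), which is the frame_sz computed in
// the constructor.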
/// @brief Constructor
/// @param filename The filename to open
YUV4MPEGVideoProvider::YUV4MPEGVideoProvider(agi::fs::path const& filename)
: file(filename)
{
	if (file.size() < 10)
		throw VideoNotSupported("File is not a YUV4MPEG file (too small)");
	if (strncmp("YUV4MPEG2 ", file.read(0, 10), 10))
		throw VideoNotSupported("File is not a YUV4MPEG file (bad magic)");

	uint64_t pos = 0;
	ParseFileHeader(ReadHeader(pos));

	if (w <= 0 || h <= 0)
		throw VideoOpenError("Invalid resolution");

	if (fps_rat.num <= 0 || fps_rat.den <= 0) {
		fps_rat.num = 25;
		fps_rat.den = 1;
		LOG_D("provider/video/yuv4mpeg") << "framerate info unavailable, assuming 25fps";
	}

	if (pixfmt == Y4M_PIXFMT_NONE)
		pixfmt = Y4M_PIXFMT_420JPEG;
	if (imode == Y4M_ILACE_NOTSET)
		imode = Y4M_ILACE_UNKNOWN;

	luma_sz = w * h;
	switch (pixfmt) {
	case Y4M_PIXFMT_420JPEG:
	case Y4M_PIXFMT_420MPEG2:
	case Y4M_PIXFMT_420PALDV:
		chroma_sz = (w * h) >> 2;
		break;
	default:
		/// @todo add support for more pixel formats
		throw VideoOpenError("Unsupported pixel format");
	}
	frame_sz = luma_sz + chroma_sz * 2;

	num_frames = IndexFile(pos);
	if (num_frames <= 0 || seek_table.empty())
		throw VideoOpenError("Unable to determine file length");
}

/// @brief Read a frame or file header at a given file position
/// @param pos The byte offset at which to start reading; advanced past the header on return
/// @return A list of parameters
std::vector<std::string> YUV4MPEGVideoProvider::ReadHeader(uint64_t &pos) {
	std::vector<std::string> tags;
	if (pos >= file.size())
		return tags;

	auto len = std::min<uint64_t>(YUV4MPEG_HEADER_MAXLEN, file.size() - pos);
	auto buff = file.read(pos, len);

	// read header until terminating newline (0x0A) is found
	auto curtag = buff;
	auto end = buff + len;
	for (; buff < end && *buff != 0x0A; ++buff, ++pos) {
		if (*buff == 0)
			throw VideoOpenError("ReadHeader: Malformed header (unexpected NUL)");

		// tags are separated by spaces (0x20)
		if (*buff == 0x20) {
			if (curtag != buff)
				tags.emplace_back(curtag, buff);
			curtag = buff + 1;
		}
	}

	if (buff == end)
		throw VideoOpenError("ReadHeader: Malformed header (no terminating newline found)");

	// if only one tag with no trailing space was found (possible in the
	// FRAME header case), make sure we get it
	if (curtag != buff)
		tags.emplace_back(curtag, buff);

	pos += 1; // Move past newline

	return tags;
}
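// For illustration (hypothetical values, not from any real file): given the
// stream header line
//
//   "YUV4MPEG2 W640 H480 F25:1 Ip A1:1 C420jpeg\n"
//
// ReadHeader() above returns the tag list
//
//   { "YUV4MPEG2", "W640", "H480", "F25:1", "Ip", "A1:1", "C420jpeg" }
//
// and ParseFileHeader() below consumes it by dispatching on the first character
// of each tag (W = width, H = height, F = framerate, C = colorspace,
// I = interlacing); the "A1:1" aspect ratio tag is not handled and just hits
// the "Unparsed tag" debug log.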
/// @brief Parses a list of parameters and sets reader state accordingly
/// @param tags The list of parameters to parse
void YUV4MPEGVideoProvider::ParseFileHeader(const std::vector<std::string>& tags) {
	if (tags.size() <= 1)
		throw VideoOpenError("ParseFileHeader: contentless header");
	if (tags.front() != "YUV4MPEG2")
		throw VideoOpenError("ParseFileHeader: malformed header (bad magic)");

	// temporary stuff
	int t_w = -1;
	int t_h = -1;
	int t_fps_num = -1;
	int t_fps_den = -1;
	Y4M_InterlacingMode t_imode = Y4M_ILACE_NOTSET;
	Y4M_PixelFormat t_pixfmt = Y4M_PIXFMT_NONE;

	for (unsigned i = 1; i < tags.size(); i++) {
		char type = tags[i][0];
		std::string tag = tags[i].substr(1);

		const char *err = nullptr;

		if (type == 'W') {
			if (!agi::util::try_parse(tag, &t_w))
				err = "invalid width";
		}
		else if (type == 'H') {
			if (!agi::util::try_parse(tag, &t_h))
				err = "invalid height";
		}
		else if (type == 'F') {
			size_t pos = tag.find(':');
			if (pos == tag.npos)
				err = "invalid framerate";

			if (!agi::util::try_parse(tag.substr(0, pos), &t_fps_num)
				|| !agi::util::try_parse(tag.substr(pos + 1), &t_fps_den))
				err = "invalid framerate";
		}
		else if (type == 'C') {
			// technically this should probably be case sensitive,
			// but being liberal in what you accept doesn't hurt
			boost::to_lower(tag);
			if (tag == "420")			t_pixfmt = Y4M_PIXFMT_420JPEG; // is this really correct?
			else if (tag == "420jpeg")	t_pixfmt = Y4M_PIXFMT_420JPEG;
			else if (tag == "420mpeg2")	t_pixfmt = Y4M_PIXFMT_420MPEG2;
			else if (tag == "420paldv")	t_pixfmt = Y4M_PIXFMT_420PALDV;
			else if (tag == "411")		t_pixfmt = Y4M_PIXFMT_411;
			else if (tag == "422")		t_pixfmt = Y4M_PIXFMT_422;
			else if (tag == "444")		t_pixfmt = Y4M_PIXFMT_444;
			else if (tag == "444alpha")	t_pixfmt = Y4M_PIXFMT_444ALPHA;
			else if (tag == "mono")		t_pixfmt = Y4M_PIXFMT_MONO;
			else err = "invalid or unknown colorspace";
		}
		else if (type == 'I') {
			boost::to_lower(tag);
			if (tag == "p")			t_imode = Y4M_ILACE_PROGRESSIVE;
			else if (tag == "t")	t_imode = Y4M_ILACE_TFF;
			else if (tag == "b")	t_imode = Y4M_ILACE_BFF;
			else if (tag == "m")	t_imode = Y4M_ILACE_MIXED;
			else if (tag == "?")	t_imode = Y4M_ILACE_UNKNOWN;
			else err = "invalid or unknown interlacing mode";
		}
		else
			LOG_D("provider/video/yuv4mpeg") << "Unparsed tag: " << tags[i];

		if (err)
			throw VideoOpenError(err);
	}

	// The point of all this is to allow multiple YUV4MPEG2 headers in a single file
	// (can happen if you concat several files) as long as they have identical
	// header flags. The spec doesn't explicitly say you have to allow this,
	// but the "reference implementation" (mjpegtools) does, so I'm doing it too.
	if (inited) {
		const char *err = nullptr;
		if (t_w > 0 && t_w != w)
			err = "illegal width change";
		if (t_h > 0 && t_h != h)
			err = "illegal height change";
		if ((t_fps_num > 0 && t_fps_den > 0) && (t_fps_num != fps_rat.num || t_fps_den != fps_rat.den))
			err = "illegal framerate change";
		if (t_pixfmt != Y4M_PIXFMT_NONE && t_pixfmt != pixfmt)
			err = "illegal colorspace change";
		if (t_imode != Y4M_ILACE_NOTSET && t_imode != imode)
			err = "illegal interlacing mode change";
		if (err)
			throw VideoOpenError(err);
	}
	else {
		w = t_w;
		h = t_h;
		fps_rat.num = t_fps_num;
		fps_rat.den = t_fps_den;
		pixfmt = t_pixfmt != Y4M_PIXFMT_NONE ? t_pixfmt : Y4M_PIXFMT_420JPEG;
		imode = t_imode != Y4M_ILACE_NOTSET ? t_imode : Y4M_ILACE_UNKNOWN;
		fps = double(fps_rat.num) / fps_rat.den;
		inited = true;
	}
}

/// @brief Parses a frame header
/// @param tags The list of parameters to parse
/// @return The flags set, as a binary mask
///
/// This function is currently unimplemented (it will always return Y4M_FFLAG_NONE).
YUV4MPEGVideoProvider::Y4M_FrameFlags YUV4MPEGVideoProvider::ParseFrameHeader(const std::vector<std::string>& tags) {
	if (tags.front() == "FRAME")
		return Y4M_FFLAG_NONE; /// @todo implement parsing of frame flags
	throw VideoOpenError("ParseFrameHeader: malformed frame header (bad magic)");
}
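// A note on the index built by IndexFile() below: ReadHeader() advances pos
// past the terminating newline of each FRAME header before the position is
// recorded, so every seek_table entry is the offset of the first byte of that
// frame's Y plane, and GetFrame() can fetch frame n with a single read of
// frame_sz bytes starting at seek_table[n]. For a stream whose frame headers
// are a bare "FRAME\n" (6 bytes), consecutive entries differ by frame_sz + 6.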
/// @brief Indexes the file
/// @return The number of frames found in the file
///
/// This function goes through the file, finds and parses all file and frame headers,
/// and creates a seek table that lists the byte positions of all frames so seeking
/// can easily be done.
int YUV4MPEGVideoProvider::IndexFile(uint64_t pos) {
	int framecount = 0;

	// the ParseFileHeader() call in the constructor will already have read
	// the file header for us and set the seek position correctly
	while (true) {
		// continue reading headers until no more are found
		std::vector<std::string> tags = ReadHeader(pos);
		if (tags.empty())
			break; // no more headers

		Y4M_FrameFlags flags = Y4M_FFLAG_NOTSET;
		if (tags.front() == "YUV4MPEG2") {
			ParseFileHeader(tags);
			continue;
		}
		else if (tags.front() == "FRAME")
			flags = ParseFrameHeader(tags);

		if (flags == Y4M_FFLAG_NONE) {
			framecount++;
			seek_table.push_back(pos);
			pos += frame_sz;
		}
		else {
			/// @todo implement rff flags etc
		}
	}

	return framecount;
}

void YUV4MPEGVideoProvider::GetFrame(int n, VideoFrame &frame) {
	n = mid(0, n, num_frames - 1);

	int uv_width = w / 2;

	auto src_y = reinterpret_cast<const uint8_t *>(file.read(seek_table[n], luma_sz + chroma_sz * 2));
	auto src_u = src_y + luma_sz;
	auto src_v = src_u + chroma_sz;
	frame.data.resize(w * h * 4);
	unsigned char *dst = &frame.data[0];

	for (int py = 0; py < h; ++py) {
		for (int px = 0; px < w / 2; ++px) {
			const uint8_t u = *src_u++;
			const uint8_t v = *src_v++;
			for (unsigned int i = 0; i < 2; ++i) {
				const uint8_t y = *src_y++;
				auto rgb = conv.ycbcr_to_rgb({{y, u, v}});
				*dst++ = rgb[2];
				*dst++ = rgb[1];
				*dst++ = rgb[0];
				*dst++ = 0;
			}
		}

		// Roll back u/v on even lines so each chroma row is reused for two luma rows
		if (!(py & 1)) {
			src_u -= uv_width;
			src_v -= uv_width;
		}
	}

	frame.flipped = false;
	frame.width = w;
	frame.height = h;
	frame.pitch = w * 4;
}
}

namespace agi { class BackgroundRunner; }

std::unique_ptr<VideoProvider> CreateYUV4MPEGVideoProvider(agi::fs::path const& path, std::string const&, agi::BackgroundRunner *) {
	return agi::make_unique<YUV4MPEGVideoProvider>(path);
}
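// Minimal usage sketch (illustrative only: Aegisub normally reaches this
// provider through its video provider factory rather than by calling the
// function above directly, and "clip.y4m" is a hypothetical file):
//
//   VideoFrame frame;
//   auto provider = CreateYUV4MPEGVideoProvider("clip.y4m", "", nullptr);
//   provider->GetFrame(0, frame);
//   // frame.data now holds GetWidth() * GetHeight() packed 4-byte pixels in
//   // B, G, R, 0 order, with frame.pitch == GetWidth() * 4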