// cm3d2tool/source/cm3d2/arc.d  (343 lines, 8.8 KiB, D)
module cm3d2.arc;
import std.file;
import std.mmfile;
import std.conv;
import std.stdio;
import std.zlib;
import dfuse.fuse;
import cm3d2;
// Reader for CM3D2 "warc" archive files.  Layout: a 28-byte header, a blob
// of packed file data, and a footer holding directory hash trees (UTF-8 and
// UTF-16 variants) plus a hash -> file-name lookup table.  Byte consumption
// is done through the readInteger/readBytes helpers imported from the cm3d2
// module, which are not visible in this file.
class WarcFile
{
// One node of a directory hash tree from the archive footer: a directory id,
// hash entries for its files and subdirectories, its ancestor ids, and the
// recursively parsed subdirectory nodes.
class DirectoryHashNode
{
private ulong _id;
public FileHashEntry[] subdirectoryEntries;
public FileHashEntry[] fileEntries;
private uint _depth;
private ulong[] _parents;
public DirectoryHashNode[] subdirectories;
// Hash id of this directory; used as a key into the name lookup table.
@property
ulong id()
{
return _id;
}
// Parses one directory descriptor (and, recursively, its subdirectories)
// from the footer block data.
this(ubyte[] data)
{
// Fixed descriptor preamble (0x20, 0x10); the meaning of the two
// constants is unknown -- they are only used as a sanity check.
auto unknown1 = data.readInteger!uint();
auto unknown2 = data.readInteger!uint();
assert(unknown1 == 0x20 && unknown2 == 0x10, "Invalid directory descriptor");
_id = data.readInteger!ulong();
auto subdirectoryCount = data.readInteger!uint();
auto fileCount = data.readInteger!uint();
_depth = data.readInteger!uint();
// Four unparsed bytes of unknown purpose; read only to advance the cursor.
auto unknown3 = data.readBytes(4);
subdirectoryEntries = new FileHashEntry[subdirectoryCount];
for (auto i = 0; i < subdirectoryCount; i++)
{
subdirectoryEntries[i] = FileHashEntry(data);
}
fileEntries = new FileHashEntry[fileCount];
for (auto i = 0; i < fileCount; i++)
{
fileEntries[i] = FileHashEntry(data);
}
// One ancestor id per level of nesting above this node.
_parents = new ulong[_depth];
for (auto i = 0; i < _depth; i++)
{
_parents[i] = data.readInteger!ulong();
}
subdirectories = new DirectoryHashNode[subdirectoryCount];
for (auto i = 0; i < subdirectoryCount; i++)
{
// NOTE(review): `data` is received by value by this constructor, yet
// FileHashEntry's constructor takes `ref ubyte[]`, which suggests the
// cm3d2 helpers advance the slice through a ref parameter.  If so,
// this recursive call consumes a *copy* of the slice and the outer
// cursor never moves past the nested descriptor, meaning every
// subdirectory after the first would re-parse the same bytes.
// Confirm against the readInteger/readBytes signatures in cm3d2.
subdirectories[i] = new DirectoryHashNode(data);
}
}
}
// A footer hash-table entry: the hash of a file/directory name, paired with
// either the entry's offset into the archive's file-data region (for files)
// or whatever the format stores for directories.
struct FileHashEntry
{
ulong hash;
long offset;
// Consumes 16 bytes from `data`, advancing the caller's slice (ref).
this(ref ubyte[] data)
{
hash = data.readInteger!ulong();
offset = data.readInteger!long();
}
}
// Base class for entries in the reconstructed archive tree: a name, an
// optional parent, and child entries (populated only for directories).
abstract class ArcEntry
{
public wstring name;
public ArcEntry parent;
public ArcEntry[] children;
this(ArcEntry parent)
{
this.parent = parent;
}
}
// A directory node in the reconstructed archive tree; holds no data of its
// own beyond the inherited name/parent/children.
class DirectoryEntry : ArcEntry
{
this(ArcEntry parent = null)
{
super(parent);
}
}
// A file node in the reconstructed archive tree.  Stores the (possibly
// zlib-deflated) payload read from the archive's file-data region.
class FileEntry : ArcEntry
{
private bool _deflated;
private uint _uncompressedSize;
private uint _compressedSize;
private ubyte[] _fileData;
// Parses a file record starting at the current position of `data`:
// a deflate flag, 4 unknown bytes, the two sizes, then the payload.
this(ref ubyte[] data, ArcEntry parent = null)
{
super(parent);
_deflated = data.readInteger!uint() == 1;
auto unknown = data.readBytes(4);
// These four bytes are expected to be zero; log anything else for
// format-reverse-engineering purposes rather than failing.
if (unknown != [0, 0, 0, 0])
{
stderr.writeln("FileEntry.unknown: ", unknown);
}
_uncompressedSize = data.readInteger!uint();
_compressedSize = data.readInteger!uint();
assert(_deflated || _compressedSize == _uncompressedSize, "Uncompressed file entry with unequal sizes");
_fileData = data.readBytes(_compressedSize);
}
// Returns the file's uncompressed contents, inflating via std.zlib when
// the entry was stored deflated.  Note: inflates on every access; the
// result is not cached.
@property
ubyte[] data()
{
if (_deflated)
{
return cast(ubyte[]) uncompress(_fileData[], _uncompressedSize);
}
else
{
return _fileData[];
}
}
}
private MmFile _file;
private string _path;
private ubyte[] _header;
private ubyte[] _fileData;
private ubyte[] _footer;
private DirectoryHashNode _utf8HashTree;
private DirectoryHashNode _utf16HashTree;
private wstring[ulong] _nameLookupTable;
public ArcEntry[] entries;
// Maps the archive, validates header/footer, then materialises `entries`
// by walking the UTF-16 hash tree and resolving names via the lookup table.
this(MmFile file, string path)
{
_file = file;
_path = path;
// The header is a fixed 28 bytes; readHeader() derives the file-data and
// footer slices from the length it contains.
_header = (cast(ubyte[]) _file[])[0 .. 28];
readHeader();
readFooter();
auto rootName = _nameLookupTable[_utf16HashTree.id];
stderr.writeln("root name: " ~ rootName);
// Recursively converts a hash-tree node into ArcEntry objects, reading
// each file's record from _fileData at the offset stored in its entry.
ArcEntry[] getArcEntries(ArcEntry parent, DirectoryHashNode directoryHashNode)
{
ArcEntry[] entries;
foreach (fileHashEntry; directoryHashNode.fileEntries)
{
auto file = _fileData[fileHashEntry.offset .. $];
auto fileEntry = new FileEntry(file, parent);
fileEntry.name = _nameLookupTable[fileHashEntry.hash];
entries ~= fileEntry;
}
// subdirectoryEntries and subdirectories are parallel arrays, indexed
// together here.
foreach (i, directoryHashEntry; directoryHashNode.subdirectoryEntries)
{
auto directoryEntry = new DirectoryEntry(parent);
directoryEntry.name = _nameLookupTable[directoryHashEntry.hash];
directoryEntry.children = getArcEntries(directoryEntry, directoryHashNode.subdirectories[i]);
entries ~= directoryEntry;
}
return entries;
}
entries = getArcEntries(null, _utf16HashTree);
}
// Validates the 28-byte header ("warc" magic, marker bytes, version 1000,
// two constant fields) and slices the mapped file into _fileData/_footer
// using the data length stored in the header.
private void readHeader()
{
auto data = _header[];
auto type = cast(string) data.readBytes(4);
assert(type == "warc", _path ~ ": invalid warc file");
auto unknown1 = data.readBytes(4);
assert(unknown1 == [0xFF, 0xAA, 0x45, 0xF1], _path ~ ": invalid warc file");
auto fileVersion = data.readInteger!uint();
assert(fileVersion == 1000, _path ~ ": unrecognised version (" ~ fileVersion.to!string
~ ")");
auto unknown2 = data.readInteger!uint();
assert(unknown2 == 4, _path ~ ": invalid warc file");
auto unknown3 = data.readInteger!uint();
assert(unknown3 == 2, _path ~ ": invalid warc file");
auto fileDataLength = data.readInteger!ulong();
_footer = (cast(ubyte[]) _file[])[_header.length + fileDataLength .. $];
_fileData = (cast(ubyte[]) _file[])[_header.length .. _header.length + fileDataLength];
assert(data.length == 0, _path ~ ": unexpected data at end of .arc header");
}
// Parses footer blocks (type tag + length + payload) until both hash trees
// and the name table have been seen.  The name table itself is stored as a
// deflated FileEntry payload.
private void readFooter()
{
auto data = _footer[];
enum BlockType : uint
{
UTF16HashData = 0,
UTF8HashData = 1,
UTF16NameData = 3
}
// NOTE(review): `_nameLookupTable == null` is true while the associative
// array is empty, so a footer whose name table is legitimately empty --
// or one missing any of the three block types -- would keep this loop
// reading past the end of the footer slice (behaviour then depends on
// the cm3d2 read helpers).  Confirm the format guarantees all three
// blocks and a non-empty name table.
while (_utf8HashTree is null || _utf16HashTree is null || _nameLookupTable == null)
{
auto blockType = data.readInteger!BlockType();
auto blockSize = data.readInteger!ulong();
auto blockData = data.readBytes(blockSize);
if (blockType == BlockType.UTF8HashData)
{
_utf8HashTree = new DirectoryHashNode(blockData);
}
else if (blockType == BlockType.UTF16HashData)
{
_utf16HashTree = new DirectoryHashNode(blockData);
}
else if (blockType == BlockType.UTF16NameData)
{
readNameLookupTable(new FileEntry(blockData).data);
}
else
{
assert(false, _path ~ ": unknown footer block type (" ~ blockType.to!string ~ ")");
}
}
assert(data.length == 0, _path ~ ": unexpected data at end of .arc footer");
}
// Rebuilds _nameLookupTable from the decoded name block: repeated records
// of (ulong hash, uint length in UTF-16 code units, length*2 bytes of
// UTF-16 name data).
private void readNameLookupTable(ubyte[] data)
{
_nameLookupTable = null;
while (data.length > 0)
{
auto hash = data.readInteger!ulong();
auto nameLength = data.readInteger!uint();
// nameLength counts UTF-16 code units, hence the *2 for bytes.
auto name = cast(wstring) data.readBytes(nameLength * 2);
// Debug trace: prints every (hash, name) pair; noisy on large archives.
stderr.writeln(hash, " => \"", name, "\"");
_nameLookupTable[hash] = name;
}
}
}
// Reader for CM3D2 "warp" archive files.  At present it only validates the
// four-byte magic string; no further parsing of the format is implemented.
class WarpFile
{
    private MmFile _file;
    private string _path;

    // Keeps a handle on the memory-mapped file and immediately verifies
    // that it carries the expected magic bytes.
    this(MmFile file, string path)
    {
        _file = file;
        _path = path;
        readHeader();
    }

    // Fails (via assert) unless the mapped file begins with "warp".
    private void readHeader()
    {
        auto contents = cast(ubyte[]) _file[];
        auto magic = cast(string) contents.readBytes(4);
        assert(magic == "warp", "Invalid warp file: " ~ magic);
    }
}
// FUSE filesystem (dfuse Operations) exposing the contents of a set of .arc
// archives.  Construction currently only classifies and parses the archives;
// the filesystem operations themselves are not implemented yet, so the
// constructor always throws once every archive has been checked.
class ArcFileSystem : Operations
{
    private string[] _arcPaths;
    private WarcFile[] _warcFiles;
    private WarpFile[] _warpFiles;

    // arcPaths: paths to .arc files; each must exist and start with a
    // recognised magic string ("warc" or "warp").
    this(string[] arcPaths)
    {
        // Previously this field was declared but never populated; keep the
        // paths so later filesystem operations can refer back to them.
        _arcPaths = arcPaths;
        foreach (path; arcPaths)
        {
            assert(path.exists, path ~ ": no such file");
            stderr.write(path ~ ": ");
            auto mmfile = new MmFile(path);
            // Guard the magic-number slice: without this, a file shorter
            // than four bytes dies with an opaque range error instead of a
            // message naming the offending path.
            assert(mmfile.length >= 4, path ~ ": file too small to be a .arc file");
            auto type = cast(string) mmfile[0 .. 4];
            if (type == "warc")
            {
                stderr.writeln("warc");
                _warcFiles ~= new WarcFile(mmfile, path);
            }
            else if (type == "warp")
            {
                stderr.writeln("warp");
                _warpFiles ~= new WarpFile(mmfile, path);
            }
            else
            {
                assert(false, path ~ ": not a valid .arc file");
            }
        }
        stderr.writeln(".arc files checked");
        // Deliberate: the Operations overrides are not written yet.
        throw new Exception("Not implemented yet");
    }
}