feature: new chunk and write strategy

Pitu 2021-01-07 02:11:53 +09:00
parent d2bf8ce8c8
commit 28efb0d9ed
8 changed files with 448 additions and 221 deletions

package-lock.json generated
View File

@@ -4218,6 +4218,11 @@
}
}
},
"blake3": {
"version": "2.1.4",
"resolved": "https://registry.npmjs.org/blake3/-/blake3-2.1.4.tgz",
"integrity": "sha512-70hmx0lPd6zmtNwxPT4/1P0pqaEUlTJ0noUBvCXPLfMpN0o8PPaK3q7ZlpRIyhrqcXxeMAJSowNm/L9oi/x1XA=="
},
"block-stream": {
"version": "0.0.9",
"resolved": "https://registry.npmjs.org/block-stream/-/block-stream-0.0.9.tgz",

View File

@@ -38,6 +38,7 @@
"@nuxtjs/axios": "^5.4.1",
"adm-zip": "^0.4.13",
"bcrypt": "^5.0.0",
"blake3": "^2.1.4",
"body-parser": "^1.18.3",
"buefy": "^0.9.4",
"busboy": "^0.2.14",

View File

@@ -1,99 +0,0 @@
const path = require('path');
const jetpack = require('fs-jetpack');
const randomstring = require('randomstring');
const Util = require('../../utils/Util');
const Route = require('../../structures/Route');
class uploadPOST extends Route {
constructor() {
super('/upload/chunks', 'post', {
bypassAuth: true,
canApiKey: true
});
}
async run(req, res, db) {
const user = await Util.isAuthorized(req);
if (!user && process.env.PUBLIC_MODE === 'false') return res.status(401).json({ message: 'Not authorized to use this resource' });
const filename = Util.getUniqueFilename(randomstring.generate(32));
// console.log('Files', req.body.files);
const info = {
size: req.body.files[0].size,
url: `${process.env.DOMAIN}/`
};
for (const chunk of req.body.files) {
const { uuid } = chunk;
// console.log('Chunk', chunk);
const chunkOutput = path.join(__dirname,
'../../../../',
process.env.UPLOAD_FOLDER,
'chunks',
uuid);
const chunkDir = await jetpack.list(chunkOutput);
const ext = path.extname(chunkDir[0]);
const output = path.join(__dirname,
'../../../../',
process.env.UPLOAD_FOLDER,
`${filename}${ext || ''}`);
chunkDir.sort();
// Save some data
info.name = `${filename}${ext || ''}`;
info.url += `${filename}${ext || ''}`;
info.data = chunk;
for (let i = 0; i < chunkDir.length; i++) {
const dir = path.join(__dirname,
'../../../../',
process.env.UPLOAD_FOLDER,
'chunks',
uuid,
chunkDir[i]);
const file = await jetpack.readAsync(dir, 'buffer');
await jetpack.appendAsync(output, file);
}
await jetpack.removeAsync(chunkOutput);
}
/*
If a file with the same hash and user is found, delete this
uploaded copy and return a link to the original
*/
info.hash = await Util.getFileHash(info.name);
let existingFile = await Util.checkIfFileExists(db, user, info.hash);
if (existingFile) {
existingFile = Util.constructFilePublicLink(existingFile);
res.json({
message: 'Successfully uploaded the file.',
name: existingFile.name,
hash: existingFile.hash,
size: existingFile.size,
url: `${process.env.DOMAIN}/${existingFile.name}`,
deleteUrl: `${process.env.DOMAIN}/api/file/${existingFile.id}`,
repeated: true
});
return Util.deleteFile(info.name);
}
// Otherwise generate thumbs and do the rest
Util.generateThumbnails(info.name);
const insertedId = await Util.saveFileToDatabase(req, res, user, db, info, {
originalname: info.data.original, mimetype: info.data.type
});
if (!insertedId) return res.status(500).json({ message: 'There was an error saving the file.' });
info.deleteUrl = `${process.env.DOMAIN}/api/file/${insertedId[0]}`;
Util.saveFileToAlbum(db, req.headers.albumid, insertedId);
delete info.chunk;
return res.status(201).send({
message: 'Successfully merged the chunk(s).',
...info
});
}
}
module.exports = uploadPOST;

View File

@@ -1,41 +1,247 @@
const path = require('path');
const jetpack = require('fs-jetpack');
const multer = require('multer');
const moment = require('moment');
const Util = require('../../utils/Util');
const Route = require('../../structures/Route');
const multerStorage = require('../../utils/multerStorage');
const upload = multer({
storage: multer.memoryStorage(),
const chunksData = {};
const chunkedUploadsTimeout = 1800000;
const chunksDir = path.join(__dirname, '../../../../', process.env.UPLOAD_FOLDER, 'chunks');
const uploadDir = path.join(__dirname, '../../../../', process.env.UPLOAD_FOLDER);
class ChunksData {
constructor(uuid, root) {
this.uuid = uuid;
this.root = root;
this.filename = 'tmp';
this.chunks = 0;
this.stream = null;
this.hasher = null;
}
onTimeout() {
if (this.stream && !this.stream.writableEnded) {
this.stream.end();
}
if (this.hasher) {
this.hasher.dispose();
}
cleanUpChunks(this.uuid, true);
}
setTimeout(delay) {
this.clearTimeout();
this._timeout = setTimeout(this.onTimeout.bind(this), delay);
}
clearTimeout() {
if (this._timeout) {
clearTimeout(this._timeout);
}
}
}
const initChunks = async uuid => {
if (chunksData[uuid] === undefined) {
const root = path.join(chunksDir, uuid);
await jetpack.dirAsync(root);
chunksData[uuid] = new ChunksData(uuid, root);
}
chunksData[uuid].setTimeout(chunkedUploadsTimeout);
return chunksData[uuid];
};
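Each upload UUID gets exactly one ChunksData session, and every chunk that arrives re-arms a 30-minute idle timer, so stalled uploads clean themselves up. A sketch of the lifecycle (the uuid value is hypothetical):

// First chunk: creates uploads/chunks/<uuid>/ and arms the timer.
initChunks('3f2c9d1e-0000-0000-0000-000000000000')
  .then(session => { /* the chunk is written through session.stream */ });
// Every later chunk with the same uuid reuses the session and re-arms the timer.
// If no chunk arrives within chunkedUploadsTimeout (30 minutes), onTimeout()
// ends the write stream, disposes the hasher and removes the partial file.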
const executeMulter = multer({
// Guide: https://github.com/expressjs/multer#limits
limits: {
fileSize: parseInt(process.env.MAX_SIZE, 10) * (1000 * 1000),
// Maximum number of non-file fields.
// Dropzone.js will add 6 extra fields for chunked uploads.
// We don't use them for anything else.
fields: 6,
// Maximum number of file fields.
// Chunked uploads still need to provide ONLY 1 file field.
// Otherwise, only one of the files will end up being properly stored,
// and that will also be as a chunk.
files: 1
},
fileFilter: (req, file, cb) =>
// TODO: Enable blacklisting of files/extensions
/*
if (options.blacklist.mimes.includes(file.mimetype)) {
return cb(new Error(`${file.mimetype} is a blacklisted filetype.`));
} else if (options.blacklist.extensions.some(ext => path.extname(file.originalname).toLowerCase() === ext)) {
return cb(new Error(`${path.extname(file.originalname).toLowerCase()} is a blacklisted extension.`));
}
*/
cb(null, true)
fileFilter(req, file, cb) {
file.extname = Util.getExtension(file.originalname);
if (Util.isExtensionBlocked(file.extname)) {
return cb(`${file.extname ? `${file.extname.substr(1).toUpperCase()} files` : 'Files with no extension'} are not permitted.`);
}
// Re-map Dropzone keys so people can manually use the API without prepending 'dz'
for (const key in req.body) {
if (!/^dz/.test(key)) continue;
req.body[key.replace(/^dz/, '')] = req.body[key];
delete req.body[key];
}
return cb(null, true);
},
storage: multerStorage({
destination(req, file, cb) {
// Is file a chunk!?
file._isChunk = req.body.uuid !== undefined && req.body.chunkindex !== undefined;
if (file._isChunk) {
initChunks(req.body.uuid)
.then(chunksData => {
file._chunksData = chunksData;
cb(null, chunksData.root);
})
.catch(error => {
console.error(error);
return cb('Could not process the chunked upload. Try again?');
});
} else {
return cb(null, uploadDir);
}
},
filename(req, file, cb) {
if (file._isChunk) {
return cb(null, chunksData[req.body.uuid].filename);
}
const name = Util.getUniqueFilename(file.extname);
if (name) return cb(null, name);
return cb('ERROR');
}
})
}).array('files[]');
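For reference, a chunk request from Dropzone carries six dz-prefixed fields (hence fields: 6 above), which the fileFilter re-maps by stripping the prefix. The field names below follow Dropzone.js's chunking defaults; the values are invented:

// Multipart body of one chunk request, before the re-mapping:
const exampleChunkFields = {
  dzuuid: 'd6e885e9-0000-0000-0000-000000000000', // becomes req.body.uuid
  dzchunkindex: '3', // becomes req.body.chunkindex
  dzchunkbyteoffset: '31457280',
  dzchunksize: '10485760',
  dztotalchunkcount: '12',
  dztotalfilesize: '125829120'
};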
/*
TODO: If source has transparency generate a png thumbnail, otherwise a jpg.
TODO: If source is a gif, generate a thumb of the first frame and play the gif on hover on the frontend.
TODO: Think if it's worth making a folder with the user uuid in uploads/ and uploading the pictures there,
so that at least not every single file ends up in one directory.
XXX: Now that the default behaviour is to serve files with node, we can actually pull this off.
Before this, having files in subfolders meant messing with nginx and the paths,
but now it should be fairly easy to re-arrange the folder structure with express.static.
I see great value in this, open to suggestions.
*/
const uploadFile = async (req, res) => {
const error = await new Promise(resolve => executeMulter(req, res, err => resolve(err)));
if (error) {
const suppress = [
'LIMIT_FILE_SIZE',
'LIMIT_UNEXPECTED_FILE'
];
if (suppress.includes(error.code)) {
throw error.toString();
} else {
throw error;
}
}
if (!req.files || !req.files.length) {
throw 'No files.'; // eslint-disable-line no-throw-literal
}
// If the uploaded file is a chunk then just say that it was a success
const uuid = req.body.uuid;
if (chunksData[uuid] !== undefined) {
req.files.forEach(file => {
chunksData[uuid].chunks++;
});
res.json({ success: true });
return;
}
const infoMap = req.files.map(file => ({
path: path.join(uploadDir, file.filename),
data: file
}));
return infoMap[0];
};
const finishChunks = async (req, res) => {
const check = file => typeof file.uuid !== 'string' ||
!chunksData[file.uuid] ||
chunksData[file.uuid].chunks < 2;
const files = req.body.files;
if (!Array.isArray(files) || !files.length || files.some(check)) {
throw 'An unexpected error occurred.'; // eslint-disable-line no-throw-literal
}
const infoMap = [];
try {
await Promise.all(files.map(async file => {
// Close stream
chunksData[file.uuid].stream.end();
/*
if (chunksData[file.uuid].chunks > maxChunksCount) {
throw 'Too many chunks.'; // eslint-disable-line no-throw-literal
}
*/
file.extname = typeof file.original === 'string' ? Util.getExtension(file.original) : '';
if (Util.isExtensionBlocked(file.extname)) {
throw `${file.extname ? `${file.extname.substr(1).toUpperCase()} files` : 'Files with no extension'} are not permitted.`; // eslint-disable-line no-throw-literal
}
file.size = chunksData[file.uuid].stream.bytesWritten;
// Double-check file size
const tmpfile = path.join(chunksData[file.uuid].root, chunksData[file.uuid].filename);
const lstat = await jetpack.inspect(tmpfile);
if (lstat.size !== file.size) {
throw `File size mismatch (${lstat.size} vs. ${file.size}).`; // eslint-disable-line no-throw-literal
}
// Generate name
const name = Util.getUniqueFilename(file.extname);
// Move tmp file to final destination
const destination = path.join(uploadDir, name);
await jetpack.move(tmpfile, destination);
const hash = chunksData[file.uuid].hasher.digest('hex');
// Continue even when encountering errors
await cleanUpChunks(file.uuid).catch(console.error);
const data = {
filename: name,
originalname: file.original || '',
extname: file.extname,
mimetype: file.type || '',
size: file.size,
hash
};
infoMap.push({ path: destination, data });
}));
return infoMap[0];
} catch (error) {
// Dispose unfinished hasher and clean up leftover chunks
// Should continue even when encountering errors
files.forEach(file => {
if (chunksData[file.uuid] === undefined) return;
try {
if (chunksData[file.uuid].hasher) {
chunksData[file.uuid].hasher.dispose();
}
} catch (_) {}
cleanUpChunks(file.uuid).catch(console.error);
});
// Re-throw error
throw error;
}
};
const cleanUpChunks = async (uuid, onTimeout) => {
// Remove tmp file
await jetpack.removeAsync(path.join(chunksData[uuid].root, chunksData[uuid].filename))
.catch(error => {
if (error.code !== 'ENOENT') console.error(error);
});
// Remove UUID dir
await jetpack.removeAsync(chunksData[uuid].root);
// Delete cached chunks data
if (!onTimeout) chunksData[uuid].clearTimeout();
delete chunksData[uuid];
};
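After the last chunk is accepted, the client sends one more request with the finishedChunks header to trigger finishChunks() above. A hypothetical manual call (the field list mirrors what the frontend sends at the bottom of this diff; URL and values are placeholders):

const axios = require('axios');

async function finishChunkedUpload() {
  const { data } = await axios.post('https://example.com/api/upload', {
    files: [{
      uuid: 'd6e885e9-0000-0000-0000-000000000000', // same uuid sent with every chunk
      original: 'video.mp4', // original filename; its extension is validated server-side
      type: 'video/mp4'
    }]
  }, {
    headers: { albumId: null, finishedChunks: true }
  });
  return data;
}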
class uploadPOST extends Route {
constructor() {
@@ -48,108 +254,37 @@ class uploadPOST extends Route {
async run(req, res, db) {
const user = await Util.isAuthorized(req);
if (!user && process.env.PUBLIC_MODE === 'false') return res.status(401).json({ message: 'Not authorized to use this resource' });
const albumId = req.body.albumid || req.headers.albumid;
const { finishedchunks } = req.headers;
const albumId = req.headers.albumid ? req.headers.albumid === 'null' ? null : req.headers.albumid : null; // the albumid header can arrive as the string 'undefined' or 'null', so normalize it to an actual null
if (albumId && !user) return res.status(401).json({ message: 'Only registered users can upload files to an album' });
if (albumId && user) {
const album = await db.table('albums').where({ id: albumId, userId: user.id }).first();
if (!album) return res.status(401).json({ message: 'Album doesn\'t exist or it doesn\'t belong to the user' });
}
return upload(req, res, async err => {
if (err) console.error(err.message);
let uploadedFile = {};
let insertedId;
// eslint-disable-next-line no-underscore-dangle
const remappedKeys = this._remapKeys(req.body);
const file = req.files[0];
const ext = path.extname(file.originalname);
const hash = Util.generateFileHash(file.buffer);
const filename = Util.getUniqueFilename(file.originalname);
/*
First let's get the hash of the file. This will be useful to check if the file
has already been upload by either the user or an anonymous user.
In case this is true, instead of uploading it again we retrieve the url
of the file that is already saved and thus don't store extra copies of the same file.
For this we need to wait until we have a filename so that we can delete the uploaded file.
*/
const exists = await Util.checkIfFileExists(db, user, hash);
if (exists) return this.fileExists(res, exists, filename);
if (remappedKeys && remappedKeys.uuid) {
const chunkOutput = path.join(__dirname,
'../../../../',
process.env.UPLOAD_FOLDER,
'chunks',
remappedKeys.uuid,
`${remappedKeys.chunkindex.padStart(3, 0)}${ext || ''}`);
await jetpack.writeAsync(chunkOutput, file.buffer);
} else {
const output = path.join(__dirname,
'../../../../',
process.env.UPLOAD_FOLDER,
filename);
await jetpack.writeAsync(output, file.buffer);
uploadedFile = {
name: filename,
hash,
size: file.buffer.length,
url: filename
};
}
if (!remappedKeys || !remappedKeys.uuid) {
Util.generateThumbnails(uploadedFile.name);
insertedId = await Util.saveFileToDatabase(req, res, user, db, uploadedFile, file);
if (!insertedId) return res.status(500).json({ message: 'There was an error saving the file.' });
uploadedFile.deleteUrl = `${process.env.DOMAIN}/api/file/${insertedId[0]}`;
/*
If the upload had an album specified we make sure to create the relation
and update the according timestamps..
*/
Util.saveFileToAlbum(db, albumId, insertedId);
}
uploadedFile = Util.constructFilePublicLink(uploadedFile);
return res.status(201).send({
message: 'Successfully uploaded the file.',
...uploadedFile
});
});
}
fileExists(res, exists, filename) {
exists = Util.constructFilePublicLink(exists);
res.json({
message: 'Successfully uploaded the file.',
name: exists.name,
hash: exists.hash,
size: exists.size,
url: `${process.env.DOMAIN}/${exists.name}`,
deleteUrl: `${process.env.DOMAIN}/api/file/${exists.id}`,
repeated: true
});
return Util.deleteFile(filename);
}
_remapKeys(body) {
const keys = Object.keys(body);
if (keys.length) {
for (const key of keys) {
if (!/^dz/.test(key)) continue;
body[key.replace(/^dz/, '')] = body[key];
delete body[key];
}
return body;
let file;
if (finishedchunks) {
file = await finishChunks(req, res);
} else {
// If nothing is returned we assume it was a chunk ¯\_(ツ)_/¯
file = await uploadFile(req, res);
if (!file) return;
}
const result = await Util.storeFileToDb(req, res, user, file, db);
if (albumId) await Util.saveFileToAlbum(db, albumId, result.id);
result.deleteUrl = `${process.env.DOMAIN}/api/file/${result.id[0]}`;
return res.status(201).send({
message: 'Successfully uploaded the file.',
url: result.url,
name: result.file.name,
hash: result.file.hash,
deleteUrl: result.deleteUrl,
size: result.file.size
});
}
}
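On success the route answers with 201 and the stored file's metadata. A sketch of the response body (values invented; the exact url depends on what storeFileToDb returns):

// HTTP 201
// {
//   "message": "Successfully uploaded the file.",
//   "url": "https://example.com/abcdefgh.mp4",
//   "name": "abcdefgh.mp4",
//   "hash": "<blake3 hex digest>",
//   "deleteUrl": "https://example.com/api/file/42",
//   "size": 125829120
// }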

View File

@@ -29,7 +29,7 @@ class Server {
this.server = express();
this.server.set('trust proxy', 1);
this.server.use(helmet());
this.server.use(cors({ allowedHeaders: ['Accept', 'Authorization', 'Cache-Control', 'X-Requested-With', 'Content-Type', 'albumId'] }));
this.server.use(cors({ allowedHeaders: ['Accept', 'Authorization', 'Cache-Control', 'X-Requested-With', 'Content-Type', 'albumId', 'finishedChunks'] }));
this.server.use((req, res, next) => {
// This bypasses the headers.accept for album download, since it's accessed directly through the browser.
if ((req.url.includes('/api/album/') || req.url.includes('/zip')) && req.method === 'GET') return next();
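The finishedChunks entry is needed because custom request headers trigger a CORS preflight, which only succeeds when every header is covered by Access-Control-Allow-Headers. A sketch of the exchange the config above now permits (paths hypothetical):

// Preflight the browser sends before the finishing POST:
//   OPTIONS /api/upload
//   Access-Control-Request-Method: POST
//   Access-Control-Request-Headers: albumid,finishedchunks
// The cors() middleware above now lists 'finishedChunks', so the
// preflight passes and the real POST goes through.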

View File

@@ -23,6 +23,7 @@ const log = require('./Log');
const ThumbUtil = require('./ThumbUtil');
const blockedExtensions = process.env.BLOCKED_EXTENSIONS.split(',');
const preserveExtensions = ['.tar.gz', '.tar.z', '.tar.bz2', '.tar.lzma', '.tar.lzo', '.tar.xz'];
class Util {
static uploadPath = path.join(__dirname, '../../../', process.env.UPLOAD_FOLDER);
@@ -50,12 +51,12 @@
return file;
}
static getUniqueFilename(name) {
static getUniqueFilename(extension) {
const retry = (i = 0) => {
const filename = randomstring.generate({
length: parseInt(process.env.GENERATED_FILENAME_LENGTH, 10),
capitalization: 'lowercase'
}) + path.extname(name).toLowerCase();
}) + extension;
// TODO: Change this to look for the file in the db instead of in the filesystem
const exists = jetpack.exists(path.join(Util.uploadPath, filename));
@@ -280,6 +281,68 @@
}
}
static async fileExists(res, exists, filename) {
exists = Util.constructFilePublicLink(exists);
res.json({
message: 'Successfully uploaded the file.',
name: exists.name,
hash: exists.hash,
size: exists.size,
url: `${process.env.DOMAIN}/${exists.name}`,
deleteUrl: `${process.env.DOMAIN}/api/file/${exists.id}`,
repeated: true
});
return this.deleteFile(filename);
}
static async storeFileToDb(req, res, user, file, db) {
const dbFile = await db.table('files')
.where(function() {
if (user === undefined) {
this.whereNull('userid');
} else {
this.where('userid', user.id);
}
})
.where({
hash: file.data.hash,
size: file.data.size
})
.first();
if (dbFile) {
await this.fileExists(res, dbFile, file.data.filename);
return;
}
const now = moment.utc().toDate();
const data = {
userId: user ? user.id : null,
name: file.data.filename,
original: file.data.originalname,
type: file.data.mimetype,
size: file.data.size,
hash: file.data.hash,
ip: req.ip,
createdAt: now,
editedAt: now
};
Util.generateThumbnails(file.data.filename);
let fileId;
if (process.env.DB_CLIENT === 'sqlite3') {
fileId = await db.table('files').insert(data);
} else {
fileId = await db.table('files').insert(data, 'id');
}
return {
file: data,
id: fileId
};
}
static async saveFileToAlbum(db, albumId, insertedId) {
if (!albumId) return;
@@ -291,6 +354,36 @@
console.error(error);
}
}
static getExtension(filename) {
// Always return blank string if the filename does not seem to have a valid extension
// Files such as .DS_Store (anything that starts with a dot, without any extension after) will still be accepted
if (!/\../.test(filename)) return '';
let lower = filename.toLowerCase(); // due to this, the returned extname will always be lower case
let multi = '';
let extname = '';
// check for multi-archive extensions (.001, .002, and so on)
if (/\.\d{3}$/.test(lower)) {
multi = lower.slice(lower.lastIndexOf('.') - lower.length);
lower = lower.slice(0, lower.lastIndexOf('.'));
}
// check against extensions that must be preserved
for (const extPreserve of preserveExtensions) {
if (lower.endsWith(extPreserve)) {
extname = extPreserve;
break;
}
}
if (!extname) {
extname = lower.slice(lower.lastIndexOf('.') - lower.length); // path.extname(lower)
}
return extname + multi;
}
}
module.exports = Util;
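Worked examples for getExtension, given the preserveExtensions list above (a sketch of the expected results):

Util.getExtension('photo.JPG'); // '.jpg' (extensions are always lower-cased)
Util.getExtension('backup.TAR.GZ'); // '.tar.gz' (preserved multi-part extension)
Util.getExtension('archive.tar.xz.001'); // '.tar.xz.001' (multi-archive suffix re-attached)
Util.getExtension('README'); // '' (no dot, no extension)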

View File

@@ -0,0 +1,91 @@
const fs = require('fs');
const path = require('path');
const blake3 = require('blake3');
const jetpack = require('fs-jetpack');
function DiskStorage(opts) {
this.getFilename = opts.filename;
if (typeof opts.destination === 'string') {
jetpack.dir(opts.destination);
this.getDestination = function($0, $1, cb) { cb(null, opts.destination); };
} else {
this.getDestination = opts.destination;
}
}
DiskStorage.prototype._handleFile = function _handleFile(req, file, cb) {
const that = this; // eslint-disable-line consistent-this
that.getDestination(req, file, (err, destination) => {
if (err) return cb(err);
that.getFilename(req, file, (err, filename) => {
if (err) return cb(err);
const finalPath = path.join(destination, filename);
const onerror = err => {
hash.dispose(); // eslint-disable-line no-use-before-define
cb(err);
};
let outStream;
let hash;
if (file._isChunk) {
if (!file._chunksData.stream) {
file._chunksData.stream = fs.createWriteStream(finalPath, { flags: 'a' });
file._chunksData.stream.on('error', onerror);
}
if (!file._chunksData.hasher) {
file._chunksData.hasher = blake3.createHash();
}
outStream = file._chunksData.stream;
hash = file._chunksData.hasher;
} else {
outStream = fs.createWriteStream(finalPath);
outStream.on('error', onerror);
hash = blake3.createHash();
}
file.stream.on('error', onerror);
file.stream.on('data', d => hash.update(d));
if (file._isChunk) {
file.stream.on('end', () => {
cb(null, {
destination,
filename,
path: finalPath
});
});
file.stream.pipe(outStream, { end: false });
} else {
outStream.on('finish', () => {
cb(null, {
destination,
filename,
path: finalPath,
size: outStream.bytesWritten,
hash: hash.digest('hex')
});
});
file.stream.pipe(outStream);
}
});
});
};
DiskStorage.prototype._removeFile = function _removeFile(req, file, cb) {
const path = file.path;
delete file.destination;
delete file.filename;
delete file.path;
fs.unlink(path, cb);
};
module.exports = function(opts) {
return new DiskStorage(opts);
};
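This engine is the heart of the new write strategy: every chunk of an upload is piped into one shared append-mode stream with end: false, so the file is assembled in place and the old merge pass over per-chunk files (the deleted /upload/chunks route above) disappears. A reduced sketch of the pattern, with a hypothetical target path:

const fs = require('fs');

// One shared target per upload; flags 'a' keeps appending across chunks.
const shared = fs.createWriteStream('/tmp/upload.tmp', { flags: 'a' });

function appendChunk(chunkStream) {
  return new Promise((resolve, reject) => {
    chunkStream.on('error', reject);
    chunkStream.on('end', resolve);
    chunkStream.pipe(shared, { end: false }); // don't close the target between chunks
  });
}

// After the final chunk: shared.end(), then shared.bytesWritten is checked
// against the size the client reported.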

View File

@@ -121,7 +121,7 @@ export default {
chunking: true,
retryChunks: true,
retryChunksLimit: 3,
parallelChunkUploads: true,
parallelChunkUploads: false,
chunkSize: this.config.chunkSize * 1000000,
chunksUploaded: this.dropzoneChunksUploaded,
maxFilesize: this.config.maxFileSize,
@@ -176,7 +176,7 @@ export default {
console.error(file, message, xhr);
},
async dropzoneChunksUploaded(file, done) {
const { data } = await this.$axios.post(`${this.config.baseURL}/upload/chunks`, {
const { data } = await this.$axios.post(`${this.config.baseURL}/upload`, {
files: [{
uuid: file.upload.uuid,
original: file.name,
@@ -186,7 +186,8 @@
}]
}, {
headers: {
albumId: this.selectedAlbum ? this.selectedAlbum : null
albumId: this.selectedAlbum ? this.selectedAlbum : null,
finishedChunks: true
}
});
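parallelChunkUploads is turned off because the server now appends each chunk to a single shared stream in arrival order; with parallel uploads a later chunk could land first and be written at the wrong offset. A comment sketch of why ordering matters:

// With { flags: 'a' }, byte layout is determined purely by arrival order:
//   sequential: chunk0, chunk1, chunk2 -> [chunk0][chunk1][chunk2] (correct)
//   parallel:   chunk0, chunk2, chunk1 -> [chunk0][chunk2][chunk1] (corrupt)
// Sequential delivery plus retryChunks keeps assembly both ordered and resilient.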