Merge pull request #2668 from simong/tidy
Tidy HTML before trying to convert it with abiword
This commit is contained in:
commit
5615bab0d9
|
@ -90,6 +90,10 @@
|
|||
Abiword is needed for advanced import/export features of pads*/
|
||||
"abiword" : null,
|
||||
|
||||
/* This is the path to the Tidy executable. Setting it to null disables Tidy.
|
||||
Tidy is used to improve the quality of exported pads*/
|
||||
"tidyHtml" : null,
|
||||
|
||||
/* Allow import of file types other than the supported types: txt, doc, docx, rtf, odt, html & htm */
|
||||
"allowUnknownFileEnds" : true,
|
||||
|
||||
|
|
|
@ -28,6 +28,7 @@ var fs = require("fs");
|
|||
var settings = require('../utils/Settings');
|
||||
var os = require('os');
|
||||
var hooks = require("ep_etherpad-lite/static/js/pluginfw/hooks");
|
||||
var TidyHtml = require('../utils/TidyHtml');
|
||||
|
||||
//load abiword only if its enabled
|
||||
if(settings.abiword != null)
|
||||
|
@ -172,12 +173,19 @@ exports.doExport = function(req, res, padId, type)
|
|||
fs.writeFile(srcFile, html, callback);
|
||||
}
|
||||
},
|
||||
//send the convert job to abiword
|
||||
|
||||
// Tidy up the exported HTML
|
||||
function(callback)
|
||||
{
|
||||
//ensure html can be collected by the garbage collector
|
||||
html = null;
|
||||
|
||||
TidyHtml.tidy(srcFile, callback);
|
||||
},
|
||||
|
||||
//send the convert job to abiword
|
||||
function(callback)
|
||||
{
|
||||
destFile = tempDirectory + "/etherpad_export_" + randNum + "." + type;
|
||||
abiword.convertFile(srcFile, destFile, type, callback);
|
||||
},
|
||||
|
|
|
@ -152,6 +152,11 @@ exports.minify = true;
|
|||
*/
|
||||
exports.abiword = null;
|
||||
|
||||
/**
|
||||
* The path of the tidy executable
|
||||
*/
|
||||
exports.tidyHtml = null;
|
||||
|
||||
/**
|
||||
* Should we support non-natively supported file types on import?
|
||||
*/
|
||||
|
|
|
@ -0,0 +1,41 @@
|
|||
/**
|
||||
* Tidy up the HTML in a given file
|
||||
*/
|
||||
|
||||
var log4js = require('log4js');
|
||||
var settings = require('./Settings');
|
||||
var spawn = require('child_process').spawn;
|
||||
|
||||
exports.tidy = function(srcFile, callback) {
|
||||
var logger = log4js.getLogger('TidyHtml');
|
||||
|
||||
// Don't do anything if Tidy hasn't been enabled
|
||||
if (!settings.tidyHtml) {
|
||||
logger.debug('tidyHtml has not been configured yet, ignoring tidy request');
|
||||
return callback(null);
|
||||
}
|
||||
|
||||
var errMessage = '';
|
||||
|
||||
// Spawn a new tidy instance that cleans up the file inline
|
||||
logger.debug('Tidying ' + srcFile);
|
||||
var tidy = spawn(settings.tidyHtml, ['-modify', srcFile]);
|
||||
|
||||
// Keep track of any error messages
|
||||
tidy.stderr.on('data', function (data) {
|
||||
errMessage += data.toString();
|
||||
});
|
||||
|
||||
// Wait until Tidy is done
|
||||
tidy.on('close', function(code) {
|
||||
// Tidy returns a 0 when no errors occur and a 1 exit code when
|
||||
// the file could be tidied but a few warnings were generated
|
||||
if (code === 0 || code === 1) {
|
||||
logger.debug('Tidied ' + srcFile + ' successfully');
|
||||
return callback(null);
|
||||
} else {
|
||||
logger.error('Failed to tidy ' + srcFile + '\n' + errMessage);
|
||||
return callback('Tidy died with exit code ' + code);
|
||||
}
|
||||
});
|
||||
};
|
|
@ -0,0 +1,63 @@
|
|||
// Modules used by this spec. TidyHtml and Settings start out as null and are
// assigned in the `before` hook. Every name is part of this single `var`
// statement so that none of them is created as an implicit global.
var assert = require('assert'),
    fs = require('fs'),
    path = require('path'),
    TidyHtml = null,
    Settings = null;
|
||||
|
||||
var npm = require("../../../../src/node_modules/npm/lib/npm.js");
|
||||
|
||||
// Specs for the TidyHtml utility. Both cases are skipped when no Tidy
// executable has been configured in the settings.
describe('tidyHtml', function() {
  // Load npm first, then require the modules under test
  before(function(done) {
    npm.load({}, function(err) {
      assert.ok(!err);
      TidyHtml = require('../../../../src/node/utils/TidyHtml');
      Settings = require('../../../../src/node/utils/Settings');
      return done();
    });
  });

  it('Tidies HTML', function(done) {
    // If the user hasn't configured Tidy, we skip this test as Tidy is required for it
    if (!Settings.tidyHtml) {
      this.skip();
    }

    // Try to tidy up a bad HTML file
    var tmpDir = process.env.TEMP || "/tmp";
    var tmpFile = path.join(tmpDir, 'tmp_' + (Math.floor(Math.random() * 1000000)) + '.html');
    fs.writeFileSync(tmpFile, '<html><body><p>a paragraph</p><li>List without outer UL</li>trailing closing p</p></body></html>');
    TidyHtml.tidy(tmpFile, function(err){
      // Read the file again and remove it before asserting, so that the
      // temporary file is not left behind when an assertion fails
      var cleanedHtml = fs.readFileSync(tmpFile).toString();
      fs.unlinkSync(tmpFile);

      assert.ok(!err);

      // Only the tail of the document is compared; whatever Tidy emits
      // before the <title> element is not checked
      var expectedHtml = [
        '<title></title>',
        '</head>',
        '<body>',
        '<p>a paragraph</p>',
        '<ul>',
        '<li>List without outer UL</li>',
        '<li style="list-style: none">trailing closing p</li>',
        '</ul>',
        '</body>',
        '</html>',
      ].join('\n');
      assert.notStrictEqual(cleanedHtml.indexOf(expectedHtml), -1);
      return done();
    });
  });

  it('can deal with errors', function(done) {
    // If the user hasn't configured Tidy, we skip this test as Tidy is required for it
    if (!Settings.tidyHtml) {
      this.skip();
    }

    // Tidying a path that does not exist must report an error to the callback
    TidyHtml.tidy('/some/none/existing/file.html', function(err) {
      assert.ok(err);
      return done();
    });
  });
});
|
Loading…
Reference in New Issue