Merge pull request #1491 from ether/export-txt-with-indent

TXT Export Indentation on lists Edit
This commit is contained in:
Marcel Klehr 2013-02-18 12:13:06 -08:00
commit 4592397bd9
5 changed files with 504 additions and 151 deletions

View File

@ -146,12 +146,11 @@ exports.getPad = function(id, text, callback)
else else
{ {
pad = new Pad(id); pad = new Pad(id);
//initalize the pad //initalize the pad
pad.init(text, function(err) pad.init(text, function(err)
{ {
if(ERR(err, callback)) return; if(ERR(err, callback)) return;
globalPads.set(id, pad); globalPads.set(id, pad);
callback(null, pad); callback(null, pad);
}); });

View File

@ -20,6 +20,7 @@
var ERR = require("async-stacktrace"); var ERR = require("async-stacktrace");
var exporthtml = require("../utils/ExportHtml"); var exporthtml = require("../utils/ExportHtml");
var exporttxt = require("../utils/ExportTxt");
var exportdokuwiki = require("../utils/ExportDokuWiki"); var exportdokuwiki = require("../utils/ExportDokuWiki");
var padManager = require("../db/PadManager"); var padManager = require("../db/PadManager");
var async = require("async"); var async = require("async");
@ -48,22 +49,75 @@ exports.doExport = function(req, res, padId, type)
res.attachment(padId + "." + type); res.attachment(padId + "." + type);
//if this is a plain text export, we can do this directly //if this is a plain text export, we can do this directly
// We have to over engineer this because tabs are stored as attributes and not plain text
if(type == "txt") if(type == "txt")
{ {
padManager.getPad(padId, function(err, pad) var txt;
{ var randNum;
ERR(err); var srcFile, destFile;
if(req.params.rev){
pad.getInternalRevisionAText(req.params.rev, function(junk, text) async.series([
{ //render the txt document
res.send(text.text ? text.text : null); function(callback)
});
}
else
{ {
res.send(pad.text()); exporttxt.getPadTXTDocument(padId, req.params.rev, false, function(err, _txt)
{
if(ERR(err, callback)) return;
txt = _txt;
callback();
});
},
//decide what to do with the txt export
function(callback)
{
//if this is a txt export, we can send this from here directly
res.send(txt);
callback("stop");
},
//send the convert job to abiword
function(callback)
{
//ensure html can be collected by the garbage collector
txt = null;
destFile = tempDirectory + "/eplite_export_" + randNum + "." + type;
abiword.convertFile(srcFile, destFile, type, callback);
},
//send the file
function(callback)
{
res.sendfile(destFile, null, callback);
},
//clean up temporary files
function(callback)
{
async.parallel([
function(callback)
{
fs.unlink(srcFile, callback);
},
function(callback)
{
//100ms delay to accomidate for slow windows fs
if(os.type().indexOf("Windows") > -1)
{
setTimeout(function()
{
fs.unlink(destFile, callback);
}, 100);
}
else
{
fs.unlink(destFile, callback);
}
}
], callback);
} }
}); ], function(err)
{
if(err && err != "stop") ERR(err);
})
} }
else if(type == 'dokuwiki') else if(type == 'dokuwiki')
{ {

View File

@ -0,0 +1,87 @@
/**
* Helpers for export requests
*/
/*
* 2011 Peter 'Pita' Martischka (Primary Technology Ltd)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS-IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
var async = require("async");
var Changeset = require("ep_etherpad-lite/static/js/Changeset");
var padManager = require("../db/PadManager");
var ERR = require("async-stacktrace");
var Security = require('ep_etherpad-lite/static/js/security');
var hooks = require('ep_etherpad-lite/static/js/pluginfw/hooks');
exports.getPadPlainText = function(pad, revNum){
var atext = ((revNum !== undefined) ? pad.getInternalRevisionAText(revNum) : pad.atext());
var textLines = atext.text.slice(0, -1).split('\n');
var attribLines = Changeset.splitAttributionLines(atext.attribs, atext.text);
var apool = pad.pool();
var pieces = [];
for (var i = 0; i < textLines.length; i++){
var line = _analyzeLine(textLines[i], attribLines[i], apool);
if (line.listLevel){
var numSpaces = line.listLevel * 2 - 1;
var bullet = '*';
pieces.push(new Array(numSpaces + 1).join(' '), bullet, ' ', line.text, '\n');
}
else{
pieces.push(line.text, '\n');
}
}
return pieces.join('');
}
exports._analyzeLine = function(text, aline, apool){
var line = {};
// identify list
var lineMarker = 0;
line.listLevel = 0;
if (aline){
var opIter = Changeset.opIterator(aline);
if (opIter.hasNext()){
var listType = Changeset.opAttributeValue(opIter.next(), 'list', apool);
if (listType){
lineMarker = 1;
listType = /([a-z]+)([12345678])/.exec(listType);
if (listType){
line.listTypeName = listType[1];
line.listLevel = Number(listType[2]);
}
}
}
}
if (lineMarker){
line.text = text.substring(1);
line.aline = Changeset.subattribution(aline, 1);
}
else{
line.text = text;
line.aline = aline;
}
return line;
}
exports._encodeWhitespace = function(s){
return s.replace(/[^\x21-\x7E\s\t\n\r]/g, function(c){
return "&#" +c.charCodeAt(0) + ";"
});
}

View File

@ -21,31 +21,9 @@ var padManager = require("../db/PadManager");
var ERR = require("async-stacktrace"); var ERR = require("async-stacktrace");
var Security = require('ep_etherpad-lite/static/js/security'); var Security = require('ep_etherpad-lite/static/js/security');
var hooks = require('ep_etherpad-lite/static/js/pluginfw/hooks'); var hooks = require('ep_etherpad-lite/static/js/pluginfw/hooks');
function getPadPlainText(pad, revNum) var getPadPlainText = require('./ExportHelper').getPadPlainText
{ var _analyzeLine = require('./ExportHelper')._analyzeLine;
var atext = ((revNum !== undefined) ? pad.getInternalRevisionAText(revNum) : pad.atext()); var _encodeWhitespace = require('./ExportHelper')._encodeWhitespace;
var textLines = atext.text.slice(0, -1).split('\n');
var attribLines = Changeset.splitAttributionLines(atext.attribs, atext.text);
var apool = pad.pool();
var pieces = [];
for (var i = 0; i < textLines.length; i++)
{
var line = _analyzeLine(textLines[i], attribLines[i], apool);
if (line.listLevel)
{
var numSpaces = line.listLevel * 2 - 1;
var bullet = '*';
pieces.push(new Array(numSpaces + 1).join(' '), bullet, ' ', line.text, '\n');
}
else
{
pieces.push(line.text, '\n');
}
}
return pieces.join('');
}
function getPadHTML(pad, revNum, callback) function getPadHTML(pad, revNum, callback)
{ {
@ -503,45 +481,6 @@ function getHTMLFromAtext(pad, atext, authorColors)
return pieces.join(''); return pieces.join('');
} }
function _analyzeLine(text, aline, apool)
{
var line = {};
// identify list
var lineMarker = 0;
line.listLevel = 0;
if (aline)
{
var opIter = Changeset.opIterator(aline);
if (opIter.hasNext())
{
var listType = Changeset.opAttributeValue(opIter.next(), 'list', apool);
if (listType)
{
lineMarker = 1;
listType = /([a-z]+)([12345678])/.exec(listType);
if (listType)
{
line.listTypeName = listType[1];
line.listLevel = Number(listType[2]);
}
}
}
}
if (lineMarker)
{
line.text = text.substring(1);
line.aline = Changeset.subattribution(aline, 1);
}
else
{
line.text = text;
line.aline = aline;
}
return line;
}
exports.getPadHTMLDocument = function (padId, revNum, noDocType, callback) exports.getPadHTMLDocument = function (padId, revNum, noDocType, callback)
{ {
padManager.getPad(padId, function (err, pad) padManager.getPad(padId, function (err, pad)
@ -578,79 +517,6 @@ exports.getPadHTMLDocument = function (padId, revNum, noDocType, callback)
}); });
} }
function _encodeWhitespace(s) {
return s.replace(/[^\x21-\x7E\s\t\n\r]/g, function(c)
{
return "&#" +c.charCodeAt(0) + ";"
});
}
// copied from ACE
function _processSpaces(s)
{
var doesWrap = true;
if (s.indexOf("<") < 0 && !doesWrap)
{
// short-cut
return s.replace(/ /g, '&nbsp;');
}
var parts = [];
s.replace(/<[^>]*>?| |[^ <]+/g, function (m)
{
parts.push(m);
});
if (doesWrap)
{
var endOfLine = true;
var beforeSpace = false;
// last space in a run is normal, others are nbsp,
// end of line is nbsp
for (var i = parts.length - 1; i >= 0; i--)
{
var p = parts[i];
if (p == " ")
{
if (endOfLine || beforeSpace) parts[i] = '&nbsp;';
endOfLine = false;
beforeSpace = true;
}
else if (p.charAt(0) != "<")
{
endOfLine = false;
beforeSpace = false;
}
}
// beginning of line is nbsp
for (var i = 0; i < parts.length; i++)
{
var p = parts[i];
if (p == " ")
{
parts[i] = '&nbsp;';
break;
}
else if (p.charAt(0) != "<")
{
break;
}
}
}
else
{
for (var i = 0; i < parts.length; i++)
{
var p = parts[i];
if (p == " ")
{
parts[i] = '&nbsp;';
}
}
}
return parts.join('');
}
// copied from ACE // copied from ACE
var _REGEX_WORDCHAR = /[\u0030-\u0039\u0041-\u005A\u0061-\u007A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u1FFF\u3040-\u9FFF\uF900-\uFDFF\uFE70-\uFEFE\uFF10-\uFF19\uFF21-\uFF3A\uFF41-\uFF5A\uFF66-\uFFDC]/; var _REGEX_WORDCHAR = /[\u0030-\u0039\u0041-\u005A\u0061-\u007A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u1FFF\u3040-\u9FFF\uF900-\uFDFF\uFE70-\uFEFE\uFF10-\uFF19\uFF21-\uFF3A\uFF41-\uFF5A\uFF66-\uFFDC]/;
@ -676,3 +542,57 @@ function _findURLs(text)
return urls; return urls;
} }
// copied from ACE
function _processSpaces(s){
var doesWrap = true;
if (s.indexOf("<") < 0 && !doesWrap){
// short-cut
return s.replace(/ /g, '&nbsp;');
}
var parts = [];
s.replace(/<[^>]*>?| |[^ <]+/g, function (m){
parts.push(m);
});
if (doesWrap){
var endOfLine = true;
var beforeSpace = false;
// last space in a run is normal, others are nbsp,
// end of line is nbsp
for (var i = parts.length - 1; i >= 0; i--){
var p = parts[i];
if (p == " "){
if (endOfLine || beforeSpace) parts[i] = '&nbsp;';
endOfLine = false;
beforeSpace = true;
}
else if (p.charAt(0) != "<"){
endOfLine = false;
beforeSpace = false;
}
}
// beginning of line is nbsp
for (var i = 0; i < parts.length; i++){
var p = parts[i];
if (p == " "){
parts[i] = '&nbsp;';
break;
}
else if (p.charAt(0) != "<"){
break;
}
}
}
else
{
for (var i = 0; i < parts.length; i++){
var p = parts[i];
if (p == " "){
parts[i] = '&nbsp;';
}
}
}
return parts.join('');
}

293
src/node/utils/ExportTxt.js Normal file
View File

@ -0,0 +1,293 @@
/**
* TXT export
*/
/*
* 2013 John McLear
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS-IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
var async = require("async");
var Changeset = require("ep_etherpad-lite/static/js/Changeset");
var padManager = require("../db/PadManager");
var ERR = require("async-stacktrace");
var Security = require('ep_etherpad-lite/static/js/security');
var hooks = require('ep_etherpad-lite/static/js/pluginfw/hooks');
var getPadPlainText = require('./ExportHelper').getPadPlainText;
var _analyzeLine = require('./ExportHelper')._analyzeLine;
// This is slightly different than the HTML method as it passes the output to getTXTFromAText
function getPadTXT(pad, revNum, callback)
{
var atext = pad.atext;
var html;
async.waterfall([
// fetch revision atext
function (callback)
{
if (revNum != undefined)
{
pad.getInternalRevisionAText(revNum, function (err, revisionAtext)
{
if(ERR(err, callback)) return;
atext = revisionAtext;
callback();
});
}
else
{
callback(null);
}
},
// convert atext to html
function (callback)
{
html = getTXTFromAtext(pad, atext); // only this line is different to the HTML function
callback(null);
}],
// run final callback
function (err)
{
if(ERR(err, callback)) return;
callback(null, html);
});
}
exports.getPadTXT = getPadTXT;
// This is different than the functionality provided in ExportHtml as it provides formatting
// functionality that is designed specifically for TXT exports
function getTXTFromAtext(pad, atext, authorColors)
{
var apool = pad.apool();
var textLines = atext.text.slice(0, -1).split('\n');
var attribLines = Changeset.splitAttributionLines(atext.attribs, atext.text);
var tags = ['h1', 'h2', 'strong', 'em', 'u', 's'];
var props = ['heading1', 'heading2', 'bold', 'italic', 'underline', 'strikethrough'];
var anumMap = {};
var css = "";
props.forEach(function (propName, i)
{
var propTrueNum = apool.putAttrib([propName, true], true);
if (propTrueNum >= 0)
{
anumMap[propTrueNum] = i;
}
});
function getLineTXT(text, attribs)
{
var propVals = [false, false, false];
var ENTER = 1;
var STAY = 2;
var LEAVE = 0;
// Use order of tags (b/i/u) as order of nesting, for simplicity
// and decent nesting. For example,
// <b>Just bold<b> <b><i>Bold and italics</i></b> <i>Just italics</i>
// becomes
// <b>Just bold <i>Bold and italics</i></b> <i>Just italics</i>
var taker = Changeset.stringIterator(text);
var assem = Changeset.stringAssembler();
var openTags = [];
var idx = 0;
function processNextChars(numChars)
{
if (numChars <= 0)
{
return;
}
var iter = Changeset.opIterator(Changeset.subattribution(attribs, idx, idx + numChars));
idx += numChars;
while (iter.hasNext())
{
var o = iter.next();
var propChanged = false;
Changeset.eachAttribNumber(o.attribs, function (a)
{
if (a in anumMap)
{
var i = anumMap[a]; // i = 0 => bold, etc.
if (!propVals[i])
{
propVals[i] = ENTER;
propChanged = true;
}
else
{
propVals[i] = STAY;
}
}
});
for (var i = 0; i < propVals.length; i++)
{
if (propVals[i] === true)
{
propVals[i] = LEAVE;
propChanged = true;
}
else if (propVals[i] === STAY)
{
propVals[i] = true; // set it back
}
}
// now each member of propVal is in {false,LEAVE,ENTER,true}
// according to what happens at start of span
if (propChanged)
{
// leaving bold (e.g.) also leaves italics, etc.
var left = false;
for (var i = 0; i < propVals.length; i++)
{
var v = propVals[i];
if (!left)
{
if (v === LEAVE)
{
left = true;
}
}
else
{
if (v === true)
{
propVals[i] = STAY; // tag will be closed and re-opened
}
}
}
var tags2close = [];
for (var i = propVals.length - 1; i >= 0; i--)
{
if (propVals[i] === LEAVE)
{
//emitCloseTag(i);
tags2close.push(i);
propVals[i] = false;
}
else if (propVals[i] === STAY)
{
//emitCloseTag(i);
tags2close.push(i);
}
}
for (var i = 0; i < propVals.length; i++)
{
if (propVals[i] === ENTER || propVals[i] === STAY)
{
propVals[i] = true;
}
}
// propVals is now all {true,false} again
} // end if (propChanged)
var chars = o.chars;
if (o.lines)
{
chars--; // exclude newline at end of line, if present
}
var s = taker.take(chars);
// removes the characters with the code 12. Don't know where they come
// from but they break the abiword parser and are completly useless
// s = s.replace(String.fromCharCode(12), "");
// remove * from s, it's just not needed on a blank line.. This stops
// plugins from being able to display * at the beginning of a line
// s = s.replace("*", ""); // Then remove it
assem.append(s);
} // end iteration over spans in line
var tags2close = [];
for (var i = propVals.length - 1; i >= 0; i--)
{
if (propVals[i])
{
tags2close.push(i);
propVals[i] = false;
}
}
} // end processNextChars
processNextChars(text.length - idx);
return(assem.toString());
} // end getLineHTML
var pieces = [css];
// Need to deal with constraints imposed on HTML lists; can
// only gain one level of nesting at once, can't change type
// mid-list, etc.
// People might use weird indenting, e.g. skip a level,
// so we want to do something reasonable there. We also
// want to deal gracefully with blank lines.
// => keeps track of the parents level of indentation
var lists = []; // e.g. [[1,'bullet'], [3,'bullet'], ...]
for (var i = 0; i < textLines.length; i++)
{
var line = _analyzeLine(textLines[i], attribLines[i], apool);
var lineContent = getLineTXT(line.text, line.aline);
if(line.listTypeName == "bullet"){
lineContent = "* " + lineContent; // add a bullet
}
if(line.listLevel > 0){
for (var j = line.listLevel - 1; j >= 0; j--){
pieces.push('\t');
}
if(line.listTypeName == "number"){
pieces.push(line.listLevel + ". ");
// This is bad because it doesn't truly reflect what the user
// sees because browsers do magic on nested <ol><li>s
}
pieces.push(lineContent, '\n');
}else{
pieces.push(lineContent, '\n');
}
}
return pieces.join('');
}
exports.getTXTFromAtext = getTXTFromAtext;
exports.getPadTXTDocument = function (padId, revNum, noDocType, callback)
{
padManager.getPad(padId, function (err, pad)
{
if(ERR(err, callback)) return;
getPadTXT(pad, revNum, function (err, html)
{
if(ERR(err, callback)) return;
callback(null, html);
});
});
}