2012-02-28 21:19:10 +01:00
|
|
|
|
2011-08-18 21:58:56 +02:00
|
|
|
var startTime = new Date().getTime();
|
|
|
|
var fs = require("fs");
|
|
|
|
var ueberDB = require("ueberDB");
|
|
|
|
var mysql = require("mysql");
|
|
|
|
var async = require("async");
|
2012-02-26 17:48:17 +01:00
|
|
|
var Changeset = require("ep_etherpad-lite/static/js/Changeset");
|
|
|
|
var randomString = require('ep_etherpad-lite/static/js/pad_utils').randomString;
|
|
|
|
var AttributePoolFactory = require("ep_etherpad-lite/static/js/AttributePoolFactory");
|
2011-08-18 21:58:56 +02:00
|
|
|
|
|
|
|
var settingsFile = process.argv[2];
|
|
|
|
var sqlOutputFile = process.argv[3];
|
|
|
|
|
|
|
|
//stop if the settings file is not set
|
|
|
|
if(!settingsFile || !sqlOutputFile)
|
|
|
|
{
|
|
|
|
console.error("Use: node convert.js $SETTINGSFILE $SQLOUTPUT");
|
|
|
|
process.exit(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
log("read settings file...");
|
|
|
|
//read the settings file and parse the json
|
|
|
|
var settings = JSON.parse(fs.readFileSync(settingsFile, "utf8"));
|
|
|
|
log("done");
|
|
|
|
|
|
|
|
log("open output file...");
|
|
|
|
var sqlOutput = fs.openSync(sqlOutputFile, "w");
|
|
|
|
var sql = "SET CHARACTER SET UTF8;\n" +
|
|
|
|
"CREATE TABLE IF NOT EXISTS `store` ( \n" +
|
|
|
|
"`key` VARCHAR( 100 ) NOT NULL , \n" +
|
|
|
|
"`value` LONGTEXT NOT NULL , \n" +
|
|
|
|
"PRIMARY KEY ( `key` ) \n" +
|
|
|
|
") ENGINE = INNODB;\n" +
|
|
|
|
"START TRANSACTION;\n\n";
|
|
|
|
fs.writeSync(sqlOutput, sql);
|
|
|
|
log("done");
|
|
|
|
|
|
|
|
//set setings for ep db
|
|
|
|
var etherpadDB= new mysql.Client();
|
|
|
|
etherpadDB.host = settings.etherpadDB.host;
|
|
|
|
etherpadDB.port = settings.etherpadDB.port;
|
|
|
|
etherpadDB.database = settings.etherpadDB.database;
|
|
|
|
etherpadDB.user = settings.etherpadDB.user;
|
|
|
|
etherpadDB.password = settings.etherpadDB.password;
|
|
|
|
|
|
|
|
//get the timestamp once
|
|
|
|
var timestamp = new Date().getTime();
|
|
|
|
|
|
|
|
var padIDs;
|
|
|
|
|
|
|
|
async.series([
|
|
|
|
//get all padids out of the database...
|
|
|
|
function(callback)
|
|
|
|
{
|
|
|
|
log("get all padIds out of the database...");
|
|
|
|
|
2011-08-24 14:31:02 +02:00
|
|
|
etherpadDB.query("SELECT ID FROM PAD_META", [], function(err, _padIDs)
|
2011-08-18 21:58:56 +02:00
|
|
|
{
|
|
|
|
padIDs = _padIDs;
|
|
|
|
callback(err);
|
|
|
|
});
|
|
|
|
},
|
|
|
|
function(callback)
|
|
|
|
{
|
|
|
|
log("done");
|
|
|
|
|
|
|
|
//create a queue with a concurrency 100
|
|
|
|
var queue = async.queue(function (padId, callback)
|
|
|
|
{
|
|
|
|
convertPad(padId, function(err)
|
|
|
|
{
|
|
|
|
incrementPadStats();
|
|
|
|
callback(err);
|
|
|
|
});
|
|
|
|
}, 100);
|
|
|
|
|
|
|
|
//set the step callback as the queue callback
|
|
|
|
queue.drain = callback;
|
|
|
|
|
|
|
|
//add the padids to the worker queue
|
|
|
|
for(var i=0,length=padIDs.length;i<length;i++)
|
|
|
|
{
|
|
|
|
queue.push(padIDs[i].ID);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
], function(err)
|
|
|
|
{
|
|
|
|
if(err) throw err;
|
|
|
|
|
|
|
|
//write the groups
|
|
|
|
var sql = "";
|
|
|
|
for(var proID in proID2groupID)
|
|
|
|
{
|
|
|
|
var groupID = proID2groupID[proID];
|
|
|
|
var subdomain = proID2subdomain[proID];
|
|
|
|
|
|
|
|
sql+="REPLACE INTO store VALUES (" + etherpadDB.escape("group:" + groupID) + ", " + etherpadDB.escape(JSON.stringify(groups[groupID]))+ ");\n";
|
|
|
|
sql+="REPLACE INTO store VALUES (" + etherpadDB.escape("mapper2group:subdomain:" + subdomain) + ", " + etherpadDB.escape(groupID)+ ");\n";
|
|
|
|
}
|
|
|
|
|
|
|
|
//close transaction
|
|
|
|
sql+="COMMIT;";
|
|
|
|
|
|
|
|
//end the sql file
|
|
|
|
fs.writeSync(sqlOutput, sql, undefined, "utf-8");
|
|
|
|
fs.closeSync(sqlOutput);
|
|
|
|
|
|
|
|
log("finished.");
|
|
|
|
process.exit(0);
|
|
|
|
});
|
|
|
|
|
|
|
|
function log(str)
|
|
|
|
{
|
|
|
|
console.log((new Date().getTime() - startTime)/1000 + "\t" + str);
|
|
|
|
}
|
|
|
|
|
|
|
|
var padsDone = 0;
|
|
|
|
|
|
|
|
function incrementPadStats()
|
|
|
|
{
|
|
|
|
padsDone++;
|
|
|
|
|
|
|
|
if(padsDone%100 == 0)
|
|
|
|
{
|
|
|
|
var averageTime = Math.round(padsDone/((new Date().getTime() - startTime)/1000));
|
|
|
|
log(padsDone + "/" + padIDs.length + "\t" + averageTime + " pad/s")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
var proID2groupID = {};
|
|
|
|
var proID2subdomain = {};
|
|
|
|
var groups = {};
|
|
|
|
|
|
|
|
function convertPad(padId, callback)
|
|
|
|
{
|
|
|
|
var changesets = [];
|
|
|
|
var changesetsMeta = [];
|
|
|
|
var chatMessages = [];
|
|
|
|
var authors = [];
|
|
|
|
var apool;
|
|
|
|
var subdomain;
|
|
|
|
var padmeta;
|
|
|
|
|
|
|
|
async.series([
|
|
|
|
//get all needed db values
|
|
|
|
function(callback)
|
|
|
|
{
|
|
|
|
async.parallel([
|
|
|
|
//get the pad revisions
|
|
|
|
function(callback)
|
|
|
|
{
|
|
|
|
var sql = "SELECT * FROM `PAD_CHAT_TEXT` WHERE NUMID = (SELECT `NUMID` FROM `PAD_CHAT_META` WHERE ID=?)";
|
|
|
|
|
|
|
|
etherpadDB.query(sql, [padId], function(err, results)
|
|
|
|
{
|
|
|
|
if(!err)
|
|
|
|
{
|
2011-08-24 14:31:02 +02:00
|
|
|
try
|
2011-08-18 21:58:56 +02:00
|
|
|
{
|
2011-08-24 14:31:02 +02:00
|
|
|
//parse the pages
|
|
|
|
for(var i=0,length=results.length;i<length;i++)
|
|
|
|
{
|
|
|
|
parsePage(chatMessages, results[i].PAGESTART, results[i].OFFSETS, results[i].DATA, true);
|
|
|
|
}
|
|
|
|
}catch(e) {err = e}
|
2011-08-18 21:58:56 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
callback(err);
|
|
|
|
});
|
|
|
|
},
|
|
|
|
//get the chat entries
|
|
|
|
function(callback)
|
|
|
|
{
|
|
|
|
var sql = "SELECT * FROM `PAD_REVS_TEXT` WHERE NUMID = (SELECT `NUMID` FROM `PAD_REVS_META` WHERE ID=?)";
|
|
|
|
|
|
|
|
etherpadDB.query(sql, [padId], function(err, results)
|
|
|
|
{
|
|
|
|
if(!err)
|
|
|
|
{
|
2011-08-24 14:31:02 +02:00
|
|
|
try
|
2011-08-18 21:58:56 +02:00
|
|
|
{
|
2011-08-24 14:31:02 +02:00
|
|
|
//parse the pages
|
|
|
|
for(var i=0,length=results.length;i<length;i++)
|
|
|
|
{
|
|
|
|
parsePage(changesets, results[i].PAGESTART, results[i].OFFSETS, results[i].DATA, false);
|
|
|
|
}
|
|
|
|
}catch(e) {err = e}
|
2011-08-18 21:58:56 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
callback(err);
|
|
|
|
});
|
|
|
|
},
|
|
|
|
//get the pad revisions meta data
|
|
|
|
function(callback)
|
|
|
|
{
|
|
|
|
var sql = "SELECT * FROM `PAD_REVMETA_TEXT` WHERE NUMID = (SELECT `NUMID` FROM `PAD_REVMETA_META` WHERE ID=?)";
|
|
|
|
|
|
|
|
etherpadDB.query(sql, [padId], function(err, results)
|
|
|
|
{
|
|
|
|
if(!err)
|
|
|
|
{
|
2011-08-24 14:31:02 +02:00
|
|
|
try
|
2011-08-18 21:58:56 +02:00
|
|
|
{
|
2011-08-24 14:31:02 +02:00
|
|
|
//parse the pages
|
|
|
|
for(var i=0,length=results.length;i<length;i++)
|
|
|
|
{
|
|
|
|
parsePage(changesetsMeta, results[i].PAGESTART, results[i].OFFSETS, results[i].DATA, true);
|
|
|
|
}
|
|
|
|
}catch(e) {err = e}
|
2011-08-18 21:58:56 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
callback(err);
|
|
|
|
});
|
|
|
|
},
|
|
|
|
//get the attribute pool of this pad
|
|
|
|
function(callback)
|
|
|
|
{
|
|
|
|
var sql = "SELECT `JSON` FROM `PAD_APOOL` WHERE `ID` = ?";
|
|
|
|
|
|
|
|
etherpadDB.query(sql, [padId], function(err, results)
|
|
|
|
{
|
|
|
|
if(!err)
|
|
|
|
{
|
2011-08-24 14:31:02 +02:00
|
|
|
try
|
|
|
|
{
|
|
|
|
apool=JSON.parse(results[0].JSON).x;
|
|
|
|
}catch(e) {err = e}
|
2011-08-18 21:58:56 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
callback(err);
|
|
|
|
});
|
|
|
|
},
|
|
|
|
//get the authors informations
|
|
|
|
function(callback)
|
|
|
|
{
|
|
|
|
var sql = "SELECT * FROM `PAD_AUTHORS_TEXT` WHERE NUMID = (SELECT `NUMID` FROM `PAD_AUTHORS_META` WHERE ID=?)";
|
|
|
|
|
|
|
|
etherpadDB.query(sql, [padId], function(err, results)
|
|
|
|
{
|
|
|
|
if(!err)
|
|
|
|
{
|
2011-08-24 14:31:02 +02:00
|
|
|
try
|
2011-08-18 21:58:56 +02:00
|
|
|
{
|
2011-08-24 14:31:02 +02:00
|
|
|
//parse the pages
|
|
|
|
for(var i=0, length=results.length;i<length;i++)
|
|
|
|
{
|
|
|
|
parsePage(authors, results[i].PAGESTART, results[i].OFFSETS, results[i].DATA, true);
|
|
|
|
}
|
|
|
|
}catch(e) {err = e}
|
2011-08-18 21:58:56 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
callback(err);
|
|
|
|
});
|
|
|
|
},
|
|
|
|
//get the pad information
|
|
|
|
function(callback)
|
|
|
|
{
|
|
|
|
var sql = "SELECT JSON FROM `PAD_META` WHERE ID=?";
|
|
|
|
|
|
|
|
etherpadDB.query(sql, [padId], function(err, results)
|
|
|
|
{
|
|
|
|
if(!err)
|
|
|
|
{
|
2011-08-24 14:31:02 +02:00
|
|
|
try
|
|
|
|
{
|
|
|
|
padmeta = JSON.parse(results[0].JSON).x;
|
|
|
|
}catch(e) {err = e}
|
2011-08-18 21:58:56 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
callback(err);
|
|
|
|
});
|
|
|
|
},
|
|
|
|
//get the subdomain
|
|
|
|
function(callback)
|
|
|
|
{
|
|
|
|
//skip if this is no proPad
|
|
|
|
if(padId.indexOf("$") == -1)
|
|
|
|
{
|
|
|
|
callback();
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
//get the proID out of this padID
|
|
|
|
var proID = padId.split("$")[0];
|
|
|
|
|
|
|
|
var sql = "SELECT subDomain FROM pro_domains WHERE ID = ?";
|
|
|
|
|
|
|
|
etherpadDB.query(sql, [proID], function(err, results)
|
|
|
|
{
|
|
|
|
if(!err)
|
|
|
|
{
|
|
|
|
subdomain = results[0].subDomain;
|
|
|
|
}
|
|
|
|
|
|
|
|
callback(err);
|
|
|
|
});
|
|
|
|
}
|
|
|
|
], callback);
|
|
|
|
},
|
|
|
|
function(callback)
|
|
|
|
{
|
|
|
|
//saves all values that should be written to the database
|
|
|
|
var values = {};
|
|
|
|
|
|
|
|
//this is a pro pad, let's convert it to a group pad
|
|
|
|
if(padId.indexOf("$") != -1)
|
|
|
|
{
|
|
|
|
var padIdParts = padId.split("$");
|
|
|
|
var proID = padIdParts[0];
|
|
|
|
var padName = padIdParts[1];
|
|
|
|
|
|
|
|
var groupID
|
|
|
|
|
|
|
|
//this proID is not converted so far, do it
|
|
|
|
if(proID2groupID[proID] == null)
|
|
|
|
{
|
|
|
|
groupID = "g." + randomString(16);
|
|
|
|
|
|
|
|
//create the mappers for this new group
|
|
|
|
proID2groupID[proID] = groupID;
|
|
|
|
proID2subdomain[proID] = subdomain;
|
|
|
|
groups[groupID] = {pads: {}};
|
|
|
|
}
|
|
|
|
|
|
|
|
//use the generated groupID;
|
|
|
|
groupID = proID2groupID[proID];
|
|
|
|
|
|
|
|
//rename the pad
|
|
|
|
padId = groupID + "$" + padName;
|
|
|
|
|
|
|
|
//set the value for this pad in the group
|
|
|
|
groups[groupID].pads[padId] = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
try
|
|
|
|
{
|
|
|
|
var newAuthorIDs = {};
|
|
|
|
var oldName2newName = {};
|
|
|
|
|
|
|
|
//replace the authors with generated authors
|
|
|
|
// we need to do that cause etherpad saves pad local authors, etherpad lite uses them global
|
|
|
|
for(var i in apool.numToAttrib)
|
|
|
|
{
|
|
|
|
var key = apool.numToAttrib[i][0];
|
|
|
|
var value = apool.numToAttrib[i][1];
|
|
|
|
|
|
|
|
//skip non authors and anonymous authors
|
|
|
|
if(key != "author" || value == "")
|
|
|
|
continue;
|
|
|
|
|
|
|
|
//generate new author values
|
|
|
|
var authorID = "a." + randomString(16);
|
|
|
|
var authorColorID = authors[i].colorId || Math.floor(Math.random()*32);
|
|
|
|
var authorName = authors[i].name || null;
|
|
|
|
|
|
|
|
//overwrite the authorID of the attribute pool
|
|
|
|
apool.numToAttrib[i][1] = authorID;
|
|
|
|
|
|
|
|
//write the author to the database
|
|
|
|
values["globalAuthor:" + authorID] = {"colorId" : authorColorID, "name": authorName, "timestamp": timestamp};
|
|
|
|
|
|
|
|
//save in mappers
|
|
|
|
newAuthorIDs[i] = authorID;
|
|
|
|
oldName2newName[value] = authorID;
|
|
|
|
}
|
|
|
|
|
|
|
|
//save all revisions
|
|
|
|
for(var i=0;i<changesets.length;i++)
|
|
|
|
{
|
|
|
|
values["pad:" + padId + ":revs:" + i] = {changeset: changesets[i],
|
|
|
|
meta : {
|
|
|
|
author: newAuthorIDs[changesetsMeta[i].a],
|
|
|
|
timestamp: changesetsMeta[i].t,
|
|
|
|
atext: changesetsMeta[i].atext || undefined
|
|
|
|
}};
|
|
|
|
}
|
|
|
|
|
|
|
|
//save all chat messages
|
|
|
|
for(var i=0;i<chatMessages.length;i++)
|
|
|
|
{
|
|
|
|
values["pad:" + padId + ":chat:" + i] = {"text": chatMessages[i].lineText,
|
|
|
|
"userId": oldName2newName[chatMessages[i].userId],
|
|
|
|
"time": chatMessages[i].time}
|
|
|
|
}
|
|
|
|
|
|
|
|
//generate the latest atext
|
|
|
|
var fullAPool = AttributePoolFactory.createAttributePool().fromJsonable(apool);
|
|
|
|
var keyRev = Math.floor(padmeta.head / padmeta.keyRevInterval) * padmeta.keyRevInterval;
|
|
|
|
var atext = changesetsMeta[keyRev].atext;
|
|
|
|
var curRev = keyRev;
|
|
|
|
while (curRev < padmeta.head)
|
|
|
|
{
|
|
|
|
curRev++;
|
|
|
|
var changeset = changesets[curRev];
|
|
|
|
atext = Changeset.applyToAText(changeset, atext, fullAPool);
|
|
|
|
}
|
|
|
|
|
|
|
|
values["pad:" + padId] = {atext: atext,
|
|
|
|
pool: apool,
|
|
|
|
head: padmeta.head,
|
|
|
|
chatHead: padmeta.numChatMessages }
|
|
|
|
|
|
|
|
}
|
|
|
|
catch(e)
|
|
|
|
{
|
|
|
|
console.error("Error while converting pad " + padId + ", pad skiped");
|
|
|
|
console.error(e.stack ? e.stack : JSON.stringify(e));
|
|
|
|
callback();
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
var sql = "";
|
|
|
|
for(var key in values)
|
|
|
|
{
|
|
|
|
sql+="REPLACE INTO store VALUES (" + etherpadDB.escape(key) + ", " + etherpadDB.escape(JSON.stringify(values[key]))+ ");\n";
|
|
|
|
}
|
|
|
|
|
|
|
|
fs.writeSync(sqlOutput, sql, undefined, "utf-8");
|
|
|
|
callback();
|
|
|
|
}
|
|
|
|
], callback);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* This parses a Page like Etherpad uses them in the databases
|
|
|
|
* The offsets descripes the length of a unit in the page, the data are
|
|
|
|
* all values behind each other
|
|
|
|
*/
|
|
|
|
function parsePage(array, pageStart, offsets, data, json)
|
|
|
|
{
|
|
|
|
var start = 0;
|
|
|
|
var lengths = offsets.split(",");
|
|
|
|
|
|
|
|
for(var i=0;i<lengths.length;i++)
|
|
|
|
{
|
|
|
|
var unitLength = lengths[i];
|
|
|
|
|
|
|
|
//skip empty units
|
|
|
|
if(unitLength == "")
|
|
|
|
continue;
|
|
|
|
|
|
|
|
//parse the number
|
|
|
|
unitLength = Number(unitLength);
|
|
|
|
|
|
|
|
//cut the unit out of data
|
|
|
|
var unit = data.substr(start, unitLength);
|
|
|
|
|
|
|
|
//put it into the array
|
|
|
|
array[pageStart + i] = json ? JSON.parse(unit) : unit;
|
|
|
|
|
|
|
|
//update start
|
|
|
|
start+=unitLength;
|
|
|
|
}
|
|
|
|
}
|