toolbox/extras/characterai-dumper/characterai-dumper.user.js

286 lines
10 KiB
JavaScript

// ==UserScript==
// @name CharacterAI Dumper
// @namespace Violentmonkey Scripts
// @match https://beta.character.ai/*
// @grant none
// @version 1.4
// @author 0x000011b
// @description Allows downloading saved chat messages and character definitions from CharacterAI.
// @downloadURL https://github.com/0x000011b/characterai-dumper/raw/master/characterai-dumper.user.js
// @updateURL https://github.com/0x000011b/characterai-dumper/raw/master/characterai-dumper.user.js
// ==/UserScript==
/** Tag prepended to every console message emitted by this script. */
const LOG_PREFIX = "[CharacterAI Dumper v1.4]";
/**
 * Builds a logger that forwards to the named `console` method, prefixing the
 * first argument with the script tag. The console method is looked up on each
 * call rather than captured when the logger is created.
 */
const makePrefixedLogger =
  (consoleMethodName) =>
  (firstArg, ...remainingArgs) =>
    console[consoleMethodName](`${LOG_PREFIX} ${firstArg}`, ...remainingArgs);
/** Writes an informational message to the console. */
const log = makePrefixedLogger("log");
/** Writes an error message to the console. */
log.error = makePrefixedLogger("error");
// Endpoints to intercept. Responses are matched against these by exact URL.
const CHARACTER_INFO_URL = "https://beta.character.ai/chat/character/info/";
const CHARACTER_EXTRA_INFO_URL = "https://beta.character.ai/chat/character/";
const CHARACTER_HISTORIES_URL =
  "https://beta.character.ai/chat/character/histories/";
/**
 * Maps a character's identifier to their basic info + chat histories.
 *
 * Keys are character names taken from intercepted responses, so this is a
 * null-prototype object: a character called e.g. "constructor" or "toString"
 * must not resolve to a member inherited from `Object.prototype`.
 */
const characterToSavedDataMap = Object.create(null);
/**
 * Creates the "Download" link on the "View Saved Chats" page.
 *
 * The link is appended to the first `.home-sec-header` element whose text
 * contains "Your Past Conversations with". Nothing happens when the link was
 * already injected or when no such element is found.
 *
 * @param {string} dataString - Contents of the file offered for download.
 * @param {string} filename - Suggested name for the downloaded file.
 */
const addDownloadLinkInSavedChats = (dataString, filename) => {
  const linkId = "injected-chat-dl-link";

  // Don't create duplicate links.
  if (document.getElementById(linkId)) {
    return;
  }

  // We want to add a link next to the "your past conversations with XXX" text.
  // Only the first matching header is used: element ids must be unique, and
  // each extra link would also allocate another Blob URL.
  const header = Array.from(
    document.getElementsByClassName("home-sec-header")
  ).find((element) =>
    element.textContent.includes("Your Past Conversations with")
  );
  if (!header) {
    return;
  }

  const dataBlob = new Blob([dataString], { type: "text/plain" });
  const downloadLink = document.createElement("a");
  downloadLink.id = linkId;
  downloadLink.textContent = "Download";
  downloadLink.href = URL.createObjectURL(dataBlob);
  downloadLink.download = filename;
  downloadLink.style = "padding-left: 8px";
  header.appendChild(downloadLink);
};
/**
 * Creates the "Download" link in the "Character Editor" page.
 *
 * The link is appended to the first element matching the editor's container
 * selector whose text contains `characterName`. Nothing happens when the link
 * was already injected or when no such element is found.
 *
 * @param {string} dataString - Contents of the file offered for download.
 * @param {string} filename - Suggested name for the downloaded file.
 * @param {string} characterName - Text used to pick the right container.
 */
const addDownloadLinkInCharacterEditor = (
  dataString,
  filename,
  characterName
) => {
  const linkId = "injected-character-info-dl-link";

  // Don't create duplicate links.
  if (document.getElementById(linkId)) {
    return;
  }

  // Only the first matching container is used: element ids must be unique,
  // and each extra link would also allocate another Blob URL.
  const container = Array.from(
    document.querySelectorAll("div.p-0.m-1.mb-3.border.rounded.m-1")
  ).find((element) => element.textContent.includes(characterName));
  if (!container) {
    return;
  }

  const dataBlob = new Blob([dataString], { type: "text/plain" });
  const downloadLink = document.createElement("a");
  downloadLink.id = linkId;
  downloadLink.textContent = "Download";
  downloadLink.href = URL.createObjectURL(dataBlob);
  downloadLink.download = filename;
  downloadLink.style = "padding-left: 66px";
  container.appendChild(downloadLink);
};
/** Characters that carry special meaning inside a regular expression. */
const REGEXP_SPECIAL_CHARACTERS = /[-\/\\^$*+?.()|[\]{}]/g;
/**
 * Escapes a string so it can be used inside a regex: every special character
 * is prefixed with a backslash so that it matches itself literally.
 */
const escapeStringForRegExp = (stringToGoIntoTheRegex) =>
  stringToGoIntoTheRegex.replace(
    REGEXP_SPECIAL_CHARACTERS,
    (specialCharacter) => "\\" + specialCharacter
  );
/**
 * Takes in chat histories and anonymizes them.
 *
 * Works in two passes over `histories.histories[*].msgs[*]`:
 *   1. The human participant of every message (`src` for human messages,
 *      `tgt` for bot messages) has its name fields recorded and then
 *      overwritten with placeholders. Human messages also have their
 *      `display_name` recorded and overwritten.
 *   2. Every recorded name is redacted from the text of all bot messages and
 *      of the first message of each history. Doing this after all names are
 *      known means a name first seen late in the data is still removed from
 *      earlier messages.
 *
 * Missing `user`/`account` objects and non-string `text` values are skipped
 * instead of aborting the whole run.
 *
 * @param {{histories: Array<{msgs: Array<Object>}>}} histories - Parsed
 *   histories payload. Modified in place.
 * @returns {Object} The same object that was passed in, for convenience at
 *   the call site.
 */
const anonymizeHistories = (histories) => {
  const namesToReplace = new Set();

  /** Records a participant's name fields, then overwrites them in place. */
  const scrubParticipant = (participant) => {
    if (!participant) {
      return;
    }
    const user = participant.user;
    if (user) {
      namesToReplace.add(user.username);
      namesToReplace.add(user.first_name);
      namesToReplace.add(user.name);
      user.username = "[USERNAME_REDACTED]";
      user.first_name = "[FIRST_NAME_REDACTED]";
      user.name = "[NAME_REDACTED]";
      if (user.account) {
        namesToReplace.add(user.account.name);
        user.account.name = "[ACCOUNT_NAME_REDACTED]";
      }
    }
    namesToReplace.add(participant.name);
    participant.name = "[NAME_REDACTED]";
  };

  // Pass 1: collect every name and anonymize the metadata.
  for (const history of histories.histories) {
    for (const msg of history.msgs) {
      if (msg.src.is_human) {
        // The source is the human.
        scrubParticipant(msg.src);
        namesToReplace.add(msg.display_name);
        msg.display_name = "[DISPLAY_NAME_REDACTED]";
      } else {
        // Bot message, so the human is the target.
        scrubParticipant(msg.tgt);
      }
    }
  }

  // Longest names go first so that "John Smith" is consumed as a whole before
  // "John" gets a chance to match and leave " Smith" behind.
  //
  // Names are escaped here rather than with the file-level helper because the
  // pattern below needs the `u` flag, under which escaping `-` outside a
  // character class is a syntax error.
  const escapedNames = [...namesToReplace]
    .filter((name) => typeof name === "string" && name.trim() !== "")
    .sort((a, b) => b.length - a.length)
    .map((name) => name.replace(/[\\^$.*+?()[\]{}|\/]/g, "\\$&"));
  if (escapedNames.length === 0) {
    return histories;
  }

  // `\b` only knows about ASCII word characters, so a name like "José" would
  // never match on its own. Use Unicode-aware boundaries instead: the name
  // must not be preceded or followed by a letter, mark, digit or underscore.
  // The leading boundary is captured (not a lookbehind) and put back by the
  // replacer.
  const wordCharacter = "[\\p{L}\\p{M}\\p{N}_]";
  const nameRegex = new RegExp(
    `(^|[^\\p{L}\\p{M}\\p{N}_])(?:${escapedNames.join("|")})(?!${wordCharacter})`,
    "gu"
  );
  const redactNames = (text) =>
    typeof text === "string"
      ? text.replace(
          nameRegex,
          (_match, leadingBoundary) =>
            leadingBoundary + "[NAME_IN_MESSAGE_REDACTED]"
        )
      : text;

  // Pass 2: a bot may have uttered the user's name, so scrub bot messages.
  // The first message of each history is always scrubbed as well.
  for (const history of histories.histories) {
    history.msgs.forEach((msg, index) => {
      if (index === 0 || !msg.src.is_human) {
        msg.text = redactNames(msg.text);
      }
    });
  }

  // This was modified in-place, but we return it here for simplicity at the
  // call site.
  return histories;
};
/**
 * Configures XHook to intercept the endpoints we care about.
 *
 * Registers a single `xhook.after` handler that inspects `res.finalUrl` and
 * reacts to three endpoints:
 *   - character info: creator name is redacted, result cached as `.info`;
 *   - chat histories: anonymized, result cached as `.histories`;
 *   - character definitions (editor): creator name is redacted and a
 *     download link is scheduled right away.
 * Once both `.info` and `.histories` are cached for a character, a download
 * link for the saved chats is scheduled.
 *
 * Any error raised while handling a response is logged, not rethrown.
 */
const configureXHookIntercepts = () => {
  // The link doesn't get added if we inject it right away (probably because
  // of a React re-render), so we wait a little while instead.
  const linkInjectionDelayMs = 2000;

  /** Returns the cache entry for a character, creating it on first use. */
  const getCacheEntry = (characterIdentifier) => {
    const hasEntry = Object.prototype.hasOwnProperty.call(
      characterToSavedDataMap,
      characterIdentifier
    );
    if (!hasEntry) {
      characterToSavedDataMap[characterIdentifier] = {};
    }
    return characterToSavedDataMap[characterIdentifier];
  };

  /**
   * Returns the trimmed name of the author of the first bot message found in
   * the histories, or `null` when there is none. Human-authored messages are
   * skipped so the user's own name is never used as the identifier.
   */
  const findCharacterNameInHistories = (data) => {
    for (const history of data.histories || []) {
      for (const msg of history.msgs || []) {
        const author = msg.src;
        if (author && !author.is_human && typeof author.name === "string") {
          return author.name.trim();
        }
      }
    }
    return null;
  };

  xhook.after((_req, res) => {
    try {
      const endpoint = res.finalUrl;
      if (
        endpoint !== CHARACTER_INFO_URL &&
        endpoint !== CHARACTER_HISTORIES_URL &&
        endpoint !== CHARACTER_EXTRA_INFO_URL
      ) {
        // We don't care about other endpoints.
        return;
      }

      const data = JSON.parse(res.data);

      if (endpoint === CHARACTER_EXTRA_INFO_URL) {
        const editedCharacter = data.character.name.trim();
        data.user__username = "[BOT_CREATOR_NAME_REDACTED]";
        data.character.user__username = "[BOT_CREATOR_NAME_REDACTED]";
        log(`Got definitions for ${editedCharacter}, creating download link.`);
        log("If it doesn't show up, here's the data:", JSON.stringify(data));

        // The character editor returns all the info we want in a single
        // request, so we can just create the link and be done.
        setTimeout(
          () =>
            addDownloadLinkInCharacterEditor(
              JSON.stringify(data),
              `${editedCharacter} (Definitions).json`,
              editedCharacter
            ),
          linkInjectionDelayMs
        );
        return;
      }

      let characterIdentifier;
      if (endpoint === CHARACTER_INFO_URL) {
        characterIdentifier = data.character.name.trim();
        data.character.user__username = "[BOT_CREATOR_NAME_REDACTED]";
        log(`Got character info for ${characterIdentifier}, caching...`);
        getCacheEntry(characterIdentifier).info = data;
      } else {
        // Must be read before anonymizing; only the human's names are
        // replaced, but there is no reason to depend on that here.
        characterIdentifier = findCharacterNameInHistories(data);
        if (characterIdentifier === null) {
          log("Got chat histories without any character messages, skipping.");
          return;
        }
        log(`Got chat histories for ${characterIdentifier}, caching...`);
        getCacheEntry(characterIdentifier).histories = anonymizeHistories(data);
      }

      const currentCharacter = getCacheEntry(characterIdentifier);
      if (currentCharacter.info && currentCharacter.histories) {
        // We have all the downloadable data for this character. Create the
        // download link.
        log(
          `Got all the data for ${characterIdentifier}, creating download link.`
        );
        log(
          "If it doesn't show up, here's the data:",
          JSON.stringify(currentCharacter)
        );
        setTimeout(
          () =>
            addDownloadLinkInSavedChats(
              JSON.stringify(currentCharacter),
              `${characterIdentifier}.json`
            ),
          linkInjectionDelayMs
        );
      }
    } catch (err) {
      log.error("ERROR:", err);
    }
  });
};
// This is where XHook (lib for intercepting XHR/AJAX calls) gets injected into
// the document, and once it gets properly parsed it'll call out to the setup
// function.
//
// Copy-pasted and adapted from: https://stackoverflow.com/a/8578840
log("Injecting XHook to intercept XHR/AJAX calls.");
(function (document, elementTagName, elementTagId) {
  // Already injected, nothing to do.
  if (document.getElementById(elementTagId)) {
    return;
  }

  const js = document.createElement(elementTagName);
  js.id = elementTagId;
  js.onload = function () {
    log("Done! Configuring intercepts.");
    configureXHookIntercepts();
  };
  // Without this, a failed download would leave the script silently inert.
  js.onerror = function () {
    log.error("Failed to load XHook from", js.src);
  };
  // Link to hosted version taken from the official repo:
  // https://github.com/jpillora/xhook
  // NOTE(review): this runs remote, unpinned code in the page with no
  // integrity check — consider bundling a fixed copy instead.
  js.src = "https://jpillora.com/xhook/dist/xhook.min.js";

  // Insert before the first existing element of the same tag when there is
  // one; otherwise fall back to the document head (or root element).
  const fjs = document.getElementsByTagName(elementTagName)[0];
  if (fjs && fjs.parentNode) {
    fjs.parentNode.insertBefore(js, fjs);
  } else {
    (document.head || document.documentElement).appendChild(js);
  }
})(document, "script", "xhook");