diff --git a/waifu/datasets/kajiwoto.py b/waifu/datasets/kajiwoto.py
index 78fb0c1..039573e 100644
--- a/waifu/datasets/kajiwoto.py
+++ b/waifu/datasets/kajiwoto.py
@@ -12,6 +12,9 @@ from waifu.utils.dataset import get_data_path
 # The regex used to find message variants (e.g.: `%{Hi|Hello} there!`)
 KAJIWOTO_VARIANT_REGEX = re.compile(r'%{(.+?)}')
 
+# These bots shouldn't be a part of the final dataset, for whatever reason.
+BLACKLISTED_BOT_IDS = set(["WvqA"])
+
 logger = logging.getLogger(__name__)
 
 
@@ -262,6 +265,10 @@ def _enumerate_kajiwoto_json_files() -> list[str]:
             # Don't want to list metadata files here.
            continue
 
+        if item.replace(".json", "") in BLACKLISTED_BOT_IDS:
+            # Don't want blacklisted bots being included.
+            continue
+
         item_path = os.path.join(dataset_path, item)
         if not os.path.isfile(item_path):
             # Don't care about folders.
diff --git a/waifu/modules/discord_vdm.py b/waifu/modules/discord_vdm.py
index 264dfce..3b31051 100644
--- a/waifu/modules/discord_vdm.py
+++ b/waifu/modules/discord_vdm.py
@@ -23,6 +23,8 @@ from waifu.utils.dataset import get_data_path
 # Matches user mentions, channel links, emotes and maybe other stuff.
 SPECIAL_TOKENS_REGEX = re.compile(r"<[@:#].+?>")
 
+MINIMUM_EPISODE_LENGTH = 5
+
 logger = logging.getLogger(__name__)
 
 
@@ -45,9 +47,10 @@ class DiscordVDM(BaseModule):
             turns, last_message_id = episode_contents
 
             # Discard short episodes.
-            if len(turns) < 8:
-                logger.debug("Found short %s-turn episode, discarding.",
-                             len(turns))
+            if len(turns) < MINIMUM_EPISODE_LENGTH:
+                logger.debug(
+                    "Found short %s-turn episode (< %s), discarding.",
+                    len(turns), MINIMUM_EPISODE_LENGTH)
                 continue
 
             # Discard conversations with overly short messages.
@@ -194,7 +197,8 @@ def _build_episode_turns(
             continue
 
         if _looks_like_ooc(cleaned_text):
-            # Self-explanatory.
+            logger.debug("Dropping what _seems_ to be OOC talk: `%s`",
+                         cleaned_text)
             continue
 
         # Get username.
diff --git a/waifu/scripts/build_dataset.py b/waifu/scripts/build_dataset.py
index e6b4e97..cf9a22a 100755
--- a/waifu/scripts/build_dataset.py
+++ b/waifu/scripts/build_dataset.py
@@ -10,6 +10,7 @@ import sys
 import typing as t
 
 from waifu.modules import BaseModule
+from waifu.utils.strings import contains_suspect_unicode
 
 # TODO(11b): Needs manual maintenance to keep up-to-date. Consider doing some
 # metaprogramming trickery to build this list out instead.
@@ -118,6 +119,8 @@ def main() -> None:
     # file.
     for module in modules:
         for episode in module():
+            if contains_suspect_unicode(episode):
+                print(f"Found suspect unicode contents in `{episode}`")
             json_line = json.dumps({"text": episode})
             output_file.write(f"{json_line}\n")
 
diff --git a/waifu/utils/strings.py b/waifu/utils/strings.py
index eb92fca..0674cc5 100644
--- a/waifu/utils/strings.py
+++ b/waifu/utils/strings.py
@@ -2,6 +2,7 @@
 
 # Some of this is pasta from Meta's ParlAI. See:
 # https://github.com/facebookresearch/ParlAI/blob/main/parlai/utils/strings.py
+import regex
 
 
 def normalize_string(text: str, version: int = 1) -> str:
@@ -54,9 +55,18 @@ def title_case(string: str) -> str:
 
 def uppercase(string: str) -> str:
     '''
-    Make the first character of the string uppercase, if the string is non-empty.
+    Makes the first character of the string uppercase, if the string is
+    non-empty.
     '''
     if len(string) == 0:
         return string
     else:
         return string[0].upper() + string[1:]
+
+
+def contains_suspect_unicode(string: str) -> bool:
+    '''
+    Returns whether the given string seems to have suspect Unicode trickery
+    (e.g.: Zalgo text).
+    '''
+    return regex.search(r"\pM{3,}", string) is not None
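
Reviewer note: a quick, self-contained sanity check of the new `contains_suspect_unicode()` helper. This is a sketch, not part of the patch; the Zalgo sample string is made up for illustration, and it assumes the third-party `regex` package is installed (stdlib `re` doesn't support the `\pM` Unicode property):

```python
import regex

def contains_suspect_unicode(string: str) -> bool:
    # Three or more consecutive combining marks (\pM) is the heuristic the
    # patch uses to flag Zalgo-style text.
    return regex.search(r"\pM{3,}", string) is not None

# "Z" followed by three stacked combining marks gets flagged...
assert contains_suspect_unicode("Z\u0335\u0327\u0336algo")
# ...while ordinary accented text (at most one mark per character) does not.
assert not contains_suspect_unicode("café au lait")
assert not contains_suspect_unicode("plain ASCII text")
```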
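
For context on `KAJIWOTO_VARIANT_REGEX` (touched in the kajiwoto.py hunk above): Kajiwoto messages can embed variant groups like `%{Hi|Hello} there!`. Below is a minimal sketch of how those groups could be expanded into concrete messages; the `expand_variants` helper is hypothetical, written here only to illustrate the regex, and is not part of this patch or the existing module:

```python
import itertools
import re

# Same pattern as in waifu/datasets/kajiwoto.py.
KAJIWOTO_VARIANT_REGEX = re.compile(r'%{(.+?)}')

def expand_variants(message: str) -> list[str]:
    # Hypothetical helper: enumerate every combination of variant groups.
    groups = KAJIWOTO_VARIANT_REGEX.findall(message)
    if not groups:
        return [message]
    expanded = []
    for combo in itertools.product(*(group.split("|") for group in groups)):
        text = message
        for choice in combo:
            # Replace the leftmost remaining %{...} with this combo's pick.
            text = KAJIWOTO_VARIANT_REGEX.sub(choice, text, count=1)
        expanded.append(text)
    return expanded

print(expand_variants("%{Hi|Hello} there!"))
# ['Hi there!', 'Hello there!']
```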