diff --git a/README.md b/README.md index 5530621..60f4d34 100644 --- a/README.md +++ b/README.md @@ -49,7 +49,7 @@ The default behavior is to write a file called `rev-{GIT_REVISION_HASH}-args{HAS The script also has an option to print some examples instead of writing to a file, for debugging/dev purposes. Example usage: ```bash -$ ./waifu/scripts/build_dataset.py --print 1 --modules 'light_dialogue_vdm:LightDialogueVDM' # or -p 1 and -m ... +$ ./waifu/scripts/build_dataset.py --print 1 --modules 'light_dialogue_pdm:LightDialoguePDM' # or -p 1 and -m ... ``` Example output: diff --git a/waifu/modules/light_dialogue_vdm.py b/waifu/modules/light_dialogue_pdm.py similarity index 68% rename from waifu/modules/light_dialogue_vdm.py rename to waifu/modules/light_dialogue_pdm.py index 7e9588f..f1d5255 100644 --- a/waifu/modules/light_dialogue_vdm.py +++ b/waifu/modules/light_dialogue_pdm.py @@ -1,22 +1,22 @@ import typing as t +from waifu.core.consts import PromptConstants from waifu.datasets.light_dialogue import LightDialogueDataset from waifu.modules import BaseModule from waifu.utils.strings import normalize_string, title_case -class LightDialogueVDM(BaseModule): - '''Vanilla Dialogue Module based on the LIGHT dialogue dataset.''' +class LightDialoguePDM(BaseModule): + '''Persona Dialogue Module based on the LIGHT dataset.''' def generator(self) -> t.Generator[str, None, None]: for episode in LightDialogueDataset(): - # TODO(11b): Context and persona don't belong in a vanilla dialogue - # module. + # TODO(11b): Scenario doesn't belong in a persona dialog module. context_message = f"Context: {episode.context[0]}\n" persona_message = "" for agent in episode.agents: - persona_message += f"{title_case(agent.name)}'s Description: {agent.persona}\n" + persona_message += f"{PromptConstants.pdm_prefix_for(title_case(agent.name))}: {agent.persona}\n" episode_messages: t.List[str] = [context_message, persona_message] turn_count = len(episode.speech) @@ -30,9 +30,13 @@ class LightDialogueVDM(BaseModule): # If there was an action performed in that turn, add it to the # string. - action = episode.action[idx] - if action is not None: - message += f" *{action}*" + # + # NOTE(11b): Disabled for now. Adding the action like this + # generates grammatically incorrect sentences. + + # action = episode.action[idx] + # if action is not None: + # message += f" *{action}*" # If there was an emote in that turn, add it to the string. emote = episode.emote[idx] diff --git a/waifu/scripts/build_dataset.py b/waifu/scripts/build_dataset.py index c146587..ca5e509 100755 --- a/waifu/scripts/build_dataset.py +++ b/waifu/scripts/build_dataset.py @@ -11,13 +11,13 @@ import typing as t from waifu.modules import BaseModule -# TODO(11b): Needs manual maintenance ot keep up-to-date. Consider doing some +# TODO(11b): Needs manual maintenance to keep up-to-date. Consider doing some # metaprogramming trickery to build this list out instead. DEFAULT_MODULE_LIST = [ "characterai_pdm:CharacterAiPDM", "kajiwoto_pdm:KajiwotoPDM", # "kajiwoto_vdm:KajiwotoVDM", - "light_dialogue_vdm:LightDialogueVDM", + "light_dialogue_pdm:LightDialoguePDM", ] DEFAULT_MODULES_STRING = ",".join(DEFAULT_MODULE_LIST)