toolbox/toolbox/modules/light_dialogue_pdm.py

import typing as t

from toolbox.core.consts import PromptConstants
from toolbox.datasets.light_dialogue import LightDialogueDataset
from toolbox.modules import BaseModule
from toolbox.utils.strings import normalize_string, title_case


class LightDialoguePDM(BaseModule):
    '''Persona Dialogue Module based on the LIGHT dataset.'''

    def generator(self) -> t.Generator[list[str], None, None]:
        for episode in LightDialogueDataset():
            # TODO(11b): Scenario doesn't belong in a persona dialog module.
            context_message = f"Scenario: {episode.context[0]}\n"

            persona_message = ""
            for agent in episode.agents:
                persona_message += f"{PromptConstants.pdm_prefix_for(title_case(agent.name))}: {agent.persona}\n"

            episode_messages: t.List[str] = [context_message, persona_message]
            turn_count = len(episode.speech)

            for idx in range(turn_count):
                character = title_case(episode.character[idx])
                speech = normalize_string(episode.speech[idx])

                # Start off with just the actual speech dialogue.
                message = speech

                # If there was an action performed in that turn, add it to the
                # string.
                #
                # NOTE(11b): Disabled for now. Adding the action like this
                # generates grammatically incorrect sentences.

                # action = episode.action[idx]
                # if action is not None:
                #     message += f" *{action}*"

                # If there was an emote in that turn, add it to the string.
                emote = episode.emote[idx]
                if emote is not None:
                    message = f"*{emote}* {message}"

                # Finally, prepend the turn character's name.
                message = f"{character}: {message}"

                episode_messages.append(message)

            yield episode_messages