# Source: toolbox/toolbox/datasets/light_dialogue.py
# (file-viewer metadata: 62 lines, 1.7 KiB, Python)

import os
import pickle
import typing as t
from dataclasses import dataclass
import mashumaro
from toolbox.datasets import BaseDataset
from toolbox.utils.dataset import get_data_path
@dataclass(frozen=True)
class LightDialogueAgent(mashumaro.DataClassDictMixin):
    """An agent entry as listed in a LIGHT dialogue episode's ``agents`` field.

    Instances are immutable (``frozen=True``). The mashumaro mixin supplies
    dict (de)serialization such as ``from_dict``.
    """

    # NOTE(review): field meanings are inferred from the names only --
    # confirm against the LIGHT data format.
    name: str  # presumably the agent's display name
    persona: str  # presumably free-text persona description
@dataclass(frozen=True)
class LightDialogueSetting(mashumaro.DataClassDictMixin):
    """The setting record attached to a LIGHT dialogue episode.

    Instances are immutable (``frozen=True``). The mashumaro mixin supplies
    dict (de)serialization such as ``from_dict``. All four fields are plain
    strings.
    """

    # NOTE(review): field meanings are inferred from the names only --
    # confirm against the LIGHT data format.
    name: str
    category: str
    description: str
    background: str
@dataclass(frozen=True)
class LightDialogueEpisode(mashumaro.DataClassDictMixin):
    """One episode record of the LIGHT dialogue dataset.

    Instances are immutable (``frozen=True``) and are meant to be built from
    plain dictionaries through the ``from_dict`` constructor provided by
    ``mashumaro.DataClassDictMixin``; nested ``agents`` and ``setting`` values
    are typed as their own dataclasses.
    """

    # Structured sub-records.
    agents: t.List[LightDialogueAgent]
    setting: LightDialogueSetting

    # NOTE(review): everything below is documented from names and type shapes
    # only. The list-valued fields presumably hold one entry per dialogue
    # turn -- TODO confirm against the LIGHT data format.
    character: t.List[str]
    context: t.List[str]

    # Nested lists of strings (presumably one inner list per turn).
    room_objects: t.List[t.List[str]]
    room_agents: t.List[t.List[str]]

    # String-to-string mapping; presumably entity name -> description.
    all_descriptions: t.Dict[str, str]

    # More nested lists of strings (presumably one inner list per turn).
    available_actions: t.List[t.List[str]]
    carrying: t.List[t.List[str]]
    wielding: t.List[t.List[str]]

    # Flat lists of strings (presumably what was said / emoted / done).
    speech: t.List[str]
    emote: t.List[str]
    action: t.List[str]
class LightDialogueDataset(BaseDataset[LightDialogueEpisode]):
    """
    LIGHT: Learning in Interactive Games with Humans and Text

    The LIGHT project is a large-scale fantasy text adventure game research
    platform for training agents that can both talk and act, interacting either
    with other models or with humans.

    https://parl.ai/projects/light/
    """

    def generator(self) -> t.Generator[LightDialogueEpisode, None, None]:
        """Yield every episode stored in the dataset's pickle file.

        The file ``light_data.pkl`` is looked up inside the directory returned
        by ``get_data_path("light_dialogue")``. Its unpickled content is
        iterated, and each item is converted with
        ``LightDialogueEpisode.from_dict`` before being yielded.
        """
        pickle_path = os.path.join(
            get_data_path("light_dialogue"),
            "light_data.pkl",
        )

        # NOTE(review): unpickling can execute arbitrary code, so this file
        # must only ever come from a trusted source.
        with open(pickle_path, "rb") as handle:
            raw_episodes = pickle.load(handle)

        # The whole payload is already in memory at this point, so the file
        # does not need to stay open while episodes are being produced.
        for raw_episode in raw_episodes:
            yield LightDialogueEpisode.from_dict(raw_episode)