First

2024-10-27 22:19:51 +01:00 · 2024-10-27 22:19:51 +01:00 · a5d9f6d1bd
commit a5d9f6d1bd
38 changed files with 5279 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,4 @@
+v2a
+*.srt
+*.ass
+*.txt
--- a/.gitmodules
+++ b/.gitmodules
@ -0,0 +1,3 @@
+[submodule "subm/argparse"]
+	path = subm/argparse
+	url = https://github.com/cofyc/argparse
--- a/21
+++ b/21
@ -0,0 +1,21 @@
+CFLAGS += $(shell pkg-config --cflags harfbuzz)
+LIBS += $(shell pkg-config --libs harfbuzz)
+CFLAGS += $(shell pkg-config --cflags freetype2)
+LIBS += $(shell pkg-config --libs freetype2)
+LIBS += -lm
+
+INCLUDES = subm/argparse
+
+# Default target: debug build with the address, leak and undefined-behaviour sanitizers enabled.
+v2a: src/*.c subm/argparse/argparse.c
+	${CC} $^ ${CFLAGS} -I${INCLUDES} -ggdb -std=gnu11 -o $@ ${LIBS} -fsanitize=address -fsanitize=leak -fsanitize=undefined
+
+# These targets do not produce a file of the same name. Without .PHONY, a stray
+# file called "r", "g", "clean" or "re" would make them report "up to date".
+.PHONY: r g clean re
+
+r: v2a
+	./v2a
+
+g: v2a
+	gdb ./v2a
+
+clean:
+	-rm -- v2a
+
+re: clean v2a
--- a/README.md
+++ b/README.md
@ -0,0 +1,37 @@
+# vtt2ass-cstyle
+A .vtt subtitle converter to .ass and .srt.
+
+## Building
+Clone the repo with
+```sh
+git clone --recurse-submodules https://github.com/moex3/vtt2ass-cstyle
+```
+Make sure the required dependencies are present: `harfbuzz` and `freetype2`.
+
+Build with
+```sh
+make
+```
+By default it will build a debug build.
+
+To generate both .srt and .ass subs in one run:
+```sh
+./v2a srt --output out.srt ass --output out.ass --width 1920 --height 1080 --font ~/.local/share/fonts/ipaexg.ttf  input.vtt
+```
+This will generate `out.srt` and `out.ass` from the `input.vtt` file, with the specified video dimensions and using the `ipaexg` font.
+
+## Features
+- Handle ruby tags for srt by enclosing them in parentheses, and by positioning the text correctly for ass.
+NO vertical ruby for now.
+![srt ruby](https://ra.thesungod.xyz/MNDU3ZUE.jpeg)
+![ass ruby](https://ra.thesungod.xyz/VK_f23Tc.jpeg)
+- Basic vertical text support for ass subtitles.
+- Bold, italic and underline support.
+
+## Missing features
+- No overlap detection.
+- No region support.
+- The positioning can be buggy (I can never get that right).
+- Not tested with real-world subtitles. If you want to use it and it fails on a subtitle, open an issue.
+- I wrote the vtt tokenizer myself, not according to the spec.
+- Codebase quality. Enough said.
--- a/src/ass.c
+++ b/src/ass.c
@ -0,0 +1,666 @@
+#include "ass.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <stdbool.h>
+#include <assert.h>
+#include <stdlib.h>
+
+#include "util.h"
+#include "dyna.h"
+#include "opts.h"
+#include "cuepos.h"
+#include "ass_ruby.h"
+#include "font.h"
+
+// for debug
+#include <signal.h>
+
+#define DEBUGBOX 1
+
+#if DEBUGBOX == 1
+#include "textextents.h"
+#endif
+
+static void ass_write_text(FILE *f, const struct cue *c);
+static void ass_pos_line_in_box(const struct cue *c, const struct cuepos_box *box, struct ass_cue_pos *op);
+
+/* Element destructor for the ass_nodes dyna: releases the text buffer owned
+ * by the node. The style pointer is left alone. */
+static void ass_node_free(void *ptr)
+{
+    struct ass_node *node = ptr;
+
+    /* free(NULL) is a no-op, so no guard is required */
+    free(node->text);
+}
+
+/* qsort() comparator for struct ass_node: ascending start time first,
+ * then the layer number as tie breaker. */
+static int ass_node_compar(const void *d1, const void *d2)
+{
+    const struct ass_node *lhs = d1;
+    const struct ass_node *rhs = d2;
+
+    if (lhs->start_ms != rhs->start_ms)
+        return lhs->start_ms < rhs->start_ms ? -1 : 1;
+
+    /* Same start time, order by layer */
+    return lhs->layer - rhs->layer;
+}
+
+/* Format a millisecond timestamp as "H:MM:SS.CC" (CC being centiseconds)
+ * into out, writing at most n bytes including the terminator. */
+static void ass_ms_to_str(int64_t tms, int n, char out[n])
+{
+    int64_t rest = tms;
+
+    const int hours = rest / H_IN_MS;
+    rest %= H_IN_MS;
+
+    const int minutes = rest / M_IN_MS;
+    rest %= M_IN_MS;
+
+    const int seconds = rest / S_IN_MS;
+    const int millis = rest % S_IN_MS;
+
+    /* The format only carries two fractional digits, hence millis / 10 */
+    snprintf(out, n, "%d:%02d:%02d.%02d", hours, minutes, seconds, millis / 10);
+}
+
+/* Write the leading fields of a "Dialogue:" event line (everything up to,
+ * but not including, the Text field). Nodes without a style use "Default";
+ * the Name field is left empty and the three margins are written as 0000. */
+static void ass_write_dialog_prop(FILE *f, const struct ass_node *an)
+{
+    char start_str[32];
+    char end_str[32];
+    const char *actor = "";
+    const char *style_name = an->style ? an->style->name : "Default";
+
+    ass_ms_to_str(an->start_ms, sizeof(start_str), start_str);
+    ass_ms_to_str(an->end_ms, sizeof(end_str), end_str);
+
+    fprintf(f, "Dialogue: %d,%s,%s,%s,%s,%04d,%04d,%04d,,",
+            an->layer, start_str, end_str, style_name, actor, 0, 0, 0);
+}
+
+#if 1
+/* Format the override tags and the drawing commands for a filled rectangle
+ * covering `box`, positioned at an7pos with \an7. `color` is inserted into
+ * the \c tag; NULL selects "0000FF".
+ * Returns the snprintf() result, i.e. the length the text would need. */
+static int ass_draw_box(int n, char out_tags[n], const struct ass_cue_pos *an7pos, const struct cuepos_box *box, const char *color)
+{
+    const char *fill = color ? color : "0000FF";
+    const int x_near = box->left;
+    const int y_near = box->top;
+    const int x_far = box->left + box->width;
+    const int y_far = box->top + box->height;
+
+    /* Corner order: top-left, top-right, bottom-right, bottom-left */
+    return snprintf(out_tags, n, "{\\alpha&HB0&\\c&H%s&\\an7\\pos(%d,%d)\\p1}m %d %d l %d %d %d %d %d %d", fill,
+            an7pos->posx, an7pos->posy,
+            x_near, y_near, x_far, y_near,
+            x_far, y_far, x_near, y_far);
+}
+
+/*
+ * Append a layer-0 dialogue node that draws `box` as a filled rectangle,
+ * shown for the duration of cue `c` and using the "Default" style.
+ *
+ * an7pos: \pos() coordinates of the drawing
+ * box:    rectangle to draw
+ * color:  colour string inserted into the \c tag, NULL for the default
+ * ap:     the node is appended to ap->ass_nodes
+ *
+ * If the text cannot be duplicated the box is skipped with a message on
+ * stderr, instead of queueing a node whose NULL text would later be handed
+ * to fputs().
+ */
+void ass_append_box(const struct cue *c, const struct ass_cue_pos *an7pos,
+        const struct cuepos_box *box, const char *color, struct ass_params *ap)
+{
+    char text[1024];
+    int text_len;
+    struct ass_node anode = {
+        .start_ms = c->time_start,
+        .end_ms = c->time_end,
+        .layer = 0,
+        .style = ass_styles_get(ap->styles, "Default"),
+    };
+
+    text_len = ass_draw_box(sizeof(text), text, an7pos, box, color);
+    /* Explicit sign check: a bare int < size_t comparison converts a negative value to a huge one */
+    assert(text_len >= 0 && (size_t)text_len < sizeof(text));
+
+    anode.text = strdup(text);
+    if (anode.text == NULL) {
+        fprintf(stderr, "Out of memory, skipping debug box\n");
+        return;
+    }
+    dyna_append(ap->ass_nodes, &anode);
+}
+#endif
+
+/* Write the "[Script Info]" section followed by the "[V4+ Styles]" section
+ * title and its Format line. PlayResX/Y and LayoutResX/Y are all filled from
+ * the width and height of `vinf` (not declared in this file). */
+static void ass_write_header(FILE *f)
+{
+    fprintf(f,
+            "[Script Info]\n"
+            "; Script generated by vtt2ass-cstyle\n"
+            "; <link here>\n"
+            "ScriptType: v4.00+\n"
+            "Collisions: Normal\n"
+            "ScaledBorderAndShadow: Yes\n"
+            "PlayResX: %d\n"
+            "PlayResY: %d\n"
+            "LayoutResX: %d\n"
+            "LayoutResY: %d\n"
+            //"Kerning: Yes\n"
+            "\n"
+            "[V4+ Styles]\n"
+            "Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding\n"
+            , vinf->width, vinf->height, vinf->width, vinf->height);
+}
+
+/* Emit one "Style:" line, followed by an empty line, for every style in
+ * ap->styles. Only the name, font name, outline colour, spacing, outline
+ * size and shadow come from the style; the other fields are fixed. */
+static void ass_write_styles(FILE *f, const struct ass_params *ap)
+{
+    for (int idx = 0; idx < ap->styles->e_idx; idx++) {
+        const struct ass_style *st = dyna_elem(ap->styles, idx);
+        char outline_colour[12];
+
+        ass_style_rgb_to_str(st->bord_color, outline_colour);
+        const char *font_family = font_get_name(font_get_face(st->fontpath));
+
+        fprintf(f,
+                "Style: %s,%s,46,&H00FFFFFF,&H000000FF,%s,&H7F000000,0,0,0,0,100,100,%g,0,1,%g,%g,2,0,0,0,1\n"
+                "\n",
+                st->name, font_family, outline_colour, st->fsp, st->bord, st->shad);
+    }
+}
+
+/* Write the "[Events]" section title and its Format line. */
+static void ass_write_events_header(FILE *f)
+{
+    static const char events_header[] =
+        "[Events]\n"
+        "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n";
+
+    fputs(events_header, f);
+}
+
+/* Push a new style onto style_stack: a copy of the current top element in
+ * which every attribute that `style` marks as set replaces the inherited
+ * value. The stack must not be empty. */
+void ass_push_style_stack(struct stack *style_stack, const struct ass_style *style)
+{
+    /* Take one attribute (and its _set flag) from `style`, but only if it is set there */
+#define OVERRIDE_IF_SET(field) \
+    do { \
+        if (style->field##_set) { \
+            merged.field = style->field; \
+            merged.field##_set = true; \
+        } \
+    } while (0)
+
+    const struct ass_style *parent = stack_top(style_stack);
+    assert(parent);
+
+    struct ass_style merged = *parent;
+
+    OVERRIDE_IF_SET(fsp);
+    OVERRIDE_IF_SET(bord);
+    OVERRIDE_IF_SET(bord_color);
+    OVERRIDE_IF_SET(italic);
+    OVERRIDE_IF_SET(bold);
+    OVERRIDE_IF_SET(underline);
+    OVERRIDE_IF_SET(ruby_under);
+
+    stack_push(style_stack, &merged);
+
+#undef OVERRIDE_IF_SET
+}
+
+/*
+ * Render the italic/bold/underline attributes of `style` that are marked as
+ * set into a single ASS override block, e.g. "{\i1\b0}".
+ *
+ * style:   style to render
+ * out_len: size of `out` in bytes
+ * out:     destination buffer
+ *
+ * Returns the length the full text needs, excluding the terminator (same
+ * convention as snprintf); a value >= out_len means the output was truncated.
+ * Nothing is ever written past out_len bytes, and out_len <= 0 is allowed.
+ */
+int style_to_inline_tags(const struct ass_style *style, int out_len, char out[out_len])
+{
+    printf("Applying styles\n");
+
+    /* Once `w` has passed out_len, "out_len - w" would be negative and turn into
+     * a huge size_t inside snprintf. Clamp the remaining room to zero instead and
+     * only keep counting; snprintf(NULL, 0, ...) writes nothing. */
+#define tags_append(...) \
+    do { \
+        int room_ = out_len > w ? out_len - w : 0; \
+        w += snprintf(room_ > 0 ? out + w : NULL, room_, __VA_ARGS__); \
+    } while (0)
+
+#define apply_bool(tag, ass_tag) \
+    do { \
+        if (style->tag##_set) \
+            tags_append(ass_tag "%d", style->tag); \
+    } while (0)
+
+    int w = 0;
+
+    tags_append("{");
+
+    apply_bool(italic, "\\i");
+    apply_bool(bold, "\\b");
+    apply_bool(underline, "\\u");
+
+    tags_append("}");
+    return w;
+#undef apply_bool
+#undef tags_append
+}
+
+/*
+ * Look up the cue style registered under the selector "::cue(.<class_name>)"
+ * and merge the attributes it defines into `out`: italic, and the ruby
+ * position when it is not RUBYPOS_UNSET.
+ *
+ * `out` is left untouched, with a message on stderr, when no such style
+ * exists or when the class name is too long to build the selector.
+ */
+static void class_to_style(const char *class_name, const struct ass_params *ap, struct ass_style *out)
+{
+    printf("Class to style\n");
+    // This will have to be changed in the future...
+    char cname_with_cue[128];
+
+    /* Class names come from the subtitle file, so the write has to be bounded */
+    int len = snprintf(cname_with_cue, sizeof(cname_with_cue), "::cue(.%s)", class_name);
+    if (len < 0 || (size_t)len >= sizeof(cname_with_cue)) {
+        fprintf(stderr, "Class name is too long to build a selector, skipping it: '%s'\n", class_name);
+        return;
+    }
+
+    const struct cue_style *cs = cuestyle_get_by_selector(ap->cuestyles, cname_with_cue);
+    if (cs == NULL) {
+        fprintf(stderr, "Cannot find style with full class name: '%s'\n", cname_with_cue);
+        return;
+    }
+
+    if (cs->italic) {
+        out->italic = out->italic_set = true;
+    }
+    if (cs->ruby_position != RUBYPOS_UNSET) {
+        out->ruby_under = cs->ruby_position == RUBYPOS_UNDER;
+        out->ruby_under_set = true;
+    }
+}
+
+/* Fill `out` with the style implied by a single vtt node: italic, bold and
+ * underline nodes set the matching attribute, class nodes merge in the cue
+ * style of each of their class names. Every other attribute is zeroed. */
+void ass_node_to_style(const struct vtt_node *node, const struct ass_params *ap, struct ass_style *out)
+{
+    const bool is_italic = node->type == VNODE_ITALIC;
+    const bool is_bold = node->type == VNODE_BOLD;
+    const bool is_underline = node->type == VNODE_UNDERLINE;
+
+    memset(out, 0, sizeof(*out));
+
+    out->italic = is_italic;
+    out->italic_set = is_italic;
+    out->bold = is_bold;
+    out->bold_set = is_bold;
+    out->underline = is_underline;
+    out->underline_set = is_underline;
+
+    if (node->type != VNODE_CLASS)
+        return;
+
+    for (int idx = 0; idx < node->class_names->e_idx; idx++) {
+        const char *cls = *(char**)dyna_elem(node->class_names, idx);
+        class_to_style(cls, ap, out);
+    }
+}
+
+/*
+ * Flatten the subtree rooted at `node` into ASS dialogue text.
+ *
+ * Text nodes are written prefixed with the inline tags of the style on top
+ * of style_stack, and every '\n' is replaced by "\N". Italic, bold,
+ * underline and class nodes push a derived style for the duration of their
+ * children. Timestamp nodes are skipped. Ruby text nodes are not written;
+ * they only set *have_ruby (when have_ruby is not NULL).
+ *
+ * node:        subtree to convert
+ * n:           size of `out` in bytes, expected to be > 0
+ * out:         destination buffer
+ * style_stack: stack of struct ass_style, must not be empty
+ * have_ruby:   optional out flag, see above
+ * ap:          used to resolve class names to cue styles
+ *
+ * Returns the number of characters produced. A value >= n means the output
+ * did not fit and is incomplete. Nothing is written past `n` bytes, and the
+ * style stack is balanced again on return even when the output is truncated.
+ */
+static int ass_text_collect_tags_and_escape(const struct vtt_node *node, int n, char out[n], struct stack *style_stack, bool *have_ruby, const struct ass_params *ap)
+{
+    int w = 0;
+    bool pushed = false;
+
+    if (node->type == VNODE_TIMESTAMP)
+        return 0;
+
+    if (node->type == VNODE_TEXT) {
+        /* Render the tags into a scratch buffer first, so the bounded copy
+         * below is the only write into `out`. The tag block is at most
+         * "{\i0\b0\u0}" right now. */
+        char tags[64];
+        int tags_len = style_to_inline_tags(stack_top(style_stack), sizeof(tags), tags);
+        assert(tags_len >= 0 && (size_t)tags_len < sizeof(tags));
+
+        w += snprintf(out + w, n - w, "%s", tags);
+        if (w >= n) return w;
+
+        const char *pn = node->text;
+        while (*pn) {
+            const char *nl = strchr(pn, '\n');
+            if (!nl) {
+                w += snprintf(out + w, n - w, "%s", pn);
+                break;
+            }
+            w += snprintf(out + w, n - w, "%.*s\\N", (int)(nl - pn), pn);
+            /* w == n is already a truncation: the terminator took the last byte */
+            if (w >= n) return w;
+            pn = nl + 1;
+        }
+        return w;
+    } else if (node->type == VNODE_RUBY_TEXT) {
+        if (have_ruby)
+            *have_ruby = true;
+        return 0;
+    } else if (node->type == VNODE_ITALIC) {
+        struct ass_style ns = {
+            .italic = true, .italic_set = true,
+        };
+        ass_push_style_stack(style_stack, &ns);
+        pushed = true;
+    } else if (node->type == VNODE_BOLD) {
+        struct ass_style ns = {
+            .bold = true, .bold_set = true,
+        };
+        ass_push_style_stack(style_stack, &ns);
+        pushed = true;
+    } else if (node->type == VNODE_UNDERLINE) {
+        struct ass_style ns = {
+            .underline = true, .underline_set = true,
+        };
+        ass_push_style_stack(style_stack, &ns);
+        pushed = true;
+    } else if (node->type == VNODE_CLASS) {
+        struct ass_style ns = {0};
+        for (int i = 0; i < node->class_names->e_idx; i++) {
+            const char *class_name = *(char**)dyna_elem(node->class_names, i);
+            printf("name: %s\n", class_name);
+            class_to_style(class_name, ap, &ns);
+        }
+        ass_push_style_stack(style_stack, &ns);
+        pushed = true;
+    }
+
+    for (int i = 0; node->childs && i < node->childs->e_idx; i++) {
+        struct vtt_node *cn = dyna_elem(node->childs, i);
+
+        /* Out of room: stop descending, but fall through to the pop below
+         * so the style stack is not left unbalanced. This also guarantees
+         * that every child is called with at least one byte of space. */
+        if (w >= n)
+            break;
+
+        w += ass_text_collect_tags_and_escape(cn, n - w, out + w, style_stack, have_ruby, ap);
+    }
+
+    if (pushed) {
+        printf("Style Pop\n");
+        stack_pop(style_stack);
+    }
+
+    return w;
+}
+
+/* Translate a numpad-style alignment for horizontal text into the alignment
+ * to use for vertical text. The input must be in 1..9. */
+static int ass_horiz_to_vert_align(int align)
+{
+#if 0
+    int map[] = {
+        [1] = 7, [4] = 8, [7] = 9,
+        [2] = 4, [5] = 5, [8] = 6,
+        [3] = 1, [6] = 2, [9] = 3,
+    };
+#endif
+    // TODO this
+    static const int vert_align[] = {
+        [1] = 3, [2] = 8, [3] = 9,
+        [4] = 2, [5] = 5, [6] = 8,
+        [7] = 9, [8] = 6, [9] = 3,
+    };
+
+    assert(align < ARRSIZE(vert_align));
+    const int mapped = vert_align[align];
+    /* Index 0 has no initializer, so it reads as 0 and is rejected here */
+    assert(mapped);
+    return mapped;
+}
+
+/*
+ * Compute where and how the text of cue `c` is anchored inside `box`.
+ *
+ * Fills op->fs (5% of the video height), op->posx / op->posy (the anchor
+ * point on the box) and op->align / op->logical_align (numpad-style ASS
+ * alignment, both set to the same value here).
+ *
+ * Horizontal text: text_align selects the x anchor (left edge, right edge or
+ * centre of the box); the y anchor is currently always the bottom of the box.
+ * Vertical text: line_align selects the x anchor and text_align the y anchor.
+ */
+static void ass_pos_line_in_box(const struct cue *c, const struct cuepos_box *box, struct ass_cue_pos *op)
+{
+    /* Indexed as [yal][xal]: row 0 = top, 1 = middle, 2 = bottom;
+     * column 0 = left, 1 = centre, 2 = right */
+    int alignmap[3][3] = {
+        { 7, 8, 9 },
+        { 4, 5, 6 },
+        { 1, 2, 3 },
+    };
+    //BP;
+    int xal = 0, yal = 0;
+    int lc = util_count_node_lines(c->text_node);
+    op->fs = 5 * (vinf->height / 100.0f);
+    int text_height = lc * op->fs;
+    bool is_wdh = c->writing_direction == WD_HORIZONTAL;
+
+    if (is_wdh) {
+        switch (c->text_align) {
+        case TEXT_ALIGN_LEFT:
+            xal = 0;
+            op->posx = box->left;
+            break;
+        case TEXT_ALIGN_RIGHT:
+            xal = 2;
+            op->posx = box->left + box->width;
+            break;
+        default:
+        case TEXT_ALIGN_CENTER:
+            xal = 1;
+            op->posx = box->left + box->width / 2;
+            break;
+        }
+
+        /* The "true ||" forces this branch: horizontal text is always
+         * bottom-anchored and the line_align switch below is unreachable */
+        if (true || IS_AUTO(c->line)) {
+            yal = 2;
+            op->posy = box->top + box->height;
+        } else {
+            switch (c->line_align) {
+            case LINE_ALIGN_START: // spec
+                yal = 0;
+                op->posy = box->top;
+                break;
+            case LINE_ALIGN_CENTER:
+                yal = 1;
+                op->posy = box->top + box->height / 2;
+                break;
+            case LINE_ALIGN_END: // spec
+                yal = 2;
+                op->posy = box->top + box->height;
+                break;
+            default:
+                assert(false && "Unhandled line align in pos_in_box");
+                break;
+            }
+        }
+    } else {
+        /* Vertical text */
+        /* NOTE(review): posalign is computed but never read in this function */
+        enum cue_pos_align posalign = cuepos_compute_pos_align(c);
+        bool is_right = c->writing_direction == WD_VERTICAL_GROW_RIGHT;
+
+        /* The "false &&" disables this branch: line_align is always used */
+        if (false && IS_AUTO(c->line)) {
+            xal = 2;
+            op->posx = box->left + box->width;
+        } else {
+            /* NOTE(review): START and END compute the same posx and differ
+             * only in yal - confirm this is intended */
+            switch (c->line_align) {
+            case LINE_ALIGN_START: // spec
+                yal = is_right ? 0 : 2;
+                op->posx = box->left + box->width * is_right;
+                break;
+            case LINE_ALIGN_CENTER:
+                yal = 1;
+                op->posx = box->left + box->width / 2;
+                break;
+            case LINE_ALIGN_END: // spec
+                yal = is_right ? 2 : 0;
+                op->posx = box->left + box->width * is_right;
+                break;
+            default:
+                assert(false && "Unhandled line align in pos_in_box");
+                break;
+            }
+        }
+
+        switch (c->text_align) {
+        case TEXT_ALIGN_LEFT:
+            xal = 0;
+            op->posy = box->top;
+            break;
+        case TEXT_ALIGN_RIGHT:
+            xal = 2;
+            op->posy = box->top + box->height;
+            break;
+        default:
+        case TEXT_ALIGN_CENTER:
+            xal = 1;
+            op->posy = box->top + box->height / 2;
+            break;
+        }
+    }
+
+#if 0
+    if (c->writing_direction != WD_HORIZONTAL) {
+        /* If vertical writing, adjust aligns
+         * \an7 for horizontal is \an1 for vertical */
+        if (yal == 0)
+            yal = 2;
+    }
+#endif
+
+    op->align = alignmap[yal][xal];
+    op->logical_align = op->align;
+    printf("Align is: %d\n", op->align);
+
+    /* Multi-line, top-aligned text that would extend below the video is
+     * shifted up. The assert(false) that follows aborts whenever this path
+     * is taken in a build with assertions enabled. */
+    if (lc > 1) {
+        if (op->posy + text_height > vinf->height &&
+                (op->align == 7 || op->align == 8 || op->align == 9)) {
+            /* Would be outside the video, shift it up */
+            int extra = op->posy + text_height - vinf->height;
+            op->posy -= extra;
+            assert(false);
+        }
+    }
+}
+
+/* Write the optional extra override tags for cue `c` into `tags`. For
+ * vertical writing directions this selects the '@' variant of the font and
+ * rotates the text by 270 degrees; horizontal cues produce an empty string.
+ * Returns the length the tags need (snprintf convention). */
+int ass_opt_tags_str(const struct cue *c, const struct ass_params *ap, int tags_size, char tags[tags_size])
+{
+    int written = 0;
+    const bool vertical = c->writing_direction != WD_HORIZONTAL;
+
+    if (vertical) {
+        /* Some kind of vertical */
+        FT_Face face = font_get_face(ap->fontpath);
+        const char *family = font_get_name(face);
+
+        written += snprintf(tags + written, tags_size - written, "\\fn@%s\\frz270", family);
+    }
+
+    /* Terminate explicitly, this is what produces the empty string for horizontal cues */
+    if (written < tags_size)
+        tags[written] = '\0';
+
+    return written;
+}
+
+/*
+ * Convert one cue into dialogue node(s) appended to ap->ass_nodes.
+ *
+ * The cue box is computed from the cue settings and the text is anchored
+ * inside it. Cues containing ruby text are handed over to ass_ruby_write();
+ * every other cue becomes a single layer-9 node made of a positioning tag
+ * block followed by the converted text. With DEBUGBOX enabled and
+ * opts_ass_debug_boxes set, the cue box and the measured text box are
+ * appended as drawings as well.
+ *
+ * A cue whose text cannot be allocated is skipped with a message on stderr.
+ */
+static void ass_cue2ass(struct cue *c, struct ass_params *ap)
+{
+    char escaped_text[1024];
+    char tag_text[1024];
+    char tag2_text[128];
+    bool have_ruby = false;
+    struct ass_cue_pos pi = {0};
+    struct cuepos_box boxp = {0};
+    struct ass_style *style = ass_styles_get(ap->styles, "Default");
+    struct ass_node anode = {
+        .start_ms = c->time_start,
+        .end_ms = c->time_end,
+        .layer = 9,
+        .style = style,
+    };
+    int escaped_text_len, tag_text_len, tag2_text_len;
+    stack_init(style_stack, sizeof(struct ass_style), 30);
+    stack_push(&style_stack, style);
+
+    cuepos_apply_cue_settings(c, &boxp);
+#if DEBUGBOX == 1
+    if (opts_ass_debug_boxes) {
+        struct ass_cue_pos boxpos = {0};
+        ass_append_box(c, &boxpos, &boxp, "00FF00", ap);
+    }
+#endif
+    ass_pos_line_in_box(c, &boxp, &pi);
+
+    /* TODO: cont. here, add inline tags for classes here */
+    escaped_text_len = ass_text_collect_tags_and_escape(c->text_node, sizeof(escaped_text), escaped_text, &style_stack, &have_ruby, ap);
+    /* Explicit sign checks: int < size_t would convert a negative length to a huge value */
+    assert(escaped_text_len >= 0 && (size_t)escaped_text_len < sizeof(escaped_text));
+    if (have_ruby) {
+        /* If it has ruby, use ruby text rendering */
+        ass_ruby_write(c, &pi, ap);
+        return;
+    }
+    /* If no ruby, use the normal rendering */
+
+    tag2_text_len = ass_opt_tags_str(c, ap, sizeof(tag2_text), tag2_text);
+    assert(tag2_text_len >= 0 && (size_t)tag2_text_len < sizeof(tag2_text));
+    tag_text_len = snprintf(tag_text, sizeof(tag_text), "{\\an%d\\fs%d\\pos(%d,%d)%s}", pi.align, pi.fs, pi.posx, pi.posy, tag2_text);
+    assert(tag_text_len >= 0 && (size_t)tag_text_len < sizeof(tag_text));
+
+    size_t text_size = (size_t)tag_text_len + (size_t)escaped_text_len + 1;
+    anode.text = malloc(text_size);
+    if (anode.text == NULL) {
+        fprintf(stderr, "Out of memory, skipping cue\n");
+        return;
+    }
+    snprintf(anode.text, text_size, "%s%s", tag_text, escaped_text);
+
+    dyna_append(ap->ass_nodes, &anode);
+
+#if DEBUGBOX == 1
+    if (opts_ass_debug_boxes) {
+        char text[1024];
+        char line[1024];
+        struct ass_cue_pos an7pos;
+        struct text_extents line_exts[64];
+        int line_ext_count = 0;
+        struct text_extents full_ext;
+
+        /* Measure each line separately. Lines beyond the capacity of
+         * line_exts are left out of the debug box instead of being written
+         * past the end of the array. */
+        ctxt_text(c->text_node, sizeof(text), text);
+        const char *start = text;
+        const char *end = strchr(text, '\n');
+        while (end && line_ext_count < (int)ARRSIZE(line_exts)) {
+            memset(line, 0, sizeof(line));
+            memcpy(line, start, end - start);
+            te_simple(style->fontpath, line, pi.fs, style->fsp, false, &line_exts[line_ext_count++]);
+            start = end + 1;
+            end = strchr(start, '\n');
+        }
+        if (*start && line_ext_count < (int)ARRSIZE(line_exts)) {
+            memset(line, 0, sizeof(line));
+            memcpy(line, start, strlen(start));
+            te_simple(style->fontpath, line, pi.fs, style->fsp, false, &line_exts[line_ext_count++]);
+        }
+
+        util_combine_extents(line_ext_count, line_exts, &full_ext);
+        util_cue_pos_to_an7(&pi, &full_ext, &an7pos);
+
+        struct cuepos_box box = {
+            .width = full_ext.width,
+            .height = full_ext.height,
+        };
+        ass_append_box(c, &an7pos, &box, "FC19DA", ap);
+    }
+#endif
+
+}
+
+/* Write every node as one complete "Dialogue:" line: the event properties,
+ * then the node text, then a newline. */
+static void ass_write_ass_nodes(FILE *f, const struct dyna *ass_nodes)
+{
+    for (int idx = 0; idx < ass_nodes->e_idx; idx++) {
+        const struct ass_node *node = dyna_elem(ass_nodes, idx);
+
+        ass_write_dialog_prop(f, node);
+        fputs(node->text, f);
+        fputc('\n', f);
+    }
+}
+
+/* Derive an outline (size and colour) for `st` from the text shadows of the
+ * cue style. Only styles with exactly four shadow entries are used, and only
+ * the first entry is looked at: the border size is |xpos| + |ypos| and the
+ * border colour is the entry's colour. */
+static void text_shadow_to_bord(const struct cue_style *cs, struct ass_style *st, struct ass_params *ap)
+{
+    switch (cs->text_shadow_count) {
+    case 0:
+        return;
+    case 4:
+        break;
+    default:
+        fprintf(stderr, "Text shadow count is not 4, skipping it...\n");
+        return;
+    }
+
+    /* Simple approach for now, only the 1st shadow entry is considered */
+    st->bord_color = util_colorname_to_rgb(cs->text_shadow[0].color);
+    st->bord = abs(cs->text_shadow[0].xpos) + abs(cs->text_shadow[0].ypos);
+
+    /* will get ignored, but w/e */
+    st->bord_set = true;
+    st->bord_color_set = true;
+}
+
+/* Register the "Default" style in ap->styles. The font path is copied from
+ * ap, and fsp, bord, bord_color, italic, bold and underline are all marked
+ * as set. The border size comes from, in order of preference: the
+ * opts_ass_border_size option (when not -1), the text shadow of the "::cue()"
+ * style, or 1/300 of the video height with a black border colour when no
+ * such style exists. */
+static void create_default_style(struct ass_params *ap)
+{
+    struct ass_style *def = ass_styles_add(ap->styles, "Default");
+
+    def->fontpath = strdup(ap->fontpath);
+    def->fsp = 0;
+    /* The remaining values can stay zero */
+
+    def->fsp_set = true;
+    def->bord_set = true;
+    def->bord_color_set = true;
+    def->italic_set = true;
+    def->bold_set = true;
+    def->underline_set = true;
+
+    if (opts_ass_border_size != -1) {
+        /* An explicit border option wins over everything else */
+        def->bord = opts_ass_border_size;
+        return;
+    }
+
+    const struct cue_style *cue_rule = cuestyle_get_by_selector(ap->cuestyles, "::cue()");
+    if (cue_rule == NULL) {
+        /* No vtt style to take it from, fall back to a default border */
+        def->bord_color = 0; /* black */
+        def->bord = (int)(vinf->height / 300.0f);
+        return;
+    }
+
+    text_shadow_to_bord(cue_rule, def, ap);
+}
+
+/*
+ * Convert all cues to ASS and write the resulting script to `fname`.
+ *
+ * cues:       dyna of struct cue
+ * cstyles:    cue styles, used to resolve class names and the default border
+ * video_info: video dimensions, handed to cuepos_set_video_info()
+ * fontpath:   font file used for the default style
+ * fname:      output path, opened with mode "w"
+ *
+ * Returns 0 on success, -1 when the file cannot be opened or when writing or
+ * closing it fails.
+ */
+int ass_write(struct dyna *cues, struct dyna *cstyles, const struct video_info *video_info, const char *fontpath, const char *fname)
+{
+    int rc = 0;
+    struct ass_params ap = {
+        .fontpath = fontpath,
+        .cuestyles = cstyles,
+    };
+    FILE *f = fopen(fname, "w");
+    if (f == NULL)
+        return -1;
+
+    cuepos_set_video_info(video_info);
+
+    ap.styles = ass_styles_create();
+    create_default_style(&ap);
+
+    ap.ass_nodes = dyna_create_size(sizeof(struct ass_node), 256);
+    dyna_set_free_fn(ap.ass_nodes, ass_node_free);
+
+    for (int i = 0; i < cues->e_idx; i++) {
+        struct cue *c = dyna_elem(cues, i);
+
+        ass_cue2ass(c, &ap);
+    }
+
+    printf("Sorting lines...\n");
+    /* Maybe we could do an array of pointers here, like a view */
+    qsort(ap.ass_nodes->data, ap.ass_nodes->e_idx, ap.ass_nodes->e_size, ass_node_compar);
+
+    ass_write_header(f);
+    ass_write_styles(f, &ap);
+    ass_write_events_header(f);
+    ass_write_ass_nodes(f, ap.ass_nodes);
+
+    /* Output is buffered, so a failed write may only surface in the stream's
+     * error flag or when fclose() flushes. Check both. */
+    if (ferror(f))
+        rc = -1;
+    if (fclose(f) != 0)
+        rc = -1;
+
+    dyna_destroy(ap.ass_nodes);
+    ass_styles_destroy(ap.styles);
+    return rc;
+}
--- a/src/ass.h
+++ b/src/ass.h
@ -0,0 +1,49 @@
+#ifndef _VTT2ASS_ASS_H
+#define _VTT2ASS_ASS_H
+#include "parser.h"
+#include "cuepos.h"
+#include "dyna.h"
+#include "ass_style.h"
+#include "cuestyle.h"
+#include "stack.h"
+
+/* Predicates on numpad-style ASS alignment values (1..9).
+ * The argument is parenthesized so that expressions such as `a | b` expand
+ * correctly. It is evaluated up to three times, so it must not have side
+ * effects. */
+#define IS_ASS_ALIGN_LEFT(al) ((al) == 1 || (al) == 4 || (al) == 7)
+#define IS_ASS_ALIGN_RIGHT(al) ((al) == 3 || (al) == 6 || (al) == 9)
+#define IS_ASS_ALIGN_TOP(al) ((al) == 7 || (al) == 8 || (al) == 9)
+#define IS_ASS_ALIGN_BOTTOM(al) ((al) == 1 || (al) == 2 || (al) == 3)
+
+/* One event ("Dialogue:" line) of the generated script. */
+struct ass_node {
+    char *text; /* Heap-allocated text of the event, owned by the node (Free) */
+    int layer; /* Written as the Layer field; also the sort tie breaker */
+    int64_t start_ms, end_ms; /* Display interval in milliseconds */
+    struct ass_style *style; /* Can be NULL, pointer into the styles dyna */
+    //const char *style; /* Can be NULL */
+};
+
+/* Placement of a cue's text: the values that end up in the \an, \pos and \fs tags. */
+struct ass_cue_pos {
+    int align; /* numpad style ass align */
+    int logical_align; /* The align for internal calculations (only for vertical text) */
+    //int margin_l, margin_r, margin_v;
+    int posx, posy; /* Anchor point used for \pos(x,y) */
+    int fs; /* Font size used for \fs */
+
+    //int width, height;
+};
+
+/* State shared by the functions that take part in one ass_write() run. */
+struct ass_params {
+    const char *fontpath; /* Font file path as passed to ass_write() */
+    struct dyna *ass_nodes, *styles; /* Collected struct ass_node events / the ASS styles */
+    struct dyna *cuestyles; /* Cue styles as passed to ass_write() */
+};
+
+int ass_write(struct dyna *cues, struct dyna *cstyles, const struct video_info *video_info, const char *fontpath, const char *fname);
+
+void ass_append_box(const struct cue *c, const struct ass_cue_pos *an7pos,
+        const struct cuepos_box *box, const char *color, struct ass_params *ap);
+
+int style_to_inline_tags(const struct ass_style *style, int out_len, char out[out_len]);
+
+void ass_node_to_style(const struct vtt_node *node, const struct ass_params *ap, struct ass_style *out);
+void ass_push_style_stack(struct stack *style_stack, const struct ass_style *style);
+
+#endif /* _VTT2ASS_ASS_H */
--- a/src/ass_ruby.c
+++ b/src/ass_ruby.c
@ -0,0 +1,706 @@
+#include "ass_ruby.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <string.h>
+#include <sys/param.h>
+#include <wchar.h>
+
+#include "cuetext.h"
+#include "util.h"
+#include "textextents.h"
+#include "ass.h"
+#include "opts.h"
+
+#define MAX_RUBY_IN_LINE 128
+
+#define DEBUG_RUBYBOX 1
+
+/* The ruby annotation attached to a base text part. */
+struct ass_ruby {
+    struct text_extents extents; /* Measured size of rubytext at the ruby font size */
+    const char *rubytext; /* no free */
+    bool resize; /* if ruby base is bigger */
+
+    /* True if this ruby should be rendered under the base text */
+    bool under;
+    float fsp; /* presumably the extra spacing applied when resizing - TODO confirm */
+};
+
+/* One run of base text that lies on a single line, with or without ruby. */
+struct ass_parts {
+    size_t start_off, len; /* Byte offset and length of the run inside ass_ruby_params.text */
+    int line; /* Zero-based line number of the run */
+    const char *rubytext;
+    struct text_extents extents; /* including spacing, but not padding_before */
+    bool resize; /* if ruby text is bigger */
+
+    /* Padding that will affect the next part basically */
+    float last_char_fsp;
+    bool has_last_char_fsp;
+
+    /* For ruby padding */
+    float fsp;
+
+    bool is_ruby; /* True when the text node's parent is a ruby node */
+    struct ass_ruby ruby; /* If available */
+
+    /* Inline tags for this part of text */
+    struct ass_style inline_tags;
+};
+
+/* Working state for rendering one cue that contains ruby text. */
+struct ass_ruby_params {
+    /* The raw text of the current node
+     * Ruby text not included */
+    char text[1024];
+    int text_len;
+    /* Text with inline tags */
+    char tags_text[4096];
+    int tags_text_len;
+
+    /* Each part contains a text part with, or without ruby.
+     * Newlines always cause a part split */
+    struct ass_parts parts[MAX_RUBY_IN_LINE];
+    int parts_count;
+
+    /* Text extents of each individual line after justify resize */
+    struct text_extents line_ext[MAX_RUBY_IN_LINE];
+    int line_ext_count;
+
+    /* Font sizes used for the base text and for the ruby text */
+    int base_fs, ruby_fs;
+    const struct ass_params *ap;
+    const struct ass_cue_pos *olpos;
+    struct te_obj base_te_obj;
+
+    /* Stack of struct ass_style; its top is the style in effect for the node being visited */
+    struct stack *style_stack;
+
+    /* private tmp elems */
+    int current_line_off; /* Line counter, advanced once per '\n' seen while splitting */
+};
+
+/*
+ * Walk the node tree and collect the base text into arp->text, split into
+ * arp->parts.
+ *
+ * - Timestamp nodes are ignored.
+ * - A text node below a ruby-text node is not added to arp->text; it becomes
+ *   the ruby of the most recently added part.
+ * - Any other text node is split at '\n'. Every non-empty piece is appended
+ *   to arp->text and recorded as a new part, together with its line number
+ *   and a copy of the style currently on top of the style stack. The newline
+ *   characters themselves are not stored.
+ * - All other nodes push the style they imply, recurse into their children
+ *   and pop the style again.
+ */
+static void ass_ruby_text_and_parts(const struct vtt_node *node, struct ass_ruby_params *arp)
+{
+    if (node->type == VNODE_TIMESTAMP)
+        return;
+    if (node->type == VNODE_TEXT) {
+        if (node->parent && node->parent->type == VNODE_RUBY_TEXT) {
+            /* Ruby text must be a single line, and a part must already exist to attach it to */
+            assert(strchr(node->text, '\n') == NULL);
+            assert(arp->parts_count > 0);
+
+            struct ass_ruby *rb = &arp->parts[arp->parts_count - 1].ruby;
+            struct ass_style *top = stack_top(arp->style_stack);
+
+            /* Points into the node, nothing is copied */
+            rb->rubytext = node->text;
+            if (top->ruby_under_set)
+                rb->under = top->ruby_under;
+            return;
+        }
+
+        const char *start = node->text, *end = NULL;
+        for (;;) {
+            /* Split lines into different parts */
+            int line_len;
+            assert(arp->parts_count < ARRSIZE(arp->parts));
+            struct ass_parts *part = &arp->parts[arp->parts_count];
+
+            end = strchr(start, '\n');
+            if (end)
+                line_len = end - start;
+            else
+                line_len = strlen(start);
+
+            if (line_len == 0)
+                goto next; /* skip empty text */
+
+            arp->parts_count++;
+            memset(part, 0, sizeof(*part));
+            part->start_off = arp->text_len;
+            part->len = line_len;
+            part->is_ruby = node->parent && node->parent->type == VNODE_RUBY;
+            part->line = arp->current_line_off;
+            part->inline_tags = *(struct ass_style*)stack_top(arp->style_stack);
+            //printf("Copied under: %d\n", part->inline_tags.ruby_under);
+
+            arp->text_len += snprintf(arp->text + arp->text_len,
+                    sizeof(arp->text) - arp->text_len, "%.*s", line_len, start);
+            assert(arp->text_len < sizeof(arp->text));
+
+next:
+            /* No further newline: this was the last piece of the node */
+            if (end == NULL)
+                break;
+            /* end points at the '\n' found above, so this always holds */
+            assert(*end != '\0');
+            arp->current_line_off++;
+            start = end + 1;
+        }
+
+        return;
+    }
+
+    /* These nodes (not timestamp and text) could signify styles */
+    struct ass_style nodestyle;
+    ass_node_to_style(node, arp->ap, &nodestyle);
+    ass_push_style_stack(arp->style_stack, &nodestyle);
+    //printf("nodestyle ruby under: %d\n", nodestyle.ruby_under);
+    //struct ass_style *tmps = (struct ass_style*)stack_top(arp->style_stack);
+    //printf("stacktop ruby under: %d\n", tmps->ruby_under);
+
+    for (int i = 0; node->childs && i < node->childs->e_idx; i++) {
+        struct vtt_node *cn = dyna_elem(node->childs, i);
+
+        ass_ruby_text_and_parts(cn, arp);
+    }
+
+    stack_pop(arp->style_stack);
+
+#if 0
+    if (node->type == VNODE_RUBY) {
+        /* This node was a ruby text, mark the last elem in parts as ruby */
+        assert(*parts_len > 0);
+        parts[*parts_len - 1].is_ruby = true;
+    }
+#endif
+
+    return;
+}
+
+/* Disabled (compiled out with #if 0): an earlier box calculation for ruby
+ * text. It refers to names that are not defined in the visible part of this
+ * file (struct ass_collect_ruby_priv, ass_ruby_split_to_parts, base_box,
+ * rubytext_ext, line_start_off), so it would not build as is. */
+#if 0
+static void ass_ruby_calc_box(const struct cue *c, const struct ass_cue_pos *olpos, int rubyfs,
+        int *ruby_count, struct ass_ruby rubys[MAX_RUBY_IN_LINE], struct text_extents *out_full_ext)
+{
+    char txt[1024];
+    struct ass_parts parts[MAX_RUBY_IN_LINE];
+    struct text_extents full_ext;
+    struct text_extents line_exts[MAX_RUBY_IN_LINE];
+    struct ass_collect_ruby_priv collpriv = { 0 };
+    int parts_len = 0;
+    int line_exts_len = 0;
+    size_t consumed = 0;
+    float x_offset_fact = 0.5f; /* def for center */
+    *ruby_count = 0;
+
+    consumed = ctxt_text(c->text_node, sizeof(txt), txt);
+    assert(consumed < sizeof(txt));
+    util_get_text_extents_lines(NULL, txt, olpos->fs, MAX_RUBY_IN_LINE, line_exts, &line_exts_len);
+    util_combine_extents(line_exts_len, line_exts, &full_ext);
+
+    ass_ruby_split_to_parts(c->text_node, &collpriv, MAX_RUBY_IN_LINE, parts, &parts_len);
+    assert(parts_len < MAX_RUBY_IN_LINE);
+
+    if (IS_ASS_ALIGN_RIGHT(olpos->align))
+        x_offset_fact = 1.0f;
+    else if (IS_ASS_ALIGN_LEFT(olpos->align))
+        x_offset_fact = 0.0f;
+
+    for (int i = 0; i < parts_len; i++) {
+        struct text_extents before_ext;
+        struct text_extents ex = {0};
+
+        util_get_text_extents_line(NULL, txt, collpriv.consumed, parts[i].line_start_off, (parts[i].start_off - parts[i].line_start_off), olpos->fs, &before_ext);
+        util_get_text_extents_line(NULL, txt, collpriv.consumed, parts[i].start_off, parts[i].len, olpos->fs, &ex);
+        printf("Before: '%.*s'\n", (int)(parts[i].start_off - parts[i].line_start_off), txt + parts[i].line_start_off);
+        printf("Ruby: '%.*s'\n", (int)(parts[i].len), txt + parts[i].start_off);
+        printf("Ruby text: '%s'\n", parts[i].rubytext);
+
+        int width_diff = (full_ext.width - line_exts[parts[i].line].width) * x_offset_fact;
+        rubys[i].base_box.left = before_ext.width + width_diff;
+        rubys[i].base_box.top = (full_ext.height / line_exts_len) * parts[i].line;
+        rubys[i].base_box.width = ex.width;
+        rubys[i].base_box.height = ex.height;
+        rubys[i].rubytext = parts[i].rubytext;
+        util_get_text_extents(NULL, txt, rubyfs, &rubys[i].rubytext_ext);
+    }
+
+    *ruby_count = parts_len;
+    *out_full_ext = full_ext;
+}
+#endif
+
+/* Measure every part: the base text through the prepared te_obj, and for
+ * ruby parts also the ruby text at the ruby font size. The spacing of the
+ * "Default" style is used for both. */
+static void ass_ruby_calc_parts_extents(struct ass_ruby_params *arp)
+{
+    const struct ass_style *def_style = ass_styles_get(arp->ap->styles, "Default");
+
+    for (int idx = 0; idx < arp->parts_count; idx++) {
+        struct ass_parts *part = &arp->parts[idx];
+
+        te_get_at(&arp->base_te_obj, part->start_off, part->len, def_style->fsp, &part->extents);
+
+        if (!part->is_ruby)
+            continue;
+
+        te_simple(arp->ap->fontpath, part->ruby.rubytext, arp->ruby_fs, def_style->fsp, false, &part->ruby.extents);
+    }
+}
+
+/* For every ruby part, flag the narrower of base text and ruby text:
+ *   - base wider than ruby text  -> ruby.resize is set
+ *   - ruby text wider than base  -> resize is set
+ * When the (integer) width difference is 0, neither flag is touched. */
+static void ass_ruby_mark_resize(struct ass_ruby_params *arp)
+{
+    for (int idx = 0; idx < arp->parts_count; idx++) {
+        struct ass_parts *cur = &arp->parts[idx];
+
+        if (!cur->is_ruby)
+            continue;
+
+        const int width_gap = cur->extents.width - cur->ruby.extents.width;
+        if (width_gap > 0)
+            cur->ruby.resize = true;
+        else if (width_gap < 0)
+            cur->resize = true;
+    }
+}
+
+/* Sum the parts of every line into one extent per line (horizontal writing
+ * only). A part whose base is marked for resizing contributes the width of
+ * its ruby text; the height is always taken from the base text.
+ * Fills: arp->line_ext and arp->line_ext_count */
+static void ass_ruby_calc_line_exts(struct ass_ruby_params *arp)
+{
+    int line_w = 0, line_h = 0, out = 0;
+
+    for (int i = 0; i < arp->parts_count; i++) {
+        const struct ass_parts *cur = &arp->parts[i];
+
+        assert(out < ARRSIZE(arp->line_ext));
+
+        if (i > 0 && arp->parts[i - 1].line != cur->line) {
+            /* The previous part closed a line: store its totals and start over */
+            memset(&arp->line_ext[out], 0, sizeof(arp->line_ext[out]));
+            arp->line_ext[out].width = line_w;
+            arp->line_ext[out].height = line_h;
+            out++;
+            line_w = line_h = 0;
+        }
+
+        /* Use the size of the ruby text when the base will be resized to it */
+        const struct text_extents *w_src = cur->resize ? &cur->ruby.extents : &cur->extents;
+        line_w += w_src->width;
+        line_h = MAX(cur->extents.height, line_h);
+    }
+
+    /* The last line is never followed by a line switch, so store it here */
+    assert(line_w && line_h);
+    assert(out < ARRSIZE(arp->line_ext));
+    memset(&arp->line_ext[out], 0, sizeof(arp->line_ext[out]));
+    arp->line_ext[out].width = line_w;
+    arp->line_ext[out].height = line_h;
+    arp->line_ext_count = out + 1;
+}
+
+/* Size of one gap when the characters are spread evenly across fspace
+ * (horizontal writing only). The free width is divided into count + 1 gaps:
+ *   - before the 1st char
+ *   - between each char
+ *   - after the last char
+ */
+static float ass_ruby_calc_justify_amount(
+        int char_exts_count, const struct text_extents char_exts[char_exts_count],
+        const struct text_extents *fspace)
+{
+    int used_width = 0;
+    int idx = 0;
+
+    while (idx < char_exts_count) {
+        used_width += char_exts[idx].width;
+        idx++;
+    }
+
+    const int free_width = fspace->width - used_width;
+    const int gaps = char_exts_count + 1;
+
+    return free_width / (float)gaps;
+}
+
+/* Calculates the space between characters to fill up the given space with \fsp tags
+ * target > current
+ * space is - before the 1st char (if outer is true)
+ *          - between each char
+ *          - after the last char (if outer is true)
+ *
+ * text, text_len: the text that will be spaced and its length, handed to
+ *                 util_utf8_ccount() to get the character count
+ * def_spacing:    spacing that is already part of current->width
+ * Returns the spacing to use per character. When there is nothing to divide
+ * the space between (no characters and no outer gaps) def_spacing is returned
+ * instead of dividing by zero, which used to put inf/nan into the \fsp tag.
+ */
+static float calc_fsp_amount(const char *text, int text_len, bool outer, float def_spacing, const struct text_extents *current, const struct text_extents *target)
+{
+    /* Not the best impl, but will do for now */
+    int ccount = util_utf8_ccount(text_len, text);
+    /* The widths include the fsp as well, remove that here */
+    float diff = target->width - (current->width - ccount * def_spacing);
+    /* Technically, if outer is false, there will be a padding on the right side still */
+    int slots = outer ? ccount + 1 : ccount;
+
+    if (slots <= 0)
+        return def_spacing;
+
+    return diff / slots;
+}
+
+/* Emit one ASS node that holds the ruby (annotation) text of `part`.
+ * The node text is "{\an..\pos(..)\fs..[\fsp..]}<rubytext>"; its position is
+ * an7pos plus cursor_x/cursor_y plus the offsets computed below. The node is
+ * appended to arp->ap->ass_nodes and owns a strdup()ed copy of the text.
+ * With DEBUG_RUBYBOX == 1 and opts_ass_debug_boxes set, a box for the part is
+ * appended through ass_append_box() as well.
+ * NOTE(review): full_ext is not used by this function. */
+static void render_rubytext(const struct cue *c, struct ass_ruby_params *arp,
+        int cursor_x, int cursor_y, struct ass_parts *part,
+        const struct ass_cue_pos *an7pos, const struct text_extents *full_ext)
+{
+    char text[1024];
+    int text_len = 0;
+    struct ass_style *style = ass_styles_get(arp->ap->styles, "Default");
+    struct ass_node anode = {
+        .layer = 4,
+        .start_ms = c->time_start,
+        .end_ms = c->time_end,
+        .style = style,
+    };
+
+    int align = 1;
+    int x_off = 0;
+    int y_off = 0;
+    /* A resized ruby text is shifted right by one spacing unit (the outer gap) */
+    if (part->ruby.resize)
+        x_off += part->ruby.fsp;
+    /* Ruby under the base: switch to \an7 and move down by the base height */
+    if (part->ruby.under) {
+        align = 7;
+        y_off += part->extents.height;
+    }
+
+    text_len += snprintf(text, sizeof(text),
+            "{\\an%d\\pos(%d,%d)\\fs%d",
+            align, an7pos->posx + cursor_x + x_off, an7pos->posy + cursor_y + y_off,
+            arp->ruby_fs);
+    assert(text_len < sizeof(text));
+
+    if (part->ruby.resize) {
+        text_len += snprintf(text + text_len, sizeof(text) - text_len,
+                "\\fsp%g", part->ruby.fsp + 0);
+        assert(text_len < sizeof(text));
+    }
+
+    /* Close the override block and add the ruby text itself */
+    text_len += snprintf(text + text_len, sizeof(text) - text_len,
+            "}%s", part->ruby.rubytext);
+    assert(text_len < sizeof(text));
+
+    anode.text = strdup(text);
+    dyna_append(arp->ap->ass_nodes, &anode);
+
+#if DEBUG_RUBYBOX == 1
+    if (opts_ass_debug_boxes) {
+        struct ass_cue_pos boxpos = {
+            .posx = an7pos->posx + cursor_x,
+            .posy = an7pos->posy + cursor_y,
+        };
+        
+        /* Box width follows the width the part advances the cursor by */
+        struct text_extents *adv_ext = &part->extents;
+        if (part->resize)
+            adv_ext = &part->ruby.extents;
+        struct cuepos_box box = {
+            .width = adv_ext->width,
+            .height = part->extents.height,
+        };
+        ass_append_box(c, &boxpos, &box, "0BE57F", (struct ass_params*)arp->ap);
+    }
+#endif
+}
+
+#if 0
+/* Disabled code, kept for reference: stored the \fsp needed to stretch either
+ * the ruby base or the ruby text into the part's inline tags.
+ * NOTE(review): calc_fsp_amount() is called here with 4 arguments while its
+ * current definition takes 6, so this needs updating before it can be
+ * re-enabled. */
+static void set_part_inline_tags(const struct cue *c, int cursor_x, int cursor_y, const struct ass_cue_pos *an7pos,
+        struct ass_ruby_params *arp, int text_len, const char text[text_len],
+        struct ass_parts *part, struct dyna *ass_nodes)
+{
+    if (part->is_ruby == false) {
+        return;
+        /* Nothing to do for non-ruby parts for now */
+    }
+
+    char ntext[1024];
+    struct text_extents char_exts[128];
+    int ntext_len = 0, char_exts_count = 0;
+    int len, coff, fs, align;
+    const char *cbase;
+    mbstate_t mbstate = {0};
+    float space;
+    int space_cursor = 0;
+    struct ass_style *style = ass_styles_get(arp->ap->styles, "Default");
+    struct ass_node anode = {
+        .layer = 4,
+        .start_ms = c->time_start,
+        .end_ms = c->time_end,
+        .style = style,
+    };
+
+    /* Simply add the non-resized part without resizing */
+    if (part->resize) {
+        /* Ruby base is larger */
+        //int r = asprintf(&anode.text, "{\\an1\\fs%d\\pos(%d,%d)}%s",
+                //arp->ruby_fs, an7pos->posx + cursor_x, an7pos->posy + cursor_y, part->ruby.rubytext);
+        //assert(r != -1);
+
+        /* Calculate text extents so that it will fill the given space. x_off is used here */
+        //te_get_at_chars_justify(&arp->base_te_obj, part->start_off, part->len, part->ruby.extents.width, ARRSIZE(char_exts), char_exts, &char_exts_count);
+
+        //render_resized_part(c, part->len, text + part->start_off, an7pos->fs, 0x69, an7pos->posx + cursor_x, an7pos->posy + cursor_y,
+                //7, char_exts_count, char_exts, ass_nodes);
+        float fsp = calc_fsp_amount(text + part->start_off, part->len, &part->extents, &part->ruby.extents);
+        fsp += style->fsp; /* This spacing is in addition to the one definited by style */
+        part->inline_tags.fsp = fsp;
+        part->inline_tags.fsp_set = true;
+
+    } else if (part->ruby.resize) {
+        /* Ruby text is larger */
+        //int r = asprintf(&anode.text, "{\\an7\\fs%d\\pos(%d,%d)}%.*s",
+                //an7pos->fs, an7pos->posx + cursor_x, an7pos->posy + cursor_y, (int)part->len, text + part->start_off);
+        //assert(r != -1);
+
+        size_t len = strlen(part->ruby.rubytext);
+
+        /* Calculate text extents so that it will fill the given space. x_off is used here */
+        //te_simple_justify_chars(arp->ap->fontpath, part->ruby.rubytext, arp->ruby_fs, part->extents.width, ARRSIZE(char_exts), char_exts, &char_exts_count);
+
+        //render_resized_part(c, len, part->ruby.rubytext, arp->ruby_fs, 0x69, an7pos->posx + cursor_x, an7pos->posy + cursor_y,
+                //1, char_exts_count, char_exts, ass_nodes);
+
+        float fsp = calc_fsp_amount(part->ruby.rubytext, len, &part->ruby.extents, &part->extents);
+        fsp += style->fsp; /* This spacing is in addition to the one definited by style */
+        part->ruby.inline_tags.fsp = fsp;
+        part->ruby.inline_tags.fsp_set = true;
+    } else {
+        //assert(false && "None of the parts are resized????");
+    }
+    //dyna_append(ass_nodes, &anode);
+
+}
+#endif
+
+/* Render the cue: the whole base text becomes a single node, and every ruby
+ * part gets an extra node through render_rubytext(). All nodes are appended
+ * to arp->ap->ass_nodes.
+ *
+ * The base text keeps the alignment and position of arp->olpos. Ruby texts
+ * are placed relative to the \an7 position of the combined line boxes, using
+ * a cursor that advances by the width of each part and by the height of each
+ * finished line.
+ * Expects arp->parts, arp->line_ext and the fsp fields to be filled in. */
+static void ass_ruby_render_parts(const struct cue *c, struct ass_ruby_params *arp)
+{
+    struct ass_style *style = ass_styles_get(arp->ap->styles, "Default");
+    struct ass_node anode = {
+        .layer = 4,
+        .start_ms = c->time_start,
+        .end_ms = c->time_end,
+        .style = style,
+    };
+    struct text_extents full_ext = {0};
+    struct ass_cue_pos an7pos = {0};
+    char text[1024 * 4];
+    int text_len = 0;
+
+    /* Calculate the larger bounding box from the bounding box of each line */
+    util_combine_extents(arp->line_ext_count, arp->line_ext, &full_ext);
+
+    /* Convert \anx position to \an7 pos */
+    util_cue_pos_to_an7(arp->olpos, &full_ext, &an7pos);
+
+#if DEBUG_RUBYBOX == 1
+    if (opts_ass_debug_boxes) {
+        struct cuepos_box box = {
+            .width = full_ext.width,
+            .height = full_ext.height,
+        };
+        ass_append_box(c, &an7pos, &box, "FC19DA", (struct ass_params*)arp->ap);
+    }
+#endif
+
+    /* Set tags for the rest of the base line.
+     * Bounded like every other write into text, so an overlong header trips
+     * the assert instead of running past the buffer. */
+    text_len += snprintf(text, sizeof(text), "{\\an%d\\pos(%d,%d)\\fs%d}",
+            arp->olpos->align, arp->olpos->posx, arp->olpos->posy, arp->olpos->fs);
+    assert(text_len < sizeof(text));
+
+    /* How much of the unused line width goes to the left of a shorter line */
+    float x_offset_fact = 0.5f;
+    if (IS_ASS_ALIGN_RIGHT(arp->olpos->align))
+        x_offset_fact = 1.0f;
+    else if (IS_ASS_ALIGN_LEFT(arp->olpos->align))
+        x_offset_fact = 0.0f;
+
+    int cursor_x = 0, cursor_y = 0;
+    /* Render base text */
+    for (int i = 0; i < arp->parts_count; i++) {
+        struct ass_parts *part = &arp->parts[i];
+        const char *part_text = arp->text + part->start_off;
+
+        if (i > 0 && arp->parts[i - 1].line != part->line) {
+            /* Line switch, append a \N */
+            text_len += snprintf(text + text_len, sizeof(text) - text_len,
+                    "%s", "\\N");
+            assert(text_len < sizeof(text));
+            cursor_x = 0;
+            cursor_y += arp->line_ext[arp->parts[i - 1].line].height;
+        }
+
+        /* Byte offset of the last character of the part; only searched for
+         * when that character needs its own spacing */
+        int last_char_off = part->len;
+        if (part->has_last_char_fsp) {
+            for (int b = part->len - 1; b >= 0; b--) {
+                if (util_is_utf8_start(part_text[b])) {
+                    last_char_off = b;
+                    break;
+                }
+            }
+        }
+
+        float fsp = style->fsp;
+        if (part->resize) {
+            fsp = part->fsp;
+        }
+
+        /* Write inline tags for this part */
+        text_len += style_to_inline_tags(&part->inline_tags, sizeof(text) - text_len, text + text_len);
+        assert(text_len < sizeof(text));
+
+        /* Render fsp align + actual text */
+        text_len += snprintf(text + text_len, sizeof(text) - text_len,
+                "{\\fsp%g}%.*s", fsp, last_char_off, part_text);
+        assert(text_len < sizeof(text));
+
+        if (part->has_last_char_fsp) {
+            /* The last character carries the spacing towards the next part */
+            text_len += snprintf(text + text_len, sizeof(text) - text_len,
+                    "{\\fsp%g}%.*s", part->last_char_fsp, (int)(part->len - last_char_off), part_text + last_char_off);
+            assert(text_len < sizeof(text));
+        }
+
+        if (part->is_ruby) {
+            int line_diff = full_ext.width - arp->line_ext[part->line].width;
+            /* Render ruby text */
+            render_rubytext(c, arp, cursor_x + line_diff * x_offset_fact, cursor_y, part, &an7pos, &full_ext);
+        }
+
+        /* A resized base takes up as much room as its ruby text */
+        if (part->resize)
+            cursor_x += part->ruby.extents.width;
+        else
+            cursor_x += part->extents.width;
+    }
+    anode.text = strdup(text);
+    dyna_append(arp->ap->ass_nodes, &anode);
+
+}
+
+#if 0
+/* Disabled, unfinished code: was meant to split a part in two at its last
+ * UTF-8 character.
+ * NOTE(review): this would not compile if enabled; the second loop uses `i`
+ * and `last_char_len`, neither of which is declared at that point, and the
+ * function ends without using split_off or newp. */
+static void split_part_on_last_char(struct ass_ruby_params *arp, int part_idx)
+{
+    struct ass_parts *part = &arp->parts[part_idx];
+
+    assert(part->len > 1);
+    const char *str = arp->text + part->start_off;
+
+    int split_off = -1;
+    for (int i = arp->len - 1; i >= 0; i--) {
+        if (util_is_utf8_start(str[i])) {
+            /* Last UTF8 character position, split here */
+            split_off = i;
+            break;
+        }
+    }
+    assert(split_off != -1);
+
+    struct ass_parts newp = *part;
+
+    for (int j = arp->parts[i].len - 1; j >= 0; j--) {
+        char ch = arp->text[arp->parts[i].start_off + j];
+        if ((ch & 0xC0) != 0x80) {
+            /* This is the starting char, mark it */
+            last_char_len = arp->parts[i].len - j;
+            break;
+        }
+    }
+}
+#endif
+
+/* Compute the letter spacing that stretches the narrower side of each ruby
+ * part to the width of the wider side.
+ *   - resized base: the spacing goes into part->fsp. Unless this is the very
+ *     first part, the outer gap is included and the previous part gets a
+ *     last_char_fsp so that its last character provides the leading gap.
+ *   - resized ruby text: the spacing goes into part->ruby.fsp, always with
+ *     outer gaps.
+ */
+static void ass_ruby_align_resized(struct ass_ruby_params *arp)
+{
+    struct ass_style *def_style = ass_styles_get(arp->ap->styles, "Default");
+
+    for (int idx = 0; idx < arp->parts_count; idx++) {
+        struct ass_parts *cur = &arp->parts[idx];
+
+        if (!cur->resize) {
+            if (cur->ruby.resize) {
+                /* On ruby texts, we can always do outer spacing, because that's just a position addition */
+                float ruby_spacing = calc_fsp_amount(cur->ruby.rubytext, strlen(cur->ruby.rubytext), true,
+                        def_style->fsp, &cur->ruby.extents, &cur->extents);
+                cur->ruby.fsp = ruby_spacing;
+            }
+            continue;
+        }
+
+        /* Outer spacing needs a previous part to carry the leading gap */
+        const bool has_prev = idx > 0;
+        float spacing = calc_fsp_amount(arp->text + cur->start_off, cur->len, has_prev,
+                def_style->fsp, &cur->extents, &cur->ruby.extents);
+        cur->fsp = spacing;
+
+        if (!has_prev)
+            continue;
+
+        struct ass_parts *prev = &arp->parts[idx - 1];
+        float prev_spacing = def_style->fsp;
+        if (prev->resize)
+            prev_spacing = prev->fsp;
+
+        prev->last_char_fsp = prev_spacing + spacing;
+        prev->has_last_char_fsp = true;
+    }
+}
+
+#if 0
+// TODO: this is wrong, should apply each inline tag separately in render_parts
+/* Disabled code: collected the tagged and escaped text of the whole cue into
+ * arp->tags_text using a style stack seeded with the "Default" style. */
+static void fill_tags_text(const struct cue *c, struct ass_ruby_params *arp)
+{
+    struct ass_style *style = ass_styles_get(arp->ap->styles, "Default");
+
+    stack_init(style_stack, sizeof(struct ass_style), 30);
+    stack_push(&style_stack, style);
+
+    arp->tags_text_len = ass_text_collect_tags_and_escape(c->text_node, sizeof(arp->tags_text), arp->tags_text, &style_stack, NULL, arp->ap);
+    assert(arp->tags_text_len < sizeof(arp->tags_text));
+}
+#endif
+
+/* Write a cue that contains ruby text as ASS nodes.
+ *
+ * c:     the cue; its text_node, time_start and time_end are used
+ * olpos: position, alignment and font size (fs) of the cue; the ruby font
+ *        size is derived from it (fs * 0.55)
+ * ap:    output parameters; the "Default" style and fontpath are read from
+ *        it, and the rendering step appends its nodes to ap->ass_nodes
+ *
+ * Runs the steps below in order; each step documents which fields of the
+ * shared ass_ruby_params it fills. The text-extents object is created and
+ * destroyed inside this function.
+ * NOTE(review): the "#if 1" section prints every part to stdout on each
+ * call - presumably debug output; confirm it is meant to stay enabled. */
+void ass_ruby_write(const struct cue *c, const struct ass_cue_pos *olpos, const struct ass_params *ap)
+{
+    struct ass_style *style = ass_styles_get(ap->styles, "Default");
+
+    stack_init(style_stack, sizeof(struct ass_style), 30);
+    stack_push(&style_stack, style);
+    /* Struct to hold all info about the ruby rendering process,
+     * because otherwise there are too many arguments to functions lol */
+    struct ass_ruby_params arp = {
+        .base_fs = olpos->fs,
+        .ruby_fs = (int)(olpos->fs * 0.55f),
+        .olpos = olpos,
+        .ap = ap,
+        .style_stack = &style_stack,
+    };
+
+    /* Copy the full raw text, and split the text into parts based on ruby and newlines
+     * Fills: arp.text, arp.parts */
+    ass_ruby_text_and_parts(c->text_node, &arp);
+
+    /* Setup object for calculating text extents later */
+    te_create_obj(ap->fontpath, arp.text, arp.text_len, arp.base_fs, false, &arp.base_te_obj);
+
+    /* Calculate text extents for each part, as they are, without any resizing
+     * Fills: arp.parts and .ruby .extents */
+    ass_ruby_calc_parts_extents(&arp);
+
+#if 1
+    for (int i = 0; i < arp.parts_count; i++) {
+        printf("Part: [%d%s] '%.*s'\n", arp.parts[i].line, (char*[]){"       ", " - RUBY"}[arp.parts[i].is_ruby], (int)arp.parts[i].len, arp.text + arp.parts[i].start_off);
+        if (arp.parts[i].is_ruby) {
+            printf("  ^\\- Ruby text: %s\n", arp.parts[i].ruby.rubytext);
+        }
+    }
+    printf("\n");
+#endif
+
+    /* Either mark the ruby base, or the ruby text as the one to be resized
+     * depending on whichever is larger
+     * Fills: arp.parts and .ruby .resize */
+    ass_ruby_mark_resize(&arp);
+
+    /* Calculate text extents for each line, with resized sizes
+     * Fills arp.line_ext */
+    ass_ruby_calc_line_exts(&arp);
+
+    /* Center the resized parts
+     * Fills arp.part and .ruby .fsp and last_char_fsp */
+    ass_ruby_align_resized(&arp);
+
+    /* Render base and ruby text */
+    ass_ruby_render_parts(c, &arp);
+
+#if 0
+    text_len = ass_draw_box(sizeof(text), text, &an7pos, &rb_box, "D63E73");
+    assert(text_len < sizeof(text));
+    anode.text = strdup(text);
+    dyna_append(ass_nodes, &anode);
+#endif
+
+#if 0
+    for (int i = 0; i < ruby_count; i++) {
+        text_len = snprintf(text, sizeof(text), "{\\an2\\fs%d\\pos(%d,%d)}%s", (int)(olpos->fs * 0.55f),
+                an7pos.posx + rubys[i].base_box.left + rubys[i].base_box.width / 2, an7pos.posy + rubys[i].base_box.top, rubys[i].rubytext);
+        assert(text_len < sizeof(text));
+        anode.text = strdup(text);
+        dyna_append(ass_nodes, &anode);
+    }
+#endif
+    
+    te_destroy_obj(&arp.base_te_obj);
+}
+
--- a/src/ass_ruby.h
+++ b/src/ass_ruby.h
@ -0,0 +1,11 @@
+#ifndef _VTT2ASS_ASS_RUBY_H
+#define _VTT2ASS_ASS_RUBY_H
+#include "dyna.h"
+#include "ass.h"
+#include "cuetext.h"
+#include "cuepos.h"
+
+
+/* Write a cue that contains ruby text: the base text and every ruby text are
+ * appended as nodes to ap->ass_nodes. olpos gives the position, alignment and
+ * font size of the cue. */
+void ass_ruby_write(const struct cue *c, const struct ass_cue_pos *olpos, const struct ass_params *ap);
+
+#endif /* _VTT2ASS_ASS_RUBY_H */
--- a/src/ass_style.c
+++ b/src/ass_style.c
@ -0,0 +1,51 @@
+#include "ass_style.h"
+
+#include <string.h>
+#include <stdlib.h>
+#include <byteswap.h>
+#include <stdio.h>
+
+#include "util.h"
+
+/* Release the strings owned by a style (name and fontpath) through the
+ * SAFE_FREE macro. The struct itself is not released here. */
+void ass_style_free(struct ass_style *s)
+{
+    SAFE_FREE(s->name);
+    SAFE_FREE(s->fontpath);
+    //memset(style, 0, sizeof(*style));
+}
+
+/* Create the list of styles: a dyna of struct ass_style elements, created
+ * with DYNAFLAG_HEAPCOPY and with ass_style_free() registered as the element
+ * free function. Release it with ass_styles_destroy().
+ * Declared with (void) so that the definition is a real prototype. */
+struct dyna *ass_styles_create(void)
+{
+    struct dyna *styles = dyna_create_size_flags(sizeof(struct ass_style), 4, DYNAFLAG_HEAPCOPY);
+    dyna_set_free_fn(styles, (dyna_free_fn)ass_style_free);
+    return styles;
+}
+
+/* Destroy a style list created by ass_styles_create(); simply forwards to
+ * dyna_destroy(). */
+void ass_styles_destroy(struct dyna *ds)
+{
+    dyna_destroy(ds);
+}
+
+/* Look up a style by its exact name.
+ * Returns the first style whose name compares equal to `name`, or NULL when
+ * the list holds no such style. */
+struct ass_style *ass_styles_get(const struct dyna *styles, const char *name)
+{
+    int idx = 0;
+
+    while (idx < styles->e_idx) {
+        struct ass_style *candidate = dyna_elem(styles, idx);
+
+        if (!strcmp(candidate->name, name))
+            return candidate;
+        idx++;
+    }
+
+    return NULL;
+}
+
+/* Add a new style to the list and return it.
+ * The new element is zeroed, then given its own strdup()ed copy of `name`. */
+struct ass_style *ass_styles_add(struct dyna *styles, const char *name)
+{
+    struct ass_style *fresh = dyna_emplace(styles);
+
+    memset(fresh, 0, sizeof(*fresh));
+    fresh->name = strdup(name);
+
+    return fresh;
+}
+
+/* Format a color as "&H" followed by 8 uppercase hex digits of the
+ * byte-swapped value. Writes at most 12 bytes to out and returns the result
+ * of snprintf(). */
+int ass_style_rgb_to_str(uint32_t rgb, char out[12])
+{
+    return snprintf(out, 12, "&H%08X", bswap_32(rgb));
+}
--- a/src/ass_style.h
+++ b/src/ass_style.h
@ -0,0 +1,46 @@
+#ifndef _VTT2ASS_ASS_STYLE_H
+#define _VTT2ASS_ASS_STYLE_H
+#include "dyna.h"
+
+#include <stdbool.h>
+#include <stdint.h>
+
+/* One style. The same struct is used for named styles and for sets of inline
+ * tags; in the latter case the *_set flags tell which fields are meaningful. */
+struct ass_style {
+    char *name; /* free */
+    char *fontpath; /* free */
+    float frz;
+    float fsp; /* font spacing */
+    float bord;
+    float shad;
+    bool italic : 1;
+    bool bold : 1;
+    bool underline : 1;
+
+    /* not really ass style members, but it's easier to add it here */
+    bool ruby_under : 1;
+
+    uint32_t bord_color; /* in rgb */
+
+    /* These are only consulted when this struct is used as an inline tag;
+     * for normal styles, every field is assumed to be in use */
+    bool fsp_set : 1;
+    bool bord_set : 1;
+    bool bord_color_set : 1;
+    bool italic_set : 1;
+    bool bold_set : 1;
+    bool underline_set : 1;
+
+    /* set fields for extra elements */
+    bool ruby_under_set : 1;
+};
+
+/* Create / destroy the list of styles; ass_style_free() releases the strings
+ * owned by one style */
+struct dyna *ass_styles_create();
+void ass_styles_destroy(struct dyna *ds);
+void ass_style_free(struct ass_style *style);
+
+/* Look up a style by name (NULL when there is none) / append a new, zeroed
+ * style with the given name */
+struct ass_style *ass_styles_get(const struct dyna *styles, const char *name);
+struct ass_style *ass_styles_add(struct dyna *styles, const char *name);
+
+/* Format a color as "&H" + 8 hex digits into out; returns the snprintf() result */
+int ass_style_rgb_to_str(uint32_t rgb, char out[12]);
+
+#endif /* _VTT2ASS_ASS_STYLE_H */
--- a/src/cuepos.c
+++ b/src/cuepos.c
@ -0,0 +1,348 @@
+#include "cuepos.h"
+
+#include <string.h>
+#include <assert.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <sys/param.h>
+
+/* Video dimensions used by the position calculations in this file.
+ * NULL until cuepos_set_video_info() is called. */
+const struct video_info *vinf = NULL;
+
+/* Remember the video info to use; only the pointer is stored, not a copy */
+void cuepos_set_video_info(const struct video_info *vi)
+{
+    vinf = vi;
+}
+
+/* https://www.w3.org/TR/webvtt1/#cue-computed-position
+ * An explicit position is returned as-is. For an auto position the value
+ * follows the text alignment: 0 for start/left, 1 for end/right and 0.5 for
+ * everything else. */
+static float cuepos_compute_pos(const struct cue *c)
+{
+    if (!IS_AUTO(c->position))
+        return c->position;
+
+    /* start and end are treated like left and right here (own addition) */
+    if (c->text_align == TEXT_ALIGN_START || c->text_align == TEXT_ALIGN_LEFT)
+        return 0;
+
+    if (c->text_align == TEXT_ALIGN_END || c->text_align == TEXT_ALIGN_RIGHT)
+        return 1;
+
+    return 0.5;
+}
+
+/* https://www.w3.org/TR/webvtt1/#cue-computed-line
+ * Returns:
+ *   - the line itself when it is set, except that without snap-to-lines a
+ *     value outside of 0..1 becomes 1
+ *   - 1 when the line is auto and snap-to-lines is off
+ *   - -1 when the line is auto and snap-to-lines is on (the spec would count
+ *     the lines currently showing here, which is not implemented)
+ */
+static float cuepos_compute_line(const struct cue *c)
+{
+    if (IS_AUTO(c->line)) {
+        /* step 3 and the half-baked rest of the algorithm */
+        return c->snap_to_lines ? -1 : 1.0f;
+    }
+
+    /* step 1 */
+    const bool out_of_range = c->line < 0.0f || c->line > 1.0f;
+    if (!c->snap_to_lines && out_of_range)
+        return 1.0f;
+
+    /* step 2 */
+    return c->line;
+}
+
+/* https://www.w3.org/TR/webvtt1/#webvtt-cue-position-alignment
+ * An explicit position alignment is returned as-is. For auto it is derived
+ * from the text alignment; start and end additionally depend on the base
+ * direction. Any other text alignment gives center. */
+enum cue_pos_align cuepos_compute_pos_align(const struct cue *c)
+{
+    if (c->pos_align != POS_ALIGN_AUTO)
+        return c->pos_align;
+
+    if (c->text_align == TEXT_ALIGN_LEFT)
+        return POS_ALIGN_LINE_LEFT;
+
+    if (c->text_align == TEXT_ALIGN_RIGHT)
+        return POS_ALIGN_LINE_RIGHT;
+
+    if (c->text_align == TEXT_ALIGN_START) {
+        if (c->base_direction == BDIR_LTR)
+            return POS_ALIGN_LINE_LEFT;
+        return POS_ALIGN_LINE_RIGHT;
+    }
+
+    if (c->text_align == TEXT_ALIGN_END) {
+        if (c->base_direction == BDIR_LTR)
+            return POS_ALIGN_LINE_RIGHT;
+        return POS_ALIGN_LINE_LEFT;
+    }
+
+    return POS_ALIGN_CENTER;
+}
+
+/* Simplified, home-grown alternative to cuepos_apply_cue_settings().
+ * Computes the cue box from position, size, line and the alignments of the
+ * cue as fractions of the video, then scales it by the vinf dimensions and
+ * stores it in *op.
+ *
+ * Cleaned up without changing the result: the locals that were never read
+ * and the unused cuepos_compute_pos_align() call are gone, and the former
+ * implicit fallthrough of the center case is now an explicit break.
+ *
+ * NOTE(review): the horizontal switch looks at c->pos_align directly, so an
+ * auto position alignment is handled like line-left - confirm whether the
+ * computed alignment was meant to be used instead.
+ * NOTE(review): the assert runs before the auto check of c->line - confirm
+ * that an auto line passes it.
+ * NOTE(review): fabs() needs <math.h>, which this file does not include
+ * directly - presumably it comes in through cuepos.h; confirm. */
+void cuepos_apply_cue_settings_mine(const struct cue *c, struct cuepos_box *op)
+{
+    memset(op, 0, sizeof(*op));
+
+    /* All of these are fractions of the video size until stored in op */
+    float left, top = 0, height;
+
+    if (IS_AUTO(c->position))
+        left = 0;
+    else
+        left = c->position;
+
+    assert(fabs(c->line) <= 1.0);
+    if (IS_AUTO(c->line)) {
+        height = 1;
+    } else {
+        switch (c->line_align) {
+        case LINE_ALIGN_END:
+            height = c->line;
+            break;
+        case LINE_ALIGN_CENTER:
+            height = c->line * 2;
+            break;
+        default:
+        case LINE_ALIGN_START:
+            top = c->line;
+            height = 1 - top;
+            break;
+        }
+    }
+
+    /* Move the left edge so that the position refers to the aligned point */
+    switch (c->pos_align) {
+    case POS_ALIGN_LINE_RIGHT:
+        left = left - c->size;
+        break;
+    case POS_ALIGN_CENTER:
+        left = left - c->size / 2.0f;
+        break;
+    default:
+    case POS_ALIGN_LINE_LEFT:
+        break;
+    }
+
+    op->left = left * vinf->width;
+    op->top = top * vinf->height;
+    op->width = c->size * vinf->width;
+    op->height = height * vinf->height;
+}
+
+/* https://www.w3.org/TR/webvtt1/#apply-webvtt-cue-settings
+ * Compute the box of a cue in video coordinates and store it in *op.
+ * The numbered comments refer to the steps of the algorithm linked above;
+ * parts marked "mine" deviate from it. Uses the global vinf for the video
+ * dimensions.
+ * The -100 initial values only mark the locals as not yet computed. */
+void cuepos_apply_cue_settings(const struct cue *c, struct cuepos_box *op)
+{
+    memset(op, 0, sizeof(*op));
+
+    /* top, left, width, height are the css box sizes */
+    float size = -100;
+    float max_size = -100;
+    /* In screen coords */
+    float width = -100, height = -100;
+    float xpos = -100, ypos = -100;
+    /* In screen coords */
+    float left = -100, top = -100;
+    float full_dimension = -100;
+    float step = -100;
+
+    bool is_wdh = c->writing_direction == WD_HORIZONTAL;
+
+    // 1
+
+    //asm("int $3");
+    enum cue_pos_align comp_pos_align = cuepos_compute_pos_align(c);
+    float comp_pos = cuepos_compute_pos(c);
+#if 0
+    printf("Comp pos: %f   %s\n", comp_pos, str_cue_text_align[c->text_align]);
+    printf("posalign: %s  comp posalign: %s - ", str_cue_pos_align[c->pos_align], str_cue_pos_align[comp_pos_align]);
+    ass_write_text(stdout, c->text_node);
+    printf("\n");
+#endif
+    /* Largest size that still fits, given the position and its alignment */
+    switch (comp_pos_align) {
+    case POS_ALIGN_LINE_LEFT:
+        max_size = 1.0f - comp_pos;
+        break;
+    case POS_ALIGN_LINE_RIGHT:
+        max_size = comp_pos;
+        break;
+    case POS_ALIGN_CENTER:
+        if (comp_pos <= 0.5f)
+            max_size = comp_pos * 2;
+        else
+            max_size = (1.0f - comp_pos) * 2;
+        break;
+    default:
+        assert(0 && "Unknown comp_pos_align");
+    }
+
+    // 3
+    if (c->size < max_size)
+        size = c->size;
+    else
+        size = max_size;
+    //asm("int $3");
+
+    // 4
+    if (is_wdh) {
+        width = size * vinf->width;
+        height = CUE_AUTO;
+    } else {
+        width = CUE_AUTO;
+        height = size * vinf->height;
+    }
+
+    //asm("int $3");
+    // 5
+    /* xypos_ptr is the coordinate along the writing direction,
+     * xypos_other_ptr the one across it */
+    float *xypos_ptr, *xypos_other_ptr;
+    if (is_wdh) {
+        xypos_ptr = &xpos;
+        xypos_other_ptr = &ypos;
+    } else {
+        xypos_ptr = &ypos;
+        xypos_other_ptr = &xpos;
+    }
+    switch (comp_pos_align) {
+    case POS_ALIGN_LINE_LEFT:
+        // xpos = 0.5
+        *xypos_ptr = comp_pos;
+        break;
+    case POS_ALIGN_CENTER:
+        *xypos_ptr = comp_pos - (size / 2.0f);
+        break;
+    case POS_ALIGN_LINE_RIGHT:
+        *xypos_ptr = (comp_pos - size);
+        break;
+    default:
+        assert(0 && "Not handled def case 1");
+    }
+
+    // 6
+    float comp_line = cuepos_compute_line(c);
+    //asm("int $3");
+    if (c->snap_to_lines == false) {
+        *xypos_other_ptr = comp_line;
+        if (c->line_align == LINE_ALIGN_START && is_wdh) // mine
+            *xypos_other_ptr = 1 - *xypos_other_ptr;
+    } else {
+        *xypos_other_ptr = 0;
+    }
+
+    // 7
+    left = xpos * vinf->width;
+    top = ypos * vinf->height;
+
+    /* Resolve whichever of width/height was left as auto in step 4 */
+    if (is_wdh) {
+        if (IS_AUTO(width)) {
+            width = size * vinf->width;
+        }
+        //asm("int $3");
+        if (IS_AUTO(height)) {
+            // mine
+            if (c->line_align == LINE_ALIGN_START)
+                height = vinf->height - top;
+            else if (c->line_align == LINE_ALIGN_CENTER)
+                height = MIN(vinf->height - top, top) * 2;
+                //height = vinf->height / 2 - top;
+            else if (c->line_align == LINE_ALIGN_END)
+                height = top;
+
+            //height = vinf->height - (top * fact); // kinda mine as well
+            //printf("%f\n", top + height);
+        }
+    } else {
+        if (IS_AUTO(height)) {
+            assert(false);
+        }
+        //asm("int $3");
+        if (IS_AUTO(width)) {
+            // mine
+            if (c->line_align == LINE_ALIGN_START)
+                width = vinf->width - left;
+            else if (c->line_align == LINE_ALIGN_CENTER)
+                width = MIN(vinf->width - left, left) * 2;
+            else if (c->line_align == LINE_ALIGN_END)
+                width = left;
+
+            //height = vinf->height - (top * fact); // kinda mine as well
+            //printf("%f\n", top + height);
+        }
+    }
+
+    // 8
+    /* Obtain CSS boxes */
+
+    // 9
+    /* skipped */
+
+    // 10
+    if (c->snap_to_lines) {
+        full_dimension = (is_wdh) ? vinf->height : vinf->width;
+        /* not quite */
+        //step = (is_wdh) ? op->fs : op->fs/2.5f;
+        /* step is fixed to 0 for now, so this branch always jumps to done_pos */
+        step = 0;
+        if (step == 0) {
+            goto done_pos;
+        }
+        /* We could do something like storing all visible
+         * lines and calculate this based on the sizes of those */
+        assert(0);
+
+    } else {
+        /* TODO: make this apply to all currently shown cues */
+        /* shiftptr is the edge that moves, offptr the extent it moves by */
+        float *shiftptr = (is_wdh) ? &top : &left;
+        float *offptr = (is_wdh) ? &height : &width;
+        switch (c->line_align) {
+        case LINE_ALIGN_START: // mine
+            if (is_wdh) {
+                height = vinf->height - top;
+                top = 0;
+            } else if (c->writing_direction == WD_VERTICAL_GROW_RIGHT) {
+                width = vinf->width - left;
+                left = 0;
+            }
+            break;
+        case LINE_ALIGN_CENTER:
+            *shiftptr -= *offptr / 2;
+            break;
+        case LINE_ALIGN_END: // spec
+            *shiftptr -= *offptr;
+            break;
+        }
+
+        /* Skip overlap fixing here */
+    }
+
+done_pos:
+    /*
+    obox->size = size;
+    obox->width = width;
+    obox->height = height;
+    obox->xpos = xpos;
+    obox->ypos = ypos;
+    obox->left = left;
+    obox->top = top;
+    obox->full_dimension = full_dimension;
+    */
+
+    /*
+    float l;
+    if (IS_AUTO(c->line)) {
+        l = 1;
+    } else {
+        l = 1 - c->line;
+    }
+    op->posy = vinf->height - ypos - vinf->height * l;
+    */
+
+    //BP;
+    /* The float results are stored into the int fields of the box */
+    op->left = left;
+    op->top = top;
+    op->width = width;
+    op->height = height;
+    //asm("int $3");
+    return;
+}
--- a/src/cuepos.h
+++ b/src/cuepos.h
@ -0,0 +1,21 @@
+#ifndef _VTT2ASS_CUEPOS_H
+#define _VTT2ASS_CUEPOS_H
+
+#include "parser.h"
+
+/* Dimensions of the video the cues are positioned on */
+struct video_info {
+    int width, height;
+};
+/* The video info currently in use; set with cuepos_set_video_info() */
+extern const struct video_info *vinf;
+void cuepos_set_video_info(const struct video_info *vi);
+
+/* Box of a cue in video coordinates */
+struct cuepos_box {
+    int left, top, width, height;
+};
+
+/* https://www.w3.org/TR/webvtt1/#apply-webvtt-cue-settings */
+/* Computes the box of the cue into *op */
+void cuepos_apply_cue_settings(const struct cue *c, struct cuepos_box *op);
+
+/* Position alignment of the cue with auto resolved from its text alignment */
+enum cue_pos_align cuepos_compute_pos_align(const struct cue *c);
+
+#endif /* _VTT2ASS_CUEPOS_H */
--- a/src/cuestyle.c
+++ b/src/cuestyle.c
@ -0,0 +1,154 @@
+#include "cuestyle.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <regex.h>
+#include <assert.h>
+
+#include "tokenizer.h"
+#include "util.h"
+
+/* Element destructor installed on the styles array: releases the selector
+ * string owned by a struct cue_style (the struct itself is not freed). */
+static void cuestyle_free(void *data)
+{
+    struct cue_style *style = data;
+
+    SAFE_FREE(style->selector);
+}
+
+/*
+ * Parse a CSS `text-shadow` value of the form
+ *     "<x>[px] <y>[px] <color>[, <x>[px] <y>[px] <color>]..."
+ * into cs->text_shadow[] and store the number of parsed entries in
+ * cs->text_shadow_count.
+ *
+ * At most ARRSIZE(cs->text_shadow) shadows are stored. Parsing stops at the
+ * first position where no further shadow matches, so values with fewer
+ * shadows than the array can hold are accepted. A shadow whose color does
+ * not fit the fixed-size color field is skipped with a warning.
+ */
+static void parse_text_shadow(const char *val, struct cue_style *cs)
+{
+    regex_t reg;
+    regmatch_t regm[12];
+    size_t val_off = 0;
+    int shad_count = 0;
+
+    if (regcomp(&reg, "(-?[[:digit:]]+)(px)? (-?[[:digit:]]+)(px)? ([^,]+),?", REG_EXTENDED) != 0) {
+        fprintf(stderr, "[Warning] Failed to compile the text-shadow pattern\n");
+        return;
+    }
+    assert(reg.re_nsub < ARRSIZE(regm));
+
+    while (shad_count < (int)ARRSIZE(cs->text_shadow)) {
+        const char *cur = val + val_off;
+
+        /* No (further) shadow in the remaining text: done */
+        if (regexec(&reg, cur, reg.re_nsub + 1, regm, 0) != 0)
+            break;
+
+        /* The pattern needs at least one digit, so rm_eo > 0 and we always advance */
+        val_off += regm[0].rm_eo;
+
+        size_t colorlen = (size_t)(regm[5].rm_eo - regm[5].rm_so);
+        if (colorlen >= sizeof(cs->text_shadow[0].color)) {
+            fprintf(stderr, "[Warning] text-shadow color too long, skipping: %.*s\n",
+                    (int)colorlen, cur + regm[5].rm_so);
+            continue;
+        }
+
+        cs->text_shadow[shad_count].xpos = strtol(cur + regm[1].rm_so, NULL, 10);
+        cs->text_shadow[shad_count].ypos = strtol(cur + regm[3].rm_so, NULL, 10);
+        memcpy(cs->text_shadow[shad_count].color, cur + regm[5].rm_so, colorlen);
+        cs->text_shadow[shad_count].color[colorlen] = '\0';
+        shad_count++;
+    }
+
+    cs->text_shadow_count = shad_count;
+
+    regfree(&reg);
+}
+
+/* Apply one `key: value` declaration of a style rule to cs.
+ * Keys other than the three handled here are ignored, as is any
+ * ruby-position value other than "under". */
+static void parse_keyval(struct token *tok, struct cue_style *cs)
+{
+    const char *name = tok->style_keyval.key;
+    const char *value = tok->style_keyval.value;
+
+    if (!strcmp(name, "text-shadow")) {
+        parse_text_shadow(value, cs);
+        return;
+    }
+
+    if (!strcmp(name, "x-ttml-shear")) {
+        /* The shear amount itself is not inspected */
+        cs->italic = true;
+        return;
+    }
+
+    if (!strcmp(name, "ruby-position") && !strcmp(value, "under"))
+        cs->ruby_position = RUBYPOS_UNDER;
+}
+
+/*
+ * Parse one style rule starting at tokens[tok_idx]:
+ *     SELECTOR '{' KEYVAL* '}'
+ * and append the resulting struct cue_style to `styles`.
+ *
+ * The selector string is moved out of its token. On success the appended
+ * style owns it; on failure it is freed here.
+ *
+ * Returns the index of the last token looked at: the closing brace on
+ * success, the offending token (or tokens->e_idx when the input ended
+ * early) on failure. The caller continues after that index.
+ */
+static int parse_group(struct dyna *tokens, int tok_idx, struct dyna *styles)
+{
+#define ADVANCE() do { \
+    i++; \
+    if (i >= tokens->e_idx) goto err; \
+    tok = dyna_elem(tokens, i); \
+} while (0)
+#define EXP(exp_token_type) do { \
+    if (tok->type != exp_token_type) { \
+        fprintf(stderr, "Unexpected token: %s  expected %s\n", tok_type2str(tok->type), tok_type2str(exp_token_type)); \
+        goto err; \
+    } \
+} while (0)
+
+    int i = tok_idx;
+    struct token *tok = dyna_elem(tokens, i);
+    struct cue_style cs = {0};
+
+    EXP(TOK_STYLE_SELECTOR);
+    /* Move string instead of copy */
+    cs.selector = tok->style_selector.str;
+    tok->style_selector.str = NULL;
+
+    ADVANCE(); EXP(TOK_STYLE_OPEN_BRACE);
+
+    ADVANCE();
+    while (tok->type == TOK_STYLE_KEYVAL) {
+        parse_keyval(tok, &cs);
+        ADVANCE();
+    }
+    EXP(TOK_STYLE_CLOSE_BRACE);
+
+    /* The array now holds a copy of cs and with it the selector */
+    dyna_append(styles, &cs);
+    return i;
+
+err:
+    /* The rule was never stored, so nobody else will release the selector */
+    free(cs.selector);
+    return i;
+#undef ADVANCE
+#undef EXP
+}
+
+/*
+ * Collect every style rule found in the token stream.
+ *
+ * Returns a newly created array of struct cue_style (release it with
+ * dyna_destroy()), or NULL when the stream defines no styles.
+ */
+struct dyna *cuestyle_parse(struct dyna *tokens)
+{
+    struct dyna *styles = dyna_create_size(sizeof(struct cue_style), 4);
+    dyna_set_free_fn(styles, cuestyle_free);
+
+    int idx = 0;
+    while (idx < tokens->e_idx) {
+        const struct token *tok = dyna_elem(tokens, idx);
+
+        /* According to the spec, style tags are only valid before
+         * the first cue, so stop parsing there */
+        if (tok->type == TOK_TIMESTAMP)
+            break;
+
+        /* parse_group() reports the last token it looked at */
+        if (tok->type == TOK_STYLE_SELECTOR)
+            idx = parse_group(tokens, idx, styles);
+
+        idx++;
+    }
+
+    if (styles->e_idx != 0)
+        return styles;
+
+    /* No styles defined, return NULL */
+    dyna_destroy(styles);
+    return NULL;
+}
+
+/* Format a short description of cs into o_text (at most o_text_size bytes).
+ * Returns whatever snprintf() returns, i.e. the untruncated length. */
+int cuestyle_print(int o_text_size, char o_text[o_text_size], struct cue_style *cs)
+{
+    return snprintf(o_text, o_text_size,
+            "Selector: %s\nruby-position: %d\nitalic: %d\n",
+            cs->selector, cs->ruby_position, cs->italic);
+}
+
+/* Look up the first style whose selector equals `selector` exactly.
+ * `styles` may be NULL (no styles defined); returns NULL when nothing matches. */
+const struct cue_style *cuestyle_get_by_selector(const struct dyna *styles, const char *selector)
+{
+    if (!styles)
+        return NULL;
+
+    int pos = 0;
+    while (pos < styles->e_idx) {
+        const struct cue_style *candidate = dyna_elem(styles, pos++);
+
+        /* Entries without a selector can never match */
+        if (candidate->selector == NULL)
+            continue;
+        if (!strcmp(candidate->selector, selector))
+            return candidate;
+    }
+    return NULL;
+}
--- a/src/cuestyle.h
+++ b/src/cuestyle.h
@ -0,0 +1,29 @@
+#ifndef _VTT2ASS_CUESTYLE_H
+#define _VTT2ASS_CUESTYLE_H
+#include <stdbool.h>
+
+#include "dyna.h"
+
+/* One parsed style rule: a selector plus the few properties cuestyle.c
+ * understands. */
+struct cue_style {
+    char *selector; /* free */
+
+    /* Stays RUBYPOS_UNSET unless the rule says `ruby-position: under` */
+    enum {
+        RUBYPOS_UNSET = 0,
+        RUBYPOS_OVER,
+        RUBYPOS_UNDER,
+    } ruby_position;
+
+    /* Entries of a `text-shadow` value; color is stored as a
+     * NUL-terminated string copied verbatim from the rule */
+    struct {
+        int xpos, ypos;
+        char color[12];
+    } text_shadow[4];
+    /* Number of text_shadow[] entries filled in */
+    int text_shadow_count;
+
+    /* Set when the rule contains an `x-ttml-shear` key */
+    bool italic : 1;
+};
+
+struct dyna *cuestyle_parse(struct dyna *tokens);
+const struct cue_style *cuestyle_get_by_selector(const struct dyna *styles, const char *selector);
+int cuestyle_print(int o_text_size, char o_text[o_text_size], struct cue_style *cs);
+
+#endif /* _VTT2ASS_CUESTYLE_H */
--- a/src/cuetext.c
+++ b/src/cuetext.c
@ -0,0 +1,616 @@
+#include "cuetext.h"
+
+#include <stddef.h>
+#include <assert.h>
+#include <ctype.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "dyna.h"
+#include "util.h"
+
+#define SAFE_FREE(x) if (x) free(x);
+
+/* Holder for a single class name.
+ * NOTE(review): nothing in this file appears to use it - confirm before removing. */
+struct ctxt_class {
+    char *name;
+};
+
+/* Printable names of the token types: TEXT_TOKEN_DEF is expanded with a
+ * macro that keeps only the (stringified) enumerator name, so the table is
+ * indexed by enum ctxt_token_type. */
+#define x(n, ...) #n,
+static const char *ctxt_token_str_map[] = {
+    TEXT_TOKEN_DEF(x)
+};
+#undef x
+
+/* Same idea for enum vtt_node_type, generated from VTT_NODE_TYPE_DEF. */
+#define ex(n) #n,
+static const char *ctxt_node_type_str_map[] = {
+    VTT_NODE_TYPE_DEF(ex)
+};
+#undef ex
+
+/* Element destructor for the token array: releases every string and class
+ * list still owned by the token. Members that were moved out (set to NULL)
+ * are skipped. */
+static void ctxt_token_free(struct ctxt_token *tok)
+{
+    if (tok->type == TTOK_STRING) {
+        free(tok->ttok_string.value);
+    } else if (tok->type == TTOK_TAG_START) {
+        free(tok->ttok_tag_start.tag_name);
+        if (tok->ttok_tag_start.classes != NULL)
+            dyna_destroy(tok->ttok_tag_start.classes);
+        free(tok->ttok_tag_start.annotation);
+    } else if (tok->type == TTOK_TAG_END) {
+        free(tok->ttok_tag_end.tag_name);
+    }
+    /* TTOK_TIMESTAMP carries no heap data */
+}
+
+/*
+ * Recursively pretty-print `node` and its children, one node per line,
+ * indenting each nesting level by four more spaces.
+ *
+ * *out is the write cursor and *n the number of bytes left at the cursor;
+ * both are advanced as text is produced. Printing stops silently once the
+ * space is used up; what has been written up to then is NUL-terminated.
+ */
+static void ctxt_print_node_inner(const struct vtt_node *node, int nest, int *n, char *out[*n])
+{
+    int wr = 0;
+#define CN() *n -= wr; if (*n <= 0) return; *out += wr;
+    if (*n <= 0)
+        return;
+
+    /* Keep one byte in reserve so the indentation cannot eat the terminator */
+    for (int i = 0; i < nest && *n > 1; i++) {
+        **out = ' ';
+        (*out)++;
+        (*n)--;
+    }
+    **out = '\0';
+
+    wr = snprintf(*out, *n, "%s [", ctxt_node_type_str_map[node->type]);
+    CN();
+    for (int i = 0; node->class_names && i < node->class_names->e_idx; i++) {
+        char **cn = dyna_elem(node->class_names, i);
+        wr = snprintf(*out, *n, ".%s", *cn);
+        CN();
+    }
+    wr = snprintf(*out, *n, "] ");
+    CN();
+    if (node->parent) {
+        wr = snprintf(*out, *n, " p: %s ", ctxt_node_type_str_map[node->parent->type]);
+        CN();
+    }
+    if (node->type == VNODE_TEXT) {
+        wr = snprintf(*out, *n, " .text: %s", node->text ? node->text : "(null)");
+        CN();
+    } else if (node->type == VNODE_VOICE) {
+        /* A voice tag without annotation stores NULL; never hand that to %s */
+        wr = snprintf(*out, *n, " .annotation: %s", node->annotation ? node->annotation : "(null)");
+        CN();
+    }
+    wr = snprintf(*out, *n, "\n");
+    CN();
+    /* Leaves keep text/timestamp in the union that otherwise holds childs */
+    for (int i = 0; (node->type != VNODE_TEXT && node->type != VNODE_TIMESTAMP) && node->childs && i < node->childs->e_idx; i++) {
+        const struct vtt_node *cn = dyna_elem(node->childs, i);
+        ctxt_print_node_inner(cn, nest + 4, n, out);
+    }
+#undef CN
+}
+
+/* Write an indented dump of the tree rooted at `root` into out (n bytes).
+ * `root` must be the VNODE_ROOT node; output that does not fit is dropped. */
+void ctxt_print_node(const struct vtt_node *root, int n, char out[n])
+{
+    int remaining = n;
+    char *cursor = out;
+
+    assert(root->type == VNODE_ROOT);
+    ctxt_print_node_inner(root, 0, &remaining, &cursor);
+}
+
+/*
+ * Normalize an annotation in place: leading and trailing whitespace is
+ * removed and every run of whitespace in between becomes a single ' '.
+ *
+ * buffer holds *pbufi bytes and is not NUL-terminated; on return *pbufi is
+ * the new (never larger) length.
+ */
+static void ctxt_normalize_annotation_str(int *pbufi, char buffer[*pbufi])
+{
+    const int len = *pbufi;
+    int out = 0;
+    int pending_space = 0;
+
+    for (int in = 0; in < len; in++) {
+        /* not technically valid (the spec means ASCII whitespace), but will do for now.
+         * The cast keeps bytes >= 0x80 (UTF-8) from being passed as negative values */
+        if (isspace((unsigned char)buffer[in])) {
+            /* Only remember a separator once some text has been emitted,
+             * which drops leading whitespace */
+            pending_space = out > 0;
+            continue;
+        }
+
+        /* The separator is emitted lazily, so trailing whitespace never is.
+         * out < in holds here, hence the write cannot overtake the read */
+        if (pending_space) {
+            buffer[out++] = ' ';
+            pending_space = 0;
+        }
+        buffer[out++] = buffer[in];
+    }
+
+    *pbufi = out;
+}
+
+/* States of the cue text tokenizer in ctxt_tokenize() */
+enum ctxt_states {
+    STATE_DATA,             /* plain text between tags */
+    STATE_TAG,              /* directly after '<' */
+    STATE_START_TAG_CLASS,  /* reading a class name after '.' */
+    STATE_END_TAG,          /* reading the tag name after "</" */
+    STATE_START_TAG,        /* reading the name of a start tag */
+    STATE_TAG_ANNOTATION,   /* reading the annotation after whitespace in a start tag */
+    STATE_TIMESTAMP,        /* never entered; the tokenizer asserts on timestamps */
+    STATE_HTML_CHAR_REF_IN_DATA_STATE, /* directly after '&' in plain text */
+};
+
+// https://html.spec.whatwg.org/multipage/named-characters.html#named-character-references
+/* One named character reference: `name` is the text between '&' and ';',
+ * `chars` the string that replaces the whole reference. */
+struct html_charref {
+    const char *name, *chars;
+};
+/* The references this tokenizer knows; anything else is dropped with a warning */
+static const struct html_charref html_charrefs[] = {
+    //{ .name = "lrm", .chars = "\u200E" },
+    { .name = "lrm", .chars = "" }, /* Just strip it out for now */
+};
+
+/* Return the table entry whose name equals `name` (case-sensitive),
+ * or NULL when the reference is unknown. */
+const struct html_charref *find_html_charref(const char *name)
+{
+    const struct html_charref *const end = html_charrefs + ARRSIZE(html_charrefs);
+
+    for (const struct html_charref *entry = html_charrefs; entry != end; entry++) {
+        if (!strcmp(entry->name, name))
+            return entry;
+    }
+    return NULL;
+}
+
+/* Reads one HTML character reference; txt points at the first character
+ * after the '&'.
+ *
+ * Returns the characters advanced - 1, i.e. adding the result to txt leaves
+ * it on the terminating ';'. Unknown references are consumed and produce no
+ * output. When no ';' follows within MAX_CHARREF_SIZE - 1 characters (or the
+ * string ends first) nothing is consumed: a literal '&' is appended to the
+ * output and -1 is returned, so that the caller's usual "advance by the
+ * result, then by one" lands on txt again and re-reads it as plain data.
+ *
+ * out_buff, is the output buffer for characters
+ * out_buff_idx is the index into out_buff (this will be advanced)
+ * out_buff_size is the max size of it */
+int read_html_character_references(const char *txt, char *out_buff, int *out_buff_idx, size_t out_buff_size)
+{
+#define MAX_CHARREF_SIZE 16
+    char buff[MAX_CHARREF_SIZE];
+    int buffi = 0;
+    int advance;
+    const char *chars;
+
+    /* Collect the name, never looking past the end of the string */
+    while (buffi < (int)sizeof(buff) - 1 && txt[buffi] != '\0' && txt[buffi] != ';') {
+        buff[buffi] = txt[buffi];
+        buffi++;
+    }
+    buff[buffi] = '\0';
+
+    if (txt[buffi] == ';') {
+        const struct html_charref *htmlchar = find_html_charref(buff);
+        if (htmlchar == NULL) {
+            /* Don't add anything if not found, but consume the escape sequence */
+            fprintf(stderr, "[Warning] HTML escape character not found with name '%s' skipping...\n", buff);
+            return buffi;
+        }
+        chars = htmlchar->chars;
+        advance = buffi;
+    } else {
+        /* Not a reference after all: keep the ampersand as it is */
+        chars = "&";
+        advance = -1;
+    }
+
+    /* Check the room before writing; the copy includes the terminating NUL */
+    size_t len = strlen(chars);
+    if ((size_t)*out_buff_idx + len >= out_buff_size) {
+        fprintf(stderr, "[Warning] Output buffer full, dropping character reference\n");
+        return advance;
+    }
+    memcpy(out_buff + *out_buff_idx, chars, len + 1);
+    *out_buff_idx += (int)len;
+
+    return advance;
+}
+
+struct dyna *ctxt_tokenize(const char *txt)
+{
+    char result[1024] = {0};
+    int resi = 0;
+    char buffer[1024] = {0};
+    int bufi = 0;
+    struct dyna *classes = dyna_create_size(sizeof(char*), 4);
+    dyna_set_free_fn(classes, deref_free);
+
+    struct dyna *tokens = dyna_create_size_flags(sizeof(struct ctxt_token), 6, DYNAFLAG_HEAPCOPY);
+    dyna_set_free_fn(tokens, (dyna_free_fn)ctxt_token_free);
+
+    struct ctxt_token tok = {0};
+
+    enum ctxt_states state = STATE_DATA;
+
+    for (;;) {
+        char c = *txt;
+
+        switch (state) {
+
+        case STATE_DATA:
+            switch (c) {
+            case '<':
+                if (resi == 0) {
+                    state = STATE_TAG;
+                    goto next;
+                }
+                txt--;
+            /* Fall */
+            case '\0':
+                if (resi > 0) {
+                    tok.type = TTOK_STRING;
+                    tok.ttok_string.value = strndup(result, resi);
+                    dyna_append(tokens, &tok);
+                    resi = 0;
+                }
+                goto next;
+                break;
+            case '&':
+                state = STATE_HTML_CHAR_REF_IN_DATA_STATE;
+                goto next;
+                break;
+            default:
+                result[resi++] = c;
+                goto next;
+            }
+            break;
+
+        case STATE_TAG:
+            switch (c) {
+                case '\t':
+                case '\n':
+                case '\f':
+                case ' ':
+                    state = STATE_TAG_ANNOTATION;
+                    goto next;
+                    break;
+                case '.':
+                    state = STATE_START_TAG_CLASS;
+                    goto next;
+                case '/':
+                    state = STATE_END_TAG;
+                    goto next;
+                case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
+                    assert(0 && "No timestamp");
+                    break;
+                case '>':
+                    //txt++;
+                case '\0':
+                    state = STATE_DATA;
+                    tok.type = TTOK_TAG_START;
+                    tok.ttok_tag_start.tag_name = tok.ttok_tag_start.annotation = NULL;
+                    tok.ttok_tag_start.classes = NULL;
+                    dyna_append(tokens, &tok);
+                    goto next;
+                default:
+                    result[resi++] = c;
+                    state = STATE_START_TAG;
+                    goto next;
+            }
+            break;
+
+        case STATE_START_TAG:
+            switch (c) {
+                case '\t':
+                case '\f':
+                case ' ':
+                    state = STATE_TAG_ANNOTATION;
+                    goto next;
+                case '\n':
+                    assert(0 && "Newline annotation not supported!");
+                    break;
+                case '.':
+                    state = STATE_START_TAG_CLASS;
+                    goto next;
+                case '>':
+                    //txt++;
+                case '\0':
+                    state = STATE_DATA;
+                    tok.type = TTOK_TAG_START;
+                    tok.ttok_tag_start.tag_name = strndup(result, resi);
+                    resi = 0;
+                    tok.ttok_tag_start.annotation = NULL;
+                    tok.ttok_tag_start.classes = NULL;
+                    dyna_append(tokens, &tok);
+                    goto next;
+                default:
+                    result[resi++] = c;
+            }
+            break;
+
+        case STATE_START_TAG_CLASS:
+            switch (c) {
+                case '\t':
+                case '\f':
+                case ' ':
+                    state = STATE_TAG_ANNOTATION;
+                    *(char**)dyna_emplace(classes) = strndup(buffer, bufi);
+                    bufi = 0;
+                    goto next;
+                case '\n':
+                    assert(0 && "Newline annotation not supported!");
+                    break;
+                case '.':
+                    *(char**)dyna_emplace(classes) = strndup(buffer, bufi);
+                    bufi = 0;
+                    goto next;
+                case '>':
+                    //txt++;
+                case '\0':
+                    state = STATE_DATA;
+                    *(char**)dyna_emplace(classes) = strndup(buffer, bufi);
+                    tok.type = TTOK_TAG_START;
+                    tok.ttok_tag_start.tag_name = strndup(result, resi);
+                    tok.ttok_tag_start.classes = classes;
+                    tok.ttok_tag_start.annotation = NULL;
+                    dyna_append(tokens, &tok);
+
+                    classes = dyna_create_size(sizeof(char*), 4);
+                    dyna_set_free_fn(classes, deref_free);
+                    resi = bufi = 0;
+                    goto next;
+                default:
+                    buffer[bufi++] = c;
+            }
+            break;
+
+        case STATE_TAG_ANNOTATION:
+            switch (c) {
+            case '&':
+                assert(0 && "No HTML references yet!");
+                break;
+            case '>':
+            case '\0': {
+                state = STATE_DATA;
+                tok.type = TTOK_TAG_START;
+                tok.ttok_tag_start.tag_name = strndup(result, resi);
+                if (classes->e_idx > 0) {
+                    tok.ttok_tag_start.classes = classes;
+                    classes = dyna_create_size(sizeof(char*), 4);
+                    dyna_set_free_fn(classes, deref_free);
+                } else {
+                    tok.ttok_tag_start.classes = NULL;
+                }
+
+                ctxt_normalize_annotation_str(&bufi, buffer);
+                if (bufi)
+                    tok.ttok_tag_start.annotation = strndup(buffer, bufi);
+                else
+                    tok.ttok_tag_start.annotation = NULL;
+
+                dyna_append(tokens, &tok);
+                resi = 0;
+                bufi = 0;
+            }
+                break;
+            default:
+                buffer[bufi++] = c;
+            }
+            break;
+
+        case STATE_END_TAG:
+            switch (c) {
+            case '>':
+                //txt++;
+            case '\0':
+                state = STATE_DATA;
+                tok.type = TTOK_TAG_END;
+                tok.ttok_tag_end.tag_name = strndup(result, resi);
+                dyna_append(tokens, &tok);
+                resi = 0;
+                break;
+            default:
+                result[resi++] = c;
+                goto next;
+            }
+            break;
+
+        case STATE_TIMESTAMP:
+            assert(0 && "No timestamp state");
+            break;
+        case STATE_HTML_CHAR_REF_IN_DATA_STATE:
+            txt += read_html_character_references(txt, result, &resi, sizeof(result));
+            state = STATE_DATA;
+            goto next;
+        }
+
+next:
+        if (c == '\0')
+            break;
+        txt++;
+    }
+
+end:
+    dyna_destroy(classes);
+    return tokens;
+}
+
+void ctxt_token_print(const struct ctxt_token *tok, int len, char out[len])
+{
+    int i = 0;
+    i += snprintf(out, len - i, "type: %s", ctxt_token_str_map[tok->type]);
+    switch (tok->type) {
+        case TTOK_STRING:
+            i += snprintf(out + i, len - i, "\n .value = %s", tok->ttok_string.value);
+            break;
+        case TTOK_TAG_START: {
+            i += snprintf(out + i, len - i, "\n .tag_name = %s\n .classes = ", tok->ttok_tag_start.tag_name);
+            struct dyna *cl = tok->ttok_tag_start.classes;
+            for (int c = 0; cl && c < cl->e_idx; c++) {
+                i += snprintf(out + i, len - i, "%s ", *(char**)dyna_elem(cl, c));
+            }
+            break;
+        }
+        case TTOK_TAG_END:
+            i += snprintf(out + i, len - i, "\n .tag_name = %s", tok->ttok_tag_end.tag_name);
+            break;
+    }
+}
+
+/* Release everything a node owns, but not the node itself. Also used as the
+ * element destructor of the child arrays, which makes the release recursive. */
+static void ctxt_free_node_inner(void *data)
+{
+    struct vtt_node *n = data;
+
+    /* Text leaves keep their string in the union and own nothing else
+     * that is released here */
+    if (n->type == VNODE_TEXT) {
+        free(n->text);
+        return;
+    }
+
+    if (n->childs != NULL)
+        dyna_destroy(n->childs);
+    if (n->class_names != NULL)
+        dyna_destroy(n->class_names);
+    free(n->annotation);
+}
+
+/* Free a heap-allocated node together with everything below it.
+ * Passing NULL is allowed and does nothing, like free(). */
+void ctxt_free_node(struct vtt_node *node)
+{
+    if (node == NULL)
+        return;
+
+    ctxt_free_node_inner(node);
+    free(node);
+}
+
+/* Append a copy of *child to parent's child array, creating the array on
+ * first use. Returns the stored copy, which stays at the same address for
+ * as long as the array lives (elements are heap-allocated individually). */
+static struct vtt_node *ctxt_node_add_child(struct vtt_node *parent, struct vtt_node *child)
+{
+    if (parent->childs == NULL) {
+        parent->childs = dyna_create_size_flags(sizeof(struct vtt_node), 3, DYNAFLAG_HEAPCOPY);
+        dyna_set_free_fn(parent->childs, ctxt_free_node_inner);
+    }
+    return dyna_append(parent->childs, child);
+}
+
+/* https://www.w3.org/TR/webvtt1/#cue-text-parsing-rules
+ *
+ * Build the node tree for a token list. Strings, class lists and
+ * annotations are moved out of the tokens (the token members are set to
+ * NULL), so the tokens can be destroyed afterwards.
+ *
+ * Start tags with an unknown or missing name are ignored. Language tags and
+ * timestamps are unsupported and trip an assert.
+ *
+ * Returns the heap-allocated root node (free with ctxt_free_node()), or
+ * NULL on allocation failure or unsupported input. */
+static struct vtt_node *ctxt_parse_nodes(struct dyna *tokens)
+{
+    struct vtt_node node;
+    struct vtt_node *root = calloc(1, sizeof(*root));
+    if (root == NULL)
+        return NULL;
+    struct vtt_node *current = root;
+    /* Self-parented while parsing, so stepping up from the root stays on it */
+    current->parent = root;
+
+    for (int i = 0; i < tokens->e_idx; i++) {
+        struct ctxt_token *tok = dyna_elem(tokens, i);
+
+        char *tn;
+        switch (tok->type) {
+        case TTOK_STRING:
+            /* Move instead of copy */
+            memset(&node, 0, sizeof(node));
+            node.type = VNODE_TEXT;
+            node.text = tok->ttok_string.value;
+            tok->ttok_string.value = NULL;
+
+            node.parent = current;
+            ctxt_node_add_child(current, &node);
+            break;
+        case TTOK_TAG_START:
+            tn = tok->ttok_tag_start.tag_name;
+            /* "<>" yields a start tag without a name */
+            if (tn == NULL)
+                continue;
+            memset(&node, 0, sizeof(node));
+            if (strcmp(tn, "c") == 0) {
+                node.type = VNODE_CLASS;
+            } else if (strcmp(tn, "i") == 0) {
+                node.type = VNODE_ITALIC;
+            } else if (strcmp(tn, "b") == 0) {
+                node.type = VNODE_BOLD;
+            } else if (strcmp(tn, "u") == 0) {
+                node.type = VNODE_UNDERLINE;
+            } else if (strcmp(tn, "ruby") == 0) {
+                node.type = VNODE_RUBY;
+            } else if (strcmp(tn, "rt") == 0) {
+                node.type = VNODE_RUBY_TEXT;
+            } else if (strcmp(tn, "v") == 0) {
+                node.type = VNODE_VOICE;
+                node.annotation = tok->ttok_tag_start.annotation;
+                tok->ttok_tag_start.annotation = NULL;
+            } else if (strcmp(tn, "lang") == 0) {
+                assert(0 && "Lang tags are not supported");
+                break;
+            } else {
+                continue;
+            }
+            node.class_names = tok->ttok_tag_start.classes;
+            tok->ttok_tag_start.classes = NULL;
+            node.childs = NULL;
+            node.parent = current;
+
+            /* Descend: what follows becomes a child of the new node */
+            current = ctxt_node_add_child(current, &node);
+            break;
+        case TTOK_TAG_END:
+            tn = tok->ttok_tag_end.tag_name;
+            if (tn == NULL)
+                break;
+            if (    (strcmp(tn, "c") == 0 && current->type == VNODE_CLASS) ||
+                    (strcmp(tn, "i") == 0 && current->type == VNODE_ITALIC) ||
+                    (strcmp(tn, "b") == 0 && current->type == VNODE_BOLD) ||
+                    (strcmp(tn, "u") == 0 && current->type == VNODE_UNDERLINE) ||
+                    (strcmp(tn, "ruby") == 0 && current->type == VNODE_RUBY) ||
+                    (strcmp(tn, "rt") == 0 && current->type == VNODE_RUBY_TEXT) ||
+                    (strcmp(tn, "v") == 0 && current->type == VNODE_VOICE)) {
+                current = current->parent;
+            } else if ((strcmp(tn, "lang") == 0 && current->type == VNODE_LANGUAGE)) {
+                assert(0 && "Language tag not supported");
+                ctxt_free_node(root);
+                return NULL;
+            } else if ((strcmp(tn, "ruby") == 0 && current->type == VNODE_RUBY_TEXT)) {
+                /* </ruby> with the <rt> still open closes both */
+                current = current->parent->parent;
+            }
+            break;
+        case TTOK_TIMESTAMP:
+            assert(0 && "Timestamp not supported");
+            ctxt_free_node(root);
+            return NULL;
+        }
+    }
+
+    root->parent = NULL;
+    return root;
+}
+
+
+/* Parse cue text into a node tree: tokenize, build the tree, drop the
+ * tokens. Returns whatever ctxt_parse_nodes() returns; the caller releases
+ * the tree with ctxt_free_node(). */
+struct vtt_node *ctxt_parse(const char *txt)
+{
+    struct dyna *toks = ctxt_tokenize(txt);
+    struct vtt_node *tree = ctxt_parse_nodes(toks);
+
+    /* The tree has taken over the strings it needs from the tokens */
+    dyna_destroy(toks);
+    return tree;
+}
+
+/* Append the plain text below `node` to out, starting at out[idx].
+ * Ruby annotation text (text inside an rt node) and timestamps are left out.
+ * `size` is the total size of out. Returns the number of bytes written,
+ * not counting the terminating NUL. */
+static int ctxt_text_inner(const struct vtt_node *node, int size, char out[size], int idx)
+{
+    if (node->type == VNODE_TIMESTAMP)
+        return 0;
+
+    if (node->type == VNODE_TEXT) {
+        if (node->parent && node->parent->type == VNODE_RUBY_TEXT)
+            return 0; /* Skip including ruby text */
+
+        const int room = size - idx;
+        if (room <= 0 || node->text == NULL)
+            return 0;
+
+        int n = snprintf(&out[idx], room, "%s", node->text);
+        assert(n < room);
+        /* With asserts compiled out, report only what really fits so that
+         * idx can never move past the end of out */
+        if (n < 0)
+            return 0;
+        if (n >= room)
+            n = room - 1;
+        return n;
+    }
+
+    int n = 0;
+    for (int i = 0; node->childs && i < node->childs->e_idx; i++) {
+        n += ctxt_text_inner(dyna_elem(node->childs, i), size, out, idx + n);
+    }
+
+    return n;
+}
+
+/* Write the plain text of the tree rooted at `root` into out (size bytes)
+ * and return its length. out is an empty string when the tree holds no text. */
+int ctxt_text(const struct vtt_node *root, int size, char out[size])
+{
+    /* Nothing below writes to out when there is no text node at all */
+    if (size > 0)
+        out[0] = '\0';
+    return ctxt_text_inner(root, size, out, 0);
+}
--- a/src/cuetext.h
+++ b/src/cuetext.h
@ -0,0 +1,83 @@
+#ifndef _VTT2ASS_CUETEXT_H
+#define _VTT2ASS_CUETEXT_H
+#include <stdint.h>
+#include "dyna.h"
+
+#define TEXT_TOKEN_DEF(ex_compl) \
+    ex_compl(TTOK_STRING, ttok_string, { char *value; /* free */ }) \
+    ex_compl(TTOK_TAG_START, ttok_tag_start, { char *tag_name; struct dyna *classes; char *annotation; /* free */ }) \
+    ex_compl(TTOK_TAG_END, ttok_tag_end, { char *tag_name; /* free */ }) \
+    ex_compl(TTOK_TIMESTAMP, ttok_timestamp, { float value; }) \
+
+#define ex_compl(n, ...) n,
+enum ctxt_token_type {
+    TEXT_TOKEN_DEF(ex_compl)
+};
+#undef ex_compl
+
+/* Turns every TEXT_TOKEN_DEF entry into a struct member named after its
+ * second argument. struct ctxt_token is thus a tagged union: `type` tells
+ * which member is the valid one. */
+#define ex_compl(tok_name, struct_name, ...) \
+    struct struct_name __VA_ARGS__ struct_name;
+struct ctxt_token {
+    enum ctxt_token_type type;
+    union {
+        TEXT_TOKEN_DEF(ex_compl)
+    };
+};
+#undef ex_compl
+
+#define VTT_NODE_TYPE_DEF(ex) \
+    ex(VNODE_ROOT) \
+    ex(VNODE_CLASS) \
+    ex(VNODE_ITALIC) \
+    ex(VNODE_BOLD) \
+    ex(VNODE_UNDERLINE) \
+    ex(VNODE_RUBY) \
+    ex(VNODE_RUBY_TEXT) \
+    ex(VNODE_VOICE) \
+    ex(VNODE_LANGUAGE) \
+\
+    /* Leaves */ \
+    ex(VNODE_TEXT) \
+    ex(VNODE_TIMESTAMP) \
+
+#define ex(n) n,
+enum vtt_node_type {
+    VTT_NODE_TYPE_DEF(ex)
+};
+#undef ex
+
+/* WebVTT Internal Node Object
+ *
+ * `type` decides which union member is valid: `text` for VNODE_TEXT,
+ * `timestamp` for VNODE_TIMESTAMP, `childs` for everything else. */
+struct vtt_node {
+    enum vtt_node_type type;
+
+    /* Applicable class names, or NULL */
+    struct dyna *class_names;
+
+    /* annotation string, only for VNODE_VOICE, can be NULL */
+    char *annotation;
+
+    /* language tag not supported */
+
+    /* Enclosing node; NULL for the root of a finished tree */
+    struct vtt_node *parent;
+    union {
+        /* The ordered list of child WebVTT Node Objects */
+        struct dyna *childs; /* struct vtt_node */
+
+        /* Only in the case of VNODE_TEXT */
+        char *text;
+
+        /* Only in the case of VNODE_TIMESTAMP (unsupported) */
+        int64_t timestamp;
+    };
+
+};
+
+struct vtt_node *ctxt_parse(const char *txt);
+struct dyna *ctxt_tokenize(const char *txt);
+void ctxt_free_node(struct vtt_node *node);
+
+int ctxt_text(const struct vtt_node *root, int size, char out[size]);
+void ctxt_token_print(const struct ctxt_token *tok, int len, char out[len]);
+void ctxt_print_node(const struct vtt_node *root, int n, char out[n]);
+
+#endif /* _VTT2ASS_CUETEXT_H */
--- a/src/dyna.c
+++ b/src/dyna.c
@ -0,0 +1,120 @@
+#include "dyna.h"
+
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>
+
+/* Address of slot idx inside d->data, without any bounds check. With
+ * DYNAFLAG_HEAPCOPY a slot holds a pointer to the element instead of the
+ * element itself. */
+static void *dyna_elem_(const struct dyna *d, int64_t idx)
+{
+    const size_t slot_size = (d->flags & DYNAFLAG_HEAPCOPY) ? sizeof(void*) : d->e_size;
+
+    return (uint8_t*)d->data + (slot_size * idx);
+}
+
+/* Create an array for elements of e_size bytes with room for 64 elements
+ * and no flags. Returns NULL on allocation failure. */
+struct dyna *dyna_create(size_t e_size)
+{
+    return dyna_create_size_flags(e_size, 64, DYNAFLAG_NONE);
+}
+
+/* Like dyna_create(), but with a caller-chosen initial capacity
+ * (in elements). Returns NULL on allocation failure. */
+struct dyna *dyna_create_size(size_t e_size, size_t init_size)
+{
+    return dyna_create_size_flags(e_size, init_size, DYNAFLAG_NONE);
+}
+
+/*
+ * Create a dynamic array.
+ *   e_size     size of one element in bytes
+ *   init_size  initial capacity in elements (0 is treated as 1)
+ *   flags      DYNAFLAG_NONE stores elements inline; DYNAFLAG_HEAPCOPY
+ *              stores a pointer per element and allocates each element
+ *              separately
+ * Returns the new array, or NULL on allocation failure.
+ */
+struct dyna *dyna_create_size_flags(size_t e_size, size_t init_size, enum dyna_flags flags)
+{
+    size_t delem_size = e_size;
+    struct dyna *d = calloc(1, sizeof(*d));
+    if (d == NULL)
+        return NULL;
+
+    /* A zero-sized request may legitimately yield NULL from the allocator,
+     * which would be indistinguishable from a failure */
+    d->e_cap = init_size > 0 ? init_size : 1;
+    d->e_size = e_size;
+    d->flags = flags;
+    if (flags & DYNAFLAG_HEAPCOPY)
+        delem_size = sizeof(void*);
+
+    d->data = reallocarray(NULL, d->e_cap, delem_size);
+    if (d->data == NULL) {
+        free(d);
+        return NULL;
+    }
+
+    return d;
+}
+
+
+/* Destroy an array: call the element free function (if one is set) on every
+ * element, release the per-element allocations of a HEAPCOPY array, then
+ * the storage and the array object. Passing NULL does nothing. */
+void dyna_destroy(struct dyna *dyna)
+{
+    if (dyna == NULL)
+        return;
+
+    const int heapcopy = (dyna->flags & DYNAFLAG_HEAPCOPY) != 0;
+
+    if (dyna->e_free_fn || heapcopy) {
+        for (int64_t i = 0; i < dyna->e_idx; i++) {
+            void *elem = dyna_elem(dyna, i);
+            if (dyna->e_free_fn)
+                dyna->e_free_fn(elem);
+            /* Heap copies belong to the array whether or not the
+             * elements need a destructor of their own */
+            if (heapcopy)
+                free(elem);
+        }
+    }
+
+    free(dyna->data);
+    free(dyna);
+}
+
+/* Install the function dyna_destroy() calls on each element
+ * (replacing any previously set one). */
+void dyna_set_free_fn(struct dyna *d, dyna_free_fn fn)
+{
+    d->e_free_fn = fn;
+}
+
+/* Double the capacity of d (an empty array grows to one slot).
+ * Returns 0 on success, -1 when the new size overflows or the allocation
+ * fails; d is unchanged in that case. */
+static int dyna_grow(struct dyna *d)
+{
+    if (d->e_cap > INT64_MAX / 2)
+        return -1;
+
+    /* Doubling 0 would never make room */
+    int64_t new_cap = d->e_cap > 0 ? d->e_cap * 2 : 1;
+    size_t delem_size = (d->flags & DYNAFLAG_HEAPCOPY) ? sizeof(void*) : d->e_size;
+    void *new_data = reallocarray(d->data, new_cap, delem_size);
+    if (new_data == NULL)
+        return -1;
+
+    d->e_cap = new_cap;
+    d->data = new_data;
+    return 0;
+}
+
+/* Append a copy of the e_size bytes at `elem` and return a pointer to the
+ * stored copy. */
+void *dyna_append(struct dyna *d, void *elem)
+{
+    /* memcpy() hands back its destination, which is the new element */
+    return memcpy(dyna_emplace(d), elem, d->e_size);
+}
+
+/* Reserve the next element and return a pointer to its (uninitialized)
+ * storage, growing the array when it is full. Allocation failures are
+ * caught by assert only. */
+void *dyna_emplace(struct dyna *d)
+{
+    if (d->e_idx >= d->e_cap) {
+        int e = dyna_grow(d);
+        assert(e == 0);
+    }
+
+    void *slot = dyna_elem_(d, d->e_idx);
+    d->e_idx++;
+
+    if (!(d->flags & DYNAFLAG_HEAPCOPY))
+        return slot;
+
+    /* HEAPCOPY: the slot only keeps a pointer to the separately
+     * allocated element */
+    void *heap_elem = malloc(d->e_size);
+    assert(heap_elem);
+    *(void**)slot = heap_elem;
+    return heap_elem;
+}
+
+/* Return a pointer to element idx, or NULL when idx is outside
+ * [0, e_idx). For HEAPCOPY arrays this is the separately allocated
+ * element, not the slot that points to it. */
+void *dyna_elem(const struct dyna *d, int64_t idx)
+{
+    if (idx < 0 || idx >= d->e_idx)
+        return NULL;
+    void *elemptr = dyna_elem_(d, idx);
+
+    if (d->flags & DYNAFLAG_HEAPCOPY) {
+        return *(void**)elemptr;
+    }
+    return elemptr;
+}
+
--- a/src/dyna.h
+++ b/src/dyna.h
@ -0,0 +1,36 @@
+#ifndef _VTT2ASS_DYNA_H
+#define _VTT2ASS_DYNA_H
+#include <stdint.h>
+#include <stddef.h>
+
+#define DYNA_ELEM(type, dyna, i) ((type)dyna_elem(dyna, i))
+
+/* Storage mode of a dyna, chosen at creation time */
+enum dyna_flags {
+    DYNAFLAG_NONE       = 0u,       /* Works normally, aka copy e_size bytes to data */
+    DYNAFLAG_HEAPCOPY   = 1u << 0,  /* Heap alloc e_size bytes, and copy the pointer to data */
+};
+
+typedef void(*dyna_free_fn)(void *elem);
+
+/* Growable array of fixed-size elements. Element pointers handed out for a
+ * DYNAFLAG_NONE array point into `data`, which is reallocated on growth;
+ * with DYNAFLAG_HEAPCOPY every element has its own allocation. */
+struct dyna {
+    size_t e_size; /* size of 1 element */
+    int64_t e_cap; /* The maximum amount of elements that can be stored */
+    int64_t e_idx; /* The current index of the next location to place elements in */
+    dyna_free_fn e_free_fn; /* This function will be called for each element on dyna_destroy */
+    enum dyna_flags flags; /* Had to add this because I messed up somewhere lol */
+
+    void* data; /* The memory location where the element array is stored */
+};
+
+struct dyna *dyna_create(size_t e_size);
+struct dyna *dyna_create_size(size_t e_size, size_t init_size);
+struct dyna *dyna_create_size_flags(size_t e_size, size_t init_size, enum dyna_flags flags);
+void dyna_destroy(struct dyna *dyna);
+void dyna_set_free_fn(struct dyna *dyna, dyna_free_fn fn);
+
+void *dyna_append(struct dyna *dyna, void *elem);
+/* Just returns the pointer to the next element, and increments e_idx, asserts on error */
+void *dyna_emplace(struct dyna *dyna);
+void *dyna_elem(const struct dyna *dyna, int64_t idx);
+
+#endif /* _VTT2ASS_DYNA_H */
--- a/src/font.c
+++ b/src/font.c
@ -0,0 +1,77 @@
+#include "font.h"
+
+#include <string.h>
+#include <stdbool.h>
+#include <assert.h>
+
+#include "util.h"
+
+/* True between font_init() and font_dinit() */
+static bool font_did_init = false;
+/* The FreeType library handle created by font_init() */
+FT_Library ftlib = NULL;
+
+/* One cached face, keyed by the path it was loaded from */
+struct font_cache {
+    char *fontpath; /* strdup'd copy of the path, free'd in font_dinit() */
+    FT_Face face;   /* the face loaded from fontpath */
+};
+/* Fixed-size cache filled by font_get_face(); the first caches_count entries are in use */
+static struct font_cache caches[32] = {0};
+int caches_count = 0;
+
+/* Creates the global FreeType library handle.
+ * When the module is already initialised it is torn down first and then
+ * set up again. Asserts if FreeType cannot be initialised. */
+void font_init()
+{
+    int err;
+
+    if (font_did_init) {
+        font_dinit();
+    }
+
+    err = FT_Init_FreeType(&ftlib);
+    assert(err == FT_Err_Ok);
+
+    font_did_init = true;
+}
+
+/* Releases every cached face and the FreeType library handle.
+ * Must be paired with an earlier font_init(); asserts otherwise. */
+void font_dinit()
+{
+    if (!font_did_init) {
+        assert(false && "Calling font_dinit() without font_init()");
+        return;
+    }
+
+    for (int i = 0; i < caches_count; i++) {
+        FT_Done_Face(caches[i].face);
+        free(caches[i].fontpath);
+        caches[i].face = NULL;
+        caches[i].fontpath = NULL;
+    }
+    caches_count = 0;
+
+    /* The library handle itself has to be released too, otherwise it
+     * leaks on every init/dinit cycle (font_init() calls this function
+     * when it is asked to re-initialise). */
+    FT_Done_FreeType(ftlib);
+    ftlib = NULL;
+
+    font_did_init = false;
+}
+
+/* Returns the face loaded from fontpath, loading and caching it on first use.
+ * Returns NULL when the face cannot be loaded, when the path cannot be
+ * copied, or when the cache is full. Cached faces are released by
+ * font_dinit(), the caller must not free them. */
+FT_Face font_get_face(const char *fontpath)
+{
+    for (int i = 0; i < caches_count; i++) {
+        if (strcmp(caches[i].fontpath, fontpath) == 0)
+            return caches[i].face;
+    }
+
+    /* Keep the assert for debug builds, but never write past the cache
+     * when asserts are compiled out. */
+    assert(caches_count < (int)ARRSIZE(caches));
+    if (caches_count >= (int)ARRSIZE(caches))
+        return NULL;
+
+    FT_Face face = NULL;
+    if (FT_New_Face(ftlib, fontpath, 0, &face) != FT_Err_Ok)
+        return NULL;
+
+    /* Only commit the cache entry once both the face and the key exist;
+     * an entry with a NULL path would crash the strcmp() lookup above. */
+    char *pathcopy = strdup(fontpath);
+    if (pathcopy == NULL) {
+        FT_Done_Face(face);
+        return NULL;
+    }
+
+    struct font_cache *cfc = &caches[caches_count];
+    cfc->face = face;
+    cfc->fontpath = pathcopy;
+    caches_count++;
+    return face;
+}
+
+/* Returns the PostScript name FreeType reports for face */
+const char *font_get_name(FT_Face face)
+{
+    const char *ps_name = FT_Get_Postscript_Name(face);
+    return ps_name;
+}
+
+
+/* Debug helper: exposes the global FreeType library handle */
+FT_Library font_get_lib()
+{
+    FT_Library lib = ftlib;
+    return lib;
+}
--- a/src/font.h
+++ b/src/font.h
@ -0,0 +1,18 @@
+#ifndef _VTT2ASS_FONT_H
+#define _VTT2ASS_FONT_H
+#include <ft2build.h>
+#include FT_FREETYPE_H
+#include FT_GLYPH_H
+
+void font_init();
+void font_dinit();
+
+/* Returned face will be free'd with font_dinit() */
+FT_Face font_get_face(const char *fontpath);
+
+const char *font_get_name(FT_Face face);
+
+/* debug */
+FT_Library font_get_lib();
+
+#endif /* _VTT2ASS_FONT_H */
--- a/src/main.c
+++ b/src/main.c
@ -0,0 +1,95 @@
+#include <stdio.h>
+#include <sys/param.h>
+
+#include "reader.h"
+#include "tokenizer.h"
+#include "parser.h"
+
+#include "cuetext.h"
+#include "srt.h"
+#include "ass.h"
+#include "util.h"
+#include "font.h"
+#include "opts.h"
+
+
+#include <locale.h>
+
+
+#include <unistd.h>
+/* Program entry point: parses the options, tokenizes and parses the input
+ * file, then writes the requested .ass and/or .srt output.
+ * Exit status: 0 on success, 1 when the options or the input file are
+ * unusable, 2 when tokenizing fails, 3 when parsing fails, 4 when the srt
+ * output cannot be written. */
+int main(int argc, const char **argv)
+{
+    int rc = 0;
+    struct dyna *tokens = NULL, *cues = NULL, *styles = NULL;
+
+    setlocale(LC_ALL, "en_US.utf8");
+
+    if (opts_parse(argc, argv) == -1)
+        return 1;
+
+    util_init();
+    font_init();
+
+    // TODO: cont. with vertical rendering fixes and vertical ruby
+
+    if (rdr_init(opts_infile) != 0) {
+        rc = 1;
+        goto out_font;
+    }
+
+    tokens = tok_tokenize();
+    if (tokens == NULL) {
+        printf("Failed to tokenize\n");
+        rc = 2;
+        goto out_reader;
+    }
+
+    /* A parse failure used to end with exit status 0 */
+    if (prs_parse_tokens(tokens, &cues, &styles) != 0) {
+        rc = 3;
+        goto out_tokens;
+    }
+
+    /* For debug */
+#if 0
+    printf("Token array size: %ld\n", tokens->e_idx);
+    char tokstr[1024];
+    for (int i = 0; i < MIN(1000000, tokens->e_idx); i++) {
+        tok_2str((struct token*)dyna_elem(tokens, i), 512, tokstr);
+        printf("Token %d: %s\n", i, tokstr);
+    }
+
+    for (int i = 0; i < cues->e_idx; i++) {
+        struct cue *c = dyna_elem(cues, i);
+        prs_cue2str(sizeof(tokstr), tokstr, c);
+        printf("%s\n\n", tokstr);
+    }
+
+    for (int i = 0; styles && i < styles->e_idx; i++) {
+        struct cue_style *cs = dyna_elem(styles, i);
+        cuestyle_print(sizeof(tokstr), tokstr, cs);
+        printf("%s\n", tokstr);
+    }
+#endif
+
+    if (opts_ass) {
+        struct video_info vinf = {
+            .width = opts_ass_vid_w,
+            .height = opts_ass_vid_h,
+        };
+        ass_write(cues, styles, &vinf, opts_ass_fontfile, opts_ass_outfile);
+    }
+    if (opts_srt) {
+        if (srt_write(cues, styles, opts_srt_outfile) != 0) {
+            fprintf(stderr, "Failed to write '%s'\n", opts_srt_outfile);
+            rc = 4;
+        }
+    }
+    if (rc == 0)
+        printf("Conversion done\n");
+
+    /* Cleanup runs in reverse order of acquisition on every path */
+out_tokens:
+    if (cues)
+        dyna_destroy(cues);
+    if (styles)
+        dyna_destroy(styles);
+    dyna_destroy(tokens);
+out_reader:
+    rdr_free();
+out_font:
+    font_dinit();
+    return rc;
+}
--- a/src/opts.c
+++ b/src/opts.c
@ -0,0 +1,151 @@
+#include "opts.h"
+
+#include <argparse.h>
+#include <stdio.h>
+#include <string.h>
+
+/* Usage lines handed to argparse_init(): top level, ass and srt subcommand */
+static const char *const usage[] = {
+    "v2a ass [-h] srt [-h] input_file",
+    NULL,
+};
+static const char *const ass_usage[] = {
+    "v2a ass [ass options] input_file...",
+    NULL,
+};
+static const char *const srt_usage[] = {
+    "v2a srt [srt options] input_file...",
+    NULL,
+};
+
+/* Parsed options, written by opts_parse() and the cmd_* helpers.
+ * The strings point at what argparse stored, they are not copied here. */
+bool opts_srt = false;
+bool opts_ass = false;
+const char *opts_ass_outfile = NULL;
+const char *opts_srt_outfile = NULL;
+const char *opts_infile = NULL;
+int opts_ass_vid_w = 0, opts_ass_vid_h = 0;
+const char *opts_ass_fontfile = NULL;
+bool opts_ass_debug_boxes = false;
+int opts_ass_border_size = -1; /* -1 when --border was not given */
+
+/* Parses the options of the 'ass' subcommand; argv[0] is the subcommand
+ * name itself. On return *argc holds the number of arguments argparse left
+ * unparsed at the start of argv.
+ * Returns 0 and fills the opts_ass_* globals on success, -1 when a required
+ * option is missing or the video size is not positive. */
+static int cmd_ass(int *argc, const char **argv)
+{
+    char *outpath = NULL, *fontfile = NULL;
+    int width = 0, height = 0, border = -1;
+    bool debug = false;
+
+    struct argparse argp;
+    struct argparse_option opts[] = {
+        OPT_HELP(),
+        OPT_STRING('o', "output", &outpath, "output file", NULL, 0, 0),
+        OPT_INTEGER('W', "width", &width, "Width of the video file", NULL, 0, 0),
+        OPT_INTEGER('H', "height", &height, "Height of the video file", NULL, 0, 0),
+        OPT_STRING('f', "font", &fontfile, "The fontfile to use. This font should be embedded in the .mkv", NULL, 0, 0),
+        OPT_INTEGER('B', "border", &border, "Set the border size to use", NULL, 0, 0),
+        OPT_BOOLEAN('D', "debug", &debug, "If set, debug boxes will be included in the output", NULL, 0, 0),
+        OPT_END(),
+    };
+    argparse_init(&argp, opts, ass_usage, ARGPARSE_STOP_AT_NON_OPTION);
+    *argc = argparse_parse(&argp, *argc, argv);
+
+    if (outpath == NULL) {
+        printf("Output file option is required\n");
+        argparse_usage(&argp);
+        return -1;
+    }
+    /* Negative sizes are as unusable as missing ones */
+    if (width <= 0 || height <= 0) {
+        printf("Video width and height is required\n");
+        argparse_usage(&argp);
+        return -1;
+    }
+    if (fontfile == NULL) {
+        printf("The font file option is required\n");
+        argparse_usage(&argp);
+        return -1;
+    }
+
+    opts_ass = true;
+    opts_ass_outfile = outpath;
+    opts_ass_vid_w = width;
+    opts_ass_vid_h = height;
+    opts_ass_fontfile = fontfile;
+    opts_ass_debug_boxes = debug;
+    opts_ass_border_size = border;
+    return 0;
+}
+
+/* Parses the options of the 'srt' subcommand; argv[0] is the subcommand
+ * name itself. On return *argc holds the number of arguments argparse left
+ * unparsed. Returns 0 and fills the opts_srt* globals on success, -1 when
+ * the output file is missing. */
+static int cmd_srt(int *argc, const char **argv)
+{
+    char *outpath = NULL;
+    struct argparse argp;
+    struct argparse_option opts[] = {
+        OPT_HELP(),
+        OPT_STRING('o', "output", &outpath, "output file", NULL, 0, 0),
+        OPT_END(),
+    };
+
+    argparse_init(&argp, opts, srt_usage, ARGPARSE_STOP_AT_NON_OPTION);
+    *argc = argparse_parse(&argp, *argc, argv);
+
+    if (outpath != NULL) {
+        opts_srt_outfile = outpath;
+        opts_srt = true;
+        return 0;
+    }
+
+    printf("Output file option is required\n");
+    argparse_usage(&argp);
+    return -1;
+}
+
+/* Parses the whole command line: any number of 'ass' / 'srt' subcommands,
+ * each followed by its own options, and finally the input file.
+ * Returns 0 and fills the opts_* globals on success, -1 on error. */
+int opts_parse(int argc, const char **argv)
+{
+    struct argparse argp;
+    struct argparse_option opts[] = {
+        OPT_HELP(),
+        OPT_END(),
+    };
+    argparse_init(&argp, opts, usage, ARGPARSE_STOP_AT_NON_OPTION);
+
+    argc = argparse_parse(&argp, argc, argv);
+    if (argc < 1) {
+        argparse_usage(&argp);
+        return -1;
+    }
+
+    const char *filepath = NULL;
+
+    /* After every parse step argv[0] is the first argument not consumed yet */
+    while (argc >= 1) {
+        const char *arg = argv[0];
+        int r;
+
+        if (strcmp(arg, "ass") == 0) {
+            r = cmd_ass(&argc, argv);
+        } else if (strcmp(arg, "srt") == 0) {
+            r = cmd_srt(&argc, argv);
+        } else {
+            if (argc > 1) {
+                /* Neither a subcommand nor the final input file. Retrying
+                 * the same argument here would loop forever. */
+                printf("Unknown subcommand '%s'\n", arg);
+                argparse_usage(&argp);
+                return -1;
+            }
+            filepath = arg;
+            break;
+        }
+        if (r != 0)
+            return r;
+    }
+
+    if (filepath == NULL) {
+        printf("Input file is not given\n");
+        return -1;
+    }
+
+    if (!opts_ass && !opts_srt) {
+        printf("At least ass or srt conversaton needs to be specified\n");
+        return -1;
+    }
+
+    opts_infile = filepath;
+    return 0;
+}
--- a/src/opts.h
+++ b/src/opts.h
@ -0,0 +1,22 @@
+#ifndef _VTT2ASS_OPTS_H
+#define _VTT2ASS_OPTS_H
+#include <stdbool.h>
+
+/* global options */
+extern bool opts_srt;
+extern bool opts_ass;
+extern const char *opts_infile;
+
+/* ass options */
+extern const char *opts_ass_outfile;
+extern int opts_ass_vid_w, opts_ass_vid_h;
+extern int opts_ass_border_size;
+extern const char *opts_ass_fontfile;
+extern bool opts_ass_debug_boxes;
+
+/* srt options */
+extern const char *opts_srt_outfile;
+
+int opts_parse(int argc, const char **argv);
+
+#endif /* _VTT2ASS_OPTS_H */
--- a/src/parser.c
+++ b/src/parser.c
@ -0,0 +1,373 @@
+#include "parser.h"
+
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdbool.h>
+
+#include "tokenizer.h"
+#include "cuestyle.h"
+
+#define PARSER_LEAN 1
+
+/* Printable names of the cue enums, indexed by the enum value */
+const char *str_cue_writing_direction[] = {
+    [WD_HORIZONTAL] = "horizontal",
+    [WD_VERTICAL_GROW_LEFT] = "vertical grow left",
+    [WD_VERTICAL_GROW_RIGHT] = "vertical grow right",
+};
+const char *str_cue_line_align[] = {
+    [LINE_ALIGN_START] = "start",
+    [LINE_ALIGN_CENTER] = "center",
+    [LINE_ALIGN_END] = "end",
+};
+const char *str_cue_pos_align[] = {
+    [POS_ALIGN_AUTO] = "auto",
+    [POS_ALIGN_LINE_LEFT] = "line-left",
+    [POS_ALIGN_CENTER] = "center",
+    [POS_ALIGN_LINE_RIGHT] = "line-right",
+};
+const char *str_cue_text_align[] = {
+    [TEXT_ALIGN_CENTER] = "center",
+    [TEXT_ALIGN_START] = "start",
+    [TEXT_ALIGN_END] = "end",
+    [TEXT_ALIGN_LEFT] = "left",
+    [TEXT_ALIGN_RIGHT] = "right",
+};
+
+/* Maps the alignment keyword of a 'line' setting to its enum value.
+ * Returns -1 when str is not "start", "center" or "end". */
+static enum cue_line_align prs_cue_line_align(const char *str)
+{
+    static const struct {
+        const char *keyword;
+        enum cue_line_align value;
+    } keywords[] = {
+        { "start",  LINE_ALIGN_START },
+        { "center", LINE_ALIGN_CENTER },
+        { "end",    LINE_ALIGN_END },
+    };
+
+    for (size_t k = 0; k < sizeof(keywords) / sizeof(keywords[0]); k++) {
+        if (strcmp(str, keywords[k].keyword) == 0)
+            return keywords[k].value;
+    }
+    return -1;
+}
+
+
+/* Element free function of the cue array: releases what a cue owns,
+ * its identifier string and its text node tree. */
+static void prs_cue_free(void *data)
+{
+    struct cue *c = data;
+
+    free(c->ident); /* free(NULL) is a no-op */
+    if (c->text_node != NULL)
+        ctxt_free_node(c->text_node);
+}
+
+/* Resets cue to the defaults used before any setting is applied:
+ * everything zeroed, line and position AUTO, full size, snapping to
+ * lines, line alignment START. */
+static void prs_default_cue(struct cue *cue)
+{
+    memset(cue, 0, sizeof(struct cue));
+
+    cue->line = CUE_AUTO;
+    cue->position = CUE_AUTO;
+    cue->size = 1;
+    cue->snap_to_lines = true;
+    cue->line_align = LINE_ALIGN_START;
+}
+/* Helpers for prs_parse_tokens(): they use its locals 'i', 'tok' and
+ * 'tokens' and jump to its 'err' label. Both are wrapped in
+ * do { } while (0) so that a use followed by ';' is exactly one statement,
+ * also inside an unbraced if/else. */
+
+/* Step to the next token, bail out when the token array is exhausted */
+#define ADVANCE() do { \
+    i++; \
+    if (i >= tokens->e_idx) goto err; \
+    tok = dyna_elem(tokens, i); \
+} while (0)
+/* Bail out unless the current token has the expected type */
+#define EXP(exp_token_type) do { \
+    if (tok->type != (exp_token_type)) { \
+        fprintf(stderr, "Unexpected token: %s  expected %s\n", tok_type2str(tok->type), tok_type2str(exp_token_type)); \
+        goto err; \
+    } \
+} while (0)
+
+/* Parses a string of the form "<number>%" and returns the number.
+ * Returns NAN on error: no number, no '%' directly after the number,
+ * or anything following the '%'. */
+static float prs_percentage(const char *str)
+{
+    char *rest = NULL;
+
+    errno = 0;
+    const float value = strtof(str, &rest);
+    const bool parsed = (errno == 0) && (rest != str);
+
+    /* rest[1] is only looked at when rest[0] is the '%' sign */
+    if (!parsed || rest[0] != '%' || rest[1] != '\0')
+        return NAN;
+    return value;
+}
+
+/* Applies a 'line:<value>[,<align>]' setting to cue.
+ * The setting value is split at the comma while parsing and is always
+ * restored before returning. cue is only modified when the whole setting
+ * is valid. Returns 0 on success, -1 on an invalid setting.
+ * NOTE(review): only percentage values are stored; a plain line number is
+ * accepted but ignored, exactly as before -- confirm that this is intended. */
+static int prs_cue_settings_line(struct token *tok, struct cue *cue)
+{
+    char *val = tok->cue_setting.value;
+    char *sep = strchr(val, ',');
+    enum cue_line_align line_align = cue->line_align;
+    int ret = -1;
+
+    if (sep) {
+        *sep = '\0';
+        line_align = prs_cue_line_align(sep + 1);
+        if (line_align == -1)
+            goto out;
+    }
+
+    /* An empty value must not be indexed at [len - 1] */
+    size_t vlen = strlen(val);
+    if (vlen == 0)
+        goto out;
+
+    if (val[vlen - 1] == '%') {
+        float lineval = prs_percentage(val);
+        if (isnan(lineval))
+            goto out;
+        cue->snap_to_lines = false;
+        cue->line = lineval / 100.0f;
+    }
+
+    cue->line_align = line_align;
+    ret = 0;
+out:
+    if (sep)
+        *sep = ',';
+    return ret;
+}
+
+/* Maps the alignment keyword of a 'position' setting to its enum value.
+ * Returns -1 on an unknown keyword.
+ * The spec does not allow "middle" here, but real world files use it, so
+ * it is accepted as "center" when PARSER_LEAN is 1. */
+static enum cue_pos_align prs_cue_pos_align(const char *str)
+{
+    bool is_center = strcmp(str, "center") == 0;
+#if PARSER_LEAN == 1
+    is_center = is_center || strcmp(str, "middle") == 0;
+#endif
+
+    if (is_center)
+        return POS_ALIGN_CENTER;
+    if (strcmp(str, "line-left") == 0)
+        return POS_ALIGN_LINE_LEFT;
+    if (strcmp(str, "line-right") == 0)
+        return POS_ALIGN_LINE_RIGHT;
+    return -1;
+}
+
+/* Applies a 'position:<percentage>[,<align>]' setting to cue.
+ * The setting value is split at the comma while parsing and is always
+ * restored before returning. Returns 0 on success, -1 on an invalid
+ * setting, in which case cue is left untouched. */
+static int prs_cue_settings_position(struct token *tok, struct cue *cue)
+{
+    char *colpos = tok->cue_setting.value;
+    char *sep = strchr(colpos, ',');
+    enum cue_pos_align align = POS_ALIGN_AUTO;
+    float pos;
+    int ret = -1;
+
+    if (sep) {
+        *sep = '\0';
+        align = prs_cue_pos_align(sep + 1);
+        if (align == -1)
+            goto out;
+    }
+
+    /* NAN never compares equal to anything, it has to be tested with isnan() */
+    pos = prs_percentage(colpos);
+    if (isnan(pos))
+        goto out;
+
+    cue->position = pos / 100.0f;
+    cue->pos_align = align;
+    ret = 0;
+out:
+    if (sep)
+        *sep = ',';
+    return ret;
+}
+
+/* Applies a 'size:<percentage>' setting to cue, stored as a fraction.
+ * Returns 0 on success, -1 when the value is not a valid percentage. */
+static int prs_cue_settings_size(struct token *tok, struct cue *cue)
+{
+    char *val = tok->cue_setting.value;
+    float size = prs_percentage(val);
+    /* NAN never compares equal to anything, it has to be tested with isnan() */
+    if (isnan(size))
+        return -1;
+    /* TODO: invalidate region here, when we decide to support that */
+    cue->size = size / 100.0f;
+    return 0;
+}
+
+/* Applies an 'align:<keyword>' setting to cue.
+ * "middle" is accepted as "center" when PARSER_LEAN is 1.
+ * Returns 0 on success, -1 on an unknown keyword. */
+static int prs_cue_settings_align(struct token *tok, struct cue *cue)
+{
+    static const struct {
+        const char *keyword;
+        enum cue_text_align align;
+    } table[] = {
+        { "start",  TEXT_ALIGN_START },
+        { "center", TEXT_ALIGN_CENTER },
+#if PARSER_LEAN == 1
+        { "middle", TEXT_ALIGN_CENTER },
+#endif
+        { "end",    TEXT_ALIGN_END },
+        { "left",   TEXT_ALIGN_LEFT },
+        { "right",  TEXT_ALIGN_RIGHT },
+    };
+    const char *val = tok->cue_setting.value;
+
+    for (size_t k = 0; k < sizeof(table) / sizeof(table[0]); k++) {
+        if (strcmp(val, table[k].keyword) == 0) {
+            cue->text_align = table[k].align;
+            return 0;
+        }
+    }
+    return -1;
+}
+
+/* Applies a 'vertical:<rl|lr>' setting to cue.
+ * Returns 0 on success, -1 on any other value. */
+static int prs_cue_settings_vertical(struct token *tok, struct cue *cue)
+{
+    const char *val = tok->cue_setting.value;
+    const bool grow_left = strcmp(val, "rl") == 0;
+    const bool grow_right = strcmp(val, "lr") == 0;
+
+    if (!grow_left && !grow_right)
+        return -1;
+
+    cue->writing_direction = grow_left ? WD_VERTICAL_GROW_LEFT
+                                       : WD_VERTICAL_GROW_RIGHT;
+    /* TODO: set region here to NULL */
+    return 0;
+}
+
+/* Applies the run of TOK_CUE_SETTING tokens starting at index i to cue.
+ * A setting with a known key but an invalid value is reported and skipped.
+ * Returns the index of the first token after the run, or -1 when a setting
+ * has an unknown key. */
+static int prs_cue_settings(struct dyna *tokens, int i, struct cue *cue)
+{
+    while (i < tokens->e_idx) {
+        struct token *tok = dyna_elem(tokens, i);
+        if (tok->type != TOK_CUE_SETTING)
+            break;
+
+        char *skey = tok->cue_setting.key;
+        int status;
+
+        if (strcmp(skey, "vertical") == 0)
+            status = prs_cue_settings_vertical(tok, cue);
+        else if (strcmp(skey, "line") == 0)
+            status = prs_cue_settings_line(tok, cue);
+        else if (strcmp(skey, "position") == 0)
+            status = prs_cue_settings_position(tok, cue);
+        else if (strcmp(skey, "size") == 0)
+            status = prs_cue_settings_size(tok, cue);
+        else if (strcmp(skey, "align") == 0)
+            status = prs_cue_settings_align(tok, cue);
+        else {
+            printf("cue setting key '%s' not handled!\n", skey);
+            return -1;
+        }
+
+        /* We should skip invalid settings */
+        if (status != 0)
+            fprintf(stderr, "Failed to parse setting with key '%s', skipping\n", skey);
+        i++;
+    }
+    return i;
+}
+
+/* Joins the run of TOK_CUE_TEXT tokens starting at index i with '\n',
+ * parses the joined text and stores the resulting tree in cue->text_node.
+ * Returns the index of the first token after the run, or -1 when the
+ * temporary buffer cannot be allocated. */
+static int prs_parse_cue_text(struct dyna *tokens, int i, struct cue *cue)
+{
+    struct token *tok;
+    const int first = i;
+    size_t len = 0;
+
+    for (; i < tokens->e_idx && (tok = dyna_elem(tokens, i))->type == TOK_CUE_TEXT; i++) {
+        /* one extra byte per line, for the '\n' or the final '\0' */
+        len += strlen(tok->cue_text.str) + 1;
+    }
+    const int end = i;
+
+    /* The length comes from the input file, so the buffer lives on the heap
+     * instead of in a variable length array on the stack. The + 1 keeps
+     * the buffer valid even for an empty run. */
+    char *full_txt = malloc(len + 1);
+    if (full_txt == NULL)
+        return -1;
+
+    char *ptr = full_txt;
+    *ptr = '\0';
+    for (i = first; i < end; i++) {
+        tok = dyna_elem(tokens, i);
+        ptr = stpcpy(ptr, tok->cue_text.str);
+        if (i + 1 < end) {
+            /* separate the lines, but do not end the text with a newline */
+            *ptr++ = '\n';
+            *ptr = '\0';
+        }
+    }
+
+    /* The buffer was never valid after this function returned, so it is
+     * released at the same point the stack buffer used to go away. */
+    cue->text_node = ctxt_parse(full_txt);
+    free(full_txt);
+    return end;
+}
+
+/* Builds the cue array and the style array from the token array.
+ * Every cue has the form
+ *   [TOK_IDENT] TOK_TIMESTAMP TOK_ARROW TOK_TIMESTAMP {TOK_CUE_SETTING} TOK_CUE_TEXT...
+ * and tokens outside of cues are skipped here.
+ * Returns 0 and sets *out_cues and *out_styles on success. Returns -1 on a
+ * malformed cue; the outputs are left untouched and everything allocated
+ * here is released. */
+int prs_parse_tokens(struct dyna *tokens, struct dyna **out_cues, struct dyna **out_styles)
+{
+    struct dyna *cues = dyna_create(sizeof(struct cue));
+    dyna_set_free_fn(cues, prs_cue_free);
+
+    /* in_cue is true while cc holds a cue that is not stored in cues yet */
+    bool in_cue = false;
+    struct cue cc;
+    for (int i = 0; i < tokens->e_idx;) {
+        struct token *tok = dyna_elem(tokens, i);
+        if (tok->type != TOK_TIMESTAMP && tok->type != TOK_IDENT) {
+            i++;
+            continue;
+        }
+        if (in_cue)
+            dyna_append(cues, &cc);
+        in_cue = true;
+        prs_default_cue(&cc);
+
+        if (tok->type == TOK_IDENT) {
+            /* Move the string into the cue */
+            cc.ident = tok->ident.str;
+            tok->ident.str = NULL;
+            ADVANCE(); EXP(TOK_TIMESTAMP);
+        }
+
+        cc.time_start = tok->timestamp.ms;
+        ADVANCE(); EXP(TOK_ARROW);
+        ADVANCE(); EXP(TOK_TIMESTAMP);
+        cc.time_end = tok->timestamp.ms;
+
+        ADVANCE();
+        if (tok->type == TOK_CUE_SETTING) {
+            int curr_i = prs_cue_settings(tokens, i, &cc);
+            if (curr_i == -1)
+                goto err;
+            /* The settings were the last tokens: there is no cue text,
+             * and dyna_elem() would hand back NULL for this index. */
+            if (curr_i >= tokens->e_idx)
+                goto err;
+            i = curr_i;
+            tok = dyna_elem(tokens, i);
+        }
+
+        EXP(TOK_CUE_TEXT);
+        int consumed = prs_parse_cue_text(tokens, i, &cc);
+        if (consumed == -1)
+            goto err;
+        i = consumed;
+    }
+    if (in_cue)
+        dyna_append(cues, &cc);
+
+
+    *out_styles = cuestyle_parse(tokens);
+    *out_cues = cues;
+    return 0;
+err:
+    fprintf(stderr, "Exiting from parse_tokens\n");
+    /* The cue being built is not in the array yet, so dyna_destroy() will
+     * not release the identifier that was moved into it. */
+    if (in_cue)
+        prs_cue_free(&cc);
+    dyna_destroy(cues);
+    return -1;
+
+}
+
+#undef EXP
+#undef ADVANCE
+
+/* Debug helper: writes a human readable dump of cue into out_str, using at
+ * most size bytes including the terminating 0. */
+void prs_cue2str(int size, char out_str[size], const struct cue *cue)
+{
+    /* A float printed with %.4f can need more than 40 characters, so the
+     * buffers are roomy and every write is bounded. */
+    char linestr[64], posstr[64];
+    char nodestr[1024] = { [0] = '\0' };
+
+    if (IS_AUTO(cue->line))
+        snprintf(linestr, sizeof(linestr), "%s", "auto");
+    else
+        snprintf(linestr, sizeof(linestr), "%.4f", cue->line * 100);
+    if (IS_AUTO(cue->position))
+        snprintf(posstr, sizeof(posstr), "%s", "auto");
+    else
+        snprintf(posstr, sizeof(posstr), "%.4f", cue->position * 100);
+
+    if (cue->text_node)
+        ctxt_print_node(cue->text_node, sizeof(nodestr), nodestr);
+
+    /* ident is NULL for cues without an identifier, and int64_t is not
+     * 'long' on every platform, hence the explicit fallback and casts. */
+    snprintf(out_str, size, "ident: %s\n"
+            "%lld --> %lld\n"
+            "line: %s line_align: %s\n"
+            "pos: %s pos_align: %s\n"
+            "size: %.4f text_align: %s wr_dir: %s\n"
+            "text: '%s'", cue->ident ? cue->ident : "(null)",
+            (long long)cue->time_start, (long long)cue->time_end,
+            linestr, str_cue_line_align[cue->line_align],
+            posstr, str_cue_pos_align[cue->pos_align],
+            cue->size * 100.0f, str_cue_text_align[cue->text_align], str_cue_writing_direction[cue->writing_direction],
+            nodestr);
+}
--- a/src/parser.h
+++ b/src/parser.h
@ -0,0 +1,91 @@
+#ifndef _VTT2ASS_PARSER_H
+#define _VTT2ASS_PARSER_H
+#include "dyna.h"
+#include <math.h>
+#include <stdbool.h>
+
+#include "cuetext.h"
+#include "cuestyle.h"
+
+enum cue_writing_direction {
+    WD_HORIZONTAL = 0, /* def */
+    WD_VERTICAL_GROW_LEFT,
+    WD_VERTICAL_GROW_RIGHT,
+};
+
+enum cue_line_align {
+    LINE_ALIGN_START = 0, /* def */
+    LINE_ALIGN_CENTER,
+    LINE_ALIGN_END,
+};
+
+enum cue_pos_align {
+    POS_ALIGN_AUTO = 0, /* def */
+    POS_ALIGN_LINE_LEFT,
+    POS_ALIGN_CENTER,
+    POS_ALIGN_LINE_RIGHT,
+};
+
+enum cue_text_align {
+    TEXT_ALIGN_CENTER = 0, /* def */
+    TEXT_ALIGN_START,
+    TEXT_ALIGN_END,
+    TEXT_ALIGN_LEFT,
+    TEXT_ALIGN_RIGHT,
+};
+
+enum cue_base_direction {
+    BDIR_LTR = 0, /* default */
+    BDIR_RTL,
+};
+
+extern const char *str_cue_pos_align[];
+extern const char *str_cue_text_align[];
+
+#define CUE_AUTO NAN
+#define IS_AUTO(f) (isnan(f))
+/* One parsed cue. The percentages of the settings are stored as fractions
+ * (the parser divides them by 100). */
+struct cue {
+    char *ident; /* def NULL */
+    /* true if 'lines' is an integer, false if it is a percentage. def true */
+    bool snap_to_lines;
+
+    /* This is the offset of the cue box in video percentage
+     * from the sides, in the way different from the writing_direction */
+    float line; /* def AUTO */
+    /* Where to calculate the line percentage at.
+     * start: left/top side
+     * center: middle
+     * end: right/bottom side */
+    enum cue_line_align line_align; /* def START */
+
+    /* https://www.w3.org/TR/webvtt1/#webvtt-cue-position */
+    /* Specifies the indent from the sides of the viewport
+     * in the same direction as the writing_direction */
+    float position; /* def AUTO */
+    /* Where to calculate the position at:
+     * line-left: left/top side
+     * center: middle
+     * line-right: right/bottom side */
+    enum cue_pos_align pos_align; /* def AUTO */
+
+    /* The size of the box in the writing_direction */
+    float size; /* stored as a fraction, def 1 (100%) */
+    enum cue_writing_direction writing_direction; /* def HORIZONTAL */
+
+    /* Sets the alignment within the cue box
+     * start and end can mean different things based on the base_direction */
+    enum cue_text_align text_align; /* 'align:' def CENTER */
+
+    struct vtt_node *text_node; /* root node of the text nodes, def NULL */
+
+    enum cue_base_direction base_direction; /* Text writing direction, def BDIR_LTR */
+
+    int64_t time_start, time_end; /* start and end time in ms */
+};
+
+/* return -1 on error */
+int prs_parse_tokens(struct dyna *tokens, struct dyna **cues, struct dyna **styles);
+
+void prs_cue2str(int size, char out_str[size], const struct cue *cue);
+
+#endif /* _VTT2ASS_PARSER_H */
--- a/src/reader.c
+++ b/src/reader.c
@ -0,0 +1,191 @@
+#include "reader.h"
+
+#include <stdio.h>
+#include <stdint.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/param.h>
+#include <string.h>
+#include <assert.h>
+#include <ctype.h>
+
+/* The memory mapped input file, set up by rdr_init() */
+static const uint8_t *file_data = NULL;
+/* Current read position in file_data, and the size of the mapping in bytes */
+static int64_t file_index = 0, file_size = 0;
+
+/* Maps filename read-only into memory and resets the read position.
+ * Returns 0 on success; on failure prints the reason with perror() and
+ * returns -1 without leaving a file descriptor open. */
+int rdr_init(const char *filename)
+{
+    struct stat fs;
+    void *mm;
+
+    int fd = open(filename, O_RDONLY, 0);
+    if (fd == -1) {
+        perror("open() on filename");
+        return -1;
+    }
+
+    /* Ask for the size of the file that was actually opened, the path
+     * could refer to something else by the time open() runs. */
+    if (fstat(fd, &fs) != 0) {
+        perror("stat() on filename");
+        close(fd);
+        return -1;
+    }
+
+    mm = mmap(NULL, fs.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+    if (mm == MAP_FAILED) {
+        perror("mmap()");
+        close(fd);
+        return -1;
+    }
+    /* The mapping stays valid after the descriptor is closed */
+    close(fd);
+
+    file_index = 0;
+    file_size = fs.st_size;
+    file_data = mm;
+
+    return 0;
+}
+
+/* Unmaps the input file and resets the reader state.
+ * Returns 0 on success; when munmap() fails the reason is printed, the
+ * state is left as it is and -1 is returned. */
+int rdr_free()
+{
+    if (munmap((void*)file_data, file_size) != 0) {
+        perror("munmap()");
+        return -1;
+    }
+
+    file_data = NULL;
+    file_size = 0;
+    file_index = 0;
+    return 0;
+}
+
+/* Returns the character at the read position without consuming it, or EOF
+ * at the end of the file. A "\r\n" pair is reported as '\n'; in that case
+ * the read position is moved onto the '\n', so that consuming one
+ * character skips the whole pair. */
+int rdr_peek()
+{
+    if (file_index >= file_size)
+        return EOF;
+
+    int c = file_data[file_index];
+    /* Only look at the next byte when there is one */
+    if (c == '\r' && file_index + 1 < file_size
+            && file_data[file_index + 1] == '\n') {
+        /* Convert \r\n to \n */
+        file_index++;
+        return '\n';
+    }
+    /* Every other character is returned as it is */
+    return c;
+}
+
+/* Moves the read position by n characters; no bounds are checked */
+void rdr_skip(int64_t n)
+{
+    file_index = file_index + n;
+}
+
+/* Copies up to n - 1 characters from the read position into out, without
+ * consuming them, and 0 terminates out. A "\r\n" pair is copied as a
+ * single '\n'.
+ * Returns the number of bytes written to out including the terminating 0,
+ * or EOF when n is not positive or nothing is left to read. */
+int64_t rdr_peekn(int64_t n, char out[n])
+{
+    if (n <= 0 || file_index >= file_size)
+        return EOF;
+
+    int64_t src = file_index;
+    int64_t i = 0;
+    /* Stop at the end of the mapping: neither the copy nor the look-ahead
+     * for '\n' may read the byte after the last one. */
+    while (i < n - 1 && src < file_size) {
+        char ch = file_data[src++];
+        if (ch == '\r' && src < file_size && file_data[src] == '\n') {
+            ch = '\n';
+            src++;
+        }
+        out[i++] = ch;
+    }
+
+    out[i] = '\0';
+    return i + 1;
+}
+
+/* Returns what rdr_peek() reports and moves the read position one
+ * character forward, also when EOF is returned. */
+int rdr_getc()
+{
+    const int c = rdr_peek();
+
+    file_index += 1;
+    return c;
+}
+
+/* Like rdr_peekn(), but also moves the read position forward by the
+ * returned count. Returns what rdr_peekn() returned. */
+int64_t rdr_readn(int64_t n, char out[n])
+{
+    int64_t read = rdr_peekn(n, out);
+    /* EOF is negative: skipping by it would move the position backwards */
+    if (read > 0)
+        rdr_skip(read);
+    return read;
+}
+
+/* Returns the current read position as a byte offset into the file */
+int64_t rdr_pos()
+{
+    const int64_t pos = file_index;
+    return pos;
+}
+
+/* Returns a pointer to the byte at the current read position */
+const char *rdr_curptr()
+{
+    return (char*)(file_data + file_index);
+}
+
+/* Copies the current line, without its line ending, into out and 0
+ * terminates it; at most n - 1 characters are copied. Nothing is consumed,
+ * apart from the "\r\n" adjustment rdr_peek() makes.
+ * When opt_skipcount is not NULL it receives the amount to pass to
+ * rdr_skip() to get past the line: the whole line including its '\n', or,
+ * for a last line without '\n', the number of characters copied.
+ * Returns the number of characters copied, or EOF at the end of the file.
+ * n is expected to be at least 1. */
+int64_t rdr_line_peek(int64_t n, char out[n], int64_t *opt_skipcount)
+{
+    if (rdr_peek() == EOF)
+        return EOF;
+    const uint8_t *start = file_data + file_index;
+    int64_t rem_len = file_size - file_index;
+    assert(rem_len > 0);
+
+    const uint8_t *end = memchr(start, '\n', rem_len);
+    if (end == NULL) {
+        /* File is not newline terminated, copy until the end */
+        size_t copy_amount = MIN(n - 1, rem_len);
+        memcpy(out, start, copy_amount);
+        out[copy_amount] = '\0';
+        if (opt_skipcount)
+            *opt_skipcount = copy_amount;
+        return copy_amount;
+    }
+
+    /* Newline found, copy until that */
+    if (opt_skipcount)
+        *opt_skipcount = (end + 1) - start;
+    /* Drop the '\r' of a "\r\n" ending. For an empty line there is no
+     * character before 'end' that belongs to this line, and at the very
+     * start of the file there is none at all. */
+    if (end > start && *(end - 1) == '\r')
+        end--;
+    ssize_t copy_amount = MIN(n - 1, end - start);
+    if (copy_amount <= 0) {
+        out[0] = '\0';
+        return 0;
+    }
+    memcpy(out, start, copy_amount);
+    out[copy_amount] = '\0';
+    return copy_amount;
+}
+
+/* Moves the read position to the start of the next line, or to the end of
+ * the file when the current line is the last one. */
+void rdr_skip_line(void)
+{
+    /* Nothing to skip at (or past) the end; a negative remaining length
+     * must never reach memchr(). */
+    if (file_index >= file_size)
+        return;
+
+    const uint8_t *endl = memchr(file_data + file_index, '\n', file_size - file_index);
+    if (endl == NULL) {
+        /* No endl termination, skip until EOF */
+        file_index = file_size;
+        return;
+    }
+
+    file_index = (endl - file_data) + 1;
+}
+
+/* Consumes characters for as long as isspace() accepts them, stopping at
+ * the first other character or at the end of the file. */
+void rdr_skip_whitespace(void)
+{
+    for (int c = rdr_peek(); c != EOF && isspace(c); c = rdr_peek())
+        rdr_skip(1);
+}
--- a/src/reader.h
+++ b/src/reader.h
@ -0,0 +1,32 @@
+#ifndef _VTT2ASS_READER_H
+#define _VTT2ASS_READER_H
+#include <stdint.h>
+
+int rdr_init(const char *filename);
+int rdr_free();
+
+int rdr_getc();
+int rdr_peek();
+int64_t rdr_readn(int64_t n, char out[n]);
+/* Returns number of items peeked (can be less than n), or -1 on error */
+int64_t rdr_peekn(int64_t n, char out[n]);
+
+/* Peeks until a newline character, or EOF.
+ * The newline is not copied to out
+ * Assumes that we are at the start of the line
+ * Return EOF in case of EOF
+ * Returns the characters copied to out, will 0 terminate */
+/* opt_skipcount will contain the value, such that rdr_skip(opt_skipcount)
+ * will skip over the entire line */
+int64_t rdr_line_peek(int64_t n, char out[n], int64_t *opt_skipcount);
+
+/* Skip n characters */
+void rdr_skip(int64_t n);
+/* Skips the current line */
+void rdr_skip_line(void);
+void rdr_skip_whitespace(void);
+
+int64_t rdr_pos();
+const char *rdr_curptr();
+
+#endif /* _VTT2ASS_READER_H */
--- a/src/srt.c
+++ b/src/srt.c
@ -0,0 +1,181 @@
+#include "srt.h"
+
+#include "cuetext.h"
+#include <stdio.h>
+#include <string.h>
+
+#include "util.h"
+
+/* Formats the time tms, given in milliseconds, as an srt timestamp
+ * "HH:MM:SS,mmm" into out, using at most n bytes. */
+static void srt_ms_to_str(int64_t tms, int n, char out[n])
+{
+    int64_t rest = tms;
+
+    const int hours = rest / H_IN_MS;
+    rest %= H_IN_MS;
+    const int minutes = rest / M_IN_MS;
+    rest %= M_IN_MS;
+    const int seconds = rest / S_IN_MS;
+    const int millis = rest % S_IN_MS;
+
+    snprintf(out, n, "%02d:%02d:%02d,%03d", hours, minutes, seconds, millis);
+}
+
+/* Writes the timing line of cue c: "<start> --> <end>" and a newline */
+static void srt_write_timestamp(FILE *f, struct cue *c)
+{
+    char start[32], end[32];
+
+    srt_ms_to_str(c->time_start, sizeof(start), start);
+    srt_ms_to_str(c->time_end, sizeof(end), end);
+
+    fprintf(f, "%s --> %s\n", start, end);
+}
+
+/* Which half of a tag pair to write; also the column index into tag_map */
+enum tag_position {
+    TAG_START = 0,
+    TAG_END,
+};
+
+/* Writes the {\anN} alignment override that matches the text alignment of
+ * cue c. Nothing is written for TAG_END (the override has no closing part)
+ * nor for the default, bottom centre, alignment. */
+static void handle_position_tags(FILE *f, const struct cue *c, enum tag_position tpos)
+{
+    /* Only handle left to right horizontal text for now */
+
+    /* numpad style alignment codes: rows top to bottom, columns left to right */
+    static const int alignmap[3][3] = {
+        { 7, 8, 9 },
+        { 4, 5, 6 },
+        { 1, 2, 3 },
+    };
+    const int row = 2; /* the vertical alignment is always the bottom row */
+    int col;
+
+    if (tpos == TAG_END)
+        return;
+
+    switch (c->text_align) {
+        case TEXT_ALIGN_START:
+        case TEXT_ALIGN_LEFT:
+            col = 0;
+            break;
+        case TEXT_ALIGN_END:
+        case TEXT_ALIGN_RIGHT:
+            col = 2;
+            break;
+        default:
+            col = 1;
+            break;
+    }
+
+    /* bottom centre is the default, no tag needed */
+    if (col == 1)
+        return;
+
+    fprintf(f, "{\\an%d}", alignmap[row][col]);
+}
+
+/* The text written for a node type: [type][TAG_START] before the children
+ * of the node, [type][TAG_END] after them. Only the node types listed
+ * here have an entry. */
+static const char *tag_map[][2] = {
+    [VNODE_RUBY_TEXT][0] = "(",
+    [VNODE_RUBY_TEXT][1] = ")",
+    [VNODE_ITALIC][0] = "<i>",
+    [VNODE_ITALIC][1] = "</i>",
+    [VNODE_BOLD][0] = "<b>",
+    [VNODE_BOLD][1] = "</b>",
+    [VNODE_UNDERLINE][0] = "<u>",
+    [VNODE_UNDERLINE][1] = "</u>",
+};
+
+/* Writes the opening (TAG_START) or closing (TAG_END) text for node.
+ * The root node gets the alignment override, ruby text and the
+ * italic/bold/underline nodes get their tag_map entry, and a class node
+ * is written as italic when the style of its first class name is italic.
+ * Every other node type writes nothing. */
+static void srt_write_tag(FILE *f, const struct cue *c, struct vtt_node *node, const struct dyna *cstyles, enum tag_position pos)
+{
+    enum vtt_node_type type = node->type;
+
+    if (type == VNODE_ROOT) {
+        handle_position_tags(f, c, pos);
+        return;
+    }
+
+    if (type == VNODE_CLASS && node->class_names) {
+        // NOTE: only 1 class name is handled here, and we should do it another way anyway
+        /* dyna_elem() returns NULL for an empty array */
+        char **first = dyna_elem(node->class_names, 0);
+        if (first != NULL && *first != NULL) {
+            char cname_with_cue[128];
+
+            snprintf(cname_with_cue, sizeof(cname_with_cue), "::cue(.%s)", *first);
+            const struct cue_style *cs = cuestyle_get_by_selector(cstyles, cname_with_cue);
+            if (cs && cs->italic)
+                type = VNODE_ITALIC;
+        }
+    }
+
+    switch (type) {
+    case VNODE_RUBY_TEXT:
+    case VNODE_ITALIC:
+    case VNODE_BOLD:
+    case VNODE_UNDERLINE:
+        fputs(tag_map[type][pos], f);
+        break;
+    default:
+        break;
+    }
+}
+
+/* Writes node and, recursively, all of its children: text nodes as their
+ * text, timestamp nodes as nothing, every other node as its opening tag,
+ * its children and its closing tag. */
+static void srt_write_text(FILE *f, const struct cue *c, struct vtt_node *node, const struct dyna *cstyles)
+{
+    /* These two cannot have children */
+    switch (node->type) {
+    case VNODE_TEXT:
+        fputs(node->text, f);
+        return;
+    case VNODE_TIMESTAMP:
+        return;
+    default:
+        break;
+    }
+
+    srt_write_tag(f, c, node, cstyles, TAG_START);
+
+    if (node->childs) {
+        for (int i = 0; i < node->childs->e_idx; i++)
+            srt_write_text(f, c, dyna_elem(node->childs, i), cstyles);
+    }
+
+    srt_write_tag(f, c, node, cstyles, TAG_END);
+}
+
+/* Writes all cues that have text to the file fname in srt format.
+ * Returns 0 on success, -1 when the file cannot be opened, written or
+ * closed. */
+int srt_write(struct dyna *cues, struct dyna *cstyles, const char *fname)
+{
+    FILE *f = fopen(fname, "w");
+    if (f == NULL)
+        return -1;
+
+    /* srt entries are numbered 1, 2, 3...; cues without text are left out,
+     * so the number cannot be derived from the cue index. */
+    int seq = 0;
+    for (int i = 0; i < cues->e_idx; i++) {
+        struct cue *c = dyna_elem(cues, i);
+        if (c->text_node == NULL)
+            continue;
+
+        fprintf(f, "%d\n", ++seq);
+        srt_write_timestamp(f, c);
+
+        srt_write_text(f, c, c->text_node, cstyles);
+        fputc('\n', f);
+        fputc('\n', f);
+    }
+
+    /* A failed write can surface as late as in fclose(), which flushes */
+    int failed = ferror(f);
+    if (fclose(f) != 0)
+        failed = 1;
+    return failed ? -1 : 0;
+}
--- a/src/srt.h
+++ b/src/srt.h
@ -0,0 +1,9 @@
+#ifndef _VTT2ASS_SRT_H
+#define _VTT2ASS_SRT_H
+#include "parser.h"
+#include "cuestyle.h"
+#include "dyna.h"
+
+/* Write the cues in `cues` to the file `fname` in SRT format.
+ * Returns 0 on success and -1 on failure (e.g. the file cannot be opened). */
+int srt_write(struct dyna *cues, struct dyna *cstyles, const char *fname);
+
+#endif /* _VTT2ASS_SRT_H */
--- a/src/stack.c
+++ b/src/stack.c
@ -0,0 +1,29 @@
+#include "stack.h"
+
+#include <assert.h>
+#include <string.h>
+
+/*
+ * Advance the stack index by one and copy `e_size` bytes from `val` into
+ * the slot that is now on top. Asserts that the advanced index is still
+ * below `e_cap`.
+ */
+void stack_push(struct stack *stack, void *val)
+{
+    stack->e_idx += 1;
+    assert(stack->e_idx < stack->e_cap);
+
+    /* stack_top() now points at the freshly claimed slot */
+    memcpy(stack_top(stack), val, stack->e_size);
+}
+
+/* Return a pointer to the slot selected by the current index (`e_idx`). */
+void *stack_top(struct stack *stack)
+{
+    char *base = stack->buff;
+
+    return base + (stack->e_size * stack->e_idx);
+}
+
+/* Drop the top element by stepping the index back; asserts the stack is not empty. */
+void stack_pop(struct stack *stack)
+{
+    assert(stack->e_idx > 0);
+    stack->e_idx -= 1;
+}
+
+/* Number of elements currently pushed, i.e. the current index converted to int. */
+int stack_count(struct stack *stack)
+{
+    int pushed = stack->e_idx;
+
+    return pushed;
+}
+
--- a/src/stack.h
+++ b/src/stack.h
@ -0,0 +1,23 @@
+#ifndef _VTT2ASS_STACK_H
+#define _VTT2ASS_STACK_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+/*
+ * Declare a stack named `var` together with its backing buffer
+ * `stackbuff_<var>` in the current scope.
+ *
+ * v_item_size  size of one element in bytes
+ * v_max_size   number of element slots in the buffer
+ *
+ * The arguments are parenthesised so that expressions such as `n + 1`
+ * expand with the intended precedence. The remaining members (e_idx) are
+ * zero-initialised by the designated initialiser.
+ */
+#define stack_init(var, v_item_size, v_max_size) \
+    char stackbuff_##var[(v_item_size) * (v_max_size)]; \
+    struct stack var = { .buff = stackbuff_##var, .e_cap = (v_max_size), .e_size = (v_item_size) };
+
+/* Fixed-capacity stack of equally sized elements stored in a caller-provided buffer. */
+struct stack {
+    char *buff; /* backing storage, e_cap slots of e_size bytes each */
+    size_t e_size; /* size of 1 element */
+    int64_t e_cap; /* The maximum amount of elements that can be stored */
+    int64_t e_idx; /* Index of the current top element. stack_push() increments it
+                      before writing, so 0 means empty and slot 0 is never written by a push */
+};
+
+/* Copy e_size bytes from `val` onto the stack; asserts that capacity is not exceeded */
+void stack_push(struct stack *stack, void *val);
+/* Pointer to the slot at the current index */
+void *stack_top(struct stack *stack);
+/* Remove the top element; asserts that the stack is not empty */
+void stack_pop(struct stack *stack);
+/* Number of elements currently on the stack */
+int stack_count(struct stack *stack);
+
+#endif /* _VTT2ASS_STACK_H */
--- a/src/textextents.c
+++ b/src/textextents.c
@ -0,0 +1,267 @@
+#include "textextents.h"
+
+#include <wchar.h>
+#include <assert.h>
+#include <math.h>
+#include <sys/param.h>
+#include <hb-ft.h>
+
+#if 1 // debug
+#include <ft2build.h>
+#include FT_FREETYPE_H
+#include FT_GLYPH_H
+#include FT_BITMAP_H
+#include FT_TRUETYPE_TABLES_H
+#endif
+
+/*
+ * Override the ascender/descender/height of `ftface`.
+ * The OS/2 usWin* values are used when their sum is non-zero. If the
+ * resulting extent or height is still zero, the OS/2 sTypo* values are
+ * used when they differ, and the face bounding box otherwise.
+ * Based on:
+ * https://github.com/libass/libass/blob/ad42889c85fc61a003ad6d4cdb985f56de066f91/libass/ass_font.c#L278
+ */
+static void set_font_metrics(FT_Face ftface)
+{
+    TT_OS2 *os2 = FT_Get_Sfnt_Table(ftface, FT_SFNT_OS2);
+
+    if (os2 != NULL) {
+        const short win_asc  = (short)os2->usWinAscent;
+        const short win_desc = (short)os2->usWinDescent;
+
+        if (win_asc + win_desc != 0) {
+            ftface->ascender  = win_asc;
+            ftface->descender = -win_desc;
+            ftface->height    = ftface->ascender - ftface->descender;
+        }
+    }
+
+    /* Metrics are usable, nothing more to do */
+    if (ftface->ascender - ftface->descender != 0 && ftface->height != 0)
+        return;
+
+    if (os2 != NULL && (os2->sTypoAscender - os2->sTypoDescender) != 0) {
+        ftface->ascender = os2->sTypoAscender;
+        ftface->descender = os2->sTypoDescender;
+    } else {
+        ftface->ascender = ftface->bbox.yMax;
+        ftface->descender = ftface->bbox.yMin;
+    }
+    ftface->height = ftface->ascender - ftface->descender;
+}
+
+/*
+ * Shape `text` with the font at `fontpath` and store the resulting
+ * HarfBuzz buffer and font in `out_te`.
+ *
+ * fontpath  font file, handed to font_get_face()
+ * text      UTF-8 text to shape
+ * text_len  byte length of `text`, passed through to hb_buffer_add_utf8()
+ *           (te_simple() passes -1)
+ * fs        font size; requested from FreeType as a real-dimension height
+ *           of fs * 64
+ * kern      value of the 'kern' feature (false turns kerning off)
+ * out_te    receives the shaped buffer and the font; release it with
+ *           te_destroy_obj()
+ *
+ * Failures are only caught by assert().
+ */
+void te_create_obj(const char *fontpath, const char *text, int text_len, int fs, bool kern, struct te_obj *out_te)
+{
+    FT_Face ftface = NULL;
+    hb_buffer_t *hbuf = NULL;
+    hb_font_t *hfont = NULL;
+    hb_feature_t features[8] = {0};
+    int feat_idx = 0;
+    int err;
+
+    // https://github.com/libass/libass/blob/master/libass/ass_render.c#L2039
+
+    ftface = font_get_face(fontpath);
+    assert(ftface);
+
+    hbuf = hb_buffer_create();
+    assert(hbuf);
+
+    set_font_metrics(ftface);
+
+    FT_Size_RequestRec rq = {
+        .type = FT_SIZE_REQUEST_TYPE_REAL_DIM,
+        .width = 0,
+        .height = lrint(fs * 64),
+    };
+    err = FT_Request_Size(ftface, &rq);
+    assert(!err);
+    (void)err; /* only read by the assert above */
+
+    hfont = hb_ft_font_create_referenced(ftface);
+    assert(hfont);
+    hb_font_set_scale(hfont,
+            ((uint64_t)ftface->size->metrics.x_scale * (uint64_t)ftface->units_per_EM) >> 16,
+            ((uint64_t)ftface->size->metrics.y_scale * (uint64_t)ftface->units_per_EM) >> 16);
+    hb_font_set_ppem(hfont, ftface->size->metrics.x_ppem, ftface->size->metrics.y_ppem);
+
+    hb_buffer_set_direction(hbuf, HB_DIRECTION_LTR);
+    hb_buffer_set_script(hbuf, hb_script_from_string("Jpan", -1));
+    /* hb_language_from_string() expects a BCP 47 tag: Japanese is "ja"
+     * ("jp" is the country code, not a language) */
+    hb_buffer_set_language(hbuf, hb_language_from_string("ja", -1));
+
+    /* Also do some if text is vertical https://github.com/libass/libass/blob/master/libass/ass_shaper.c#L175 */
+
+    /* Ligatures should be disabled when spacing > 0 but we don't have that info here yet */
+    features[feat_idx++] = (hb_feature_t){
+        .tag = HB_TAG('l', 'i', 'g', 'a'),
+        .end = HB_FEATURE_GLOBAL_END,
+        .start = HB_FEATURE_GLOBAL_START,
+        .value = 0,
+    };
+    features[feat_idx++] = (hb_feature_t){
+        .tag = HB_TAG('c', 'l', 'i', 'g'),
+        .end = HB_FEATURE_GLOBAL_END,
+        .start = HB_FEATURE_GLOBAL_START,
+        .value = 0,
+    };
+
+    /* Kerning */
+    features[feat_idx++] = (hb_feature_t){
+        .tag = HB_TAG('k', 'e', 'r', 'n'),
+        .end = HB_FEATURE_GLOBAL_END,
+        .start = HB_FEATURE_GLOBAL_START,
+        .value = kern,
+    };
+
+    hb_buffer_add_utf8(hbuf, text, text_len, 0, text_len);
+    hb_shape(hfont, hbuf, features, feat_idx);
+
+    *out_te = (struct te_obj){
+        .hbuf = hbuf,
+        .hfont = hfont,
+        .fs = fs,
+        .fs_mul = 1,
+    };
+}
+
+/* Release the HarfBuzz buffer and font held by `te`; the struct itself is not freed. */
+void te_destroy_obj(struct te_obj *te)
+{
+    hb_buffer_t *shaped = te->hbuf;
+    hb_font_t *font = te->hfont;
+
+    hb_buffer_destroy(shaped);
+    hb_font_destroy(font);
+}
+
+/*
+ * Translate a byte range of the shaped text into a range of glyph indexes.
+ *
+ * gcount         number of entries in `gi`
+ * gi             glyph infos of the shaped buffer
+ * offset         cluster value that marks the start of the range
+ * len            length of the range, or -1 for "up to the last glyph"
+ * out_first_idx  receives the index of a glyph whose cluster equals `offset`
+ * out_last_idx   receives the index of the last glyph of the range
+ *
+ * Asserts if no glyph has a cluster equal to `offset` (this includes an
+ * empty glyph array).
+ */
+static void find_cluster_indexes(unsigned int gcount, hb_glyph_info_t gi[gcount],
+        int offset, int len, int *out_first_idx, int *out_last_idx)
+{
+    int cluster_start = -1, cluster_end = -1;
+    if (len == -1)
+        cluster_end = gcount - 1;
+    for (unsigned int i = 0; i < gcount; i++) {
+        /* Not guarded by cluster_start == -1: a later glyph with the same
+         * cluster overwrites the index until the loop breaks */
+        if (gi[i].cluster == offset) {
+            cluster_start = i;
+        }
+
+        /* The end is the glyph just before the first one at or past offset + len */
+        if (cluster_end == -1 && gi[i].cluster >= offset + len) {
+            /* Because the point-at-end problem described below */
+            cluster_end = i - 1;
+        }
+
+        if (cluster_start != -1 && cluster_end != -1) {
+            break;
+        }
+    }
+    if (cluster_end == -1) {
+        /* If len is != -1, but no cluster index is found, it's probably
+         * because len points to at the end of the string, while the
+         * cluster index points at the start. So this basically a
+         * off by one error, but 1 character is multiple bytes.
+         * So let's just treat it as the whole string */
+        cluster_end = gcount - 1;
+    }
+    assert(cluster_start != -1 && cluster_end != -1);
+
+    *out_first_idx = cluster_start;
+    *out_last_idx = cluster_end;
+}
+
+/*
+ * Measure the part of the shaped text selected by `offset` and `len`
+ * (see find_cluster_indexes()).
+ *
+ * te       object created by te_create_obj()
+ * offset   start of the range
+ * len      length of the range, -1 for the rest of the text
+ * spacing  extra spacing added once per glyph
+ * out_ext  zeroed, then width and height are filled in
+ *
+ * The width is the sum of the glyph advances (scaled by fs_mul) plus the
+ * spacing, accumulated as an integer in 1/64 units and then divided by 64.
+ * The height is fs * fs_mul.
+ */
+void te_get_at(struct te_obj *te, int offset, int len, float spacing, struct text_extents *out_ext)
+{
+    memset(out_ext, 0, sizeof(*out_ext));
+
+    unsigned int glyph_count;
+    int cluster_start, cluster_end;
+    hb_glyph_info_t *glyph_info    = hb_buffer_get_glyph_infos(te->hbuf, &glyph_count);
+    hb_glyph_position_t *glyph_pos = hb_buffer_get_glyph_positions(te->hbuf, &glyph_count);
+
+    find_cluster_indexes(glyph_count, glyph_info, offset, len, &cluster_start, &cluster_end);
+
+    int width = 0;
+    for (int i = cluster_start; i <= cluster_end; i++) {
+        /* The spacing is added for every glyph, the last one included */
+        width += (glyph_pos[i].x_advance * te->fs_mul) + (spacing * 64);
+    }
+
+    out_ext->width = (width / 64.0f);
+    out_ext->height = te->fs * te->fs_mul;
+}
+
+/*
+ * Fill `out_ext` with one extent per glyph of the range selected by
+ * `offset` and `len` (see find_cluster_indexes()).
+ *
+ * out_ext_size   capacity of `out_ext`; asserted to be large enough
+ * out_ext        receives width (glyph advance / 64) and height (te->fs),
+ *                all other fields are zero
+ * out_ext_count  receives the number of entries written
+ */
+void te_get_at_chars(struct te_obj *te, int offset, int len,
+        int out_ext_size, struct text_extents out_ext[out_ext_size], int *out_ext_count)
+{
+    int cluster_start, cluster_end;
+    /* HarfBuzz reports the glyph count through an unsigned int pointer */
+    unsigned int gc;
+    hb_glyph_info_t *gi     = hb_buffer_get_glyph_infos(te->hbuf, &gc);
+    hb_glyph_position_t *gp = hb_buffer_get_glyph_positions(te->hbuf, &gc);
+
+    find_cluster_indexes(gc, gi, offset, len, &cluster_start, &cluster_end);
+
+    *out_ext_count = 0;
+    for (int i = cluster_start; i <= cluster_end; i++) {
+        assert(*out_ext_count < out_ext_size);
+        struct text_extents *curr_ext = &out_ext[*out_ext_count];
+
+        *curr_ext = (struct text_extents){
+            /* Floating point division keeps the fractional part of the
+             * advance, the same way te_get_at() does */
+            .width = gp[i].x_advance / 64.0,
+            .height = te->fs,
+        };
+        (*out_ext_count)++;
+    }
+}
+
+/*
+ * Like te_get_at_chars(), but additionally spreads the glyphs over
+ * `target_justify`: the free space is divided into (glyph count + 1) equal
+ * gaps and every extent gets that gap as its x_off.
+ * When the text is already at least `target_justify` wide, the extents are
+ * returned unchanged (x_off stays 0).
+ */
+void te_get_at_chars_justify(struct te_obj *te, int offset, int len, int target_justify,
+        int out_ext_size, struct text_extents out_ext[out_ext_size], int *out_ext_count)
+{
+    double unjust_size = 0;
+    float space = 0;
+
+    /* 1st calculate all of the sizes of each char */
+    te_get_at_chars(te, offset, len, out_ext_size, out_ext, out_ext_count);
+
+    /* 2nd calculate the total size of the unjustified text */
+    for (int i = 0; i < *out_ext_count; i++) {
+        unjust_size += out_ext[i].width;
+    }
+
+    /* 3rd calculate the spaces needed on
+     * - before the 1st char
+     * - between each char
+     * - after the last char
+     */
+    if (target_justify <= unjust_size)
+        return;
+    /* Divide in floating point, an integer division would drop the
+     * fractional part of the gap */
+    space = (target_justify - unjust_size) / (*out_ext_count + 1);
+
+    /* 4th modify x_off of text_extents */
+    for (int i = 0; i < *out_ext_count; i++) {
+        out_ext[i].x_off = space;
+    }
+}
+
+/*
+ * One-shot helper: shape the whole NUL-terminated `text`, measure all of
+ * it into `out_ext` and release the shaping object again.
+ */
+void te_simple(const char *fontpath, const char *text, int fs, float spacing, bool kern, struct text_extents *out_ext)
+{
+    struct te_obj shaped;
+
+    te_create_obj(fontpath, text, -1, fs, kern, &shaped);
+    te_get_at(&shaped, 0, -1, spacing, out_ext);
+    te_destroy_obj(&shaped);
+}
+
+/* Disabled: the body passes `spacing` to te_create_obj(), but this function
+ * has no such parameter, so it would not compile as written. */
+#if 0
+void te_simple_justify_chars(const char *fontpath, const char *text, int fs, int target_justify,
+        int out_ext_size, struct text_extents out_ext[out_ext_size], int *out_ext_count)
+{
+    struct te_obj te;
+    te_create_obj(fontpath, text, -1, fs, spacing, &te);
+    te_get_at_chars_justify(&te, 0, -1, target_justify, out_ext_size, out_ext, out_ext_count);
+    te_destroy_obj(&te);
+}
+#endif
--- a/src/textextents.h
+++ b/src/textextents.h
@ -0,0 +1,32 @@
+#ifndef _VTT2ASS_TEXTEXTENTS_H
+#define _VTT2ASS_TEXTEXTENTS_H
+#include "util.h"
+#include "font.h"
+#include <hb.h>
+
+/* A shaped piece of text; filled by te_create_obj(), released by te_destroy_obj(). */
+struct te_obj {
+    //FT_Face ftface;
+    hb_buffer_t *hbuf; /* buffer holding the shaped glyphs */
+    hb_font_t *hfont;  /* font the text was shaped with */
+    int fs;            /* font size as passed to te_create_obj() */
+
+    double fs_mul;     /* multiplier applied to advances and height in te_get_at(); te_create_obj() sets it to 1 */
+};
+
+/* Shape `text` (text_len bytes, -1 for NUL-terminated) with the font file `fontpath` at size `fs` */
+void te_create_obj(const char *fontpath, const char *text, int text_len, int fs, bool kern, struct te_obj *out_te);
+/* Release what te_create_obj() stored in `te` */
+void te_destroy_obj(struct te_obj *te);
+
+/* out_ext includes the spacing width as well! */
+void te_get_at(struct te_obj *te, int offset, int len, float spacing, struct text_extents *out_ext);
+/* One extent per glyph of the selected range; the number written is stored in out_ext_count */
+void te_get_at_chars(struct te_obj *te, int offset, int len,
+        int out_exts_size, struct text_extents out_ext[out_exts_size], int *out_ext_count);
+/* Same as te_get_at_chars(), with x_off set so that the glyphs spread over target_justify */
+void te_get_at_chars_justify(struct te_obj *te, int offset, int len, int target_justify,
+        int out_ext_size, struct text_extents out_ext[out_ext_size], int *out_ext_count);
+
+/* Create, measure the whole text, destroy */
+void te_simple(const char *fontpath, const char *text, int fs, float spacing, bool kern, struct text_extents *out_ext);
+#if 0
+void te_simple_justify_chars(const char *fontpath, const char *text, int fs, int target_justify,
+        int out_ext_size, struct text_extents out_ext[out_ext_size], int *out_ext_count);
+#endif
+
+#endif /* _VTT2ASS_TEXTEXTENTS_H */
--- a/src/tokenizer.c
+++ b/src/tokenizer.c
@ -0,0 +1,491 @@
+#include "tokenizer.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include <ctype.h>
+#include <errno.h>
+#include <stdbool.h>
+
+#include "reader.h"
+
+/* Expand TOKEN_DEF with a stringifying macro: one name string per token
+ * type, in enum order, so the array can be indexed by enum token_type. */
+#define x(n, ...) #n,
+static const char *token_str_map[] = {
+    TOKEN_DEF(x, x)
+};
+#undef x
+
+/* Line counter used in tok_tokenize()'s messages; starts at 1 and is never reset */
+static int64_t tok_cline = 1;
+
+/*
+ * Element destructor for the token list: frees the heap strings owned by
+ * the token at `data`. Token types without string payload are left alone.
+ */
+static void tok_free_inner(void *data)
+{
+    struct token *t = data;
+
+    if (t->type == TOK_IDENT) {
+        free(t->ident.str);
+    } else if (t->type == TOK_CUE_SETTING) {
+        free(t->cue_setting.key);
+        free(t->cue_setting.value);
+    } else if (t->type == TOK_CUE_TEXT) {
+        free(t->cue_text.str);
+    } else if (t->type == TOK_STYLE_SELECTOR) {
+        free(t->style_selector.str);
+    } else if (t->type == TOK_STYLE_KEYVAL) {
+        free(t->style_keyval.value);
+        free(t->style_keyval.key);
+    }
+}
+
+/*
+ * Check that the peeked first line is exactly the "WEBVTT" signature.
+ * On a match a TOK_FILE_MAGIC token is appended, the reader is advanced by
+ * the reported line skip and 0 is returned. Otherwise a message is printed
+ * to stderr and -1 is returned.
+ */
+static int tok_read_magic(struct dyna *tokens)
+{
+    const char *magic = "WEBVTT";
+    const size_t magic_len = strlen(magic);
+    char first_line[16] = {0};
+    int64_t skip;
+
+    int64_t got = rdr_line_peek(magic_len + 1, first_line, &skip);
+    if (got == EOF) {
+        fprintf(stderr, "Found EOF while parsing magic bytes\n");
+        return -1;
+    }
+
+    const bool same_len = (got == magic_len);
+    if (!same_len || memcmp(first_line, magic, got) != 0) {
+        fprintf(stderr, "Not a WEBVTT file: %s\n", first_line);
+        return -1;
+    }
+
+    struct token *magic_tok = (struct token*)dyna_emplace(tokens);
+    magic_tok->type = TOK_FILE_MAGIC;
+    rdr_skip(skip);
+
+    return 0;
+}
+
+/*
+ * Skip a leading UTF-8 byte order mark (EF BB BF).
+ * The mark is only skipped when four bytes could be peeked.
+ */
+static void tok_skip_bom()
+{
+    static const uint8_t utf8_bom[3] = {0xef, 0xbb, 0xbf};
+    uint8_t head[4];
+
+    if (rdr_peekn(sizeof(head), head) != 4)
+        return;
+    if (memcmp(utf8_bom, head, 3) != 0)
+        return;
+
+    rdr_skip(3);
+}
+
+/*
+ * Consume characters up to and including the next newline and bump the
+ * line counter. Stops silently at EOF without touching the counter.
+ */
+static void tok_skip_line()
+{
+    for (;;) {
+        const int ch = rdr_getc();
+
+        if (ch == EOF)
+            return;
+        if (ch == '\n') {
+            tok_cline++;
+            return;
+        }
+    }
+}
+
+/*
+ * Consume the body of a NOTE block and append a TOK_NOTE token.
+ * tok_tokenize() has already skipped the line starting with "NOTE"; this
+ * skips every following line until rdr_line_peek() reports a length <= 0
+ * (an empty line or EOF), and then skips that line too.
+ * `li` and `line` are not used. Always returns 0.
+ */
+static int tok_parse_note(struct dyna *tokens, int64_t li, char line[li])
+{
+    char rl[1024];
+    int64_t lineskip, rli;
+
+    (void)li;
+    (void)line;
+
+    /* Keep the peeked length (rli) and the skip distance (lineskip) in
+     * separate variables instead of writing both into rli */
+    rli = rdr_line_peek(sizeof(rl), rl, &lineskip);
+    while (rli > 0) {
+        rdr_skip_line();
+        rli = rdr_line_peek(sizeof(rl), rl, &lineskip);
+    }
+    rdr_skip_line();
+
+    ((struct token*)dyna_emplace(tokens))->type = TOK_NOTE;
+    return 0;
+}
+
+/*
+ * Parse a timestamp at the start of `line`, either "HH:MM:SS.mmm" or
+ * "MM:SS.mmm", and append a TOK_TIMESTAMP token holding it in milliseconds.
+ *
+ * Returns the number of characters consumed from `line`, or -1 (with a
+ * message on stderr) when the text is not a timestamp. `li` is not used;
+ * `line` has to be NUL-terminated.
+ */
+static int tok_parse_timestamp(struct dyna *tokens, int64_t li, char line[li])
+{
+    /* Plain int/long match the conversion specifiers exactly and do not
+     * overflow on hour values above 127 */
+    int hour = 0, min = 0, sec = 0;
+    long frac = 0;
+    int consumed = 0;
+
+    (void)li;
+
+    int read = sscanf(line, "%d:%d:%d.%ld%n", &hour, &min, &sec, &frac, &consumed);
+    if (read != 4) {
+        /* Retry with the short form that has no hour field */
+        hour = 0;
+
+        read = sscanf(line, "%d:%d.%ld%n", &min, &sec, &frac, &consumed);
+        if (read != 3) {
+            fprintf(stderr, "Timestamp parse, failed on sscanf: %d (%s)\n", read, line);
+            return -1;
+        }
+    }
+    if (hour < 0 || min < 0 || sec < 0 || frac < 0) {
+        /* sscanf accepts a sign, a timestamp cannot have one */
+        fprintf(stderr, "Timestamp parse, failed on sscanf: %d (%s)\n", read, line);
+        return -1;
+    }
+
+    int64_t ms = frac;
+    ms += ((int64_t)sec + (((int64_t)min + (int64_t)hour * 60) * 60)) * 1000;
+
+    struct token tok = { .type = TOK_TIMESTAMP, .timestamp.ms = ms };
+    dyna_append(tokens, &tok);
+
+    return consumed;
+}
+
+/*
+ * Tokenize one "selector { key: value; ... }" group of a STYLE block.
+ *
+ * Appends, in this order: TOK_STYLE_SELECTOR, TOK_STYLE_OPEN_BRACE, one
+ * TOK_STYLE_KEYVAL per "key: value;" declaration and TOK_STYLE_CLOSE_BRACE.
+ * The selector ends at the first whitespace character. After the closing
+ * brace the rest of that line is skipped.
+ *
+ * Returns 0 on success and -1 on EOF, on a syntax error or when a selector,
+ * key or value does not fit into the 1024 byte scratch buffer. Tokens that
+ * were already appended stay in `tokens`.
+ */
+static int tok_parse_style_group(struct dyna *tokens)
+{
+    char buff[1024] = {0};
+    size_t bi = 0;
+    char *key = NULL; /* key of the declaration being read, owned until appended */
+    int c;
+
+    while (true) {
+        c = rdr_getc();
+        if (c == EOF)
+            return -1;
+        if (isspace(c))
+            break;
+        if (bi >= sizeof(buff) - 1) {
+            fprintf(stderr, "Style parse error: selector too long\n");
+            return -1;
+        }
+        buff[bi++] = (char)c;
+    }
+
+    struct token sel = { .type = TOK_STYLE_SELECTOR };
+    sel.style_selector.str = strndup(buff, bi);
+    if (sel.style_selector.str == NULL)
+        return -1;
+    dyna_append(tokens, &sel);
+
+    rdr_skip_whitespace();
+    c = rdr_getc();
+    if (c != '{') {
+        fprintf(stderr, "Expected '{' in style after selector, got %c\n", c);
+        return -1;
+    }
+    struct token open_brace = { .type = TOK_STYLE_OPEN_BRACE };
+    dyna_append(tokens, &open_brace);
+
+    rdr_skip_whitespace();
+    bi = 0;
+
+    while (rdr_peek() != '}' && rdr_peek() != EOF) {
+        c = rdr_getc();
+
+        if (c == ':') {
+            /* End of the key */
+            if (key != NULL) {
+                fprintf(stderr, "Style parse error: multiple keys?\n");
+                goto fail;
+            }
+            key = strndup(buff, bi);
+            if (key == NULL)
+                goto fail;
+            bi = 0;
+            rdr_skip_whitespace();
+        } else if (c == ';') {
+            /* End of the value: the declaration is complete */
+            if (key == NULL) {
+                fprintf(stderr, "Style parse error: value without key?\n");
+                goto fail;
+            }
+            struct token kv = { .type = TOK_STYLE_KEYVAL };
+            kv.style_keyval.key = key;
+            kv.style_keyval.value = strndup(buff, bi);
+            dyna_append(tokens, &kv);
+            key = NULL; /* now owned by the token list */
+            bi = 0;
+            rdr_skip_whitespace();
+        } else {
+            if (bi >= sizeof(buff) - 1) {
+                fprintf(stderr, "Style parse error: declaration too long\n");
+                goto fail;
+            }
+            buff[bi++] = (char)c;
+        }
+    }
+    if (rdr_peek() == EOF) {
+        fprintf(stderr, "Style parse error: End of file inside block\n");
+        goto fail;
+    }
+
+    /* A last declaration without ';' is dropped, as before; free its key
+     * so that it does not leak */
+    free(key);
+    key = NULL;
+
+    rdr_skip(1); /* skip the '}' */
+    rdr_skip_line(); /* Skip until the next line */
+    struct token close_brace = { .type = TOK_STYLE_CLOSE_BRACE };
+    dyna_append(tokens, &close_brace);
+
+    return 0;
+
+fail:
+    free(key);
+    return -1;
+}
+
+/*
+ * Tokenize the body of a STYLE block: parse one style group after another
+ * for as long as the next peeked line has a length greater than zero.
+ * Returns 0 on success, -1 as soon as a group fails to parse.
+ */
+static int tok_parse_style(struct dyna *tokens)
+{
+    char buff[1024];
+    int64_t bi;
+
+    bi = rdr_line_peek(sizeof(buff), buff, NULL);
+    while (bi > 0) {
+        if (tok_parse_style_group(tokens) == -1)
+            return -1;
+        bi = rdr_line_peek(sizeof(buff), buff, NULL);
+    }
+
+    return 0;
+}
+
+/*
+ * Tokenize the cue settings that follow the timestamps of a cue line.
+ *
+ * `pos` points at the rest of the line and `li` is its length. The text
+ * is split at whitespace; every word has to be of the form "key:value"
+ * and is appended as one TOK_CUE_SETTING token. Leading, trailing and
+ * repeated whitespace between settings is ignored.
+ *
+ * Returns 0 on success (also when there are no settings) and -1 when a
+ * word has no ':' or an allocation fails.
+ */
+static int tok_parse_cue_attrib(struct dyna *tokens, int64_t li, char pos[li])
+{
+    const char *cur = pos;
+    const char *end = pos + (li > 0 ? li : 0);
+
+    while (true) {
+        /* Find the start of the next word; stop at `li` bytes or at the
+         * terminator, whichever comes first */
+        while (cur < end && *cur != '\0' && isspace((unsigned char)*cur))
+            cur++;
+        if (cur >= end || *cur == '\0')
+            return 0;
+
+        const char *word_end = cur;
+        while (word_end < end && *word_end != '\0' && !isspace((unsigned char)*word_end))
+            word_end++;
+
+        const char *colon = memchr(cur, ':', (size_t)(word_end - cur));
+        if (colon == NULL)
+            return -1;
+
+        struct token tok = { .type = TOK_CUE_SETTING };
+        tok.cue_setting.key = strndup(cur, (size_t)(colon - cur));
+        tok.cue_setting.value = strndup(colon + 1, (size_t)(word_end - (colon + 1)));
+        if (tok.cue_setting.key == NULL || tok.cue_setting.value == NULL) {
+            free(tok.cue_setting.key);
+            free(tok.cue_setting.value);
+            return -1;
+        }
+        dyna_append(tokens, &tok);
+
+        cur = word_end;
+    }
+}
+
+/*
+ * Append one TOK_CUE_TEXT token per line of cue text.
+ * Stops at EOF, or at the first line of length 0, which is consumed as
+ * well. Returns 0 on success and -1 when a line cannot be copied.
+ */
+static int tok_parse_cue_text(struct dyna *tokens)
+{
+    int64_t li, lineskip;
+    char line[1024];
+
+    while (true) {
+        li = rdr_line_peek(sizeof(line), line, &lineskip);
+        if (li == EOF)
+            return 0;
+        if (li == 0) {
+            /* End of cue */
+            rdr_skip(lineskip);
+            return 0;
+        }
+
+        struct token tok = { .type = TOK_CUE_TEXT };
+        tok.cue_text.str = strdup(line);
+        if (tok.cue_text.str == NULL)
+            return -1; /* same convention as tok_parse_line_ident() */
+        dyna_append(tokens, &tok);
+        rdr_skip(lineskip);
+    }
+}
+
+/*
+ * Tokenize a whole cue, starting at its timing line `line` (length `li`):
+ * start timestamp, " --> ", end timestamp, optional cue settings, then the
+ * text lines that follow. The timing line itself is skipped in the reader
+ * once it has been parsed.
+ * Returns 0 on success, -1 on any parse error.
+ */
+static int tok_parse_cue(struct dyna *tokens, int64_t li, char line[li])
+{
+    static const char arrow[] = " --> ";
+    const size_t arrow_len = sizeof(arrow) - 1;
+    char *pos = line;
+    int en;
+
+    en = tok_parse_timestamp(tokens, li, line);
+    if (en == -1)
+        return -1;
+    li -= en;
+    pos += en;
+
+    /* strncmp stops at the terminator, so a line that ends right after
+     * the first timestamp is not read past its end */
+    if (strncmp(pos, arrow, arrow_len) != 0)
+        return -1;
+    pos += arrow_len;
+    li -= (int64_t)arrow_len;
+    struct token tok = { .type = TOK_ARROW };
+    dyna_append(tokens, &tok);
+
+    en = tok_parse_timestamp(tokens, li, pos);
+    if (en == -1)
+        return -1;
+    li -= en;
+    pos += en;
+
+    /* TODO: line attrib */
+    en = tok_parse_cue_attrib(tokens, li, pos);
+    if (en == -1)
+        return -1;
+
+    rdr_skip_line();
+    en = tok_parse_cue_text(tokens);
+    if (en == -1)
+        return -1;
+
+    return 0;
+}
+
+/*
+ * Treat `line` as a cue identifier: append a TOK_IDENT token that owns a
+ * copy of the line. Returns 0 on success, -1 if the copy cannot be made.
+ */
+static int tok_parse_line_ident(struct dyna *tokens, int64_t li, char line[li])
+{
+    char *copy = strdup(line);
+
+    if (copy == NULL)
+        return -1;
+
+    struct token ident_tok = { .type = TOK_IDENT };
+    ident_tok.ident.str = copy;
+    dyna_append(tokens, &ident_tok);
+
+    return 0;
+}
+
+/*
+ * Strip leading whitespace from `line` in place.
+ *
+ * li    in: length of `line`; out: length after stripping
+ * line  NUL-terminated line buffer; stays NUL-terminated
+ *
+ * A line made up of whitespace only becomes the empty string.
+ */
+static void tok_skip_whitespace(int64_t *li, char *line)
+{
+    /* My test file has 114 spaces on an empty line for some reason lol */
+    int64_t lead = 0;
+    /* The cast keeps isspace() defined for bytes >= 0x80 (UTF-8) */
+    while (lead < *li && isspace((unsigned char)line[lead]))
+        lead++;
+
+    if (lead == *li) {
+        /* all whitespace */
+        *li = 0;
+        line[0] = '\0';
+        return;
+    }
+    if (lead == 0)
+        return; /* nothing to strip */
+
+    /* Move non-whitespace chars to the front and terminate the shortened
+     * string, otherwise the old tail would still follow it */
+    *li -= lead;
+    memmove(line, line + lead, (size_t)*li);
+    line[*li] = '\0';
+}
+
+/*
+ * Tokenize the whole input of the reader (rdr_init() must have been
+ * called, and nothing may have been read yet).
+ *
+ * After the optional BOM and the "WEBVTT" signature, every non-blank line
+ * is classified by its content:
+ *   - starts with "NOTE"  -> NOTE block
+ *   - starts with "STYLE" -> STYLE block
+ *   - contains "-->"      -> cue (timing line, settings and text)
+ *   - anything else       -> cue identifier
+ *
+ * Returns the list of struct token (destroy it with dyna_destroy()), or
+ * NULL after printing the failing line.
+ */
+struct dyna *tok_tokenize()
+{
+    struct dyna *tokens = dyna_create(sizeof(struct token));
+    /* Zero-filled: the error report below prints `line`, also when the
+     * failure happens before the first line has been read */
+    char line[1024] = {0};
+    int64_t li = 0, lineskip;
+    int en;
+
+    dyna_set_free_fn(tokens, tok_free_inner);
+    if (rdr_pos() != 0) {
+        goto error;
+    }
+
+    tok_skip_bom();
+
+    en = tok_read_magic(tokens);
+    if (en != 0)
+        goto error;
+
+    while ((li = rdr_line_peek(sizeof(line), line, &lineskip)) != EOF) {
+        tok_cline++;
+        tok_skip_whitespace(&li, line);
+        if (li == 0) {
+            /* empty line */
+            rdr_skip(lineskip);
+            continue;
+        }
+
+        if (li >= 4 && strncmp(line, "NOTE", 4) == 0) {
+            rdr_skip(lineskip);
+            en = tok_parse_note(tokens, li, line);
+        } else if (li >= 5 && strncmp(line, "STYLE", strlen("STYLE")) == 0) {
+            rdr_skip(lineskip);
+            en = tok_parse_style(tokens);
+        } else if (strstr(line, "-->") != NULL) {
+            /* tok_parse_cue() skips the timing line itself */
+            en = tok_parse_cue(tokens, li, line);
+        } else {
+            en = tok_parse_line_ident(tokens, li, line);
+            if (en != -1)
+                rdr_skip(lineskip);
+        }
+
+        if (en == -1)
+            goto error;
+    }
+
+    return tokens;
+
+error:
+    printf("Failed parse on line %lld: '%s'\n", (long long)tok_cline, line);
+    dyna_destroy(tokens);
+    return NULL;
+}
+
+/*
+ * Name of a token type as a static string, e.g. "TOK_IDENT".
+ * Values outside of the enum yield "TOK_UNKNOWN" instead of reading past
+ * the end of the name table.
+ */
+const char *tok_type2str(enum token_type type)
+{
+    const size_t count = sizeof(token_str_map) / sizeof(token_str_map[0]);
+
+    if ((size_t)type >= count)
+        return "TOK_UNKNOWN";
+    return token_str_map[type];
+}
+
+/*
+ * Format `tok` for debugging as "<type name>: <payload>" into `out`.
+ *
+ * maxn  size of `out` in bytes; the text is truncated to fit and is always
+ *       NUL-terminated when maxn > 0
+ *
+ * Returns `out`.
+ */
+char *tok_2str(struct token *tok, int maxn, char out[maxn])
+{
+    if (maxn <= 0)
+        return out;
+
+    int n = snprintf(out, (size_t)maxn, "%s: ", tok_type2str(tok->type));
+    if (n < 0) {
+        out[0] = '\0';
+        return out;
+    }
+    if (n >= maxn)
+        return out; /* the prefix alone filled the buffer */
+
+    char *tail = out + n;
+    const size_t room = (size_t)(maxn - n);
+    switch (tok->type) {
+    case TOK_IDENT:
+        snprintf(tail, room, ".str = '%s'", tok->ident.str);
+        break;
+    case TOK_TIMESTAMP:
+        snprintf(tail, room, ".ms = %lld", (long long)tok->timestamp.ms);
+        break;
+    case TOK_CUE_SETTING:
+        snprintf(tail, room, "%s = %s", tok->cue_setting.key, tok->cue_setting.value);
+        break;
+    case TOK_CUE_TEXT:
+        snprintf(tail, room, "%s", tok->cue_text.str);
+        break;
+    case TOK_STYLE_SELECTOR:
+        snprintf(tail, room, "%s", tok->style_selector.str);
+        break;
+    case TOK_STYLE_KEYVAL:
+        snprintf(tail, room, "%s = %s", tok->style_keyval.key, tok->style_keyval.value);
+        break;
+    default:
+        /* Token types without payload: the type name is all there is */
+        break;
+    }
+
+    return out;
+}
+
--- a/src/tokenizer.h
+++ b/src/tokenizer.h
@ -0,0 +1,46 @@
+#ifndef _VTT2ASS_TOKENIZER_H
+#define _VTT2ASS_TOKENIZER_H
+#include "dyna.h"
+
+/*
+ * X-macro listing every token type. `ex_simpl` is applied to tokens
+ * without payload, `ex_compl` to tokens with one (type name, union member
+ * name, member struct body).
+ */
+#define TOKEN_DEF(ex_simpl, ex_compl) \
+    ex_simpl(TOK_EOF) \
+    ex_simpl(TOK_FILE_MAGIC) /* The first line is WEBVTT */ \
+    ex_simpl(TOK_NOTE) /* is a note */ \
+    ex_compl(TOK_IDENT, ident, { char *str; /*free*/ }) /* An identifier before a timestamp line */ \
+    ex_compl(TOK_TIMESTAMP, timestamp, { int64_t ms; }) /* A timestamp, ms is the milliseconds since the beginning */ \
+    ex_simpl(TOK_ARROW) /* --> */ \
+    ex_compl(TOK_CUE_SETTING, cue_setting, { char *key, *value; /* free both */ }) /* One key:value cue setting from the end of a timestamp line */ \
+    ex_compl(TOK_CUE_TEXT, cue_text, { char *str; /* free */ }) /* Text contents of a cue (one line) */ \
+    ex_compl(TOK_STYLE_SELECTOR, style_selector, { char *str; /* free */ }) /* The selector string of the STYLE element */ \
+    ex_simpl(TOK_STYLE_OPEN_BRACE) /* '{' */ \
+    ex_simpl(TOK_STYLE_CLOSE_BRACE) /* '}' */ \
+    ex_compl(TOK_STYLE_KEYVAL, style_keyval, { char *key, *value; /* free */ }) /* One "key: value;" declaration inside a style group */ \
+
+/* Both expanders emit just the name: one enumerator per TOKEN_DEF entry, in list order */
+#define ex_simpl(n) n,
+#define ex_compl(n, ...) n,
+enum token_type {
+    TOKEN_DEF(ex_simpl, ex_compl)
+};
+#undef ex_simpl
+#undef ex_compl
+
+/* Simple tokens add nothing; every token with payload adds one struct
+ * member, named after its second TOKEN_DEF argument, to the union */
+#define ex_simpl(tok_name)
+#define ex_compl(tok_name, struct_name, ...) \
+    struct struct_name __VA_ARGS__ struct_name;
+struct token {
+    enum token_type type; /* selects the valid union member, if any */
+    union {
+        TOKEN_DEF(ex_simpl, ex_compl)
+    };
+};
+#undef ex_simpl
+#undef ex_compl
+
+
+/* Needs to have rdr_init() called before this.
+ * Returns the list of struct token, or NULL on a parse error. */
+struct dyna *tok_tokenize();
+
+/* Format `tok` for debugging into `out` (maxn bytes); returns `out` */
+char *tok_2str(struct token *tok, int maxn, char out[maxn]);
+/* Name of the token type as a static string */
+const char *tok_type2str(enum token_type type);
+
+#endif /* _VTT2ASS_TOKENIZER_H */
--- a/src/util.c
+++ b/src/util.c
@ -0,0 +1,111 @@
+#include "util.h"
+
+#include <stdlib.h>
+
+#include <math.h>
+#include <string.h>
+#include <assert.h>
+#include <hb.h>
+
+#include "font.h"
+
+/* Free the pointer stored at `arg`, i.e. `arg` is treated as a `char **`. */
+void deref_free(void *arg)
+{
+    char **slot = arg;
+
+    free(*slot);
+}
+
+/*
+ * Add the number of '\n' characters found in the text nodes below (and
+ * including) `node` to `*lc`. Timestamp nodes are ignored.
+ */
+static void util_count_node_lines_inner(const struct vtt_node *node, int *lc)
+{
+    switch (node->type) {
+    case VNODE_TEXT:
+        for (const char *nl = strchr(node->text, '\n'); nl != NULL; nl = strchr(nl + 1, '\n'))
+            (*lc)++;
+        return;
+    case VNODE_TIMESTAMP:
+        return;
+    default:
+        break;
+    }
+
+    if (node->childs == NULL)
+        return;
+
+    int idx = 0;
+    while (idx < node->childs->e_idx) {
+        util_count_node_lines_inner(dyna_elem(node->childs, idx), lc);
+        idx++;
+    }
+}
+
+/* Number of text lines in the tree under `root`: 1 plus the number of newlines in its text nodes. */
+int util_count_node_lines(const struct vtt_node *root)
+{
+    int lines = 1;
+
+    util_count_node_lines_inner(root, &lines);
+
+    return lines;
+}
+
+
+/*
+ * Combine `ex_len` extents into `out`: the width is the largest width,
+ * the height is the sum of all heights, every other field is zero.
+ * Asserts on ex_len < 1 (and leaves `out` zeroed when asserts are off).
+ */
+void util_combine_extents(int ex_len, const struct text_extents ex[ex_len], struct text_extents *out)
+{
+    memset(out, 0, sizeof(*out));
+    if (ex_len < 1) {
+        assert(ex_len > 0);
+        return;
+    }
+
+    /* Seed with the first entry, then fold in the rest */
+    out->width = ex[0].width;
+    out->height = ex[0].height;
+
+    int idx = 1;
+    while (idx < ex_len) {
+        const struct text_extents *cur = &ex[idx];
+
+        if (cur->width > out->width)
+            out->width = cur->width;
+        out->height += cur->height;
+        idx++;
+    }
+}
+
+/* Placeholder: currently does nothing. */
+void util_init()
+{
+
+} 
+
+/*
+ * Count the bytes among the first `s_len` bytes of `s` that are not
+ * UTF-8 continuation bytes (10xxxxxx). For valid UTF-8 this is the
+ * number of characters.
+ */
+int util_utf8_ccount(int s_len, const char s[s_len])
+{
+    int n_chars = 0;
+    int idx = 0;
+
+    while (idx < s_len) {
+        const int is_continuation = ((s[idx] & 0xC0) == 0x80);
+
+        n_chars += !is_continuation;
+        idx++;
+    }
+
+    return n_chars;
+}
+
+/*
+ * Convert a position given relative to the alignment in `pos` into the
+ * equivalent position for alignment 7, for a box of the size `ext`.
+ * Depending on the alignment the full, half or none of the width/height
+ * is subtracted from posx/posy. `fs` is copied over.
+ */
+void util_cue_pos_to_an7(const struct ass_cue_pos *pos, const struct text_extents *ext, struct ass_cue_pos *an7_pos)
+{
+    /* Fraction of the box width left of the anchor: right 1, left 0, otherwise 0.5 */
+    const float x_frac = IS_ASS_ALIGN_RIGHT(pos->align) ? 1
+                       : IS_ASS_ALIGN_LEFT(pos->align)  ? 0
+                       : 0.5f;
+    /* Fraction of the box height above the anchor: bottom 1, top 0, otherwise 0.5 */
+    const float y_frac = IS_ASS_ALIGN_BOTTOM(pos->align) ? 1
+                       : IS_ASS_ALIGN_TOP(pos->align)    ? 0
+                       : 0.5f;
+
+    an7_pos->align = 7;
+    an7_pos->logical_align = an7_pos->align;
+    an7_pos->fs = pos->fs;
+    an7_pos->posx = pos->posx - x_frac * ext->width;
+    an7_pos->posy = pos->posy - y_frac * ext->height;
+}
+
+/* True unless `chr` is a UTF-8 continuation byte (bit pattern 10xxxxxx). */
+bool util_is_utf8_start(char chr)
+{
+    const int top_bits = chr & 0xC0;
+
+    return top_bits != 0x80;
+}
+
+/*
+ * Look up a colour name. Only "black" is recognised so far, and every
+ * other name yields 0x00000000 as well.
+ */
+uint32_t util_colorname_to_rgb(const char *name)
+{
+    const uint32_t black = 0x00000000;
+    const uint32_t fallback = 0x00000000;
+
+    return (strcmp(name, "black") == 0) ? black : fallback;
+}
--- a/src/util.h
+++ b/src/util.h
@ -0,0 +1,38 @@
+#ifndef _VTT2ASS_UTIL_H
+#define _VTT2ASS_UTIL_H
+#include "cuetext.h"
+#include "ass.h"
+
+/* Milliseconds in one second, minute and hour */
+#define S_IN_MS (1000)
+#define M_IN_MS (S_IN_MS * 60)
+#define H_IN_MS (M_IN_MS * 60)
+
+/* Element count of an array; only valid for real arrays, not for pointers */
+#define ARRSIZE(x) (sizeof(x)/sizeof(*x))
+/* Expands to an `if` statement with its own ';': do not use it as the body
+ * of an if/else without braces */
+#define SAFE_FREE(x) if (x) free(x);
+#define STRINGIFY(x) #x
+
+/* Debugger breakpoint: emits the x86 `int $3` instruction */
+#define BP asm("int $3")
+
+/* Measured size of a piece of text. The textextents functions fill in
+ * width and height (and x_off when justifying); the other fields are left
+ * at zero by them. */
+struct text_extents {
+    double x_adv, y_adv, x_off, y_off, width, height;
+};
+
+/* Free the pointer that `arg` points to (arg is a `char **`) */
+void deref_free(void *arg);
+
+/* 1 + the number of newlines in the text nodes under `root` */
+int util_count_node_lines(const struct vtt_node *root);
+
+/* NOTE(review): the three util_get_text_extents* functions below have no
+ * definition in util.c of this commit; confirm they are defined elsewhere. */
+void util_get_text_extents(const char *fontname, const char *text, int fs, struct text_extents *out_ex);
+void util_get_text_extents_line(const char *fontname, const char *text, int text_len, unsigned int text_offset, int item_len, int fs, struct text_extents *out_ex);
+void util_get_text_extents_lines(const char *fontname, const char *text, int fs, int ex_max_size, struct text_extents out_ex[ex_max_size], int *ex_len);
+/* out->width = largest width, out->height = sum of the heights */
+void util_combine_extents(int ex_len, const struct text_extents ex[ex_len], struct text_extents *out);
+
+/* Number of bytes in s[0..s_len) that are not UTF-8 continuation bytes */
+int util_utf8_ccount(int s_len, const char s[s_len]);
+/* True unless chr is a UTF-8 continuation byte */
+bool util_is_utf8_start(char chr);
+/* Convert `pos` into the equivalent position for alignment 7, given the box size `ext` */
+void util_cue_pos_to_an7(const struct ass_cue_pos *pos, const struct text_extents *ext, struct ass_cue_pos *an7_pos);
+
+/* Currently returns 0x00000000 for every name */
+uint32_t util_colorname_to_rgb(const char *name);
+
+/* Currently a no-op */
+void util_init();
+
+
+#endif /* _VTT2ASS_UTIL_H */
--- a/subm/argparse
+++ b/subm/argparse
@ -0,0 +1 @@
+Subproject commit 682d4520b4bc2b646cdfcf078b2fed00b3d2da30
				`@ -0,0 +1 @@`
				`Subproject commit 682d4520b4bc2b646cdfcf078b2fed00b3d2da30`