Rewrite the auto-matcher for the karaoke timing copier

Operate on characters rather than bytes in the dialog so that it actually works with Kanji. Rewrite the auto-matcher to handle more cases and add unit tests for it.
2025-04-11 22:56:02 +02:00 · 2013-07-13 08:26:09 -07:00 · 2013-07-13 08:26:09 -07:00 · 80c9f67ce8
commit 80c9f67ce8
parent 3e3bd37a26
18 changed files with 1165 additions and 529 deletions
--- a/aegisub/build/Aegisub/Aegisub.vcxproj
+++ b/aegisub/build/Aegisub/Aegisub.vcxproj
@ -198,7 +198,6 @@
    <ClInclude Include="$(SrcDir)include\aegisub\toolbar.h" />
    <ClInclude Include="$(SrcDir)include\aegisub\video_provider.h" />
    <ClInclude Include="$(SrcDir)initial_line_state.h" />
-    <ClInclude Include="$(SrcDir)kana_table.h" />
    <ClInclude Include="$(SrcDir)lpeg.h" />
    <ClInclude Include="$(SrcDir)main.h" />
    <ClInclude Include="$(SrcDir)mkv_wrap.h" />
@ -393,7 +392,6 @@
    <ClCompile Include="$(SrcDir)hotkey.cpp" />
    <ClCompile Include="$(SrcDir)hotkey_data_view_model.cpp" />
    <ClCompile Include="$(SrcDir)initial_line_state.cpp" />
-    <ClCompile Include="$(SrcDir)kana_table.cpp" />
    <ClCompile Include="$(SrcDir)lpeg.c">
        <PrecompiledHeader>NotUsing</PrecompiledHeader>
        <ForcedIncludeFiles></ForcedIncludeFiles>
--- a/aegisub/build/Aegisub/Aegisub.vcxproj.filters
+++ b/aegisub/build/Aegisub/Aegisub.vcxproj.filters
@ -339,9 +339,6 @@
    <ClInclude Include="$(SrcDir)gl_text.h">
      <Filter>Video\UI</Filter>
    </ClInclude>
-    <ClInclude Include="$(SrcDir)kana_table.h">
-      <Filter>Features\Karaoke copier</Filter>
-    </ClInclude>
    <ClInclude Include="$(SrcDir)subtitle_format.h">
      <Filter>Subtitle formats</Filter>
    </ClInclude>
@ -992,9 +989,6 @@
    <ClCompile Include="$(SrcDir)gl_wrap.cpp">
      <Filter>Video\UI</Filter>
    </ClCompile>
-    <ClCompile Include="$(SrcDir)kana_table.cpp">
-      <Filter>Features\Karaoke copier</Filter>
-    </ClCompile>
    <ClCompile Include="$(SrcDir)dialog_spellchecker.cpp">
      <Filter>Features\Spell checker</Filter>
    </ClCompile>
--- a/aegisub/build/libaegisub/libaegisub.vcxproj
+++ b/aegisub/build/libaegisub/libaegisub.vcxproj
@ -4,7 +4,6 @@
    <ProjectGuid>{BB3FED86-DB7A-4DC7-964A-260FB86CDE61}</ProjectGuid>
    <RootNamespace>libaegisub</RootNamespace>
  </PropertyGroup>
-
  <!-- Aegisub project configuration -->
  <PropertyGroup Label="AegisubConfiguration">
    <AegisubProjectType>lib</AegisubProjectType>
@ -13,7 +12,6 @@
  <ImportGroup Label="PropertySheets">
    <Import Project="$(MSBuildThisFileDirectory)..\aegisub.props" />
  </ImportGroup>
-
  <!-- Project specific configuration -->
  <ItemDefinitionGroup>
    <ClCompile>
@ -33,7 +31,6 @@
      <ForcedIncludeFiles>lagi_pre.h</ForcedIncludeFiles>
    </ClCompile>
  </ItemDefinitionGroup>
-
  <!-- Source files -->
  <ItemGroup>
    <ClInclude Include="$(SrcDir)common\charset_6937.h" />
@ -60,6 +57,8 @@
    <ClInclude Include="$(SrcDir)include\libaegisub\hotkey.h" />
    <ClInclude Include="$(SrcDir)include\libaegisub\io.h" />
    <ClInclude Include="$(SrcDir)include\libaegisub\json.h" />
+    <ClInclude Include="$(SrcDir)include\libaegisub\kana_table.h" />
+    <ClInclude Include="$(SrcDir)include\libaegisub\karaoke_matcher.h" />
    <ClInclude Include="$(SrcDir)include\libaegisub\keyframe.h" />
    <ClInclude Include="$(SrcDir)include\libaegisub\line_iterator.h" />
    <ClInclude Include="$(SrcDir)include\libaegisub\line_wrap.h" />
@ -101,6 +100,8 @@
    <ClCompile Include="$(SrcDir)common\hotkey.cpp" />
    <ClCompile Include="$(SrcDir)common\io.cpp" />
    <ClCompile Include="$(SrcDir)common\json.cpp" />
+    <ClCompile Include="$(SrcDir)common\kana_table.cpp" />
+    <ClCompile Include="$(SrcDir)common\karaoke_matcher.cpp" />
    <ClCompile Include="$(SrcDir)common\keyframe.cpp" />
    <ClCompile Include="$(SrcDir)common\log.cpp" />
    <ClCompile Include="$(SrcDir)common\mru.cpp" />
--- a/aegisub/build/libaegisub/libaegisub.vcxproj.filters
+++ b/aegisub/build/libaegisub/libaegisub.vcxproj.filters
@ -155,6 +155,12 @@
    <ClInclude Include="$(SrcDir)include\libaegisub\ass\uuencode.h">
      <Filter>ASS</Filter>
    </ClInclude>
+    <ClInclude Include="$(SrcDir)include\libaegisub\karaoke_matcher.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="$(SrcDir)include\libaegisub\kana_table.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
  </ItemGroup>
  <ItemGroup>
    <ClCompile Include="$(SrcDir)windows\lagi_pre.cpp">
@ -256,6 +262,12 @@
    <ClCompile Include="$(SrcDir)ass\uuencode.cpp">
      <Filter>ASS</Filter>
    </ClCompile>
+    <ClCompile Include="$(SrcDir)common\kana_table.cpp">
+      <Filter>Source Files\Common</Filter>
+    </ClCompile>
+    <ClCompile Include="$(SrcDir)common\karaoke_matcher.cpp">
+      <Filter>Source Files\Common</Filter>
+    </ClCompile>
  </ItemGroup>
  <ItemGroup>
    <None Include="$(SrcDir)include\libaegisub\charsets.def">
--- a/aegisub/libaegisub/Makefile
+++ b/aegisub/libaegisub/Makefile
@ -28,6 +28,8 @@ SRC += \
 	common/hotkey.cpp \
 	common/io.cpp \
 	common/json.cpp \
+	common/kana_table.cpp \
+	common/karaoke_matcher.cpp \
 	common/keyframe.cpp \
 	common/log.cpp \
 	common/mru.cpp \
--- a/aegisub/libaegisub/common/kana_table.cpp
+++ b/aegisub/libaegisub/common/kana_table.cpp
@ -0,0 +1,622 @@
+// Copyright (c) 2013, Thomas Goyne <plorkyeran@aegisub.org>
+//
+// Permission to use, copy, modify, and distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice appear in all copies.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+//
+// Aegisub Project http://www.aegisub.org/
+
+#include "../config.h"
+
+#include "libaegisub/kana_table.h"
+
+#include <boost/range/algorithm.hpp>
+
+namespace {
+// Kana -> romaji lookup table. A kana sequence may appear several times, once
+// per accepted romanization. The rows must stay sorted by the kana string in
+// strcmp order, because agi::kana_to_romaji() binary-searches this array.
+agi::kana_pair kana_to_romaji[] = {
+	{"\xE3\x81\x81", "a"},               // ぁ
+	{"\xE3\x81\x82", "a"},               // あ
+	{"\xE3\x81\x83", "i"},               // ぃ
+	{"\xE3\x81\x84", "i"},               // い
+	{"\xE3\x81\x85", "u"},               // ぅ
+	{"\xE3\x81\x86", "u"},               // う
+	{"\xE3\x81\x87", "e"},               // ぇ
+	{"\xE3\x81\x88", "e"},               // え
+	{"\xE3\x81\x89", "o"},               // ぉ
+	{"\xE3\x81\x8A", "o"},               // お
+	{"\xE3\x81\x8B", "ka"},              // か
+	{"\xE3\x81\x8C", "ga"},              // が
+	{"\xE3\x81\x8D", "ki"},              // き
+	{"\xE3\x81\x8D\xE3\x82\x83", "kya"}, // きゃ
+	{"\xE3\x81\x8D\xE3\x82\x85", "kyu"}, // きゅ
+	{"\xE3\x81\x8D\xE3\x82\x87", "kyo"}, // きょ
+	{"\xE3\x81\x8E", "gi"},              // ぎ
+	{"\xE3\x81\x8E\xE3\x82\x83", "gya"}, // ぎゃ
+	{"\xE3\x81\x8E\xE3\x82\x85", "gyu"}, // ぎゅ
+	{"\xE3\x81\x8E\xE3\x82\x87", "gyo"}, // ぎょ
+	{"\xE3\x81\x8F", "ku"},              // く
+	{"\xE3\x81\x90", "gu"},              // ぐ
+	{"\xE3\x81\x91", "ke"},              // け
+	{"\xE3\x81\x92", "ge"},              // げ
+	{"\xE3\x81\x93", "ko"},              // こ
+	{"\xE3\x81\x94", "go"},              // ご
+	{"\xE3\x81\x95", "sa"},              // さ
+	{"\xE3\x81\x96", "za"},              // ざ
+	{"\xE3\x81\x97", "shi"},             // し
+	{"\xE3\x81\x97\xE3\x82\x83", "sha"}, // しゃ
+	{"\xE3\x81\x97\xE3\x82\x85", "shu"}, // しゅ
+	{"\xE3\x81\x97\xE3\x82\x87", "sho"}, // しょ
+	{"\xE3\x81\x98", "ji"},              // じ
+	{"\xE3\x81\x98\xE3\x82\x83", "ja"},  // じゃ
+	{"\xE3\x81\x98\xE3\x82\x85", "ju"},  // じゅ
+	{"\xE3\x81\x98\xE3\x82\x87", "jo"},  // じょ
+	{"\xE3\x81\x99", "su"},              // す
+	{"\xE3\x81\x9A", "zu"},              // ず
+	{"\xE3\x81\x9B", "se"},              // せ
+	{"\xE3\x81\x9C", "ze"},              // ぜ
+	{"\xE3\x81\x9D", "so"},              // そ
+	{"\xE3\x81\x9E", "zo"},              // ぞ
+	{"\xE3\x81\x9F", "ta"},              // た
+	{"\xE3\x81\xA0", "da"},              // だ
+	{"\xE3\x81\xA1", "chi"},             // ち
+	{"\xE3\x81\xA1\xE3\x82\x83", "cha"}, // ちゃ
+	{"\xE3\x81\xA1\xE3\x82\x85", "chu"}, // ちゅ
+	{"\xE3\x81\xA1\xE3\x82\x87", "cho"}, // ちょ
+	{"\xE3\x81\xA2", "ji"},              // ぢ
+	{"\xE3\x81\xA2\xE3\x82\x83", "ja"},  // ぢゃ
+	{"\xE3\x81\xA2\xE3\x82\x85", "ju"},  // ぢゅ
+	{"\xE3\x81\xA2\xE3\x82\x87", "jo"},  // ぢょ
+	{"\xE3\x81\xA3", "c"},               // っ
+	{"\xE3\x81\xA3", "k"},               // っ
+	{"\xE3\x81\xA3", "p"},               // っ
+	{"\xE3\x81\xA3", "s"},               // っ
+	{"\xE3\x81\xA3", "t"},               // っ
+	{"\xE3\x81\xA4", "tsu"},             // つ
+	{"\xE3\x81\xA5", "zu"},              // づ
+	{"\xE3\x81\xA6", "te"},              // て
+	{"\xE3\x81\xA7", "de"},              // で
+	{"\xE3\x81\xA8", "to"},              // と
+	{"\xE3\x81\xA9", "do"},              // ど
+	{"\xE3\x81\xAA", "na"},              // な
+	{"\xE3\x81\xAB", "ni"},              // に
+	{"\xE3\x81\xAB\xE3\x82\x83", "nya"}, // にゃ
+	{"\xE3\x81\xAB\xE3\x82\x85", "nyu"}, // にゅ
+	{"\xE3\x81\xAB\xE3\x82\x87", "nyo"}, // にょ
+	{"\xE3\x81\xAC", "nu"},              // ぬ
+	{"\xE3\x81\xAD", "ne"},              // ね
+	{"\xE3\x81\xAE", "no"},              // の
+	{"\xE3\x81\xAF", "ha"},              // は
+	{"\xE3\x81\xAF", "wa"},              // は
+	{"\xE3\x81\xB0", "ba"},              // ば
+	{"\xE3\x81\xB1", "pa"},              // ぱ
+	{"\xE3\x81\xB2", "hi"},              // ひ
+	{"\xE3\x81\xB2\xE3\x82\x83", "hya"}, // ひゃ
+	{"\xE3\x81\xB2\xE3\x82\x85", "hyu"}, // ひゅ
+	{"\xE3\x81\xB2\xE3\x82\x87", "hyo"}, // ひょ
+	{"\xE3\x81\xB3", "bi"},              // び
+	{"\xE3\x81\xB3\xE3\x82\x83", "bya"}, // びゃ
+	{"\xE3\x81\xB3\xE3\x82\x85", "byu"}, // びゅ
+	{"\xE3\x81\xB3\xE3\x82\x87", "byo"}, // びょ
+	{"\xE3\x81\xB4", "pi"},              // ぴ
+	{"\xE3\x81\xB4\xE3\x82\x83", "pya"}, // ぴゃ
+	{"\xE3\x81\xB4\xE3\x82\x85", "pyu"}, // ぴゅ
+	{"\xE3\x81\xB4\xE3\x82\x87", "pyo"}, // ぴょ
+	{"\xE3\x81\xB5", "fu"},              // ふ
+	{"\xE3\x81\xB6", "bu"},              // ぶ
+	{"\xE3\x81\xB7", "pu"},              // ぷ
+	{"\xE3\x81\xB8", "he"},              // へ
+	{"\xE3\x81\xB8", "e"},               // へ
+	{"\xE3\x81\xB9", "be"},              // べ
+	{"\xE3\x81\xBA", "pe"},              // ぺ
+	{"\xE3\x81\xBB", "ho"},              // ほ
+	{"\xE3\x81\xBC", "bo"},              // ぼ
+	{"\xE3\x81\xBD", "po"},              // ぽ
+	{"\xE3\x81\xBE", "ma"},              // ま
+	{"\xE3\x81\xBF", "mi"},              // み
+	{"\xE3\x81\xBF\xE3\x82\x83", "mya"}, // みゃ
+	{"\xE3\x81\xBF\xE3\x82\x85", "myu"}, // みゅ
+	{"\xE3\x81\xBF\xE3\x82\x87", "myo"}, // みょ
+	{"\xE3\x82\x80", "mu"},              // む
+	{"\xE3\x82\x81", "me"},              // め
+	{"\xE3\x82\x82", "mo"},              // も
+	{"\xE3\x82\x84", "ya"},              // や
+	{"\xE3\x82\x86", "yu"},              // ゆ
+	{"\xE3\x82\x88", "yo"},              // よ
+	{"\xE3\x82\x89", "ra"},              // ら
+	{"\xE3\x82\x8A", "ri"},              // り
+	{"\xE3\x82\x8A\xE3\x82\x83", "rya"}, // りゃ
+	{"\xE3\x82\x8A\xE3\x82\x85", "ryu"}, // りゅ
+	{"\xE3\x82\x8A\xE3\x82\x87", "ryo"}, // りょ
+	{"\xE3\x82\x8B", "ru"},              // る
+	{"\xE3\x82\x8C", "re"},              // れ
+	{"\xE3\x82\x8D", "ro"},              // ろ
+	{"\xE3\x82\x8F", "wa"},              // わ
+	{"\xE3\x82\x90", "wi"},              // ゐ
+	{"\xE3\x82\x91", "we"},              // ゑ
+	{"\xE3\x82\x92", "wo"},              // を
+	{"\xE3\x82\x93", "m"},               // ん
+	{"\xE3\x82\x93", "n"},               // ん
+	{"\xE3\x82\xA1", "a"},               // ァ
+	{"\xE3\x82\xA2", "a"},               // ア
+	{"\xE3\x82\xA3", "i"},               // ィ
+	{"\xE3\x82\xA4", "i"},               // イ
+	{"\xE3\x82\xA4\xE3\x82\xA7", "ye"},  // イェ
+	{"\xE3\x82\xA5", "u"},               // ゥ
+	{"\xE3\x82\xA6", "u"},               // ウ
+	{"\xE3\x82\xA6\xE3\x82\xA3", "wi"},  // ウィ
+	{"\xE3\x82\xA6\xE3\x82\xA7", "we"},  // ウェ
+	{"\xE3\x82\xA6\xE3\x82\xA9", "wo"},  // ウォ
+	{"\xE3\x82\xA7", "e"},               // ェ
+	{"\xE3\x82\xA8", "e"},               // エ
+	{"\xE3\x82\xA9", "o"},               // ォ
+	{"\xE3\x82\xAA", "o"},               // オ
+	{"\xE3\x82\xAB", "ka"},              // カ
+	{"\xE3\x82\xAC", "ga"},              // ガ
+	{"\xE3\x82\xAD", "ki"},              // キ
+	{"\xE3\x82\xAD\xE3\x83\xA3", "kya"}, // キャ
+	{"\xE3\x82\xAD\xE3\x83\xA5", "kyu"}, // キュ
+	{"\xE3\x82\xAD\xE3\x83\xA7", "kyo"}, // キョ
+	{"\xE3\x82\xAE", "gi"},              // ギ
+	{"\xE3\x82\xAE\xE3\x83\xA3", "gya"}, // ギャ
+	{"\xE3\x82\xAE\xE3\x83\xA5", "gyu"}, // ギュ
+	{"\xE3\x82\xAE\xE3\x83\xA7", "gyo"}, // ギョ
+	{"\xE3\x82\xAF", "ku"},              // ク
+	{"\xE3\x82\xB0", "gu"},              // グ
+	{"\xE3\x82\xB1", "ke"},              // ケ
+	{"\xE3\x82\xB2", "ge"},              // ゲ
+	{"\xE3\x82\xB3", "ko"},              // コ
+	{"\xE3\x82\xB4", "go"},              // ゴ
+	{"\xE3\x82\xB5", "sa"},              // サ
+	{"\xE3\x82\xB6", "za"},              // ザ
+	{"\xE3\x82\xB7", "shi"},             // シ
+	{"\xE3\x82\xB7\xE3\x82\xA7", "she"}, // シェ
+	{"\xE3\x82\xB7\xE3\x83\xA3", "sha"}, // シャ
+	{"\xE3\x82\xB7\xE3\x83\xA5", "shu"}, // シュ
+	{"\xE3\x82\xB7\xE3\x83\xA7", "sho"}, // ショ
+	{"\xE3\x82\xB8", "ji"},              // ジ
+	{"\xE3\x82\xB8\xE3\x82\xA7", "je"},  // ジェ
+	{"\xE3\x82\xB8\xE3\x83\xA3", "ja"},  // ジャ
+	{"\xE3\x82\xB8\xE3\x83\xA5", "ju"},  // ジュ
+	{"\xE3\x82\xB8\xE3\x83\xA7", "jo"},  // ジョ
+	{"\xE3\x82\xB9", "su"},              // ス
+	{"\xE3\x82\xBA", "zu"},              // ズ
+	{"\xE3\x82\xBB", "se"},              // セ
+	{"\xE3\x82\xBC", "ze"},              // ゼ
+	{"\xE3\x82\xBD", "so"},              // ソ
+	{"\xE3\x82\xBE", "zo"},              // ゾ
+	{"\xE3\x82\xBF", "ta"},              // タ
+	{"\xE3\x83\x80", "da"},              // ダ
+	{"\xE3\x83\x81", "chi"},             // チ
+	{"\xE3\x83\x81\xE3\x82\xA7", "che"}, // チェ
+	{"\xE3\x83\x81\xE3\x83\xA3", "cha"}, // チャ
+	{"\xE3\x83\x81\xE3\x83\xA5", "chu"}, // チュ
+	{"\xE3\x83\x81\xE3\x83\xA7", "cho"}, // チョ
+	{"\xE3\x83\x82", "ji"},              // ヂ
+	{"\xE3\x83\x82\xE3\x83\xA3", "ja"},  // ヂャ
+	{"\xE3\x83\x82\xE3\x83\xA5", "ju"},  // ヂュ
+	{"\xE3\x83\x82\xE3\x83\xA7", "jo"},  // ヂョ
+	{"\xE3\x83\x83", "c"},               // ッ
+	{"\xE3\x83\x83", "k"},               // ッ
+	{"\xE3\x83\x83", "p"},               // ッ
+	{"\xE3\x83\x83", "s"},               // ッ
+	{"\xE3\x83\x83", "t"},               // ッ
+	{"\xE3\x83\x84", "tsu"},             // ツ
+	{"\xE3\x83\x84\xE3\x82\xA1", "tsa"}, // ツァ
+	{"\xE3\x83\x84\xE3\x82\xA3", "tsi"}, // ツィ
+	{"\xE3\x83\x84\xE3\x82\xA7", "tse"}, // ツェ
+	{"\xE3\x83\x84\xE3\x82\xA9", "tso"}, // ツォ
+	{"\xE3\x83\x85", "zu"},              // ヅ
+	{"\xE3\x83\x86", "te"},              // テ
+	{"\xE3\x83\x86\xE3\x82\xA3", "ti"},  // ティ
+	{"\xE3\x83\x86\xE3\x82\xA5", "tu"},  // テゥ
+	{"\xE3\x83\x86\xE3\x83\xA5", "tyu"}, // テュ
+	{"\xE3\x83\x87", "de"},              // デ
+	{"\xE3\x83\x87\xE3\x82\xA3", "di"},  // ディ
+	{"\xE3\x83\x87\xE3\x82\xA5", "du"},  // デゥ
+	{"\xE3\x83\x87\xE3\x83\xA5", "dyu"}, // デュ
+	{"\xE3\x83\x88", "to"},              // ト
+	{"\xE3\x83\x89", "do"},              // ド
+	{"\xE3\x83\x8A", "na"},              // ナ
+	{"\xE3\x83\x8B", "ni"},              // ニ
+	{"\xE3\x83\x8B\xE3\x83\xA3", "nya"}, // ニャ
+	{"\xE3\x83\x8B\xE3\x83\xA5", "nyu"}, // ニュ
+	{"\xE3\x83\x8B\xE3\x83\xA7", "nyo"}, // ニョ
+	{"\xE3\x83\x8C", "nu"},              // ヌ
+	{"\xE3\x83\x8D", "ne"},              // ネ
+	{"\xE3\x83\x8E", "no"},              // ノ
+	{"\xE3\x83\x8F", "ha"},              // ハ
+	{"\xE3\x83\x90", "ba"},              // バ
+	{"\xE3\x83\x91", "pa"},              // パ
+	{"\xE3\x83\x92", "hi"},              // ヒ
+	{"\xE3\x83\x92\xE3\x83\xA3", "hya"}, // ヒャ
+	{"\xE3\x83\x92\xE3\x83\xA5", "hyu"}, // ヒュ
+	{"\xE3\x83\x92\xE3\x83\xA7", "hyo"}, // ヒョ
+	{"\xE3\x83\x93", "bi"},              // ビ
+	{"\xE3\x83\x93\xE3\x83\xA3", "bya"}, // ビャ
+	{"\xE3\x83\x93\xE3\x83\xA5", "byu"}, // ビュ
+	{"\xE3\x83\x93\xE3\x83\xA7", "byo"}, // ビョ
+	{"\xE3\x83\x94", "pi"},              // ピ
+	{"\xE3\x83\x94\xE3\x83\xA3", "pya"}, // ピャ
+	{"\xE3\x83\x94\xE3\x83\xA5", "pyu"}, // ピュ
+	{"\xE3\x83\x94\xE3\x83\xA7", "pyo"}, // ピョ
+	{"\xE3\x83\x95", "fu"},              // フ
+	{"\xE3\x83\x95\xE3\x82\xA1", "fa"},  // ファ
+	{"\xE3\x83\x95\xE3\x82\xA3", "fi"},  // フィ
+	{"\xE3\x83\x95\xE3\x82\xA7", "fe"},  // フェ
+	{"\xE3\x83\x95\xE3\x82\xA9", "fo"},  // フォ
+	{"\xE3\x83\x95\xE3\x83\xA5", "fyu"}, // フュ
+	{"\xE3\x83\x96", "bu"},              // ブ
+	{"\xE3\x83\x97", "pu"},              // プ
+	{"\xE3\x83\x98", "he"},              // ヘ
+	{"\xE3\x83\x99", "be"},              // ベ
+	{"\xE3\x83\x9A", "pe"},              // ペ
+	{"\xE3\x83\x9B", "ho"},              // ホ
+	{"\xE3\x83\x9C", "bo"},              // ボ
+	{"\xE3\x83\x9D", "po"},              // ポ
+	{"\xE3\x83\x9E", "ma"},              // マ
+	{"\xE3\x83\x9F", "mi"},              // ミ
+	{"\xE3\x83\x9F\xE3\x83\xA3", "mya"}, // ミャ
+	{"\xE3\x83\x9F\xE3\x83\xA5", "myu"}, // ミュ
+	{"\xE3\x83\x9F\xE3\x83\xA7", "myo"}, // ミョ
+	{"\xE3\x83\xA0", "mu"},              // ム
+	{"\xE3\x83\xA1", "me"},              // メ
+	{"\xE3\x83\xA2", "mo"},              // モ
+	{"\xE3\x83\xA4", "ya"},              // ヤ
+	{"\xE3\x83\xA6", "yu"},              // ユ
+	{"\xE3\x83\xA8", "yo"},              // ヨ
+	{"\xE3\x83\xA9", "ra"},              // ラ
+	{"\xE3\x83\xAA", "ri"},              // リ
+	{"\xE3\x83\xAA\xE3\x83\xA3", "rya"}, // リャ
+	{"\xE3\x83\xAA\xE3\x83\xA5", "ryu"}, // リュ
+	{"\xE3\x83\xAA\xE3\x83\xA7", "ryo"}, // リョ
+	{"\xE3\x83\xAB", "ru"},              // ル
+	{"\xE3\x83\xAC", "re"},              // レ
+	{"\xE3\x83\xAD", "ro"},              // ロ
+	{"\xE3\x83\xAF", "wa"},              // ワ
+	{"\xE3\x83\xB0", "wi"},              // ヰ
+	{"\xE3\x83\xB1", "we"},              // ヱ
+	{"\xE3\x83\xB2", "wo"},              // ヲ
+	{"\xE3\x83\xB3", "m"},               // ン
+	{"\xE3\x83\xB3", "n"},               // ン
+	{"\xE3\x83\xB4", "vu"},              // ヴ
+	{"\xE3\x83\xB4\xE3\x82\xA1", "va"},  // ヴァ
+	{"\xE3\x83\xB4\xE3\x82\xA3", "vi"},  // ヴィ
+	{"\xE3\x83\xB4\xE3\x82\xA7", "ve"},  // ヴェ
+	{"\xE3\x83\xB4\xE3\x82\xA9", "vo"},  // ヴォ
+	{"\xE3\x83\xB4\xE3\x83\xA3", "vya"}, // ヴャ
+	{"\xE3\x83\xB4\xE3\x83\xA5", "vyu"}, // ヴュ
+	{"\xE3\x83\xB4\xE3\x83\xA7", "vyo"}, // ヴョ
+	{"\xE3\x83\xBC", "a"},               // ー
+	{"\xE3\x83\xBC", "e"},               // ー
+	{"\xE3\x83\xBC", "i"},               // ー
+	{"\xE3\x83\xBC", "o"},               // ー
+	{"\xE3\x83\xBC", "u"},               // ー
+};
+
+// Romaji -> kana lookup table: the same kind of pairs as the kana-keyed table,
+// but ordered by the romaji string. The rows must stay sorted by romaji in
+// strcmp order, because agi::romaji_to_kana() runs boost::equal_range over it.
+agi::kana_pair romaji_to_kana[] = {
+	{"\xE3\x81\x81", "a"},               // ぁ
+	{"\xE3\x81\x82", "a"},               // あ
+	{"\xE3\x82\xA1", "a"},               // ァ
+	{"\xE3\x82\xA2", "a"},               // ア
+	{"\xE3\x83\xBC", "a"},               // ー
+	{"\xE3\x81\xB0", "ba"},              // ば
+	{"\xE3\x83\x90", "ba"},              // バ
+	{"\xE3\x81\xB9", "be"},              // べ
+	{"\xE3\x83\x99", "be"},              // ベ
+	{"\xE3\x81\xB3", "bi"},              // び
+	{"\xE3\x83\x93", "bi"},              // ビ
+	{"\xE3\x81\xBC", "bo"},              // ぼ
+	{"\xE3\x83\x9C", "bo"},              // ボ
+	{"\xE3\x81\xB6", "bu"},              // ぶ
+	{"\xE3\x83\x96", "bu"},              // ブ
+	{"\xE3\x81\xB3\xE3\x82\x83", "bya"}, // びゃ
+	{"\xE3\x83\x93\xE3\x83\xA3", "bya"}, // ビャ
+	{"\xE3\x81\xB3\xE3\x82\x87", "byo"}, // びょ
+	{"\xE3\x83\x93\xE3\x83\xA7", "byo"}, // ビョ
+	{"\xE3\x81\xB3\xE3\x82\x85", "byu"}, // びゅ
+	{"\xE3\x83\x93\xE3\x83\xA5", "byu"}, // ビュ
+	{"\xE3\x81\xA3", "c"},               // っ
+	{"\xE3\x83\x83", "c"},               // ッ
+	{"\xE3\x81\xA1\xE3\x82\x83", "cha"}, // ちゃ
+	{"\xE3\x83\x81\xE3\x83\xA3", "cha"}, // チャ
+	{"\xE3\x83\x81\xE3\x82\xA7", "che"}, // チェ
+	{"\xE3\x81\xA1", "chi"},             // ち
+	{"\xE3\x83\x81", "chi"},             // チ
+	{"\xE3\x81\xA1\xE3\x82\x87", "cho"}, // ちょ
+	{"\xE3\x83\x81\xE3\x83\xA7", "cho"}, // チョ
+	{"\xE3\x81\xA1\xE3\x82\x85", "chu"}, // ちゅ
+	{"\xE3\x83\x81\xE3\x83\xA5", "chu"}, // チュ
+	{"\xE3\x81\xA0", "da"},              // だ
+	{"\xE3\x83\x80", "da"},              // ダ
+	{"\xE3\x81\xA7", "de"},              // で
+	{"\xE3\x83\x87", "de"},              // デ
+	{"\xE3\x83\x87\xE3\x82\xA3", "di"},  // ディ
+	{"\xE3\x81\xA9", "do"},              // ど
+	{"\xE3\x83\x89", "do"},              // ド
+	{"\xE3\x83\x87\xE3\x82\xA5", "du"},  // デゥ
+	{"\xE3\x83\x87\xE3\x83\xA5", "dyu"}, // デュ
+	{"\xE3\x81\x87", "e"},               // ぇ
+	{"\xE3\x81\x88", "e"},               // え
+	{"\xE3\x82\xA7", "e"},               // ェ
+	{"\xE3\x82\xA8", "e"},               // エ
+	{"\xE3\x83\xBC", "e"},               // ー
+	{"\xE3\x83\x95\xE3\x82\xA1", "fa"},  // ファ
+	{"\xE3\x83\x95\xE3\x82\xA7", "fe"},  // フェ
+	{"\xE3\x83\x95\xE3\x82\xA3", "fi"},  // フィ
+	{"\xE3\x83\x95\xE3\x82\xA9", "fo"},  // フォ
+	{"\xE3\x81\xB5", "fu"},              // ふ
+	{"\xE3\x83\x95", "fu"},              // フ
+	{"\xE3\x83\x95\xE3\x83\xA5", "fyu"}, // フュ
+	{"\xE3\x81\x8C", "ga"},              // が
+	{"\xE3\x82\xAC", "ga"},              // ガ
+	{"\xE3\x81\x92", "ge"},              // げ
+	{"\xE3\x82\xB2", "ge"},              // ゲ
+	{"\xE3\x81\x8E", "gi"},              // ぎ
+	{"\xE3\x82\xAE", "gi"},              // ギ
+	{"\xE3\x81\x94", "go"},              // ご
+	{"\xE3\x82\xB4", "go"},              // ゴ
+	{"\xE3\x81\x90", "gu"},              // ぐ
+	{"\xE3\x82\xB0", "gu"},              // グ
+	{"\xE3\x81\x8E\xE3\x82\x83", "gya"}, // ぎゃ
+	{"\xE3\x82\xAE\xE3\x83\xA3", "gya"}, // ギャ
+	{"\xE3\x81\x8E\xE3\x82\x87", "gyo"}, // ぎょ
+	{"\xE3\x82\xAE\xE3\x83\xA7", "gyo"}, // ギョ
+	{"\xE3\x81\x8E\xE3\x82\x85", "gyu"}, // ぎゅ
+	{"\xE3\x82\xAE\xE3\x83\xA5", "gyu"}, // ギュ
+	{"\xE3\x81\xAF", "ha"},              // は
+	{"\xE3\x83\x8F", "ha"},              // ハ
+	{"\xE3\x81\xB8", "he"},              // へ
+	{"\xE3\x83\x98", "he"},              // ヘ
+	{"\xE3\x81\xB2", "hi"},              // ひ
+	{"\xE3\x83\x92", "hi"},              // ヒ
+	{"\xE3\x81\xBB", "ho"},              // ほ
+	{"\xE3\x83\x9B", "ho"},              // ホ
+	{"\xE3\x81\xB2\xE3\x82\x83", "hya"}, // ひゃ
+	{"\xE3\x83\x92\xE3\x83\xA3", "hya"}, // ヒャ
+	{"\xE3\x81\xB2\xE3\x82\x87", "hyo"}, // ひょ
+	{"\xE3\x83\x92\xE3\x83\xA7", "hyo"}, // ヒョ
+	{"\xE3\x81\xB2\xE3\x82\x85", "hyu"}, // ひゅ
+	{"\xE3\x83\x92\xE3\x83\xA5", "hyu"}, // ヒュ
+	{"\xE3\x81\x83", "i"},               // ぃ
+	{"\xE3\x81\x84", "i"},               // い
+	{"\xE3\x82\xA3", "i"},               // ィ
+	{"\xE3\x82\xA4", "i"},               // イ
+	{"\xE3\x83\xBC", "i"},               // ー
+	{"\xE3\x81\x98\xE3\x82\x83", "ja"},  // じゃ
+	{"\xE3\x81\xA2\xE3\x82\x83", "ja"},  // ぢゃ
+	{"\xE3\x82\xB8\xE3\x83\xA3", "ja"},  // ジャ
+	{"\xE3\x83\x82\xE3\x83\xA3", "ja"},  // ヂャ
+	{"\xE3\x82\xB8\xE3\x82\xA7", "je"},  // ジェ
+	{"\xE3\x81\x98", "ji"},              // じ
+	{"\xE3\x81\xA2", "ji"},              // ぢ
+	{"\xE3\x82\xB8", "ji"},              // ジ
+	{"\xE3\x83\x82", "ji"},              // ヂ
+	{"\xE3\x81\x98\xE3\x82\x87", "jo"},  // じょ
+	{"\xE3\x81\xA2\xE3\x82\x87", "jo"},  // ぢょ
+	{"\xE3\x82\xB8\xE3\x83\xA7", "jo"},  // ジョ
+	{"\xE3\x83\x82\xE3\x83\xA7", "jo"},  // ヂョ
+	{"\xE3\x81\x98\xE3\x82\x85", "ju"},  // じゅ
+	{"\xE3\x81\xA2\xE3\x82\x85", "ju"},  // ぢゅ
+	{"\xE3\x82\xB8\xE3\x83\xA5", "ju"},  // ジュ
+	{"\xE3\x83\x82\xE3\x83\xA5", "ju"},  // ヂュ
+	{"\xE3\x81\xA3", "k"},               // っ
+	{"\xE3\x83\x83", "k"},               // ッ
+	{"\xE3\x81\x8B", "ka"},              // か
+	{"\xE3\x82\xAB", "ka"},              // カ
+	{"\xE3\x81\x91", "ke"},              // け
+	{"\xE3\x82\xB1", "ke"},              // ケ
+	{"\xE3\x81\x8D", "ki"},              // き
+	{"\xE3\x82\xAD", "ki"},              // キ
+	{"\xE3\x81\x93", "ko"},              // こ
+	{"\xE3\x82\xB3", "ko"},              // コ
+	{"\xE3\x81\x8F", "ku"},              // く
+	{"\xE3\x82\xAF", "ku"},              // ク
+	{"\xE3\x81\x8D\xE3\x82\x83", "kya"}, // きゃ
+	{"\xE3\x82\xAD\xE3\x83\xA3", "kya"}, // キャ
+	{"\xE3\x81\x8D\xE3\x82\x87", "kyo"}, // きょ
+	{"\xE3\x82\xAD\xE3\x83\xA7", "kyo"}, // キョ
+	{"\xE3\x81\x8D\xE3\x82\x85", "kyu"}, // きゅ
+	{"\xE3\x82\xAD\xE3\x83\xA5", "kyu"}, // キュ
+	{"\xE3\x82\x93", "m"},               // ん
+	{"\xE3\x83\xB3", "m"},               // ン
+	{"\xE3\x81\xBE", "ma"},              // ま
+	{"\xE3\x83\x9E", "ma"},              // マ
+	{"\xE3\x82\x81", "me"},              // め
+	{"\xE3\x83\xA1", "me"},              // メ
+	{"\xE3\x81\xBF", "mi"},              // み
+	{"\xE3\x83\x9F", "mi"},              // ミ
+	{"\xE3\x82\x82", "mo"},              // も
+	{"\xE3\x83\xA2", "mo"},              // モ
+	{"\xE3\x82\x80", "mu"},              // む
+	{"\xE3\x83\xA0", "mu"},              // ム
+	{"\xE3\x81\xBF\xE3\x82\x83", "mya"}, // みゃ
+	{"\xE3\x83\x9F\xE3\x83\xA3", "mya"}, // ミャ
+	{"\xE3\x81\xBF\xE3\x82\x87", "myo"}, // みょ
+	{"\xE3\x83\x9F\xE3\x83\xA7", "myo"}, // ミョ
+	{"\xE3\x81\xBF\xE3\x82\x85", "myu"}, // みゅ
+	{"\xE3\x83\x9F\xE3\x83\xA5", "myu"}, // ミュ
+	{"\xE3\x82\x93", "n"},               // ん
+	{"\xE3\x83\xB3", "n"},               // ン
+	{"\xE3\x81\xAA", "na"},              // な
+	{"\xE3\x83\x8A", "na"},              // ナ
+	{"\xE3\x81\xAD", "ne"},              // ね
+	{"\xE3\x83\x8D", "ne"},              // ネ
+	{"\xE3\x81\xAB", "ni"},              // に
+	{"\xE3\x83\x8B", "ni"},              // ニ
+	{"\xE3\x81\xAE", "no"},              // の
+	{"\xE3\x83\x8E", "no"},              // ノ
+	{"\xE3\x81\xAC", "nu"},              // ぬ
+	{"\xE3\x83\x8C", "nu"},              // ヌ
+	{"\xE3\x81\xAB\xE3\x82\x83", "nya"}, // にゃ
+	{"\xE3\x83\x8B\xE3\x83\xA3", "nya"}, // ニャ
+	{"\xE3\x81\xAB\xE3\x82\x87", "nyo"}, // にょ
+	{"\xE3\x83\x8B\xE3\x83\xA7", "nyo"}, // ニョ
+	{"\xE3\x81\xAB\xE3\x82\x85", "nyu"}, // にゅ
+	{"\xE3\x83\x8B\xE3\x83\xA5", "nyu"}, // ニュ
+	{"\xE3\x81\x89", "o"},               // ぉ
+	{"\xE3\x81\x8A", "o"},               // お
+	{"\xE3\x82\xA9", "o"},               // ォ
+	{"\xE3\x82\xAA", "o"},               // オ
+	{"\xE3\x83\xBC", "o"},               // ー
+	{"\xE3\x81\xA3", "p"},               // っ
+	{"\xE3\x83\x83", "p"},               // ッ
+	{"\xE3\x81\xB1", "pa"},              // ぱ
+	{"\xE3\x83\x91", "pa"},              // パ
+	{"\xE3\x81\xBA", "pe"},              // ぺ
+	{"\xE3\x83\x9A", "pe"},              // ペ
+	{"\xE3\x81\xB4", "pi"},              // ぴ
+	{"\xE3\x83\x94", "pi"},              // ピ
+	{"\xE3\x81\xBD", "po"},              // ぽ
+	{"\xE3\x83\x9D", "po"},              // ポ
+	{"\xE3\x81\xB7", "pu"},              // ぷ
+	{"\xE3\x83\x97", "pu"},              // プ
+	{"\xE3\x81\xB4\xE3\x82\x83", "pya"}, // ぴゃ
+	{"\xE3\x83\x94\xE3\x83\xA3", "pya"}, // ピャ
+	{"\xE3\x81\xB4\xE3\x82\x87", "pyo"}, // ぴょ
+	{"\xE3\x83\x94\xE3\x83\xA7", "pyo"}, // ピョ
+	{"\xE3\x81\xB4\xE3\x82\x85", "pyu"}, // ぴゅ
+	{"\xE3\x83\x94\xE3\x83\xA5", "pyu"}, // ピュ
+	{"\xE3\x82\x89", "ra"},              // ら
+	{"\xE3\x83\xA9", "ra"},              // ラ
+	{"\xE3\x82\x8C", "re"},              // れ
+	{"\xE3\x83\xAC", "re"},              // レ
+	{"\xE3\x82\x8A", "ri"},              // り
+	{"\xE3\x83\xAA", "ri"},              // リ
+	{"\xE3\x82\x8D", "ro"},              // ろ
+	{"\xE3\x83\xAD", "ro"},              // ロ
+	{"\xE3\x82\x8B", "ru"},              // る
+	{"\xE3\x83\xAB", "ru"},              // ル
+	{"\xE3\x82\x8A\xE3\x82\x83", "rya"}, // りゃ
+	{"\xE3\x83\xAA\xE3\x83\xA3", "rya"}, // リャ
+	{"\xE3\x82\x8A\xE3\x82\x87", "ryo"}, // りょ
+	{"\xE3\x83\xAA\xE3\x83\xA7", "ryo"}, // リョ
+	{"\xE3\x82\x8A\xE3\x82\x85", "ryu"}, // りゅ
+	{"\xE3\x83\xAA\xE3\x83\xA5", "ryu"}, // リュ
+	{"\xE3\x81\xA3", "s"},               // っ
+	{"\xE3\x83\x83", "s"},               // ッ
+	{"\xE3\x81\x95", "sa"},              // さ
+	{"\xE3\x82\xB5", "sa"},              // サ
+	{"\xE3\x81\x9B", "se"},              // せ
+	{"\xE3\x82\xBB", "se"},              // セ
+	{"\xE3\x81\x97\xE3\x82\x83", "sha"}, // しゃ
+	{"\xE3\x82\xB7\xE3\x83\xA3", "sha"}, // シャ
+	{"\xE3\x82\xB7\xE3\x82\xA7", "she"}, // シェ
+	{"\xE3\x81\x97", "shi"},             // し
+	{"\xE3\x82\xB7", "shi"},             // シ
+	{"\xE3\x81\x97\xE3\x82\x87", "sho"}, // しょ
+	{"\xE3\x82\xB7\xE3\x83\xA7", "sho"}, // ショ
+	{"\xE3\x81\x97\xE3\x82\x85", "shu"}, // しゅ
+	{"\xE3\x82\xB7\xE3\x83\xA5", "shu"}, // シュ
+	{"\xE3\x81\x9D", "so"},              // そ
+	{"\xE3\x82\xBD", "so"},              // ソ
+	{"\xE3\x81\x99", "su"},              // す
+	{"\xE3\x82\xB9", "su"},              // ス
+	{"\xE3\x81\xA3", "t"},               // っ
+	{"\xE3\x83\x83", "t"},               // ッ
+	{"\xE3\x81\x9F", "ta"},              // た
+	{"\xE3\x82\xBF", "ta"},              // タ
+	{"\xE3\x81\xA6", "te"},              // て
+	{"\xE3\x83\x86", "te"},              // テ
+	{"\xE3\x83\x86\xE3\x82\xA3", "ti"},  // ティ
+	{"\xE3\x81\xA8", "to"},              // と
+	{"\xE3\x83\x88", "to"},              // ト
+	{"\xE3\x83\x84\xE3\x82\xA1", "tsa"}, // ツァ
+	{"\xE3\x83\x84\xE3\x82\xA7", "tse"}, // ツェ
+	{"\xE3\x83\x84\xE3\x82\xA3", "tsi"}, // ツィ
+	{"\xE3\x83\x84\xE3\x82\xA9", "tso"}, // ツォ
+	{"\xE3\x81\xA4", "tsu"},             // つ
+	{"\xE3\x83\x84", "tsu"},             // ツ
+	{"\xE3\x83\x86\xE3\x82\xA5", "tu"},  // テゥ
+	{"\xE3\x83\x86\xE3\x83\xA5", "tyu"}, // テュ
+	{"\xE3\x81\x85", "u"},               // ぅ
+	{"\xE3\x81\x86", "u"},               // う
+	{"\xE3\x82\xA5", "u"},               // ゥ
+	{"\xE3\x82\xA6", "u"},               // ウ
+	{"\xE3\x83\xBC", "u"},               // ー
+	{"\xE3\x83\xB4\xE3\x82\xA1", "va"},  // ヴァ
+	{"\xE3\x83\xB4\xE3\x82\xA7", "ve"},  // ヴェ
+	{"\xE3\x83\xB4\xE3\x82\xA3", "vi"},  // ヴィ
+	{"\xE3\x83\xB4\xE3\x82\xA9", "vo"},  // ヴォ
+	{"\xE3\x83\xB4", "vu"},              // ヴ
+	{"\xE3\x83\xB4\xE3\x83\xA3", "vya"}, // ヴャ
+	{"\xE3\x83\xB4\xE3\x83\xA7", "vyo"}, // ヴョ
+	{"\xE3\x83\xB4\xE3\x83\xA5", "vyu"}, // ヴュ
+	{"\xE3\x81\xAF", "wa"},              // は
+	{"\xE3\x82\x8F", "wa"},              // わ
+	{"\xE3\x83\xAF", "wa"},              // ワ
+	{"\xE3\x82\x91", "we"},              // ゑ
+	{"\xE3\x82\xA6\xE3\x82\xA7", "we"},  // ウェ
+	{"\xE3\x83\xB1", "we"},              // ヱ
+	{"\xE3\x82\x90", "wi"},              // ゐ
+	{"\xE3\x82\xA6\xE3\x82\xA3", "wi"},  // ウィ
+	{"\xE3\x83\xB0", "wi"},              // ヰ
+	{"\xE3\x82\x92", "wo"},              // を
+	{"\xE3\x82\xA6\xE3\x82\xA9", "wo"},  // ウォ
+	{"\xE3\x83\xB2", "wo"},              // ヲ
+	{"\xE3\x82\x84", "ya"},              // や
+	{"\xE3\x83\xA4", "ya"},              // ヤ
+	{"\xE3\x82\xA4\xE3\x82\xA7", "ye"},  // イェ
+	{"\xE3\x82\x88", "yo"},              // よ
+	{"\xE3\x83\xA8", "yo"},              // ヨ
+	{"\xE3\x82\x86", "yu"},              // ゆ
+	{"\xE3\x83\xA6", "yu"},              // ユ
+	{"\xE3\x81\x96", "za"},              // ざ
+	{"\xE3\x82\xB6", "za"},              // ザ
+	{"\xE3\x81\x9C", "ze"},              // ぜ
+	{"\xE3\x82\xBC", "ze"},              // ゼ
+	{"\xE3\x81\x9E", "zo"},              // ぞ
+	{"\xE3\x82\xBE", "zo"},              // ゾ
+	{"\xE3\x81\x9A", "zu"},              // ず
+	{"\xE3\x81\xA5", "zu"},              // づ
+	{"\xE3\x82\xBA", "zu"},              // ズ
+	{"\xE3\x83\x85", "zu"},              // ヅ
+};
+
+// Ordering predicate used to binary-search the kana-keyed table: true when the
+// entry's kana sorts strictly before `kana` under strcmp.
+bool cmp_kana(agi::kana_pair const& kp, std::string const& kana) {
+	const char *needle = kana.c_str();
+	const int order = strcmp(kp.kana, needle);
+	return order < 0;
+}
+
+// Strict-weak-ordering functor over the romaji field, compared with strcmp.
+// Both (entry, key) and (key, entry) argument orders are provided so the
+// functor can be used for a heterogeneous equal_range search.
+struct cmp_romaji {
+	// True when the entry's romaji sorts before the key.
+	bool operator()(agi::kana_pair const& entry, std::string const& key) const {
+		const char *needle = key.c_str();
+		return strcmp(entry.romaji, needle) < 0;
+	}
+	// True when the key sorts before the entry's romaji.
+	bool operator()(std::string const& key, agi::kana_pair const& entry) const {
+		const char *needle = key.c_str();
+		return strcmp(needle, entry.romaji) < 0;
+	}
+
+#ifdef _MSC_VER // debug iterator stuff needs this overload
+	// Entry-vs-entry ordering on the romaji field.
+	bool operator()(agi::kana_pair const& lhs, agi::kana_pair const& rhs) const {
+		return strcmp(lhs.romaji, rhs.romaji) < 0;
+	}
+#endif
+};
+
+}
+
+namespace agi {
+// Collect every romanization the kana-sorted table lists for exactly `kana`.
+// The returned pointers refer to the table's own romaji strings, in table
+// order; the vector is empty when no entry matches.
+std::vector<const char *> kana_to_romaji(std::string const& kana) {
+	std::vector<const char *> matches;
+	const char *key = kana.c_str();
+
+	// Jump to the first entry that is not less than `kana`, then walk the run
+	// of entries whose kana compares equal.
+	auto entry = boost::lower_bound(::kana_to_romaji, kana, cmp_kana);
+	while (entry != std::end(::kana_to_romaji) && strcmp(entry->kana, key) == 0) {
+		matches.push_back(entry->romaji);
+		++entry;
+	}
+	return matches;
+}
+
+// Find the table entries whose romaji equals the longest matching prefix of
+// `romaji`. Prefixes of at most 3 bytes are tried, longest first. Returns the
+// matching run of entries, or an empty range when no prefix matches.
+boost::iterator_range<const kana_pair *> romaji_to_kana(std::string const& romaji) {
+	size_t len = std::min<size_t>(3, romaji.size());
+	while (len > 0) {
+		std::string const prefix = romaji.substr(0, len);
+		auto found = boost::equal_range(::romaji_to_kana, prefix, cmp_romaji());
+		if (found.first != found.second)
+			return boost::make_iterator_range(found.first, found.second);
+		--len;
+	}
+	return boost::make_iterator_range(::romaji_to_kana, ::romaji_to_kana);
+}
+}
--- a/aegisub/libaegisub/common/karaoke_matcher.cpp
+++ b/aegisub/libaegisub/common/karaoke_matcher.cpp
@ -0,0 +1,209 @@
+// Copyright (c) 2013, Thomas Goyne <plorkyeran@aegisub.org>
+//
+// Permission to use, copy, modify, and distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice appear in all copies.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+//
+// Aegisub Project http://www.aegisub.org/
+
+#include "../config.h"
+
+#include "libaegisub/karaoke_matcher.h"
+
+#include "libaegisub/kana_table.h"
+#include "libaegisub/util.h"
+
+#include <boost/algorithm/string/case_conv.hpp>
+#include <boost/algorithm/string/predicate.hpp>
+#include <boost/locale/boundary.hpp>
+#include <boost/locale/collator.hpp>
+#include <boost/range/algorithm/copy.hpp>
+#include <unicode/uchar.h>
+#include <unicode/utf8.h>
+
+namespace {
+// Decode the UTF-8 code point starting at byte offset *i of str and advance
+// *i past it. This uses ICU's unchecked U8_NEXT_UNSAFE, so str is expected
+// to be well-formed UTF-8.
+int32_t next_codepoint(const char *str, size_t *i) {
+	UChar32 codepoint;
+	U8_NEXT_UNSAFE(str, *i, codepoint);
+	return codepoint;
+}
+
+// Whether ICU classifies the code point as Unicode whitespace
+bool is_whitespace(int32_t c) {
+	return u_isUWhiteSpace(c) != 0;
+}
+
+// Whether every code point of str up to the first NUL is Unicode
+// whitespace; an empty string counts as whitespace
+bool is_whitespace(std::string const& str) {
+	size_t pos = 0;
+	for (;;) {
+		const int32_t cp = next_codepoint(str.c_str(), &pos);
+		// Reached the terminator without seeing anything else
+		if (cp == 0)
+			return true;
+		if (!u_isUWhiteSpace(cp))
+			return false;
+	}
+}
+
+// Three-way comparison in the style of strcmp, done at primary collation
+// strength with the global locale's collator so that case and accents are
+// ignored
+int compare(std::string const& a, std::string const& b) {
+	namespace bl = boost::locale;
+	const std::locale loc;
+	auto const& coll = std::use_facet<bl::collator<char>>(loc);
+	return coll.compare(bl::collator_base::primary, a, b);
+}
+
+}
+
+namespace agi {
+
+// Guess how much of dest_string corresponds to the first source syllable.
+//
+// Three strategies are tried in order:
+//  1. Consume the first syllable piece by piece, pairing its leading
+//     character (compared ignoring case and accents) or its leading
+//     romanized kana with the character(s) at the front of the destination.
+//  2. If exactly one destination character is left, pair it with all of
+//     the source syllables.
+//  3. Otherwise look a few destination characters ahead for something that
+//     begins a later source syllable and split what lies in between.
+//
+// Returns the number of source syllables and of destination characters
+// selected. Both are zero if source_strings is empty.
+karaoke_match_result auto_match_karaoke(std::vector<std::string> const& source_strings, std::string const& dest_string) {
+	karaoke_match_result result = { 0, 0 };
+	if (source_strings.empty()) return result;
+
+	using namespace boost::locale::boundary;
+	using boost::starts_with;
+
+	result.source_length = 1;
+	ssegment_index destination_characters(character, begin(dest_string), end(dest_string));
+	auto src = boost::to_lower_copy(source_strings[0]);
+	auto dst = destination_characters.begin();
+	auto dst_end = destination_characters.end();
+
+	// Eat all the whitespace at the beginning of the source and destination
+	// syllables and exit if either ran out.
+	auto eat_whitespace = [&]() -> bool {
+		size_t i = 0, first_non_whitespace = 0;
+		while (is_whitespace(next_codepoint(src.c_str(), &i)))
+			first_non_whitespace = i;
+		if (first_non_whitespace)
+			src = src.substr(first_non_whitespace);
+
+		while (dst != dst_end && is_whitespace(dst->str())) {
+			++dst;
+			++result.destination_length;
+		}
+
+		// If we ran out of dest then this needs to match the rest of the
+		// source syllables (this probably means the user did something wrong)
+		if (dst == dst_end) {
+			result.source_length = source_strings.size();
+			return true;
+		}
+
+		return src.empty();
+	};
+
+	if (eat_whitespace()) return result;
+
+	// We now have a non-whitespace character at the beginning of both source
+	// and destination. Check if the source starts with a romanized kana, and
+	// if it does then check if the destination also has the appropriate
+	// character. If it does, match them and repeat.
+	while (!src.empty()) {
+		// First check for a basic match of the first character of the source and dest
+		auto first_src_char = ssegment_index(character, begin(src), end(src)).begin()->str();
+		if (compare(first_src_char, dst->str()) == 0) {
+			++dst;
+			++result.destination_length;
+			src.erase(0, first_src_char.size());
+			if (eat_whitespace()) return result;
+			continue;
+		}
+
+		// Try to pair one table entry with the front of src and dst. On
+		// success the romaji is removed from src and dst is advanced past
+		// the kana.
+		auto check = [&](kana_pair const& kp) -> bool {
+			if (!starts_with(&*dst->begin(), kp.kana)) return false;
+
+			src = src.substr(strlen(kp.romaji));
+			// Bound the walk by the kana's length instead of testing
+			// kp.kana[i]: a destination character can be longer than the
+			// kana it starts with (e.g. a kana followed by a combining
+			// mark), which would move i past the terminating NUL and read
+			// out of bounds.
+			const size_t kana_len = strlen(kp.kana);
+			for (size_t i = 0; i < kana_len && dst != dst_end; ) {
+				i += dst->length();
+				++result.destination_length;
+				++dst;
+			}
+			return true;
+		};
+
+		bool matched = false;
+		for (auto const& match : romaji_to_kana(src)) {
+			if (check(match)) {
+				if (eat_whitespace()) return result;
+				matched = true;
+				break;
+			}
+		}
+		if (!matched) break;
+	}
+
+	// Source and dest are now non-empty and start with non-whitespace.
+	// If there's only one character left in the dest, it obviously needs to
+	// match all of the source syllables left.
+	if (std::distance(dst, dst_end) == 1) {
+		result.source_length = source_strings.size();
+		++result.destination_length;
+		return result;
+	}
+
+	// We couldn't match the current character, but if we can match the *next*
+	// syllable then we know that everything in between must belong to the
+	// current syllable. Do this by looking up to dst_lookahead_max
+	// characters ahead in destination and seeing if we can match them against
+	// the beginning of a syllable after this syllable.
+	// If a match is found, make a guess at how much source and destination
+	// should be selected based on the distances it was found at.
+
+	// The longest kanji are 'uketamawa.ru' and 'kokorozashi', each with a
+	// reading consisting of five kana. This means that each character from
+	// the destination can match at most five syllables from the source.
+	static const size_t max_character_length = 5;
+
+	// Arbitrarily chosen limit on the number of dest characters to try
+	// skipping. Higher numbers probably increase false-positives.
+	static const size_t dst_lookahead_max = 3;
+
+	for (size_t lookahead = 0; lookahead < dst_lookahead_max; ++lookahead) {
+		if (++dst == dst_end) break;
+
+		// Transliterate this character if it's a known hiragana or katakana character
+		// (trying it together with the following character first, for
+		// kana that are written with two characters)
+		std::vector<const char *> translit;
+		auto next = std::next(dst);
+		if (next != dst_end)
+			boost::copy(kana_to_romaji(dst->str() + next->str()), back_inserter(translit));
+		boost::copy(kana_to_romaji(dst->str()), back_inserter(translit));
+
+		// Search for it and the transliterated version in the source
+		const size_t src_lookahead_max = (lookahead + 1) * max_character_length;
+		size_t src_lookahead_pos = 0;
+		for (auto const& syl : source_strings) {
+			// Don't count blank syllables in the max search distance
+			if (is_whitespace(syl)) continue;
+			// The first non-blank syllable is the one being matched, so skip it
+			if (++src_lookahead_pos == 1) continue;
+			if (src_lookahead_pos > src_lookahead_max) break;
+
+			std::string lsyl = boost::to_lower_copy(syl);
+			if (!(starts_with(syl, dst->str()) || util::any_of(translit, [&](const char *str) { return starts_with(lsyl, str); })))
+				continue;
+
+			// The syllable immediately after the current one matched, so
+			// everything up to the match must go with the current syllable.
+			if (src_lookahead_pos == 2) {
+				result.destination_length += lookahead + 1;
+				return result;
+			}
+
+			// The match was multiple syllables ahead, so just divide the
+			// destination characters evenly between the source syllables
+			result.destination_length += 1;
+			result.source_length = static_cast<size_t>((src_lookahead_pos - 1.0) / (lookahead + 1.0) + .5);
+			return result;
+		}
+	}
+
+	// We wouldn't have gotten here if the dest was empty, so make sure at
+	// least one character is selected
+	result.destination_length = std::max<size_t>(result.destination_length, 1u);
+
+	return result;
+}
+}
--- a/aegisub/libaegisub/include/libaegisub/kana_table.h
+++ b/aegisub/libaegisub/include/libaegisub/kana_table.h
@ -0,0 +1,30 @@
+// Copyright (c) 2013, Thomas Goyne <plorkyeran@aegisub.org>
+//
+// Permission to use, copy, modify, and distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice appear in all copies.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+//
+// Aegisub Project http://www.aegisub.org/
+
+#pragma once
+
+#include <boost/range/iterator_range.hpp>
+#include <string>
+#include <vector>
+
+namespace agi {
+	/// One table entry: a kana string and a romaji transliteration of it
+	struct kana_pair {
+		/// The kana, as a NUL-terminated UTF-8 string
+		const char *kana;
+		/// The lowercase romaji for it
+		const char *romaji;
+	};
+
+	/// Transliterated romaji for the given kana; the vector is empty if the
+	/// kana is not in the table
+	std::vector<const char *> kana_to_romaji(std::string const& kana);
+
+	/// Table entries whose romaji is the longest prefix of `romaji` (at most
+	/// three bytes long) that has any entries; an empty range if none does
+	boost::iterator_range<const kana_pair *> romaji_to_kana(std::string const& romaji);
+}
--- a/aegisub/libaegisub/include/libaegisub/karaoke_matcher.h
+++ b/aegisub/libaegisub/include/libaegisub/karaoke_matcher.h
@ -0,0 +1,30 @@
+// Copyright (c) 2013, Thomas Goyne <plorkyeran@aegisub.org>
+//
+// Permission to use, copy, modify, and distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice appear in all copies.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+//
+// Aegisub Project http://www.aegisub.org/
+
+#pragma once
+
+#include <string>
+#include <vector>
+
+namespace agi {
+	/// How much of the source and destination auto_match_karaoke() selected
+	struct karaoke_match_result {
+		/// The number of strings in the source matched
+		size_t source_length;
+		/// The number of characters in the destination string matched
+		size_t destination_length;
+	};
+
+	/// Try to automatically select the portion of dst which corresponds to the first string in src
+	/// @param src Source syllables; the first is the one being matched and the following ones are used for lookahead
+	/// @param dst Destination text to select a leading portion of
+	/// @return The selected lengths; both are zero if src is empty
+	karaoke_match_result auto_match_karaoke(std::vector<std::string> const& src, std::string const& dst);
+}
--- a/aegisub/libaegisub/include/libaegisub/util.h
+++ b/aegisub/libaegisub/include/libaegisub/util.h
@ -75,5 +75,11 @@ namespace agi {
 	/// elsewhere (because libstcc++ 4.7 is missing it).
 	void sleep_for(int ms);

+	// Range-taking wrapper around std::any_of, since boost.range has no
+	// wrappers for the algorithms added in C++11
+	template<typename Range, typename Predicate>
+	bool any_of(Range&& r, Predicate&& p) {
+		auto first = std::begin(r);
+		auto last = std::end(r);
+		return std::any_of(first, last, std::forward<Predicate>(p));
+	}
+
 	} // namespace util
 } // namespace agi
--- a/aegisub/src/Makefile
+++ b/aegisub/src/Makefile
@ -194,7 +194,6 @@ SRC += \
 	hotkey.cpp \
 	hotkey_data_view_model.cpp \
 	initial_line_state.cpp \
-	kana_table.cpp \
 	lpeg.c \
 	main.cpp \
 	menu.cpp \
--- a/aegisub/src/dialog_kara_timing_copy.cpp
+++ b/aegisub/src/dialog_kara_timing_copy.cpp
@ -43,12 +43,17 @@
 #include "compat.h"
 #include "help_button.h"
 #include "include/aegisub/context.h"
-#include "kana_table.h"
 #include "libresrc/libresrc.h"
 #include "options.h"
 #include "selection_controller.h"
 #include "utils.h"

+#include <libaegisub/karaoke_matcher.h>
+
+#include <boost/algorithm/string/case_conv.hpp>
+#include <boost/algorithm/string/predicate.hpp>
+#include <boost/locale/boundary.hpp>
+#include <boost/range/algorithm_ext.hpp>
 #include <deque>

 #include <wx/checkbox.h>
@ -76,12 +81,13 @@ class KaraokeLineMatchDisplay : public wxControl {

 	std::vector<MatchGroup> matched_groups;
 	std::deque<MatchSyllable> unmatched_source;
-	std::string unmatched_destination;
+	std::string destination_str;
+	boost::locale::boundary::ssegment_index destination;
+	boost::locale::boundary::ssegment_index::iterator match_begin, match_end;

 	int last_total_matchgroup_render_width;

 	size_t source_sel_length;
-	size_t destination_sel_length;

 	void OnPaint(wxPaintEvent &event);

@ -96,7 +102,7 @@ public:
 	/// Number of syllables not yet matched from source
 	size_t GetRemainingSource() const { return unmatched_source.size(); }
 	/// Number of characters not yet matched from destination
-	size_t GetRemainingDestination() const { return unmatched_destination.size(); }
+	size_t GetRemainingDestination() const { return distance(match_end, destination.end()); }

 	// Adjust source and destination match lengths
 	void IncreaseSourceMatch();
@ -147,7 +153,7 @@ wxSize KaraokeLineMatchDisplay::GetBestSize() const
 	return wxSize(min_width * 2, h_src + h_dst + 7);
 }

-int DrawBoxedText(wxDC &dc, const std::string &txt, int x, int y)
+int DrawBoxedText(wxDC &dc, wxString const& txt, int x, int y)
 {
 	int tw, th;
 	// Assume the pen, brush and font properties have already been set in the DC.
@ -164,10 +170,9 @@ int DrawBoxedText(wxDC &dc, const std::string &txt, int x, int y)
 	}
 	else
 	{
-		wxString wxtxt(to_wx(txt));
-		dc.GetTextExtent(wxtxt, &tw, &th);
+		dc.GetTextExtent(txt, &tw, &th);
 		dc.DrawRectangle(x, y-2, tw+4, th+4);
-		dc.DrawText(wxtxt, x+2, y);
+		dc.DrawText(txt, x+2, y);
 		return tw+3;
 	}
 }
@ -256,11 +261,11 @@ void KaraokeLineMatchDisplay::OnPaint(wxPaintEvent &)
 		// Matched source syllables
 		int syl_x = next_x;
 		for (auto const& syl : grp.src)
-			syl_x += DrawBoxedText(dc, syl.text, syl_x, y_line1);
+			syl_x += DrawBoxedText(dc, to_wx(syl.text), syl_x, y_line1);

 		// Matched destination text
 		{
-			const int adv = DrawBoxedText(dc, grp.dst, next_x, y_line2);
+			const int adv = DrawBoxedText(dc, to_wx(grp.dst), next_x, y_line2);

 			// Adjust next_x here while we have the text_w
 			next_x = syl_x > next_x + adv ? syl_x : next_x + adv;
@ -292,24 +297,30 @@ void KaraokeLineMatchDisplay::OnPaint(wxPaintEvent &)
 			dc.SetBrush(wxBrush(inner_back));
 		}

-		syl_x += DrawBoxedText(dc, unmatched_source[j].text, syl_x, y_line1);
+		syl_x += DrawBoxedText(dc, to_wx(unmatched_source[j].text), syl_x, y_line1);
 	}

 	// Remaining destination
-	if (!unmatched_destination.empty())
+	if (match_begin != match_end)
 	{
 		dc.SetTextBackground(sel_back);
 		dc.SetTextForeground(sel_text);
 		dc.SetBrush(wxBrush(sel_back));
-		next_x += DrawBoxedText(dc, unmatched_destination.substr(0, destination_sel_length), next_x, y_line2);
+		wxString str;
+		for (auto it = match_begin; it != match_end; ++it)
+			str += to_wx(it->str());
+		next_x += DrawBoxedText(dc, str, next_x, y_line2);
 	}

-	if (destination_sel_length < unmatched_destination.size())
+	if (match_end != destination.end())
 	{
 		dc.SetTextBackground(inner_back);
 		dc.SetTextForeground(inner_text);
 		dc.SetBrush(wxBrush(inner_back));
-		DrawBoxedText(dc, unmatched_destination.substr(destination_sel_length), next_x, y_line2);
+		wxString str;
+		for (auto it = match_end; it != destination.end(); ++it)
+			str += to_wx(it->str());
+		DrawBoxedText(dc, str, next_x, y_line2);
 	}
 }

@ -328,8 +339,12 @@ void KaraokeLineMatchDisplay::SetInputData(AssDialogue *src, AssDialogue *dst)
 		source_sel_length = 1;
 	}

-	unmatched_destination = dst ? dst->GetStrippedText() : "";
-	destination_sel_length = std::max<size_t>(1, unmatched_destination.size());
+	destination_str = dst ? dst->GetStrippedText() : "";
+	using namespace boost::locale::boundary;
+	destination = ssegment_index(character, begin(destination_str), end(destination_str));
+	match_begin = match_end = destination.begin();
+	if (!destination_str.empty())
+		++match_end;

 	Refresh(true);
 }
@ -363,182 +378,34 @@ void KaraokeLineMatchDisplay::DecreaseSourceMatch()

 void KaraokeLineMatchDisplay::IncreseDestinationMatch()
 {
-	destination_sel_length = std::min(destination_sel_length + 1, GetRemainingDestination());
-	Refresh(true);
+	// The selection already reaches the end of the destination: nothing to add
+	if (match_end == destination.end()) return;
+
+	// Extend the selection by one character and redraw
+	++match_end;
+	Refresh(true);
 }

 void KaraokeLineMatchDisplay::DecreaseDestinationMatch()
 {
-	destination_sel_length = std::max<size_t>(destination_sel_length, 1) - 1;
-	Refresh(true);
+	// The selection is already empty: nothing to remove
+	if (match_end == match_begin) return;
+
+	// Shrink the selection by one character and redraw
+	--match_end;
+	Refresh(true);
 }

-/// Kana interpolation, in characters, unset to disable
-#define KANA_SEARCH_DISTANCE 3
-
 void KaraokeLineMatchDisplay::AutoMatchJapanese()
 {
-	if (unmatched_source.size() < 1) return;
-
-	// Quick escape: If there's no destination left, take all remaining source.
-	// (Usually this means the user made a mistake.)
-	if (unmatched_destination.empty())
-	{
-		source_sel_length = unmatched_source.size();
-		destination_sel_length = 0;
-		return;
-	}
-
-	// We'll first see if we can do something with the first unmatched source syllable
-	wxString src(to_wx(unmatched_source[0].text).Lower());
-	wxString dst(to_wx(unmatched_destination));
-	source_sel_length = 1; // we're working on the first, assume it was matched
-	destination_sel_length = 0;
-
-	// Quick escape: If the source syllable is empty, return with first source syllable and empty destination
-	if (src.empty()) return;
-
-	// Try to match the next source syllable against the destination.  Do it
-	// "inverted": try all kana from the table and prefix-match them against
-	// the destination, then if it matches a prefix, try to match the hepburn
-	// for it agast the source; eat if it matches.  Keep trying to match as
-	// long as there's text left in the source syllable or matching fails.
-	while (src.size() > 0)
-	{
-		wxString dst_hira_rest, dst_kata_rest, src_rest;
-		bool matched = false;
-		for (const KanaEntry *ke = KanaTable; ke->hiragana; ++ke)
-		{
-			if (src.StartsWith(ke->hepburn, &src_rest))
-			{
-				bool hira_matches = dst.StartsWith(ke->hiragana, &dst_hira_rest) && *ke->hiragana;
-				bool kata_matches = dst.StartsWith(ke->katakana, &dst_kata_rest);
-
-				if (hira_matches || kata_matches)
-				{
-					matched = true;
-					src = src_rest;
-					dst = hira_matches ? dst_hira_rest : dst_kata_rest;
-					destination_sel_length += wcslen(hira_matches ? ke->hiragana : ke->katakana);
-					break;
-				}
-			}
-		}
-		if (!matched) break;
-	}
-
-	// The source might be empty now: That's good!
-	// That means we managed to match it all against destination text
-	if (src.empty()) return;
-	// destination_sel_length already has the appropriate value
-	// and source_sel_length was already 1
-
-	// Now the source syllable might consist of just whitespace.
-	// Eat all whitespace at the start of the destination.
-	if (StringEmptyOrWhitespace(src))
-	{
-		wxString str(to_wx(unmatched_destination.substr(destination_sel_length)));
-		destination_sel_length += std::distance(str.begin(), std::find_if_not(str.begin(), str.end(), IsWhitespace));
-		// Now we've eaten all spaces in the destination as well
-		// so the selection lengths should be good
-		return;
-	}
-
-	// If there's just one character left in the destination at this point,
-	// (and the source doesn't begin with space syllables, see test above)
-	// assume it's safe to take all remaining source to match the single
-	// remaining destination.
-	if (unmatched_destination.size() == 1)
-	{
-		source_sel_length = unmatched_source.size();
-		destination_sel_length = 1;
-		return;
-	}
-
-#ifdef KANA_SEARCH_DISTANCE
-	// Try to look up to KANA_SEARCH_DISTANCE characters ahead in destination,
-	// see if any of those are recognised kana. If there are any within the
-	// range, see if it matches a following syllable, at most 5 source
-	// syllables per character in source we're ahead.
-	// The number 5 comes from the kanji with the longest readings:
-	// 'uketamawa.ru' and 'kokorozashi' which each have a reading consisting of
-	// five kana.
-	// Only match the found kana in destination against the beginning of source
-	// syllables, not the middle of them.
-	// If a match is found, make a guess at how much source and destination
-	// should be selected based on the distances it was found at.
-	dst = to_wx(unmatched_destination);
-	for (size_t lookahead = 0; lookahead < KANA_SEARCH_DISTANCE; ++lookahead)
-	{
-		// Eat dst at the beginning, don't test for the first character being kana
-		dst = dst.Mid(1);
-		// Find a position where hiragana or katakana matches
-		wxString matched_roma;
-		wxString matched_kana;
-		for (const KanaEntry *ke = KanaTable; ke->hiragana; ++ke)
-		{
-			if (*ke->hiragana && dst.StartsWith(ke->hiragana))
-			{
-				matched_roma = ke->hepburn;
-				matched_kana = ke->hiragana;
-				break;
-			}
-			if (*ke->katakana && dst.StartsWith(ke->katakana))
-			{
-				matched_roma = ke->hepburn;
-				matched_kana = ke->katakana;
-				break;
-			}
-		}
-		// If we didn't match any kana against dst, move to next char in dst
-		if (!matched_kana)
-			continue;
-		// Otherwise look for a match for the romaji
-		// For the magic number 5, see big comment block above
-		int src_lookahead_max = (lookahead+1)*5;
-		int src_lookahead_pos = 0;
-		for (auto const& syl : unmatched_source)
-		{
-			// Check if we've gone too far ahead in the source
-			if (src_lookahead_pos++ >= src_lookahead_max) break;
-			// Otherwise look for a match
-			if (to_wx(syl.text).StartsWith(matched_roma))
-			{
-				// Yay! Time to interpolate.
-				// Special case: If the last source syllable before the matching one is
-				// empty or contains just whitespace, don't include that one.
-				if (src_lookahead_pos > 1 && StringEmptyOrWhitespace(to_wx(unmatched_source[src_lookahead_pos-2].text)))
-					src_lookahead_pos -= 1;
-				// Special case: Just one source syllable matching, pick all destination found
-				if (src_lookahead_pos == 2)
-				{
-					source_sel_length = 1;
-					destination_sel_length = lookahead+1;
-					return;
-				}
-				// Otherwise try to split the eaten source syllables evenly between the eaten
-				// destination characters, and do a regular rounding.
-				float src_per_dst = (float)(src_lookahead_pos-1)/(float)(lookahead+1);
-				source_sel_length = (int)(src_per_dst + 0.5);
-				destination_sel_length = 1;
-				return;
-			}
-		}
-	}
-#endif
-
-	// Okay so we didn't match anything. Aww.
-	// Just fail...
-	// We know from earlier that we do have both some source and some destination.
-	source_sel_length = 1;
-	destination_sel_length = 1;
-	return;
+	// Hand the text of every unmatched source syllable, together with the
+	// destination text from the current match position onwards, to the
+	// matcher in libaegisub
+	std::vector<std::string> syllables;
+	for (auto const& syl : unmatched_source)
+		syllables.emplace_back(syl.text);
+
+	const bool dst_exhausted = match_begin == destination.end();
+	auto matched = agi::auto_match_karaoke(syllables, dst_exhausted ? "" : &*match_begin->begin());
+
+	// Apply the suggested selection lengths
+	source_sel_length = matched.source_length;
+	match_end = std::next(match_begin, matched.destination_length);
 }

 bool KaraokeLineMatchDisplay::AcceptMatch()
 {
 	// Completely empty match
-	if (source_sel_length == 0 && destination_sel_length == 0) return false;
+	if (source_sel_length == 0 && match_begin == match_end) return false;

 	MatchGroup match;

@ -547,10 +414,8 @@ bool KaraokeLineMatchDisplay::AcceptMatch()
 	unmatched_source.erase(unmatched_source.begin(), unmatched_source.begin() + source_sel_length);
 	source_sel_length = 0;

-	assert(destination_sel_length <= unmatched_destination.size());
-	match.dst = unmatched_destination.substr(0, destination_sel_length);
-	unmatched_destination.erase(0, destination_sel_length);
-	destination_sel_length = 0;
+	match.dst = std::string(match_begin->begin(), match_end == destination.end() ? destination_str.end() : match_end->begin());
+	match_begin = match_end;

 	matched_groups.emplace_back(std::move(match));

@ -569,12 +434,12 @@ bool KaraokeLineMatchDisplay::UndoMatch()
 	MatchGroup &group = matched_groups.back();

 	source_sel_length = group.src.size();
-	destination_sel_length = group.dst.size();
-
 	copy(group.src.rbegin(), group.src.rend(), front_inserter(unmatched_source));
 	group.src.clear();

-	unmatched_destination = group.dst + unmatched_destination;
+	match_end = match_begin;
+	for (size_t size = group.dst.size(); size > 0; size -= match_begin->length())
+		--match_begin;

 	matched_groups.pop_back();

--- a/aegisub/src/kana_table.cpp
+++ b/aegisub/src/kana_table.cpp
@ -1,262 +0,0 @@
-// Copyright (c) 2006, Rodrigo Braz Monteiro
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-//   * Redistributions of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//   * Redistributions in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//   * Neither the name of the Aegisub Group nor the names of its contributors
-//     may be used to endorse or promote products derived from this software
-//     without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-// Aegisub Project http://www.aegisub.org/
-
-/// @file kana_table.cpp
-/// @brief Data about the Japanese kana syllabary used by kanji karaoke timing copying
-/// @ingroup kara_timing_copy
-///
-
-
-#include "config.h"
-
-#include "kana_table.h"
-
-const KanaEntry KanaTable[] =
-{
-	// Regular kana usage and combinations
-	{ L"\u3042", L"\u30a2", L"a" },
-	{ L"\u3044", L"\u30a4", L"i" },
-	{ L"\u3046", L"\u30a6", L"u" },
-	{ L"\u3048", L"\u30a8", L"e" },
-	{ L"\u304a", L"\u30aa", L"o" },
-
-	{ L"\u304b", L"\u30ab", L"ka" },
-	{ L"\u304d", L"\u30ad", L"ki" },
-	{ L"\u304f", L"\u30af", L"ku" },
-	{ L"\u3051", L"\u30b1", L"ke" },
-	{ L"\u3053", L"\u30b3", L"ko" },
-
-	{ L"\u3055", L"\u30b5", L"sa" },
-	{ L"\u3057", L"\u30b7", L"shi" },
-	{ L"\u3059", L"\u30b9", L"su" },
-	{ L"\u305b", L"\u30bb", L"se" },
-	{ L"\u305d", L"\u30bd", L"so" },
-
-	{ L"\u305f", L"\u30bf", L"ta" },
-	{ L"\u3061", L"\u30c1", L"chi" },
-	{ L"\u3064", L"\u30c4", L"tsu" },
-	{ L"\u3066", L"\u30c6", L"te" },
-	{ L"\u3068", L"\u30c8", L"to" },
-
-	{ L"\u306a", L"\u30ca", L"na" },
-	{ L"\u306b", L"\u30cb", L"ni" },
-	{ L"\u306c", L"\u30cc", L"nu" },
-	{ L"\u306d", L"\u30cd", L"ne" },
-	{ L"\u306e", L"\u30ce", L"no" },
-
-	{ L"\u306f", L"\u30cf", L"ha" },
-	{ L"\u3072", L"\u30d2", L"hi" },
-	{ L"\u3075", L"\u30d5", L"fu" },
-	{ L"\u3078", L"\u30d8", L"he" },
-	{ L"\u307b", L"\u30db", L"ho" },
-
-	{ L"\u307e", L"\u30de", L"ma" },
-	{ L"\u307f", L"\u30df", L"mi" },
-	{ L"\u3080", L"\u30e0", L"mu" },
-	{ L"\u3081", L"\u30e1", L"me" },
-	{ L"\u3082", L"\u30e2", L"mo" },
-
-	{ L"\u3084", L"\u30e4", L"ya" },
-	{ L"\u3086", L"\u30e6", L"yu" },
-	{ L"\u3088", L"\u30e8", L"yo" },
-
-	{ L"\u3089", L"\u30e9", L"ra" },
-	{ L"\u308a", L"\u30ea", L"ri" },
-	{ L"\u308b", L"\u30eb", L"ru" },
-	{ L"\u308c", L"\u30ec", L"re" },
-	{ L"\u308d", L"\u30ed", L"ro" },
-
-	{ L"\u308f", L"\u30ef", L"wa" },
-	{ L"\u3090", L"\u30f0", L"wi" },
-	{ L"\u3091", L"\u30f1", L"we" },
-	{ L"\u3092", L"\u30f2", L"wo" },
-
-	{ L"\u304c", L"\u30ac", L"ga" },
-	{ L"\u304e", L"\u30ae", L"gi" },
-	{ L"\u3050", L"\u30b0", L"gu" },
-	{ L"\u3052", L"\u30b2", L"ge" },
-	{ L"\u3054", L"\u30b4", L"go" },
-
-	{ L"\u3056", L"\u30b6", L"za" },
-	{ L"\u3058", L"\u30b8", L"ji" },
-	{ L"\u305a", L"\u30ba", L"zu" },
-	{ L"\u305c", L"\u30bc", L"ze" },
-	{ L"\u305e", L"\u30be", L"zo" },
-
-	{ L"\u3060", L"\u30c0", L"da" },
-	{ L"\u3062", L"\u30c2", L"ji" },
-	{ L"\u3065", L"\u30c5", L"zu" },
-	{ L"\u3067", L"\u30c7", L"de" },
-	{ L"\u3069", L"\u30c9", L"do" },
-
-	{ L"\u3070", L"\u30d0", L"ba" },
-	{ L"\u3073", L"\u30d3", L"bi" },
-	{ L"\u3076", L"\u30d6", L"bu" },
-	{ L"\u3079", L"\u30d9", L"be" },
-	{ L"\u307c", L"\u30dc", L"bo" },
-
-	{ L"\u3071", L"\u30d1", L"pa" },
-	{ L"\u3074", L"\u30d4", L"pi" },
-	{ L"\u3077", L"\u30d7", L"pu" },
-	{ L"\u307a", L"\u30da", L"pe" },
-	{ L"\u307d", L"\u30dd", L"po" },
-
-	{ L"\u304d\u3083", L"\u30ad\u30e3", L"kya" },
-	{ L"\u304d\u3085", L"\u30ad\u30e5", L"kyu" },
-	{ L"\u304d\u3087", L"\u30ad\u30e7", L"kyo" },
-
-	{ L"\u3057\u3083", L"\u30b7\u30e3", L"sha" },
-	{ L"\u3057\u3085", L"\u30b7\u30e5", L"shu" },
-	{ L"\u3057\u3087", L"\u30b7\u30e7", L"sho" },
-
-	{ L"\u3061\u3083", L"\u30c1\u30e3", L"cha" },
-	{ L"\u3061\u3085", L"\u30c1\u30e5", L"chu" },
-	{ L"\u3061\u3087", L"\u30c1\u30e7", L"cho" },
-
-	{ L"\u306b\u3083", L"\u30cb\u30e3", L"nya" },
-	{ L"\u306b\u3085", L"\u30cb\u30e5", L"nyu" },
-	{ L"\u306b\u3087", L"\u30cb\u30e7", L"nyo" },
-
-	{ L"\u3072\u3083", L"\u30d2\u30e3", L"hya" },
-	{ L"\u3072\u3085", L"\u30d2\u30e5", L"hyu" },
-	{ L"\u3072\u3087", L"\u30d2\u30e7", L"hyo" },
-
-	{ L"\u307f\u3083", L"\u30df\u30e3", L"mya" },
-	{ L"\u307f\u3085", L"\u30df\u30e5", L"myu" },
-	{ L"\u307f\u3087", L"\u30df\u30e7", L"myo" },
-
-	{ L"\u308a\u3083", L"\u30ea\u30e3", L"rya" },
-	{ L"\u308a\u3085", L"\u30ea\u30e5", L"ryu" },
-	{ L"\u308a\u3087", L"\u30ea\u30e7", L"ryo" },
-
-	{ L"\u304e\u3083", L"\u30ae\u30e3", L"gya" },
-	{ L"\u304e\u3085", L"\u30ae\u30e5", L"gyu" },
-	{ L"\u304e\u3087", L"\u30ae\u30e7", L"gyo" },
-
-	{ L"\u3058\u3083", L"\u30b8\u30e3", L"ja" },
-	{ L"\u3058\u3085", L"\u30b8\u30e5", L"ju" },
-	{ L"\u3058\u3087", L"\u30b8\u30e7", L"jo" },
-
-	{ L"\u3062\u3083", L"\u30c2\u30e3", L"ja" },
-	{ L"\u3062\u3085", L"\u30c2\u30e5", L"ju" },
-	{ L"\u3062\u3087", L"\u30c2\u30e7", L"jo" },
-
-	{ L"\u3073\u3083", L"\u30d3\u30e3", L"bya" },
-	{ L"\u3073\u3085", L"\u30d3\u30e5", L"byu" },
-	{ L"\u3073\u3087", L"\u30d3\u30e7", L"byo" },
-
-	{ L"\u3074\u3083", L"\u30d4\u30e3", L"pya" },
-	{ L"\u3074\u3085", L"\u30d4\u30e5", L"pyu" },
-	{ L"\u3074\u3087", L"\u30d4\u30e7", L"pyo" },
-
-
-	// Specialty katakana usage for loan words
-
-	// Katakana fu + small vowel
-	{ L"", L"\u30d5\u30a1", L"fa" },
-	{ L"", L"\u30d5\u30a3", L"fi" },
-	{ L"", L"\u30d5\u30a7", L"fe" },
-	{ L"", L"\u30d5\u30a9", L"fo" },
-
-	// Katakana vu + small vowel
-	{ L"", L"\u30f4\u30a1", L"va" },
-	{ L"", L"\u30f4\u30a3", L"vi" },
-	{ L"", L"\u30f4", L"vu" },
-	{ L"", L"\u30f4\u30a7", L"ve" },
-	{ L"", L"\u30f4\u30a9", L"vo" },
-
-	// Katakana fu + small yu
-	{ L"", L"\u30d5\u30e5", L"fyu" },
-
-	// Katakana i + little e
-	{ L"", L"\u30a4\u30a7", L"ye" },
-
-	// Katakana u + little vowels
-	{ L"", L"\u30a6\u30a3", L"wi" },
-	{ L"", L"\u30a6\u30a7", L"we" },
-	{ L"", L"\u30a6\u30a9", L"wo" },
-
-	// Katakana vu + small ya-yu-yo
-	{ L"", L"\u30f4\u30e3", L"vya" },
-	{ L"", L"\u30f4\u30e5", L"vyu" },
-	{ L"", L"\u30f4\u30e7", L"vyo" },
-
-	// Katakana shi-ji-chi + small e
-	{ L"", L"\u30b7\u30a7", L"she" },
-	{ L"", L"\u30b8\u30a7", L"je" },
-	{ L"", L"\u30c1\u30a7", L"che" },
-
-	// Katakana de + small i-u-yu
-	{ L"", L"\u30c6\u30a3", L"ti" },
-	{ L"", L"\u30c6\u30a5", L"tu" },
-	{ L"", L"\u30c6\u30e5", L"tyu" },
-
-	// Katakana de + small i-u-yu
-	{ L"", L"\u30c7\u30a3", L"di" },
-	{ L"", L"\u30c7\u30a5", L"du" },
-	{ L"", L"\u30c7\u30a5", L"dyu" },
-
-	// Katakana tsu + small vowels
-	{ L"", L"\u30c4\u30a1", L"tsa" },
-	{ L"", L"\u30c4\u30a3", L"tsi" },
-	{ L"", L"\u30c4\u30a7", L"tse" },
-	{ L"", L"\u30c4\u30a9", L"tso" },
-
-
-	// Syllablic consonants
-
-	// Small tsu
-	{ L"\u3063", L"\u30c3", L"t" },
-	{ L"\u3063", L"\u30c3", L"c" },
-	{ L"\u3063", L"\u30c3", L"s" },
-	{ L"\u3063", L"\u30c3", L"k" },
-	{ L"\u3063", L"\u30c3", L"p" },
-
-	// Syllabic n
-	{ L"\u3093", L"\u30f3", L"n" },
-	{ L"\u3093", L"\u30f3", L"m" },
-
-
-	// Other special usage
-
-	// Small vowels
-	{ L"\u3041", L"\u30a1", L"a" },
-	{ L"\u3043", L"\u30a3", L"i" },
-	{ L"\u3045", L"\u30a5", L"u" },
-	{ L"\u3047", L"\u30a7", L"e" },
-	{ L"\u3049", L"\u30a9", L"o" },
-
-	// Long vowel mark (dash)
-	{ L"", L"\u30fc", L"a" },
-	{ L"", L"\u30fc", L"i" },
-	{ L"", L"\u30fc", L"u" },
-	{ L"", L"\u30fc", L"e" },
-	{ L"", L"\u30fc", L"o" },
-	{ 0, 0, 0 }
-};
--- a/aegisub/src/kana_table.h
+++ b/aegisub/src/kana_table.h
@ -1,53 +0,0 @@
-// Copyright (c) 2006, Rodrigo Braz Monteiro
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-//   * Redistributions of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//   * Redistributions in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//   * Neither the name of the Aegisub Group nor the names of its contributors
-//     may be used to endorse or promote products derived from this software
-//     without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-// Aegisub Project http://www.aegisub.org/
-
-/// @file kana_table.h
-/// @see kana_table.cpp
-/// @ingroup kara_timing_copy
-///
-
-#include <list>
-
-#include <wx/string.h>
-
-/// @class KanaEntry
-/// @brief Base class for Kana + Romaji tuples.
-struct KanaEntry {
-	/// Hiragana
-	const wchar_t *hiragana;
-
-	/// Katakana
-	const wchar_t *katakana;
-
-	/// Hepburn romaji.
-	const wchar_t *hepburn;
-};
-
-/// Table of Hiragana, Katakana and Hepburn romaji tuples.
-extern const KanaEntry KanaTable[];
--- a/aegisub/src/utils.cpp
+++ b/aegisub/src/utils.cpp
@ -54,7 +54,6 @@
 #include <boost/locale/boundary.hpp>
 #include <boost/range/algorithm_ext.hpp>
 #include <map>
-#include <unicode/uchar.h>

 #include <wx/clipbrd.h>
 #include <wx/filedlg.h>
@ -109,14 +108,6 @@ int SmallestPowerOf2(int x) {
 	return x;
 }

-bool IsWhitespace(wxUniChar c) {
-	return !!u_isUWhiteSpace(c.GetValue());
-}
-
-bool StringEmptyOrWhitespace(const wxString &str) {
-	return std::all_of(str.begin(), str.end(), IsWhitespace);
-}
-
 void RestartAegisub() {
 	config::opt->Flush();

--- a/aegisub/src/utils.h
+++ b/aegisub/src/utils.h
@ -61,12 +61,6 @@ void StatusTimeout(wxString const& msg, int ms = 10000);
 /// Algorithm from http://bob.allegronetwork.com/prog/tricks.html
 int SmallestPowerOf2(int x);

-/// Check if wchar 'c' is a whitespace character
-bool IsWhitespace(wxUniChar c);
-
-/// Check if every character in str is whitespace
-bool StringEmptyOrWhitespace(const wxString &str);
-
 /// Get the length in characters of the longest line in the given text
 size_t MaxLineLength(std::string const& text);

--- a/aegisub/tests/Makefile
+++ b/aegisub/tests/Makefile
@ -25,6 +25,7 @@ SRC = \
 		tests/hotkey.cpp \
 		tests/iconv.cpp \
 		tests/ifind.cpp \
+		tests/karaoke_matcher.cpp \
 		tests/keyframe.cpp \
 		tests/line_iterator.cpp \
 		tests/line_wrap.cpp \
--- a/aegisub/tests/tests/karaoke_matcher.cpp
+++ b/aegisub/tests/tests/karaoke_matcher.cpp
@ -0,0 +1,197 @@
+// Copyright (c) 2013, Thomas Goyne <plorkyeran@aegisub.org>
+//
+// Permission to use, copy, modify, and distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice appear in all copies.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+#include <libaegisub/karaoke_matcher.h>
+
+#include "main.h"
+#include "util.h"
+
+class lagi_karaoke_matcher : public libagi { };
+
+namespace agi { // same namespace as karaoke_match_result, so argument-dependent lookup finds these operators
+bool operator==(karaoke_match_result const& a, karaoke_match_result const& b) { // equal when both lengths are equal
+	return a.source_length == b.source_length && a.destination_length == b.destination_length;
+}
+::std::ostream& operator<<(::std::ostream& os, karaoke_match_result const& r) { // streams as karaoke_match_result{source_length, destination_length}
+	return os << "karaoke_match_result{" << r.source_length << ", " << r.destination_length << "}";
+}
+} // namespace agi
+
+using agi::auto_match_karaoke;
+using agi::karaoke_match_result;
+
+TEST(lagi_karaoke_matcher, empty_src_gives_zero_src_length) {
+	EXPECT_EQ(0, auto_match_karaoke(std::vector<std::string>(), "").source_length);
+	EXPECT_EQ(0, auto_match_karaoke(std::vector<std::string>(), "a").source_length);
+}
+
+TEST(lagi_karaoke_matcher, empty_dest_gives_zero_dest_length) {
+	EXPECT_EQ(0, auto_match_karaoke(std::vector<std::string>(), "").destination_length);
+}
+
+TEST(lagi_karaoke_matcher, empty_dest_with_source_selects_all_source) {
+	EXPECT_EQ(2, auto_match_karaoke({"a", "b"}, "").source_length);
+}
+
+TEST(lagi_karaoke_matcher, empty_but_present_src_syllable_matches_no_dest) {
+	EXPECT_EQ((karaoke_match_result{1, 0}),
+	          auto_match_karaoke({"", "b"}, "cc"));
+}
+
+TEST(lagi_karaoke_matcher, dest_with_non_match_selects_first_character) {
+	EXPECT_EQ((karaoke_match_result{1, 1}),
+	          auto_match_karaoke({"a", "b"}, "cc"));
+}
+
+TEST(lagi_karaoke_matcher, dest_with_identical_match_selects_match) {
+	EXPECT_EQ((karaoke_match_result{1, 3}),
+	          auto_match_karaoke({"abc", "de"}, "abcde"));
+}
+
+TEST(lagi_karaoke_matcher, match_is_case_insensitive) {
+	EXPECT_EQ((karaoke_match_result{1, 3}),
+	          auto_match_karaoke({"abc", "de"}, "ABCDE"));
+	EXPECT_EQ((karaoke_match_result{1, 3}),
+	          auto_match_karaoke({"ABC", "DE"}, "abcde"));
+}
+
+TEST(lagi_karaoke_matcher, leading_whitespace_in_source_is_ignored) {
+	EXPECT_EQ((karaoke_match_result{1, 3}),
+	          auto_match_karaoke({" abc", "de"}, "abcde"));
+}
+
+TEST(lagi_karaoke_matcher, trailing_whitespace_in_source_is_ignored) {
+	EXPECT_EQ((karaoke_match_result{1, 3}),
+	          auto_match_karaoke({"abc ", "de"}, "abcde"));
+}
+
+TEST(lagi_karaoke_matcher, whitespace_in_dest_is_consumed) {
+	EXPECT_EQ((karaoke_match_result{1, 4}),
+	          auto_match_karaoke({"abc ", "de"}, " abcde"));
+	EXPECT_EQ((karaoke_match_result{1, 4}),
+	          auto_match_karaoke({"abc ", "de"}, "abc de"));
+	EXPECT_EQ((karaoke_match_result{1, 5}),
+	          auto_match_karaoke({"abc ", "de"}, "ab c de"));
+}
+
+TEST(lagi_karaoke_matcher, dest_match_is_in_characters) {
+	EXPECT_EQ((karaoke_match_result{1, 2}),
+	          auto_match_karaoke({"∫∫", "de"}, "∫∫a"));
+}
+
+TEST(lagi_karaoke_matcher, decomposed_characters_are_handled_atomically) {
+	// TODO: use an actually decomposed sequence (base + combining mark); this body currently duplicates dest_match_is_in_characters
+	EXPECT_EQ((karaoke_match_result{1, 2}),
+	          auto_match_karaoke({"∫∫", "de"}, "∫∫a"));
+}
+
+TEST(lagi_karaoke_matcher, single_hiragana_is_matched) {
+	EXPECT_EQ((karaoke_match_result{1, 1}),
+	          auto_match_karaoke({"ro" "de"}, "ろ")); // NOTE(review): adjacent literals concatenate, so the source is ONE syllable "rode" -- confirm {"ro", "de"} was not intended
+}
+
+TEST(lagi_karaoke_matcher, single_katakana_is_matched) {
+	EXPECT_EQ((karaoke_match_result{1, 1}),
+	          auto_match_karaoke({"ro" "de"}, "ロ")); // NOTE(review): adjacent literals concatenate, so the source is ONE syllable "rode" -- confirm {"ro", "de"} was not intended
+}
+
+TEST(lagi_karaoke_matcher, multiple_characters_matched) {
+	EXPECT_EQ((karaoke_match_result{1, 3}),
+	          auto_match_karaoke({"romaji" "de"}, "ろまじ")); // NOTE(review): adjacent literals concatenate, so the source is ONE syllable "romajide" -- confirm {"romaji", "de"} was not intended
+}
+TEST(lagi_karaoke_matcher, multiple_character_kana) {
+	EXPECT_EQ((karaoke_match_result{1, 2}),
+	          auto_match_karaoke({"kya", "e"}, "きゃe"));
+	EXPECT_EQ((karaoke_match_result{1, 2}),
+	          auto_match_karaoke({"kya"}, "きゃ"));
+}
+
+TEST(lagi_karaoke_matcher, whitespace_between_characters_in_source_ignored) {
+	EXPECT_EQ((karaoke_match_result{1, 3}),
+	          auto_match_karaoke({"ro ma ji" "de"}, "ろまじ")); // NOTE(review): adjacent literals concatenate, so the source is ONE syllable "ro ma jide" -- confirm {"ro ma ji", "de"} was not intended
+}
+
+TEST(lagi_karaoke_matcher, whitespace_inside_characters_in_source_breaks_match) {
+	EXPECT_EQ((karaoke_match_result{1, 1}),
+	          auto_match_karaoke({"r om aj i" "de"}, "ろまじ")); // NOTE(review): adjacent literals concatenate, so the source is ONE syllable "r om aj ide" -- confirm {"r om aj i", "de"} was not intended
+}
+
+TEST(lagi_karaoke_matcher, single_dest_character_consumes_all_source) {
+	EXPECT_EQ((karaoke_match_result{3, 1}),
+	          auto_match_karaoke({"a", "b", "c"}, "ろ"));
+}
+
+TEST(lagi_karaoke_matcher, fullwidth_letters_are_matched_to_ascii) {
+	EXPECT_EQ((karaoke_match_result{1, 2}),
+	          auto_match_karaoke({"ab", "cd"}, "ａｂc"));
+}
+
+TEST(lagi_karaoke_matcher, simple_lookahead) {
+	EXPECT_EQ((karaoke_match_result{1, 2}),
+	          auto_match_karaoke({"ab", "ro"}, "eeろ"));
+}
+
+TEST(lagi_karaoke_matcher, lookahead_ignores_empty_syllables) {
+	EXPECT_EQ((karaoke_match_result{1, 2}),
+	          auto_match_karaoke({"ab", "", "ro"}, "eeろ"));
+	EXPECT_EQ((karaoke_match_result{1, 2}),
+	          auto_match_karaoke({"ab", "", "", "ro"}, "eeろ"));
+}
+
+TEST(lagi_karaoke_matcher, lookahead_only_looks_at_three_characters_of_dst) {
+	EXPECT_EQ((karaoke_match_result{1, 3}),
+	          auto_match_karaoke({"abc", "", "ro"}, "eeeろ"));
+	EXPECT_EQ((karaoke_match_result{1, 1}),
+	          auto_match_karaoke({"abcd", "", "ro"}, "eeeeろ"));
+}
+
+TEST(lagi_karaoke_matcher, lookahead_two_syllables) {
+	EXPECT_EQ((karaoke_match_result{1, 1}),
+	          auto_match_karaoke({"a", "b", "ro"}, "eeろ"));
+	EXPECT_EQ((karaoke_match_result{2, 1}),
+	          auto_match_karaoke({"a", "b", "c", "ro"}, "eeろ"));
+	EXPECT_EQ((karaoke_match_result{2, 1}),
+	          auto_match_karaoke({"a", "b", "c", "d", "ro"}, "eeろ"));
+	EXPECT_EQ((karaoke_match_result{3, 1}),
+	          auto_match_karaoke({"a", "b", "c", "d", "f", "ro"}, "eeろ"));
+	EXPECT_EQ((karaoke_match_result{3, 2}),
+	          auto_match_karaoke({"a", "b", "c", "d", "f", "ro"}, " eeろ"));
+}
+
+TEST(lagi_karaoke_matcher, lookahead_multicharacter_kana) {
+	EXPECT_EQ((karaoke_match_result{1, 2}),
+	          auto_match_karaoke({"aa", "kya"}, "eeきゃ"));
+}
+
+TEST(lagi_karaoke_matcher, ha_is_wa) {
+	EXPECT_EQ((karaoke_match_result{2, 1}),
+	          auto_match_karaoke({"Bo", "ku", "wa"}, "僕は"));
+}
+
+TEST(lagi_karaoke_matcher, he_is_e) {
+	EXPECT_EQ((karaoke_match_result{2, 1}),
+	          auto_match_karaoke({"Bo", "ku", "e"}, "僕へ"));
+}
+
+TEST(lagi_karaoke_matcher, shitta) {
+	EXPECT_EQ((karaoke_match_result{1, 1}),
+	          auto_match_karaoke({"shi", "tta", ""}, "知った"));
+	EXPECT_EQ((karaoke_match_result{2, 2}),
+	          auto_match_karaoke({"tta", ""}, "った"));
+}
+
+TEST(lagi_karaoke_matcher, lookahead_is_case_insensitive) {
+	EXPECT_EQ((karaoke_match_result{1, 3}),
+	          auto_match_karaoke({"Oh... ", "Nan", "ka ", "ta", "ri", "nai"}, "Oh…なんか足りない"));
+}