add utf8 unit test

This commit is contained in:
Arvid Norberg 2014-05-03 05:09:21 +00:00
parent 0ed91e248f
commit a52aadc713
4 changed files with 280 additions and 1 deletions

View File

@ -88,6 +88,7 @@ feature launcher : none valgrind : composite ;
feature.compose <launcher>valgrind : <testing.launcher>"valgrind --tool=memcheck -v --num-callers=20 --read-var-info=yes --track-origins=yes --error-exitcode=222 --suppressions=valgrind_suppressions.txt" <valgrind>on ;
test-suite libtorrent :
[ run test_utf8.cpp ]
[ run test_gzip.cpp ]
[ run test_bitfield.cpp ]
[ run test_torrent_info.cpp ]

View File

@ -42,7 +42,7 @@ int test_main()
{
std::vector<char> zipped;
error_code ec;
int r = load_file(combine_path("..", "zeroes.gz"), zipped, ec, 1000000);
load_file(combine_path("..", "zeroes.gz"), zipped, ec, 1000000);
if (ec) fprintf(stderr, "failed to open file: (%d) %s\n", ec.value()
, ec.message().c_str());
TEST_CHECK(!ec);

128
test/test_utf8.cpp Normal file
View File

@ -0,0 +1,128 @@
/*
Copyright (c) 2014, Arvid Norberg
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the distribution.
* Neither the name of the author nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include "test.hpp"
#include "libtorrent/utf8.hpp"
#include "libtorrent/ConvertUTF.h"
#include "setup_transfer.hpp" // for load_file
#include "file.hpp" // for combine_path
#include <vector>
using namespace libtorrent;
int test_main()
{
std::vector<char> utf8_source;
error_code ec;
load_file(combine_path("..", "utf8_test.txt"), utf8_source, ec, 1000000);
if (ec) fprintf(stderr, "failed to open file: (%d) %s\n", ec.value()
, ec.message().c_str());
TEST_CHECK(!ec);
// test lower level conversions
// utf8 -> utf16 -> utf32 -> utf8
{
std::vector<UTF16> utf16(utf8_source.size());
UTF8 const* in8 = (UTF8 const*)&utf8_source[0];
UTF16* out16 = &utf16[0];
ConversionResult ret = ConvertUTF8toUTF16(&in8, in8 + utf8_source.size()
, &out16, out16 + utf16.size(), strictConversion);
TEST_EQUAL(ret, conversionOK);
std::vector<UTF32> utf32(utf8_source.size());
UTF16 const* in16 = &utf16[0];
UTF32* out32 = &utf32[0];
ret = ConvertUTF16toUTF32(&in16, out16
, &out32, out32 + utf32.size(), strictConversion);
TEST_EQUAL(ret, conversionOK);
std::vector<UTF8> utf8(utf8_source.size());
UTF32 const* in32 = &utf32[0];
UTF8* out8 = &utf8[0];
ret = ConvertUTF32toUTF8(&in32, out32
, &out8, out8 + utf8.size(), strictConversion);
TEST_EQUAL(ret, conversionOK);
TEST_EQUAL(out8 - &utf8[0], utf8_source.size());
TEST_CHECK(std::equal(&utf8[0], out8, (UTF8 const*)&utf8_source[0]));
}
// utf8 -> utf32 -> utf16 -> utf8
{
std::vector<UTF32> utf32(utf8_source.size());
UTF8 const* in8 = (UTF8 const*)&utf8_source[0];
UTF32* out32 = &utf32[0];
ConversionResult ret = ConvertUTF8toUTF32(&in8, in8 + utf8_source.size()
, &out32, out32 + utf32.size(), strictConversion);
TEST_EQUAL(ret, conversionOK);
std::vector<UTF16> utf16(utf8_source.size());
UTF32 const* in32 = &utf32[0];
UTF16* out16 = &utf16[0];
ret = ConvertUTF32toUTF16(&in32, out32
, &out16, out16 + utf16.size(), strictConversion);
TEST_EQUAL(ret, conversionOK);
std::vector<UTF8> utf8(utf8_source.size());
UTF16 const* in16 = &utf16[0];
UTF8* out8 = &utf8[0];
ret = ConvertUTF16toUTF8(&in16, out16
, &out8, out8 + utf8.size(), strictConversion);
TEST_EQUAL(ret, conversionOK);
TEST_EQUAL(out8 - &utf8[0], utf8_source.size());
TEST_CHECK(std::equal(&utf8[0], out8, (UTF8 const*)&utf8_source[0]));
}
// test higher level conversions
std::string utf8;
std::copy(utf8_source.begin(), utf8_source.end(), std::back_inserter(utf8));
std::wstring wide;
utf8_conv_result_t ret = utf8_wchar(utf8, wide);
TEST_EQUAL(ret, conversion_ok);
std::string identity;
ret = wchar_utf8(wide, identity);
TEST_EQUAL(ret, conversion_ok);
TEST_EQUAL(utf8, identity);
return 0;
}

150
test/utf8_test.txt Normal file
View File

@ -0,0 +1,150 @@
Sentences that contain all letters commonly used in a language
--------------------------------------------------------------
Markus Kuhn <http://www.cl.cam.ac.uk/~mgk25/> -- 2012-04-11
This is an example of a plain-text file encoded in UTF-8.
Danish (da)
---------
Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen
Wolther spillede på xylofon.
(= Quiz contestants were eating strawberries with cream while Wolther
the circus clown played on xylophone.)
German (de)
-----------
Falsches Üben von Xylophonmusik quält jeden größeren Zwerg
(= Wrongful practicing of xylophone music tortures every larger dwarf)
Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich
(= Twelve boxing fighters hunted Eva across the dike of Sylt)
Heizölrückstoßabdämpfung
(= fuel oil recoil absorber)
(jqvwxy missing, but all non-ASCII letters in one word)
Greek (el)
----------
Γαζέες καὶ μυρτιὲς δὲν θὰ βρῶ πιὰ στὸ χρυσαφὶ ξέφωτο
(= No more shall I see acacias or myrtles in the golden clearing)
Ξεσκεπάζω τὴν ψυχοφθόρα βδελυγμία
(= I uncover the soul-destroying abhorrence)
English (en)
------------
The quick brown fox jumps over the lazy dog
Spanish (es)
------------
El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y
frío, añoraba a su querido cachorro.
(Contains every letter and every accent, but not every combination
of vowel + acute.)
French (fr)
-----------
Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à
côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce
qui lui permet de penser à la cænogenèse de l'être dont il est question
dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui,
pense-t-il, diminue çà et là la qualité de son œuvre.
l'île exiguë
Où l'obèse jury mûr
Fête l'haï volapük,
Âne ex aéquo au whist,
Ôtez ce vœu déçu.
Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en
canoë au delà des îles, près du mälström où brûlent les novæ.
Irish Gaelic (ga)
-----------------
D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh
Hungarian (hu)
--------------
Árvíztűrő tükörfúrógép
(= flood-proof mirror-drilling machine, only all non-ASCII letters)
Icelandic (is)
--------------
Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa
Sævör grét áðan því úlpan var ónýt
(some ASCII letters missing)
Japanese (jp)
-------------
Hiragana: (Iroha)
いろはにほへとちりぬるを
わかよたれそつねならむ
うゐのおくやまけふこえて
あさきゆめみしゑひもせす
Katakana:
イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム
ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン
Hebrew (iw)
-----------
? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה
Polish (pl)
-----------
Pchnąć w tę łódź jeża lub ośm skrzyń fig
(= To push a hedgehog or eight bins of figs in this boat)
Russian (ru)
------------
В чащах юга жил бы цитрус? Да, но фальшивый экземпляр!
(= Would a citrus live in the bushes of south? Yes, but only a fake one!)
Съешь же ещё этих мягких французских булок да выпей чаю
(= Eat some more of these fresh French loaves and have some tea)
Thai (th)
---------
[--------------------------|------------------------]
๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน
จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร
ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย
ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ
[The copyright for the Thai example is owned by The Computer
Association of Thailand under the Royal Patronage of His Majesty the
King.]
Turkish (tr)
------------
Pijamalı hasta, yağız şoföre çabucak güvendi.
(=Patient with pajamas, trusted swarthy driver quickly)
Special thanks to the people from all over the world who contributed
these sentences since 1999.
A much larger collection of such pangrams is now available at
http://en.wikipedia.org/wiki/List_of_pangrams