add utf8 unit test
This commit is contained in:
parent
0ed91e248f
commit
a52aadc713
|
@ -88,6 +88,7 @@ feature launcher : none valgrind : composite ;
|
||||||
feature.compose <launcher>valgrind : <testing.launcher>"valgrind --tool=memcheck -v --num-callers=20 --read-var-info=yes --track-origins=yes --error-exitcode=222 --suppressions=valgrind_suppressions.txt" <valgrind>on ;
|
feature.compose <launcher>valgrind : <testing.launcher>"valgrind --tool=memcheck -v --num-callers=20 --read-var-info=yes --track-origins=yes --error-exitcode=222 --suppressions=valgrind_suppressions.txt" <valgrind>on ;
|
||||||
|
|
||||||
test-suite libtorrent :
|
test-suite libtorrent :
|
||||||
|
[ run test_utf8.cpp ]
|
||||||
[ run test_gzip.cpp ]
|
[ run test_gzip.cpp ]
|
||||||
[ run test_bitfield.cpp ]
|
[ run test_bitfield.cpp ]
|
||||||
[ run test_torrent_info.cpp ]
|
[ run test_torrent_info.cpp ]
|
||||||
|
|
|
@ -42,7 +42,7 @@ int test_main()
|
||||||
{
|
{
|
||||||
std::vector<char> zipped;
|
std::vector<char> zipped;
|
||||||
error_code ec;
|
error_code ec;
|
||||||
int r = load_file(combine_path("..", "zeroes.gz"), zipped, ec, 1000000);
|
load_file(combine_path("..", "zeroes.gz"), zipped, ec, 1000000);
|
||||||
if (ec) fprintf(stderr, "failed to open file: (%d) %s\n", ec.value()
|
if (ec) fprintf(stderr, "failed to open file: (%d) %s\n", ec.value()
|
||||||
, ec.message().c_str());
|
, ec.message().c_str());
|
||||||
TEST_CHECK(!ec);
|
TEST_CHECK(!ec);
|
||||||
|
|
|
@ -0,0 +1,128 @@
|
||||||
|
/*
|
||||||
|
|
||||||
|
Copyright (c) 2014, Arvid Norberg
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the author nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived
|
||||||
|
from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "test.hpp"
|
||||||
|
#include "libtorrent/utf8.hpp"
|
||||||
|
#include "libtorrent/ConvertUTF.h"
|
||||||
|
#include "setup_transfer.hpp" // for load_file
|
||||||
|
#include "file.hpp" // for combine_path
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
using namespace libtorrent;
|
||||||
|
|
||||||
|
int test_main()
|
||||||
|
{
|
||||||
|
std::vector<char> utf8_source;
|
||||||
|
error_code ec;
|
||||||
|
load_file(combine_path("..", "utf8_test.txt"), utf8_source, ec, 1000000);
|
||||||
|
if (ec) fprintf(stderr, "failed to open file: (%d) %s\n", ec.value()
|
||||||
|
, ec.message().c_str());
|
||||||
|
TEST_CHECK(!ec);
|
||||||
|
|
||||||
|
// test lower level conversions
|
||||||
|
|
||||||
|
// utf8 -> utf16 -> utf32 -> utf8
|
||||||
|
{
|
||||||
|
std::vector<UTF16> utf16(utf8_source.size());
|
||||||
|
UTF8 const* in8 = (UTF8 const*)&utf8_source[0];
|
||||||
|
UTF16* out16 = &utf16[0];
|
||||||
|
ConversionResult ret = ConvertUTF8toUTF16(&in8, in8 + utf8_source.size()
|
||||||
|
, &out16, out16 + utf16.size(), strictConversion);
|
||||||
|
|
||||||
|
TEST_EQUAL(ret, conversionOK);
|
||||||
|
|
||||||
|
std::vector<UTF32> utf32(utf8_source.size());
|
||||||
|
UTF16 const* in16 = &utf16[0];
|
||||||
|
UTF32* out32 = &utf32[0];
|
||||||
|
ret = ConvertUTF16toUTF32(&in16, out16
|
||||||
|
, &out32, out32 + utf32.size(), strictConversion);
|
||||||
|
|
||||||
|
TEST_EQUAL(ret, conversionOK);
|
||||||
|
|
||||||
|
std::vector<UTF8> utf8(utf8_source.size());
|
||||||
|
UTF32 const* in32 = &utf32[0];
|
||||||
|
UTF8* out8 = &utf8[0];
|
||||||
|
ret = ConvertUTF32toUTF8(&in32, out32
|
||||||
|
, &out8, out8 + utf8.size(), strictConversion);
|
||||||
|
|
||||||
|
TEST_EQUAL(ret, conversionOK);
|
||||||
|
TEST_EQUAL(out8 - &utf8[0], utf8_source.size());
|
||||||
|
TEST_CHECK(std::equal(&utf8[0], out8, (UTF8 const*)&utf8_source[0]));
|
||||||
|
}
|
||||||
|
|
||||||
|
// utf8 -> utf32 -> utf16 -> utf8
|
||||||
|
{
|
||||||
|
std::vector<UTF32> utf32(utf8_source.size());
|
||||||
|
UTF8 const* in8 = (UTF8 const*)&utf8_source[0];
|
||||||
|
UTF32* out32 = &utf32[0];
|
||||||
|
ConversionResult ret = ConvertUTF8toUTF32(&in8, in8 + utf8_source.size()
|
||||||
|
, &out32, out32 + utf32.size(), strictConversion);
|
||||||
|
|
||||||
|
TEST_EQUAL(ret, conversionOK);
|
||||||
|
|
||||||
|
std::vector<UTF16> utf16(utf8_source.size());
|
||||||
|
UTF32 const* in32 = &utf32[0];
|
||||||
|
UTF16* out16 = &utf16[0];
|
||||||
|
ret = ConvertUTF32toUTF16(&in32, out32
|
||||||
|
, &out16, out16 + utf16.size(), strictConversion);
|
||||||
|
|
||||||
|
TEST_EQUAL(ret, conversionOK);
|
||||||
|
|
||||||
|
std::vector<UTF8> utf8(utf8_source.size());
|
||||||
|
UTF16 const* in16 = &utf16[0];
|
||||||
|
UTF8* out8 = &utf8[0];
|
||||||
|
ret = ConvertUTF16toUTF8(&in16, out16
|
||||||
|
, &out8, out8 + utf8.size(), strictConversion);
|
||||||
|
|
||||||
|
TEST_EQUAL(ret, conversionOK);
|
||||||
|
TEST_EQUAL(out8 - &utf8[0], utf8_source.size());
|
||||||
|
TEST_CHECK(std::equal(&utf8[0], out8, (UTF8 const*)&utf8_source[0]));
|
||||||
|
}
|
||||||
|
|
||||||
|
// test higher level conversions
|
||||||
|
|
||||||
|
std::string utf8;
|
||||||
|
std::copy(utf8_source.begin(), utf8_source.end(), std::back_inserter(utf8));
|
||||||
|
|
||||||
|
std::wstring wide;
|
||||||
|
utf8_conv_result_t ret = utf8_wchar(utf8, wide);
|
||||||
|
TEST_EQUAL(ret, conversion_ok);
|
||||||
|
|
||||||
|
std::string identity;
|
||||||
|
ret = wchar_utf8(wide, identity);
|
||||||
|
TEST_EQUAL(ret, conversion_ok);
|
||||||
|
|
||||||
|
TEST_EQUAL(utf8, identity);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,150 @@
|
||||||
|
Sentences that contain all letters commonly used in a language
|
||||||
|
--------------------------------------------------------------
|
||||||
|
|
||||||
|
Markus Kuhn <http://www.cl.cam.ac.uk/~mgk25/> -- 2012-04-11
|
||||||
|
|
||||||
|
This is an example of a plain-text file encoded in UTF-8.
|
||||||
|
|
||||||
|
|
||||||
|
Danish (da)
|
||||||
|
---------
|
||||||
|
|
||||||
|
Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen
|
||||||
|
Wolther spillede på xylofon.
|
||||||
|
(= Quiz contestants were eating strawberries with cream while Wolther
|
||||||
|
the circus clown played on xylophone.)
|
||||||
|
|
||||||
|
German (de)
|
||||||
|
-----------
|
||||||
|
|
||||||
|
Falsches Üben von Xylophonmusik quält jeden größeren Zwerg
|
||||||
|
(= Wrongful practicing of xylophone music tortures every larger dwarf)
|
||||||
|
|
||||||
|
Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich
|
||||||
|
(= Twelve boxing fighters hunted Eva across the dike of Sylt)
|
||||||
|
|
||||||
|
Heizölrückstoßabdämpfung
|
||||||
|
(= fuel oil recoil absorber)
|
||||||
|
(jqvwxy missing, but all non-ASCII letters in one word)
|
||||||
|
|
||||||
|
Greek (el)
|
||||||
|
----------
|
||||||
|
|
||||||
|
Γαζέες καὶ μυρτιὲς δὲν θὰ βρῶ πιὰ στὸ χρυσαφὶ ξέφωτο
|
||||||
|
(= No more shall I see acacias or myrtles in the golden clearing)
|
||||||
|
|
||||||
|
Ξεσκεπάζω τὴν ψυχοφθόρα βδελυγμία
|
||||||
|
(= I uncover the soul-destroying abhorrence)
|
||||||
|
|
||||||
|
English (en)
|
||||||
|
------------
|
||||||
|
|
||||||
|
The quick brown fox jumps over the lazy dog
|
||||||
|
|
||||||
|
Spanish (es)
|
||||||
|
------------
|
||||||
|
|
||||||
|
El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y
|
||||||
|
frío, añoraba a su querido cachorro.
|
||||||
|
(Contains every letter and every accent, but not every combination
|
||||||
|
of vowel + acute.)
|
||||||
|
|
||||||
|
French (fr)
|
||||||
|
-----------
|
||||||
|
|
||||||
|
Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à
|
||||||
|
côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce
|
||||||
|
qui lui permet de penser à la cænogenèse de l'être dont il est question
|
||||||
|
dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui,
|
||||||
|
pense-t-il, diminue çà et là la qualité de son œuvre.
|
||||||
|
|
||||||
|
l'île exiguë
|
||||||
|
Où l'obèse jury mûr
|
||||||
|
Fête l'haï volapük,
|
||||||
|
Âne ex aéquo au whist,
|
||||||
|
Ôtez ce vœu déçu.
|
||||||
|
|
||||||
|
Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en
|
||||||
|
canoë au delà des îles, près du mälström où brûlent les novæ.
|
||||||
|
|
||||||
|
Irish Gaelic (ga)
|
||||||
|
-----------------
|
||||||
|
|
||||||
|
D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh
|
||||||
|
|
||||||
|
Hungarian (hu)
|
||||||
|
--------------
|
||||||
|
|
||||||
|
Árvíztűrő tükörfúrógép
|
||||||
|
(= flood-proof mirror-drilling machine, only all non-ASCII letters)
|
||||||
|
|
||||||
|
Icelandic (is)
|
||||||
|
--------------
|
||||||
|
|
||||||
|
Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa
|
||||||
|
|
||||||
|
Sævör grét áðan því úlpan var ónýt
|
||||||
|
(some ASCII letters missing)
|
||||||
|
|
||||||
|
Japanese (jp)
|
||||||
|
-------------
|
||||||
|
|
||||||
|
Hiragana: (Iroha)
|
||||||
|
|
||||||
|
いろはにほへとちりぬるを
|
||||||
|
わかよたれそつねならむ
|
||||||
|
うゐのおくやまけふこえて
|
||||||
|
あさきゆめみしゑひもせす
|
||||||
|
|
||||||
|
Katakana:
|
||||||
|
|
||||||
|
イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム
|
||||||
|
ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン
|
||||||
|
|
||||||
|
Hebrew (iw)
|
||||||
|
-----------
|
||||||
|
|
||||||
|
? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה
|
||||||
|
|
||||||
|
Polish (pl)
|
||||||
|
-----------
|
||||||
|
|
||||||
|
Pchnąć w tę łódź jeża lub ośm skrzyń fig
|
||||||
|
(= To push a hedgehog or eight bins of figs in this boat)
|
||||||
|
|
||||||
|
Russian (ru)
|
||||||
|
------------
|
||||||
|
|
||||||
|
В чащах юга жил бы цитрус? Да, но фальшивый экземпляр!
|
||||||
|
(= Would a citrus live in the bushes of south? Yes, but only a fake one!)
|
||||||
|
|
||||||
|
Съешь же ещё этих мягких французских булок да выпей чаю
|
||||||
|
(= Eat some more of these fresh French loaves and have some tea)
|
||||||
|
|
||||||
|
Thai (th)
|
||||||
|
---------
|
||||||
|
|
||||||
|
[--------------------------|------------------------]
|
||||||
|
๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน
|
||||||
|
จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร
|
||||||
|
ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย
|
||||||
|
ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ
|
||||||
|
|
||||||
|
[The copyright for the Thai example is owned by The Computer
|
||||||
|
Association of Thailand under the Royal Patronage of His Majesty the
|
||||||
|
King.]
|
||||||
|
|
||||||
|
Turkish (tr)
|
||||||
|
------------
|
||||||
|
|
||||||
|
Pijamalı hasta, yağız şoföre çabucak güvendi.
|
||||||
|
(=Patient with pajamas, trusted swarthy driver quickly)
|
||||||
|
|
||||||
|
|
||||||
|
Special thanks to the people from all over the world who contributed
|
||||||
|
these sentences since 1999.
|
||||||
|
|
||||||
|
A much larger collection of such pangrams is now available at
|
||||||
|
|
||||||
|
http://en.wikipedia.org/wiki/List_of_pangrams
|
||||||
|
|
Loading…
Reference in New Issue