add utf8 unit test
This commit is contained in:
parent
0ed91e248f
commit
a52aadc713
|
@ -88,6 +88,7 @@ feature launcher : none valgrind : composite ;
|
|||
feature.compose <launcher>valgrind : <testing.launcher>"valgrind --tool=memcheck -v --num-callers=20 --read-var-info=yes --track-origins=yes --error-exitcode=222 --suppressions=valgrind_suppressions.txt" <valgrind>on ;
|
||||
|
||||
test-suite libtorrent :
|
||||
[ run test_utf8.cpp ]
|
||||
[ run test_gzip.cpp ]
|
||||
[ run test_bitfield.cpp ]
|
||||
[ run test_torrent_info.cpp ]
|
||||
|
|
|
@ -42,7 +42,7 @@ int test_main()
|
|||
{
|
||||
std::vector<char> zipped;
|
||||
error_code ec;
|
||||
int r = load_file(combine_path("..", "zeroes.gz"), zipped, ec, 1000000);
|
||||
load_file(combine_path("..", "zeroes.gz"), zipped, ec, 1000000);
|
||||
if (ec) fprintf(stderr, "failed to open file: (%d) %s\n", ec.value()
|
||||
, ec.message().c_str());
|
||||
TEST_CHECK(!ec);
|
||||
|
|
|
@ -0,0 +1,128 @@
|
|||
/*
|
||||
|
||||
Copyright (c) 2014, Arvid Norberg
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the author nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include "test.hpp"
|
||||
#include "libtorrent/utf8.hpp"
|
||||
#include "libtorrent/ConvertUTF.h"
|
||||
#include "setup_transfer.hpp" // for load_file
|
||||
#include "file.hpp" // for combine_path
|
||||
|
||||
#include <vector>
|
||||
|
||||
using namespace libtorrent;
|
||||
|
||||
int test_main()
|
||||
{
|
||||
std::vector<char> utf8_source;
|
||||
error_code ec;
|
||||
load_file(combine_path("..", "utf8_test.txt"), utf8_source, ec, 1000000);
|
||||
if (ec) fprintf(stderr, "failed to open file: (%d) %s\n", ec.value()
|
||||
, ec.message().c_str());
|
||||
TEST_CHECK(!ec);
|
||||
|
||||
// test lower level conversions
|
||||
|
||||
// utf8 -> utf16 -> utf32 -> utf8
|
||||
{
|
||||
std::vector<UTF16> utf16(utf8_source.size());
|
||||
UTF8 const* in8 = (UTF8 const*)&utf8_source[0];
|
||||
UTF16* out16 = &utf16[0];
|
||||
ConversionResult ret = ConvertUTF8toUTF16(&in8, in8 + utf8_source.size()
|
||||
, &out16, out16 + utf16.size(), strictConversion);
|
||||
|
||||
TEST_EQUAL(ret, conversionOK);
|
||||
|
||||
std::vector<UTF32> utf32(utf8_source.size());
|
||||
UTF16 const* in16 = &utf16[0];
|
||||
UTF32* out32 = &utf32[0];
|
||||
ret = ConvertUTF16toUTF32(&in16, out16
|
||||
, &out32, out32 + utf32.size(), strictConversion);
|
||||
|
||||
TEST_EQUAL(ret, conversionOK);
|
||||
|
||||
std::vector<UTF8> utf8(utf8_source.size());
|
||||
UTF32 const* in32 = &utf32[0];
|
||||
UTF8* out8 = &utf8[0];
|
||||
ret = ConvertUTF32toUTF8(&in32, out32
|
||||
, &out8, out8 + utf8.size(), strictConversion);
|
||||
|
||||
TEST_EQUAL(ret, conversionOK);
|
||||
TEST_EQUAL(out8 - &utf8[0], utf8_source.size());
|
||||
TEST_CHECK(std::equal(&utf8[0], out8, (UTF8 const*)&utf8_source[0]));
|
||||
}
|
||||
|
||||
// utf8 -> utf32 -> utf16 -> utf8
|
||||
{
|
||||
std::vector<UTF32> utf32(utf8_source.size());
|
||||
UTF8 const* in8 = (UTF8 const*)&utf8_source[0];
|
||||
UTF32* out32 = &utf32[0];
|
||||
ConversionResult ret = ConvertUTF8toUTF32(&in8, in8 + utf8_source.size()
|
||||
, &out32, out32 + utf32.size(), strictConversion);
|
||||
|
||||
TEST_EQUAL(ret, conversionOK);
|
||||
|
||||
std::vector<UTF16> utf16(utf8_source.size());
|
||||
UTF32 const* in32 = &utf32[0];
|
||||
UTF16* out16 = &utf16[0];
|
||||
ret = ConvertUTF32toUTF16(&in32, out32
|
||||
, &out16, out16 + utf16.size(), strictConversion);
|
||||
|
||||
TEST_EQUAL(ret, conversionOK);
|
||||
|
||||
std::vector<UTF8> utf8(utf8_source.size());
|
||||
UTF16 const* in16 = &utf16[0];
|
||||
UTF8* out8 = &utf8[0];
|
||||
ret = ConvertUTF16toUTF8(&in16, out16
|
||||
, &out8, out8 + utf8.size(), strictConversion);
|
||||
|
||||
TEST_EQUAL(ret, conversionOK);
|
||||
TEST_EQUAL(out8 - &utf8[0], utf8_source.size());
|
||||
TEST_CHECK(std::equal(&utf8[0], out8, (UTF8 const*)&utf8_source[0]));
|
||||
}
|
||||
|
||||
// test higher level conversions
|
||||
|
||||
std::string utf8;
|
||||
std::copy(utf8_source.begin(), utf8_source.end(), std::back_inserter(utf8));
|
||||
|
||||
std::wstring wide;
|
||||
utf8_conv_result_t ret = utf8_wchar(utf8, wide);
|
||||
TEST_EQUAL(ret, conversion_ok);
|
||||
|
||||
std::string identity;
|
||||
ret = wchar_utf8(wide, identity);
|
||||
TEST_EQUAL(ret, conversion_ok);
|
||||
|
||||
TEST_EQUAL(utf8, identity);
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -0,0 +1,150 @@
|
|||
Sentences that contain all letters commonly used in a language
|
||||
--------------------------------------------------------------
|
||||
|
||||
Markus Kuhn <http://www.cl.cam.ac.uk/~mgk25/> -- 2012-04-11
|
||||
|
||||
This is an example of a plain-text file encoded in UTF-8.
|
||||
|
||||
|
||||
Danish (da)
|
||||
---------
|
||||
|
||||
Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen
|
||||
Wolther spillede på xylofon.
|
||||
(= Quiz contestants were eating strawberries with cream while Wolther
|
||||
the circus clown played on xylophone.)
|
||||
|
||||
German (de)
|
||||
-----------
|
||||
|
||||
Falsches Üben von Xylophonmusik quält jeden größeren Zwerg
|
||||
(= Wrongful practicing of xylophone music tortures every larger dwarf)
|
||||
|
||||
Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich
|
||||
(= Twelve boxing fighters hunted Eva across the dike of Sylt)
|
||||
|
||||
Heizölrückstoßabdämpfung
|
||||
(= fuel oil recoil absorber)
|
||||
(jqvwxy missing, but all non-ASCII letters in one word)
|
||||
|
||||
Greek (el)
|
||||
----------
|
||||
|
||||
Γαζέες καὶ μυρτιὲς δὲν θὰ βρῶ πιὰ στὸ χρυσαφὶ ξέφωτο
|
||||
(= No more shall I see acacias or myrtles in the golden clearing)
|
||||
|
||||
Ξεσκεπάζω τὴν ψυχοφθόρα βδελυγμία
|
||||
(= I uncover the soul-destroying abhorrence)
|
||||
|
||||
English (en)
|
||||
------------
|
||||
|
||||
The quick brown fox jumps over the lazy dog
|
||||
|
||||
Spanish (es)
|
||||
------------
|
||||
|
||||
El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y
|
||||
frío, añoraba a su querido cachorro.
|
||||
(Contains every letter and every accent, but not every combination
|
||||
of vowel + acute.)
|
||||
|
||||
French (fr)
|
||||
-----------
|
||||
|
||||
Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à
|
||||
côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce
|
||||
qui lui permet de penser à la cænogenèse de l'être dont il est question
|
||||
dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui,
|
||||
pense-t-il, diminue çà et là la qualité de son œuvre.
|
||||
|
||||
l'île exiguë
|
||||
Où l'obèse jury mûr
|
||||
Fête l'haï volapük,
|
||||
Âne ex aéquo au whist,
|
||||
Ôtez ce vœu déçu.
|
||||
|
||||
Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en
|
||||
canoë au delà des îles, près du mälström où brûlent les novæ.
|
||||
|
||||
Irish Gaelic (ga)
|
||||
-----------------
|
||||
|
||||
D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh
|
||||
|
||||
Hungarian (hu)
|
||||
--------------
|
||||
|
||||
Árvíztűrő tükörfúrógép
|
||||
(= flood-proof mirror-drilling machine, only all non-ASCII letters)
|
||||
|
||||
Icelandic (is)
|
||||
--------------
|
||||
|
||||
Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa
|
||||
|
||||
Sævör grét áðan því úlpan var ónýt
|
||||
(some ASCII letters missing)
|
||||
|
||||
Japanese (jp)
|
||||
-------------
|
||||
|
||||
Hiragana: (Iroha)
|
||||
|
||||
いろはにほへとちりぬるを
|
||||
わかよたれそつねならむ
|
||||
うゐのおくやまけふこえて
|
||||
あさきゆめみしゑひもせす
|
||||
|
||||
Katakana:
|
||||
|
||||
イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム
|
||||
ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン
|
||||
|
||||
Hebrew (iw)
|
||||
-----------
|
||||
|
||||
? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה
|
||||
|
||||
Polish (pl)
|
||||
-----------
|
||||
|
||||
Pchnąć w tę łódź jeża lub ośm skrzyń fig
|
||||
(= To push a hedgehog or eight bins of figs in this boat)
|
||||
|
||||
Russian (ru)
|
||||
------------
|
||||
|
||||
В чащах юга жил бы цитрус? Да, но фальшивый экземпляр!
|
||||
(= Would a citrus live in the bushes of south? Yes, but only a fake one!)
|
||||
|
||||
Съешь же ещё этих мягких французских булок да выпей чаю
|
||||
(= Eat some more of these fresh French loaves and have some tea)
|
||||
|
||||
Thai (th)
|
||||
---------
|
||||
|
||||
[--------------------------|------------------------]
|
||||
๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน
|
||||
จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร
|
||||
ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย
|
||||
ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ
|
||||
|
||||
[The copyright for the Thai example is owned by The Computer
|
||||
Association of Thailand under the Royal Patronage of His Majesty the
|
||||
King.]
|
||||
|
||||
Turkish (tr)
|
||||
------------
|
||||
|
||||
Pijamalı hasta, yağız şoföre çabucak güvendi.
|
||||
(=Patient with pajamas, trusted swarthy driver quickly)
|
||||
|
||||
|
||||
Special thanks to the people from all over the world who contributed
|
||||
these sentences since 1999.
|
||||
|
||||
A much larger collection of such pangrams is now available at
|
||||
|
||||
http://en.wikipedia.org/wiki/List_of_pangrams
|
||||
|
Loading…
Reference in New Issue