diff --git a/test/Jamfile b/test/Jamfile index bcc153e1b..1b7070731 100644 --- a/test/Jamfile +++ b/test/Jamfile @@ -88,6 +88,7 @@ feature launcher : none valgrind : composite ; feature.compose valgrind : "valgrind --tool=memcheck -v --num-callers=20 --read-var-info=yes --track-origins=yes --error-exitcode=222 --suppressions=valgrind_suppressions.txt" on ; test-suite libtorrent : + [ run test_utf8.cpp ] [ run test_gzip.cpp ] [ run test_bitfield.cpp ] [ run test_torrent_info.cpp ] diff --git a/test/test_gzip.cpp b/test/test_gzip.cpp index 940c552bb..857612bdc 100644 --- a/test/test_gzip.cpp +++ b/test/test_gzip.cpp @@ -42,7 +42,7 @@ int test_main() { std::vector zipped; error_code ec; - int r = load_file(combine_path("..", "zeroes.gz"), zipped, ec, 1000000); + load_file(combine_path("..", "zeroes.gz"), zipped, ec, 1000000); if (ec) fprintf(stderr, "failed to open file: (%d) %s\n", ec.value() , ec.message().c_str()); TEST_CHECK(!ec); diff --git a/test/test_utf8.cpp b/test/test_utf8.cpp new file mode 100644 index 000000000..f8fbad1d9 --- /dev/null +++ b/test/test_utf8.cpp @@ -0,0 +1,128 @@ +/* + +Copyright (c) 2014, Arvid Norberg +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the distribution. + * Neither the name of the author nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+#include "test.hpp"
+#include "libtorrent/utf8.hpp"
+#include "libtorrent/ConvertUTF.h"
+#include "setup_transfer.hpp" // for load_file
+#include "file.hpp" // for combine_path
+
+#include <vector>
+
+using namespace libtorrent;
+
+// Round-trips ../utf8_test.txt through the low-level ConvertUTF routines (in
+// both transcoding orders) and through utf8_wchar()/wchar_utf8(), checking that
+// the original bytes come back. Returns 1 early if the file could not be used.
+int test_main()
+{
+	std::vector<char> utf8_source;
+	error_code ec;
+	load_file(combine_path("..", "utf8_test.txt"), utf8_source, ec, 1000000);
+	if (ec) fprintf(stderr, "failed to open file: (%d) %s\n", ec.value()
+		, ec.message().c_str());
+	TEST_CHECK(!ec);
+	TEST_CHECK(!utf8_source.empty());
+	// failure is recorded above; &utf8_source[0] is undefined on an empty vector
+	if (ec || utf8_source.empty()) return 1;
+
+	// test lower level conversions. No code point takes more UTF-16 or UTF-32
+	// units than UTF-8 bytes, so utf8_source.size() elements fit every buffer.
+	// A call leaves its target pointer past the last unit written (next source end).
+
+	// utf8 -> utf16 -> utf32 -> utf8
+	{
+		std::vector<UTF16> utf16(utf8_source.size());
+		UTF8 const* in8 = reinterpret_cast<UTF8 const*>(&utf8_source[0]);
+		UTF16* out16 = &utf16[0];
+		ConversionResult ret = ConvertUTF8toUTF16(&in8, in8 + utf8_source.size()
+			, &out16, out16 + utf16.size(), strictConversion);
+		TEST_EQUAL(ret, conversionOK);
+
+		std::vector<UTF32> utf32(utf8_source.size());
+		UTF16 const* in16 = &utf16[0];
+		UTF32* out32 = &utf32[0];
+		ret = ConvertUTF16toUTF32(&in16, out16
+			, &out32, out32 + utf32.size(), strictConversion);
+		TEST_EQUAL(ret, conversionOK);
+
+		std::vector<UTF8> utf8(utf8_source.size());
+		UTF32 const* in32 = &utf32[0];
+		UTF8* out8 = &utf8[0];
+		ret = ConvertUTF32toUTF8(&in32, out32
+			, &out8, out8 + utf8.size(), strictConversion);
+		TEST_EQUAL(ret, conversionOK);
+
+		TEST_EQUAL(std::size_t(out8 - &utf8[0]), utf8_source.size()); // unsigned on both sides
+		TEST_CHECK(std::equal(&utf8[0], out8, reinterpret_cast<UTF8 const*>(&utf8_source[0])));
+	}
+
+	// utf8 -> utf32 -> utf16 -> utf8
+	{
+		std::vector<UTF32> utf32(utf8_source.size());
+		UTF8 const* in8 = reinterpret_cast<UTF8 const*>(&utf8_source[0]);
+		UTF32* out32 = &utf32[0];
+		ConversionResult ret = ConvertUTF8toUTF32(&in8, in8 + utf8_source.size()
+			, &out32, out32 + utf32.size(), strictConversion);
+		TEST_EQUAL(ret, conversionOK);
+
+		std::vector<UTF16> utf16(utf8_source.size());
+		UTF32 const* in32 = &utf32[0];
+		UTF16* out16 = &utf16[0];
+		ret = ConvertUTF32toUTF16(&in32, out32
+			, &out16, out16 + utf16.size(), strictConversion);
+		TEST_EQUAL(ret, conversionOK);
+
+		std::vector<UTF8> utf8(utf8_source.size());
+		UTF16 const* in16 = &utf16[0];
+		UTF8* out8 = &utf8[0];
+		ret = ConvertUTF16toUTF8(&in16, out16
+			, &out8, out8 + utf8.size(), strictConversion);
+		TEST_EQUAL(ret, conversionOK);
+
+		TEST_EQUAL(std::size_t(out8 - &utf8[0]), utf8_source.size()); // unsigned on both sides
+		TEST_CHECK(std::equal(&utf8[0], out8, reinterpret_cast<UTF8 const*>(&utf8_source[0])));
+	}
+
+	// test higher level conversions
+	std::string utf8(utf8_source.begin(), utf8_source.end());
+	std::wstring wide;
+	utf8_conv_result_t ret = utf8_wchar(utf8, wide);
+	TEST_EQUAL(ret, conversion_ok);
+
+	std::string identity;
+	ret = wchar_utf8(wide, identity);
+	TEST_EQUAL(ret, conversion_ok);
+	TEST_EQUAL(utf8, identity);
+	return 0;
+}
+
diff --git a/test/utf8_test.txt b/test/utf8_test.txt new file mode 100644 index 000000000..81e850f2b --- /dev/null +++ b/test/utf8_test.txt @@ -0,0 +1,150 @@ +Sentences that contain all letters commonly used in a language +-------------------------------------------------------------- + +Markus Kuhn -- 2012-04-11 + +This is an example of a plain-text file encoded in UTF-8. + + +Danish (da) +--------- + + Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen + Wolther spillede på xylofon. 
+ (= Quiz contestants were eating strawbery with cream while Wolther + the circus clown played on xylophone.) + +German (de) +----------- + + Falsches Üben von Xylophonmusik quält jeden größeren Zwerg + (= Wrongful practicing of xylophone music tortures every larger dwarf) + + Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich + (= Twelve boxing fighters hunted Eva across the dike of Sylt) + + Heizölrückstoßabdämpfung + (= fuel oil recoil absorber) + (jqvwxy missing, but all non-ASCII letters in one word) + +Greek (el) +---------- + + Γαζέες καὶ μυρτιὲς δὲν θὰ βρῶ πιὰ στὸ χρυσαφὶ ξέφωτο + (= No more shall I see acacias or myrtles in the golden clearing) + + Ξεσκεπάζω τὴν ψυχοφθόρα βδελυγμία + (= I uncover the soul-destroying abhorrence) + +English (en) +------------ + + The quick brown fox jumps over the lazy dog + +Spanish (es) +------------ + + El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y + frío, añoraba a su querido cachorro. + (Contains every letter and every accent, but not every combination + of vowel + acute.) + +French (fr) +----------- + + Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à + côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce + qui lui permet de penser à la cænogenèse de l'être dont il est question + dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui, + pense-t-il, diminue çà et là la qualité de son œuvre. + + l'île exiguë + Où l'obèse jury mûr + Fête l'haï volapük, + Âne ex aéquo au whist, + Ôtez ce vœu déçu. + + Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en + canoë au delà des îles, près du mälström où brûlent les novæ. 
+ +Irish Gaelic (ga) +----------------- + + D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh + +Hungarian (hu) +-------------- + + Árvíztűrő tükörfúrógép + (= flood-proof mirror-drilling machine, only all non-ASCII letters) + +Icelandic (is) +-------------- + + Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa + + Sævör grét áðan því úlpan var ónýt + (some ASCII letters missing) + +Japanese (jp) +------------- + + Hiragana: (Iroha) + + いろはにほへとちりぬるを + わかよたれそつねならむ + うゐのおくやまけふこえて + あさきゆめみしゑひもせす + + Katakana: + + イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム + ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン + +Hebrew (iw) +----------- + + ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה + +Polish (pl) +----------- + + Pchnąć w tę łódź jeża lub ośm skrzyń fig + (= To push a hedgehog or eight bins of figs in this boat) + +Russian (ru) +------------ + + В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! + (= Would a citrus live in the bushes of south? Yes, but only a fake one!) + + Съешь же ещё этих мягких французских булок да выпей чаю + (= Eat some more of these fresh French loafs and have some tea) + +Thai (th) +--------- + + [--------------------------|------------------------] + ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน + จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร + ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย + ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ + + [The copyright for the Thai example is owned by The Computer + Association of Thailand under the Royal Patronage of His Majesty the + King.] + +Turkish (tr) +------------ + + Pijamalı hasta, yağız şoföre çabucak güvendi. + (=Patient with pajamas, trusted swarthy driver quickly) + + +Special thanks to the people from all over the world who contributed +these sentences since 1999. + +A much larger collection of such pangrams is now available at + + http://en.wikipedia.org/wiki/List_of_pangrams +