From 08119c361d1181b3e8f1abb429236e488a664753 Mon Sep 17 00:00:00 2001 From: Hans-Christoph Steiner Date: Tue, 13 Aug 2013 15:42:54 -0400 Subject: Imported Upstream version 2.2.1 --- test/fts4unicode.test | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'test/fts4unicode.test') diff --git a/test/fts4unicode.test b/test/fts4unicode.test index 0ac60a6..8bd83f6 100644 --- a/test/fts4unicode.test +++ b/test/fts4unicode.test @@ -44,12 +44,12 @@ proc do_unicode_token_test3 {tn args} { } do_unicode_token_test 1.0 {a B c D} {0 a a 1 b B 2 c c 3 d D} -do_unicode_token_test 1.1 {Ä Ö Ü} {0 ä Ä 1 ö Ö 2 ü Ü} -do_unicode_token_test 1.2 {xÄx xÖx xÜx} {0 xäx xÄx 1 xöx xÖx 2 xüx xÜx} +do_unicode_token_test 1.1 {Ä Ö Ãœ} {0 ä Ä 1 ö Ö 2 ü Ãœ} +do_unicode_token_test 1.2 {xÄx xÖx xÃœx} {0 xäx xÄx 1 xöx xÖx 2 xüx xÃœx} # 0x00DF is a small "sharp s". 0x1E9E is a capital sharp s. do_unicode_token_test 1.3 "\uDF" "0 \uDF \uDF" -do_unicode_token_test 1.4 "\u1E9E" "0 ß \u1E9E" +do_unicode_token_test 1.4 "\u1E9E" "0 ß \u1E9E" do_unicode_token_test 1.5 "\u1E9E" "0 \uDF \u1E9E" do_unicode_token_test 1.6 "The quick brown fox" { @@ -60,12 +60,15 @@ do_unicode_token_test 1.7 "The\u00bfquick\u224ebrown\u2263fox" { } do_unicode_token_test2 1.8 {a B c D} {0 a a 1 b B 2 c c 3 d D} -do_unicode_token_test2 1.9 {Ä Ö Ü} {0 a Ä 1 o Ö 2 u Ü} -do_unicode_token_test2 1.10 {xÄx xÖx xÜx} {0 xax xÄx 1 xox xÖx 2 xux xÜx} +do_unicode_token_test2 1.9 {Ä Ö Ãœ} {0 a Ä 1 o Ö 2 u Ãœ} +do_unicode_token_test2 1.10 {xÄx xÖx xÃœx} {0 xax xÄx 1 xox xÖx 2 xux xÃœx} # Check that diacritics are removed if remove_diacritics=1 is specified. # And that they do not break tokens. -do_unicode_token_test2 1.10 "xx\u0301xx" "0 xxxx xx\u301xx" +do_unicode_token_test2 1.11 "xx\u0301xx" "0 xxxx xx\u301xx" + +# Title-case mappings work +do_unicode_token_test 1.12 "\u01c5" "0 \u01c6 \u01c5" #------------------------------------------------------------------------- # @@ -383,5 +386,3 @@ foreach T $tokenizers { finish_test - - -- cgit v1.2.3