diff options
author | Hans-Christoph Steiner <hans@eds.org> | 2013-08-13 15:43:01 -0400 |
---|---|---|
committer | Hans-Christoph Steiner <hans@eds.org> | 2013-08-13 15:43:01 -0400 |
commit | 4228998fd796fa2f9e84fb73632e0a07cc7cd188 (patch) | |
tree | 15b2336f351468fedd0c39e9de4ad905a686f3b0 /test/fts4unicode.test | |
parent | bdee7cf7d974b2f70d5934786c5666006e7360be (diff) | |
parent | 08119c361d1181b3e8f1abb429236e488a664753 (diff) |
Merge tag 'upstream/2.2.1'
Upstream version 2.2.1
# gpg: Signature made Tue 13 Aug 2013 03:42:56 PM EDT using RSA key ID 374BBE81
# gpg: Good signature from "Hans-Christoph Steiner <hans@at.or.at>"
# gpg: aka "[jpeg image of size 5408]"
# gpg: aka "Hans-Christoph Steiner <hs420@nyu.edu>"
# gpg: aka "Hans-Christoph Steiner <hans@eds.org>"
# gpg: aka "Hans-Christoph Steiner <hans@guardianproject.info>"
# gpg: aka "Hans-Christoph Steiner <hansi@nyu.edu>"
# gpg: aka "Hans-Christoph Steiner <hans@guardianproject.info>"
Diffstat (limited to 'test/fts4unicode.test')
-rw-r--r-- | test/fts4unicode.test | 17 |
1 files changed, 9 insertions, 8 deletions
diff --git a/test/fts4unicode.test b/test/fts4unicode.test
index 0ac60a6..8bd83f6 100644
--- a/test/fts4unicode.test
+++ b/test/fts4unicode.test
@@ -44,12 +44,12 @@ proc do_unicode_token_test3 {tn args} {
 }
 
 do_unicode_token_test 1.0 {a B c D} {0 a a 1 b B 2 c c 3 d D}
-do_unicode_token_test 1.1 {Ä Ö Ü} {0 ä Ä 1 ö Ö 2 ü Ü}
-do_unicode_token_test 1.2 {xÄx xÖx xÜx} {0 xäx xÄx 1 xöx xÖx 2 xüx xÜx}
+do_unicode_token_test 1.1 {Ä Ö Ãœ} {0 ä Ä 1 ö Ö 2 ü Ãœ}
+do_unicode_token_test 1.2 {xÄx xÖx xÃœx} {0 xäx xÄx 1 xöx xÖx 2 xüx xÃœx}
 
 # 0x00DF is a small "sharp s". 0x1E9E is a capital sharp s.
 do_unicode_token_test 1.3 "\uDF" "0 \uDF \uDF"
-do_unicode_token_test 1.4 "\u1E9E" "0 ß \u1E9E"
+do_unicode_token_test 1.4 "\u1E9E" "0 ß \u1E9E"
 do_unicode_token_test 1.5 "\u1E9E" "0 \uDF \u1E9E"
 
 do_unicode_token_test 1.6 "The quick brown fox" {
@@ -60,12 +60,15 @@ do_unicode_token_test 1.7 "The\u00bfquick\u224ebrown\u2263fox" {
 }
 
 do_unicode_token_test2 1.8 {a B c D} {0 a a 1 b B 2 c c 3 d D}
-do_unicode_token_test2 1.9 {Ä Ö Ü} {0 a Ä 1 o Ö 2 u Ü}
-do_unicode_token_test2 1.10 {xÄx xÖx xÜx} {0 xax xÄx 1 xox xÖx 2 xux xÜx}
+do_unicode_token_test2 1.9 {Ä Ö Ãœ} {0 a Ä 1 o Ö 2 u Ãœ}
+do_unicode_token_test2 1.10 {xÄx xÖx xÃœx} {0 xax xÄx 1 xox xÖx 2 xux xÃœx}
 
 # Check that diacritics are removed if remove_diacritics=1 is specified.
 # And that they do not break tokens.
-do_unicode_token_test2 1.10 "xx\u0301xx" "0 xxxx xx\u301xx"
+do_unicode_token_test2 1.11 "xx\u0301xx" "0 xxxx xx\u301xx"
+
+# Title-case mappings work
+do_unicode_token_test 1.12 "\u01c5" "0 \u01c6 \u01c5"
 
 #-------------------------------------------------------------------------
 #
@@ -383,5 +386,3 @@ foreach T $tokenizers {
 
 
 finish_test
-
-