1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
|
# 2011 January 19
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This file implements tests for SQLite library. The focus of the tests
# in this file is the use of the sqlite_stat4 histogram data on tables
# with many repeated values and only a few distinct values.
#
set testdir [file dirname $argv0]
source $testdir/tester.tcl
ifcapable !stat4&&!stat3 {
finish_test
return
}
set testprefix analyze5
proc eqp {sql {db db}} {
uplevel execsql [list "EXPLAIN QUERY PLAN $sql"] $db
}
proc alpha {blob} {
set ret ""
foreach c [split $blob {}] {
if {[string is alpha $c]} {append ret $c}
}
return $ret
}
db func alpha alpha
db func lindex lindex
unset -nocomplain i t u v w x y z
do_test analyze5-1.0 {
db eval {CREATE TABLE t1(t,u,v TEXT COLLATE nocase,w,x,y,z)}
for {set i 0} {$i < 1000} {incr i} {
set y [expr {$i>=25 && $i<=50}]
set z [expr {($i>=400) + ($i>=700) + ($i>=875)}]
set x $z
set w $z
set t [expr {$z+0.5}]
switch $z {
0 {set u "alpha"; unset x}
1 {set u "bravo"}
2 {set u "charlie"}
3 {set u "delta"; unset w}
}
if {$i%2} {set v $u} {set v [string toupper $u]}
db eval {INSERT INTO t1 VALUES($t,$u,$v,$w,$x,$y,$z)}
}
db eval {
CREATE INDEX t1t ON t1(t); -- 0.5, 1.5, 2.5, and 3.5
CREATE INDEX t1u ON t1(u); -- text
CREATE INDEX t1v ON t1(v); -- mixed case text
CREATE INDEX t1w ON t1(w); -- integers 0, 1, 2 and a few NULLs
CREATE INDEX t1x ON t1(x); -- integers 1, 2, 3 and many NULLs
CREATE INDEX t1y ON t1(y); -- integers 0 and very few 1s
CREATE INDEX t1z ON t1(z); -- integers 0, 1, 2, and 3
ANALYZE;
}
ifcapable stat4 {
db eval {
SELECT DISTINCT lindex(test_decode(sample),0)
FROM sqlite_stat4 WHERE idx='t1u' ORDER BY nlt;
}
} else {
db eval {
SELECT sample FROM sqlite_stat3 WHERE idx='t1u' ORDER BY nlt;
}
}
} {alpha bravo charlie delta}
do_test analyze5-1.1 {
ifcapable stat4 {
db eval {
SELECT DISTINCT lower(lindex(test_decode(sample), 0))
FROM sqlite_stat4 WHERE idx='t1v' ORDER BY 1
}
} else {
db eval {
SELECT lower(sample) FROM sqlite_stat3 WHERE idx='t1v' ORDER BY 1
}
}
} {alpha bravo charlie delta}
ifcapable stat4 {
do_test analyze5-1.2 {
db eval {SELECT idx, count(*) FROM sqlite_stat4 GROUP BY 1 ORDER BY 1}
} {t1t 8 t1u 8 t1v 8 t1w 8 t1x 8 t1y 9 t1z 8}
} else {
do_test analyze5-1.2 {
db eval {SELECT idx, count(*) FROM sqlite_stat3 GROUP BY 1 ORDER BY 1}
} {t1t 4 t1u 4 t1v 4 t1w 4 t1x 4 t1y 2 t1z 4}
}
# Verify that range queries generate the correct row count estimates
#
foreach {testid where index rows} {
1 {z>=0 AND z<=0} t1z 400
2 {z>=1 AND z<=1} t1z 300
3 {z>=2 AND z<=2} t1z 175
4 {z>=3 AND z<=3} t1z 125
5 {z>=4 AND z<=4} t1z 1
6 {z>=-1 AND z<=-1} t1z 1
7 {z>1 AND z<3} t1z 175
8 {z>0 AND z<100} t1z 600
9 {z>=1 AND z<100} t1z 600
10 {z>1 AND z<100} t1z 300
11 {z>=2 AND z<100} t1z 300
12 {z>2 AND z<100} t1z 125
13 {z>=3 AND z<100} t1z 125
14 {z>3 AND z<100} t1z 1
15 {z>=4 AND z<100} t1z 1
16 {z>=-100 AND z<=-1} t1z 1
17 {z>=-100 AND z<=0} t1z 400
18 {z>=-100 AND z<0} t1z 1
19 {z>=-100 AND z<=1} t1z 700
20 {z>=-100 AND z<2} t1z 700
21 {z>=-100 AND z<=2} t1z 875
22 {z>=-100 AND z<3} t1z 875
31 {z>=0.0 AND z<=0.0} t1z 400
32 {z>=1.0 AND z<=1.0} t1z 300
33 {z>=2.0 AND z<=2.0} t1z 175
34 {z>=3.0 AND z<=3.0} t1z 125
35 {z>=4.0 AND z<=4.0} t1z 1
36 {z>=-1.0 AND z<=-1.0} t1z 1
37 {z>1.5 AND z<3.0} t1z 174
38 {z>0.5 AND z<100} t1z 599
39 {z>=1.0 AND z<100} t1z 600
40 {z>1.5 AND z<100} t1z 299
41 {z>=2.0 AND z<100} t1z 300
42 {z>2.1 AND z<100} t1z 124
43 {z>=3.0 AND z<100} t1z 125
44 {z>3.2 AND z<100} t1z 1
45 {z>=4.0 AND z<100} t1z 1
46 {z>=-100 AND z<=-1.0} t1z 1
47 {z>=-100 AND z<=0.0} t1z 400
48 {z>=-100 AND z<0.0} t1z 1
49 {z>=-100 AND z<=1.0} t1z 700
50 {z>=-100 AND z<2.0} t1z 700
51 {z>=-100 AND z<=2.0} t1z 875
52 {z>=-100 AND z<3.0} t1z 875
101 {z=-1} t1z 1
102 {z=0} t1z 400
103 {z=1} t1z 300
104 {z=2} t1z 175
105 {z=3} t1z 125
106 {z=4} t1z 1
107 {z=-10.0} t1z 1
108 {z=0.0} t1z 400
109 {z=1.0} t1z 300
110 {z=2.0} t1z 175
111 {z=3.0} t1z 125
112 {z=4.0} t1z 1
113 {z=1.5} t1z 1
114 {z=2.5} t1z 1
201 {z IN (-1)} t1z 1
202 {z IN (0)} t1z 400
203 {z IN (1)} t1z 300
204 {z IN (2)} t1z 175
205 {z IN (3)} t1z 125
206 {z IN (4)} t1z 1
207 {z IN (0.5)} t1z 1
208 {z IN (0,1)} t1z 700
209 {z IN (0,1,2)} t1z 875
210 {z IN (0,1,2,3)} {} 100
211 {z IN (0,1,2,3,4,5)} {} 100
212 {z IN (1,2)} t1z 475
213 {z IN (2,3)} t1z 300
214 {z=3 OR z=2} t1z 300
215 {z IN (-1,3)} t1z 126
216 {z=-1 OR z=3} t1z 126
300 {y=0} t1y 974
301 {y=1} t1y 26
302 {y=0.1} t1y 1
400 {x IS NULL} t1x 400
} {
# Verify that the expected index is used with the expected row count
# No longer valid due to an EXPLAIN QUERY PLAN output format change
# do_test analyze5-1.${testid}a {
# set x [lindex [eqp "SELECT * FROM t1 WHERE $where"] 3]
# set idx {}
# regexp {INDEX (t1.) } $x all idx
# regexp {~([0-9]+) rows} $x all nrow
# list $idx $nrow
# } [list $index $rows]
# Verify that the same result is achieved regardless of whether or not
# the index is used
do_test analyze5-1.${testid}b {
set w2 [string map {y +y z +z} $where]
set a1 [db eval "SELECT rowid FROM t1 NOT INDEXED WHERE $w2\
ORDER BY +rowid"]
set a2 [db eval "SELECT rowid FROM t1 WHERE $where ORDER BY +rowid"]
if {$a1==$a2} {
set res ok
} else {
set res "a1=\[$a1\] a2=\[$a2\]"
}
set res
} {ok}
}
# Increase the number of NULLs in column x
#
db eval {
UPDATE t1 SET x=NULL;
UPDATE t1 SET x=rowid
WHERE rowid IN (SELECT rowid FROM t1 ORDER BY random() LIMIT 5);
ANALYZE;
}
# Verify that range queries generate the correct row count estimates
#
foreach {testid where index rows} {
500 {x IS NULL AND u='charlie'} t1u 17
501 {x=1 AND u='charlie'} t1x 1
502 {x IS NULL} t1x 995
503 {x=1} t1x 1
504 {x IS NOT NULL} t1x 2
505 {+x IS NOT NULL} {} 500
506 {upper(x) IS NOT NULL} {} 500
} {
# Verify that the expected index is used with the expected row count
# No longer valid due to an EXPLAIN QUERY PLAN format change
# do_test analyze5-1.${testid}a {
# set x [lindex [eqp "SELECT * FROM t1 WHERE $where"] 3]
# set idx {}
# regexp {INDEX (t1.) } $x all idx
# regexp {~([0-9]+) rows} $x all nrow
# list $idx $nrow
# } [list $index $rows]
# Verify that the same result is achieved regardless of whether or not
# the index is used
do_test analyze5-1.${testid}b {
set w2 [string map {y +y z +z} $where]
set a1 [db eval "SELECT rowid FROM t1 NOT INDEXED WHERE $w2\
ORDER BY +rowid"]
set a2 [db eval "SELECT rowid FROM t1 WHERE $where ORDER BY +rowid"]
if {$a1==$a2} {
set res ok
} else {
set res "a1=\[$a1\] a2=\[$a2\]"
}
set res
} {ok}
}
finish_test
|