summary refs log tree commit diff
path: root/ext/fts3
diff options
context:
space:
mode:
Diffstat (limited to 'ext/fts3')
-rw-r--r-- ext/fts3/fts3.c 759
-rw-r--r-- ext/fts3/fts3Int.h 46
-rw-r--r-- ext/fts3/fts3_aux.c 129
-rw-r--r-- ext/fts3/fts3_expr.c 221
-rw-r--r-- ext/fts3/fts3_hash.c 4
-rw-r--r-- ext/fts3/fts3_porter.c 74
-rw-r--r-- ext/fts3/fts3_snippet.c 3
-rw-r--r-- ext/fts3/fts3_test.c 49
-rw-r--r-- ext/fts3/fts3_unicode.c 16
-rw-r--r-- ext/fts3/fts3_unicode2.c 53
-rw-r--r-- ext/fts3/fts3_write.c 506
-rw-r--r-- ext/fts3/tool/fts3view.c 15
-rw-r--r-- ext/fts3/unicode/mkunicode.tcl 15
13 files changed, 1378 insertions, 512 deletions
diff --git a/ext/fts3/fts3.c b/ext/fts3/fts3.c
index c00a13f..4f4b667 100644
--- a/ext/fts3/fts3.c
+++ b/ext/fts3/fts3.c
@@ -330,21 +330,37 @@ int sqlite3Fts3PutVarint(char *p, sqlite_int64 v){
return (int) (q - (unsigned char *)p);
}
+#define GETVARINT_STEP(v, ptr, shift, mask1, mask2, var, ret) \
+ v = (v & mask1) | ( (*ptr++) << shift ); \
+ if( (v & mask2)==0 ){ var = v; return ret; }
+#define GETVARINT_INIT(v, ptr, shift, mask1, mask2, var, ret) \
+ v = (*ptr++); \
+ if( (v & mask2)==0 ){ var = v; return ret; }
+
/*
** Read a 64-bit variable-length integer from memory starting at p[0].
** Return the number of bytes read, or 0 on error.
** The value is stored in *v.
*/
int sqlite3Fts3GetVarint(const char *p, sqlite_int64 *v){
- const unsigned char *q = (const unsigned char *) p;
- sqlite_uint64 x = 0, y = 1;
- while( (*q&0x80)==0x80 && q-(unsigned char *)p<FTS3_VARINT_MAX ){
- x += y * (*q++ & 0x7f);
- y <<= 7;
- }
- x += y * (*q++);
- *v = (sqlite_int64) x;
- return (int) (q - (unsigned char *)p);
+ const char *pStart = p;
+ u32 a;
+ u64 b;
+ int shift;
+
+ GETVARINT_INIT(a, p, 0, 0x00, 0x80, *v, 1);
+ GETVARINT_STEP(a, p, 7, 0x7F, 0x4000, *v, 2);
+ GETVARINT_STEP(a, p, 14, 0x3FFF, 0x200000, *v, 3);
+ GETVARINT_STEP(a, p, 21, 0x1FFFFF, 0x10000000, *v, 4);
+ b = (a & 0x0FFFFFFF );
+
+ for(shift=28; shift<=63; shift+=7){
+ u64 c = *p++;
+ b += (c&0x7F) << shift;
+ if( (c & 0x80)==0 ) break;
+ }
+ *v = b;
+ return (int)(p - pStart);
}
/*
@@ -352,10 +368,21 @@ int sqlite3Fts3GetVarint(const char *p, sqlite_int64 *v){
** 32-bit integer before it is returned.
*/
int sqlite3Fts3GetVarint32(const char *p, int *pi){
- sqlite_int64 i;
- int ret = sqlite3Fts3GetVarint(p, &i);
- *pi = (int) i;
- return ret;
+ u32 a;
+
+#ifndef fts3GetVarint32
+ GETVARINT_INIT(a, p, 0, 0x00, 0x80, *pi, 1);
+#else
+ a = (*p++);
+ assert( a & 0x80 );
+#endif
+
+ GETVARINT_STEP(a, p, 7, 0x7F, 0x4000, *pi, 2);
+ GETVARINT_STEP(a, p, 14, 0x3FFF, 0x200000, *pi, 3);
+ GETVARINT_STEP(a, p, 21, 0x1FFFFF, 0x10000000, *pi, 4);
+ a = (a & 0x0FFFFFFF );
+ *pi = (int)(a | ((u32)(*p & 0x0F) << 28));
+ return 5;
}
/*
@@ -1081,6 +1108,8 @@ static int fts3InitVtab(
char *zUncompress = 0; /* uncompress=? parameter (or NULL) */
char *zContent = 0; /* content=? parameter (or NULL) */
char *zLanguageid = 0; /* languageid=? parameter (or NULL) */
+ char **azNotindexed = 0; /* The set of notindexed= columns */
+ int nNotindexed = 0; /* Size of azNotindexed[] array */
assert( strlen(argv[0])==4 );
assert( (sqlite3_strnicmp(argv[0], "fts4", 4)==0 && isFts4)
@@ -1090,9 +1119,19 @@ static int fts3InitVtab(
nDb = (int)strlen(argv[1]) + 1;
nName = (int)strlen(argv[2]) + 1;
- aCol = (const char **)sqlite3_malloc(sizeof(const char *) * (argc-2) );
- if( !aCol ) return SQLITE_NOMEM;
- memset((void *)aCol, 0, sizeof(const char *) * (argc-2));
+ nByte = sizeof(const char *) * (argc-2);
+ aCol = (const char **)sqlite3_malloc(nByte);
+ if( aCol ){
+ memset((void*)aCol, 0, nByte);
+ azNotindexed = (char **)sqlite3_malloc(nByte);
+ }
+ if( azNotindexed ){
+ memset(azNotindexed, 0, nByte);
+ }
+ if( !aCol || !azNotindexed ){
+ rc = SQLITE_NOMEM;
+ goto fts3_init_out;
+ }
/* Loop through all of the arguments passed by the user to the FTS3/4
** module (i.e. all the column names and special arguments). This loop
@@ -1131,7 +1170,8 @@ static int fts3InitVtab(
{ "uncompress", 10 }, /* 3 -> UNCOMPRESS */
{ "order", 5 }, /* 4 -> ORDER */
{ "content", 7 }, /* 5 -> CONTENT */
- { "languageid", 10 } /* 6 -> LANGUAGEID */
+ { "languageid", 10 }, /* 6 -> LANGUAGEID */
+ { "notindexed", 10 } /* 7 -> NOTINDEXED */
};
int iOpt;
@@ -1197,6 +1237,11 @@ static int fts3InitVtab(
zLanguageid = zVal;
zVal = 0;
break;
+
+ case 7: /* NOTINDEXED */
+ azNotindexed[nNotindexed++] = zVal;
+ zVal = 0;
+ break;
}
}
sqlite3_free(zVal);
@@ -1268,6 +1313,7 @@ static int fts3InitVtab(
nByte = sizeof(Fts3Table) + /* Fts3Table */
nCol * sizeof(char *) + /* azColumn */
nIndex * sizeof(struct Fts3Index) + /* aIndex */
+ nCol * sizeof(u8) + /* abNotindexed */
nName + /* zName */
nDb + /* zDb */
nString; /* Space for azColumn strings */
@@ -1287,7 +1333,7 @@ static int fts3InitVtab(
p->bHasStat = isFts4;
p->bFts4 = isFts4;
p->bDescIdx = bDescIdx;
- p->bAutoincrmerge = 0xff; /* 0xff means setting unknown */
+ p->nAutoincrmerge = 0xff; /* 0xff means setting unknown */
p->zContentTbl = zContent;
p->zLanguageid = zLanguageid;
zContent = 0;
@@ -1301,9 +1347,10 @@ static int fts3InitVtab(
for(i=0; i<nIndex; i++){
fts3HashInit(&p->aIndex[i].hPending, FTS3_HASH_STRING, 1);
}
+ p->abNotindexed = (u8 *)&p->aIndex[nIndex];
/* Fill in the zName and zDb fields of the vtab structure. */
- zCsr = (char *)&p->aIndex[nIndex];
+ zCsr = (char *)&p->abNotindexed[nCol];
p->zName = zCsr;
memcpy(zCsr, argv[2], nName);
zCsr += nName;
@@ -1324,7 +1371,28 @@ static int fts3InitVtab(
assert( zCsr <= &((char *)p)[nByte] );
}
- if( (zCompress==0)!=(zUncompress==0) ){
+ /* Fill in the abNotindexed array */
+ for(iCol=0; iCol<nCol; iCol++){
+ int n = (int)strlen(p->azColumn[iCol]);
+ for(i=0; i<nNotindexed; i++){
+ char *zNot = azNotindexed[i];
+ if( zNot && n==(int)strlen(zNot)
+ && 0==sqlite3_strnicmp(p->azColumn[iCol], zNot, n)
+ ){
+ p->abNotindexed[iCol] = 1;
+ sqlite3_free(zNot);
+ azNotindexed[i] = 0;
+ }
+ }
+ }
+ for(i=0; i<nNotindexed; i++){
+ if( azNotindexed[i] ){
+ *pzErr = sqlite3_mprintf("no such column: %s", azNotindexed[i]);
+ rc = SQLITE_ERROR;
+ }
+ }
+
+ if( rc==SQLITE_OK && (zCompress==0)!=(zUncompress==0) ){
char const *zMiss = (zCompress==0 ? "compress" : "uncompress");
rc = SQLITE_ERROR;
*pzErr = sqlite3_mprintf("missing %s parameter in fts4 constructor", zMiss);
@@ -1344,10 +1412,7 @@ static int fts3InitVtab(
** addition of a %_stat table so that it can use incremental merge.
*/
if( !isFts4 && !isCreate ){
- int rc2 = SQLITE_OK;
- fts3DbExec(&rc2, db, "SELECT 1 FROM %Q.'%q_stat' WHERE id=2",
- p->zDb, p->zName);
- if( rc2==SQLITE_OK ) p->bHasStat = 1;
+ p->bHasStat = 2;
}
/* Figure out the page-size for the database. This is required in order to
@@ -1365,7 +1430,9 @@ fts3_init_out:
sqlite3_free(zUncompress);
sqlite3_free(zContent);
sqlite3_free(zLanguageid);
+ for(i=0; i<nNotindexed; i++) sqlite3_free(azNotindexed[i]);
sqlite3_free((void *)aCol);
+ sqlite3_free((void *)azNotindexed);
if( rc!=SQLITE_OK ){
if( p ){
fts3DisconnectMethod((sqlite3_vtab *)p);
@@ -1404,6 +1471,19 @@ static int fts3CreateMethod(
return fts3InitVtab(1, db, pAux, argc, argv, ppVtab, pzErr);
}
+/*
+** Set the pIdxInfo->estimatedRows variable to nRow, unless this
+** extension is currently being used by a version of SQLite too old to
+** support estimatedRows, in which case this function is a no-op.
+*/
+static void fts3SetEstimatedRows(sqlite3_index_info *pIdxInfo, i64 nRow){
+#if SQLITE_VERSION_NUMBER>=3008002
+ if( sqlite3_libversion_number()>=3008002 ){
+ pIdxInfo->estimatedRows = nRow;
+ }
+#endif
+}
+
/*
** Implementation of the xBestIndex method for FTS3 tables. There
** are three possible strategies, in order of preference:
@@ -1416,23 +1496,40 @@ static int fts3BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
Fts3Table *p = (Fts3Table *)pVTab;
int i; /* Iterator variable */
int iCons = -1; /* Index of constraint to use */
+
int iLangidCons = -1; /* Index of langid=x constraint, if present */
+ int iDocidGe = -1; /* Index of docid>=x constraint, if present */
+ int iDocidLe = -1; /* Index of docid<=x constraint, if present */
+ int iIdx;
/* By default use a full table scan. This is an expensive option,
** so search through the constraints to see if a more efficient
** strategy is possible.
*/
pInfo->idxNum = FTS3_FULLSCAN_SEARCH;
- pInfo->estimatedCost = 500000;
+ pInfo->estimatedCost = 5000000;
for(i=0; i<pInfo->nConstraint; i++){
+ int bDocid; /* True if this constraint is on docid */
struct sqlite3_index_constraint *pCons = &pInfo->aConstraint[i];
- if( pCons->usable==0 ) continue;
+ if( pCons->usable==0 ){
+ if( pCons->op==SQLITE_INDEX_CONSTRAINT_MATCH ){
+ /* There exists an unusable MATCH constraint. This means that if
+ ** the planner does elect to use the results of this call as part
+ ** of the overall query plan the user will see an "unable to use
+ ** function MATCH in the requested context" error. To discourage
+ ** this, return a very high cost here. */
+ pInfo->idxNum = FTS3_FULLSCAN_SEARCH;
+ pInfo->estimatedCost = 1e50;
+ fts3SetEstimatedRows(pInfo, ((sqlite3_int64)1) << 50);
+ return SQLITE_OK;
+ }
+ continue;
+ }
+
+ bDocid = (pCons->iColumn<0 || pCons->iColumn==p->nColumn+1);
/* A direct lookup on the rowid or docid column. Assign a cost of 1.0. */
- if( iCons<0
- && pCons->op==SQLITE_INDEX_CONSTRAINT_EQ
- && (pCons->iColumn<0 || pCons->iColumn==p->nColumn+1 )
- ){
+ if( iCons<0 && pCons->op==SQLITE_INDEX_CONSTRAINT_EQ && bDocid ){
pInfo->idxNum = FTS3_DOCID_SEARCH;
pInfo->estimatedCost = 1.0;
iCons = i;
@@ -1461,14 +1558,38 @@ static int fts3BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
){
iLangidCons = i;
}
+
+ if( bDocid ){
+ switch( pCons->op ){
+ case SQLITE_INDEX_CONSTRAINT_GE:
+ case SQLITE_INDEX_CONSTRAINT_GT:
+ iDocidGe = i;
+ break;
+
+ case SQLITE_INDEX_CONSTRAINT_LE:
+ case SQLITE_INDEX_CONSTRAINT_LT:
+ iDocidLe = i;
+ break;
+ }
+ }
}
+ iIdx = 1;
if( iCons>=0 ){
- pInfo->aConstraintUsage[iCons].argvIndex = 1;
+ pInfo->aConstraintUsage[iCons].argvIndex = iIdx++;
pInfo->aConstraintUsage[iCons].omit = 1;
}
if( iLangidCons>=0 ){
- pInfo->aConstraintUsage[iLangidCons].argvIndex = 2;
+ pInfo->idxNum |= FTS3_HAVE_LANGID;
+ pInfo->aConstraintUsage[iLangidCons].argvIndex = iIdx++;
+ }
+ if( iDocidGe>=0 ){
+ pInfo->idxNum |= FTS3_HAVE_DOCID_GE;
+ pInfo->aConstraintUsage[iDocidGe].argvIndex = iIdx++;
+ }
+ if( iDocidLe>=0 ){
+ pInfo->idxNum |= FTS3_HAVE_DOCID_LE;
+ pInfo->aConstraintUsage[iDocidLe].argvIndex = iIdx++;
}
/* Regardless of the strategy selected, FTS can deliver rows in rowid (or
@@ -1646,10 +1767,10 @@ static int fts3ScanInteriorNode(
/* Load the next term on the node into zBuffer. Use realloc() to expand
** the size of zBuffer if required. */
if( !isFirstTerm ){
- zCsr += sqlite3Fts3GetVarint32(zCsr, &nPrefix);
+ zCsr += fts3GetVarint32(zCsr, &nPrefix);
}
isFirstTerm = 0;
- zCsr += sqlite3Fts3GetVarint32(zCsr, &nSuffix);
+ zCsr += fts3GetVarint32(zCsr, &nSuffix);
if( nPrefix<0 || nSuffix<0 || &zCsr[nSuffix]>zEnd ){
rc = FTS_CORRUPT_VTAB;
@@ -1737,7 +1858,7 @@ static int fts3SelectLeaf(
assert( piLeaf || piLeaf2 );
- sqlite3Fts3GetVarint32(zNode, &iHeight);
+ fts3GetVarint32(zNode, &iHeight);
rc = fts3ScanInteriorNode(zTerm, nTerm, zNode, nNode, piLeaf, piLeaf2);
assert( !piLeaf2 || !piLeaf || rc!=SQLITE_OK || (*piLeaf<=*piLeaf2) );
@@ -1939,11 +2060,11 @@ static void fts3PoslistMerge(
int iCol1; /* The current column index in pp1 */
int iCol2; /* The current column index in pp2 */
- if( *p1==POS_COLUMN ) sqlite3Fts3GetVarint32(&p1[1], &iCol1);
+ if( *p1==POS_COLUMN ) fts3GetVarint32(&p1[1], &iCol1);
else if( *p1==POS_END ) iCol1 = POSITION_LIST_END;
else iCol1 = 0;
- if( *p2==POS_COLUMN ) sqlite3Fts3GetVarint32(&p2[1], &iCol2);
+ if( *p2==POS_COLUMN ) fts3GetVarint32(&p2[1], &iCol2);
else if( *p2==POS_END ) iCol2 = POSITION_LIST_END;
else iCol2 = 0;
@@ -2036,11 +2157,11 @@ static int fts3PoslistPhraseMerge(
assert( p!=0 && *p1!=0 && *p2!=0 );
if( *p1==POS_COLUMN ){
p1++;
- p1 += sqlite3Fts3GetVarint32(p1, &iCol1);
+ p1 += fts3GetVarint32(p1, &iCol1);
}
if( *p2==POS_COLUMN ){
p2++;
- p2 += sqlite3Fts3GetVarint32(p2, &iCol2);
+ p2 += fts3GetVarint32(p2, &iCol2);
}
while( 1 ){
@@ -2090,9 +2211,9 @@ static int fts3PoslistPhraseMerge(
if( 0==*p1 || 0==*p2 ) break;
p1++;
- p1 += sqlite3Fts3GetVarint32(p1, &iCol1);
+ p1 += fts3GetVarint32(p1, &iCol1);
p2++;
- p2 += sqlite3Fts3GetVarint32(p2, &iCol2);
+ p2 += fts3GetVarint32(p2, &iCol2);
}
/* Advance pointer p1 or p2 (whichever corresponds to the smaller of
@@ -2104,12 +2225,12 @@ static int fts3PoslistPhraseMerge(
fts3ColumnlistCopy(0, &p1);
if( 0==*p1 ) break;
p1++;
- p1 += sqlite3Fts3GetVarint32(p1, &iCol1);
+ p1 += fts3GetVarint32(p1, &iCol1);
}else{
fts3ColumnlistCopy(0, &p2);
if( 0==*p2 ) break;
p2++;
- p2 += sqlite3Fts3GetVarint32(p2, &iCol2);
+ p2 += fts3GetVarint32(p2, &iCol2);
}
}
@@ -2916,6 +3037,33 @@ static int fts3NextMethod(sqlite3_vtab_cursor *pCursor){
}
/*
+** The following are copied from sqliteInt.h.
+**
+** Constants for the largest and smallest possible 64-bit signed integers.
+** These macros are designed to work correctly on both 32-bit and 64-bit
+** compilers.
+*/
+#ifndef SQLITE_AMALGAMATION
+# define LARGEST_INT64 (0xffffffff|(((sqlite3_int64)0x7fffffff)<<32))
+# define SMALLEST_INT64 (((sqlite3_int64)-1) - LARGEST_INT64)
+#endif
+
+/*
+** If the numeric type of argument pVal is "integer", then return it
+** converted to a 64-bit signed integer. Otherwise, return a copy of
+** the second parameter, iDefault.
+*/
+static sqlite3_int64 fts3DocidRange(sqlite3_value *pVal, i64 iDefault){
+ if( pVal ){
+ int eType = sqlite3_value_numeric_type(pVal);
+ if( eType==SQLITE_INTEGER ){
+ return sqlite3_value_int64(pVal);
+ }
+ }
+ return iDefault;
+}
+
+/*
** This is the xFilter interface for the virtual table. See
** the virtual table xFilter method documentation for additional
** information.
@@ -2940,40 +3088,58 @@ static int fts3FilterMethod(
){
int rc;
char *zSql; /* SQL statement used to access %_content */
+ int eSearch;
Fts3Table *p = (Fts3Table *)pCursor->pVtab;
Fts3Cursor *pCsr = (Fts3Cursor *)pCursor;
+ sqlite3_value *pCons = 0; /* The MATCH or rowid constraint, if any */
+ sqlite3_value *pLangid = 0; /* The "langid = ?" constraint, if any */
+ sqlite3_value *pDocidGe = 0; /* The "docid >= ?" constraint, if any */
+ sqlite3_value *pDocidLe = 0; /* The "docid <= ?" constraint, if any */
+ int iIdx;
+
UNUSED_PARAMETER(idxStr);
UNUSED_PARAMETER(nVal);
- assert( idxNum>=0 && idxNum<=(FTS3_FULLTEXT_SEARCH+p->nColumn) );
- assert( nVal==0 || nVal==1 || nVal==2 );
- assert( (nVal==0)==(idxNum==FTS3_FULLSCAN_SEARCH) );
+ eSearch = (idxNum & 0x0000FFFF);
+ assert( eSearch>=0 && eSearch<=(FTS3_FULLTEXT_SEARCH+p->nColumn) );
assert( p->pSegments==0 );
+ /* Collect arguments into local variables */
+ iIdx = 0;
+ if( eSearch!=FTS3_FULLSCAN_SEARCH ) pCons = apVal[iIdx++];
+ if( idxNum & FTS3_HAVE_LANGID ) pLangid = apVal[iIdx++];
+ if( idxNum & FTS3_HAVE_DOCID_GE ) pDocidGe = apVal[iIdx++];
+ if( idxNum & FTS3_HAVE_DOCID_LE ) pDocidLe = apVal[iIdx++];
+ assert( iIdx==nVal );
+
/* In case the cursor has been used before, clear it now. */
sqlite3_finalize(pCsr->pStmt);
sqlite3_free(pCsr->aDoclist);
sqlite3Fts3ExprFree(pCsr->pExpr);
memset(&pCursor[1], 0, sizeof(Fts3Cursor)-sizeof(sqlite3_vtab_cursor));
+ /* Set the lower and upper bounds on docids to return */
+ pCsr->iMinDocid = fts3DocidRange(pDocidGe, SMALLEST_INT64);
+ pCsr->iMaxDocid = fts3DocidRange(pDocidLe, LARGEST_INT64);
+
if( idxStr ){
pCsr->bDesc = (idxStr[0]=='D');
}else{
pCsr->bDesc = p->bDescIdx;
}
- pCsr->eSearch = (i16)idxNum;
+ pCsr->eSearch = (i16)eSearch;
- if( idxNum!=FTS3_DOCID_SEARCH && idxNum!=FTS3_FULLSCAN_SEARCH ){
- int iCol = idxNum-FTS3_FULLTEXT_SEARCH;
- const char *zQuery = (const char *)sqlite3_value_text(apVal[0]);
+ if( eSearch!=FTS3_DOCID_SEARCH && eSearch!=FTS3_FULLSCAN_SEARCH ){
+ int iCol = eSearch-FTS3_FULLTEXT_SEARCH;
+ const char *zQuery = (const char *)sqlite3_value_text(pCons);
- if( zQuery==0 && sqlite3_value_type(apVal[0])!=SQLITE_NULL ){
+ if( zQuery==0 && sqlite3_value_type(pCons)!=SQLITE_NULL ){
return SQLITE_NOMEM;
}
pCsr->iLangid = 0;
- if( nVal==2 ) pCsr->iLangid = sqlite3_value_int(apVal[1]);
+ if( pLangid ) pCsr->iLangid = sqlite3_value_int(pLangid);
assert( p->base.zErrMsg==0 );
rc = sqlite3Fts3ExprParse(p->pTokenizer, pCsr->iLangid,
@@ -2984,11 +3150,7 @@ static int fts3FilterMethod(
return rc;
}
- rc = sqlite3Fts3ReadLock(p);
- if( rc!=SQLITE_OK ) return rc;
-
rc = fts3EvalStart(pCsr);
-
sqlite3Fts3SegmentsClose(p);
if( rc!=SQLITE_OK ) return rc;
pCsr->pNextId = pCsr->aDoclist;
@@ -3000,7 +3162,7 @@ static int fts3FilterMethod(
** full-text query or docid lookup, the statement retrieves a single
** row by docid.
*/
- if( idxNum==FTS3_FULLSCAN_SEARCH ){
+ if( eSearch==FTS3_FULLSCAN_SEARCH ){
zSql = sqlite3_mprintf(
"SELECT %s ORDER BY rowid %s",
p->zReadExprlist, (pCsr->bDesc ? "DESC" : "ASC")
@@ -3011,10 +3173,10 @@ static int fts3FilterMethod(
}else{
rc = SQLITE_NOMEM;
}
- }else if( idxNum==FTS3_DOCID_SEARCH ){
+ }else if( eSearch==FTS3_DOCID_SEARCH ){
rc = fts3CursorSeekStmt(pCsr, &pCsr->pStmt);
if( rc==SQLITE_OK ){
- rc = sqlite3_bind_value(pCsr->pStmt, 1, apVal[0]);
+ rc = sqlite3_bind_value(pCsr->pStmt, 1, pCons);
}
}
if( rc!=SQLITE_OK ) return rc;
@@ -3142,7 +3304,10 @@ static int fts3SyncMethod(sqlite3_vtab *pVtab){
Fts3Table *p = (Fts3Table*)pVtab;
int rc = sqlite3Fts3PendingTermsFlush(p);
- if( rc==SQLITE_OK && p->bAutoincrmerge==1 && p->nLeafAdd>(nMinMerge/16) ){
+ if( rc==SQLITE_OK
+ && p->nLeafAdd>(nMinMerge/16)
+ && p->nAutoincrmerge && p->nAutoincrmerge!=0xff
+ ){
int mxLevel = 0; /* Maximum relative level value in db */
int A; /* Incr-merge parameter A */
@@ -3150,14 +3315,41 @@ static int fts3SyncMethod(sqlite3_vtab *pVtab){
assert( rc==SQLITE_OK || mxLevel==0 );
A = p->nLeafAdd * mxLevel;
A += (A/2);
- if( A>(int)nMinMerge ) rc = sqlite3Fts3Incrmerge(p, A, 8);
+ if( A>(int)nMinMerge ) rc = sqlite3Fts3Incrmerge(p, A, p->nAutoincrmerge);
}
sqlite3Fts3SegmentsClose(p);
return rc;
}
/*
-** Implementation of xBegin() method. This is a no-op.
+** If it is currently unknown whether or not the FTS table has a %_stat
+** table (if p->bHasStat==2), attempt to determine this (set p->bHasStat
+** to 0 or 1). Return SQLITE_OK if successful, or an SQLite error code
+** if an error occurs.
+*/
+static int fts3SetHasStat(Fts3Table *p){
+ int rc = SQLITE_OK;
+ if( p->bHasStat==2 ){
+ const char *zFmt ="SELECT 1 FROM %Q.sqlite_master WHERE tbl_name='%q_stat'";
+ char *zSql = sqlite3_mprintf(zFmt, p->zDb, p->zName);
+ if( zSql ){
+ sqlite3_stmt *pStmt = 0;
+ rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0);
+ if( rc==SQLITE_OK ){
+ int bHasStat = (sqlite3_step(pStmt)==SQLITE_ROW);
+ rc = sqlite3_finalize(pStmt);
+ if( rc==SQLITE_OK ) p->bHasStat = bHasStat;
+ }
+ sqlite3_free(zSql);
+ }else{
+ rc = SQLITE_NOMEM;
+ }
+ }
+ return rc;
+}
+
+/*
+** Implementation of xBegin() method.
*/
static int fts3BeginMethod(sqlite3_vtab *pVtab){
Fts3Table *p = (Fts3Table*)pVtab;
@@ -3168,7 +3360,7 @@ static int fts3BeginMethod(sqlite3_vtab *pVtab){
TESTONLY( p->inTransaction = 1 );
TESTONLY( p->mxSavepoint = -1; );
p->nLeafAdd = 0;
- return SQLITE_OK;
+ return fts3SetHasStat(p);
}
/*
@@ -3417,6 +3609,10 @@ static int fts3RenameMethod(
sqlite3 *db = p->db; /* Database connection */
int rc; /* Return Code */
+ /* At this point it must be known if the %_stat table exists or not.
+ ** So bHasStat may not be 2. */
+ rc = fts3SetHasStat(p);
+
/* As it happens, the pending terms table is always empty here. This is
** because an "ALTER TABLE RENAME TABLE" statement inside a transaction
** always opens a savepoint transaction. And the xSavepoint() method
@@ -3424,7 +3620,9 @@ static int fts3RenameMethod(
** PendingTermsFlush() in in case that changes.
*/
assert( p->nPendingData==0 );
- rc = sqlite3Fts3PendingTermsFlush(p);
+ if( rc==SQLITE_OK ){
+ rc = sqlite3Fts3PendingTermsFlush(p);
+ }
if( p->zContentTbl==0 ){
fts3DbExec(&rc, db,
@@ -3552,7 +3750,7 @@ static void hashDestroy(void *p){
*/
void sqlite3Fts3SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule);
void sqlite3Fts3PorterTokenizerModule(sqlite3_tokenizer_module const**ppModule);
-#ifdef SQLITE_ENABLE_FTS4_UNICODE61
+#ifndef SQLITE_DISABLE_FTS3_UNICODE
void sqlite3Fts3UnicodeTokenizer(sqlite3_tokenizer_module const**ppModule);
#endif
#ifdef SQLITE_ENABLE_ICU
@@ -3570,7 +3768,7 @@ int sqlite3Fts3Init(sqlite3 *db){
Fts3Hash *pHash = 0;
const sqlite3_tokenizer_module *pSimple = 0;
const sqlite3_tokenizer_module *pPorter = 0;
-#ifdef SQLITE_ENABLE_FTS4_UNICODE61
+#ifndef SQLITE_DISABLE_FTS3_UNICODE
const sqlite3_tokenizer_module *pUnicode = 0;
#endif
@@ -3579,7 +3777,7 @@ int sqlite3Fts3Init(sqlite3 *db){
sqlite3Fts3IcuTokenizerModule(&pIcu);
#endif
-#ifdef SQLITE_ENABLE_FTS4_UNICODE61
+#ifndef SQLITE_DISABLE_FTS3_UNICODE
sqlite3Fts3UnicodeTokenizer(&pUnicode);
#endif
@@ -3607,7 +3805,7 @@ int sqlite3Fts3Init(sqlite3 *db){
if( sqlite3Fts3HashInsert(pHash, "simple", 7, (void *)pSimple)
|| sqlite3Fts3HashInsert(pHash, "porter", 7, (void *)pPorter)
-#ifdef SQLITE_ENABLE_FTS4_UNICODE61
+#ifndef SQLITE_DISABLE_FTS3_UNICODE
|| sqlite3Fts3HashInsert(pHash, "unicode61", 10, (void *)pUnicode)
#endif
#ifdef SQLITE_ENABLE_ICU
@@ -3906,6 +4104,12 @@ static int fts3EvalDeferredPhrase(Fts3Cursor *pCsr, Fts3Phrase *pPhrase){
}
/*
+** Maximum number of tokens a phrase may have to be considered for the
+** incremental doclists strategy.
+*/
+#define MAX_INCR_PHRASE_TOKENS 4
+
+/*
** This function is called for each Fts3Phrase in a full-text query
** expression to initialize the mechanism for returning rows. Once this
** function has been called successfully on an Fts3Phrase, it may be
@@ -3918,23 +4122,43 @@ static int fts3EvalDeferredPhrase(Fts3Cursor *pCsr, Fts3Phrase *pPhrase){
** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
*/
static int fts3EvalPhraseStart(Fts3Cursor *pCsr, int bOptOk, Fts3Phrase *p){
- int rc; /* Error code */
- Fts3PhraseToken *pFirst = &p->aToken[0];
Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
+ int rc = SQLITE_OK; /* Error code */
+ int i;
- if( pCsr->bDesc==pTab->bDescIdx
- && bOptOk==1
- && p->nToken==1
- && pFirst->pSegcsr
- && pFirst->pSegcsr->bLookup
- && pFirst->bFirst==0
- ){
+ /* Determine if doclists may be loaded from disk incrementally. This is
+ ** possible if the bOptOk argument is true, the FTS doclists will be
+ ** scanned in forward order, and the phrase consists of
+ ** MAX_INCR_PHRASE_TOKENS or fewer tokens, none of which are "^first"
+ ** tokens or prefix tokens that cannot use a prefix-index. */
+ int bHaveIncr = 0;
+ int bIncrOk = (bOptOk
+ && pCsr->bDesc==pTab->bDescIdx
+ && p->nToken<=MAX_INCR_PHRASE_TOKENS && p->nToken>0
+ && p->nToken<=MAX_INCR_PHRASE_TOKENS && p->nToken>0
+#ifdef SQLITE_TEST
+ && pTab->bNoIncrDoclist==0
+#endif
+ );
+ for(i=0; bIncrOk==1 && i<p->nToken; i++){
+ Fts3PhraseToken *pToken = &p->aToken[i];
+ if( pToken->bFirst || (pToken->pSegcsr!=0 && !pToken->pSegcsr->bLookup) ){
+ bIncrOk = 0;
+ }
+ if( pToken->pSegcsr ) bHaveIncr = 1;
+ }
+
+ if( bIncrOk && bHaveIncr ){
/* Use the incremental approach. */
int iCol = (p->iColumn >= pTab->nColumn ? -1 : p->iColumn);
- rc = sqlite3Fts3MsrIncrStart(
- pTab, pFirst->pSegcsr, iCol, pFirst->z, pFirst->n);
+ for(i=0; rc==SQLITE_OK && i<p->nToken; i++){
+ Fts3PhraseToken *pToken = &p->aToken[i];
+ Fts3MultiSegReader *pSegcsr = pToken->pSegcsr;
+ if( pSegcsr ){
+ rc = sqlite3Fts3MsrIncrStart(pTab, pSegcsr, iCol, pToken->z, pToken->n);
+ }
+ }
p->bIncr = 1;
-
}else{
/* Load the full doclist for the phrase into memory. */
rc = fts3EvalPhraseLoad(pCsr, p);
@@ -4044,15 +4268,125 @@ void sqlite3Fts3DoclistNext(
}
/*
-** Attempt to move the phrase iterator to point to the next matching docid.
+** Advance the iterator pDL to the next entry in pDL->aAll/nAll. Set *pbEof
+** to true if EOF is reached.
+*/
+static void fts3EvalDlPhraseNext(
+ Fts3Table *pTab,
+ Fts3Doclist *pDL,
+ u8 *pbEof
+){
+ char *pIter; /* Used to iterate through aAll */
+ char *pEnd = &pDL->aAll[pDL->nAll]; /* 1 byte past end of aAll */
+
+ if( pDL->pNextDocid ){
+ pIter = pDL->pNextDocid;
+ }else{
+ pIter = pDL->aAll;
+ }
+
+ if( pIter>=pEnd ){
+ /* We have already reached the end of this doclist. EOF. */
+ *pbEof = 1;
+ }else{
+ sqlite3_int64 iDelta;
+ pIter += sqlite3Fts3GetVarint(pIter, &iDelta);
+ if( pTab->bDescIdx==0 || pDL->pNextDocid==0 ){
+ pDL->iDocid += iDelta;
+ }else{
+ pDL->iDocid -= iDelta;
+ }
+ pDL->pList = pIter;
+ fts3PoslistCopy(0, &pIter);
+ pDL->nList = (int)(pIter - pDL->pList);
+
+ /* pIter now points just past the 0x00 that terminates the position-
+ ** list for document pDL->iDocid. However, if this position-list was
+ ** edited in place by fts3EvalNearTrim(), then pIter may not actually
+ ** point to the start of the next docid value. The following line deals
+ ** with this case by advancing pIter past the zero-padding added by
+ ** fts3EvalNearTrim(). */
+ while( pIter<pEnd && *pIter==0 ) pIter++;
+
+ pDL->pNextDocid = pIter;
+ assert( pIter>=&pDL->aAll[pDL->nAll] || *pIter );
+ *pbEof = 0;
+ }
+}
+
+/*
+** Helper type used by fts3EvalIncrPhraseNext() and incrPhraseTokenNext().
+*/
+typedef struct TokenDoclist TokenDoclist;
+struct TokenDoclist {
+ int bIgnore;
+ sqlite3_int64 iDocid;
+ char *pList;
+ int nList;
+};
+
+/*
+** Token pToken is an incrementally loaded token that is part of a
+** multi-token phrase. Advance it to the next matching document in the
+** database and populate output variable *p with the details of the new
+** entry. Or, if the iterator has reached EOF, set *pbEof to true.
+**
** If an error occurs, return an SQLite error code. Otherwise, return
** SQLITE_OK.
+*/
+static int incrPhraseTokenNext(
+ Fts3Table *pTab, /* Virtual table handle */
+ Fts3Phrase *pPhrase, /* Phrase to advance token of */
+ int iToken, /* Specific token to advance */
+ TokenDoclist *p, /* OUT: Docid and doclist for new entry */
+ u8 *pbEof /* OUT: True if iterator is at EOF */
+){
+ int rc = SQLITE_OK;
+
+ if( pPhrase->iDoclistToken==iToken ){
+ assert( p->bIgnore==0 );
+ assert( pPhrase->aToken[iToken].pSegcsr==0 );
+ fts3EvalDlPhraseNext(pTab, &pPhrase->doclist, pbEof);
+ p->pList = pPhrase->doclist.pList;
+ p->nList = pPhrase->doclist.nList;
+ p->iDocid = pPhrase->doclist.iDocid;
+ }else{
+ Fts3PhraseToken *pToken = &pPhrase->aToken[iToken];
+ assert( pToken->pDeferred==0 );
+ assert( pToken->pSegcsr || pPhrase->iDoclistToken>=0 );
+ if( pToken->pSegcsr ){
+ assert( p->bIgnore==0 );
+ rc = sqlite3Fts3MsrIncrNext(
+ pTab, pToken->pSegcsr, &p->iDocid, &p->pList, &p->nList
+ );
+ if( p->pList==0 ) *pbEof = 1;
+ }else{
+ p->bIgnore = 1;
+ }
+ }
+
+ return rc;
+}
+
+
+/*
+** The phrase iterator passed as the second argument:
+**
+** * features at least one token that uses an incremental doclist, and
+**
+** * does not contain any deferred tokens.
+**
+** Advance it to the next matching document in the database and populate
+** the Fts3Doclist.pList and nList fields.
**
** If there is no "next" entry and no error occurs, then *pbEof is set to
** 1 before returning. Otherwise, if no error occurs and the iterator is
** successfully advanced, *pbEof is set to 0.
+**
+** If an error occurs, return an SQLite error code. Otherwise, return
+** SQLITE_OK.
*/
-static int fts3EvalPhraseNext(
+static int fts3EvalIncrPhraseNext(
Fts3Cursor *pCsr, /* FTS Cursor handle */
Fts3Phrase *p, /* Phrase object to advance to next docid */
u8 *pbEof /* OUT: Set to 1 if EOF */
@@ -4060,57 +4394,116 @@ static int fts3EvalPhraseNext(
int rc = SQLITE_OK;
Fts3Doclist *pDL = &p->doclist;
Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
+ u8 bEof = 0;
- if( p->bIncr ){
- assert( p->nToken==1 );
- assert( pDL->pNextDocid==0 );
+ /* This is only called if it is guaranteed that the phrase has at least
+ ** one incremental token. In which case the bIncr flag is set. */
+ assert( p->bIncr==1 );
+
+ if( p->nToken==1 && p->bIncr ){
rc = sqlite3Fts3MsrIncrNext(pTab, p->aToken[0].pSegcsr,
&pDL->iDocid, &pDL->pList, &pDL->nList
);
- if( rc==SQLITE_OK && !pDL->pList ){
- *pbEof = 1;
+ if( pDL->pList==0 ) bEof = 1;
+ }else{
+ int bDescDoclist = pCsr->bDesc;
+ struct TokenDoclist a[MAX_INCR_PHRASE_TOKENS];
+
+ memset(a, 0, sizeof(a));
+ assert( p->nToken<=MAX_INCR_PHRASE_TOKENS );
+ assert( p->iDoclistToken<MAX_INCR_PHRASE_TOKENS );
+
+ while( bEof==0 ){
+ int bMaxSet = 0;
+ sqlite3_int64 iMax = 0; /* Largest docid for all iterators */
+ int i; /* Used to iterate through tokens */
+
+ /* Advance the iterator for each token in the phrase once. */
+ for(i=0; rc==SQLITE_OK && i<p->nToken && bEof==0; i++){
+ rc = incrPhraseTokenNext(pTab, p, i, &a[i], &bEof);
+ if( a[i].bIgnore==0 && (bMaxSet==0 || DOCID_CMP(iMax, a[i].iDocid)<0) ){
+ iMax = a[i].iDocid;
+ bMaxSet = 1;
+ }
+ }
+ assert( rc!=SQLITE_OK || a[p->nToken-1].bIgnore==0 );
+ assert( rc!=SQLITE_OK || bMaxSet );
+
+ /* Keep advancing iterators until they all point to the same document */
+ for(i=0; i<p->nToken; i++){
+ while( rc==SQLITE_OK && bEof==0
+ && a[i].bIgnore==0 && DOCID_CMP(a[i].iDocid, iMax)<0
+ ){
+ rc = incrPhraseTokenNext(pTab, p, i, &a[i], &bEof);
+ if( DOCID_CMP(a[i].iDocid, iMax)>0 ){
+ iMax = a[i].iDocid;
+ i = 0;
+ }
+ }
+ }
+
+ /* Check if the current entries really are a phrase match */
+ if( bEof==0 ){
+ int nList = 0;
+ int nByte = a[p->nToken-1].nList;
+ char *aDoclist = sqlite3_malloc(nByte+1);
+ if( !aDoclist ) return SQLITE_NOMEM;
+ memcpy(aDoclist, a[p->nToken-1].pList, nByte+1);
+
+ for(i=0; i<(p->nToken-1); i++){
+ if( a[i].bIgnore==0 ){
+ char *pL = a[i].pList;
+ char *pR = aDoclist;
+ char *pOut = aDoclist;
+ int nDist = p->nToken-1-i;
+ int res = fts3PoslistPhraseMerge(&pOut, nDist, 0, 1, &pL, &pR);
+ if( res==0 ) break;
+ nList = (int)(pOut - aDoclist);
+ }
+ }
+ if( i==(p->nToken-1) ){
+ pDL->iDocid = iMax;
+ pDL->pList = aDoclist;
+ pDL->nList = nList;
+ pDL->bFreeList = 1;
+ break;
+ }
+ sqlite3_free(aDoclist);
+ }
}
+ }
+
+ *pbEof = bEof;
+ return rc;
+}
+
+/*
+** Attempt to move the phrase iterator to point to the next matching docid.
+** If an error occurs, return an SQLite error code. Otherwise, return
+** SQLITE_OK.
+**
+** If there is no "next" entry and no error occurs, then *pbEof is set to
+** 1 before returning. Otherwise, if no error occurs and the iterator is
+** successfully advanced, *pbEof is set to 0.
+*/
+static int fts3EvalPhraseNext(
+ Fts3Cursor *pCsr, /* FTS Cursor handle */
+ Fts3Phrase *p, /* Phrase object to advance to next docid */
+ u8 *pbEof /* OUT: Set to 1 if EOF */
+){
+ int rc = SQLITE_OK;
+ Fts3Doclist *pDL = &p->doclist;
+ Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
+
+ if( p->bIncr ){
+ rc = fts3EvalIncrPhraseNext(pCsr, p, pbEof);
}else if( pCsr->bDesc!=pTab->bDescIdx && pDL->nAll ){
sqlite3Fts3DoclistPrev(pTab->bDescIdx, pDL->aAll, pDL->nAll,
&pDL->pNextDocid, &pDL->iDocid, &pDL->nList, pbEof
);
pDL->pList = pDL->pNextDocid;
}else{
- char *pIter; /* Used to iterate through aAll */
- char *pEnd = &pDL->aAll[pDL->nAll]; /* 1 byte past end of aAll */
- if( pDL->pNextDocid ){
- pIter = pDL->pNextDocid;
- }else{
- pIter = pDL->aAll;
- }
-
- if( pIter>=pEnd ){
- /* We have already reached the end of this doclist. EOF. */
- *pbEof = 1;
- }else{
- sqlite3_int64 iDelta;
- pIter += sqlite3Fts3GetVarint(pIter, &iDelta);
- if( pTab->bDescIdx==0 || pDL->pNextDocid==0 ){
- pDL->iDocid += iDelta;
- }else{
- pDL->iDocid -= iDelta;
- }
- pDL->pList = pIter;
- fts3PoslistCopy(0, &pIter);
- pDL->nList = (int)(pIter - pDL->pList);
-
- /* pIter now points just past the 0x00 that terminates the position-
- ** list for document pDL->iDocid. However, if this position-list was
- ** edited in place by fts3EvalNearTrim(), then pIter may not actually
- ** point to the start of the next docid value. The following line deals
- ** with this case by advancing pIter past the zero-padding added by
- ** fts3EvalNearTrim(). */
- while( pIter<pEnd && *pIter==0 ) pIter++;
-
- pDL->pNextDocid = pIter;
- assert( pIter>=&pDL->aAll[pDL->nAll] || *pIter );
- *pbEof = 0;
- }
+ fts3EvalDlPhraseNext(pTab, pDL, pbEof);
}
return rc;
@@ -4135,7 +4528,6 @@ static int fts3EvalPhraseNext(
static void fts3EvalStartReaders(
Fts3Cursor *pCsr, /* FTS Cursor handle */
Fts3Expr *pExpr, /* Expression to initialize phrases in */
- int bOptOk, /* True to enable incremental loading */
int *pRc /* IN/OUT: Error code */
){
if( pExpr && SQLITE_OK==*pRc ){
@@ -4146,10 +4538,10 @@ static void fts3EvalStartReaders(
if( pExpr->pPhrase->aToken[i].pDeferred==0 ) break;
}
pExpr->bDeferred = (i==nToken);
- *pRc = fts3EvalPhraseStart(pCsr, bOptOk, pExpr->pPhrase);
+ *pRc = fts3EvalPhraseStart(pCsr, 1, pExpr->pPhrase);
}else{
- fts3EvalStartReaders(pCsr, pExpr->pLeft, bOptOk, pRc);
- fts3EvalStartReaders(pCsr, pExpr->pRight, bOptOk, pRc);
+ fts3EvalStartReaders(pCsr, pExpr->pLeft, pRc);
+ fts3EvalStartReaders(pCsr, pExpr->pRight, pRc);
pExpr->bDeferred = (pExpr->pLeft->bDeferred && pExpr->pRight->bDeferred);
}
}
@@ -4391,7 +4783,7 @@ static int fts3EvalSelectDeferred(
** overflowing the 32-bit integer it is stored in. */
if( ii<12 ) nLoad4 = nLoad4*4;
- if( ii==0 || pTC->pPhrase->nToken>1 ){
+ if( ii==0 || (pTC->pPhrase->nToken>1 && ii!=nToken-1) ){
/* Either this is the cheapest token in the entire query, or it is
** part of a multi-token phrase. Either way, the entire doclist will
** (eventually) be loaded into memory. It may as well be now. */
@@ -4471,7 +4863,7 @@ static int fts3EvalStart(Fts3Cursor *pCsr){
}
#endif
- fts3EvalStartReaders(pCsr, pCsr->pExpr, 1, &rc);
+ fts3EvalStartReaders(pCsr, pCsr->pExpr, &rc);
return rc;
}
@@ -4954,6 +5346,16 @@ static int fts3EvalNext(Fts3Cursor *pCsr){
pCsr->iPrevId = pExpr->iDocid;
}while( pCsr->isEof==0 && fts3EvalTestDeferredAndNear(pCsr, &rc) );
}
+
+ /* Check if the cursor is past the end of the docid range specified
+ ** by Fts3Cursor.iMinDocid/iMaxDocid. If so, set the EOF flag. */
+ if( rc==SQLITE_OK && (
+ (pCsr->bDesc==0 && pCsr->iPrevId>pCsr->iMaxDocid)
+ || (pCsr->bDesc!=0 && pCsr->iPrevId<pCsr->iMinDocid)
+ )){
+ pCsr->isEof = 1;
+ }
+
return rc;
}
@@ -4977,12 +5379,16 @@ static void fts3EvalRestart(
if( pPhrase ){
fts3EvalInvalidatePoslist(pPhrase);
if( pPhrase->bIncr ){
- assert( pPhrase->nToken==1 );
- assert( pPhrase->aToken[0].pSegcsr );
- sqlite3Fts3MsrIncrRestart(pPhrase->aToken[0].pSegcsr);
+ int i;
+ for(i=0; i<pPhrase->nToken; i++){
+ Fts3PhraseToken *pToken = &pPhrase->aToken[i];
+ assert( pToken->pDeferred==0 );
+ if( pToken->pSegcsr ){
+ sqlite3Fts3MsrIncrRestart(pToken->pSegcsr);
+ }
+ }
*pRc = fts3EvalPhraseStart(pCsr, 0, pPhrase);
}
-
pPhrase->doclist.pNextDocid = 0;
pPhrase->doclist.iDocid = 0;
}
@@ -5027,7 +5433,7 @@ static void fts3EvalUpdateCounts(Fts3Expr *pExpr){
pExpr->aMI[iCol*3 + 2] += (iCnt>0);
if( *p==0x00 ) break;
p++;
- p += sqlite3Fts3GetVarint32(p, &iCol);
+ p += fts3GetVarint32(p, &iCol);
}
}
@@ -5231,15 +5637,23 @@ int sqlite3Fts3EvalPhrasePoslist(
pIter = pPhrase->doclist.pList;
if( iDocid!=pCsr->iPrevId || pExpr->bEof ){
int bDescDoclist = pTab->bDescIdx; /* For DOCID_CMP macro */
+ int iMul; /* +1 if csr dir matches index dir, else -1 */
int bOr = 0;
u8 bEof = 0;
- Fts3Expr *p;
+ u8 bTreeEof = 0;
+ Fts3Expr *p; /* Used to iterate from pExpr to root */
+ Fts3Expr *pNear; /* Most senior NEAR ancestor (or pExpr) */
/* Check if this phrase descends from an OR expression node. If not,
** return NULL. Otherwise, the entry that corresponds to docid
- ** pCsr->iPrevId may lie earlier in the doclist buffer. */
+ ** pCsr->iPrevId may lie earlier in the doclist buffer. Or, if the
+ ** tree that the node is part of has been marked as EOF, but the node
+ ** itself is not EOF, then it may point to an earlier entry. */
+ pNear = pExpr;
for(p=pExpr->pParent; p; p=p->pParent){
if( p->eType==FTSQUERY_OR ) bOr = 1;
+ if( p->eType==FTSQUERY_NEAR ) pNear = p;
+ if( p->bEof ) bTreeEof = 1;
}
if( bOr==0 ) return SQLITE_OK;
@@ -5258,29 +5672,59 @@ int sqlite3Fts3EvalPhrasePoslist(
assert( rc!=SQLITE_OK || pPhrase->bIncr==0 );
if( rc!=SQLITE_OK ) return rc;
}
-
- if( pExpr->bEof ){
- pIter = 0;
- iDocid = 0;
+
+ iMul = ((pCsr->bDesc==bDescDoclist) ? 1 : -1);
+ while( bTreeEof==1
+ && pNear->bEof==0
+ && (DOCID_CMP(pNear->iDocid, pCsr->iPrevId) * iMul)<0
+ ){
+ int rc = SQLITE_OK;
+ fts3EvalNextRow(pCsr, pExpr, &rc);
+ if( rc!=SQLITE_OK ) return rc;
+ iDocid = pExpr->iDocid;
+ pIter = pPhrase->doclist.pList;
}
+
bEof = (pPhrase->doclist.nAll==0);
assert( bDescDoclist==0 || bDescDoclist==1 );
assert( pCsr->bDesc==0 || pCsr->bDesc==1 );
- if( pCsr->bDesc==bDescDoclist ){
- int dummy;
- while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)>0 ) && bEof==0 ){
- sqlite3Fts3DoclistPrev(
- bDescDoclist, pPhrase->doclist.aAll, pPhrase->doclist.nAll,
- &pIter, &iDocid, &dummy, &bEof
- );
- }
- }else{
- while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)<0 ) && bEof==0 ){
- sqlite3Fts3DoclistNext(
- bDescDoclist, pPhrase->doclist.aAll, pPhrase->doclist.nAll,
- &pIter, &iDocid, &bEof
- );
+ if( bEof==0 ){
+ if( pCsr->bDesc==bDescDoclist ){
+ int dummy;
+ if( pNear->bEof ){
+ /* This expression is already at EOF. So position it to point to the
+ ** last entry in the doclist at pPhrase->doclist.aAll[]. Variable
+ ** iDocid is already set for this entry, so all that is required is
+ ** to set pIter to point to the first byte of the last position-list
+ ** in the doclist.
+ **
+ ** It would also be correct to set pIter and iDocid to zero. In
+ ** this case, the first call to sqlite3Fts3DoclistPrev() below
+ ** would also move the iterator to point to the last entry in the
+ ** doclist. However, this is expensive, as to do so it has to
+ ** iterate through the entire doclist from start to finish (since
+ ** it does not know the docid for the last entry). */
+ pIter = &pPhrase->doclist.aAll[pPhrase->doclist.nAll-1];
+ fts3ReversePoslist(pPhrase->doclist.aAll, &pIter);
+ }
+ while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)>0 ) && bEof==0 ){
+ sqlite3Fts3DoclistPrev(
+ bDescDoclist, pPhrase->doclist.aAll, pPhrase->doclist.nAll,
+ &pIter, &iDocid, &dummy, &bEof
+ );
+ }
+ }else{
+ if( pNear->bEof ){
+ pIter = 0;
+ iDocid = 0;
+ }
+ while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)<0 ) && bEof==0 ){
+ sqlite3Fts3DoclistNext(
+ bDescDoclist, pPhrase->doclist.aAll, pPhrase->doclist.nAll,
+ &pIter, &iDocid, &bEof
+ );
+ }
}
}
@@ -5290,7 +5734,7 @@ int sqlite3Fts3EvalPhrasePoslist(
if( *pIter==0x01 ){
pIter++;
- pIter += sqlite3Fts3GetVarint32(pIter, &iThis);
+ pIter += fts3GetVarint32(pIter, &iThis);
}else{
iThis = 0;
}
@@ -5298,7 +5742,7 @@ int sqlite3Fts3EvalPhrasePoslist(
fts3ColumnlistCopy(0, &pIter);
if( *pIter==0x00 ) return 0;
pIter++;
- pIter += sqlite3Fts3GetVarint32(pIter, &iThis);
+ pIter += fts3GetVarint32(pIter, &iThis);
}
*ppOut = ((iCol==iThis)?pIter:0);
@@ -5339,7 +5783,10 @@ int sqlite3Fts3Corrupt(){
/*
** Initialize API pointer table, if required.
*/
-int sqlite3_extension_init(
+#ifdef _WIN32
+__declspec(dllexport)
+#endif
+int sqlite3_fts3_init(
sqlite3 *db,
char **pzErrMsg,
const sqlite3_api_routines *pApi
diff --git a/ext/fts3/fts3Int.h b/ext/fts3/fts3Int.h
index b19064c..b2827b7 100644
--- a/ext/fts3/fts3Int.h
+++ b/ext/fts3/fts3Int.h
@@ -32,7 +32,7 @@
/* If not building as part of the core, include sqlite3ext.h. */
#ifndef SQLITE_CORE
# include "sqlite3ext.h"
-extern const sqlite3_api_routines *sqlite3_api;
+SQLITE_EXTENSION_INIT3
#endif
#include "sqlite3.h"
@@ -40,6 +40,18 @@ extern const sqlite3_api_routines *sqlite3_api;
#include "fts3_hash.h"
/*
+** This constant determines the maximum depth of an FTS expression tree
+** that the library will create and use. FTS uses recursion to perform
+** various operations on the query tree, so the disadvantage of a large
+** limit is that it may allow very large queries to use large amounts
+** of stack space (perhaps causing a stack overflow).
+*/
+#ifndef SQLITE_FTS3_MAX_EXPR_DEPTH
+# define SQLITE_FTS3_MAX_EXPR_DEPTH 12
+#endif
+
+
+/*
** This constant controls how often segments are merged. Once there are
** FTS3_MERGE_COUNT segments of level N, they are merged into a single
** segment of level N+1.
@@ -194,23 +206,24 @@ struct Fts3Table {
const char *zName; /* virtual table name */
int nColumn; /* number of named columns in virtual table */
char **azColumn; /* column names. malloced */
+ u8 *abNotindexed; /* True for 'notindexed' columns */
sqlite3_tokenizer *pTokenizer; /* tokenizer for inserts and queries */
char *zContentTbl; /* content=xxx option, or NULL */
char *zLanguageid; /* languageid=xxx option, or NULL */
- u8 bAutoincrmerge; /* True if automerge=1 */
+ int nAutoincrmerge; /* Value configured by 'automerge' */
u32 nLeafAdd; /* Number of leaf blocks added this trans */
/* Precompiled statements used by the implementation. Each of these
** statements is run and reset within a single virtual table API call.
*/
- sqlite3_stmt *aStmt[37];
+ sqlite3_stmt *aStmt[40];
char *zReadExprlist;
char *zWriteExprlist;
int nNodeSize; /* Soft limit for node size */
u8 bFts4; /* True for FTS4, false for FTS3 */
- u8 bHasStat; /* True if %_stat table exists */
+ u8 bHasStat; /* True if %_stat table exists (2==unknown) */
u8 bHasDocsize; /* True if %_docsize table exists */
u8 bDescIdx; /* True if doclists are in reverse order */
u8 bIgnoreSavepoint; /* True to ignore xSavepoint invocations */
@@ -254,6 +267,12 @@ struct Fts3Table {
int inTransaction; /* True after xBegin but before xCommit/xRollback */
int mxSavepoint; /* Largest valid xSavepoint integer */
#endif
+
+#ifdef SQLITE_TEST
+ /* True to disable the incremental doclist optimization. This is controlled
+ ** by special insert command 'test-no-incr-doclist'. */
+ int bNoIncrDoclist;
+#endif
};
/*
@@ -279,7 +298,8 @@ struct Fts3Cursor {
int eEvalmode; /* An FTS3_EVAL_XX constant */
int nRowAvg; /* Average size of database rows, in pages */
sqlite3_int64 nDoc; /* Documents in table */
-
+ i64 iMinDocid; /* Minimum docid to return */
+ i64 iMaxDocid; /* Maximum docid to return */
int isMatchinfoNeeded; /* True when aMatchinfo[] needs filling in */
u32 *aMatchinfo; /* Information about most recent match */
int nMatchinfo; /* Number of elements in aMatchinfo[] */
@@ -309,6 +329,15 @@ struct Fts3Cursor {
#define FTS3_DOCID_SEARCH 1 /* Lookup by rowid on %_content table */
#define FTS3_FULLTEXT_SEARCH 2 /* Full-text index search */
+/*
+** The lower 16-bits of the sqlite3_index_info.idxNum value set by
+** the xBestIndex() method contains the Fts3Cursor.eSearch value described
+** above. The upper 16-bits contain a combination of the following
+** bits, used to describe extra constraints on full-text searches.
+*/
+#define FTS3_HAVE_LANGID 0x00010000 /* languageid=? */
+#define FTS3_HAVE_DOCID_GE 0x00020000 /* docid>=? */
+#define FTS3_HAVE_DOCID_LE 0x00040000 /* docid<=? */
struct Fts3Doclist {
char *aAll; /* Array containing doclist (or NULL) */
@@ -421,7 +450,6 @@ int sqlite3Fts3SegReaderPending(
Fts3Table*,int,const char*,int,int,Fts3SegReader**);
void sqlite3Fts3SegReaderFree(Fts3SegReader *);
int sqlite3Fts3AllSegdirs(Fts3Table*, int, int, int, sqlite3_stmt **);
-int sqlite3Fts3ReadLock(Fts3Table *);
int sqlite3Fts3ReadBlock(Fts3Table*, sqlite3_int64, char **, int*, int*);
int sqlite3Fts3SelectDoctotal(Fts3Table *, sqlite3_stmt **);
@@ -496,6 +524,10 @@ struct Fts3MultiSegReader {
int sqlite3Fts3Incrmerge(Fts3Table*,int,int);
+/*
+** Fast-path wrapper around sqlite3Fts3GetVarint32().
+**
+** If the high bit of the first byte at p is clear, the value is that single
+** byte: it is stored in *piVal and the macro evaluates to 1. Otherwise the
+** work is handed to sqlite3Fts3GetVarint32(p, piVal) and the macro evaluates
+** to whatever that function returns. Callers add the result to their read
+** pointer.
+**
+** Argument p is evaluated more than once, so it must not have side effects.
+** piVal is parenthesized in the expansion so that any pointer expression may
+** be passed safely.
+*/
+#define fts3GetVarint32(p, piVal) (                                            \
+  (*(u8*)(p)&0x80) ? sqlite3Fts3GetVarint32(p, piVal) : (*(piVal)=*(u8*)(p), 1) \
+)
+
/* fts3.c */
int sqlite3Fts3PutVarint(char *, sqlite3_int64);
int sqlite3Fts3GetVarint(const char *, sqlite_int64 *);
@@ -553,7 +585,7 @@ int sqlite3Fts3MsrIncrRestart(Fts3MultiSegReader *pCsr);
int sqlite3Fts3InitTok(sqlite3*, Fts3Hash *);
/* fts3_unicode2.c (functions generated by parsing unicode text files) */
-#ifdef SQLITE_ENABLE_FTS4_UNICODE61
+#ifndef SQLITE_DISABLE_FTS3_UNICODE
int sqlite3FtsUnicodeFold(int, int);
int sqlite3FtsUnicodeIsalnum(int);
int sqlite3FtsUnicodeIsdiacritic(int);
diff --git a/ext/fts3/fts3_aux.c b/ext/fts3/fts3_aux.c
index 9b582fc..c68b1a9 100644
--- a/ext/fts3/fts3_aux.c
+++ b/ext/fts3/fts3_aux.c
@@ -31,6 +31,7 @@ struct Fts3auxCursor {
Fts3SegFilter filter;
char *zStop;
int nStop; /* Byte-length of string zStop */
+ int iLangid; /* Language id to query */
int isEof; /* True if cursor is at EOF */
sqlite3_int64 iRowid; /* Current rowid */
@@ -45,7 +46,8 @@ struct Fts3auxCursor {
/*
** Schema of the terms table.
*/
-#define FTS3_TERMS_SCHEMA "CREATE TABLE x(term, col, documents, occurrences)"
+#define FTS3_AUX_SCHEMA \
+ "CREATE TABLE x(term, col, documents, occurrences, languageid HIDDEN)"
/*
** This function does all the work for both the xConnect and xCreate methods.
@@ -92,7 +94,7 @@ static int fts3auxConnectMethod(
}
nFts3 = (int)strlen(zFts3);
- rc = sqlite3_declare_vtab(db, FTS3_TERMS_SCHEMA);
+ rc = sqlite3_declare_vtab(db, FTS3_AUX_SCHEMA);
if( rc!=SQLITE_OK ) return rc;
nByte = sizeof(Fts3auxTable) + sizeof(Fts3Table) + nDb + nFts3 + 2;
@@ -152,6 +154,8 @@ static int fts3auxBestIndexMethod(
int iEq = -1;
int iGe = -1;
int iLe = -1;
+ int iLangid = -1;
+ int iNext = 1; /* Next free argvIndex value */
UNUSED_PARAMETER(pVTab);
@@ -163,36 +167,48 @@ static int fts3auxBestIndexMethod(
pInfo->orderByConsumed = 1;
}
- /* Search for equality and range constraints on the "term" column. */
+ /* Search for equality and range constraints on the "term" column.
+ ** And equality constraints on the hidden "languageid" column. */
for(i=0; i<pInfo->nConstraint; i++){
- if( pInfo->aConstraint[i].usable && pInfo->aConstraint[i].iColumn==0 ){
+ if( pInfo->aConstraint[i].usable ){
int op = pInfo->aConstraint[i].op;
- if( op==SQLITE_INDEX_CONSTRAINT_EQ ) iEq = i;
- if( op==SQLITE_INDEX_CONSTRAINT_LT ) iLe = i;
- if( op==SQLITE_INDEX_CONSTRAINT_LE ) iLe = i;
- if( op==SQLITE_INDEX_CONSTRAINT_GT ) iGe = i;
- if( op==SQLITE_INDEX_CONSTRAINT_GE ) iGe = i;
+ int iCol = pInfo->aConstraint[i].iColumn;
+
+ if( iCol==0 ){
+ if( op==SQLITE_INDEX_CONSTRAINT_EQ ) iEq = i;
+ if( op==SQLITE_INDEX_CONSTRAINT_LT ) iLe = i;
+ if( op==SQLITE_INDEX_CONSTRAINT_LE ) iLe = i;
+ if( op==SQLITE_INDEX_CONSTRAINT_GT ) iGe = i;
+ if( op==SQLITE_INDEX_CONSTRAINT_GE ) iGe = i;
+ }
+ if( iCol==4 ){
+ if( op==SQLITE_INDEX_CONSTRAINT_EQ ) iLangid = i;
+ }
}
}
if( iEq>=0 ){
pInfo->idxNum = FTS4AUX_EQ_CONSTRAINT;
- pInfo->aConstraintUsage[iEq].argvIndex = 1;
+ pInfo->aConstraintUsage[iEq].argvIndex = iNext++;
pInfo->estimatedCost = 5;
}else{
pInfo->idxNum = 0;
pInfo->estimatedCost = 20000;
if( iGe>=0 ){
pInfo->idxNum += FTS4AUX_GE_CONSTRAINT;
- pInfo->aConstraintUsage[iGe].argvIndex = 1;
+ pInfo->aConstraintUsage[iGe].argvIndex = iNext++;
pInfo->estimatedCost /= 2;
}
if( iLe>=0 ){
pInfo->idxNum += FTS4AUX_LE_CONSTRAINT;
- pInfo->aConstraintUsage[iLe].argvIndex = 1 + (iGe>=0);
+ pInfo->aConstraintUsage[iLe].argvIndex = iNext++;
pInfo->estimatedCost /= 2;
}
}
+ if( iLangid>=0 ){
+ pInfo->aConstraintUsage[iLangid].argvIndex = iNext++;
+ pInfo->estimatedCost--;
+ }
return SQLITE_OK;
}
@@ -352,7 +368,14 @@ static int fts3auxFilterMethod(
Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor;
Fts3Table *pFts3 = ((Fts3auxTable *)pCursor->pVtab)->pFts3Tab;
int rc;
- int isScan;
+ int isScan = 0;
+ int iLangVal = 0; /* Language id to query */
+
+ int iEq = -1; /* Index of term=? value in apVal */
+ int iGe = -1; /* Index of term>=? value in apVal */
+ int iLe = -1; /* Index of term<=? value in apVal */
+ int iLangid = -1; /* Index of languageid=? value in apVal */
+ int iNext = 0;
UNUSED_PARAMETER(nVal);
UNUSED_PARAMETER(idxStr);
@@ -362,7 +385,21 @@ static int fts3auxFilterMethod(
|| idxNum==FTS4AUX_LE_CONSTRAINT || idxNum==FTS4AUX_GE_CONSTRAINT
|| idxNum==(FTS4AUX_LE_CONSTRAINT|FTS4AUX_GE_CONSTRAINT)
);
- isScan = (idxNum!=FTS4AUX_EQ_CONSTRAINT);
+
+ if( idxNum==FTS4AUX_EQ_CONSTRAINT ){
+ iEq = iNext++;
+ }else{
+ isScan = 1;
+ if( idxNum & FTS4AUX_GE_CONSTRAINT ){
+ iGe = iNext++;
+ }
+ if( idxNum & FTS4AUX_LE_CONSTRAINT ){
+ iLe = iNext++;
+ }
+ }
+ if( iNext<nVal ){
+ iLangid = iNext++;
+ }
/* In case this cursor is being reused, close and zero it. */
testcase(pCsr->filter.zTerm);
@@ -374,22 +411,35 @@ static int fts3auxFilterMethod(
pCsr->filter.flags = FTS3_SEGMENT_REQUIRE_POS|FTS3_SEGMENT_IGNORE_EMPTY;
if( isScan ) pCsr->filter.flags |= FTS3_SEGMENT_SCAN;
- if( idxNum&(FTS4AUX_EQ_CONSTRAINT|FTS4AUX_GE_CONSTRAINT) ){
+ if( iEq>=0 || iGe>=0 ){
const unsigned char *zStr = sqlite3_value_text(apVal[0]);
+ assert( (iEq==0 && iGe==-1) || (iEq==-1 && iGe==0) );
if( zStr ){
pCsr->filter.zTerm = sqlite3_mprintf("%s", zStr);
pCsr->filter.nTerm = sqlite3_value_bytes(apVal[0]);
if( pCsr->filter.zTerm==0 ) return SQLITE_NOMEM;
}
}
- if( idxNum&FTS4AUX_LE_CONSTRAINT ){
- int iIdx = (idxNum&FTS4AUX_GE_CONSTRAINT) ? 1 : 0;
- pCsr->zStop = sqlite3_mprintf("%s", sqlite3_value_text(apVal[iIdx]));
- pCsr->nStop = sqlite3_value_bytes(apVal[iIdx]);
+
+ if( iLe>=0 ){
+ pCsr->zStop = sqlite3_mprintf("%s", sqlite3_value_text(apVal[iLe]));
+ pCsr->nStop = sqlite3_value_bytes(apVal[iLe]);
if( pCsr->zStop==0 ) return SQLITE_NOMEM;
}
+
+ if( iLangid>=0 ){
+ iLangVal = sqlite3_value_int(apVal[iLangid]);
+
+ /* If the user specified a negative value for the languageid, use zero
+ ** instead. This works, as the "languageid=?" constraint will also
+ ** be tested by the VDBE layer. The test will always be false (since
+ ** this module will not return a row with a negative languageid), and
+ ** so the overall query will return zero rows. */
+ if( iLangVal<0 ) iLangVal = 0;
+ }
+ pCsr->iLangid = iLangVal;
- rc = sqlite3Fts3SegReaderCursor(pFts3, 0, 0, FTS3_SEGCURSOR_ALL,
+ rc = sqlite3Fts3SegReaderCursor(pFts3, iLangVal, 0, FTS3_SEGCURSOR_ALL,
pCsr->filter.zTerm, pCsr->filter.nTerm, 0, isScan, &pCsr->csr
);
if( rc==SQLITE_OK ){
@@ -413,24 +463,37 @@ static int fts3auxEofMethod(sqlite3_vtab_cursor *pCursor){
*/
static int fts3auxColumnMethod(
sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */
- sqlite3_context *pContext, /* Context for sqlite3_result_xxx() calls */
+ sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */
int iCol /* Index of column to read value from */
){
Fts3auxCursor *p = (Fts3auxCursor *)pCursor;
assert( p->isEof==0 );
- if( iCol==0 ){ /* Column "term" */
- sqlite3_result_text(pContext, p->csr.zTerm, p->csr.nTerm, SQLITE_TRANSIENT);
- }else if( iCol==1 ){ /* Column "col" */
- if( p->iCol ){
- sqlite3_result_int(pContext, p->iCol-1);
- }else{
- sqlite3_result_text(pContext, "*", -1, SQLITE_STATIC);
- }
- }else if( iCol==2 ){ /* Column "documents" */
- sqlite3_result_int64(pContext, p->aStat[p->iCol].nDoc);
- }else{ /* Column "occurrences" */
- sqlite3_result_int64(pContext, p->aStat[p->iCol].nOcc);
+ switch( iCol ){
+ case 0: /* term */
+ sqlite3_result_text(pCtx, p->csr.zTerm, p->csr.nTerm, SQLITE_TRANSIENT);
+ break;
+
+ case 1: /* col */
+ if( p->iCol ){
+ sqlite3_result_int(pCtx, p->iCol-1);
+ }else{
+ sqlite3_result_text(pCtx, "*", -1, SQLITE_STATIC);
+ }
+ break;
+
+ case 2: /* documents */
+ sqlite3_result_int64(pCtx, p->aStat[p->iCol].nDoc);
+ break;
+
+ case 3: /* occurrences */
+ sqlite3_result_int64(pCtx, p->aStat[p->iCol].nOcc);
+ break;
+
+ default: /* languageid */
+ assert( iCol==4 );
+ sqlite3_result_int(pCtx, p->iLangid);
+ break;
}
return SQLITE_OK;
diff --git a/ext/fts3/fts3_expr.c b/ext/fts3/fts3_expr.c
index c046d7d..f5d28cb 100644
--- a/ext/fts3/fts3_expr.c
+++ b/ext/fts3/fts3_expr.c
@@ -155,6 +155,11 @@ int sqlite3Fts3OpenTokenizer(
return rc;
}
+/*
+** Function getNextNode(), which is called by fts3ExprParse(), may itself
+** call fts3ExprParse(). So this forward declaration is required.
+*/
+static int fts3ExprParse(ParseContext *, const char *, int, Fts3Expr **, int *);
/*
** Extract the next token from buffer z (length n) using the tokenizer
@@ -180,9 +185,16 @@ static int getNextToken(
int rc;
sqlite3_tokenizer_cursor *pCursor;
Fts3Expr *pRet = 0;
- int nConsumed = 0;
+ int i = 0;
+
+ /* Set variable i to the maximum number of bytes of input to tokenize. */
+ for(i=0; i<n; i++){
+ if( sqlite3_fts3_enable_parentheses && (z[i]=='(' || z[i]==')') ) break;
+ if( z[i]=='*' || z[i]=='"' ) break;
+ }
- rc = sqlite3Fts3OpenTokenizer(pTokenizer, pParse->iLangid, z, n, &pCursor);
+ *pnConsumed = i;
+ rc = sqlite3Fts3OpenTokenizer(pTokenizer, pParse->iLangid, z, i, &pCursor);
if( rc==SQLITE_OK ){
const char *zToken;
int nToken = 0, iStart = 0, iEnd = 0, iPosition = 0;
@@ -223,13 +235,14 @@ static int getNextToken(
}
}
- nConsumed = iEnd;
+ *pnConsumed = iEnd;
+ }else if( i && rc==SQLITE_DONE ){
+ rc = SQLITE_OK;
}
pModule->xClose(pCursor);
}
- *pnConsumed = nConsumed;
*ppExpr = pRet;
return rc;
}
@@ -370,12 +383,6 @@ no_mem:
}
/*
-** Function getNextNode(), which is called by fts3ExprParse(), may itself
-** call fts3ExprParse(). So this forward declaration is required.
-*/
-static int fts3ExprParse(ParseContext *, const char *, int, Fts3Expr **, int *);
-
-/*
** The output variable *ppExpr is populated with an allocated Fts3Expr
** structure, or set to 0 if the end of the input buffer is reached.
**
@@ -471,27 +478,6 @@ static int getNextNode(
}
}
- /* Check for an open bracket. */
- if( sqlite3_fts3_enable_parentheses ){
- if( *zInput=='(' ){
- int nConsumed;
- pParse->nNest++;
- rc = fts3ExprParse(pParse, &zInput[1], nInput-1, ppExpr, &nConsumed);
- if( rc==SQLITE_OK && !*ppExpr ){
- rc = SQLITE_DONE;
- }
- *pnConsumed = (int)((zInput - z) + 1 + nConsumed);
- return rc;
- }
-
- /* Check for a close bracket. */
- if( *zInput==')' ){
- pParse->nNest--;
- *pnConsumed = (int)((zInput - z) + 1);
- return SQLITE_DONE;
- }
- }
-
/* See if we are dealing with a quoted phrase. If this is the case, then
** search for the closing quote and pass the whole string to getNextString()
** for processing. This is easy to do, as fts3 has no syntax for escaping
@@ -506,6 +492,21 @@ static int getNextNode(
return getNextString(pParse, &zInput[1], ii-1, ppExpr);
}
+ if( sqlite3_fts3_enable_parentheses ){
+ if( *zInput=='(' ){
+ int nConsumed = 0;
+ pParse->nNest++;
+ rc = fts3ExprParse(pParse, zInput+1, nInput-1, ppExpr, &nConsumed);
+ if( rc==SQLITE_OK && !*ppExpr ){ rc = SQLITE_DONE; }
+ *pnConsumed = (int)(zInput - z) + 1 + nConsumed;
+ return rc;
+ }else if( *zInput==')' ){
+ pParse->nNest--;
+ *pnConsumed = (int)((zInput - z) + 1);
+ *ppExpr = 0;
+ return SQLITE_DONE;
+ }
+ }
/* If control flows to this point, this must be a regular token, or
** the end of the input. Read a regular token using the sqlite3_tokenizer
@@ -624,96 +625,100 @@ static int fts3ExprParse(
while( rc==SQLITE_OK ){
Fts3Expr *p = 0;
int nByte = 0;
+
rc = getNextNode(pParse, zIn, nIn, &p, &nByte);
+ assert( nByte>0 || (rc!=SQLITE_OK && p==0) );
if( rc==SQLITE_OK ){
- int isPhrase;
-
- if( !sqlite3_fts3_enable_parentheses
- && p->eType==FTSQUERY_PHRASE && pParse->isNot
- ){
- /* Create an implicit NOT operator. */
- Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr));
- if( !pNot ){
- sqlite3Fts3ExprFree(p);
- rc = SQLITE_NOMEM;
- goto exprparse_out;
- }
- pNot->eType = FTSQUERY_NOT;
- pNot->pRight = p;
- p->pParent = pNot;
- if( pNotBranch ){
- pNot->pLeft = pNotBranch;
- pNotBranch->pParent = pNot;
- }
- pNotBranch = pNot;
- p = pPrev;
- }else{
- int eType = p->eType;
- isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft);
-
- /* The isRequirePhrase variable is set to true if a phrase or
- ** an expression contained in parenthesis is required. If a
- ** binary operator (AND, OR, NOT or NEAR) is encounted when
- ** isRequirePhrase is set, this is a syntax error.
- */
- if( !isPhrase && isRequirePhrase ){
- sqlite3Fts3ExprFree(p);
- rc = SQLITE_ERROR;
- goto exprparse_out;
- }
-
- if( isPhrase && !isRequirePhrase ){
- /* Insert an implicit AND operator. */
- Fts3Expr *pAnd;
- assert( pRet && pPrev );
- pAnd = fts3MallocZero(sizeof(Fts3Expr));
- if( !pAnd ){
+ if( p ){
+ int isPhrase;
+
+ if( !sqlite3_fts3_enable_parentheses
+ && p->eType==FTSQUERY_PHRASE && pParse->isNot
+ ){
+ /* Create an implicit NOT operator. */
+ Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr));
+ if( !pNot ){
sqlite3Fts3ExprFree(p);
rc = SQLITE_NOMEM;
goto exprparse_out;
}
- pAnd->eType = FTSQUERY_AND;
- insertBinaryOperator(&pRet, pPrev, pAnd);
- pPrev = pAnd;
- }
+ pNot->eType = FTSQUERY_NOT;
+ pNot->pRight = p;
+ p->pParent = pNot;
+ if( pNotBranch ){
+ pNot->pLeft = pNotBranch;
+ pNotBranch->pParent = pNot;
+ }
+ pNotBranch = pNot;
+ p = pPrev;
+ }else{
+ int eType = p->eType;
+ isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft);
+
+ /* The isRequirePhrase variable is set to true if a phrase or
+ ** an expression contained in parenthesis is required. If a
+ ** binary operator (AND, OR, NOT or NEAR) is encountered when
+ ** isRequirePhrase is set, this is a syntax error.
+ */
+ if( !isPhrase && isRequirePhrase ){
+ sqlite3Fts3ExprFree(p);
+ rc = SQLITE_ERROR;
+ goto exprparse_out;
+ }
+
+ if( isPhrase && !isRequirePhrase ){
+ /* Insert an implicit AND operator. */
+ Fts3Expr *pAnd;
+ assert( pRet && pPrev );
+ pAnd = fts3MallocZero(sizeof(Fts3Expr));
+ if( !pAnd ){
+ sqlite3Fts3ExprFree(p);
+ rc = SQLITE_NOMEM;
+ goto exprparse_out;
+ }
+ pAnd->eType = FTSQUERY_AND;
+ insertBinaryOperator(&pRet, pPrev, pAnd);
+ pPrev = pAnd;
+ }
- /* This test catches attempts to make either operand of a NEAR
- ** operator something other than a phrase. For example, either of
- ** the following:
- **
- ** (bracketed expression) NEAR phrase
- ** phrase NEAR (bracketed expression)
- **
- ** Return an error in either case.
- */
- if( pPrev && (
+ /* This test catches attempts to make either operand of a NEAR
+ ** operator something other than a phrase. For example, either of
+ ** the following:
+ **
+ ** (bracketed expression) NEAR phrase
+ ** phrase NEAR (bracketed expression)
+ **
+ ** Return an error in either case.
+ */
+ if( pPrev && (
(eType==FTSQUERY_NEAR && !isPhrase && pPrev->eType!=FTSQUERY_PHRASE)
|| (eType!=FTSQUERY_PHRASE && isPhrase && pPrev->eType==FTSQUERY_NEAR)
- )){
- sqlite3Fts3ExprFree(p);
- rc = SQLITE_ERROR;
- goto exprparse_out;
- }
-
- if( isPhrase ){
- if( pRet ){
- assert( pPrev && pPrev->pLeft && pPrev->pRight==0 );
- pPrev->pRight = p;
- p->pParent = pPrev;
+ )){
+ sqlite3Fts3ExprFree(p);
+ rc = SQLITE_ERROR;
+ goto exprparse_out;
+ }
+
+ if( isPhrase ){
+ if( pRet ){
+ assert( pPrev && pPrev->pLeft && pPrev->pRight==0 );
+ pPrev->pRight = p;
+ p->pParent = pPrev;
+ }else{
+ pRet = p;
+ }
}else{
- pRet = p;
+ insertBinaryOperator(&pRet, pPrev, p);
}
- }else{
- insertBinaryOperator(&pRet, pPrev, p);
+ isRequirePhrase = !isPhrase;
}
- isRequirePhrase = !isPhrase;
+ pPrev = p;
}
assert( nByte>0 );
}
assert( rc!=SQLITE_OK || (nByte>0 && nByte<=nIn) );
nIn -= nByte;
zIn += nByte;
- pPrev = p;
}
if( rc==SQLITE_DONE && pRet && isRequirePhrase ){
@@ -1000,17 +1005,16 @@ int sqlite3Fts3ExprParse(
Fts3Expr **ppExpr, /* OUT: Parsed query structure */
char **pzErr /* OUT: Error message (sqlite3_malloc) */
){
- static const int MAX_EXPR_DEPTH = 12;
int rc = fts3ExprParseUnbalanced(
pTokenizer, iLangid, azCol, bFts4, nCol, iDefaultCol, z, n, ppExpr
);
/* Rebalance the expression. And check that its depth does not exceed
- ** MAX_EXPR_DEPTH. */
+ ** SQLITE_FTS3_MAX_EXPR_DEPTH. */
if( rc==SQLITE_OK && *ppExpr ){
- rc = fts3ExprBalance(ppExpr, MAX_EXPR_DEPTH);
+ rc = fts3ExprBalance(ppExpr, SQLITE_FTS3_MAX_EXPR_DEPTH);
if( rc==SQLITE_OK ){
- rc = fts3ExprCheckDepth(*ppExpr, MAX_EXPR_DEPTH);
+ rc = fts3ExprCheckDepth(*ppExpr, SQLITE_FTS3_MAX_EXPR_DEPTH);
}
}
@@ -1019,7 +1023,8 @@ int sqlite3Fts3ExprParse(
*ppExpr = 0;
if( rc==SQLITE_TOOBIG ){
*pzErr = sqlite3_mprintf(
- "FTS expression tree is too large (maximum depth %d)", MAX_EXPR_DEPTH
+ "FTS expression tree is too large (maximum depth %d)",
+ SQLITE_FTS3_MAX_EXPR_DEPTH
);
rc = SQLITE_ERROR;
}else if( rc==SQLITE_ERROR ){
diff --git a/ext/fts3/fts3_hash.c b/ext/fts3/fts3_hash.c
index 57c59b5..1a32a53 100644
--- a/ext/fts3/fts3_hash.c
+++ b/ext/fts3/fts3_hash.c
@@ -96,13 +96,13 @@ void sqlite3Fts3HashClear(Fts3Hash *pH){
*/
static int fts3StrHash(const void *pKey, int nKey){
const char *z = (const char *)pKey;
- int h = 0;
+ unsigned h = 0;
if( nKey<=0 ) nKey = (int) strlen(z);
while( nKey > 0 ){
h = (h<<3) ^ h ^ *z++;
nKey--;
}
- return h & 0x7fffffff;
+ return (int)(h & 0x7fffffff);
}
static int fts3StrCompare(const void *pKey1, int n1, const void *pKey2, int n2){
if( n1!=n2 ) return 1;
diff --git a/ext/fts3/fts3_porter.c b/ext/fts3/fts3_porter.c
index 579745b..db175ac 100644
--- a/ext/fts3/fts3_porter.c
+++ b/ext/fts3/fts3_porter.c
@@ -403,12 +403,14 @@ static void porter_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
/* Step 2 */
switch( z[1] ){
case 'a':
- stem(&z, "lanoita", "ate", m_gt_0) ||
- stem(&z, "lanoit", "tion", m_gt_0);
+ if( !stem(&z, "lanoita", "ate", m_gt_0) ){
+ stem(&z, "lanoit", "tion", m_gt_0);
+ }
break;
case 'c':
- stem(&z, "icne", "ence", m_gt_0) ||
- stem(&z, "icna", "ance", m_gt_0);
+ if( !stem(&z, "icne", "ence", m_gt_0) ){
+ stem(&z, "icna", "ance", m_gt_0);
+ }
break;
case 'e':
stem(&z, "rezi", "ize", m_gt_0);
@@ -417,43 +419,54 @@ static void porter_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
stem(&z, "igol", "log", m_gt_0);
break;
case 'l':
- stem(&z, "ilb", "ble", m_gt_0) ||
- stem(&z, "illa", "al", m_gt_0) ||
- stem(&z, "iltne", "ent", m_gt_0) ||
- stem(&z, "ile", "e", m_gt_0) ||
- stem(&z, "ilsuo", "ous", m_gt_0);
+ if( !stem(&z, "ilb", "ble", m_gt_0)
+ && !stem(&z, "illa", "al", m_gt_0)
+ && !stem(&z, "iltne", "ent", m_gt_0)
+ && !stem(&z, "ile", "e", m_gt_0)
+ ){
+ stem(&z, "ilsuo", "ous", m_gt_0);
+ }
break;
case 'o':
- stem(&z, "noitazi", "ize", m_gt_0) ||
- stem(&z, "noita", "ate", m_gt_0) ||
- stem(&z, "rota", "ate", m_gt_0);
+ if( !stem(&z, "noitazi", "ize", m_gt_0)
+ && !stem(&z, "noita", "ate", m_gt_0)
+ ){
+ stem(&z, "rota", "ate", m_gt_0);
+ }
break;
case 's':
- stem(&z, "msila", "al", m_gt_0) ||
- stem(&z, "ssenevi", "ive", m_gt_0) ||
- stem(&z, "ssenluf", "ful", m_gt_0) ||
- stem(&z, "ssensuo", "ous", m_gt_0);
+ if( !stem(&z, "msila", "al", m_gt_0)
+ && !stem(&z, "ssenevi", "ive", m_gt_0)
+ && !stem(&z, "ssenluf", "ful", m_gt_0)
+ ){
+ stem(&z, "ssensuo", "ous", m_gt_0);
+ }
break;
case 't':
- stem(&z, "itila", "al", m_gt_0) ||
- stem(&z, "itivi", "ive", m_gt_0) ||
- stem(&z, "itilib", "ble", m_gt_0);
+ if( !stem(&z, "itila", "al", m_gt_0)
+ && !stem(&z, "itivi", "ive", m_gt_0)
+ ){
+ stem(&z, "itilib", "ble", m_gt_0);
+ }
break;
}
/* Step 3 */
switch( z[0] ){
case 'e':
- stem(&z, "etaci", "ic", m_gt_0) ||
- stem(&z, "evita", "", m_gt_0) ||
- stem(&z, "ezila", "al", m_gt_0);
+ if( !stem(&z, "etaci", "ic", m_gt_0)
+ && !stem(&z, "evita", "", m_gt_0)
+ ){
+ stem(&z, "ezila", "al", m_gt_0);
+ }
break;
case 'i':
stem(&z, "itici", "ic", m_gt_0);
break;
case 'l':
- stem(&z, "laci", "ic", m_gt_0) ||
- stem(&z, "luf", "", m_gt_0);
+ if( !stem(&z, "laci", "ic", m_gt_0) ){
+ stem(&z, "luf", "", m_gt_0);
+ }
break;
case 's':
stem(&z, "ssen", "", m_gt_0);
@@ -494,9 +507,11 @@ static void porter_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
z += 3;
}
}else if( z[2]=='e' ){
- stem(&z, "tneme", "", m_gt_1) ||
- stem(&z, "tnem", "", m_gt_1) ||
- stem(&z, "tne", "", m_gt_1);
+ if( !stem(&z, "tneme", "", m_gt_1)
+ && !stem(&z, "tnem", "", m_gt_1)
+ ){
+ stem(&z, "tne", "", m_gt_1);
+ }
}
}
break;
@@ -515,8 +530,9 @@ static void porter_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
}
break;
case 't':
- stem(&z, "eta", "", m_gt_1) ||
- stem(&z, "iti", "", m_gt_1);
+ if( !stem(&z, "eta", "", m_gt_1) ){
+ stem(&z, "iti", "", m_gt_1);
+ }
break;
case 'u':
if( z[0]=='s' && z[2]=='o' && m_gt_1(z+3) ){
diff --git a/ext/fts3/fts3_snippet.c b/ext/fts3/fts3_snippet.c
index d54a787..aa8779f 100644
--- a/ext/fts3/fts3_snippet.c
+++ b/ext/fts3/fts3_snippet.c
@@ -128,7 +128,7 @@ struct StrBuffer {
*/
static void fts3GetDeltaPosition(char **pp, int *piPos){
int iVal;
- *pp += sqlite3Fts3GetVarint32(*pp, &iVal);
+ *pp += fts3GetVarint32(*pp, &iVal);
*piPos += (iVal-2);
}
@@ -504,6 +504,7 @@ static int fts3StringAppend(
pStr->z = zNew;
pStr->nAlloc = nAlloc;
}
+ assert( pStr->z!=0 && (pStr->nAlloc >= pStr->n+nAppend+1) );
/* Append the data to the string buffer. */
memcpy(&pStr->z[pStr->n], zAppend, nAppend);
diff --git a/ext/fts3/fts3_test.c b/ext/fts3/fts3_test.c
index 75ec6bd..36dcc94 100644
--- a/ext/fts3/fts3_test.c
+++ b/ext/fts3/fts3_test.c
@@ -517,6 +517,51 @@ static int fts3_test_tokenizer_cmd(
return TCL_OK;
}
+static int fts3_test_varint_cmd(
+ ClientData clientData,
+ Tcl_Interp *interp,
+ int objc,
+ Tcl_Obj *CONST objv[]
+){
+#ifdef SQLITE_ENABLE_FTS3
+ char aBuf[24];
+ int rc;
+ Tcl_WideInt w, w2;
+ int nByte, nByte2;
+
+ if( objc!=2 ){
+ Tcl_WrongNumArgs(interp, 1, objv, "INTEGER");
+ return TCL_ERROR;
+ }
+
+ rc = Tcl_GetWideIntFromObj(interp, objv[1], &w);
+ if( rc!=TCL_OK ) return rc;
+
+ nByte = sqlite3Fts3PutVarint(aBuf, w);
+ nByte2 = sqlite3Fts3GetVarint(aBuf, &w2);
+ if( w!=w2 || nByte!=nByte2 ){
+ char *zErr = sqlite3_mprintf("error testing %lld", w);
+ Tcl_ResetResult(interp);
+ Tcl_AppendResult(interp, zErr, 0);
+ return TCL_ERROR;
+ }
+
+ if( w<=2147483647 && w>=0 ){
+ int i;
+ nByte2 = fts3GetVarint32(aBuf, &i);
+ if( (int)w!=i || nByte!=nByte2 ){
+ char *zErr = sqlite3_mprintf("error testing %lld (32-bit)", w);
+ Tcl_ResetResult(interp);
+ Tcl_AppendResult(interp, zErr, 0);
+ return TCL_ERROR;
+ }
+ }
+
+#endif
+ UNUSED_PARAMETER(clientData);
+ return TCL_OK;
+}
+
/*
** End of tokenizer code.
**************************************************************************/
@@ -529,6 +574,10 @@ int Sqlitetestfts3_Init(Tcl_Interp *interp){
Tcl_CreateObjCommand(
interp, "fts3_test_tokenizer", fts3_test_tokenizer_cmd, 0, 0
);
+
+ Tcl_CreateObjCommand(
+ interp, "fts3_test_varint", fts3_test_varint_cmd, 0, 0
+ );
return TCL_OK;
}
#endif /* SQLITE_ENABLE_FTS3 || SQLITE_ENABLE_FTS4 */
diff --git a/ext/fts3/fts3_unicode.c b/ext/fts3/fts3_unicode.c
index 188358e..94fc27b 100644
--- a/ext/fts3/fts3_unicode.c
+++ b/ext/fts3/fts3_unicode.c
@@ -13,7 +13,7 @@
** Implementation of the "unicode" full-text-search tokenizer.
*/
-#ifdef SQLITE_ENABLE_FTS4_UNICODE61
+#ifndef SQLITE_DISABLE_FTS3_UNICODE
#include "fts3Int.h"
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
@@ -231,7 +231,7 @@ static int unicodeCreate(
for(i=0; rc==SQLITE_OK && i<nArg; i++){
const char *z = azArg[i];
- int n = strlen(z);
+ int n = (int)strlen(z);
if( n==19 && memcmp("remove_diacritics=1", z, 19)==0 ){
pNew->bRemoveDiacritic = 1;
@@ -318,7 +318,7 @@ static int unicodeNext(
){
unicode_cursor *pCsr = (unicode_cursor *)pC;
unicode_tokenizer *p = ((unicode_tokenizer *)pCsr->base.pTokenizer);
- int iCode;
+ int iCode = 0;
char *zOut;
const unsigned char *z = &pCsr->aInput[pCsr->iOff];
const unsigned char *zStart = z;
@@ -363,11 +363,11 @@ static int unicodeNext(
);
/* Set the output variables and return. */
- pCsr->iOff = (z - pCsr->aInput);
+ pCsr->iOff = (int)(z - pCsr->aInput);
*paToken = pCsr->zToken;
- *pnToken = zOut - pCsr->zToken;
- *piStart = (zStart - pCsr->aInput);
- *piEnd = (zEnd - pCsr->aInput);
+ *pnToken = (int)(zOut - pCsr->zToken);
+ *piStart = (int)(zStart - pCsr->aInput);
+ *piEnd = (int)(zEnd - pCsr->aInput);
*piPos = pCsr->iToken++;
return SQLITE_OK;
}
@@ -390,4 +390,4 @@ void sqlite3Fts3UnicodeTokenizer(sqlite3_tokenizer_module const **ppModule){
}
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
-#endif /* ifndef SQLITE_ENABLE_FTS4_UNICODE61 */
+#endif /* ifndef SQLITE_DISABLE_FTS3_UNICODE */
diff --git a/ext/fts3/fts3_unicode2.c b/ext/fts3/fts3_unicode2.c
index 3c24569..20b7a25 100644
--- a/ext/fts3/fts3_unicode2.c
+++ b/ext/fts3/fts3_unicode2.c
@@ -15,7 +15,7 @@
** DO NOT EDIT THIS MACHINE GENERATED FILE.
*/
-#if defined(SQLITE_ENABLE_FTS4_UNICODE61)
+#ifndef SQLITE_DISABLE_FTS3_UNICODE
#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)
#include <assert.h>
@@ -39,7 +39,7 @@ int sqlite3FtsUnicodeIsalnum(int c){
** C. It is not possible to represent a range larger than 1023 codepoints
** using this format.
*/
- const static unsigned int aEntry[] = {
+ static const unsigned int aEntry[] = {
0x00000030, 0x0000E807, 0x00016C06, 0x0001EC2F, 0x0002AC07,
0x0002D001, 0x0002D803, 0x0002EC01, 0x0002FC01, 0x00035C01,
0x0003DC01, 0x000B0804, 0x000B480E, 0x000B9407, 0x000BB401,
@@ -101,28 +101,27 @@ int sqlite3FtsUnicodeIsalnum(int c){
0x02A97004, 0x02A9DC03, 0x02A9EC01, 0x02AAC001, 0x02AAC803,
0x02AADC02, 0x02AAF802, 0x02AB0401, 0x02AB7802, 0x02ABAC07,
0x02ABD402, 0x02AF8C0B, 0x03600001, 0x036DFC02, 0x036FFC02,
- 0x037FFC02, 0x03E3FC01, 0x03EC7801, 0x03ECA401, 0x03EEC810,
- 0x03F4F802, 0x03F7F002, 0x03F8001A, 0x03F88007, 0x03F8C023,
- 0x03F95013, 0x03F9A004, 0x03FBFC01, 0x03FC040F, 0x03FC6807,
- 0x03FCEC06, 0x03FD6C0B, 0x03FF8007, 0x03FFA007, 0x03FFE405,
- 0x04040003, 0x0404DC09, 0x0405E411, 0x0406400C, 0x0407402E,
- 0x040E7C01, 0x040F4001, 0x04215C01, 0x04247C01, 0x0424FC01,
- 0x04280403, 0x04281402, 0x04283004, 0x0428E003, 0x0428FC01,
- 0x04294009, 0x0429FC01, 0x042CE407, 0x04400003, 0x0440E016,
- 0x04420003, 0x0442C012, 0x04440003, 0x04449C0E, 0x04450004,
- 0x04460003, 0x0446CC0E, 0x04471404, 0x045AAC0D, 0x0491C004,
- 0x05BD442E, 0x05BE3C04, 0x074000F6, 0x07440027, 0x0744A4B5,
- 0x07480046, 0x074C0057, 0x075B0401, 0x075B6C01, 0x075BEC01,
- 0x075C5401, 0x075CD401, 0x075D3C01, 0x075DBC01, 0x075E2401,
- 0x075EA401, 0x075F0C01, 0x07BBC002, 0x07C0002C, 0x07C0C064,
- 0x07C2800F, 0x07C2C40E, 0x07C3040F, 0x07C3440F, 0x07C4401F,
- 0x07C4C03C, 0x07C5C02B, 0x07C7981D, 0x07C8402B, 0x07C90009,
- 0x07C94002, 0x07CC0021, 0x07CCC006, 0x07CCDC46, 0x07CE0014,
- 0x07CE8025, 0x07CF1805, 0x07CF8011, 0x07D0003F, 0x07D10001,
- 0x07D108B6, 0x07D3E404, 0x07D4003E, 0x07D50004, 0x07D54018,
- 0x07D7EC46, 0x07D9140B, 0x07DA0046, 0x07DC0074, 0x38000401,
- 0x38008060, 0x380400F0, 0x3C000001, 0x3FFFF401, 0x40000001,
- 0x43FFF401,
+ 0x037FFC01, 0x03EC7801, 0x03ECA401, 0x03EEC810, 0x03F4F802,
+ 0x03F7F002, 0x03F8001A, 0x03F88007, 0x03F8C023, 0x03F95013,
+ 0x03F9A004, 0x03FBFC01, 0x03FC040F, 0x03FC6807, 0x03FCEC06,
+ 0x03FD6C0B, 0x03FF8007, 0x03FFA007, 0x03FFE405, 0x04040003,
+ 0x0404DC09, 0x0405E411, 0x0406400C, 0x0407402E, 0x040E7C01,
+ 0x040F4001, 0x04215C01, 0x04247C01, 0x0424FC01, 0x04280403,
+ 0x04281402, 0x04283004, 0x0428E003, 0x0428FC01, 0x04294009,
+ 0x0429FC01, 0x042CE407, 0x04400003, 0x0440E016, 0x04420003,
+ 0x0442C012, 0x04440003, 0x04449C0E, 0x04450004, 0x04460003,
+ 0x0446CC0E, 0x04471404, 0x045AAC0D, 0x0491C004, 0x05BD442E,
+ 0x05BE3C04, 0x074000F6, 0x07440027, 0x0744A4B5, 0x07480046,
+ 0x074C0057, 0x075B0401, 0x075B6C01, 0x075BEC01, 0x075C5401,
+ 0x075CD401, 0x075D3C01, 0x075DBC01, 0x075E2401, 0x075EA401,
+ 0x075F0C01, 0x07BBC002, 0x07C0002C, 0x07C0C064, 0x07C2800F,
+ 0x07C2C40E, 0x07C3040F, 0x07C3440F, 0x07C4401F, 0x07C4C03C,
+ 0x07C5C02B, 0x07C7981D, 0x07C8402B, 0x07C90009, 0x07C94002,
+ 0x07CC0021, 0x07CCC006, 0x07CCDC46, 0x07CE0014, 0x07CE8025,
+ 0x07CF1805, 0x07CF8011, 0x07D0003F, 0x07D10001, 0x07D108B6,
+ 0x07D3E404, 0x07D4003E, 0x07D50004, 0x07D54018, 0x07D7EC46,
+ 0x07D9140B, 0x07DA0046, 0x07DC0074, 0x38000401, 0x38008060,
+ 0x380400F0,
};
static const unsigned int aAscii[4] = {
0xFFFFFFFF, 0xFC00FFFF, 0xF8000001, 0xF8000001,
@@ -132,7 +131,7 @@ int sqlite3FtsUnicodeIsalnum(int c){
return ( (aAscii[c >> 5] & (1 << (c & 0x001F)))==0 );
}else if( c<(1<<22) ){
unsigned int key = (((unsigned int)c)<<10) | 0x000003FF;
- int iRes;
+ int iRes = 0;
int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
int iLo = 0;
while( iHi>=iLo ){
@@ -203,7 +202,7 @@ static int remove_diacritic(int c){
}
assert( key>=aDia[iRes] );
return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : (int)aChar[iRes]);
-};
+}
/*
@@ -363,4 +362,4 @@ int sqlite3FtsUnicodeFold(int c, int bRemoveDiacritic){
return ret;
}
#endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */
-#endif /* !defined(SQLITE_ENABLE_FTS4_UNICODE61) */
+#endif /* !defined(SQLITE_DISABLE_FTS3_UNICODE) */
diff --git a/ext/fts3/fts3_write.c b/ext/fts3/fts3_write.c
index 269d1dd..0da08c6 100644
--- a/ext/fts3/fts3_write.c
+++ b/ext/fts3/fts3_write.c
@@ -193,6 +193,7 @@ struct SegmentWriter {
int nSize; /* Size of allocation at aData */
int nData; /* Bytes of data in aData */
char *aData; /* Pointer to block from malloc() */
+ i64 nLeafData; /* Number of bytes of leaf data written */
};
/*
@@ -268,6 +269,10 @@ struct SegmentNode {
#define SQL_SELECT_INDEXES 35
#define SQL_SELECT_MXLEVEL 36
+#define SQL_SELECT_LEVEL_RANGE2 37
+#define SQL_UPDATE_LEVEL_IDX 38
+#define SQL_UPDATE_LEVEL 39
+
/*
** This function is used to obtain an SQLite prepared statement handle
** for the statement identified by the second argument. If successful,
@@ -369,7 +374,18 @@ static int fts3SqlStmt(
/* SQL_SELECT_MXLEVEL
** Return the largest relative level in the FTS index or indexes. */
-/* 36 */ "SELECT max( level %% 1024 ) FROM %Q.'%q_segdir'"
+/* 36 */ "SELECT max( level %% 1024 ) FROM %Q.'%q_segdir'",
+
+ /* Return segments in order from oldest to newest.*/
+/* 37 */ "SELECT level, idx, end_block "
+ "FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ? "
+ "ORDER BY level DESC, idx ASC",
+
+ /* Update statements used while promoting segments */
+/* 38 */ "UPDATE OR FAIL %Q.'%q_segdir' SET level=-1,idx=? "
+ "WHERE level=? AND idx=?",
+/* 39 */ "UPDATE OR FAIL %Q.'%q_segdir' SET level=? WHERE level=-1"
+
};
int rc = SQLITE_OK;
sqlite3_stmt *pStmt;
@@ -489,37 +505,30 @@ static void fts3SqlExec(
/*
-** This function ensures that the caller has obtained a shared-cache
-** table-lock on the %_content table. This is required before reading
-** data from the fts3 table. If this lock is not acquired first, then
-** the caller may end up holding read-locks on the %_segments and %_segdir
-** tables, but no read-lock on the %_content table. If this happens
-** a second connection will be able to write to the fts3 table, but
-** attempting to commit those writes might return SQLITE_LOCKED or
-** SQLITE_LOCKED_SHAREDCACHE (because the commit attempts to obtain
-** write-locks on the %_segments and %_segdir ** tables).
-**
-** We try to avoid this because if FTS3 returns any error when committing
-** a transaction, the whole transaction will be rolled back. And this is
-** not what users expect when they get SQLITE_LOCKED_SHAREDCACHE. It can
-** still happen if the user reads data directly from the %_segments or
-** %_segdir tables instead of going through FTS3 though.
+** This function ensures that the caller has obtained an exclusive
+** shared-cache table-lock on the %_segdir table. This is required before
+** writing data to the fts3 table. If this lock is not acquired first, then
+** the caller may end up attempting to take this lock as part of committing
+** a transaction, causing SQLite to return SQLITE_LOCKED or
+** SQLITE_LOCKED_SHAREDCACHE to a COMMIT command.
**
-** This reasoning does not apply to a content=xxx table.
+** It is best to avoid this because if FTS3 returns any error when
+** committing a transaction, the whole transaction will be rolled back.
+** And this is not what users expect when they get SQLITE_LOCKED_SHAREDCACHE.
+** It can still happen if the user locks the underlying tables directly
+** instead of accessing them via FTS.
*/
-int sqlite3Fts3ReadLock(Fts3Table *p){
- int rc; /* Return code */
- sqlite3_stmt *pStmt; /* Statement used to obtain lock */
-
- if( p->zContentTbl==0 ){
- rc = fts3SqlStmt(p, SQL_SELECT_CONTENT_BY_ROWID, &pStmt, 0);
+static int fts3Writelock(Fts3Table *p){
+ int rc = SQLITE_OK;
+
+ if( p->nPendingData==0 ){
+ sqlite3_stmt *pStmt;
+ rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_LEVEL, &pStmt, 0);
if( rc==SQLITE_OK ){
sqlite3_bind_null(pStmt, 1);
sqlite3_step(pStmt);
rc = sqlite3_reset(pStmt);
}
- }else{
- rc = SQLITE_OK;
}
return rc;
@@ -907,12 +916,15 @@ static int fts3InsertTerms(
){
int i; /* Iterator variable */
for(i=2; i<p->nColumn+2; i++){
- const char *zText = (const char *)sqlite3_value_text(apVal[i]);
- int rc = fts3PendingTermsAdd(p, iLangid, zText, i-2, &aSz[i-2]);
- if( rc!=SQLITE_OK ){
- return rc;
+ int iCol = i-2;
+ if( p->abNotindexed[iCol]==0 ){
+ const char *zText = (const char *)sqlite3_value_text(apVal[i]);
+ int rc = fts3PendingTermsAdd(p, iLangid, zText, iCol, &aSz[iCol]);
+ if( rc!=SQLITE_OK ){
+ return rc;
+ }
+ aSz[p->nColumn] += sqlite3_value_bytes(apVal[i]);
}
- aSz[p->nColumn] += sqlite3_value_bytes(apVal[i]);
}
return SQLITE_OK;
}
@@ -1059,9 +1071,12 @@ static void fts3DeleteTerms(
int iLangid = langidFromSelect(p, pSelect);
rc = fts3PendingTermsDocid(p, iLangid, sqlite3_column_int64(pSelect, 0));
for(i=1; rc==SQLITE_OK && i<=p->nColumn; i++){
- const char *zText = (const char *)sqlite3_column_text(pSelect, i);
- rc = fts3PendingTermsAdd(p, iLangid, zText, -1, &aSz[i-1]);
- aSz[p->nColumn] += sqlite3_column_bytes(pSelect, i);
+ int iCol = i-1;
+ if( p->abNotindexed[iCol]==0 ){
+ const char *zText = (const char *)sqlite3_column_text(pSelect, i);
+ rc = fts3PendingTermsAdd(p, iLangid, zText, -1, &aSz[iCol]);
+ aSz[p->nColumn] += sqlite3_column_bytes(pSelect, i);
+ }
}
if( rc!=SQLITE_OK ){
sqlite3_reset(pSelect);
@@ -1345,8 +1360,8 @@ static int fts3SegReaderNext(
/* Because of the FTS3_NODE_PADDING bytes of padding, the following is
** safe (no risk of overread) even if the node data is corrupted. */
- pNext += sqlite3Fts3GetVarint32(pNext, &nPrefix);
- pNext += sqlite3Fts3GetVarint32(pNext, &nSuffix);
+ pNext += fts3GetVarint32(pNext, &nPrefix);
+ pNext += fts3GetVarint32(pNext, &nSuffix);
if( nPrefix<0 || nSuffix<=0
|| &pNext[nSuffix]>&pReader->aNode[pReader->nNode]
){
@@ -1369,7 +1384,7 @@ static int fts3SegReaderNext(
memcpy(&pReader->zTerm[nPrefix], pNext, nSuffix);
pReader->nTerm = nPrefix+nSuffix;
pNext += nSuffix;
- pNext += sqlite3Fts3GetVarint32(pNext, &pReader->nDoclist);
+ pNext += fts3GetVarint32(pNext, &pReader->nDoclist);
pReader->aDoclist = pNext;
pReader->pOffsetList = 0;
@@ -1462,7 +1477,7 @@ static int fts3SegReaderNextDocid(
/* The following line of code (and the "p++" below the while() loop) is
** normally all that is required to move pointer p to the desired
** position. The exception is if this node is being loaded from disk
- ** incrementally and pointer "p" now points to the first byte passed
+ ** incrementally and pointer "p" now points to the first byte past
** the populated part of pReader->aNode[].
*/
while( *p | c ) c = *p++ & 0x80;
@@ -1911,6 +1926,7 @@ static int fts3WriteSegdir(
sqlite3_int64 iStartBlock, /* Value for "start_block" field */
sqlite3_int64 iLeafEndBlock, /* Value for "leaves_end_block" field */
sqlite3_int64 iEndBlock, /* Value for "end_block" field */
+ sqlite3_int64 nLeafData, /* Bytes of leaf data in segment */
char *zRoot, /* Blob value for "root" field */
int nRoot /* Number of bytes in buffer zRoot */
){
@@ -1921,7 +1937,13 @@ static int fts3WriteSegdir(
sqlite3_bind_int(pStmt, 2, iIdx);
sqlite3_bind_int64(pStmt, 3, iStartBlock);
sqlite3_bind_int64(pStmt, 4, iLeafEndBlock);
- sqlite3_bind_int64(pStmt, 5, iEndBlock);
+ if( nLeafData==0 ){
+ sqlite3_bind_int64(pStmt, 5, iEndBlock);
+ }else{
+ char *zEnd = sqlite3_mprintf("%lld %lld", iEndBlock, nLeafData);
+ if( !zEnd ) return SQLITE_NOMEM;
+ sqlite3_bind_text(pStmt, 5, zEnd, -1, sqlite3_free);
+ }
sqlite3_bind_blob(pStmt, 6, zRoot, nRoot, SQLITE_STATIC);
sqlite3_step(pStmt);
rc = sqlite3_reset(pStmt);
@@ -2247,6 +2269,9 @@ static int fts3SegWriterAdd(
nDoclist; /* Doclist data */
}
+ /* Increase the total number of bytes written to account for the new entry. */
+ pWriter->nLeafData += nReq;
+
/* If the buffer currently allocated is too small for this entry, realloc
** the buffer to make it large enough.
*/
@@ -2318,13 +2343,13 @@ static int fts3SegWriterFlush(
pWriter->iFirst, pWriter->iFree, &iLast, &zRoot, &nRoot);
}
if( rc==SQLITE_OK ){
- rc = fts3WriteSegdir(
- p, iLevel, iIdx, pWriter->iFirst, iLastLeaf, iLast, zRoot, nRoot);
+ rc = fts3WriteSegdir(p, iLevel, iIdx,
+ pWriter->iFirst, iLastLeaf, iLast, pWriter->nLeafData, zRoot, nRoot);
}
}else{
/* The entire tree fits on the root node. Write it to the segdir table. */
- rc = fts3WriteSegdir(
- p, iLevel, iIdx, 0, 0, 0, pWriter->aData, pWriter->nData);
+ rc = fts3WriteSegdir(p, iLevel, iIdx,
+ 0, 0, 0, pWriter->nLeafData, pWriter->aData, pWriter->nData);
}
p->nLeafAdd++;
return rc;
@@ -2409,6 +2434,37 @@ static int fts3SegmentMaxLevel(
}
/*
+** iAbsLevel is an absolute level that may be assumed to exist within
+** the database. This function checks if it is the largest level number
+** within its index. Assuming no error occurs, *pbMax is set to 1 if
+** iAbsLevel is indeed the largest level, or 0 otherwise, and SQLITE_OK
+** is returned. If an error occurs, an error code is returned and the
+** final value of *pbMax is undefined.
+*/
+static int fts3SegmentIsMaxLevel(Fts3Table *p, i64 iAbsLevel, int *pbMax){
+
+ /* Set pStmt to the compiled version of:
+ **
+ ** SELECT max(level) FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?
+ **
+ ** (1024 is actually the value of macro FTS3_SEGDIR_PREFIXLEVEL_STR).
+ */
+ sqlite3_stmt *pStmt;
+ int rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR_MAX_LEVEL, &pStmt, 0);
+ if( rc!=SQLITE_OK ) return rc;
+ sqlite3_bind_int64(pStmt, 1, iAbsLevel+1);
+ sqlite3_bind_int64(pStmt, 2,
+ ((iAbsLevel/FTS3_SEGDIR_MAXLEVEL)+1) * FTS3_SEGDIR_MAXLEVEL
+ );
+
+ *pbMax = 0;
+ if( SQLITE_ROW==sqlite3_step(pStmt) ){
+ *pbMax = sqlite3_column_type(pStmt, 0)==SQLITE_NULL;
+ }
+ return sqlite3_reset(pStmt);
+}
+
+/*
** Delete all entries in the %_segments table associated with the segment
** opened with seg-reader pSeg. This function does not affect the contents
** of the %_segdir table.
@@ -2530,7 +2586,7 @@ static void fts3ColumnFilter(
break;
}
p = &pList[1];
- p += sqlite3Fts3GetVarint32(p, &iCurrent);
+ p += fts3GetVarint32(p, &iCurrent);
}
if( bZero && &pList[nList]!=pEnd ){
@@ -2849,8 +2905,8 @@ int sqlite3Fts3SegReaderStep(
fts3SegReaderSort(apSegment, nMerge, nMerge, xCmp);
while( apSegment[0]->pOffsetList ){
int j; /* Number of segments that share a docid */
- char *pList;
- int nList;
+ char *pList = 0;
+ int nList = 0;
int nByte;
sqlite3_int64 iDocid = apSegment[0]->iDocid;
fts3SegReaderNextDocid(p, apSegment[0], &pList, &nList);
@@ -2944,6 +3000,140 @@ void sqlite3Fts3SegReaderFinish(
}
/*
+** Decode the "end_block" field, selected by column iCol of the SELECT
+** statement passed as the first argument.
+**
+** The "end_block" field may contain either an integer, or a text field
+** containing the text representation of two non-negative integers separated
+** by one or more space (0x20) characters. In the first case, set *piEndBlock
+** to the integer value and *pnByte to zero before returning. In the second,
+** set *piEndBlock to the first value and *pnByte to the second.
+*/
+static void fts3ReadEndBlockField(
+ sqlite3_stmt *pStmt,
+ int iCol,
+ i64 *piEndBlock,
+ i64 *pnByte
+){
+ const unsigned char *zText = sqlite3_column_text(pStmt, iCol);
+ if( zText ){
+ int i;
+ int iMul = 1;
+ i64 iVal = 0;
+ for(i=0; zText[i]>='0' && zText[i]<='9'; i++){
+ iVal = iVal*10 + (zText[i] - '0');
+ }
+ *piEndBlock = iVal;
+ while( zText[i]==' ' ) i++;
+ iVal = 0;
+ if( zText[i]=='-' ){
+ i++;
+ iMul = -1;
+ }
+ for(/* no-op */; zText[i]>='0' && zText[i]<='9'; i++){
+ iVal = iVal*10 + (zText[i] - '0');
+ }
+ *pnByte = (iVal * (i64)iMul);
+ }
+}
+
+
+/*
+** A segment of size nByte bytes has just been written to absolute level
+** iAbsLevel. Promote any segments that should be promoted as a result.
+*/
+static int fts3PromoteSegments(
+ Fts3Table *p, /* FTS table handle */
+ sqlite3_int64 iAbsLevel, /* Absolute level just updated */
+ sqlite3_int64 nByte /* Size of new segment at iAbsLevel */
+){
+ int rc = SQLITE_OK;
+ sqlite3_stmt *pRange;
+
+ rc = fts3SqlStmt(p, SQL_SELECT_LEVEL_RANGE2, &pRange, 0);
+
+ if( rc==SQLITE_OK ){
+ int bOk = 0;
+ i64 iLast = (iAbsLevel/FTS3_SEGDIR_MAXLEVEL + 1) * FTS3_SEGDIR_MAXLEVEL - 1;
+ i64 nLimit = (nByte*3)/2;
+
+ /* Loop through all entries in the %_segdir table corresponding to
+ ** segments in this index on levels greater than iAbsLevel. If there is
+ ** at least one such segment, and it is possible to determine that all
+ ** such segments are smaller than nLimit bytes in size, they will be
+ ** promoted to level iAbsLevel. */
+ sqlite3_bind_int64(pRange, 1, iAbsLevel+1);
+ sqlite3_bind_int64(pRange, 2, iLast);
+ while( SQLITE_ROW==sqlite3_step(pRange) ){
+ i64 nSize = 0, dummy;
+ fts3ReadEndBlockField(pRange, 2, &dummy, &nSize);
+ if( nSize<=0 || nSize>nLimit ){
+ /* If nSize==0, then the %_segdir.end_block field does not
+ ** contain a size value. This happens if it was written by an
+ ** old version of FTS. In this case it is not possible to determine
+ ** the size of the segment, and so segment promotion does not
+ ** take place. */
+ bOk = 0;
+ break;
+ }
+ bOk = 1;
+ }
+ rc = sqlite3_reset(pRange);
+
+ if( bOk ){
+ int iIdx = 0;
+ sqlite3_stmt *pUpdate1;
+ sqlite3_stmt *pUpdate2;
+
+ if( rc==SQLITE_OK ){
+ rc = fts3SqlStmt(p, SQL_UPDATE_LEVEL_IDX, &pUpdate1, 0);
+ }
+ if( rc==SQLITE_OK ){
+ rc = fts3SqlStmt(p, SQL_UPDATE_LEVEL, &pUpdate2, 0);
+ }
+
+ if( rc==SQLITE_OK ){
+
+ /* Loop through all %_segdir entries for segments in this index with
+ ** levels equal to or greater than iAbsLevel. As each entry is visited,
+ ** update it to set (level = -1) and (idx = N), where N is 0 for the
+ ** oldest segment in the range, 1 for the next oldest, and so on.
+ **
+ ** In other words, move all segments being promoted to level -1,
+ ** setting the "idx" fields as appropriate to keep them in the same
+ ** order. The contents of level -1 (which is never used, except
+ ** transiently here), will be moved back to level iAbsLevel below. */
+ sqlite3_bind_int64(pRange, 1, iAbsLevel);
+ while( SQLITE_ROW==sqlite3_step(pRange) ){
+ sqlite3_bind_int(pUpdate1, 1, iIdx++);
+ sqlite3_bind_int(pUpdate1, 2, sqlite3_column_int(pRange, 0));
+ sqlite3_bind_int(pUpdate1, 3, sqlite3_column_int(pRange, 1));
+ sqlite3_step(pUpdate1);
+ rc = sqlite3_reset(pUpdate1);
+ if( rc!=SQLITE_OK ){
+ sqlite3_reset(pRange);
+ break;
+ }
+ }
+ }
+ if( rc==SQLITE_OK ){
+ rc = sqlite3_reset(pRange);
+ }
+
+ /* Move level -1 to level iAbsLevel */
+ if( rc==SQLITE_OK ){
+ sqlite3_bind_int64(pUpdate2, 1, iAbsLevel);
+ sqlite3_step(pUpdate2);
+ rc = sqlite3_reset(pUpdate2);
+ }
+ }
+ }
+
+
+ return rc;
+}
+
+/*
** Merge all level iLevel segments in the database into a single
** iLevel+1 segment. Or, if iLevel<0, merge all segments into a
** single segment with a level equal to the numerically largest level
@@ -2967,6 +3157,7 @@ static int fts3SegmentMerge(
Fts3SegFilter filter; /* Segment term filter condition */
Fts3MultiSegReader csr; /* Cursor to iterate through level(s) */
int bIgnoreEmpty = 0; /* True to ignore empty segments */
+ i64 iMaxLevel = 0; /* Max level number for this index/langid */
assert( iLevel==FTS3_SEGCURSOR_ALL
|| iLevel==FTS3_SEGCURSOR_PENDING
@@ -2978,6 +3169,11 @@ static int fts3SegmentMerge(
rc = sqlite3Fts3SegReaderCursor(p, iLangid, iIndex, iLevel, 0, 0, 1, 0, &csr);
if( rc!=SQLITE_OK || csr.nSegment==0 ) goto finished;
+ if( iLevel!=FTS3_SEGCURSOR_PENDING ){
+ rc = fts3SegmentMaxLevel(p, iLangid, iIndex, &iMaxLevel);
+ if( rc!=SQLITE_OK ) goto finished;
+ }
+
if( iLevel==FTS3_SEGCURSOR_ALL ){
/* This call is to merge all segments in the database to a single
** segment. The level of the new segment is equal to the numerically
@@ -2987,21 +3183,21 @@ static int fts3SegmentMerge(
rc = SQLITE_DONE;
goto finished;
}
- rc = fts3SegmentMaxLevel(p, iLangid, iIndex, &iNewLevel);
+ iNewLevel = iMaxLevel;
bIgnoreEmpty = 1;
- }else if( iLevel==FTS3_SEGCURSOR_PENDING ){
- iNewLevel = getAbsoluteLevel(p, iLangid, iIndex, 0);
- rc = fts3AllocateSegdirIdx(p, iLangid, iIndex, 0, &iIdx);
}else{
/* This call is to merge all segments at level iLevel. find the next
** available segment index at level iLevel+1. The call to
** fts3AllocateSegdirIdx() will merge the segments at level iLevel+1 to
** a single iLevel+2 segment if necessary. */
- rc = fts3AllocateSegdirIdx(p, iLangid, iIndex, iLevel+1, &iIdx);
+ assert( FTS3_SEGCURSOR_PENDING==-1 );
iNewLevel = getAbsoluteLevel(p, iLangid, iIndex, iLevel+1);
+ rc = fts3AllocateSegdirIdx(p, iLangid, iIndex, iLevel+1, &iIdx);
+ bIgnoreEmpty = (iLevel!=FTS3_SEGCURSOR_PENDING) && (iNewLevel>iMaxLevel);
}
if( rc!=SQLITE_OK ) goto finished;
+
assert( csr.nSegment>0 );
assert( iNewLevel>=getAbsoluteLevel(p, iLangid, iIndex, 0) );
assert( iNewLevel<getAbsoluteLevel(p, iLangid, iIndex,FTS3_SEGDIR_MAXLEVEL) );
@@ -3018,7 +3214,7 @@ static int fts3SegmentMerge(
csr.zTerm, csr.nTerm, csr.aDoclist, csr.nDoclist);
}
if( rc!=SQLITE_OK ) goto finished;
- assert( pWriter );
+ assert( pWriter || bIgnoreEmpty );
if( iLevel!=FTS3_SEGCURSOR_PENDING ){
rc = fts3DeleteSegdir(
@@ -3026,7 +3222,14 @@ static int fts3SegmentMerge(
);
if( rc!=SQLITE_OK ) goto finished;
}
- rc = fts3SegWriterFlush(p, pWriter, iNewLevel, iIdx);
+ if( pWriter ){
+ rc = fts3SegWriterFlush(p, pWriter, iNewLevel, iIdx);
+ if( rc==SQLITE_OK ){
+ if( iLevel==FTS3_SEGCURSOR_PENDING || iNewLevel<iMaxLevel ){
+ rc = fts3PromoteSegments(p, iNewLevel, pWriter->nLeafData);
+ }
+ }
+ }
finished:
fts3SegWriterFree(pWriter);
@@ -3036,7 +3239,7 @@ static int fts3SegmentMerge(
/*
-** Flush the contents of pendingTerms to level 0 segments.
+** Flush the contents of pendingTerms to level 0 segments.
*/
int sqlite3Fts3PendingTermsFlush(Fts3Table *p){
int rc = SQLITE_OK;
@@ -3052,14 +3255,19 @@ int sqlite3Fts3PendingTermsFlush(Fts3Table *p){
** estimate the number of leaf blocks of content to be written
*/
if( rc==SQLITE_OK && p->bHasStat
- && p->bAutoincrmerge==0xff && p->nLeafAdd>0
+ && p->nAutoincrmerge==0xff && p->nLeafAdd>0
){
sqlite3_stmt *pStmt = 0;
rc = fts3SqlStmt(p, SQL_SELECT_STAT, &pStmt, 0);
if( rc==SQLITE_OK ){
sqlite3_bind_int(pStmt, 1, FTS_STAT_AUTOINCRMERGE);
rc = sqlite3_step(pStmt);
- p->bAutoincrmerge = (rc==SQLITE_ROW && sqlite3_column_int(pStmt, 0));
+ if( rc==SQLITE_ROW ){
+ p->nAutoincrmerge = sqlite3_column_int(pStmt, 0);
+ if( p->nAutoincrmerge==1 ) p->nAutoincrmerge = 8;
+ }else if( rc==SQLITE_DONE ){
+ p->nAutoincrmerge = 0;
+ }
rc = sqlite3_reset(pStmt);
}
}
@@ -3303,9 +3511,11 @@ static int fts3DoRebuild(Fts3Table *p){
rc = fts3PendingTermsDocid(p, iLangid, sqlite3_column_int64(pStmt, 0));
memset(aSz, 0, sizeof(aSz[0]) * (p->nColumn+1));
for(iCol=0; rc==SQLITE_OK && iCol<p->nColumn; iCol++){
- const char *z = (const char *) sqlite3_column_text(pStmt, iCol+1);
- rc = fts3PendingTermsAdd(p, iLangid, z, iCol, &aSz[iCol]);
- aSz[p->nColumn] += sqlite3_column_bytes(pStmt, iCol+1);
+ if( p->abNotindexed[iCol]==0 ){
+ const char *z = (const char *) sqlite3_column_text(pStmt, iCol+1);
+ rc = fts3PendingTermsAdd(p, iLangid, z, iCol, &aSz[iCol]);
+ aSz[p->nColumn] += sqlite3_column_bytes(pStmt, iCol+1);
+ }
}
if( p->bHasDocsize ){
fts3InsertDocsize(&rc, p, aSz);
@@ -3425,6 +3635,8 @@ struct IncrmergeWriter {
int iIdx; /* Index of *output* segment in iAbsLevel+1 */
sqlite3_int64 iStart; /* Block number of first allocated block */
sqlite3_int64 iEnd; /* Block number of last allocated block */
+ sqlite3_int64 nLeafData; /* Bytes of leaf page data so far */
+ u8 bNoLeafData; /* If true, store 0 for segment size */
NodeWriter aNodeWriter[FTS_MAX_APPENDABLE_HEIGHT];
};
@@ -3493,9 +3705,9 @@ static int nodeReaderNext(NodeReader *p){
p->aNode = 0;
}else{
if( bFirst==0 ){
- p->iOff += sqlite3Fts3GetVarint32(&p->aNode[p->iOff], &nPrefix);
+ p->iOff += fts3GetVarint32(&p->aNode[p->iOff], &nPrefix);
}
- p->iOff += sqlite3Fts3GetVarint32(&p->aNode[p->iOff], &nSuffix);
+ p->iOff += fts3GetVarint32(&p->aNode[p->iOff], &nSuffix);
blobGrowBuffer(&p->term, nPrefix+nSuffix, &rc);
if( rc==SQLITE_OK ){
@@ -3503,7 +3715,7 @@ static int nodeReaderNext(NodeReader *p){
p->term.n = nPrefix+nSuffix;
p->iOff += nSuffix;
if( p->iChild==0 ){
- p->iOff += sqlite3Fts3GetVarint32(&p->aNode[p->iOff], &p->nDoclist);
+ p->iOff += fts3GetVarint32(&p->aNode[p->iOff], &p->nDoclist);
p->aDoclist = &p->aNode[p->iOff];
p->iOff += p->nDoclist;
}
@@ -3763,8 +3975,8 @@ static int fts3IncrmergeAppend(
nSpace += sqlite3Fts3VarintLen(nDoclist) + nDoclist;
}
+ pWriter->nLeafData += nSpace;
blobGrowBuffer(&pLeaf->block, pLeaf->block.n + nSpace, &rc);
-
if( rc==SQLITE_OK ){
if( pLeaf->block.n==0 ){
pLeaf->block.n = 1;
@@ -3863,6 +4075,7 @@ static void fts3IncrmergeRelease(
pWriter->iStart, /* start_block */
pWriter->aNodeWriter[0].iBlock, /* leaves_end_block */
pWriter->iEnd, /* end_block */
+ (pWriter->bNoLeafData==0 ? pWriter->nLeafData : 0), /* end_block */
pRoot->block.a, pRoot->block.n /* root */
);
}
@@ -3964,7 +4177,11 @@ static int fts3IncrmergeLoad(
if( sqlite3_step(pSelect)==SQLITE_ROW ){
iStart = sqlite3_column_int64(pSelect, 1);
iLeafEnd = sqlite3_column_int64(pSelect, 2);
- iEnd = sqlite3_column_int64(pSelect, 3);
+ fts3ReadEndBlockField(pSelect, 3, &iEnd, &pWriter->nLeafData);
+ if( pWriter->nLeafData<0 ){
+ pWriter->nLeafData = pWriter->nLeafData * -1;
+ }
+ pWriter->bNoLeafData = (pWriter->nLeafData==0);
nRoot = sqlite3_column_bytes(pSelect, 4);
aRoot = sqlite3_column_blob(pSelect, 4);
}else{
@@ -4555,7 +4772,7 @@ static int fts3IncrmergeHintPop(Blob *pHint, i64 *piAbsLevel, int *pnInput){
pHint->n = i;
i += sqlite3Fts3GetVarint(&pHint->a[i], piAbsLevel);
- i += sqlite3Fts3GetVarint32(&pHint->a[i], pnInput);
+ i += fts3GetVarint32(&pHint->a[i], pnInput);
if( i!=nHint ) return SQLITE_CORRUPT_VTAB;
return SQLITE_OK;
@@ -4565,11 +4782,11 @@ static int fts3IncrmergeHintPop(Blob *pHint, i64 *piAbsLevel, int *pnInput){
/*
** Attempt an incremental merge that writes nMerge leaf blocks.
**
-** Incremental merges happen nMin segments at a time. The two
-** segments to be merged are the nMin oldest segments (the ones with
-** the smallest indexes) in the highest level that contains at least
-** nMin segments. Multiple merges might occur in an attempt to write the
-** quota of nMerge leaf blocks.
+** Incremental merges happen nMin segments at a time. The segments
+** to be merged are the nMin oldest segments (the ones with the smallest
+** values for the _segdir.idx field) in the highest level that contains
+** at least nMin segments. Multiple merges might occur in an attempt to
+** write the quota of nMerge leaf blocks.
*/
int sqlite3Fts3Incrmerge(Fts3Table *p, int nMerge, int nMin){
int rc; /* Return code */
@@ -4594,6 +4811,7 @@ int sqlite3Fts3Incrmerge(Fts3Table *p, int nMerge, int nMin){
const i64 nMod = FTS3_SEGDIR_MAXLEVEL * p->nIndex;
sqlite3_stmt *pFindLevel = 0; /* SQL used to determine iAbsLevel */
int bUseHint = 0; /* True if attempting to append */
+ int iIdx = 0; /* Largest idx in level (iAbsLevel+1) */
/* Search the %_segdir table for the absolute level with the smallest
** relative level number that contains at least nMin segments, if any.
@@ -4647,6 +4865,19 @@ int sqlite3Fts3Incrmerge(Fts3Table *p, int nMerge, int nMin){
** to start work on some other level. */
memset(pWriter, 0, nAlloc);
pFilter->flags = FTS3_SEGMENT_REQUIRE_POS;
+
+ if( rc==SQLITE_OK ){
+ rc = fts3IncrmergeOutputIdx(p, iAbsLevel, &iIdx);
+ assert( bUseHint==1 || bUseHint==0 );
+ if( iIdx==0 || (bUseHint && iIdx==1) ){
+ int bIgnore = 0;
+ rc = fts3SegmentIsMaxLevel(p, iAbsLevel+1, &bIgnore);
+ if( bIgnore ){
+ pFilter->flags |= FTS3_SEGMENT_IGNORE_EMPTY;
+ }
+ }
+ }
+
if( rc==SQLITE_OK ){
rc = fts3IncrmergeCsr(p, iAbsLevel, nSeg, pCsr);
}
@@ -4654,16 +4885,12 @@ int sqlite3Fts3Incrmerge(Fts3Table *p, int nMerge, int nMin){
&& SQLITE_OK==(rc = sqlite3Fts3SegReaderStart(p, pCsr, pFilter))
&& SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, pCsr))
){
- int iIdx = 0; /* Largest idx in level (iAbsLevel+1) */
- rc = fts3IncrmergeOutputIdx(p, iAbsLevel, &iIdx);
- if( rc==SQLITE_OK ){
- if( bUseHint && iIdx>0 ){
- const char *zKey = pCsr->zTerm;
- int nKey = pCsr->nTerm;
- rc = fts3IncrmergeLoad(p, iAbsLevel, iIdx-1, zKey, nKey, pWriter);
- }else{
- rc = fts3IncrmergeWriter(p, iAbsLevel, iIdx, pCsr, pWriter);
- }
+ if( bUseHint && iIdx>0 ){
+ const char *zKey = pCsr->zTerm;
+ int nKey = pCsr->nTerm;
+ rc = fts3IncrmergeLoad(p, iAbsLevel, iIdx-1, zKey, nKey, pWriter);
+ }else{
+ rc = fts3IncrmergeWriter(p, iAbsLevel, iIdx, pCsr, pWriter);
}
if( rc==SQLITE_OK && pWriter->nLeafEst ){
@@ -4685,7 +4912,13 @@ int sqlite3Fts3Incrmerge(Fts3Table *p, int nMerge, int nMin){
}
}
+ if( nSeg!=0 ){
+ pWriter->nLeafData = pWriter->nLeafData * -1;
+ }
fts3IncrmergeRelease(p, pWriter, &rc);
+ if( nSeg==0 && pWriter->bNoLeafData==0 ){
+ fts3PromoteSegments(p, iAbsLevel+1, pWriter->nLeafData);
+ }
}
sqlite3Fts3SegReaderFinish(pCsr);
@@ -4772,16 +5005,19 @@ static int fts3DoAutoincrmerge(
){
int rc = SQLITE_OK;
sqlite3_stmt *pStmt = 0;
- p->bAutoincrmerge = fts3Getint(&zParam)!=0;
+ p->nAutoincrmerge = fts3Getint(&zParam);
+ if( p->nAutoincrmerge==1 || p->nAutoincrmerge>FTS3_MERGE_COUNT ){
+ p->nAutoincrmerge = 8;
+ }
if( !p->bHasStat ){
assert( p->bFts4==0 );
sqlite3Fts3CreateStatTable(&rc, p);
if( rc ) return rc;
}
rc = fts3SqlStmt(p, SQL_REPLACE_STAT, &pStmt, 0);
- if( rc ) return rc;;
+ if( rc ) return rc;
sqlite3_bind_int(pStmt, 1, FTS_STAT_AUTOINCRMERGE);
- sqlite3_bind_int(pStmt, 2, p->bAutoincrmerge);
+ sqlite3_bind_int(pStmt, 2, p->nAutoincrmerge);
sqlite3_step(pStmt);
rc = sqlite3_reset(pStmt);
return rc;
@@ -4938,34 +5174,36 @@ static int fts3IntegrityCheck(Fts3Table *p, int *pbOk){
int iCol;
for(iCol=0; rc==SQLITE_OK && iCol<p->nColumn; iCol++){
- const char *zText = (const char *)sqlite3_column_text(pStmt, iCol+1);
- int nText = sqlite3_column_bytes(pStmt, iCol+1);
- sqlite3_tokenizer_cursor *pT = 0;
-
- rc = sqlite3Fts3OpenTokenizer(p->pTokenizer, iLang, zText, nText, &pT);
- while( rc==SQLITE_OK ){
- char const *zToken; /* Buffer containing token */
- int nToken = 0; /* Number of bytes in token */
- int iDum1 = 0, iDum2 = 0; /* Dummy variables */
- int iPos = 0; /* Position of token in zText */
-
- rc = pModule->xNext(pT, &zToken, &nToken, &iDum1, &iDum2, &iPos);
- if( rc==SQLITE_OK ){
- int i;
- cksum2 = cksum2 ^ fts3ChecksumEntry(
- zToken, nToken, iLang, 0, iDocid, iCol, iPos
- );
- for(i=1; i<p->nIndex; i++){
- if( p->aIndex[i].nPrefix<=nToken ){
- cksum2 = cksum2 ^ fts3ChecksumEntry(
- zToken, p->aIndex[i].nPrefix, iLang, i, iDocid, iCol, iPos
- );
+ if( p->abNotindexed[iCol]==0 ){
+ const char *zText = (const char *)sqlite3_column_text(pStmt, iCol+1);
+ int nText = sqlite3_column_bytes(pStmt, iCol+1);
+ sqlite3_tokenizer_cursor *pT = 0;
+
+ rc = sqlite3Fts3OpenTokenizer(p->pTokenizer, iLang, zText, nText,&pT);
+ while( rc==SQLITE_OK ){
+ char const *zToken; /* Buffer containing token */
+ int nToken = 0; /* Number of bytes in token */
+ int iDum1 = 0, iDum2 = 0; /* Dummy variables */
+ int iPos = 0; /* Position of token in zText */
+
+ rc = pModule->xNext(pT, &zToken, &nToken, &iDum1, &iDum2, &iPos);
+ if( rc==SQLITE_OK ){
+ int i;
+ cksum2 = cksum2 ^ fts3ChecksumEntry(
+ zToken, nToken, iLang, 0, iDocid, iCol, iPos
+ );
+ for(i=1; i<p->nIndex; i++){
+ if( p->aIndex[i].nPrefix<=nToken ){
+ cksum2 = cksum2 ^ fts3ChecksumEntry(
+ zToken, p->aIndex[i].nPrefix, iLang, i, iDocid, iCol, iPos
+ );
+ }
}
}
}
+ if( pT ) pModule->xClose(pT);
+ if( rc==SQLITE_DONE ) rc = SQLITE_OK;
}
- if( pT ) pModule->xClose(pT);
- if( rc==SQLITE_DONE ) rc = SQLITE_OK;
}
}
@@ -5049,6 +5287,9 @@ static int fts3SpecialInsert(Fts3Table *p, sqlite3_value *pVal){
}else if( nVal>11 && 0==sqlite3_strnicmp(zVal, "maxpending=", 9) ){
p->nMaxPendingData = atoi(&zVal[11]);
rc = SQLITE_OK;
+ }else if( nVal>21 && 0==sqlite3_strnicmp(zVal, "test-no-incr-doclist=", 21) ){
+ p->bNoIncrDoclist = atoi(&zVal[21]);
+ rc = SQLITE_OK;
#endif
}else{
rc = SQLITE_ERROR;
@@ -5108,32 +5349,34 @@ int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *pCsr){
iDocid = sqlite3_column_int64(pCsr->pStmt, 0);
for(i=0; i<p->nColumn && rc==SQLITE_OK; i++){
- const char *zText = (const char *)sqlite3_column_text(pCsr->pStmt, i+1);
- sqlite3_tokenizer_cursor *pTC = 0;
-
- rc = sqlite3Fts3OpenTokenizer(pT, pCsr->iLangid, zText, -1, &pTC);
- while( rc==SQLITE_OK ){
- char const *zToken; /* Buffer containing token */
- int nToken = 0; /* Number of bytes in token */
- int iDum1 = 0, iDum2 = 0; /* Dummy variables */
- int iPos = 0; /* Position of token in zText */
-
- rc = pModule->xNext(pTC, &zToken, &nToken, &iDum1, &iDum2, &iPos);
- for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){
- Fts3PhraseToken *pPT = pDef->pToken;
- if( (pDef->iCol>=p->nColumn || pDef->iCol==i)
- && (pPT->bFirst==0 || iPos==0)
- && (pPT->n==nToken || (pPT->isPrefix && pPT->n<nToken))
- && (0==memcmp(zToken, pPT->z, pPT->n))
- ){
- fts3PendingListAppend(&pDef->pList, iDocid, i, iPos, &rc);
+ if( p->abNotindexed[i]==0 ){
+ const char *zText = (const char *)sqlite3_column_text(pCsr->pStmt, i+1);
+ sqlite3_tokenizer_cursor *pTC = 0;
+
+ rc = sqlite3Fts3OpenTokenizer(pT, pCsr->iLangid, zText, -1, &pTC);
+ while( rc==SQLITE_OK ){
+ char const *zToken; /* Buffer containing token */
+ int nToken = 0; /* Number of bytes in token */
+ int iDum1 = 0, iDum2 = 0; /* Dummy variables */
+ int iPos = 0; /* Position of token in zText */
+
+ rc = pModule->xNext(pTC, &zToken, &nToken, &iDum1, &iDum2, &iPos);
+ for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){
+ Fts3PhraseToken *pPT = pDef->pToken;
+ if( (pDef->iCol>=p->nColumn || pDef->iCol==i)
+ && (pPT->bFirst==0 || iPos==0)
+ && (pPT->n==nToken || (pPT->isPrefix && pPT->n<nToken))
+ && (0==memcmp(zToken, pPT->z, pPT->n))
+ ){
+ fts3PendingListAppend(&pDef->pList, iDocid, i, iPos, &rc);
+ }
}
}
+ if( pTC ) pModule->xClose(pTC);
+ if( rc==SQLITE_DONE ) rc = SQLITE_OK;
}
- if( pTC ) pModule->xClose(pTC);
- if( rc==SQLITE_DONE ) rc = SQLITE_OK;
}
-
+
for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){
if( pDef->pList ){
rc = fts3PendingListAppendVarint(&pDef->pList, 0);
@@ -5265,6 +5508,10 @@ int sqlite3Fts3UpdateMethod(
int nChng = 0; /* Net change in number of documents */
int bInsertDone = 0;
+ /* At this point it must be known if the %_stat table exists or not.
+ ** So bHasStat may not be 2. */
+ assert( p->bHasStat==0 || p->bHasStat==1 );
+
assert( p->pSegments==0 );
assert(
nArg==1 /* DELETE operations */
@@ -5297,6 +5544,9 @@ int sqlite3Fts3UpdateMethod(
aSzIns = &aSzDel[p->nColumn+1];
memset(aSzDel, 0, sizeof(aSzDel[0])*(p->nColumn+1)*2);
+ rc = fts3Writelock(p);
+ if( rc!=SQLITE_OK ) goto update_out;
+
/* If this is an INSERT operation, or an UPDATE that modifies the rowid
** value, then this operation requires constraint handling.
**
diff --git a/ext/fts3/tool/fts3view.c b/ext/fts3/tool/fts3view.c
index 479ae98..3dc1ba8 100644
--- a/ext/fts3/tool/fts3view.c
+++ b/ext/fts3/tool/fts3view.c
@@ -376,7 +376,7 @@ static void showSegmentStats(sqlite3 *db, const char *zTab){
sqlite3_finalize(pStmt);
nLeaf = nSeg - nIdx;
printf("Leaf segments larger than %5d bytes.... %9d %5.2f%%\n",
- pgsz-45, n, n*100.0/nLeaf);
+ pgsz-45, n, nLeaf>0 ? n*100.0/nLeaf : 0.0);
pStmt = prepare(db, "SELECT max(level%%1024) FROM '%q_segdir'", zTab);
mxLevel = 0;
@@ -554,7 +554,7 @@ static void decodeSegment(
sqlite3_int64 n;
sqlite3_int64 iDocsz;
int iHeight;
- int i = 0;
+ sqlite3_int64 i = 0;
int cnt = 0;
char zTerm[1000];
@@ -576,12 +576,12 @@ static void decodeSegment(
fprintf(stderr, "term to long\n");
exit(1);
}
- memcpy(zTerm+iPrefix, aData+i, nTerm);
+ memcpy(zTerm+iPrefix, aData+i, (size_t)nTerm);
zTerm[iPrefix+nTerm] = 0;
i += nTerm;
if( iHeight==0 ){
i += getVarint(aData+i, &iDocsz);
- printf("term: %-25s doclist %7lld bytes offset %d\n", zTerm, iDocsz, i);
+ printf("term: %-25s doclist %7lld bytes offset %lld\n", zTerm, iDocsz, i);
i += iDocsz;
}else{
printf("term: %-25s child %lld\n", zTerm, ++iChild);
@@ -749,18 +749,19 @@ static void decodeDoclist(
*/
static void showDoclist(sqlite3 *db, const char *zTab){
const unsigned char *aData;
- sqlite3_int64 offset, nData;
+ sqlite3_int64 offset;
+ int nData;
sqlite3_stmt *pStmt;
offset = atoi64(azExtra[1]);
- nData = atoi64(azExtra[2]);
+ nData = atoi(azExtra[2]);
pStmt = prepareToGetSegment(db, zTab, azExtra[0]);
if( sqlite3_step(pStmt)!=SQLITE_ROW ){
sqlite3_finalize(pStmt);
return;
}
aData = sqlite3_column_blob(pStmt, 0);
- printf("Doclist at %s offset %lld of size %lld bytes:\n",
+ printf("Doclist at %s offset %lld of size %d bytes:\n",
azExtra[0], offset, nData);
if( findOption("raw", 0, 0)!=0 ){
printBlob(aData+offset, nData);
diff --git a/ext/fts3/unicode/mkunicode.tcl b/ext/fts3/unicode/mkunicode.tcl
index 0d58e8a..c3083ee 100644
--- a/ext/fts3/unicode/mkunicode.tcl
+++ b/ext/fts3/unicode/mkunicode.tcl
@@ -160,7 +160,7 @@ proc print_rd {map} {
}
assert( key>=aDia[iRes] );
return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : (int)aChar[iRes]);}
- puts "\};"
+ puts "\}"
}
proc print_isdiacritic {zFunc map} {
@@ -239,7 +239,10 @@ proc an_load_unicodedata_text {zName} {
foreach $lField $fields {}
set iCode [expr "0x$code"]
- set bAlnum [expr {[lsearch {L N} [string range $general_category 0 0]]>=0}]
+ set bAlnum [expr {
+ [lsearch {L N} [string range $general_category 0 0]] >= 0
+ || $general_category=="Co"
+ }]
if { !$bAlnum } { lappend lRet $iCode }
}
@@ -295,7 +298,7 @@ proc an_print_range_array {lRange} {
** using this format.
*/
}]
- puts -nonewline " const static unsigned int aEntry\[\] = \{"
+ puts -nonewline " static const unsigned int aEntry\[\] = \{"
set i 0
foreach range $lRange {
foreach {iFirst nRange} $range {}
@@ -346,7 +349,7 @@ proc print_isalnum {zFunc lRange} {
return ( (aAscii[c >> 5] & (1 << (c & 0x001F)))==0 );
}else if( c<(1<<22) ){
unsigned int key = (((unsigned int)c)<<10) | 0x000003FF;
- int iRes;
+ int iRes = 0;
int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
int iLo = 0;
while( iHi>=iLo ){
@@ -360,7 +363,7 @@ proc print_isalnum {zFunc lRange} {
}
assert( aEntry[0]<key );
assert( key>=aEntry[iRes] );
- return (c >= ((aEntry[iRes]>>10) + (aEntry[iRes]&0x3FF)));
+ return (((unsigned int)c) >= ((aEntry[iRes]>>10) + (aEntry[iRes]&0x3FF)));
}
return 1;}
puts "\}"
@@ -729,7 +732,7 @@ proc print_fileheader {} {
*/
}]
puts ""
- puts "#if !defined(SQLITE_DISABLE_FTS3_UNICODE)"
+ puts "#ifndef SQLITE_DISABLE_FTS3_UNICODE"
puts "#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)"
puts ""
puts "#include <assert.h>"