From 569c6676a6ddb0ff73821d7693b5e18ddef809b9 Mon Sep 17 00:00:00 2001 From: Hans-Christoph Steiner Date: Thu, 16 Oct 2014 22:51:35 -0400 Subject: Imported Upstream version 3.2.0 --- ext/fts3/fts3.c | 759 ++++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 603 insertions(+), 156 deletions(-) (limited to 'ext/fts3/fts3.c') diff --git a/ext/fts3/fts3.c b/ext/fts3/fts3.c index c00a13f..4f4b667 100644 --- a/ext/fts3/fts3.c +++ b/ext/fts3/fts3.c @@ -330,21 +330,37 @@ int sqlite3Fts3PutVarint(char *p, sqlite_int64 v){ return (int) (q - (unsigned char *)p); } +#define GETVARINT_STEP(v, ptr, shift, mask1, mask2, var, ret) \ + v = (v & mask1) | ( (*ptr++) << shift ); \ + if( (v & mask2)==0 ){ var = v; return ret; } +#define GETVARINT_INIT(v, ptr, shift, mask1, mask2, var, ret) \ + v = (*ptr++); \ + if( (v & mask2)==0 ){ var = v; return ret; } + /* ** Read a 64-bit variable-length integer from memory starting at p[0]. ** Return the number of bytes read, or 0 on error. ** The value is stored in *v. */ int sqlite3Fts3GetVarint(const char *p, sqlite_int64 *v){ - const unsigned char *q = (const unsigned char *) p; - sqlite_uint64 x = 0, y = 1; - while( (*q&0x80)==0x80 && q-(unsigned char *)p UNCOMPRESS */ { "order", 5 }, /* 4 -> ORDER */ { "content", 7 }, /* 5 -> CONTENT */ - { "languageid", 10 } /* 6 -> LANGUAGEID */ + { "languageid", 10 }, /* 6 -> LANGUAGEID */ + { "notindexed", 10 } /* 7 -> NOTINDEXED */ }; int iOpt; @@ -1197,6 +1237,11 @@ static int fts3InitVtab( zLanguageid = zVal; zVal = 0; break; + + case 7: /* NOTINDEXED */ + azNotindexed[nNotindexed++] = zVal; + zVal = 0; + break; } } sqlite3_free(zVal); @@ -1268,6 +1313,7 @@ static int fts3InitVtab( nByte = sizeof(Fts3Table) + /* Fts3Table */ nCol * sizeof(char *) + /* azColumn */ nIndex * sizeof(struct Fts3Index) + /* aIndex */ + nCol * sizeof(u8) + /* abNotindexed */ nName + /* zName */ nDb + /* zDb */ nString; /* Space for azColumn strings */ @@ -1287,7 +1333,7 @@ static int fts3InitVtab( p->bHasStat = isFts4; p->bFts4 = isFts4; p->bDescIdx = bDescIdx; - p->bAutoincrmerge = 0xff; /* 0xff means setting unknown */ + p->nAutoincrmerge = 0xff; /* 0xff means setting unknown */ p->zContentTbl = zContent; p->zLanguageid = zLanguageid; zContent = 0; @@ -1301,9 +1347,10 @@ static int fts3InitVtab( for(i=0; iaIndex[i].hPending, FTS3_HASH_STRING, 1); } + p->abNotindexed = (u8 *)&p->aIndex[nIndex]; /* Fill in the zName and zDb fields of the vtab structure. */ - zCsr = (char *)&p->aIndex[nIndex]; + zCsr = (char *)&p->abNotindexed[nCol]; p->zName = zCsr; memcpy(zCsr, argv[2], nName); zCsr += nName; @@ -1324,7 +1371,28 @@ static int fts3InitVtab( assert( zCsr <= &((char *)p)[nByte] ); } - if( (zCompress==0)!=(zUncompress==0) ){ + /* Fill in the abNotindexed array */ + for(iCol=0; iColazColumn[iCol]); + for(i=0; iazColumn[iCol], zNot, n) + ){ + p->abNotindexed[iCol] = 1; + sqlite3_free(zNot); + azNotindexed[i] = 0; + } + } + } + for(i=0; izDb, p->zName); - if( rc2==SQLITE_OK ) p->bHasStat = 1; + p->bHasStat = 2; } /* Figure out the page-size for the database. This is required in order to @@ -1365,7 +1430,9 @@ fts3_init_out: sqlite3_free(zUncompress); sqlite3_free(zContent); sqlite3_free(zLanguageid); + for(i=0; iestimatedRows variable to nRow. Unless this +** extension is currently being used by a version of SQLite too old to +** support estimatedRows. In that case this function is a no-op. +*/ +static void fts3SetEstimatedRows(sqlite3_index_info *pIdxInfo, i64 nRow){ +#if SQLITE_VERSION_NUMBER>=3008002 + if( sqlite3_libversion_number()>=3008002 ){ + pIdxInfo->estimatedRows = nRow; + } +#endif +} + /* ** Implementation of the xBestIndex method for FTS3 tables. There ** are three possible strategies, in order of preference: @@ -1416,23 +1496,40 @@ static int fts3BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ Fts3Table *p = (Fts3Table *)pVTab; int i; /* Iterator variable */ int iCons = -1; /* Index of constraint to use */ + int iLangidCons = -1; /* Index of langid=x constraint, if present */ + int iDocidGe = -1; /* Index of docid>=x constraint, if present */ + int iDocidLe = -1; /* Index of docid<=x constraint, if present */ + int iIdx; /* By default use a full table scan. This is an expensive option, ** so search through the constraints to see if a more efficient ** strategy is possible. */ pInfo->idxNum = FTS3_FULLSCAN_SEARCH; - pInfo->estimatedCost = 500000; + pInfo->estimatedCost = 5000000; for(i=0; inConstraint; i++){ + int bDocid; /* True if this constraint is on docid */ struct sqlite3_index_constraint *pCons = &pInfo->aConstraint[i]; - if( pCons->usable==0 ) continue; + if( pCons->usable==0 ){ + if( pCons->op==SQLITE_INDEX_CONSTRAINT_MATCH ){ + /* There exists an unusable MATCH constraint. This means that if + ** the planner does elect to use the results of this call as part + ** of the overall query plan the user will see an "unable to use + ** function MATCH in the requested context" error. To discourage + ** this, return a very high cost here. */ + pInfo->idxNum = FTS3_FULLSCAN_SEARCH; + pInfo->estimatedCost = 1e50; + fts3SetEstimatedRows(pInfo, ((sqlite3_int64)1) << 50); + return SQLITE_OK; + } + continue; + } + + bDocid = (pCons->iColumn<0 || pCons->iColumn==p->nColumn+1); /* A direct lookup on the rowid or docid column. Assign a cost of 1.0. */ - if( iCons<0 - && pCons->op==SQLITE_INDEX_CONSTRAINT_EQ - && (pCons->iColumn<0 || pCons->iColumn==p->nColumn+1 ) - ){ + if( iCons<0 && pCons->op==SQLITE_INDEX_CONSTRAINT_EQ && bDocid ){ pInfo->idxNum = FTS3_DOCID_SEARCH; pInfo->estimatedCost = 1.0; iCons = i; @@ -1461,14 +1558,38 @@ static int fts3BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ ){ iLangidCons = i; } + + if( bDocid ){ + switch( pCons->op ){ + case SQLITE_INDEX_CONSTRAINT_GE: + case SQLITE_INDEX_CONSTRAINT_GT: + iDocidGe = i; + break; + + case SQLITE_INDEX_CONSTRAINT_LE: + case SQLITE_INDEX_CONSTRAINT_LT: + iDocidLe = i; + break; + } + } } + iIdx = 1; if( iCons>=0 ){ - pInfo->aConstraintUsage[iCons].argvIndex = 1; + pInfo->aConstraintUsage[iCons].argvIndex = iIdx++; pInfo->aConstraintUsage[iCons].omit = 1; } if( iLangidCons>=0 ){ - pInfo->aConstraintUsage[iLangidCons].argvIndex = 2; + pInfo->idxNum |= FTS3_HAVE_LANGID; + pInfo->aConstraintUsage[iLangidCons].argvIndex = iIdx++; + } + if( iDocidGe>=0 ){ + pInfo->idxNum |= FTS3_HAVE_DOCID_GE; + pInfo->aConstraintUsage[iDocidGe].argvIndex = iIdx++; + } + if( iDocidLe>=0 ){ + pInfo->idxNum |= FTS3_HAVE_DOCID_LE; + pInfo->aConstraintUsage[iDocidLe].argvIndex = iIdx++; } /* Regardless of the strategy selected, FTS can deliver rows in rowid (or @@ -1646,10 +1767,10 @@ static int fts3ScanInteriorNode( /* Load the next term on the node into zBuffer. Use realloc() to expand ** the size of zBuffer if required. */ if( !isFirstTerm ){ - zCsr += sqlite3Fts3GetVarint32(zCsr, &nPrefix); + zCsr += fts3GetVarint32(zCsr, &nPrefix); } isFirstTerm = 0; - zCsr += sqlite3Fts3GetVarint32(zCsr, &nSuffix); + zCsr += fts3GetVarint32(zCsr, &nSuffix); if( nPrefix<0 || nSuffix<0 || &zCsr[nSuffix]>zEnd ){ rc = FTS_CORRUPT_VTAB; @@ -1737,7 +1858,7 @@ static int fts3SelectLeaf( assert( piLeaf || piLeaf2 ); - sqlite3Fts3GetVarint32(zNode, &iHeight); + fts3GetVarint32(zNode, &iHeight); rc = fts3ScanInteriorNode(zTerm, nTerm, zNode, nNode, piLeaf, piLeaf2); assert( !piLeaf2 || !piLeaf || rc!=SQLITE_OK || (*piLeaf<=*piLeaf2) ); @@ -1939,11 +2060,11 @@ static void fts3PoslistMerge( int iCol1; /* The current column index in pp1 */ int iCol2; /* The current column index in pp2 */ - if( *p1==POS_COLUMN ) sqlite3Fts3GetVarint32(&p1[1], &iCol1); + if( *p1==POS_COLUMN ) fts3GetVarint32(&p1[1], &iCol1); else if( *p1==POS_END ) iCol1 = POSITION_LIST_END; else iCol1 = 0; - if( *p2==POS_COLUMN ) sqlite3Fts3GetVarint32(&p2[1], &iCol2); + if( *p2==POS_COLUMN ) fts3GetVarint32(&p2[1], &iCol2); else if( *p2==POS_END ) iCol2 = POSITION_LIST_END; else iCol2 = 0; @@ -2036,11 +2157,11 @@ static int fts3PoslistPhraseMerge( assert( p!=0 && *p1!=0 && *p2!=0 ); if( *p1==POS_COLUMN ){ p1++; - p1 += sqlite3Fts3GetVarint32(p1, &iCol1); + p1 += fts3GetVarint32(p1, &iCol1); } if( *p2==POS_COLUMN ){ p2++; - p2 += sqlite3Fts3GetVarint32(p2, &iCol2); + p2 += fts3GetVarint32(p2, &iCol2); } while( 1 ){ @@ -2090,9 +2211,9 @@ static int fts3PoslistPhraseMerge( if( 0==*p1 || 0==*p2 ) break; p1++; - p1 += sqlite3Fts3GetVarint32(p1, &iCol1); + p1 += fts3GetVarint32(p1, &iCol1); p2++; - p2 += sqlite3Fts3GetVarint32(p2, &iCol2); + p2 += fts3GetVarint32(p2, &iCol2); } /* Advance pointer p1 or p2 (whichever corresponds to the smaller of @@ -2104,12 +2225,12 @@ static int fts3PoslistPhraseMerge( fts3ColumnlistCopy(0, &p1); if( 0==*p1 ) break; p1++; - p1 += sqlite3Fts3GetVarint32(p1, &iCol1); + p1 += fts3GetVarint32(p1, &iCol1); }else{ fts3ColumnlistCopy(0, &p2); if( 0==*p2 ) break; p2++; - p2 += sqlite3Fts3GetVarint32(p2, &iCol2); + p2 += fts3GetVarint32(p2, &iCol2); } } @@ -2915,6 +3036,33 @@ static int fts3NextMethod(sqlite3_vtab_cursor *pCursor){ return rc; } +/* +** The following are copied from sqliteInt.h. +** +** Constants for the largest and smallest possible 64-bit signed integers. +** These macros are designed to work correctly on both 32-bit and 64-bit +** compilers. +*/ +#ifndef SQLITE_AMALGAMATION +# define LARGEST_INT64 (0xffffffff|(((sqlite3_int64)0x7fffffff)<<32)) +# define SMALLEST_INT64 (((sqlite3_int64)-1) - LARGEST_INT64) +#endif + +/* +** If the numeric type of argument pVal is "integer", then return it +** converted to a 64-bit signed integer. Otherwise, return a copy of +** the second parameter, iDefault. +*/ +static sqlite3_int64 fts3DocidRange(sqlite3_value *pVal, i64 iDefault){ + if( pVal ){ + int eType = sqlite3_value_numeric_type(pVal); + if( eType==SQLITE_INTEGER ){ + return sqlite3_value_int64(pVal); + } + } + return iDefault; +} + /* ** This is the xFilter interface for the virtual table. See ** the virtual table xFilter method documentation for additional @@ -2940,40 +3088,58 @@ static int fts3FilterMethod( ){ int rc; char *zSql; /* SQL statement used to access %_content */ + int eSearch; Fts3Table *p = (Fts3Table *)pCursor->pVtab; Fts3Cursor *pCsr = (Fts3Cursor *)pCursor; + sqlite3_value *pCons = 0; /* The MATCH or rowid constraint, if any */ + sqlite3_value *pLangid = 0; /* The "langid = ?" constraint, if any */ + sqlite3_value *pDocidGe = 0; /* The "docid >= ?" constraint, if any */ + sqlite3_value *pDocidLe = 0; /* The "docid <= ?" constraint, if any */ + int iIdx; + UNUSED_PARAMETER(idxStr); UNUSED_PARAMETER(nVal); - assert( idxNum>=0 && idxNum<=(FTS3_FULLTEXT_SEARCH+p->nColumn) ); - assert( nVal==0 || nVal==1 || nVal==2 ); - assert( (nVal==0)==(idxNum==FTS3_FULLSCAN_SEARCH) ); + eSearch = (idxNum & 0x0000FFFF); + assert( eSearch>=0 && eSearch<=(FTS3_FULLTEXT_SEARCH+p->nColumn) ); assert( p->pSegments==0 ); + /* Collect arguments into local variables */ + iIdx = 0; + if( eSearch!=FTS3_FULLSCAN_SEARCH ) pCons = apVal[iIdx++]; + if( idxNum & FTS3_HAVE_LANGID ) pLangid = apVal[iIdx++]; + if( idxNum & FTS3_HAVE_DOCID_GE ) pDocidGe = apVal[iIdx++]; + if( idxNum & FTS3_HAVE_DOCID_LE ) pDocidLe = apVal[iIdx++]; + assert( iIdx==nVal ); + /* In case the cursor has been used before, clear it now. */ sqlite3_finalize(pCsr->pStmt); sqlite3_free(pCsr->aDoclist); sqlite3Fts3ExprFree(pCsr->pExpr); memset(&pCursor[1], 0, sizeof(Fts3Cursor)-sizeof(sqlite3_vtab_cursor)); + /* Set the lower and upper bounds on docids to return */ + pCsr->iMinDocid = fts3DocidRange(pDocidGe, SMALLEST_INT64); + pCsr->iMaxDocid = fts3DocidRange(pDocidLe, LARGEST_INT64); + if( idxStr ){ pCsr->bDesc = (idxStr[0]=='D'); }else{ pCsr->bDesc = p->bDescIdx; } - pCsr->eSearch = (i16)idxNum; + pCsr->eSearch = (i16)eSearch; - if( idxNum!=FTS3_DOCID_SEARCH && idxNum!=FTS3_FULLSCAN_SEARCH ){ - int iCol = idxNum-FTS3_FULLTEXT_SEARCH; - const char *zQuery = (const char *)sqlite3_value_text(apVal[0]); + if( eSearch!=FTS3_DOCID_SEARCH && eSearch!=FTS3_FULLSCAN_SEARCH ){ + int iCol = eSearch-FTS3_FULLTEXT_SEARCH; + const char *zQuery = (const char *)sqlite3_value_text(pCons); - if( zQuery==0 && sqlite3_value_type(apVal[0])!=SQLITE_NULL ){ + if( zQuery==0 && sqlite3_value_type(pCons)!=SQLITE_NULL ){ return SQLITE_NOMEM; } pCsr->iLangid = 0; - if( nVal==2 ) pCsr->iLangid = sqlite3_value_int(apVal[1]); + if( pLangid ) pCsr->iLangid = sqlite3_value_int(pLangid); assert( p->base.zErrMsg==0 ); rc = sqlite3Fts3ExprParse(p->pTokenizer, pCsr->iLangid, @@ -2984,11 +3150,7 @@ static int fts3FilterMethod( return rc; } - rc = sqlite3Fts3ReadLock(p); - if( rc!=SQLITE_OK ) return rc; - rc = fts3EvalStart(pCsr); - sqlite3Fts3SegmentsClose(p); if( rc!=SQLITE_OK ) return rc; pCsr->pNextId = pCsr->aDoclist; @@ -3000,7 +3162,7 @@ static int fts3FilterMethod( ** full-text query or docid lookup, the statement retrieves a single ** row by docid. */ - if( idxNum==FTS3_FULLSCAN_SEARCH ){ + if( eSearch==FTS3_FULLSCAN_SEARCH ){ zSql = sqlite3_mprintf( "SELECT %s ORDER BY rowid %s", p->zReadExprlist, (pCsr->bDesc ? "DESC" : "ASC") @@ -3011,10 +3173,10 @@ static int fts3FilterMethod( }else{ rc = SQLITE_NOMEM; } - }else if( idxNum==FTS3_DOCID_SEARCH ){ + }else if( eSearch==FTS3_DOCID_SEARCH ){ rc = fts3CursorSeekStmt(pCsr, &pCsr->pStmt); if( rc==SQLITE_OK ){ - rc = sqlite3_bind_value(pCsr->pStmt, 1, apVal[0]); + rc = sqlite3_bind_value(pCsr->pStmt, 1, pCons); } } if( rc!=SQLITE_OK ) return rc; @@ -3142,7 +3304,10 @@ static int fts3SyncMethod(sqlite3_vtab *pVtab){ Fts3Table *p = (Fts3Table*)pVtab; int rc = sqlite3Fts3PendingTermsFlush(p); - if( rc==SQLITE_OK && p->bAutoincrmerge==1 && p->nLeafAdd>(nMinMerge/16) ){ + if( rc==SQLITE_OK + && p->nLeafAdd>(nMinMerge/16) + && p->nAutoincrmerge && p->nAutoincrmerge!=0xff + ){ int mxLevel = 0; /* Maximum relative level value in db */ int A; /* Incr-merge parameter A */ @@ -3150,14 +3315,41 @@ static int fts3SyncMethod(sqlite3_vtab *pVtab){ assert( rc==SQLITE_OK || mxLevel==0 ); A = p->nLeafAdd * mxLevel; A += (A/2); - if( A>(int)nMinMerge ) rc = sqlite3Fts3Incrmerge(p, A, 8); + if( A>(int)nMinMerge ) rc = sqlite3Fts3Incrmerge(p, A, p->nAutoincrmerge); } sqlite3Fts3SegmentsClose(p); return rc; } /* -** Implementation of xBegin() method. This is a no-op. +** If it is currently unknown whether or not the FTS table has an %_stat +** table (if p->bHasStat==2), attempt to determine this (set p->bHasStat +** to 0 or 1). Return SQLITE_OK if successful, or an SQLite error code +** if an error occurs. +*/ +static int fts3SetHasStat(Fts3Table *p){ + int rc = SQLITE_OK; + if( p->bHasStat==2 ){ + const char *zFmt ="SELECT 1 FROM %Q.sqlite_master WHERE tbl_name='%q_stat'"; + char *zSql = sqlite3_mprintf(zFmt, p->zDb, p->zName); + if( zSql ){ + sqlite3_stmt *pStmt = 0; + rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0); + if( rc==SQLITE_OK ){ + int bHasStat = (sqlite3_step(pStmt)==SQLITE_ROW); + rc = sqlite3_finalize(pStmt); + if( rc==SQLITE_OK ) p->bHasStat = bHasStat; + } + sqlite3_free(zSql); + }else{ + rc = SQLITE_NOMEM; + } + } + return rc; +} + +/* +** Implementation of xBegin() method. */ static int fts3BeginMethod(sqlite3_vtab *pVtab){ Fts3Table *p = (Fts3Table*)pVtab; @@ -3168,7 +3360,7 @@ static int fts3BeginMethod(sqlite3_vtab *pVtab){ TESTONLY( p->inTransaction = 1 ); TESTONLY( p->mxSavepoint = -1; ); p->nLeafAdd = 0; - return SQLITE_OK; + return fts3SetHasStat(p); } /* @@ -3417,6 +3609,10 @@ static int fts3RenameMethod( sqlite3 *db = p->db; /* Database connection */ int rc; /* Return Code */ + /* At this point it must be known if the %_stat table exists or not. + ** So bHasStat may not be 2. */ + rc = fts3SetHasStat(p); + /* As it happens, the pending terms table is always empty here. This is ** because an "ALTER TABLE RENAME TABLE" statement inside a transaction ** always opens a savepoint transaction. And the xSavepoint() method @@ -3424,7 +3620,9 @@ static int fts3RenameMethod( ** PendingTermsFlush() in in case that changes. */ assert( p->nPendingData==0 ); - rc = sqlite3Fts3PendingTermsFlush(p); + if( rc==SQLITE_OK ){ + rc = sqlite3Fts3PendingTermsFlush(p); + } if( p->zContentTbl==0 ){ fts3DbExec(&rc, db, @@ -3552,7 +3750,7 @@ static void hashDestroy(void *p){ */ void sqlite3Fts3SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule); void sqlite3Fts3PorterTokenizerModule(sqlite3_tokenizer_module const**ppModule); -#ifdef SQLITE_ENABLE_FTS4_UNICODE61 +#ifndef SQLITE_DISABLE_FTS3_UNICODE void sqlite3Fts3UnicodeTokenizer(sqlite3_tokenizer_module const**ppModule); #endif #ifdef SQLITE_ENABLE_ICU @@ -3570,7 +3768,7 @@ int sqlite3Fts3Init(sqlite3 *db){ Fts3Hash *pHash = 0; const sqlite3_tokenizer_module *pSimple = 0; const sqlite3_tokenizer_module *pPorter = 0; -#ifdef SQLITE_ENABLE_FTS4_UNICODE61 +#ifndef SQLITE_DISABLE_FTS3_UNICODE const sqlite3_tokenizer_module *pUnicode = 0; #endif @@ -3579,7 +3777,7 @@ int sqlite3Fts3Init(sqlite3 *db){ sqlite3Fts3IcuTokenizerModule(&pIcu); #endif -#ifdef SQLITE_ENABLE_FTS4_UNICODE61 +#ifndef SQLITE_DISABLE_FTS3_UNICODE sqlite3Fts3UnicodeTokenizer(&pUnicode); #endif @@ -3607,7 +3805,7 @@ int sqlite3Fts3Init(sqlite3 *db){ if( sqlite3Fts3HashInsert(pHash, "simple", 7, (void *)pSimple) || sqlite3Fts3HashInsert(pHash, "porter", 7, (void *)pPorter) -#ifdef SQLITE_ENABLE_FTS4_UNICODE61 +#ifndef SQLITE_DISABLE_FTS3_UNICODE || sqlite3Fts3HashInsert(pHash, "unicode61", 10, (void *)pUnicode) #endif #ifdef SQLITE_ENABLE_ICU @@ -3905,6 +4103,12 @@ static int fts3EvalDeferredPhrase(Fts3Cursor *pCsr, Fts3Phrase *pPhrase){ return SQLITE_OK; } +/* +** Maximum number of tokens a phrase may have to be considered for the +** incremental doclists strategy. +*/ +#define MAX_INCR_PHRASE_TOKENS 4 + /* ** This function is called for each Fts3Phrase in a full-text query ** expression to initialize the mechanism for returning rows. Once this @@ -3918,23 +4122,43 @@ static int fts3EvalDeferredPhrase(Fts3Cursor *pCsr, Fts3Phrase *pPhrase){ ** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code. */ static int fts3EvalPhraseStart(Fts3Cursor *pCsr, int bOptOk, Fts3Phrase *p){ - int rc; /* Error code */ - Fts3PhraseToken *pFirst = &p->aToken[0]; Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + int rc = SQLITE_OK; /* Error code */ + int i; - if( pCsr->bDesc==pTab->bDescIdx - && bOptOk==1 - && p->nToken==1 - && pFirst->pSegcsr - && pFirst->pSegcsr->bLookup - && pFirst->bFirst==0 - ){ + /* Determine if doclists may be loaded from disk incrementally. This is + ** possible if the bOptOk argument is true, the FTS doclists will be + ** scanned in forward order, and the phrase consists of + ** MAX_INCR_PHRASE_TOKENS or fewer tokens, none of which are are "^first" + ** tokens or prefix tokens that cannot use a prefix-index. */ + int bHaveIncr = 0; + int bIncrOk = (bOptOk + && pCsr->bDesc==pTab->bDescIdx + && p->nToken<=MAX_INCR_PHRASE_TOKENS && p->nToken>0 + && p->nToken<=MAX_INCR_PHRASE_TOKENS && p->nToken>0 +#ifdef SQLITE_TEST + && pTab->bNoIncrDoclist==0 +#endif + ); + for(i=0; bIncrOk==1 && inToken; i++){ + Fts3PhraseToken *pToken = &p->aToken[i]; + if( pToken->bFirst || (pToken->pSegcsr!=0 && !pToken->pSegcsr->bLookup) ){ + bIncrOk = 0; + } + if( pToken->pSegcsr ) bHaveIncr = 1; + } + + if( bIncrOk && bHaveIncr ){ /* Use the incremental approach. */ int iCol = (p->iColumn >= pTab->nColumn ? -1 : p->iColumn); - rc = sqlite3Fts3MsrIncrStart( - pTab, pFirst->pSegcsr, iCol, pFirst->z, pFirst->n); + for(i=0; rc==SQLITE_OK && inToken; i++){ + Fts3PhraseToken *pToken = &p->aToken[i]; + Fts3MultiSegReader *pSegcsr = pToken->pSegcsr; + if( pSegcsr ){ + rc = sqlite3Fts3MsrIncrStart(pTab, pSegcsr, iCol, pToken->z, pToken->n); + } + } p->bIncr = 1; - }else{ /* Load the full doclist for the phrase into memory. */ rc = fts3EvalPhraseLoad(pCsr, p); @@ -4044,15 +4268,125 @@ void sqlite3Fts3DoclistNext( } /* -** Attempt to move the phrase iterator to point to the next matching docid. +** Advance the iterator pDL to the next entry in pDL->aAll/nAll. Set *pbEof +** to true if EOF is reached. +*/ +static void fts3EvalDlPhraseNext( + Fts3Table *pTab, + Fts3Doclist *pDL, + u8 *pbEof +){ + char *pIter; /* Used to iterate through aAll */ + char *pEnd = &pDL->aAll[pDL->nAll]; /* 1 byte past end of aAll */ + + if( pDL->pNextDocid ){ + pIter = pDL->pNextDocid; + }else{ + pIter = pDL->aAll; + } + + if( pIter>=pEnd ){ + /* We have already reached the end of this doclist. EOF. */ + *pbEof = 1; + }else{ + sqlite3_int64 iDelta; + pIter += sqlite3Fts3GetVarint(pIter, &iDelta); + if( pTab->bDescIdx==0 || pDL->pNextDocid==0 ){ + pDL->iDocid += iDelta; + }else{ + pDL->iDocid -= iDelta; + } + pDL->pList = pIter; + fts3PoslistCopy(0, &pIter); + pDL->nList = (int)(pIter - pDL->pList); + + /* pIter now points just past the 0x00 that terminates the position- + ** list for document pDL->iDocid. However, if this position-list was + ** edited in place by fts3EvalNearTrim(), then pIter may not actually + ** point to the start of the next docid value. The following line deals + ** with this case by advancing pIter past the zero-padding added by + ** fts3EvalNearTrim(). */ + while( pIterpNextDocid = pIter; + assert( pIter>=&pDL->aAll[pDL->nAll] || *pIter ); + *pbEof = 0; + } +} + +/* +** Helper type used by fts3EvalIncrPhraseNext() and incrPhraseTokenNext(). +*/ +typedef struct TokenDoclist TokenDoclist; +struct TokenDoclist { + int bIgnore; + sqlite3_int64 iDocid; + char *pList; + int nList; +}; + +/* +** Token pToken is an incrementally loaded token that is part of a +** multi-token phrase. Advance it to the next matching document in the +** database and populate output variable *p with the details of the new +** entry. Or, if the iterator has reached EOF, set *pbEof to true. +** ** If an error occurs, return an SQLite error code. Otherwise, return ** SQLITE_OK. +*/ +static int incrPhraseTokenNext( + Fts3Table *pTab, /* Virtual table handle */ + Fts3Phrase *pPhrase, /* Phrase to advance token of */ + int iToken, /* Specific token to advance */ + TokenDoclist *p, /* OUT: Docid and doclist for new entry */ + u8 *pbEof /* OUT: True if iterator is at EOF */ +){ + int rc = SQLITE_OK; + + if( pPhrase->iDoclistToken==iToken ){ + assert( p->bIgnore==0 ); + assert( pPhrase->aToken[iToken].pSegcsr==0 ); + fts3EvalDlPhraseNext(pTab, &pPhrase->doclist, pbEof); + p->pList = pPhrase->doclist.pList; + p->nList = pPhrase->doclist.nList; + p->iDocid = pPhrase->doclist.iDocid; + }else{ + Fts3PhraseToken *pToken = &pPhrase->aToken[iToken]; + assert( pToken->pDeferred==0 ); + assert( pToken->pSegcsr || pPhrase->iDoclistToken>=0 ); + if( pToken->pSegcsr ){ + assert( p->bIgnore==0 ); + rc = sqlite3Fts3MsrIncrNext( + pTab, pToken->pSegcsr, &p->iDocid, &p->pList, &p->nList + ); + if( p->pList==0 ) *pbEof = 1; + }else{ + p->bIgnore = 1; + } + } + + return rc; +} + + +/* +** The phrase iterator passed as the second argument: +** +** * features at least one token that uses an incremental doclist, and +** +** * does not contain any deferred tokens. +** +** Advance it to the next matching documnent in the database and populate +** the Fts3Doclist.pList and nList fields. ** ** If there is no "next" entry and no error occurs, then *pbEof is set to ** 1 before returning. Otherwise, if no error occurs and the iterator is ** successfully advanced, *pbEof is set to 0. +** +** If an error occurs, return an SQLite error code. Otherwise, return +** SQLITE_OK. */ -static int fts3EvalPhraseNext( +static int fts3EvalIncrPhraseNext( Fts3Cursor *pCsr, /* FTS Cursor handle */ Fts3Phrase *p, /* Phrase object to advance to next docid */ u8 *pbEof /* OUT: Set to 1 if EOF */ @@ -4060,57 +4394,116 @@ static int fts3EvalPhraseNext( int rc = SQLITE_OK; Fts3Doclist *pDL = &p->doclist; Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + u8 bEof = 0; - if( p->bIncr ){ - assert( p->nToken==1 ); - assert( pDL->pNextDocid==0 ); + /* This is only called if it is guaranteed that the phrase has at least + ** one incremental token. In which case the bIncr flag is set. */ + assert( p->bIncr==1 ); + + if( p->nToken==1 && p->bIncr ){ rc = sqlite3Fts3MsrIncrNext(pTab, p->aToken[0].pSegcsr, &pDL->iDocid, &pDL->pList, &pDL->nList ); - if( rc==SQLITE_OK && !pDL->pList ){ - *pbEof = 1; + if( pDL->pList==0 ) bEof = 1; + }else{ + int bDescDoclist = pCsr->bDesc; + struct TokenDoclist a[MAX_INCR_PHRASE_TOKENS]; + + memset(a, 0, sizeof(a)); + assert( p->nToken<=MAX_INCR_PHRASE_TOKENS ); + assert( p->iDoclistTokennToken && bEof==0; i++){ + rc = incrPhraseTokenNext(pTab, p, i, &a[i], &bEof); + if( a[i].bIgnore==0 && (bMaxSet==0 || DOCID_CMP(iMax, a[i].iDocid)<0) ){ + iMax = a[i].iDocid; + bMaxSet = 1; + } + } + assert( rc!=SQLITE_OK || a[p->nToken-1].bIgnore==0 ); + assert( rc!=SQLITE_OK || bMaxSet ); + + /* Keep advancing iterators until they all point to the same document */ + for(i=0; inToken; i++){ + while( rc==SQLITE_OK && bEof==0 + && a[i].bIgnore==0 && DOCID_CMP(a[i].iDocid, iMax)<0 + ){ + rc = incrPhraseTokenNext(pTab, p, i, &a[i], &bEof); + if( DOCID_CMP(a[i].iDocid, iMax)>0 ){ + iMax = a[i].iDocid; + i = 0; + } + } + } + + /* Check if the current entries really are a phrase match */ + if( bEof==0 ){ + int nList = 0; + int nByte = a[p->nToken-1].nList; + char *aDoclist = sqlite3_malloc(nByte+1); + if( !aDoclist ) return SQLITE_NOMEM; + memcpy(aDoclist, a[p->nToken-1].pList, nByte+1); + + for(i=0; i<(p->nToken-1); i++){ + if( a[i].bIgnore==0 ){ + char *pL = a[i].pList; + char *pR = aDoclist; + char *pOut = aDoclist; + int nDist = p->nToken-1-i; + int res = fts3PoslistPhraseMerge(&pOut, nDist, 0, 1, &pL, &pR); + if( res==0 ) break; + nList = (int)(pOut - aDoclist); + } + } + if( i==(p->nToken-1) ){ + pDL->iDocid = iMax; + pDL->pList = aDoclist; + pDL->nList = nList; + pDL->bFreeList = 1; + break; + } + sqlite3_free(aDoclist); + } } + } + + *pbEof = bEof; + return rc; +} + +/* +** Attempt to move the phrase iterator to point to the next matching docid. +** If an error occurs, return an SQLite error code. Otherwise, return +** SQLITE_OK. +** +** If there is no "next" entry and no error occurs, then *pbEof is set to +** 1 before returning. Otherwise, if no error occurs and the iterator is +** successfully advanced, *pbEof is set to 0. +*/ +static int fts3EvalPhraseNext( + Fts3Cursor *pCsr, /* FTS Cursor handle */ + Fts3Phrase *p, /* Phrase object to advance to next docid */ + u8 *pbEof /* OUT: Set to 1 if EOF */ +){ + int rc = SQLITE_OK; + Fts3Doclist *pDL = &p->doclist; + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + + if( p->bIncr ){ + rc = fts3EvalIncrPhraseNext(pCsr, p, pbEof); }else if( pCsr->bDesc!=pTab->bDescIdx && pDL->nAll ){ sqlite3Fts3DoclistPrev(pTab->bDescIdx, pDL->aAll, pDL->nAll, &pDL->pNextDocid, &pDL->iDocid, &pDL->nList, pbEof ); pDL->pList = pDL->pNextDocid; }else{ - char *pIter; /* Used to iterate through aAll */ - char *pEnd = &pDL->aAll[pDL->nAll]; /* 1 byte past end of aAll */ - if( pDL->pNextDocid ){ - pIter = pDL->pNextDocid; - }else{ - pIter = pDL->aAll; - } - - if( pIter>=pEnd ){ - /* We have already reached the end of this doclist. EOF. */ - *pbEof = 1; - }else{ - sqlite3_int64 iDelta; - pIter += sqlite3Fts3GetVarint(pIter, &iDelta); - if( pTab->bDescIdx==0 || pDL->pNextDocid==0 ){ - pDL->iDocid += iDelta; - }else{ - pDL->iDocid -= iDelta; - } - pDL->pList = pIter; - fts3PoslistCopy(0, &pIter); - pDL->nList = (int)(pIter - pDL->pList); - - /* pIter now points just past the 0x00 that terminates the position- - ** list for document pDL->iDocid. However, if this position-list was - ** edited in place by fts3EvalNearTrim(), then pIter may not actually - ** point to the start of the next docid value. The following line deals - ** with this case by advancing pIter past the zero-padding added by - ** fts3EvalNearTrim(). */ - while( pIterpNextDocid = pIter; - assert( pIter>=&pDL->aAll[pDL->nAll] || *pIter ); - *pbEof = 0; - } + fts3EvalDlPhraseNext(pTab, pDL, pbEof); } return rc; @@ -4135,7 +4528,6 @@ static int fts3EvalPhraseNext( static void fts3EvalStartReaders( Fts3Cursor *pCsr, /* FTS Cursor handle */ Fts3Expr *pExpr, /* Expression to initialize phrases in */ - int bOptOk, /* True to enable incremental loading */ int *pRc /* IN/OUT: Error code */ ){ if( pExpr && SQLITE_OK==*pRc ){ @@ -4146,10 +4538,10 @@ static void fts3EvalStartReaders( if( pExpr->pPhrase->aToken[i].pDeferred==0 ) break; } pExpr->bDeferred = (i==nToken); - *pRc = fts3EvalPhraseStart(pCsr, bOptOk, pExpr->pPhrase); + *pRc = fts3EvalPhraseStart(pCsr, 1, pExpr->pPhrase); }else{ - fts3EvalStartReaders(pCsr, pExpr->pLeft, bOptOk, pRc); - fts3EvalStartReaders(pCsr, pExpr->pRight, bOptOk, pRc); + fts3EvalStartReaders(pCsr, pExpr->pLeft, pRc); + fts3EvalStartReaders(pCsr, pExpr->pRight, pRc); pExpr->bDeferred = (pExpr->pLeft->bDeferred && pExpr->pRight->bDeferred); } } @@ -4391,7 +4783,7 @@ static int fts3EvalSelectDeferred( ** overflowing the 32-bit integer it is stored in. */ if( ii<12 ) nLoad4 = nLoad4*4; - if( ii==0 || pTC->pPhrase->nToken>1 ){ + if( ii==0 || (pTC->pPhrase->nToken>1 && ii!=nToken-1) ){ /* Either this is the cheapest token in the entire query, or it is ** part of a multi-token phrase. Either way, the entire doclist will ** (eventually) be loaded into memory. It may as well be now. */ @@ -4471,7 +4863,7 @@ static int fts3EvalStart(Fts3Cursor *pCsr){ } #endif - fts3EvalStartReaders(pCsr, pCsr->pExpr, 1, &rc); + fts3EvalStartReaders(pCsr, pCsr->pExpr, &rc); return rc; } @@ -4954,6 +5346,16 @@ static int fts3EvalNext(Fts3Cursor *pCsr){ pCsr->iPrevId = pExpr->iDocid; }while( pCsr->isEof==0 && fts3EvalTestDeferredAndNear(pCsr, &rc) ); } + + /* Check if the cursor is past the end of the docid range specified + ** by Fts3Cursor.iMinDocid/iMaxDocid. If so, set the EOF flag. */ + if( rc==SQLITE_OK && ( + (pCsr->bDesc==0 && pCsr->iPrevId>pCsr->iMaxDocid) + || (pCsr->bDesc!=0 && pCsr->iPrevIdiMinDocid) + )){ + pCsr->isEof = 1; + } + return rc; } @@ -4977,12 +5379,16 @@ static void fts3EvalRestart( if( pPhrase ){ fts3EvalInvalidatePoslist(pPhrase); if( pPhrase->bIncr ){ - assert( pPhrase->nToken==1 ); - assert( pPhrase->aToken[0].pSegcsr ); - sqlite3Fts3MsrIncrRestart(pPhrase->aToken[0].pSegcsr); + int i; + for(i=0; inToken; i++){ + Fts3PhraseToken *pToken = &pPhrase->aToken[i]; + assert( pToken->pDeferred==0 ); + if( pToken->pSegcsr ){ + sqlite3Fts3MsrIncrRestart(pToken->pSegcsr); + } + } *pRc = fts3EvalPhraseStart(pCsr, 0, pPhrase); } - pPhrase->doclist.pNextDocid = 0; pPhrase->doclist.iDocid = 0; } @@ -5027,7 +5433,7 @@ static void fts3EvalUpdateCounts(Fts3Expr *pExpr){ pExpr->aMI[iCol*3 + 2] += (iCnt>0); if( *p==0x00 ) break; p++; - p += sqlite3Fts3GetVarint32(p, &iCol); + p += fts3GetVarint32(p, &iCol); } } @@ -5231,15 +5637,23 @@ int sqlite3Fts3EvalPhrasePoslist( pIter = pPhrase->doclist.pList; if( iDocid!=pCsr->iPrevId || pExpr->bEof ){ int bDescDoclist = pTab->bDescIdx; /* For DOCID_CMP macro */ + int iMul; /* +1 if csr dir matches index dir, else -1 */ int bOr = 0; u8 bEof = 0; - Fts3Expr *p; + u8 bTreeEof = 0; + Fts3Expr *p; /* Used to iterate from pExpr to root */ + Fts3Expr *pNear; /* Most senior NEAR ancestor (or pExpr) */ /* Check if this phrase descends from an OR expression node. If not, ** return NULL. Otherwise, the entry that corresponds to docid - ** pCsr->iPrevId may lie earlier in the doclist buffer. */ + ** pCsr->iPrevId may lie earlier in the doclist buffer. Or, if the + ** tree that the node is part of has been marked as EOF, but the node + ** itself is not EOF, then it may point to an earlier entry. */ + pNear = pExpr; for(p=pExpr->pParent; p; p=p->pParent){ if( p->eType==FTSQUERY_OR ) bOr = 1; + if( p->eType==FTSQUERY_NEAR ) pNear = p; + if( p->bEof ) bTreeEof = 1; } if( bOr==0 ) return SQLITE_OK; @@ -5258,29 +5672,59 @@ int sqlite3Fts3EvalPhrasePoslist( assert( rc!=SQLITE_OK || pPhrase->bIncr==0 ); if( rc!=SQLITE_OK ) return rc; } - - if( pExpr->bEof ){ - pIter = 0; - iDocid = 0; + + iMul = ((pCsr->bDesc==bDescDoclist) ? 1 : -1); + while( bTreeEof==1 + && pNear->bEof==0 + && (DOCID_CMP(pNear->iDocid, pCsr->iPrevId) * iMul)<0 + ){ + int rc = SQLITE_OK; + fts3EvalNextRow(pCsr, pExpr, &rc); + if( rc!=SQLITE_OK ) return rc; + iDocid = pExpr->iDocid; + pIter = pPhrase->doclist.pList; } + bEof = (pPhrase->doclist.nAll==0); assert( bDescDoclist==0 || bDescDoclist==1 ); assert( pCsr->bDesc==0 || pCsr->bDesc==1 ); - if( pCsr->bDesc==bDescDoclist ){ - int dummy; - while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)>0 ) && bEof==0 ){ - sqlite3Fts3DoclistPrev( - bDescDoclist, pPhrase->doclist.aAll, pPhrase->doclist.nAll, - &pIter, &iDocid, &dummy, &bEof - ); - } - }else{ - while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)<0 ) && bEof==0 ){ - sqlite3Fts3DoclistNext( - bDescDoclist, pPhrase->doclist.aAll, pPhrase->doclist.nAll, - &pIter, &iDocid, &bEof - ); + if( bEof==0 ){ + if( pCsr->bDesc==bDescDoclist ){ + int dummy; + if( pNear->bEof ){ + /* This expression is already at EOF. So position it to point to the + ** last entry in the doclist at pPhrase->doclist.aAll[]. Variable + ** iDocid is already set for this entry, so all that is required is + ** to set pIter to point to the first byte of the last position-list + ** in the doclist. + ** + ** It would also be correct to set pIter and iDocid to zero. In + ** this case, the first call to sqltie3Fts4DoclistPrev() below + ** would also move the iterator to point to the last entry in the + ** doclist. However, this is expensive, as to do so it has to + ** iterate through the entire doclist from start to finish (since + ** it does not know the docid for the last entry). */ + pIter = &pPhrase->doclist.aAll[pPhrase->doclist.nAll-1]; + fts3ReversePoslist(pPhrase->doclist.aAll, &pIter); + } + while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)>0 ) && bEof==0 ){ + sqlite3Fts3DoclistPrev( + bDescDoclist, pPhrase->doclist.aAll, pPhrase->doclist.nAll, + &pIter, &iDocid, &dummy, &bEof + ); + } + }else{ + if( pNear->bEof ){ + pIter = 0; + iDocid = 0; + } + while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)<0 ) && bEof==0 ){ + sqlite3Fts3DoclistNext( + bDescDoclist, pPhrase->doclist.aAll, pPhrase->doclist.nAll, + &pIter, &iDocid, &bEof + ); + } } } @@ -5290,7 +5734,7 @@ int sqlite3Fts3EvalPhrasePoslist( if( *pIter==0x01 ){ pIter++; - pIter += sqlite3Fts3GetVarint32(pIter, &iThis); + pIter += fts3GetVarint32(pIter, &iThis); }else{ iThis = 0; } @@ -5298,7 +5742,7 @@ int sqlite3Fts3EvalPhrasePoslist( fts3ColumnlistCopy(0, &pIter); if( *pIter==0x00 ) return 0; pIter++; - pIter += sqlite3Fts3GetVarint32(pIter, &iThis); + pIter += fts3GetVarint32(pIter, &iThis); } *ppOut = ((iCol==iThis)?pIter:0); @@ -5339,7 +5783,10 @@ int sqlite3Fts3Corrupt(){ /* ** Initialize API pointer table, if required. */ -int sqlite3_extension_init( +#ifdef _WIN32 +__declspec(dllexport) +#endif +int sqlite3_fts3_init( sqlite3 *db, char **pzErrMsg, const sqlite3_api_routines *pApi -- cgit v1.2.3