From 569c6676a6ddb0ff73821d7693b5e18ddef809b9 Mon Sep 17 00:00:00 2001 From: Hans-Christoph Steiner Date: Thu, 16 Oct 2014 22:51:35 -0400 Subject: Imported Upstream version 3.2.0 --- ext/fts3/fts3.c | 759 ++++++++++++++++++++++++++++++++--------- ext/fts3/fts3Int.h | 46 ++- ext/fts3/fts3_aux.c | 129 +++++-- ext/fts3/fts3_expr.c | 221 ++++++------ ext/fts3/fts3_hash.c | 4 +- ext/fts3/fts3_porter.c | 74 ++-- ext/fts3/fts3_snippet.c | 3 +- ext/fts3/fts3_test.c | 49 +++ ext/fts3/fts3_unicode.c | 16 +- ext/fts3/fts3_unicode2.c | 53 ++- ext/fts3/fts3_write.c | 506 ++++++++++++++++++++------- ext/fts3/tool/fts3view.c | 15 +- ext/fts3/unicode/mkunicode.tcl | 15 +- 13 files changed, 1378 insertions(+), 512 deletions(-) (limited to 'ext/fts3') diff --git a/ext/fts3/fts3.c b/ext/fts3/fts3.c index c00a13f..4f4b667 100644 --- a/ext/fts3/fts3.c +++ b/ext/fts3/fts3.c @@ -330,21 +330,37 @@ int sqlite3Fts3PutVarint(char *p, sqlite_int64 v){ return (int) (q - (unsigned char *)p); } +#define GETVARINT_STEP(v, ptr, shift, mask1, mask2, var, ret) \ + v = (v & mask1) | ( (*ptr++) << shift ); \ + if( (v & mask2)==0 ){ var = v; return ret; } +#define GETVARINT_INIT(v, ptr, shift, mask1, mask2, var, ret) \ + v = (*ptr++); \ + if( (v & mask2)==0 ){ var = v; return ret; } + /* ** Read a 64-bit variable-length integer from memory starting at p[0]. ** Return the number of bytes read, or 0 on error. ** The value is stored in *v. */ int sqlite3Fts3GetVarint(const char *p, sqlite_int64 *v){ - const unsigned char *q = (const unsigned char *) p; - sqlite_uint64 x = 0, y = 1; - while( (*q&0x80)==0x80 && q-(unsigned char *)p UNCOMPRESS */ { "order", 5 }, /* 4 -> ORDER */ { "content", 7 }, /* 5 -> CONTENT */ - { "languageid", 10 } /* 6 -> LANGUAGEID */ + { "languageid", 10 }, /* 6 -> LANGUAGEID */ + { "notindexed", 10 } /* 7 -> NOTINDEXED */ }; int iOpt; @@ -1197,6 +1237,11 @@ static int fts3InitVtab( zLanguageid = zVal; zVal = 0; break; + + case 7: /* NOTINDEXED */ + azNotindexed[nNotindexed++] = zVal; + zVal = 0; + break; } } sqlite3_free(zVal); @@ -1268,6 +1313,7 @@ static int fts3InitVtab( nByte = sizeof(Fts3Table) + /* Fts3Table */ nCol * sizeof(char *) + /* azColumn */ nIndex * sizeof(struct Fts3Index) + /* aIndex */ + nCol * sizeof(u8) + /* abNotindexed */ nName + /* zName */ nDb + /* zDb */ nString; /* Space for azColumn strings */ @@ -1287,7 +1333,7 @@ static int fts3InitVtab( p->bHasStat = isFts4; p->bFts4 = isFts4; p->bDescIdx = bDescIdx; - p->bAutoincrmerge = 0xff; /* 0xff means setting unknown */ + p->nAutoincrmerge = 0xff; /* 0xff means setting unknown */ p->zContentTbl = zContent; p->zLanguageid = zLanguageid; zContent = 0; @@ -1301,9 +1347,10 @@ static int fts3InitVtab( for(i=0; iaIndex[i].hPending, FTS3_HASH_STRING, 1); } + p->abNotindexed = (u8 *)&p->aIndex[nIndex]; /* Fill in the zName and zDb fields of the vtab structure. */ - zCsr = (char *)&p->aIndex[nIndex]; + zCsr = (char *)&p->abNotindexed[nCol]; p->zName = zCsr; memcpy(zCsr, argv[2], nName); zCsr += nName; @@ -1324,7 +1371,28 @@ static int fts3InitVtab( assert( zCsr <= &((char *)p)[nByte] ); } - if( (zCompress==0)!=(zUncompress==0) ){ + /* Fill in the abNotindexed array */ + for(iCol=0; iColazColumn[iCol]); + for(i=0; iazColumn[iCol], zNot, n) + ){ + p->abNotindexed[iCol] = 1; + sqlite3_free(zNot); + azNotindexed[i] = 0; + } + } + } + for(i=0; izDb, p->zName); - if( rc2==SQLITE_OK ) p->bHasStat = 1; + p->bHasStat = 2; } /* Figure out the page-size for the database. This is required in order to @@ -1365,7 +1430,9 @@ fts3_init_out: sqlite3_free(zUncompress); sqlite3_free(zContent); sqlite3_free(zLanguageid); + for(i=0; iestimatedRows variable to nRow. Unless this +** extension is currently being used by a version of SQLite too old to +** support estimatedRows. In that case this function is a no-op. +*/ +static void fts3SetEstimatedRows(sqlite3_index_info *pIdxInfo, i64 nRow){ +#if SQLITE_VERSION_NUMBER>=3008002 + if( sqlite3_libversion_number()>=3008002 ){ + pIdxInfo->estimatedRows = nRow; + } +#endif +} + /* ** Implementation of the xBestIndex method for FTS3 tables. There ** are three possible strategies, in order of preference: @@ -1416,23 +1496,40 @@ static int fts3BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ Fts3Table *p = (Fts3Table *)pVTab; int i; /* Iterator variable */ int iCons = -1; /* Index of constraint to use */ + int iLangidCons = -1; /* Index of langid=x constraint, if present */ + int iDocidGe = -1; /* Index of docid>=x constraint, if present */ + int iDocidLe = -1; /* Index of docid<=x constraint, if present */ + int iIdx; /* By default use a full table scan. This is an expensive option, ** so search through the constraints to see if a more efficient ** strategy is possible. */ pInfo->idxNum = FTS3_FULLSCAN_SEARCH; - pInfo->estimatedCost = 500000; + pInfo->estimatedCost = 5000000; for(i=0; inConstraint; i++){ + int bDocid; /* True if this constraint is on docid */ struct sqlite3_index_constraint *pCons = &pInfo->aConstraint[i]; - if( pCons->usable==0 ) continue; + if( pCons->usable==0 ){ + if( pCons->op==SQLITE_INDEX_CONSTRAINT_MATCH ){ + /* There exists an unusable MATCH constraint. This means that if + ** the planner does elect to use the results of this call as part + ** of the overall query plan the user will see an "unable to use + ** function MATCH in the requested context" error. To discourage + ** this, return a very high cost here. */ + pInfo->idxNum = FTS3_FULLSCAN_SEARCH; + pInfo->estimatedCost = 1e50; + fts3SetEstimatedRows(pInfo, ((sqlite3_int64)1) << 50); + return SQLITE_OK; + } + continue; + } + + bDocid = (pCons->iColumn<0 || pCons->iColumn==p->nColumn+1); /* A direct lookup on the rowid or docid column. Assign a cost of 1.0. */ - if( iCons<0 - && pCons->op==SQLITE_INDEX_CONSTRAINT_EQ - && (pCons->iColumn<0 || pCons->iColumn==p->nColumn+1 ) - ){ + if( iCons<0 && pCons->op==SQLITE_INDEX_CONSTRAINT_EQ && bDocid ){ pInfo->idxNum = FTS3_DOCID_SEARCH; pInfo->estimatedCost = 1.0; iCons = i; @@ -1461,14 +1558,38 @@ static int fts3BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ ){ iLangidCons = i; } + + if( bDocid ){ + switch( pCons->op ){ + case SQLITE_INDEX_CONSTRAINT_GE: + case SQLITE_INDEX_CONSTRAINT_GT: + iDocidGe = i; + break; + + case SQLITE_INDEX_CONSTRAINT_LE: + case SQLITE_INDEX_CONSTRAINT_LT: + iDocidLe = i; + break; + } + } } + iIdx = 1; if( iCons>=0 ){ - pInfo->aConstraintUsage[iCons].argvIndex = 1; + pInfo->aConstraintUsage[iCons].argvIndex = iIdx++; pInfo->aConstraintUsage[iCons].omit = 1; } if( iLangidCons>=0 ){ - pInfo->aConstraintUsage[iLangidCons].argvIndex = 2; + pInfo->idxNum |= FTS3_HAVE_LANGID; + pInfo->aConstraintUsage[iLangidCons].argvIndex = iIdx++; + } + if( iDocidGe>=0 ){ + pInfo->idxNum |= FTS3_HAVE_DOCID_GE; + pInfo->aConstraintUsage[iDocidGe].argvIndex = iIdx++; + } + if( iDocidLe>=0 ){ + pInfo->idxNum |= FTS3_HAVE_DOCID_LE; + pInfo->aConstraintUsage[iDocidLe].argvIndex = iIdx++; } /* Regardless of the strategy selected, FTS can deliver rows in rowid (or @@ -1646,10 +1767,10 @@ static int fts3ScanInteriorNode( /* Load the next term on the node into zBuffer. Use realloc() to expand ** the size of zBuffer if required. */ if( !isFirstTerm ){ - zCsr += sqlite3Fts3GetVarint32(zCsr, &nPrefix); + zCsr += fts3GetVarint32(zCsr, &nPrefix); } isFirstTerm = 0; - zCsr += sqlite3Fts3GetVarint32(zCsr, &nSuffix); + zCsr += fts3GetVarint32(zCsr, &nSuffix); if( nPrefix<0 || nSuffix<0 || &zCsr[nSuffix]>zEnd ){ rc = FTS_CORRUPT_VTAB; @@ -1737,7 +1858,7 @@ static int fts3SelectLeaf( assert( piLeaf || piLeaf2 ); - sqlite3Fts3GetVarint32(zNode, &iHeight); + fts3GetVarint32(zNode, &iHeight); rc = fts3ScanInteriorNode(zTerm, nTerm, zNode, nNode, piLeaf, piLeaf2); assert( !piLeaf2 || !piLeaf || rc!=SQLITE_OK || (*piLeaf<=*piLeaf2) ); @@ -1939,11 +2060,11 @@ static void fts3PoslistMerge( int iCol1; /* The current column index in pp1 */ int iCol2; /* The current column index in pp2 */ - if( *p1==POS_COLUMN ) sqlite3Fts3GetVarint32(&p1[1], &iCol1); + if( *p1==POS_COLUMN ) fts3GetVarint32(&p1[1], &iCol1); else if( *p1==POS_END ) iCol1 = POSITION_LIST_END; else iCol1 = 0; - if( *p2==POS_COLUMN ) sqlite3Fts3GetVarint32(&p2[1], &iCol2); + if( *p2==POS_COLUMN ) fts3GetVarint32(&p2[1], &iCol2); else if( *p2==POS_END ) iCol2 = POSITION_LIST_END; else iCol2 = 0; @@ -2036,11 +2157,11 @@ static int fts3PoslistPhraseMerge( assert( p!=0 && *p1!=0 && *p2!=0 ); if( *p1==POS_COLUMN ){ p1++; - p1 += sqlite3Fts3GetVarint32(p1, &iCol1); + p1 += fts3GetVarint32(p1, &iCol1); } if( *p2==POS_COLUMN ){ p2++; - p2 += sqlite3Fts3GetVarint32(p2, &iCol2); + p2 += fts3GetVarint32(p2, &iCol2); } while( 1 ){ @@ -2090,9 +2211,9 @@ static int fts3PoslistPhraseMerge( if( 0==*p1 || 0==*p2 ) break; p1++; - p1 += sqlite3Fts3GetVarint32(p1, &iCol1); + p1 += fts3GetVarint32(p1, &iCol1); p2++; - p2 += sqlite3Fts3GetVarint32(p2, &iCol2); + p2 += fts3GetVarint32(p2, &iCol2); } /* Advance pointer p1 or p2 (whichever corresponds to the smaller of @@ -2104,12 +2225,12 @@ static int fts3PoslistPhraseMerge( fts3ColumnlistCopy(0, &p1); if( 0==*p1 ) break; p1++; - p1 += sqlite3Fts3GetVarint32(p1, &iCol1); + p1 += fts3GetVarint32(p1, &iCol1); }else{ fts3ColumnlistCopy(0, &p2); if( 0==*p2 ) break; p2++; - p2 += sqlite3Fts3GetVarint32(p2, &iCol2); + p2 += fts3GetVarint32(p2, &iCol2); } } @@ -2915,6 +3036,33 @@ static int fts3NextMethod(sqlite3_vtab_cursor *pCursor){ return rc; } +/* +** The following are copied from sqliteInt.h. +** +** Constants for the largest and smallest possible 64-bit signed integers. +** These macros are designed to work correctly on both 32-bit and 64-bit +** compilers. +*/ +#ifndef SQLITE_AMALGAMATION +# define LARGEST_INT64 (0xffffffff|(((sqlite3_int64)0x7fffffff)<<32)) +# define SMALLEST_INT64 (((sqlite3_int64)-1) - LARGEST_INT64) +#endif + +/* +** If the numeric type of argument pVal is "integer", then return it +** converted to a 64-bit signed integer. Otherwise, return a copy of +** the second parameter, iDefault. +*/ +static sqlite3_int64 fts3DocidRange(sqlite3_value *pVal, i64 iDefault){ + if( pVal ){ + int eType = sqlite3_value_numeric_type(pVal); + if( eType==SQLITE_INTEGER ){ + return sqlite3_value_int64(pVal); + } + } + return iDefault; +} + /* ** This is the xFilter interface for the virtual table. See ** the virtual table xFilter method documentation for additional @@ -2940,40 +3088,58 @@ static int fts3FilterMethod( ){ int rc; char *zSql; /* SQL statement used to access %_content */ + int eSearch; Fts3Table *p = (Fts3Table *)pCursor->pVtab; Fts3Cursor *pCsr = (Fts3Cursor *)pCursor; + sqlite3_value *pCons = 0; /* The MATCH or rowid constraint, if any */ + sqlite3_value *pLangid = 0; /* The "langid = ?" constraint, if any */ + sqlite3_value *pDocidGe = 0; /* The "docid >= ?" constraint, if any */ + sqlite3_value *pDocidLe = 0; /* The "docid <= ?" constraint, if any */ + int iIdx; + UNUSED_PARAMETER(idxStr); UNUSED_PARAMETER(nVal); - assert( idxNum>=0 && idxNum<=(FTS3_FULLTEXT_SEARCH+p->nColumn) ); - assert( nVal==0 || nVal==1 || nVal==2 ); - assert( (nVal==0)==(idxNum==FTS3_FULLSCAN_SEARCH) ); + eSearch = (idxNum & 0x0000FFFF); + assert( eSearch>=0 && eSearch<=(FTS3_FULLTEXT_SEARCH+p->nColumn) ); assert( p->pSegments==0 ); + /* Collect arguments into local variables */ + iIdx = 0; + if( eSearch!=FTS3_FULLSCAN_SEARCH ) pCons = apVal[iIdx++]; + if( idxNum & FTS3_HAVE_LANGID ) pLangid = apVal[iIdx++]; + if( idxNum & FTS3_HAVE_DOCID_GE ) pDocidGe = apVal[iIdx++]; + if( idxNum & FTS3_HAVE_DOCID_LE ) pDocidLe = apVal[iIdx++]; + assert( iIdx==nVal ); + /* In case the cursor has been used before, clear it now. */ sqlite3_finalize(pCsr->pStmt); sqlite3_free(pCsr->aDoclist); sqlite3Fts3ExprFree(pCsr->pExpr); memset(&pCursor[1], 0, sizeof(Fts3Cursor)-sizeof(sqlite3_vtab_cursor)); + /* Set the lower and upper bounds on docids to return */ + pCsr->iMinDocid = fts3DocidRange(pDocidGe, SMALLEST_INT64); + pCsr->iMaxDocid = fts3DocidRange(pDocidLe, LARGEST_INT64); + if( idxStr ){ pCsr->bDesc = (idxStr[0]=='D'); }else{ pCsr->bDesc = p->bDescIdx; } - pCsr->eSearch = (i16)idxNum; + pCsr->eSearch = (i16)eSearch; - if( idxNum!=FTS3_DOCID_SEARCH && idxNum!=FTS3_FULLSCAN_SEARCH ){ - int iCol = idxNum-FTS3_FULLTEXT_SEARCH; - const char *zQuery = (const char *)sqlite3_value_text(apVal[0]); + if( eSearch!=FTS3_DOCID_SEARCH && eSearch!=FTS3_FULLSCAN_SEARCH ){ + int iCol = eSearch-FTS3_FULLTEXT_SEARCH; + const char *zQuery = (const char *)sqlite3_value_text(pCons); - if( zQuery==0 && sqlite3_value_type(apVal[0])!=SQLITE_NULL ){ + if( zQuery==0 && sqlite3_value_type(pCons)!=SQLITE_NULL ){ return SQLITE_NOMEM; } pCsr->iLangid = 0; - if( nVal==2 ) pCsr->iLangid = sqlite3_value_int(apVal[1]); + if( pLangid ) pCsr->iLangid = sqlite3_value_int(pLangid); assert( p->base.zErrMsg==0 ); rc = sqlite3Fts3ExprParse(p->pTokenizer, pCsr->iLangid, @@ -2984,11 +3150,7 @@ static int fts3FilterMethod( return rc; } - rc = sqlite3Fts3ReadLock(p); - if( rc!=SQLITE_OK ) return rc; - rc = fts3EvalStart(pCsr); - sqlite3Fts3SegmentsClose(p); if( rc!=SQLITE_OK ) return rc; pCsr->pNextId = pCsr->aDoclist; @@ -3000,7 +3162,7 @@ static int fts3FilterMethod( ** full-text query or docid lookup, the statement retrieves a single ** row by docid. */ - if( idxNum==FTS3_FULLSCAN_SEARCH ){ + if( eSearch==FTS3_FULLSCAN_SEARCH ){ zSql = sqlite3_mprintf( "SELECT %s ORDER BY rowid %s", p->zReadExprlist, (pCsr->bDesc ? "DESC" : "ASC") @@ -3011,10 +3173,10 @@ static int fts3FilterMethod( }else{ rc = SQLITE_NOMEM; } - }else if( idxNum==FTS3_DOCID_SEARCH ){ + }else if( eSearch==FTS3_DOCID_SEARCH ){ rc = fts3CursorSeekStmt(pCsr, &pCsr->pStmt); if( rc==SQLITE_OK ){ - rc = sqlite3_bind_value(pCsr->pStmt, 1, apVal[0]); + rc = sqlite3_bind_value(pCsr->pStmt, 1, pCons); } } if( rc!=SQLITE_OK ) return rc; @@ -3142,7 +3304,10 @@ static int fts3SyncMethod(sqlite3_vtab *pVtab){ Fts3Table *p = (Fts3Table*)pVtab; int rc = sqlite3Fts3PendingTermsFlush(p); - if( rc==SQLITE_OK && p->bAutoincrmerge==1 && p->nLeafAdd>(nMinMerge/16) ){ + if( rc==SQLITE_OK + && p->nLeafAdd>(nMinMerge/16) + && p->nAutoincrmerge && p->nAutoincrmerge!=0xff + ){ int mxLevel = 0; /* Maximum relative level value in db */ int A; /* Incr-merge parameter A */ @@ -3150,14 +3315,41 @@ static int fts3SyncMethod(sqlite3_vtab *pVtab){ assert( rc==SQLITE_OK || mxLevel==0 ); A = p->nLeafAdd * mxLevel; A += (A/2); - if( A>(int)nMinMerge ) rc = sqlite3Fts3Incrmerge(p, A, 8); + if( A>(int)nMinMerge ) rc = sqlite3Fts3Incrmerge(p, A, p->nAutoincrmerge); } sqlite3Fts3SegmentsClose(p); return rc; } /* -** Implementation of xBegin() method. This is a no-op. +** If it is currently unknown whether or not the FTS table has an %_stat +** table (if p->bHasStat==2), attempt to determine this (set p->bHasStat +** to 0 or 1). Return SQLITE_OK if successful, or an SQLite error code +** if an error occurs. +*/ +static int fts3SetHasStat(Fts3Table *p){ + int rc = SQLITE_OK; + if( p->bHasStat==2 ){ + const char *zFmt ="SELECT 1 FROM %Q.sqlite_master WHERE tbl_name='%q_stat'"; + char *zSql = sqlite3_mprintf(zFmt, p->zDb, p->zName); + if( zSql ){ + sqlite3_stmt *pStmt = 0; + rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0); + if( rc==SQLITE_OK ){ + int bHasStat = (sqlite3_step(pStmt)==SQLITE_ROW); + rc = sqlite3_finalize(pStmt); + if( rc==SQLITE_OK ) p->bHasStat = bHasStat; + } + sqlite3_free(zSql); + }else{ + rc = SQLITE_NOMEM; + } + } + return rc; +} + +/* +** Implementation of xBegin() method. */ static int fts3BeginMethod(sqlite3_vtab *pVtab){ Fts3Table *p = (Fts3Table*)pVtab; @@ -3168,7 +3360,7 @@ static int fts3BeginMethod(sqlite3_vtab *pVtab){ TESTONLY( p->inTransaction = 1 ); TESTONLY( p->mxSavepoint = -1; ); p->nLeafAdd = 0; - return SQLITE_OK; + return fts3SetHasStat(p); } /* @@ -3417,6 +3609,10 @@ static int fts3RenameMethod( sqlite3 *db = p->db; /* Database connection */ int rc; /* Return Code */ + /* At this point it must be known if the %_stat table exists or not. + ** So bHasStat may not be 2. */ + rc = fts3SetHasStat(p); + /* As it happens, the pending terms table is always empty here. This is ** because an "ALTER TABLE RENAME TABLE" statement inside a transaction ** always opens a savepoint transaction. And the xSavepoint() method @@ -3424,7 +3620,9 @@ static int fts3RenameMethod( ** PendingTermsFlush() in in case that changes. */ assert( p->nPendingData==0 ); - rc = sqlite3Fts3PendingTermsFlush(p); + if( rc==SQLITE_OK ){ + rc = sqlite3Fts3PendingTermsFlush(p); + } if( p->zContentTbl==0 ){ fts3DbExec(&rc, db, @@ -3552,7 +3750,7 @@ static void hashDestroy(void *p){ */ void sqlite3Fts3SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule); void sqlite3Fts3PorterTokenizerModule(sqlite3_tokenizer_module const**ppModule); -#ifdef SQLITE_ENABLE_FTS4_UNICODE61 +#ifndef SQLITE_DISABLE_FTS3_UNICODE void sqlite3Fts3UnicodeTokenizer(sqlite3_tokenizer_module const**ppModule); #endif #ifdef SQLITE_ENABLE_ICU @@ -3570,7 +3768,7 @@ int sqlite3Fts3Init(sqlite3 *db){ Fts3Hash *pHash = 0; const sqlite3_tokenizer_module *pSimple = 0; const sqlite3_tokenizer_module *pPorter = 0; -#ifdef SQLITE_ENABLE_FTS4_UNICODE61 +#ifndef SQLITE_DISABLE_FTS3_UNICODE const sqlite3_tokenizer_module *pUnicode = 0; #endif @@ -3579,7 +3777,7 @@ int sqlite3Fts3Init(sqlite3 *db){ sqlite3Fts3IcuTokenizerModule(&pIcu); #endif -#ifdef SQLITE_ENABLE_FTS4_UNICODE61 +#ifndef SQLITE_DISABLE_FTS3_UNICODE sqlite3Fts3UnicodeTokenizer(&pUnicode); #endif @@ -3607,7 +3805,7 @@ int sqlite3Fts3Init(sqlite3 *db){ if( sqlite3Fts3HashInsert(pHash, "simple", 7, (void *)pSimple) || sqlite3Fts3HashInsert(pHash, "porter", 7, (void *)pPorter) -#ifdef SQLITE_ENABLE_FTS4_UNICODE61 +#ifndef SQLITE_DISABLE_FTS3_UNICODE || sqlite3Fts3HashInsert(pHash, "unicode61", 10, (void *)pUnicode) #endif #ifdef SQLITE_ENABLE_ICU @@ -3905,6 +4103,12 @@ static int fts3EvalDeferredPhrase(Fts3Cursor *pCsr, Fts3Phrase *pPhrase){ return SQLITE_OK; } +/* +** Maximum number of tokens a phrase may have to be considered for the +** incremental doclists strategy. +*/ +#define MAX_INCR_PHRASE_TOKENS 4 + /* ** This function is called for each Fts3Phrase in a full-text query ** expression to initialize the mechanism for returning rows. Once this @@ -3918,23 +4122,43 @@ static int fts3EvalDeferredPhrase(Fts3Cursor *pCsr, Fts3Phrase *pPhrase){ ** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code. */ static int fts3EvalPhraseStart(Fts3Cursor *pCsr, int bOptOk, Fts3Phrase *p){ - int rc; /* Error code */ - Fts3PhraseToken *pFirst = &p->aToken[0]; Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + int rc = SQLITE_OK; /* Error code */ + int i; - if( pCsr->bDesc==pTab->bDescIdx - && bOptOk==1 - && p->nToken==1 - && pFirst->pSegcsr - && pFirst->pSegcsr->bLookup - && pFirst->bFirst==0 - ){ + /* Determine if doclists may be loaded from disk incrementally. This is + ** possible if the bOptOk argument is true, the FTS doclists will be + ** scanned in forward order, and the phrase consists of + ** MAX_INCR_PHRASE_TOKENS or fewer tokens, none of which are are "^first" + ** tokens or prefix tokens that cannot use a prefix-index. */ + int bHaveIncr = 0; + int bIncrOk = (bOptOk + && pCsr->bDesc==pTab->bDescIdx + && p->nToken<=MAX_INCR_PHRASE_TOKENS && p->nToken>0 + && p->nToken<=MAX_INCR_PHRASE_TOKENS && p->nToken>0 +#ifdef SQLITE_TEST + && pTab->bNoIncrDoclist==0 +#endif + ); + for(i=0; bIncrOk==1 && inToken; i++){ + Fts3PhraseToken *pToken = &p->aToken[i]; + if( pToken->bFirst || (pToken->pSegcsr!=0 && !pToken->pSegcsr->bLookup) ){ + bIncrOk = 0; + } + if( pToken->pSegcsr ) bHaveIncr = 1; + } + + if( bIncrOk && bHaveIncr ){ /* Use the incremental approach. */ int iCol = (p->iColumn >= pTab->nColumn ? -1 : p->iColumn); - rc = sqlite3Fts3MsrIncrStart( - pTab, pFirst->pSegcsr, iCol, pFirst->z, pFirst->n); + for(i=0; rc==SQLITE_OK && inToken; i++){ + Fts3PhraseToken *pToken = &p->aToken[i]; + Fts3MultiSegReader *pSegcsr = pToken->pSegcsr; + if( pSegcsr ){ + rc = sqlite3Fts3MsrIncrStart(pTab, pSegcsr, iCol, pToken->z, pToken->n); + } + } p->bIncr = 1; - }else{ /* Load the full doclist for the phrase into memory. */ rc = fts3EvalPhraseLoad(pCsr, p); @@ -4044,15 +4268,125 @@ void sqlite3Fts3DoclistNext( } /* -** Attempt to move the phrase iterator to point to the next matching docid. +** Advance the iterator pDL to the next entry in pDL->aAll/nAll. Set *pbEof +** to true if EOF is reached. +*/ +static void fts3EvalDlPhraseNext( + Fts3Table *pTab, + Fts3Doclist *pDL, + u8 *pbEof +){ + char *pIter; /* Used to iterate through aAll */ + char *pEnd = &pDL->aAll[pDL->nAll]; /* 1 byte past end of aAll */ + + if( pDL->pNextDocid ){ + pIter = pDL->pNextDocid; + }else{ + pIter = pDL->aAll; + } + + if( pIter>=pEnd ){ + /* We have already reached the end of this doclist. EOF. */ + *pbEof = 1; + }else{ + sqlite3_int64 iDelta; + pIter += sqlite3Fts3GetVarint(pIter, &iDelta); + if( pTab->bDescIdx==0 || pDL->pNextDocid==0 ){ + pDL->iDocid += iDelta; + }else{ + pDL->iDocid -= iDelta; + } + pDL->pList = pIter; + fts3PoslistCopy(0, &pIter); + pDL->nList = (int)(pIter - pDL->pList); + + /* pIter now points just past the 0x00 that terminates the position- + ** list for document pDL->iDocid. However, if this position-list was + ** edited in place by fts3EvalNearTrim(), then pIter may not actually + ** point to the start of the next docid value. The following line deals + ** with this case by advancing pIter past the zero-padding added by + ** fts3EvalNearTrim(). */ + while( pIterpNextDocid = pIter; + assert( pIter>=&pDL->aAll[pDL->nAll] || *pIter ); + *pbEof = 0; + } +} + +/* +** Helper type used by fts3EvalIncrPhraseNext() and incrPhraseTokenNext(). +*/ +typedef struct TokenDoclist TokenDoclist; +struct TokenDoclist { + int bIgnore; + sqlite3_int64 iDocid; + char *pList; + int nList; +}; + +/* +** Token pToken is an incrementally loaded token that is part of a +** multi-token phrase. Advance it to the next matching document in the +** database and populate output variable *p with the details of the new +** entry. Or, if the iterator has reached EOF, set *pbEof to true. +** ** If an error occurs, return an SQLite error code. Otherwise, return ** SQLITE_OK. +*/ +static int incrPhraseTokenNext( + Fts3Table *pTab, /* Virtual table handle */ + Fts3Phrase *pPhrase, /* Phrase to advance token of */ + int iToken, /* Specific token to advance */ + TokenDoclist *p, /* OUT: Docid and doclist for new entry */ + u8 *pbEof /* OUT: True if iterator is at EOF */ +){ + int rc = SQLITE_OK; + + if( pPhrase->iDoclistToken==iToken ){ + assert( p->bIgnore==0 ); + assert( pPhrase->aToken[iToken].pSegcsr==0 ); + fts3EvalDlPhraseNext(pTab, &pPhrase->doclist, pbEof); + p->pList = pPhrase->doclist.pList; + p->nList = pPhrase->doclist.nList; + p->iDocid = pPhrase->doclist.iDocid; + }else{ + Fts3PhraseToken *pToken = &pPhrase->aToken[iToken]; + assert( pToken->pDeferred==0 ); + assert( pToken->pSegcsr || pPhrase->iDoclistToken>=0 ); + if( pToken->pSegcsr ){ + assert( p->bIgnore==0 ); + rc = sqlite3Fts3MsrIncrNext( + pTab, pToken->pSegcsr, &p->iDocid, &p->pList, &p->nList + ); + if( p->pList==0 ) *pbEof = 1; + }else{ + p->bIgnore = 1; + } + } + + return rc; +} + + +/* +** The phrase iterator passed as the second argument: +** +** * features at least one token that uses an incremental doclist, and +** +** * does not contain any deferred tokens. +** +** Advance it to the next matching documnent in the database and populate +** the Fts3Doclist.pList and nList fields. ** ** If there is no "next" entry and no error occurs, then *pbEof is set to ** 1 before returning. Otherwise, if no error occurs and the iterator is ** successfully advanced, *pbEof is set to 0. +** +** If an error occurs, return an SQLite error code. Otherwise, return +** SQLITE_OK. */ -static int fts3EvalPhraseNext( +static int fts3EvalIncrPhraseNext( Fts3Cursor *pCsr, /* FTS Cursor handle */ Fts3Phrase *p, /* Phrase object to advance to next docid */ u8 *pbEof /* OUT: Set to 1 if EOF */ @@ -4060,57 +4394,116 @@ static int fts3EvalPhraseNext( int rc = SQLITE_OK; Fts3Doclist *pDL = &p->doclist; Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + u8 bEof = 0; - if( p->bIncr ){ - assert( p->nToken==1 ); - assert( pDL->pNextDocid==0 ); + /* This is only called if it is guaranteed that the phrase has at least + ** one incremental token. In which case the bIncr flag is set. */ + assert( p->bIncr==1 ); + + if( p->nToken==1 && p->bIncr ){ rc = sqlite3Fts3MsrIncrNext(pTab, p->aToken[0].pSegcsr, &pDL->iDocid, &pDL->pList, &pDL->nList ); - if( rc==SQLITE_OK && !pDL->pList ){ - *pbEof = 1; + if( pDL->pList==0 ) bEof = 1; + }else{ + int bDescDoclist = pCsr->bDesc; + struct TokenDoclist a[MAX_INCR_PHRASE_TOKENS]; + + memset(a, 0, sizeof(a)); + assert( p->nToken<=MAX_INCR_PHRASE_TOKENS ); + assert( p->iDoclistTokennToken && bEof==0; i++){ + rc = incrPhraseTokenNext(pTab, p, i, &a[i], &bEof); + if( a[i].bIgnore==0 && (bMaxSet==0 || DOCID_CMP(iMax, a[i].iDocid)<0) ){ + iMax = a[i].iDocid; + bMaxSet = 1; + } + } + assert( rc!=SQLITE_OK || a[p->nToken-1].bIgnore==0 ); + assert( rc!=SQLITE_OK || bMaxSet ); + + /* Keep advancing iterators until they all point to the same document */ + for(i=0; inToken; i++){ + while( rc==SQLITE_OK && bEof==0 + && a[i].bIgnore==0 && DOCID_CMP(a[i].iDocid, iMax)<0 + ){ + rc = incrPhraseTokenNext(pTab, p, i, &a[i], &bEof); + if( DOCID_CMP(a[i].iDocid, iMax)>0 ){ + iMax = a[i].iDocid; + i = 0; + } + } + } + + /* Check if the current entries really are a phrase match */ + if( bEof==0 ){ + int nList = 0; + int nByte = a[p->nToken-1].nList; + char *aDoclist = sqlite3_malloc(nByte+1); + if( !aDoclist ) return SQLITE_NOMEM; + memcpy(aDoclist, a[p->nToken-1].pList, nByte+1); + + for(i=0; i<(p->nToken-1); i++){ + if( a[i].bIgnore==0 ){ + char *pL = a[i].pList; + char *pR = aDoclist; + char *pOut = aDoclist; + int nDist = p->nToken-1-i; + int res = fts3PoslistPhraseMerge(&pOut, nDist, 0, 1, &pL, &pR); + if( res==0 ) break; + nList = (int)(pOut - aDoclist); + } + } + if( i==(p->nToken-1) ){ + pDL->iDocid = iMax; + pDL->pList = aDoclist; + pDL->nList = nList; + pDL->bFreeList = 1; + break; + } + sqlite3_free(aDoclist); + } } + } + + *pbEof = bEof; + return rc; +} + +/* +** Attempt to move the phrase iterator to point to the next matching docid. +** If an error occurs, return an SQLite error code. Otherwise, return +** SQLITE_OK. +** +** If there is no "next" entry and no error occurs, then *pbEof is set to +** 1 before returning. Otherwise, if no error occurs and the iterator is +** successfully advanced, *pbEof is set to 0. +*/ +static int fts3EvalPhraseNext( + Fts3Cursor *pCsr, /* FTS Cursor handle */ + Fts3Phrase *p, /* Phrase object to advance to next docid */ + u8 *pbEof /* OUT: Set to 1 if EOF */ +){ + int rc = SQLITE_OK; + Fts3Doclist *pDL = &p->doclist; + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + + if( p->bIncr ){ + rc = fts3EvalIncrPhraseNext(pCsr, p, pbEof); }else if( pCsr->bDesc!=pTab->bDescIdx && pDL->nAll ){ sqlite3Fts3DoclistPrev(pTab->bDescIdx, pDL->aAll, pDL->nAll, &pDL->pNextDocid, &pDL->iDocid, &pDL->nList, pbEof ); pDL->pList = pDL->pNextDocid; }else{ - char *pIter; /* Used to iterate through aAll */ - char *pEnd = &pDL->aAll[pDL->nAll]; /* 1 byte past end of aAll */ - if( pDL->pNextDocid ){ - pIter = pDL->pNextDocid; - }else{ - pIter = pDL->aAll; - } - - if( pIter>=pEnd ){ - /* We have already reached the end of this doclist. EOF. */ - *pbEof = 1; - }else{ - sqlite3_int64 iDelta; - pIter += sqlite3Fts3GetVarint(pIter, &iDelta); - if( pTab->bDescIdx==0 || pDL->pNextDocid==0 ){ - pDL->iDocid += iDelta; - }else{ - pDL->iDocid -= iDelta; - } - pDL->pList = pIter; - fts3PoslistCopy(0, &pIter); - pDL->nList = (int)(pIter - pDL->pList); - - /* pIter now points just past the 0x00 that terminates the position- - ** list for document pDL->iDocid. However, if this position-list was - ** edited in place by fts3EvalNearTrim(), then pIter may not actually - ** point to the start of the next docid value. The following line deals - ** with this case by advancing pIter past the zero-padding added by - ** fts3EvalNearTrim(). */ - while( pIterpNextDocid = pIter; - assert( pIter>=&pDL->aAll[pDL->nAll] || *pIter ); - *pbEof = 0; - } + fts3EvalDlPhraseNext(pTab, pDL, pbEof); } return rc; @@ -4135,7 +4528,6 @@ static int fts3EvalPhraseNext( static void fts3EvalStartReaders( Fts3Cursor *pCsr, /* FTS Cursor handle */ Fts3Expr *pExpr, /* Expression to initialize phrases in */ - int bOptOk, /* True to enable incremental loading */ int *pRc /* IN/OUT: Error code */ ){ if( pExpr && SQLITE_OK==*pRc ){ @@ -4146,10 +4538,10 @@ static void fts3EvalStartReaders( if( pExpr->pPhrase->aToken[i].pDeferred==0 ) break; } pExpr->bDeferred = (i==nToken); - *pRc = fts3EvalPhraseStart(pCsr, bOptOk, pExpr->pPhrase); + *pRc = fts3EvalPhraseStart(pCsr, 1, pExpr->pPhrase); }else{ - fts3EvalStartReaders(pCsr, pExpr->pLeft, bOptOk, pRc); - fts3EvalStartReaders(pCsr, pExpr->pRight, bOptOk, pRc); + fts3EvalStartReaders(pCsr, pExpr->pLeft, pRc); + fts3EvalStartReaders(pCsr, pExpr->pRight, pRc); pExpr->bDeferred = (pExpr->pLeft->bDeferred && pExpr->pRight->bDeferred); } } @@ -4391,7 +4783,7 @@ static int fts3EvalSelectDeferred( ** overflowing the 32-bit integer it is stored in. */ if( ii<12 ) nLoad4 = nLoad4*4; - if( ii==0 || pTC->pPhrase->nToken>1 ){ + if( ii==0 || (pTC->pPhrase->nToken>1 && ii!=nToken-1) ){ /* Either this is the cheapest token in the entire query, or it is ** part of a multi-token phrase. Either way, the entire doclist will ** (eventually) be loaded into memory. It may as well be now. */ @@ -4471,7 +4863,7 @@ static int fts3EvalStart(Fts3Cursor *pCsr){ } #endif - fts3EvalStartReaders(pCsr, pCsr->pExpr, 1, &rc); + fts3EvalStartReaders(pCsr, pCsr->pExpr, &rc); return rc; } @@ -4954,6 +5346,16 @@ static int fts3EvalNext(Fts3Cursor *pCsr){ pCsr->iPrevId = pExpr->iDocid; }while( pCsr->isEof==0 && fts3EvalTestDeferredAndNear(pCsr, &rc) ); } + + /* Check if the cursor is past the end of the docid range specified + ** by Fts3Cursor.iMinDocid/iMaxDocid. If so, set the EOF flag. */ + if( rc==SQLITE_OK && ( + (pCsr->bDesc==0 && pCsr->iPrevId>pCsr->iMaxDocid) + || (pCsr->bDesc!=0 && pCsr->iPrevIdiMinDocid) + )){ + pCsr->isEof = 1; + } + return rc; } @@ -4977,12 +5379,16 @@ static void fts3EvalRestart( if( pPhrase ){ fts3EvalInvalidatePoslist(pPhrase); if( pPhrase->bIncr ){ - assert( pPhrase->nToken==1 ); - assert( pPhrase->aToken[0].pSegcsr ); - sqlite3Fts3MsrIncrRestart(pPhrase->aToken[0].pSegcsr); + int i; + for(i=0; inToken; i++){ + Fts3PhraseToken *pToken = &pPhrase->aToken[i]; + assert( pToken->pDeferred==0 ); + if( pToken->pSegcsr ){ + sqlite3Fts3MsrIncrRestart(pToken->pSegcsr); + } + } *pRc = fts3EvalPhraseStart(pCsr, 0, pPhrase); } - pPhrase->doclist.pNextDocid = 0; pPhrase->doclist.iDocid = 0; } @@ -5027,7 +5433,7 @@ static void fts3EvalUpdateCounts(Fts3Expr *pExpr){ pExpr->aMI[iCol*3 + 2] += (iCnt>0); if( *p==0x00 ) break; p++; - p += sqlite3Fts3GetVarint32(p, &iCol); + p += fts3GetVarint32(p, &iCol); } } @@ -5231,15 +5637,23 @@ int sqlite3Fts3EvalPhrasePoslist( pIter = pPhrase->doclist.pList; if( iDocid!=pCsr->iPrevId || pExpr->bEof ){ int bDescDoclist = pTab->bDescIdx; /* For DOCID_CMP macro */ + int iMul; /* +1 if csr dir matches index dir, else -1 */ int bOr = 0; u8 bEof = 0; - Fts3Expr *p; + u8 bTreeEof = 0; + Fts3Expr *p; /* Used to iterate from pExpr to root */ + Fts3Expr *pNear; /* Most senior NEAR ancestor (or pExpr) */ /* Check if this phrase descends from an OR expression node. If not, ** return NULL. Otherwise, the entry that corresponds to docid - ** pCsr->iPrevId may lie earlier in the doclist buffer. */ + ** pCsr->iPrevId may lie earlier in the doclist buffer. Or, if the + ** tree that the node is part of has been marked as EOF, but the node + ** itself is not EOF, then it may point to an earlier entry. */ + pNear = pExpr; for(p=pExpr->pParent; p; p=p->pParent){ if( p->eType==FTSQUERY_OR ) bOr = 1; + if( p->eType==FTSQUERY_NEAR ) pNear = p; + if( p->bEof ) bTreeEof = 1; } if( bOr==0 ) return SQLITE_OK; @@ -5258,29 +5672,59 @@ int sqlite3Fts3EvalPhrasePoslist( assert( rc!=SQLITE_OK || pPhrase->bIncr==0 ); if( rc!=SQLITE_OK ) return rc; } - - if( pExpr->bEof ){ - pIter = 0; - iDocid = 0; + + iMul = ((pCsr->bDesc==bDescDoclist) ? 1 : -1); + while( bTreeEof==1 + && pNear->bEof==0 + && (DOCID_CMP(pNear->iDocid, pCsr->iPrevId) * iMul)<0 + ){ + int rc = SQLITE_OK; + fts3EvalNextRow(pCsr, pExpr, &rc); + if( rc!=SQLITE_OK ) return rc; + iDocid = pExpr->iDocid; + pIter = pPhrase->doclist.pList; } + bEof = (pPhrase->doclist.nAll==0); assert( bDescDoclist==0 || bDescDoclist==1 ); assert( pCsr->bDesc==0 || pCsr->bDesc==1 ); - if( pCsr->bDesc==bDescDoclist ){ - int dummy; - while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)>0 ) && bEof==0 ){ - sqlite3Fts3DoclistPrev( - bDescDoclist, pPhrase->doclist.aAll, pPhrase->doclist.nAll, - &pIter, &iDocid, &dummy, &bEof - ); - } - }else{ - while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)<0 ) && bEof==0 ){ - sqlite3Fts3DoclistNext( - bDescDoclist, pPhrase->doclist.aAll, pPhrase->doclist.nAll, - &pIter, &iDocid, &bEof - ); + if( bEof==0 ){ + if( pCsr->bDesc==bDescDoclist ){ + int dummy; + if( pNear->bEof ){ + /* This expression is already at EOF. So position it to point to the + ** last entry in the doclist at pPhrase->doclist.aAll[]. Variable + ** iDocid is already set for this entry, so all that is required is + ** to set pIter to point to the first byte of the last position-list + ** in the doclist. + ** + ** It would also be correct to set pIter and iDocid to zero. In + ** this case, the first call to sqltie3Fts4DoclistPrev() below + ** would also move the iterator to point to the last entry in the + ** doclist. However, this is expensive, as to do so it has to + ** iterate through the entire doclist from start to finish (since + ** it does not know the docid for the last entry). */ + pIter = &pPhrase->doclist.aAll[pPhrase->doclist.nAll-1]; + fts3ReversePoslist(pPhrase->doclist.aAll, &pIter); + } + while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)>0 ) && bEof==0 ){ + sqlite3Fts3DoclistPrev( + bDescDoclist, pPhrase->doclist.aAll, pPhrase->doclist.nAll, + &pIter, &iDocid, &dummy, &bEof + ); + } + }else{ + if( pNear->bEof ){ + pIter = 0; + iDocid = 0; + } + while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)<0 ) && bEof==0 ){ + sqlite3Fts3DoclistNext( + bDescDoclist, pPhrase->doclist.aAll, pPhrase->doclist.nAll, + &pIter, &iDocid, &bEof + ); + } } } @@ -5290,7 +5734,7 @@ int sqlite3Fts3EvalPhrasePoslist( if( *pIter==0x01 ){ pIter++; - pIter += sqlite3Fts3GetVarint32(pIter, &iThis); + pIter += fts3GetVarint32(pIter, &iThis); }else{ iThis = 0; } @@ -5298,7 +5742,7 @@ int sqlite3Fts3EvalPhrasePoslist( fts3ColumnlistCopy(0, &pIter); if( *pIter==0x00 ) return 0; pIter++; - pIter += sqlite3Fts3GetVarint32(pIter, &iThis); + pIter += fts3GetVarint32(pIter, &iThis); } *ppOut = ((iCol==iThis)?pIter:0); @@ -5339,7 +5783,10 @@ int sqlite3Fts3Corrupt(){ /* ** Initialize API pointer table, if required. */ -int sqlite3_extension_init( +#ifdef _WIN32 +__declspec(dllexport) +#endif +int sqlite3_fts3_init( sqlite3 *db, char **pzErrMsg, const sqlite3_api_routines *pApi diff --git a/ext/fts3/fts3Int.h b/ext/fts3/fts3Int.h index b19064c..b2827b7 100644 --- a/ext/fts3/fts3Int.h +++ b/ext/fts3/fts3Int.h @@ -32,13 +32,25 @@ /* If not building as part of the core, include sqlite3ext.h. */ #ifndef SQLITE_CORE # include "sqlite3ext.h" -extern const sqlite3_api_routines *sqlite3_api; +SQLITE_EXTENSION_INIT3 #endif #include "sqlite3.h" #include "fts3_tokenizer.h" #include "fts3_hash.h" +/* +** This constant determines the maximum depth of an FTS expression tree +** that the library will create and use. FTS uses recursion to perform +** various operations on the query tree, so the disadvantage of a large +** limit is that it may allow very large queries to use large amounts +** of stack space (perhaps causing a stack overflow). +*/ +#ifndef SQLITE_FTS3_MAX_EXPR_DEPTH +# define SQLITE_FTS3_MAX_EXPR_DEPTH 12 +#endif + + /* ** This constant controls how often segments are merged. Once there are ** FTS3_MERGE_COUNT segments of level N, they are merged into a single @@ -194,23 +206,24 @@ struct Fts3Table { const char *zName; /* virtual table name */ int nColumn; /* number of named columns in virtual table */ char **azColumn; /* column names. malloced */ + u8 *abNotindexed; /* True for 'notindexed' columns */ sqlite3_tokenizer *pTokenizer; /* tokenizer for inserts and queries */ char *zContentTbl; /* content=xxx option, or NULL */ char *zLanguageid; /* languageid=xxx option, or NULL */ - u8 bAutoincrmerge; /* True if automerge=1 */ + int nAutoincrmerge; /* Value configured by 'automerge' */ u32 nLeafAdd; /* Number of leaf blocks added this trans */ /* Precompiled statements used by the implementation. Each of these ** statements is run and reset within a single virtual table API call. */ - sqlite3_stmt *aStmt[37]; + sqlite3_stmt *aStmt[40]; char *zReadExprlist; char *zWriteExprlist; int nNodeSize; /* Soft limit for node size */ u8 bFts4; /* True for FTS4, false for FTS3 */ - u8 bHasStat; /* True if %_stat table exists */ + u8 bHasStat; /* True if %_stat table exists (2==unknown) */ u8 bHasDocsize; /* True if %_docsize table exists */ u8 bDescIdx; /* True if doclists are in reverse order */ u8 bIgnoreSavepoint; /* True to ignore xSavepoint invocations */ @@ -254,6 +267,12 @@ struct Fts3Table { int inTransaction; /* True after xBegin but before xCommit/xRollback */ int mxSavepoint; /* Largest valid xSavepoint integer */ #endif + +#ifdef SQLITE_TEST + /* True to disable the incremental doclist optimization. This is controled + ** by special insert command 'test-no-incr-doclist'. */ + int bNoIncrDoclist; +#endif }; /* @@ -279,7 +298,8 @@ struct Fts3Cursor { int eEvalmode; /* An FTS3_EVAL_XX constant */ int nRowAvg; /* Average size of database rows, in pages */ sqlite3_int64 nDoc; /* Documents in table */ - + i64 iMinDocid; /* Minimum docid to return */ + i64 iMaxDocid; /* Maximum docid to return */ int isMatchinfoNeeded; /* True when aMatchinfo[] needs filling in */ u32 *aMatchinfo; /* Information about most recent match */ int nMatchinfo; /* Number of elements in aMatchinfo[] */ @@ -309,6 +329,15 @@ struct Fts3Cursor { #define FTS3_DOCID_SEARCH 1 /* Lookup by rowid on %_content table */ #define FTS3_FULLTEXT_SEARCH 2 /* Full-text index search */ +/* +** The lower 16-bits of the sqlite3_index_info.idxNum value set by +** the xBestIndex() method contains the Fts3Cursor.eSearch value described +** above. The upper 16-bits contain a combination of the following +** bits, used to describe extra constraints on full-text searches. +*/ +#define FTS3_HAVE_LANGID 0x00010000 /* languageid=? */ +#define FTS3_HAVE_DOCID_GE 0x00020000 /* docid>=? */ +#define FTS3_HAVE_DOCID_LE 0x00040000 /* docid<=? */ struct Fts3Doclist { char *aAll; /* Array containing doclist (or NULL) */ @@ -421,7 +450,6 @@ int sqlite3Fts3SegReaderPending( Fts3Table*,int,const char*,int,int,Fts3SegReader**); void sqlite3Fts3SegReaderFree(Fts3SegReader *); int sqlite3Fts3AllSegdirs(Fts3Table*, int, int, int, sqlite3_stmt **); -int sqlite3Fts3ReadLock(Fts3Table *); int sqlite3Fts3ReadBlock(Fts3Table*, sqlite3_int64, char **, int*, int*); int sqlite3Fts3SelectDoctotal(Fts3Table *, sqlite3_stmt **); @@ -496,6 +524,10 @@ struct Fts3MultiSegReader { int sqlite3Fts3Incrmerge(Fts3Table*,int,int); +#define fts3GetVarint32(p, piVal) ( \ + (*(u8*)(p)&0x80) ? sqlite3Fts3GetVarint32(p, piVal) : (*piVal=*(u8*)(p), 1) \ +) + /* fts3.c */ int sqlite3Fts3PutVarint(char *, sqlite3_int64); int sqlite3Fts3GetVarint(const char *, sqlite_int64 *); @@ -553,7 +585,7 @@ int sqlite3Fts3MsrIncrRestart(Fts3MultiSegReader *pCsr); int sqlite3Fts3InitTok(sqlite3*, Fts3Hash *); /* fts3_unicode2.c (functions generated by parsing unicode text files) */ -#ifdef SQLITE_ENABLE_FTS4_UNICODE61 +#ifndef SQLITE_DISABLE_FTS3_UNICODE int sqlite3FtsUnicodeFold(int, int); int sqlite3FtsUnicodeIsalnum(int); int sqlite3FtsUnicodeIsdiacritic(int); diff --git a/ext/fts3/fts3_aux.c b/ext/fts3/fts3_aux.c index 9b582fc..c68b1a9 100644 --- a/ext/fts3/fts3_aux.c +++ b/ext/fts3/fts3_aux.c @@ -31,6 +31,7 @@ struct Fts3auxCursor { Fts3SegFilter filter; char *zStop; int nStop; /* Byte-length of string zStop */ + int iLangid; /* Language id to query */ int isEof; /* True if cursor is at EOF */ sqlite3_int64 iRowid; /* Current rowid */ @@ -45,7 +46,8 @@ struct Fts3auxCursor { /* ** Schema of the terms table. */ -#define FTS3_TERMS_SCHEMA "CREATE TABLE x(term, col, documents, occurrences)" +#define FTS3_AUX_SCHEMA \ + "CREATE TABLE x(term, col, documents, occurrences, languageid HIDDEN)" /* ** This function does all the work for both the xConnect and xCreate methods. @@ -92,7 +94,7 @@ static int fts3auxConnectMethod( } nFts3 = (int)strlen(zFts3); - rc = sqlite3_declare_vtab(db, FTS3_TERMS_SCHEMA); + rc = sqlite3_declare_vtab(db, FTS3_AUX_SCHEMA); if( rc!=SQLITE_OK ) return rc; nByte = sizeof(Fts3auxTable) + sizeof(Fts3Table) + nDb + nFts3 + 2; @@ -152,6 +154,8 @@ static int fts3auxBestIndexMethod( int iEq = -1; int iGe = -1; int iLe = -1; + int iLangid = -1; + int iNext = 1; /* Next free argvIndex value */ UNUSED_PARAMETER(pVTab); @@ -163,36 +167,48 @@ static int fts3auxBestIndexMethod( pInfo->orderByConsumed = 1; } - /* Search for equality and range constraints on the "term" column. */ + /* Search for equality and range constraints on the "term" column. + ** And equality constraints on the hidden "languageid" column. */ for(i=0; inConstraint; i++){ - if( pInfo->aConstraint[i].usable && pInfo->aConstraint[i].iColumn==0 ){ + if( pInfo->aConstraint[i].usable ){ int op = pInfo->aConstraint[i].op; - if( op==SQLITE_INDEX_CONSTRAINT_EQ ) iEq = i; - if( op==SQLITE_INDEX_CONSTRAINT_LT ) iLe = i; - if( op==SQLITE_INDEX_CONSTRAINT_LE ) iLe = i; - if( op==SQLITE_INDEX_CONSTRAINT_GT ) iGe = i; - if( op==SQLITE_INDEX_CONSTRAINT_GE ) iGe = i; + int iCol = pInfo->aConstraint[i].iColumn; + + if( iCol==0 ){ + if( op==SQLITE_INDEX_CONSTRAINT_EQ ) iEq = i; + if( op==SQLITE_INDEX_CONSTRAINT_LT ) iLe = i; + if( op==SQLITE_INDEX_CONSTRAINT_LE ) iLe = i; + if( op==SQLITE_INDEX_CONSTRAINT_GT ) iGe = i; + if( op==SQLITE_INDEX_CONSTRAINT_GE ) iGe = i; + } + if( iCol==4 ){ + if( op==SQLITE_INDEX_CONSTRAINT_EQ ) iLangid = i; + } } } if( iEq>=0 ){ pInfo->idxNum = FTS4AUX_EQ_CONSTRAINT; - pInfo->aConstraintUsage[iEq].argvIndex = 1; + pInfo->aConstraintUsage[iEq].argvIndex = iNext++; pInfo->estimatedCost = 5; }else{ pInfo->idxNum = 0; pInfo->estimatedCost = 20000; if( iGe>=0 ){ pInfo->idxNum += FTS4AUX_GE_CONSTRAINT; - pInfo->aConstraintUsage[iGe].argvIndex = 1; + pInfo->aConstraintUsage[iGe].argvIndex = iNext++; pInfo->estimatedCost /= 2; } if( iLe>=0 ){ pInfo->idxNum += FTS4AUX_LE_CONSTRAINT; - pInfo->aConstraintUsage[iLe].argvIndex = 1 + (iGe>=0); + pInfo->aConstraintUsage[iLe].argvIndex = iNext++; pInfo->estimatedCost /= 2; } } + if( iLangid>=0 ){ + pInfo->aConstraintUsage[iLangid].argvIndex = iNext++; + pInfo->estimatedCost--; + } return SQLITE_OK; } @@ -352,7 +368,14 @@ static int fts3auxFilterMethod( Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor; Fts3Table *pFts3 = ((Fts3auxTable *)pCursor->pVtab)->pFts3Tab; int rc; - int isScan; + int isScan = 0; + int iLangVal = 0; /* Language id to query */ + + int iEq = -1; /* Index of term=? value in apVal */ + int iGe = -1; /* Index of term>=? value in apVal */ + int iLe = -1; /* Index of term<=? value in apVal */ + int iLangid = -1; /* Index of languageid=? value in apVal */ + int iNext = 0; UNUSED_PARAMETER(nVal); UNUSED_PARAMETER(idxStr); @@ -362,7 +385,21 @@ static int fts3auxFilterMethod( || idxNum==FTS4AUX_LE_CONSTRAINT || idxNum==FTS4AUX_GE_CONSTRAINT || idxNum==(FTS4AUX_LE_CONSTRAINT|FTS4AUX_GE_CONSTRAINT) ); - isScan = (idxNum!=FTS4AUX_EQ_CONSTRAINT); + + if( idxNum==FTS4AUX_EQ_CONSTRAINT ){ + iEq = iNext++; + }else{ + isScan = 1; + if( idxNum & FTS4AUX_GE_CONSTRAINT ){ + iGe = iNext++; + } + if( idxNum & FTS4AUX_LE_CONSTRAINT ){ + iLe = iNext++; + } + } + if( iNextfilter.zTerm); @@ -374,22 +411,35 @@ static int fts3auxFilterMethod( pCsr->filter.flags = FTS3_SEGMENT_REQUIRE_POS|FTS3_SEGMENT_IGNORE_EMPTY; if( isScan ) pCsr->filter.flags |= FTS3_SEGMENT_SCAN; - if( idxNum&(FTS4AUX_EQ_CONSTRAINT|FTS4AUX_GE_CONSTRAINT) ){ + if( iEq>=0 || iGe>=0 ){ const unsigned char *zStr = sqlite3_value_text(apVal[0]); + assert( (iEq==0 && iGe==-1) || (iEq==-1 && iGe==0) ); if( zStr ){ pCsr->filter.zTerm = sqlite3_mprintf("%s", zStr); pCsr->filter.nTerm = sqlite3_value_bytes(apVal[0]); if( pCsr->filter.zTerm==0 ) return SQLITE_NOMEM; } } - if( idxNum&FTS4AUX_LE_CONSTRAINT ){ - int iIdx = (idxNum&FTS4AUX_GE_CONSTRAINT) ? 1 : 0; - pCsr->zStop = sqlite3_mprintf("%s", sqlite3_value_text(apVal[iIdx])); - pCsr->nStop = sqlite3_value_bytes(apVal[iIdx]); + + if( iLe>=0 ){ + pCsr->zStop = sqlite3_mprintf("%s", sqlite3_value_text(apVal[iLe])); + pCsr->nStop = sqlite3_value_bytes(apVal[iLe]); if( pCsr->zStop==0 ) return SQLITE_NOMEM; } + + if( iLangid>=0 ){ + iLangVal = sqlite3_value_int(apVal[iLangid]); + + /* If the user specified a negative value for the languageid, use zero + ** instead. This works, as the "languageid=?" constraint will also + ** be tested by the VDBE layer. The test will always be false (since + ** this module will not return a row with a negative languageid), and + ** so the overall query will return zero rows. */ + if( iLangVal<0 ) iLangVal = 0; + } + pCsr->iLangid = iLangVal; - rc = sqlite3Fts3SegReaderCursor(pFts3, 0, 0, FTS3_SEGCURSOR_ALL, + rc = sqlite3Fts3SegReaderCursor(pFts3, iLangVal, 0, FTS3_SEGCURSOR_ALL, pCsr->filter.zTerm, pCsr->filter.nTerm, 0, isScan, &pCsr->csr ); if( rc==SQLITE_OK ){ @@ -413,24 +463,37 @@ static int fts3auxEofMethod(sqlite3_vtab_cursor *pCursor){ */ static int fts3auxColumnMethod( sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ - sqlite3_context *pContext, /* Context for sqlite3_result_xxx() calls */ + sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */ int iCol /* Index of column to read value from */ ){ Fts3auxCursor *p = (Fts3auxCursor *)pCursor; assert( p->isEof==0 ); - if( iCol==0 ){ /* Column "term" */ - sqlite3_result_text(pContext, p->csr.zTerm, p->csr.nTerm, SQLITE_TRANSIENT); - }else if( iCol==1 ){ /* Column "col" */ - if( p->iCol ){ - sqlite3_result_int(pContext, p->iCol-1); - }else{ - sqlite3_result_text(pContext, "*", -1, SQLITE_STATIC); - } - }else if( iCol==2 ){ /* Column "documents" */ - sqlite3_result_int64(pContext, p->aStat[p->iCol].nDoc); - }else{ /* Column "occurrences" */ - sqlite3_result_int64(pContext, p->aStat[p->iCol].nOcc); + switch( iCol ){ + case 0: /* term */ + sqlite3_result_text(pCtx, p->csr.zTerm, p->csr.nTerm, SQLITE_TRANSIENT); + break; + + case 1: /* col */ + if( p->iCol ){ + sqlite3_result_int(pCtx, p->iCol-1); + }else{ + sqlite3_result_text(pCtx, "*", -1, SQLITE_STATIC); + } + break; + + case 2: /* documents */ + sqlite3_result_int64(pCtx, p->aStat[p->iCol].nDoc); + break; + + case 3: /* occurrences */ + sqlite3_result_int64(pCtx, p->aStat[p->iCol].nOcc); + break; + + default: /* languageid */ + assert( iCol==4 ); + sqlite3_result_int(pCtx, p->iLangid); + break; } return SQLITE_OK; diff --git a/ext/fts3/fts3_expr.c b/ext/fts3/fts3_expr.c index c046d7d..f5d28cb 100644 --- a/ext/fts3/fts3_expr.c +++ b/ext/fts3/fts3_expr.c @@ -155,6 +155,11 @@ int sqlite3Fts3OpenTokenizer( return rc; } +/* +** Function getNextNode(), which is called by fts3ExprParse(), may itself +** call fts3ExprParse(). So this forward declaration is required. +*/ +static int fts3ExprParse(ParseContext *, const char *, int, Fts3Expr **, int *); /* ** Extract the next token from buffer z (length n) using the tokenizer @@ -180,9 +185,16 @@ static int getNextToken( int rc; sqlite3_tokenizer_cursor *pCursor; Fts3Expr *pRet = 0; - int nConsumed = 0; + int i = 0; + + /* Set variable i to the maximum number of bytes of input to tokenize. */ + for(i=0; iiLangid, z, n, &pCursor); + *pnConsumed = i; + rc = sqlite3Fts3OpenTokenizer(pTokenizer, pParse->iLangid, z, i, &pCursor); if( rc==SQLITE_OK ){ const char *zToken; int nToken = 0, iStart = 0, iEnd = 0, iPosition = 0; @@ -223,13 +235,14 @@ static int getNextToken( } } - nConsumed = iEnd; + *pnConsumed = iEnd; + }else if( i && rc==SQLITE_DONE ){ + rc = SQLITE_OK; } pModule->xClose(pCursor); } - *pnConsumed = nConsumed; *ppExpr = pRet; return rc; } @@ -369,12 +382,6 @@ no_mem: return SQLITE_NOMEM; } -/* -** Function getNextNode(), which is called by fts3ExprParse(), may itself -** call fts3ExprParse(). So this forward declaration is required. -*/ -static int fts3ExprParse(ParseContext *, const char *, int, Fts3Expr **, int *); - /* ** The output variable *ppExpr is populated with an allocated Fts3Expr ** structure, or set to 0 if the end of the input buffer is reached. @@ -471,27 +478,6 @@ static int getNextNode( } } - /* Check for an open bracket. */ - if( sqlite3_fts3_enable_parentheses ){ - if( *zInput=='(' ){ - int nConsumed; - pParse->nNest++; - rc = fts3ExprParse(pParse, &zInput[1], nInput-1, ppExpr, &nConsumed); - if( rc==SQLITE_OK && !*ppExpr ){ - rc = SQLITE_DONE; - } - *pnConsumed = (int)((zInput - z) + 1 + nConsumed); - return rc; - } - - /* Check for a close bracket. */ - if( *zInput==')' ){ - pParse->nNest--; - *pnConsumed = (int)((zInput - z) + 1); - return SQLITE_DONE; - } - } - /* See if we are dealing with a quoted phrase. If this is the case, then ** search for the closing quote and pass the whole string to getNextString() ** for processing. This is easy to do, as fts3 has no syntax for escaping @@ -506,6 +492,21 @@ static int getNextNode( return getNextString(pParse, &zInput[1], ii-1, ppExpr); } + if( sqlite3_fts3_enable_parentheses ){ + if( *zInput=='(' ){ + int nConsumed = 0; + pParse->nNest++; + rc = fts3ExprParse(pParse, zInput+1, nInput-1, ppExpr, &nConsumed); + if( rc==SQLITE_OK && !*ppExpr ){ rc = SQLITE_DONE; } + *pnConsumed = (int)(zInput - z) + 1 + nConsumed; + return rc; + }else if( *zInput==')' ){ + pParse->nNest--; + *pnConsumed = (int)((zInput - z) + 1); + *ppExpr = 0; + return SQLITE_DONE; + } + } /* If control flows to this point, this must be a regular token, or ** the end of the input. Read a regular token using the sqlite3_tokenizer @@ -624,96 +625,100 @@ static int fts3ExprParse( while( rc==SQLITE_OK ){ Fts3Expr *p = 0; int nByte = 0; + rc = getNextNode(pParse, zIn, nIn, &p, &nByte); + assert( nByte>0 || (rc!=SQLITE_OK && p==0) ); if( rc==SQLITE_OK ){ - int isPhrase; - - if( !sqlite3_fts3_enable_parentheses - && p->eType==FTSQUERY_PHRASE && pParse->isNot - ){ - /* Create an implicit NOT operator. */ - Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr)); - if( !pNot ){ - sqlite3Fts3ExprFree(p); - rc = SQLITE_NOMEM; - goto exprparse_out; - } - pNot->eType = FTSQUERY_NOT; - pNot->pRight = p; - p->pParent = pNot; - if( pNotBranch ){ - pNot->pLeft = pNotBranch; - pNotBranch->pParent = pNot; - } - pNotBranch = pNot; - p = pPrev; - }else{ - int eType = p->eType; - isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft); - - /* The isRequirePhrase variable is set to true if a phrase or - ** an expression contained in parenthesis is required. If a - ** binary operator (AND, OR, NOT or NEAR) is encounted when - ** isRequirePhrase is set, this is a syntax error. - */ - if( !isPhrase && isRequirePhrase ){ - sqlite3Fts3ExprFree(p); - rc = SQLITE_ERROR; - goto exprparse_out; - } - - if( isPhrase && !isRequirePhrase ){ - /* Insert an implicit AND operator. */ - Fts3Expr *pAnd; - assert( pRet && pPrev ); - pAnd = fts3MallocZero(sizeof(Fts3Expr)); - if( !pAnd ){ + if( p ){ + int isPhrase; + + if( !sqlite3_fts3_enable_parentheses + && p->eType==FTSQUERY_PHRASE && pParse->isNot + ){ + /* Create an implicit NOT operator. */ + Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr)); + if( !pNot ){ sqlite3Fts3ExprFree(p); rc = SQLITE_NOMEM; goto exprparse_out; } - pAnd->eType = FTSQUERY_AND; - insertBinaryOperator(&pRet, pPrev, pAnd); - pPrev = pAnd; - } + pNot->eType = FTSQUERY_NOT; + pNot->pRight = p; + p->pParent = pNot; + if( pNotBranch ){ + pNot->pLeft = pNotBranch; + pNotBranch->pParent = pNot; + } + pNotBranch = pNot; + p = pPrev; + }else{ + int eType = p->eType; + isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft); + + /* The isRequirePhrase variable is set to true if a phrase or + ** an expression contained in parenthesis is required. If a + ** binary operator (AND, OR, NOT or NEAR) is encounted when + ** isRequirePhrase is set, this is a syntax error. + */ + if( !isPhrase && isRequirePhrase ){ + sqlite3Fts3ExprFree(p); + rc = SQLITE_ERROR; + goto exprparse_out; + } + + if( isPhrase && !isRequirePhrase ){ + /* Insert an implicit AND operator. */ + Fts3Expr *pAnd; + assert( pRet && pPrev ); + pAnd = fts3MallocZero(sizeof(Fts3Expr)); + if( !pAnd ){ + sqlite3Fts3ExprFree(p); + rc = SQLITE_NOMEM; + goto exprparse_out; + } + pAnd->eType = FTSQUERY_AND; + insertBinaryOperator(&pRet, pPrev, pAnd); + pPrev = pAnd; + } - /* This test catches attempts to make either operand of a NEAR - ** operator something other than a phrase. For example, either of - ** the following: - ** - ** (bracketed expression) NEAR phrase - ** phrase NEAR (bracketed expression) - ** - ** Return an error in either case. - */ - if( pPrev && ( + /* This test catches attempts to make either operand of a NEAR + ** operator something other than a phrase. For example, either of + ** the following: + ** + ** (bracketed expression) NEAR phrase + ** phrase NEAR (bracketed expression) + ** + ** Return an error in either case. + */ + if( pPrev && ( (eType==FTSQUERY_NEAR && !isPhrase && pPrev->eType!=FTSQUERY_PHRASE) || (eType!=FTSQUERY_PHRASE && isPhrase && pPrev->eType==FTSQUERY_NEAR) - )){ - sqlite3Fts3ExprFree(p); - rc = SQLITE_ERROR; - goto exprparse_out; - } - - if( isPhrase ){ - if( pRet ){ - assert( pPrev && pPrev->pLeft && pPrev->pRight==0 ); - pPrev->pRight = p; - p->pParent = pPrev; + )){ + sqlite3Fts3ExprFree(p); + rc = SQLITE_ERROR; + goto exprparse_out; + } + + if( isPhrase ){ + if( pRet ){ + assert( pPrev && pPrev->pLeft && pPrev->pRight==0 ); + pPrev->pRight = p; + p->pParent = pPrev; + }else{ + pRet = p; + } }else{ - pRet = p; + insertBinaryOperator(&pRet, pPrev, p); } - }else{ - insertBinaryOperator(&pRet, pPrev, p); + isRequirePhrase = !isPhrase; } - isRequirePhrase = !isPhrase; + pPrev = p; } assert( nByte>0 ); } assert( rc!=SQLITE_OK || (nByte>0 && nByte<=nIn) ); nIn -= nByte; zIn += nByte; - pPrev = p; } if( rc==SQLITE_DONE && pRet && isRequirePhrase ){ @@ -1000,17 +1005,16 @@ int sqlite3Fts3ExprParse( Fts3Expr **ppExpr, /* OUT: Parsed query structure */ char **pzErr /* OUT: Error message (sqlite3_malloc) */ ){ - static const int MAX_EXPR_DEPTH = 12; int rc = fts3ExprParseUnbalanced( pTokenizer, iLangid, azCol, bFts4, nCol, iDefaultCol, z, n, ppExpr ); /* Rebalance the expression. And check that its depth does not exceed - ** MAX_EXPR_DEPTH. */ + ** SQLITE_FTS3_MAX_EXPR_DEPTH. */ if( rc==SQLITE_OK && *ppExpr ){ - rc = fts3ExprBalance(ppExpr, MAX_EXPR_DEPTH); + rc = fts3ExprBalance(ppExpr, SQLITE_FTS3_MAX_EXPR_DEPTH); if( rc==SQLITE_OK ){ - rc = fts3ExprCheckDepth(*ppExpr, MAX_EXPR_DEPTH); + rc = fts3ExprCheckDepth(*ppExpr, SQLITE_FTS3_MAX_EXPR_DEPTH); } } @@ -1019,7 +1023,8 @@ int sqlite3Fts3ExprParse( *ppExpr = 0; if( rc==SQLITE_TOOBIG ){ *pzErr = sqlite3_mprintf( - "FTS expression tree is too large (maximum depth %d)", MAX_EXPR_DEPTH + "FTS expression tree is too large (maximum depth %d)", + SQLITE_FTS3_MAX_EXPR_DEPTH ); rc = SQLITE_ERROR; }else if( rc==SQLITE_ERROR ){ diff --git a/ext/fts3/fts3_hash.c b/ext/fts3/fts3_hash.c index 57c59b5..1a32a53 100644 --- a/ext/fts3/fts3_hash.c +++ b/ext/fts3/fts3_hash.c @@ -96,13 +96,13 @@ void sqlite3Fts3HashClear(Fts3Hash *pH){ */ static int fts3StrHash(const void *pKey, int nKey){ const char *z = (const char *)pKey; - int h = 0; + unsigned h = 0; if( nKey<=0 ) nKey = (int) strlen(z); while( nKey > 0 ){ h = (h<<3) ^ h ^ *z++; nKey--; } - return h & 0x7fffffff; + return (int)(h & 0x7fffffff); } static int fts3StrCompare(const void *pKey1, int n1, const void *pKey2, int n2){ if( n1!=n2 ) return 1; diff --git a/ext/fts3/fts3_porter.c b/ext/fts3/fts3_porter.c index 579745b..db175ac 100644 --- a/ext/fts3/fts3_porter.c +++ b/ext/fts3/fts3_porter.c @@ -403,12 +403,14 @@ static void porter_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){ /* Step 2 */ switch( z[1] ){ case 'a': - stem(&z, "lanoita", "ate", m_gt_0) || - stem(&z, "lanoit", "tion", m_gt_0); + if( !stem(&z, "lanoita", "ate", m_gt_0) ){ + stem(&z, "lanoit", "tion", m_gt_0); + } break; case 'c': - stem(&z, "icne", "ence", m_gt_0) || - stem(&z, "icna", "ance", m_gt_0); + if( !stem(&z, "icne", "ence", m_gt_0) ){ + stem(&z, "icna", "ance", m_gt_0); + } break; case 'e': stem(&z, "rezi", "ize", m_gt_0); @@ -417,43 +419,54 @@ static void porter_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){ stem(&z, "igol", "log", m_gt_0); break; case 'l': - stem(&z, "ilb", "ble", m_gt_0) || - stem(&z, "illa", "al", m_gt_0) || - stem(&z, "iltne", "ent", m_gt_0) || - stem(&z, "ile", "e", m_gt_0) || - stem(&z, "ilsuo", "ous", m_gt_0); + if( !stem(&z, "ilb", "ble", m_gt_0) + && !stem(&z, "illa", "al", m_gt_0) + && !stem(&z, "iltne", "ent", m_gt_0) + && !stem(&z, "ile", "e", m_gt_0) + ){ + stem(&z, "ilsuo", "ous", m_gt_0); + } break; case 'o': - stem(&z, "noitazi", "ize", m_gt_0) || - stem(&z, "noita", "ate", m_gt_0) || - stem(&z, "rota", "ate", m_gt_0); + if( !stem(&z, "noitazi", "ize", m_gt_0) + && !stem(&z, "noita", "ate", m_gt_0) + ){ + stem(&z, "rota", "ate", m_gt_0); + } break; case 's': - stem(&z, "msila", "al", m_gt_0) || - stem(&z, "ssenevi", "ive", m_gt_0) || - stem(&z, "ssenluf", "ful", m_gt_0) || - stem(&z, "ssensuo", "ous", m_gt_0); + if( !stem(&z, "msila", "al", m_gt_0) + && !stem(&z, "ssenevi", "ive", m_gt_0) + && !stem(&z, "ssenluf", "ful", m_gt_0) + ){ + stem(&z, "ssensuo", "ous", m_gt_0); + } break; case 't': - stem(&z, "itila", "al", m_gt_0) || - stem(&z, "itivi", "ive", m_gt_0) || - stem(&z, "itilib", "ble", m_gt_0); + if( !stem(&z, "itila", "al", m_gt_0) + && !stem(&z, "itivi", "ive", m_gt_0) + ){ + stem(&z, "itilib", "ble", m_gt_0); + } break; } /* Step 3 */ switch( z[0] ){ case 'e': - stem(&z, "etaci", "ic", m_gt_0) || - stem(&z, "evita", "", m_gt_0) || - stem(&z, "ezila", "al", m_gt_0); + if( !stem(&z, "etaci", "ic", m_gt_0) + && !stem(&z, "evita", "", m_gt_0) + ){ + stem(&z, "ezila", "al", m_gt_0); + } break; case 'i': stem(&z, "itici", "ic", m_gt_0); break; case 'l': - stem(&z, "laci", "ic", m_gt_0) || - stem(&z, "luf", "", m_gt_0); + if( !stem(&z, "laci", "ic", m_gt_0) ){ + stem(&z, "luf", "", m_gt_0); + } break; case 's': stem(&z, "ssen", "", m_gt_0); @@ -494,9 +507,11 @@ static void porter_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){ z += 3; } }else if( z[2]=='e' ){ - stem(&z, "tneme", "", m_gt_1) || - stem(&z, "tnem", "", m_gt_1) || - stem(&z, "tne", "", m_gt_1); + if( !stem(&z, "tneme", "", m_gt_1) + && !stem(&z, "tnem", "", m_gt_1) + ){ + stem(&z, "tne", "", m_gt_1); + } } } break; @@ -515,8 +530,9 @@ static void porter_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){ } break; case 't': - stem(&z, "eta", "", m_gt_1) || - stem(&z, "iti", "", m_gt_1); + if( !stem(&z, "eta", "", m_gt_1) ){ + stem(&z, "iti", "", m_gt_1); + } break; case 'u': if( z[0]=='s' && z[2]=='o' && m_gt_1(z+3) ){ diff --git a/ext/fts3/fts3_snippet.c b/ext/fts3/fts3_snippet.c index d54a787..aa8779f 100644 --- a/ext/fts3/fts3_snippet.c +++ b/ext/fts3/fts3_snippet.c @@ -128,7 +128,7 @@ struct StrBuffer { */ static void fts3GetDeltaPosition(char **pp, int *piPos){ int iVal; - *pp += sqlite3Fts3GetVarint32(*pp, &iVal); + *pp += fts3GetVarint32(*pp, &iVal); *piPos += (iVal-2); } @@ -504,6 +504,7 @@ static int fts3StringAppend( pStr->z = zNew; pStr->nAlloc = nAlloc; } + assert( pStr->z!=0 && (pStr->nAlloc >= pStr->n+nAppend+1) ); /* Append the data to the string buffer. */ memcpy(&pStr->z[pStr->n], zAppend, nAppend); diff --git a/ext/fts3/fts3_test.c b/ext/fts3/fts3_test.c index 75ec6bd..36dcc94 100644 --- a/ext/fts3/fts3_test.c +++ b/ext/fts3/fts3_test.c @@ -517,6 +517,51 @@ static int fts3_test_tokenizer_cmd( return TCL_OK; } +static int fts3_test_varint_cmd( + ClientData clientData, + Tcl_Interp *interp, + int objc, + Tcl_Obj *CONST objv[] +){ +#ifdef SQLITE_ENABLE_FTS3 + char aBuf[24]; + int rc; + Tcl_WideInt w, w2; + int nByte, nByte2; + + if( objc!=2 ){ + Tcl_WrongNumArgs(interp, 1, objv, "INTEGER"); + return TCL_ERROR; + } + + rc = Tcl_GetWideIntFromObj(interp, objv[1], &w); + if( rc!=TCL_OK ) return rc; + + nByte = sqlite3Fts3PutVarint(aBuf, w); + nByte2 = sqlite3Fts3GetVarint(aBuf, &w2); + if( w!=w2 || nByte!=nByte2 ){ + char *zErr = sqlite3_mprintf("error testing %lld", w); + Tcl_ResetResult(interp); + Tcl_AppendResult(interp, zErr, 0); + return TCL_ERROR; + } + + if( w<=2147483647 && w>=0 ){ + int i; + nByte2 = fts3GetVarint32(aBuf, &i); + if( (int)w!=i || nByte!=nByte2 ){ + char *zErr = sqlite3_mprintf("error testing %lld (32-bit)", w); + Tcl_ResetResult(interp); + Tcl_AppendResult(interp, zErr, 0); + return TCL_ERROR; + } + } + +#endif + UNUSED_PARAMETER(clientData); + return TCL_OK; +} + /* ** End of tokenizer code. **************************************************************************/ @@ -529,6 +574,10 @@ int Sqlitetestfts3_Init(Tcl_Interp *interp){ Tcl_CreateObjCommand( interp, "fts3_test_tokenizer", fts3_test_tokenizer_cmd, 0, 0 ); + + Tcl_CreateObjCommand( + interp, "fts3_test_varint", fts3_test_varint_cmd, 0, 0 + ); return TCL_OK; } #endif /* SQLITE_ENABLE_FTS3 || SQLITE_ENABLE_FTS4 */ diff --git a/ext/fts3/fts3_unicode.c b/ext/fts3/fts3_unicode.c index 188358e..94fc27b 100644 --- a/ext/fts3/fts3_unicode.c +++ b/ext/fts3/fts3_unicode.c @@ -13,7 +13,7 @@ ** Implementation of the "unicode" full-text-search tokenizer. */ -#ifdef SQLITE_ENABLE_FTS4_UNICODE61 +#ifndef SQLITE_DISABLE_FTS3_UNICODE #include "fts3Int.h" #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) @@ -231,7 +231,7 @@ static int unicodeCreate( for(i=0; rc==SQLITE_OK && ibRemoveDiacritic = 1; @@ -318,7 +318,7 @@ static int unicodeNext( ){ unicode_cursor *pCsr = (unicode_cursor *)pC; unicode_tokenizer *p = ((unicode_tokenizer *)pCsr->base.pTokenizer); - int iCode; + int iCode = 0; char *zOut; const unsigned char *z = &pCsr->aInput[pCsr->iOff]; const unsigned char *zStart = z; @@ -363,11 +363,11 @@ static int unicodeNext( ); /* Set the output variables and return. */ - pCsr->iOff = (z - pCsr->aInput); + pCsr->iOff = (int)(z - pCsr->aInput); *paToken = pCsr->zToken; - *pnToken = zOut - pCsr->zToken; - *piStart = (zStart - pCsr->aInput); - *piEnd = (zEnd - pCsr->aInput); + *pnToken = (int)(zOut - pCsr->zToken); + *piStart = (int)(zStart - pCsr->aInput); + *piEnd = (int)(zEnd - pCsr->aInput); *piPos = pCsr->iToken++; return SQLITE_OK; } @@ -390,4 +390,4 @@ void sqlite3Fts3UnicodeTokenizer(sqlite3_tokenizer_module const **ppModule){ } #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ -#endif /* ifndef SQLITE_ENABLE_FTS4_UNICODE61 */ +#endif /* ifndef SQLITE_DISABLE_FTS3_UNICODE */ diff --git a/ext/fts3/fts3_unicode2.c b/ext/fts3/fts3_unicode2.c index 3c24569..20b7a25 100644 --- a/ext/fts3/fts3_unicode2.c +++ b/ext/fts3/fts3_unicode2.c @@ -15,7 +15,7 @@ ** DO NOT EDIT THIS MACHINE GENERATED FILE. */ -#if defined(SQLITE_ENABLE_FTS4_UNICODE61) +#ifndef SQLITE_DISABLE_FTS3_UNICODE #if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) #include @@ -39,7 +39,7 @@ int sqlite3FtsUnicodeIsalnum(int c){ ** C. It is not possible to represent a range larger than 1023 codepoints ** using this format. */ - const static unsigned int aEntry[] = { + static const unsigned int aEntry[] = { 0x00000030, 0x0000E807, 0x00016C06, 0x0001EC2F, 0x0002AC07, 0x0002D001, 0x0002D803, 0x0002EC01, 0x0002FC01, 0x00035C01, 0x0003DC01, 0x000B0804, 0x000B480E, 0x000B9407, 0x000BB401, @@ -101,28 +101,27 @@ int sqlite3FtsUnicodeIsalnum(int c){ 0x02A97004, 0x02A9DC03, 0x02A9EC01, 0x02AAC001, 0x02AAC803, 0x02AADC02, 0x02AAF802, 0x02AB0401, 0x02AB7802, 0x02ABAC07, 0x02ABD402, 0x02AF8C0B, 0x03600001, 0x036DFC02, 0x036FFC02, - 0x037FFC02, 0x03E3FC01, 0x03EC7801, 0x03ECA401, 0x03EEC810, - 0x03F4F802, 0x03F7F002, 0x03F8001A, 0x03F88007, 0x03F8C023, - 0x03F95013, 0x03F9A004, 0x03FBFC01, 0x03FC040F, 0x03FC6807, - 0x03FCEC06, 0x03FD6C0B, 0x03FF8007, 0x03FFA007, 0x03FFE405, - 0x04040003, 0x0404DC09, 0x0405E411, 0x0406400C, 0x0407402E, - 0x040E7C01, 0x040F4001, 0x04215C01, 0x04247C01, 0x0424FC01, - 0x04280403, 0x04281402, 0x04283004, 0x0428E003, 0x0428FC01, - 0x04294009, 0x0429FC01, 0x042CE407, 0x04400003, 0x0440E016, - 0x04420003, 0x0442C012, 0x04440003, 0x04449C0E, 0x04450004, - 0x04460003, 0x0446CC0E, 0x04471404, 0x045AAC0D, 0x0491C004, - 0x05BD442E, 0x05BE3C04, 0x074000F6, 0x07440027, 0x0744A4B5, - 0x07480046, 0x074C0057, 0x075B0401, 0x075B6C01, 0x075BEC01, - 0x075C5401, 0x075CD401, 0x075D3C01, 0x075DBC01, 0x075E2401, - 0x075EA401, 0x075F0C01, 0x07BBC002, 0x07C0002C, 0x07C0C064, - 0x07C2800F, 0x07C2C40E, 0x07C3040F, 0x07C3440F, 0x07C4401F, - 0x07C4C03C, 0x07C5C02B, 0x07C7981D, 0x07C8402B, 0x07C90009, - 0x07C94002, 0x07CC0021, 0x07CCC006, 0x07CCDC46, 0x07CE0014, - 0x07CE8025, 0x07CF1805, 0x07CF8011, 0x07D0003F, 0x07D10001, - 0x07D108B6, 0x07D3E404, 0x07D4003E, 0x07D50004, 0x07D54018, - 0x07D7EC46, 0x07D9140B, 0x07DA0046, 0x07DC0074, 0x38000401, - 0x38008060, 0x380400F0, 0x3C000001, 0x3FFFF401, 0x40000001, - 0x43FFF401, + 0x037FFC01, 0x03EC7801, 0x03ECA401, 0x03EEC810, 0x03F4F802, + 0x03F7F002, 0x03F8001A, 0x03F88007, 0x03F8C023, 0x03F95013, + 0x03F9A004, 0x03FBFC01, 0x03FC040F, 0x03FC6807, 0x03FCEC06, + 0x03FD6C0B, 0x03FF8007, 0x03FFA007, 0x03FFE405, 0x04040003, + 0x0404DC09, 0x0405E411, 0x0406400C, 0x0407402E, 0x040E7C01, + 0x040F4001, 0x04215C01, 0x04247C01, 0x0424FC01, 0x04280403, + 0x04281402, 0x04283004, 0x0428E003, 0x0428FC01, 0x04294009, + 0x0429FC01, 0x042CE407, 0x04400003, 0x0440E016, 0x04420003, + 0x0442C012, 0x04440003, 0x04449C0E, 0x04450004, 0x04460003, + 0x0446CC0E, 0x04471404, 0x045AAC0D, 0x0491C004, 0x05BD442E, + 0x05BE3C04, 0x074000F6, 0x07440027, 0x0744A4B5, 0x07480046, + 0x074C0057, 0x075B0401, 0x075B6C01, 0x075BEC01, 0x075C5401, + 0x075CD401, 0x075D3C01, 0x075DBC01, 0x075E2401, 0x075EA401, + 0x075F0C01, 0x07BBC002, 0x07C0002C, 0x07C0C064, 0x07C2800F, + 0x07C2C40E, 0x07C3040F, 0x07C3440F, 0x07C4401F, 0x07C4C03C, + 0x07C5C02B, 0x07C7981D, 0x07C8402B, 0x07C90009, 0x07C94002, + 0x07CC0021, 0x07CCC006, 0x07CCDC46, 0x07CE0014, 0x07CE8025, + 0x07CF1805, 0x07CF8011, 0x07D0003F, 0x07D10001, 0x07D108B6, + 0x07D3E404, 0x07D4003E, 0x07D50004, 0x07D54018, 0x07D7EC46, + 0x07D9140B, 0x07DA0046, 0x07DC0074, 0x38000401, 0x38008060, + 0x380400F0, }; static const unsigned int aAscii[4] = { 0xFFFFFFFF, 0xFC00FFFF, 0xF8000001, 0xF8000001, @@ -132,7 +131,7 @@ int sqlite3FtsUnicodeIsalnum(int c){ return ( (aAscii[c >> 5] & (1 << (c & 0x001F)))==0 ); }else if( c<(1<<22) ){ unsigned int key = (((unsigned int)c)<<10) | 0x000003FF; - int iRes; + int iRes = 0; int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1; int iLo = 0; while( iHi>=iLo ){ @@ -203,7 +202,7 @@ static int remove_diacritic(int c){ } assert( key>=aDia[iRes] ); return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : (int)aChar[iRes]); -}; +} /* @@ -363,4 +362,4 @@ int sqlite3FtsUnicodeFold(int c, int bRemoveDiacritic){ return ret; } #endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */ -#endif /* !defined(SQLITE_ENABLE_FTS4_UNICODE61) */ +#endif /* !defined(SQLITE_DISABLE_FTS3_UNICODE) */ diff --git a/ext/fts3/fts3_write.c b/ext/fts3/fts3_write.c index 269d1dd..0da08c6 100644 --- a/ext/fts3/fts3_write.c +++ b/ext/fts3/fts3_write.c @@ -193,6 +193,7 @@ struct SegmentWriter { int nSize; /* Size of allocation at aData */ int nData; /* Bytes of data in aData */ char *aData; /* Pointer to block from malloc() */ + i64 nLeafData; /* Number of bytes of leaf data written */ }; /* @@ -268,6 +269,10 @@ struct SegmentNode { #define SQL_SELECT_INDEXES 35 #define SQL_SELECT_MXLEVEL 36 +#define SQL_SELECT_LEVEL_RANGE2 37 +#define SQL_UPDATE_LEVEL_IDX 38 +#define SQL_UPDATE_LEVEL 39 + /* ** This function is used to obtain an SQLite prepared statement handle ** for the statement identified by the second argument. If successful, @@ -369,7 +374,18 @@ static int fts3SqlStmt( /* SQL_SELECT_MXLEVEL ** Return the largest relative level in the FTS index or indexes. */ -/* 36 */ "SELECT max( level %% 1024 ) FROM %Q.'%q_segdir'" +/* 36 */ "SELECT max( level %% 1024 ) FROM %Q.'%q_segdir'", + + /* Return segments in order from oldest to newest.*/ +/* 37 */ "SELECT level, idx, end_block " + "FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ? " + "ORDER BY level DESC, idx ASC", + + /* Update statements used while promoting segments */ +/* 38 */ "UPDATE OR FAIL %Q.'%q_segdir' SET level=-1,idx=? " + "WHERE level=? AND idx=?", +/* 39 */ "UPDATE OR FAIL %Q.'%q_segdir' SET level=? WHERE level=-1" + }; int rc = SQLITE_OK; sqlite3_stmt *pStmt; @@ -489,37 +505,30 @@ static void fts3SqlExec( /* -** This function ensures that the caller has obtained a shared-cache -** table-lock on the %_content table. This is required before reading -** data from the fts3 table. If this lock is not acquired first, then -** the caller may end up holding read-locks on the %_segments and %_segdir -** tables, but no read-lock on the %_content table. If this happens -** a second connection will be able to write to the fts3 table, but -** attempting to commit those writes might return SQLITE_LOCKED or -** SQLITE_LOCKED_SHAREDCACHE (because the commit attempts to obtain -** write-locks on the %_segments and %_segdir ** tables). -** -** We try to avoid this because if FTS3 returns any error when committing -** a transaction, the whole transaction will be rolled back. And this is -** not what users expect when they get SQLITE_LOCKED_SHAREDCACHE. It can -** still happen if the user reads data directly from the %_segments or -** %_segdir tables instead of going through FTS3 though. +** This function ensures that the caller has obtained an exclusive +** shared-cache table-lock on the %_segdir table. This is required before +** writing data to the fts3 table. If this lock is not acquired first, then +** the caller may end up attempting to take this lock as part of committing +** a transaction, causing SQLite to return SQLITE_LOCKED or +** LOCKED_SHAREDCACHEto a COMMIT command. ** -** This reasoning does not apply to a content=xxx table. +** It is best to avoid this because if FTS3 returns any error when +** committing a transaction, the whole transaction will be rolled back. +** And this is not what users expect when they get SQLITE_LOCKED_SHAREDCACHE. +** It can still happen if the user locks the underlying tables directly +** instead of accessing them via FTS. */ -int sqlite3Fts3ReadLock(Fts3Table *p){ - int rc; /* Return code */ - sqlite3_stmt *pStmt; /* Statement used to obtain lock */ - - if( p->zContentTbl==0 ){ - rc = fts3SqlStmt(p, SQL_SELECT_CONTENT_BY_ROWID, &pStmt, 0); +static int fts3Writelock(Fts3Table *p){ + int rc = SQLITE_OK; + + if( p->nPendingData==0 ){ + sqlite3_stmt *pStmt; + rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_LEVEL, &pStmt, 0); if( rc==SQLITE_OK ){ sqlite3_bind_null(pStmt, 1); sqlite3_step(pStmt); rc = sqlite3_reset(pStmt); } - }else{ - rc = SQLITE_OK; } return rc; @@ -907,12 +916,15 @@ static int fts3InsertTerms( ){ int i; /* Iterator variable */ for(i=2; inColumn+2; i++){ - const char *zText = (const char *)sqlite3_value_text(apVal[i]); - int rc = fts3PendingTermsAdd(p, iLangid, zText, i-2, &aSz[i-2]); - if( rc!=SQLITE_OK ){ - return rc; + int iCol = i-2; + if( p->abNotindexed[iCol]==0 ){ + const char *zText = (const char *)sqlite3_value_text(apVal[i]); + int rc = fts3PendingTermsAdd(p, iLangid, zText, iCol, &aSz[iCol]); + if( rc!=SQLITE_OK ){ + return rc; + } + aSz[p->nColumn] += sqlite3_value_bytes(apVal[i]); } - aSz[p->nColumn] += sqlite3_value_bytes(apVal[i]); } return SQLITE_OK; } @@ -1059,9 +1071,12 @@ static void fts3DeleteTerms( int iLangid = langidFromSelect(p, pSelect); rc = fts3PendingTermsDocid(p, iLangid, sqlite3_column_int64(pSelect, 0)); for(i=1; rc==SQLITE_OK && i<=p->nColumn; i++){ - const char *zText = (const char *)sqlite3_column_text(pSelect, i); - rc = fts3PendingTermsAdd(p, iLangid, zText, -1, &aSz[i-1]); - aSz[p->nColumn] += sqlite3_column_bytes(pSelect, i); + int iCol = i-1; + if( p->abNotindexed[iCol]==0 ){ + const char *zText = (const char *)sqlite3_column_text(pSelect, i); + rc = fts3PendingTermsAdd(p, iLangid, zText, -1, &aSz[iCol]); + aSz[p->nColumn] += sqlite3_column_bytes(pSelect, i); + } } if( rc!=SQLITE_OK ){ sqlite3_reset(pSelect); @@ -1345,8 +1360,8 @@ static int fts3SegReaderNext( /* Because of the FTS3_NODE_PADDING bytes of padding, the following is ** safe (no risk of overread) even if the node data is corrupted. */ - pNext += sqlite3Fts3GetVarint32(pNext, &nPrefix); - pNext += sqlite3Fts3GetVarint32(pNext, &nSuffix); + pNext += fts3GetVarint32(pNext, &nPrefix); + pNext += fts3GetVarint32(pNext, &nSuffix); if( nPrefix<0 || nSuffix<=0 || &pNext[nSuffix]>&pReader->aNode[pReader->nNode] ){ @@ -1369,7 +1384,7 @@ static int fts3SegReaderNext( memcpy(&pReader->zTerm[nPrefix], pNext, nSuffix); pReader->nTerm = nPrefix+nSuffix; pNext += nSuffix; - pNext += sqlite3Fts3GetVarint32(pNext, &pReader->nDoclist); + pNext += fts3GetVarint32(pNext, &pReader->nDoclist); pReader->aDoclist = pNext; pReader->pOffsetList = 0; @@ -1462,7 +1477,7 @@ static int fts3SegReaderNextDocid( /* The following line of code (and the "p++" below the while() loop) is ** normally all that is required to move pointer p to the desired ** position. The exception is if this node is being loaded from disk - ** incrementally and pointer "p" now points to the first byte passed + ** incrementally and pointer "p" now points to the first byte past ** the populated part of pReader->aNode[]. */ while( *p | c ) c = *p++ & 0x80; @@ -1911,6 +1926,7 @@ static int fts3WriteSegdir( sqlite3_int64 iStartBlock, /* Value for "start_block" field */ sqlite3_int64 iLeafEndBlock, /* Value for "leaves_end_block" field */ sqlite3_int64 iEndBlock, /* Value for "end_block" field */ + sqlite3_int64 nLeafData, /* Bytes of leaf data in segment */ char *zRoot, /* Blob value for "root" field */ int nRoot /* Number of bytes in buffer zRoot */ ){ @@ -1921,7 +1937,13 @@ static int fts3WriteSegdir( sqlite3_bind_int(pStmt, 2, iIdx); sqlite3_bind_int64(pStmt, 3, iStartBlock); sqlite3_bind_int64(pStmt, 4, iLeafEndBlock); - sqlite3_bind_int64(pStmt, 5, iEndBlock); + if( nLeafData==0 ){ + sqlite3_bind_int64(pStmt, 5, iEndBlock); + }else{ + char *zEnd = sqlite3_mprintf("%lld %lld", iEndBlock, nLeafData); + if( !zEnd ) return SQLITE_NOMEM; + sqlite3_bind_text(pStmt, 5, zEnd, -1, sqlite3_free); + } sqlite3_bind_blob(pStmt, 6, zRoot, nRoot, SQLITE_STATIC); sqlite3_step(pStmt); rc = sqlite3_reset(pStmt); @@ -2247,6 +2269,9 @@ static int fts3SegWriterAdd( nDoclist; /* Doclist data */ } + /* Increase the total number of bytes written to account for the new entry. */ + pWriter->nLeafData += nReq; + /* If the buffer currently allocated is too small for this entry, realloc ** the buffer to make it large enough. */ @@ -2318,13 +2343,13 @@ static int fts3SegWriterFlush( pWriter->iFirst, pWriter->iFree, &iLast, &zRoot, &nRoot); } if( rc==SQLITE_OK ){ - rc = fts3WriteSegdir( - p, iLevel, iIdx, pWriter->iFirst, iLastLeaf, iLast, zRoot, nRoot); + rc = fts3WriteSegdir(p, iLevel, iIdx, + pWriter->iFirst, iLastLeaf, iLast, pWriter->nLeafData, zRoot, nRoot); } }else{ /* The entire tree fits on the root node. Write it to the segdir table. */ - rc = fts3WriteSegdir( - p, iLevel, iIdx, 0, 0, 0, pWriter->aData, pWriter->nData); + rc = fts3WriteSegdir(p, iLevel, iIdx, + 0, 0, 0, pWriter->nLeafData, pWriter->aData, pWriter->nData); } p->nLeafAdd++; return rc; @@ -2408,6 +2433,37 @@ static int fts3SegmentMaxLevel( return sqlite3_reset(pStmt); } +/* +** iAbsLevel is an absolute level that may be assumed to exist within +** the database. This function checks if it is the largest level number +** within its index. Assuming no error occurs, *pbMax is set to 1 if +** iAbsLevel is indeed the largest level, or 0 otherwise, and SQLITE_OK +** is returned. If an error occurs, an error code is returned and the +** final value of *pbMax is undefined. +*/ +static int fts3SegmentIsMaxLevel(Fts3Table *p, i64 iAbsLevel, int *pbMax){ + + /* Set pStmt to the compiled version of: + ** + ** SELECT max(level) FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ? + ** + ** (1024 is actually the value of macro FTS3_SEGDIR_PREFIXLEVEL_STR). + */ + sqlite3_stmt *pStmt; + int rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR_MAX_LEVEL, &pStmt, 0); + if( rc!=SQLITE_OK ) return rc; + sqlite3_bind_int64(pStmt, 1, iAbsLevel+1); + sqlite3_bind_int64(pStmt, 2, + ((iAbsLevel/FTS3_SEGDIR_MAXLEVEL)+1) * FTS3_SEGDIR_MAXLEVEL + ); + + *pbMax = 0; + if( SQLITE_ROW==sqlite3_step(pStmt) ){ + *pbMax = sqlite3_column_type(pStmt, 0)==SQLITE_NULL; + } + return sqlite3_reset(pStmt); +} + /* ** Delete all entries in the %_segments table associated with the segment ** opened with seg-reader pSeg. This function does not affect the contents @@ -2530,7 +2586,7 @@ static void fts3ColumnFilter( break; } p = &pList[1]; - p += sqlite3Fts3GetVarint32(p, &iCurrent); + p += fts3GetVarint32(p, &iCurrent); } if( bZero && &pList[nList]!=pEnd ){ @@ -2849,8 +2905,8 @@ int sqlite3Fts3SegReaderStep( fts3SegReaderSort(apSegment, nMerge, nMerge, xCmp); while( apSegment[0]->pOffsetList ){ int j; /* Number of segments that share a docid */ - char *pList; - int nList; + char *pList = 0; + int nList = 0; int nByte; sqlite3_int64 iDocid = apSegment[0]->iDocid; fts3SegReaderNextDocid(p, apSegment[0], &pList, &nList); @@ -2943,6 +2999,140 @@ void sqlite3Fts3SegReaderFinish( } } +/* +** Decode the "end_block" field, selected by column iCol of the SELECT +** statement passed as the first argument. +** +** The "end_block" field may contain either an integer, or a text field +** containing the text representation of two non-negative integers separated +** by one or more space (0x20) characters. In the first case, set *piEndBlock +** to the integer value and *pnByte to zero before returning. In the second, +** set *piEndBlock to the first value and *pnByte to the second. +*/ +static void fts3ReadEndBlockField( + sqlite3_stmt *pStmt, + int iCol, + i64 *piEndBlock, + i64 *pnByte +){ + const unsigned char *zText = sqlite3_column_text(pStmt, iCol); + if( zText ){ + int i; + int iMul = 1; + i64 iVal = 0; + for(i=0; zText[i]>='0' && zText[i]<='9'; i++){ + iVal = iVal*10 + (zText[i] - '0'); + } + *piEndBlock = iVal; + while( zText[i]==' ' ) i++; + iVal = 0; + if( zText[i]=='-' ){ + i++; + iMul = -1; + } + for(/* no-op */; zText[i]>='0' && zText[i]<='9'; i++){ + iVal = iVal*10 + (zText[i] - '0'); + } + *pnByte = (iVal * (i64)iMul); + } +} + + +/* +** A segment of size nByte bytes has just been written to absolute level +** iAbsLevel. Promote any segments that should be promoted as a result. +*/ +static int fts3PromoteSegments( + Fts3Table *p, /* FTS table handle */ + sqlite3_int64 iAbsLevel, /* Absolute level just updated */ + sqlite3_int64 nByte /* Size of new segment at iAbsLevel */ +){ + int rc = SQLITE_OK; + sqlite3_stmt *pRange; + + rc = fts3SqlStmt(p, SQL_SELECT_LEVEL_RANGE2, &pRange, 0); + + if( rc==SQLITE_OK ){ + int bOk = 0; + i64 iLast = (iAbsLevel/FTS3_SEGDIR_MAXLEVEL + 1) * FTS3_SEGDIR_MAXLEVEL - 1; + i64 nLimit = (nByte*3)/2; + + /* Loop through all entries in the %_segdir table corresponding to + ** segments in this index on levels greater than iAbsLevel. If there is + ** at least one such segment, and it is possible to determine that all + ** such segments are smaller than nLimit bytes in size, they will be + ** promoted to level iAbsLevel. */ + sqlite3_bind_int64(pRange, 1, iAbsLevel+1); + sqlite3_bind_int64(pRange, 2, iLast); + while( SQLITE_ROW==sqlite3_step(pRange) ){ + i64 nSize = 0, dummy; + fts3ReadEndBlockField(pRange, 2, &dummy, &nSize); + if( nSize<=0 || nSize>nLimit ){ + /* If nSize==0, then the %_segdir.end_block field does not not + ** contain a size value. This happens if it was written by an + ** old version of FTS. In this case it is not possible to determine + ** the size of the segment, and so segment promotion does not + ** take place. */ + bOk = 0; + break; + } + bOk = 1; + } + rc = sqlite3_reset(pRange); + + if( bOk ){ + int iIdx = 0; + sqlite3_stmt *pUpdate1; + sqlite3_stmt *pUpdate2; + + if( rc==SQLITE_OK ){ + rc = fts3SqlStmt(p, SQL_UPDATE_LEVEL_IDX, &pUpdate1, 0); + } + if( rc==SQLITE_OK ){ + rc = fts3SqlStmt(p, SQL_UPDATE_LEVEL, &pUpdate2, 0); + } + + if( rc==SQLITE_OK ){ + + /* Loop through all %_segdir entries for segments in this index with + ** levels equal to or greater than iAbsLevel. As each entry is visited, + ** updated it to set (level = -1) and (idx = N), where N is 0 for the + ** oldest segment in the range, 1 for the next oldest, and so on. + ** + ** In other words, move all segments being promoted to level -1, + ** setting the "idx" fields as appropriate to keep them in the same + ** order. The contents of level -1 (which is never used, except + ** transiently here), will be moved back to level iAbsLevel below. */ + sqlite3_bind_int64(pRange, 1, iAbsLevel); + while( SQLITE_ROW==sqlite3_step(pRange) ){ + sqlite3_bind_int(pUpdate1, 1, iIdx++); + sqlite3_bind_int(pUpdate1, 2, sqlite3_column_int(pRange, 0)); + sqlite3_bind_int(pUpdate1, 3, sqlite3_column_int(pRange, 1)); + sqlite3_step(pUpdate1); + rc = sqlite3_reset(pUpdate1); + if( rc!=SQLITE_OK ){ + sqlite3_reset(pRange); + break; + } + } + } + if( rc==SQLITE_OK ){ + rc = sqlite3_reset(pRange); + } + + /* Move level -1 to level iAbsLevel */ + if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pUpdate2, 1, iAbsLevel); + sqlite3_step(pUpdate2); + rc = sqlite3_reset(pUpdate2); + } + } + } + + + return rc; +} + /* ** Merge all level iLevel segments in the database into a single ** iLevel+1 segment. Or, if iLevel<0, merge all segments into a @@ -2967,6 +3157,7 @@ static int fts3SegmentMerge( Fts3SegFilter filter; /* Segment term filter condition */ Fts3MultiSegReader csr; /* Cursor to iterate through level(s) */ int bIgnoreEmpty = 0; /* True to ignore empty segments */ + i64 iMaxLevel = 0; /* Max level number for this index/langid */ assert( iLevel==FTS3_SEGCURSOR_ALL || iLevel==FTS3_SEGCURSOR_PENDING @@ -2978,6 +3169,11 @@ static int fts3SegmentMerge( rc = sqlite3Fts3SegReaderCursor(p, iLangid, iIndex, iLevel, 0, 0, 1, 0, &csr); if( rc!=SQLITE_OK || csr.nSegment==0 ) goto finished; + if( iLevel!=FTS3_SEGCURSOR_PENDING ){ + rc = fts3SegmentMaxLevel(p, iLangid, iIndex, &iMaxLevel); + if( rc!=SQLITE_OK ) goto finished; + } + if( iLevel==FTS3_SEGCURSOR_ALL ){ /* This call is to merge all segments in the database to a single ** segment. The level of the new segment is equal to the numerically @@ -2987,21 +3183,21 @@ static int fts3SegmentMerge( rc = SQLITE_DONE; goto finished; } - rc = fts3SegmentMaxLevel(p, iLangid, iIndex, &iNewLevel); + iNewLevel = iMaxLevel; bIgnoreEmpty = 1; - }else if( iLevel==FTS3_SEGCURSOR_PENDING ){ - iNewLevel = getAbsoluteLevel(p, iLangid, iIndex, 0); - rc = fts3AllocateSegdirIdx(p, iLangid, iIndex, 0, &iIdx); }else{ /* This call is to merge all segments at level iLevel. find the next ** available segment index at level iLevel+1. The call to ** fts3AllocateSegdirIdx() will merge the segments at level iLevel+1 to ** a single iLevel+2 segment if necessary. */ - rc = fts3AllocateSegdirIdx(p, iLangid, iIndex, iLevel+1, &iIdx); + assert( FTS3_SEGCURSOR_PENDING==-1 ); iNewLevel = getAbsoluteLevel(p, iLangid, iIndex, iLevel+1); + rc = fts3AllocateSegdirIdx(p, iLangid, iIndex, iLevel+1, &iIdx); + bIgnoreEmpty = (iLevel!=FTS3_SEGCURSOR_PENDING) && (iNewLevel>iMaxLevel); } if( rc!=SQLITE_OK ) goto finished; + assert( csr.nSegment>0 ); assert( iNewLevel>=getAbsoluteLevel(p, iLangid, iIndex, 0) ); assert( iNewLevelnLeafData); + } + } + } finished: fts3SegWriterFree(pWriter); @@ -3036,7 +3239,7 @@ static int fts3SegmentMerge( /* -** Flush the contents of pendingTerms to level 0 segments. +** Flush the contents of pendingTerms to level 0 segments. */ int sqlite3Fts3PendingTermsFlush(Fts3Table *p){ int rc = SQLITE_OK; @@ -3052,14 +3255,19 @@ int sqlite3Fts3PendingTermsFlush(Fts3Table *p){ ** estimate the number of leaf blocks of content to be written */ if( rc==SQLITE_OK && p->bHasStat - && p->bAutoincrmerge==0xff && p->nLeafAdd>0 + && p->nAutoincrmerge==0xff && p->nLeafAdd>0 ){ sqlite3_stmt *pStmt = 0; rc = fts3SqlStmt(p, SQL_SELECT_STAT, &pStmt, 0); if( rc==SQLITE_OK ){ sqlite3_bind_int(pStmt, 1, FTS_STAT_AUTOINCRMERGE); rc = sqlite3_step(pStmt); - p->bAutoincrmerge = (rc==SQLITE_ROW && sqlite3_column_int(pStmt, 0)); + if( rc==SQLITE_ROW ){ + p->nAutoincrmerge = sqlite3_column_int(pStmt, 0); + if( p->nAutoincrmerge==1 ) p->nAutoincrmerge = 8; + }else if( rc==SQLITE_DONE ){ + p->nAutoincrmerge = 0; + } rc = sqlite3_reset(pStmt); } } @@ -3303,9 +3511,11 @@ static int fts3DoRebuild(Fts3Table *p){ rc = fts3PendingTermsDocid(p, iLangid, sqlite3_column_int64(pStmt, 0)); memset(aSz, 0, sizeof(aSz[0]) * (p->nColumn+1)); for(iCol=0; rc==SQLITE_OK && iColnColumn; iCol++){ - const char *z = (const char *) sqlite3_column_text(pStmt, iCol+1); - rc = fts3PendingTermsAdd(p, iLangid, z, iCol, &aSz[iCol]); - aSz[p->nColumn] += sqlite3_column_bytes(pStmt, iCol+1); + if( p->abNotindexed[iCol]==0 ){ + const char *z = (const char *) sqlite3_column_text(pStmt, iCol+1); + rc = fts3PendingTermsAdd(p, iLangid, z, iCol, &aSz[iCol]); + aSz[p->nColumn] += sqlite3_column_bytes(pStmt, iCol+1); + } } if( p->bHasDocsize ){ fts3InsertDocsize(&rc, p, aSz); @@ -3425,6 +3635,8 @@ struct IncrmergeWriter { int iIdx; /* Index of *output* segment in iAbsLevel+1 */ sqlite3_int64 iStart; /* Block number of first allocated block */ sqlite3_int64 iEnd; /* Block number of last allocated block */ + sqlite3_int64 nLeafData; /* Bytes of leaf page data so far */ + u8 bNoLeafData; /* If true, store 0 for segment size */ NodeWriter aNodeWriter[FTS_MAX_APPENDABLE_HEIGHT]; }; @@ -3493,9 +3705,9 @@ static int nodeReaderNext(NodeReader *p){ p->aNode = 0; }else{ if( bFirst==0 ){ - p->iOff += sqlite3Fts3GetVarint32(&p->aNode[p->iOff], &nPrefix); + p->iOff += fts3GetVarint32(&p->aNode[p->iOff], &nPrefix); } - p->iOff += sqlite3Fts3GetVarint32(&p->aNode[p->iOff], &nSuffix); + p->iOff += fts3GetVarint32(&p->aNode[p->iOff], &nSuffix); blobGrowBuffer(&p->term, nPrefix+nSuffix, &rc); if( rc==SQLITE_OK ){ @@ -3503,7 +3715,7 @@ static int nodeReaderNext(NodeReader *p){ p->term.n = nPrefix+nSuffix; p->iOff += nSuffix; if( p->iChild==0 ){ - p->iOff += sqlite3Fts3GetVarint32(&p->aNode[p->iOff], &p->nDoclist); + p->iOff += fts3GetVarint32(&p->aNode[p->iOff], &p->nDoclist); p->aDoclist = &p->aNode[p->iOff]; p->iOff += p->nDoclist; } @@ -3763,8 +3975,8 @@ static int fts3IncrmergeAppend( nSpace += sqlite3Fts3VarintLen(nDoclist) + nDoclist; } + pWriter->nLeafData += nSpace; blobGrowBuffer(&pLeaf->block, pLeaf->block.n + nSpace, &rc); - if( rc==SQLITE_OK ){ if( pLeaf->block.n==0 ){ pLeaf->block.n = 1; @@ -3863,6 +4075,7 @@ static void fts3IncrmergeRelease( pWriter->iStart, /* start_block */ pWriter->aNodeWriter[0].iBlock, /* leaves_end_block */ pWriter->iEnd, /* end_block */ + (pWriter->bNoLeafData==0 ? pWriter->nLeafData : 0), /* end_block */ pRoot->block.a, pRoot->block.n /* root */ ); } @@ -3964,7 +4177,11 @@ static int fts3IncrmergeLoad( if( sqlite3_step(pSelect)==SQLITE_ROW ){ iStart = sqlite3_column_int64(pSelect, 1); iLeafEnd = sqlite3_column_int64(pSelect, 2); - iEnd = sqlite3_column_int64(pSelect, 3); + fts3ReadEndBlockField(pSelect, 3, &iEnd, &pWriter->nLeafData); + if( pWriter->nLeafData<0 ){ + pWriter->nLeafData = pWriter->nLeafData * -1; + } + pWriter->bNoLeafData = (pWriter->nLeafData==0); nRoot = sqlite3_column_bytes(pSelect, 4); aRoot = sqlite3_column_blob(pSelect, 4); }else{ @@ -4555,7 +4772,7 @@ static int fts3IncrmergeHintPop(Blob *pHint, i64 *piAbsLevel, int *pnInput){ pHint->n = i; i += sqlite3Fts3GetVarint(&pHint->a[i], piAbsLevel); - i += sqlite3Fts3GetVarint32(&pHint->a[i], pnInput); + i += fts3GetVarint32(&pHint->a[i], pnInput); if( i!=nHint ) return SQLITE_CORRUPT_VTAB; return SQLITE_OK; @@ -4565,11 +4782,11 @@ static int fts3IncrmergeHintPop(Blob *pHint, i64 *piAbsLevel, int *pnInput){ /* ** Attempt an incremental merge that writes nMerge leaf blocks. ** -** Incremental merges happen nMin segments at a time. The two -** segments to be merged are the nMin oldest segments (the ones with -** the smallest indexes) in the highest level that contains at least -** nMin segments. Multiple merges might occur in an attempt to write the -** quota of nMerge leaf blocks. +** Incremental merges happen nMin segments at a time. The segments +** to be merged are the nMin oldest segments (the ones with the smallest +** values for the _segdir.idx field) in the highest level that contains +** at least nMin segments. Multiple merges might occur in an attempt to +** write the quota of nMerge leaf blocks. */ int sqlite3Fts3Incrmerge(Fts3Table *p, int nMerge, int nMin){ int rc; /* Return code */ @@ -4594,6 +4811,7 @@ int sqlite3Fts3Incrmerge(Fts3Table *p, int nMerge, int nMin){ const i64 nMod = FTS3_SEGDIR_MAXLEVEL * p->nIndex; sqlite3_stmt *pFindLevel = 0; /* SQL used to determine iAbsLevel */ int bUseHint = 0; /* True if attempting to append */ + int iIdx = 0; /* Largest idx in level (iAbsLevel+1) */ /* Search the %_segdir table for the absolute level with the smallest ** relative level number that contains at least nMin segments, if any. @@ -4647,6 +4865,19 @@ int sqlite3Fts3Incrmerge(Fts3Table *p, int nMerge, int nMin){ ** to start work on some other level. */ memset(pWriter, 0, nAlloc); pFilter->flags = FTS3_SEGMENT_REQUIRE_POS; + + if( rc==SQLITE_OK ){ + rc = fts3IncrmergeOutputIdx(p, iAbsLevel, &iIdx); + assert( bUseHint==1 || bUseHint==0 ); + if( iIdx==0 || (bUseHint && iIdx==1) ){ + int bIgnore = 0; + rc = fts3SegmentIsMaxLevel(p, iAbsLevel+1, &bIgnore); + if( bIgnore ){ + pFilter->flags |= FTS3_SEGMENT_IGNORE_EMPTY; + } + } + } + if( rc==SQLITE_OK ){ rc = fts3IncrmergeCsr(p, iAbsLevel, nSeg, pCsr); } @@ -4654,16 +4885,12 @@ int sqlite3Fts3Incrmerge(Fts3Table *p, int nMerge, int nMin){ && SQLITE_OK==(rc = sqlite3Fts3SegReaderStart(p, pCsr, pFilter)) && SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, pCsr)) ){ - int iIdx = 0; /* Largest idx in level (iAbsLevel+1) */ - rc = fts3IncrmergeOutputIdx(p, iAbsLevel, &iIdx); - if( rc==SQLITE_OK ){ - if( bUseHint && iIdx>0 ){ - const char *zKey = pCsr->zTerm; - int nKey = pCsr->nTerm; - rc = fts3IncrmergeLoad(p, iAbsLevel, iIdx-1, zKey, nKey, pWriter); - }else{ - rc = fts3IncrmergeWriter(p, iAbsLevel, iIdx, pCsr, pWriter); - } + if( bUseHint && iIdx>0 ){ + const char *zKey = pCsr->zTerm; + int nKey = pCsr->nTerm; + rc = fts3IncrmergeLoad(p, iAbsLevel, iIdx-1, zKey, nKey, pWriter); + }else{ + rc = fts3IncrmergeWriter(p, iAbsLevel, iIdx, pCsr, pWriter); } if( rc==SQLITE_OK && pWriter->nLeafEst ){ @@ -4685,7 +4912,13 @@ int sqlite3Fts3Incrmerge(Fts3Table *p, int nMerge, int nMin){ } } + if( nSeg!=0 ){ + pWriter->nLeafData = pWriter->nLeafData * -1; + } fts3IncrmergeRelease(p, pWriter, &rc); + if( nSeg==0 && pWriter->bNoLeafData==0 ){ + fts3PromoteSegments(p, iAbsLevel+1, pWriter->nLeafData); + } } sqlite3Fts3SegReaderFinish(pCsr); @@ -4772,16 +5005,19 @@ static int fts3DoAutoincrmerge( ){ int rc = SQLITE_OK; sqlite3_stmt *pStmt = 0; - p->bAutoincrmerge = fts3Getint(&zParam)!=0; + p->nAutoincrmerge = fts3Getint(&zParam); + if( p->nAutoincrmerge==1 || p->nAutoincrmerge>FTS3_MERGE_COUNT ){ + p->nAutoincrmerge = 8; + } if( !p->bHasStat ){ assert( p->bFts4==0 ); sqlite3Fts3CreateStatTable(&rc, p); if( rc ) return rc; } rc = fts3SqlStmt(p, SQL_REPLACE_STAT, &pStmt, 0); - if( rc ) return rc;; + if( rc ) return rc; sqlite3_bind_int(pStmt, 1, FTS_STAT_AUTOINCRMERGE); - sqlite3_bind_int(pStmt, 2, p->bAutoincrmerge); + sqlite3_bind_int(pStmt, 2, p->nAutoincrmerge); sqlite3_step(pStmt); rc = sqlite3_reset(pStmt); return rc; @@ -4938,34 +5174,36 @@ static int fts3IntegrityCheck(Fts3Table *p, int *pbOk){ int iCol; for(iCol=0; rc==SQLITE_OK && iColnColumn; iCol++){ - const char *zText = (const char *)sqlite3_column_text(pStmt, iCol+1); - int nText = sqlite3_column_bytes(pStmt, iCol+1); - sqlite3_tokenizer_cursor *pT = 0; - - rc = sqlite3Fts3OpenTokenizer(p->pTokenizer, iLang, zText, nText, &pT); - while( rc==SQLITE_OK ){ - char const *zToken; /* Buffer containing token */ - int nToken = 0; /* Number of bytes in token */ - int iDum1 = 0, iDum2 = 0; /* Dummy variables */ - int iPos = 0; /* Position of token in zText */ - - rc = pModule->xNext(pT, &zToken, &nToken, &iDum1, &iDum2, &iPos); - if( rc==SQLITE_OK ){ - int i; - cksum2 = cksum2 ^ fts3ChecksumEntry( - zToken, nToken, iLang, 0, iDocid, iCol, iPos - ); - for(i=1; inIndex; i++){ - if( p->aIndex[i].nPrefix<=nToken ){ - cksum2 = cksum2 ^ fts3ChecksumEntry( - zToken, p->aIndex[i].nPrefix, iLang, i, iDocid, iCol, iPos - ); + if( p->abNotindexed[iCol]==0 ){ + const char *zText = (const char *)sqlite3_column_text(pStmt, iCol+1); + int nText = sqlite3_column_bytes(pStmt, iCol+1); + sqlite3_tokenizer_cursor *pT = 0; + + rc = sqlite3Fts3OpenTokenizer(p->pTokenizer, iLang, zText, nText,&pT); + while( rc==SQLITE_OK ){ + char const *zToken; /* Buffer containing token */ + int nToken = 0; /* Number of bytes in token */ + int iDum1 = 0, iDum2 = 0; /* Dummy variables */ + int iPos = 0; /* Position of token in zText */ + + rc = pModule->xNext(pT, &zToken, &nToken, &iDum1, &iDum2, &iPos); + if( rc==SQLITE_OK ){ + int i; + cksum2 = cksum2 ^ fts3ChecksumEntry( + zToken, nToken, iLang, 0, iDocid, iCol, iPos + ); + for(i=1; inIndex; i++){ + if( p->aIndex[i].nPrefix<=nToken ){ + cksum2 = cksum2 ^ fts3ChecksumEntry( + zToken, p->aIndex[i].nPrefix, iLang, i, iDocid, iCol, iPos + ); + } } } } + if( pT ) pModule->xClose(pT); + if( rc==SQLITE_DONE ) rc = SQLITE_OK; } - if( pT ) pModule->xClose(pT); - if( rc==SQLITE_DONE ) rc = SQLITE_OK; } } @@ -5049,6 +5287,9 @@ static int fts3SpecialInsert(Fts3Table *p, sqlite3_value *pVal){ }else if( nVal>11 && 0==sqlite3_strnicmp(zVal, "maxpending=", 9) ){ p->nMaxPendingData = atoi(&zVal[11]); rc = SQLITE_OK; + }else if( nVal>21 && 0==sqlite3_strnicmp(zVal, "test-no-incr-doclist=", 21) ){ + p->bNoIncrDoclist = atoi(&zVal[21]); + rc = SQLITE_OK; #endif }else{ rc = SQLITE_ERROR; @@ -5108,32 +5349,34 @@ int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *pCsr){ iDocid = sqlite3_column_int64(pCsr->pStmt, 0); for(i=0; inColumn && rc==SQLITE_OK; i++){ - const char *zText = (const char *)sqlite3_column_text(pCsr->pStmt, i+1); - sqlite3_tokenizer_cursor *pTC = 0; - - rc = sqlite3Fts3OpenTokenizer(pT, pCsr->iLangid, zText, -1, &pTC); - while( rc==SQLITE_OK ){ - char const *zToken; /* Buffer containing token */ - int nToken = 0; /* Number of bytes in token */ - int iDum1 = 0, iDum2 = 0; /* Dummy variables */ - int iPos = 0; /* Position of token in zText */ - - rc = pModule->xNext(pTC, &zToken, &nToken, &iDum1, &iDum2, &iPos); - for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){ - Fts3PhraseToken *pPT = pDef->pToken; - if( (pDef->iCol>=p->nColumn || pDef->iCol==i) - && (pPT->bFirst==0 || iPos==0) - && (pPT->n==nToken || (pPT->isPrefix && pPT->nz, pPT->n)) - ){ - fts3PendingListAppend(&pDef->pList, iDocid, i, iPos, &rc); + if( p->abNotindexed[i]==0 ){ + const char *zText = (const char *)sqlite3_column_text(pCsr->pStmt, i+1); + sqlite3_tokenizer_cursor *pTC = 0; + + rc = sqlite3Fts3OpenTokenizer(pT, pCsr->iLangid, zText, -1, &pTC); + while( rc==SQLITE_OK ){ + char const *zToken; /* Buffer containing token */ + int nToken = 0; /* Number of bytes in token */ + int iDum1 = 0, iDum2 = 0; /* Dummy variables */ + int iPos = 0; /* Position of token in zText */ + + rc = pModule->xNext(pTC, &zToken, &nToken, &iDum1, &iDum2, &iPos); + for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){ + Fts3PhraseToken *pPT = pDef->pToken; + if( (pDef->iCol>=p->nColumn || pDef->iCol==i) + && (pPT->bFirst==0 || iPos==0) + && (pPT->n==nToken || (pPT->isPrefix && pPT->nz, pPT->n)) + ){ + fts3PendingListAppend(&pDef->pList, iDocid, i, iPos, &rc); + } } } + if( pTC ) pModule->xClose(pTC); + if( rc==SQLITE_DONE ) rc = SQLITE_OK; } - if( pTC ) pModule->xClose(pTC); - if( rc==SQLITE_DONE ) rc = SQLITE_OK; } - + for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){ if( pDef->pList ){ rc = fts3PendingListAppendVarint(&pDef->pList, 0); @@ -5265,6 +5508,10 @@ int sqlite3Fts3UpdateMethod( int nChng = 0; /* Net change in number of documents */ int bInsertDone = 0; + /* At this point it must be known if the %_stat table exists or not. + ** So bHasStat may not be 2. */ + assert( p->bHasStat==0 || p->bHasStat==1 ); + assert( p->pSegments==0 ); assert( nArg==1 /* DELETE operations */ @@ -5297,6 +5544,9 @@ int sqlite3Fts3UpdateMethod( aSzIns = &aSzDel[p->nColumn+1]; memset(aSzDel, 0, sizeof(aSzDel[0])*(p->nColumn+1)*2); + rc = fts3Writelock(p); + if( rc!=SQLITE_OK ) goto update_out; + /* If this is an INSERT operation, or an UPDATE that modifies the rowid ** value, then this operation requires constraint handling. ** diff --git a/ext/fts3/tool/fts3view.c b/ext/fts3/tool/fts3view.c index 479ae98..3dc1ba8 100644 --- a/ext/fts3/tool/fts3view.c +++ b/ext/fts3/tool/fts3view.c @@ -376,7 +376,7 @@ static void showSegmentStats(sqlite3 *db, const char *zTab){ sqlite3_finalize(pStmt); nLeaf = nSeg - nIdx; printf("Leaf segments larger than %5d bytes.... %9d %5.2f%%\n", - pgsz-45, n, n*100.0/nLeaf); + pgsz-45, n, nLeaf>0 ? n*100.0/nLeaf : 0.0); pStmt = prepare(db, "SELECT max(level%%1024) FROM '%q_segdir'", zTab); mxLevel = 0; @@ -554,7 +554,7 @@ static void decodeSegment( sqlite3_int64 n; sqlite3_int64 iDocsz; int iHeight; - int i = 0; + sqlite3_int64 i = 0; int cnt = 0; char zTerm[1000]; @@ -576,12 +576,12 @@ static void decodeSegment( fprintf(stderr, "term to long\n"); exit(1); } - memcpy(zTerm+iPrefix, aData+i, nTerm); + memcpy(zTerm+iPrefix, aData+i, (size_t)nTerm); zTerm[iPrefix+nTerm] = 0; i += nTerm; if( iHeight==0 ){ i += getVarint(aData+i, &iDocsz); - printf("term: %-25s doclist %7lld bytes offset %d\n", zTerm, iDocsz, i); + printf("term: %-25s doclist %7lld bytes offset %lld\n", zTerm, iDocsz, i); i += iDocsz; }else{ printf("term: %-25s child %lld\n", zTerm, ++iChild); @@ -749,18 +749,19 @@ static void decodeDoclist( */ static void showDoclist(sqlite3 *db, const char *zTab){ const unsigned char *aData; - sqlite3_int64 offset, nData; + sqlite3_int64 offset; + int nData; sqlite3_stmt *pStmt; offset = atoi64(azExtra[1]); - nData = atoi64(azExtra[2]); + nData = atoi(azExtra[2]); pStmt = prepareToGetSegment(db, zTab, azExtra[0]); if( sqlite3_step(pStmt)!=SQLITE_ROW ){ sqlite3_finalize(pStmt); return; } aData = sqlite3_column_blob(pStmt, 0); - printf("Doclist at %s offset %lld of size %lld bytes:\n", + printf("Doclist at %s offset %lld of size %d bytes:\n", azExtra[0], offset, nData); if( findOption("raw", 0, 0)!=0 ){ printBlob(aData+offset, nData); diff --git a/ext/fts3/unicode/mkunicode.tcl b/ext/fts3/unicode/mkunicode.tcl index 0d58e8a..c3083ee 100644 --- a/ext/fts3/unicode/mkunicode.tcl +++ b/ext/fts3/unicode/mkunicode.tcl @@ -160,7 +160,7 @@ proc print_rd {map} { } assert( key>=aDia[iRes] ); return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : (int)aChar[iRes]);} - puts "\};" + puts "\}" } proc print_isdiacritic {zFunc map} { @@ -239,7 +239,10 @@ proc an_load_unicodedata_text {zName} { foreach $lField $fields {} set iCode [expr "0x$code"] - set bAlnum [expr {[lsearch {L N} [string range $general_category 0 0]]>=0}] + set bAlnum [expr { + [lsearch {L N} [string range $general_category 0 0]] >= 0 + || $general_category=="Co" + }] if { !$bAlnum } { lappend lRet $iCode } } @@ -295,7 +298,7 @@ proc an_print_range_array {lRange} { ** using this format. */ }] - puts -nonewline " const static unsigned int aEntry\[\] = \{" + puts -nonewline " static const unsigned int aEntry\[\] = \{" set i 0 foreach range $lRange { foreach {iFirst nRange} $range {} @@ -346,7 +349,7 @@ proc print_isalnum {zFunc lRange} { return ( (aAscii[c >> 5] & (1 << (c & 0x001F)))==0 ); }else if( c<(1<<22) ){ unsigned int key = (((unsigned int)c)<<10) | 0x000003FF; - int iRes; + int iRes = 0; int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1; int iLo = 0; while( iHi>=iLo ){ @@ -360,7 +363,7 @@ proc print_isalnum {zFunc lRange} { } assert( aEntry[0]=aEntry[iRes] ); - return (c >= ((aEntry[iRes]>>10) + (aEntry[iRes]&0x3FF))); + return (((unsigned int)c) >= ((aEntry[iRes]>>10) + (aEntry[iRes]&0x3FF))); } return 1;} puts "\}" @@ -729,7 +732,7 @@ proc print_fileheader {} { */ }] puts "" - puts "#if !defined(SQLITE_DISABLE_FTS3_UNICODE)" + puts "#ifndef SQLITE_DISABLE_FTS3_UNICODE" puts "#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)" puts "" puts "#include " -- cgit v1.2.3