From 08119c361d1181b3e8f1abb429236e488a664753 Mon Sep 17 00:00:00 2001 From: Hans-Christoph Steiner Date: Tue, 13 Aug 2013 15:42:54 -0400 Subject: Imported Upstream version 2.2.1 --- ext/fts3/fts3.c | 74 +++---- ext/fts3/fts3Int.h | 5 +- ext/fts3/fts3_aux.c | 29 ++- ext/fts3/fts3_expr.c | 335 +++++++++++++++++++++++++++---- ext/fts3/fts3_hash.h | 2 +- ext/fts3/fts3_icu.c | 4 +- ext/fts3/fts3_snippet.c | 39 ++-- ext/fts3/fts3_test.c | 2 +- ext/fts3/fts3_tokenize_vtab.c | 454 ++++++++++++++++++++++++++++++++++++++++++ ext/fts3/fts3_tokenizer.c | 10 +- ext/fts3/fts3_tokenizer.h | 2 +- ext/fts3/fts3_unicode.c | 2 +- ext/fts3/fts3_write.c | 113 ++++++----- 13 files changed, 919 insertions(+), 152 deletions(-) create mode 100644 ext/fts3/fts3_tokenize_vtab.c (limited to 'ext/fts3') diff --git a/ext/fts3/fts3.c b/ext/fts3/fts3.c index 58414f6..c00a13f 100644 --- a/ext/fts3/fts3.c +++ b/ext/fts3/fts3.c @@ -1571,7 +1571,7 @@ static int fts3CursorSeek(sqlite3_context *pContext, Fts3Cursor *pCsr){ }else{ rc = sqlite3_reset(pCsr->pStmt); if( rc==SQLITE_OK && ((Fts3Table *)pCsr->base.pVtab)->zContentTbl==0 ){ - /* If no row was found and no error has occured, then the %_content + /* If no row was found and no error has occurred, then the %_content ** table is missing a row that is present in the full-text index. ** The data structures are corrupt. */ rc = FTS_CORRUPT_VTAB; @@ -2811,7 +2811,7 @@ static void fts3SegReaderCursorFree(Fts3MultiSegReader *pSegcsr){ } /* -** This function retreives the doclist for the specified term (or term +** This function retrieves the doclist for the specified term (or term ** prefix) from the database. */ static int fts3TermSelect( @@ -2975,14 +2975,12 @@ static int fts3FilterMethod( pCsr->iLangid = 0; if( nVal==2 ) pCsr->iLangid = sqlite3_value_int(apVal[1]); + assert( p->base.zErrMsg==0 ); rc = sqlite3Fts3ExprParse(p->pTokenizer, pCsr->iLangid, - p->azColumn, p->bFts4, p->nColumn, iCol, zQuery, -1, &pCsr->pExpr + p->azColumn, p->bFts4, p->nColumn, iCol, zQuery, -1, &pCsr->pExpr, + &p->base.zErrMsg ); if( rc!=SQLITE_OK ){ - if( rc==SQLITE_ERROR ){ - static const char *zErr = "malformed MATCH expression: [%s]"; - p->base.zErrMsg = sqlite3_mprintf(zErr, zQuery); - } return rc; } @@ -3562,7 +3560,7 @@ void sqlite3Fts3IcuTokenizerModule(sqlite3_tokenizer_module const**ppModule); #endif /* -** Initialise the fts3 extension. If this extension is built as part +** Initialize the fts3 extension. If this extension is built as part ** of the sqlite library, then this function is called directly by ** SQLite. If fts3 is built as a dynamically loadable extension, this ** function is called by the sqlite3_extension_init() entry point. @@ -3596,7 +3594,7 @@ int sqlite3Fts3Init(sqlite3 *db){ sqlite3Fts3SimpleTokenizerModule(&pSimple); sqlite3Fts3PorterTokenizerModule(&pPorter); - /* Allocate and initialise the hash-table used to store tokenizers. */ + /* Allocate and initialize the hash-table used to store tokenizers. */ pHash = sqlite3_malloc(sizeof(Fts3Hash)); if( !pHash ){ rc = SQLITE_NOMEM; @@ -3646,9 +3644,13 @@ int sqlite3Fts3Init(sqlite3 *db){ db, "fts4", &fts3Module, (void *)pHash, 0 ); } + if( rc==SQLITE_OK ){ + rc = sqlite3Fts3InitTok(db, (void *)pHash); + } return rc; } + /* An error has occurred. Delete the hash table and return the error code. */ assert( rc!=SQLITE_OK ); if( pHash ){ @@ -4743,35 +4745,39 @@ static int fts3EvalNearTest(Fts3Expr *pExpr, int *pRc){ nTmp += p->pRight->pPhrase->doclist.nList; } nTmp += p->pPhrase->doclist.nList; - aTmp = sqlite3_malloc(nTmp*2); - if( !aTmp ){ - *pRc = SQLITE_NOMEM; + if( nTmp==0 ){ res = 0; }else{ - char *aPoslist = p->pPhrase->doclist.pList; - int nToken = p->pPhrase->nToken; + aTmp = sqlite3_malloc(nTmp*2); + if( !aTmp ){ + *pRc = SQLITE_NOMEM; + res = 0; + }else{ + char *aPoslist = p->pPhrase->doclist.pList; + int nToken = p->pPhrase->nToken; - for(p=p->pParent;res && p && p->eType==FTSQUERY_NEAR; p=p->pParent){ - Fts3Phrase *pPhrase = p->pRight->pPhrase; - int nNear = p->nNear; - res = fts3EvalNearTrim(nNear, aTmp, &aPoslist, &nToken, pPhrase); - } - - aPoslist = pExpr->pRight->pPhrase->doclist.pList; - nToken = pExpr->pRight->pPhrase->nToken; - for(p=pExpr->pLeft; p && res; p=p->pLeft){ - int nNear; - Fts3Phrase *pPhrase; - assert( p->pParent && p->pParent->pLeft==p ); - nNear = p->pParent->nNear; - pPhrase = ( - p->eType==FTSQUERY_NEAR ? p->pRight->pPhrase : p->pPhrase - ); - res = fts3EvalNearTrim(nNear, aTmp, &aPoslist, &nToken, pPhrase); + for(p=p->pParent;res && p && p->eType==FTSQUERY_NEAR; p=p->pParent){ + Fts3Phrase *pPhrase = p->pRight->pPhrase; + int nNear = p->nNear; + res = fts3EvalNearTrim(nNear, aTmp, &aPoslist, &nToken, pPhrase); + } + + aPoslist = pExpr->pRight->pPhrase->doclist.pList; + nToken = pExpr->pRight->pPhrase->nToken; + for(p=pExpr->pLeft; p && res; p=p->pLeft){ + int nNear; + Fts3Phrase *pPhrase; + assert( p->pParent && p->pParent->pLeft==p ); + nNear = p->pParent->nNear; + pPhrase = ( + p->eType==FTSQUERY_NEAR ? p->pRight->pPhrase : p->pPhrase + ); + res = fts3EvalNearTrim(nNear, aTmp, &aPoslist, &nToken, pPhrase); + } } - } - sqlite3_free(aTmp); + sqlite3_free(aTmp); + } } return res; @@ -5191,7 +5197,7 @@ int sqlite3Fts3EvalPhraseStats( ** of the current row. ** ** More specifically, the returned buffer contains 1 varint for each -** occurence of the phrase in the column, stored using the normal (delta+2) +** occurrence of the phrase in the column, stored using the normal (delta+2) ** compression and is terminated by either an 0x01 or 0x00 byte. For example, ** if the requested column contains "a b X c d X X" and the position-list ** for 'X' is requested, the buffer returned may contain: diff --git a/ext/fts3/fts3Int.h b/ext/fts3/fts3Int.h index 77ca470..b19064c 100644 --- a/ext/fts3/fts3Int.h +++ b/ext/fts3/fts3Int.h @@ -524,7 +524,7 @@ void sqlite3Fts3Matchinfo(sqlite3_context *, Fts3Cursor *, const char *); /* fts3_expr.c */ int sqlite3Fts3ExprParse(sqlite3_tokenizer *, int, - char **, int, int, int, const char *, int, Fts3Expr ** + char **, int, int, int, const char *, int, Fts3Expr **, char ** ); void sqlite3Fts3ExprFree(Fts3Expr *); #ifdef SQLITE_TEST @@ -549,6 +549,9 @@ int sqlite3Fts3EvalPhrasePoslist(Fts3Cursor *, Fts3Expr *, int iCol, char **); int sqlite3Fts3MsrOvfl(Fts3Cursor *, Fts3MultiSegReader *, int *); int sqlite3Fts3MsrIncrRestart(Fts3MultiSegReader *pCsr); +/* fts3_tokenize_vtab.c */ +int sqlite3Fts3InitTok(sqlite3*, Fts3Hash *); + /* fts3_unicode2.c (functions generated by parsing unicode text files) */ #ifdef SQLITE_ENABLE_FTS4_UNICODE61 int sqlite3FtsUnicodeFold(int, int); diff --git a/ext/fts3/fts3_aux.c b/ext/fts3/fts3_aux.c index a2bff2e..9b582fc 100644 --- a/ext/fts3/fts3_aux.c +++ b/ext/fts3/fts3_aux.c @@ -70,17 +70,26 @@ static int fts3auxConnectMethod( UNUSED_PARAMETER(pUnused); - /* The user should specify a single argument - the name of an fts3 table. */ - if( argc!=4 ){ - *pzErr = sqlite3_mprintf( - "wrong number of arguments to fts4aux constructor" - ); - return SQLITE_ERROR; - } + /* The user should invoke this in one of two forms: + ** + ** CREATE VIRTUAL TABLE xxx USING fts4aux(fts4-table); + ** CREATE VIRTUAL TABLE xxx USING fts4aux(fts4-table-db, fts4-table); + */ + if( argc!=4 && argc!=5 ) goto bad_args; zDb = argv[1]; nDb = (int)strlen(zDb); - zFts3 = argv[3]; + if( argc==5 ){ + if( nDb==4 && 0==sqlite3_strnicmp("temp", zDb, 4) ){ + zDb = argv[3]; + nDb = (int)strlen(zDb); + zFts3 = argv[4]; + }else{ + goto bad_args; + } + }else{ + zFts3 = argv[3]; + } nFts3 = (int)strlen(zFts3); rc = sqlite3_declare_vtab(db, FTS3_TERMS_SCHEMA); @@ -103,6 +112,10 @@ static int fts3auxConnectMethod( *ppVtab = (sqlite3_vtab *)p; return SQLITE_OK; + + bad_args: + *pzErr = sqlite3_mprintf("invalid arguments to fts4aux constructor"); + return SQLITE_ERROR; } /* diff --git a/ext/fts3/fts3_expr.c b/ext/fts3/fts3_expr.c index a6e3492..c046d7d 100644 --- a/ext/fts3/fts3_expr.c +++ b/ext/fts3/fts3_expr.c @@ -106,7 +106,7 @@ struct ParseContext { ** This function is equivalent to the standard isspace() function. ** ** The standard isspace() can be awkward to use safely, because although it -** is defined to accept an argument of type int, its behaviour when passed +** is defined to accept an argument of type int, its behavior when passed ** an integer that falls outside of the range of the unsigned char type ** is undefined (and sometimes, "undefined" means segfault). This wrapper ** is defined to accept an argument of type char, and always returns 0 for @@ -185,7 +185,7 @@ static int getNextToken( rc = sqlite3Fts3OpenTokenizer(pTokenizer, pParse->iLangid, z, n, &pCursor); if( rc==SQLITE_OK ){ const char *zToken; - int nToken, iStart, iEnd, iPosition; + int nToken = 0, iStart = 0, iEnd = 0, iPosition = 0; int nByte; /* total space to allocate */ rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition); @@ -300,7 +300,7 @@ static int getNextString( int ii; for(ii=0; rc==SQLITE_OK; ii++){ const char *zByte; - int nByte, iBegin, iEnd, iPos; + int nByte = 0, iBegin = 0, iEnd = 0, iPos = 0; rc = pModule->xNext(pCursor, &zByte, &nByte, &iBegin, &iEnd, &iPos); if( rc==SQLITE_OK ){ Fts3PhraseToken *pToken; @@ -640,8 +640,10 @@ static int fts3ExprParse( } pNot->eType = FTSQUERY_NOT; pNot->pRight = p; + p->pParent = pNot; if( pNotBranch ){ pNot->pLeft = pNotBranch; + pNotBranch->pParent = pNot; } pNotBranch = pNot; p = pPrev; @@ -729,6 +731,7 @@ static int fts3ExprParse( pIter = pIter->pLeft; } pIter->pLeft = pRet; + pRet->pParent = pIter; pRet = pNotBranch; } } @@ -746,30 +749,184 @@ exprparse_out: } /* -** Parameters z and n contain a pointer to and length of a buffer containing -** an fts3 query expression, respectively. This function attempts to parse the -** query expression and create a tree of Fts3Expr structures representing the -** parsed expression. If successful, *ppExpr is set to point to the head -** of the parsed expression tree and SQLITE_OK is returned. If an error -** occurs, either SQLITE_NOMEM (out-of-memory error) or SQLITE_ERROR (parse -** error) is returned and *ppExpr is set to 0. +** Return SQLITE_ERROR if the maximum depth of the expression tree passed +** as the only argument is more than nMaxDepth. +*/ +static int fts3ExprCheckDepth(Fts3Expr *p, int nMaxDepth){ + int rc = SQLITE_OK; + if( p ){ + if( nMaxDepth<0 ){ + rc = SQLITE_TOOBIG; + }else{ + rc = fts3ExprCheckDepth(p->pLeft, nMaxDepth-1); + if( rc==SQLITE_OK ){ + rc = fts3ExprCheckDepth(p->pRight, nMaxDepth-1); + } + } + } + return rc; +} + +/* +** This function attempts to transform the expression tree at (*pp) to +** an equivalent but more balanced form. The tree is modified in place. +** If successful, SQLITE_OK is returned and (*pp) set to point to the +** new root expression node. ** -** If parameter n is a negative number, then z is assumed to point to a -** nul-terminated string and the length is determined using strlen(). +** nMaxDepth is the maximum allowable depth of the balanced sub-tree. ** -** The first parameter, pTokenizer, is passed the fts3 tokenizer module to -** use to normalize query tokens while parsing the expression. The azCol[] -** array, which is assumed to contain nCol entries, should contain the names -** of each column in the target fts3 table, in order from left to right. -** Column names must be nul-terminated strings. +** Otherwise, if an error occurs, an SQLite error code is returned and +** expression (*pp) freed. +*/ +static int fts3ExprBalance(Fts3Expr **pp, int nMaxDepth){ + int rc = SQLITE_OK; /* Return code */ + Fts3Expr *pRoot = *pp; /* Initial root node */ + Fts3Expr *pFree = 0; /* List of free nodes. Linked by pParent. */ + int eType = pRoot->eType; /* Type of node in this tree */ + + if( nMaxDepth==0 ){ + rc = SQLITE_ERROR; + } + + if( rc==SQLITE_OK && (eType==FTSQUERY_AND || eType==FTSQUERY_OR) ){ + Fts3Expr **apLeaf; + apLeaf = (Fts3Expr **)sqlite3_malloc(sizeof(Fts3Expr *) * nMaxDepth); + if( 0==apLeaf ){ + rc = SQLITE_NOMEM; + }else{ + memset(apLeaf, 0, sizeof(Fts3Expr *) * nMaxDepth); + } + + if( rc==SQLITE_OK ){ + int i; + Fts3Expr *p; + + /* Set $p to point to the left-most leaf in the tree of eType nodes. */ + for(p=pRoot; p->eType==eType; p=p->pLeft){ + assert( p->pParent==0 || p->pParent->pLeft==p ); + assert( p->pLeft && p->pRight ); + } + + /* This loop runs once for each leaf in the tree of eType nodes. */ + while( 1 ){ + int iLvl; + Fts3Expr *pParent = p->pParent; /* Current parent of p */ + + assert( pParent==0 || pParent->pLeft==p ); + p->pParent = 0; + if( pParent ){ + pParent->pLeft = 0; + }else{ + pRoot = 0; + } + rc = fts3ExprBalance(&p, nMaxDepth-1); + if( rc!=SQLITE_OK ) break; + + for(iLvl=0; p && iLvlpLeft = apLeaf[iLvl]; + pFree->pRight = p; + pFree->pLeft->pParent = pFree; + pFree->pRight->pParent = pFree; + + p = pFree; + pFree = pFree->pParent; + p->pParent = 0; + apLeaf[iLvl] = 0; + } + } + if( p ){ + sqlite3Fts3ExprFree(p); + rc = SQLITE_TOOBIG; + break; + } + + /* If that was the last leaf node, break out of the loop */ + if( pParent==0 ) break; + + /* Set $p to point to the next leaf in the tree of eType nodes */ + for(p=pParent->pRight; p->eType==eType; p=p->pLeft); + + /* Remove pParent from the original tree. */ + assert( pParent->pParent==0 || pParent->pParent->pLeft==pParent ); + pParent->pRight->pParent = pParent->pParent; + if( pParent->pParent ){ + pParent->pParent->pLeft = pParent->pRight; + }else{ + assert( pParent==pRoot ); + pRoot = pParent->pRight; + } + + /* Link pParent into the free node list. It will be used as an + ** internal node of the new tree. */ + pParent->pParent = pFree; + pFree = pParent; + } + + if( rc==SQLITE_OK ){ + p = 0; + for(i=0; ipParent = 0; + }else{ + assert( pFree!=0 ); + pFree->pRight = p; + pFree->pLeft = apLeaf[i]; + pFree->pLeft->pParent = pFree; + pFree->pRight->pParent = pFree; + + p = pFree; + pFree = pFree->pParent; + p->pParent = 0; + } + } + } + pRoot = p; + }else{ + /* An error occurred. Delete the contents of the apLeaf[] array + ** and pFree list. Everything else is cleaned up by the call to + ** sqlite3Fts3ExprFree(pRoot) below. */ + Fts3Expr *pDel; + for(i=0; ipParent; + sqlite3_free(pDel); + } + } + + assert( pFree==0 ); + sqlite3_free( apLeaf ); + } + } + + if( rc!=SQLITE_OK ){ + sqlite3Fts3ExprFree(pRoot); + pRoot = 0; + } + *pp = pRoot; + return rc; +} + +/* +** This function is similar to sqlite3Fts3ExprParse(), with the following +** differences: ** -** The iDefaultCol parameter should be passed the index of the table column -** that appears on the left-hand-side of the MATCH operator (the default -** column to match against for tokens for which a column name is not explicitly -** specified as part of the query string), or -1 if tokens may by default -** match any table column. +** 1. It does not do expression rebalancing. +** 2. It does not check that the expression does not exceed the +** maximum allowable depth. +** 3. Even if it fails, *ppExpr may still be set to point to an +** expression tree. It should be deleted using sqlite3Fts3ExprFree() +** in this case. */ -int sqlite3Fts3ExprParse( +static int fts3ExprParseUnbalanced( sqlite3_tokenizer *pTokenizer, /* Tokenizer module */ int iLangid, /* Language id for tokenizer */ char **azCol, /* Array of column names for fts3 table */ @@ -798,28 +955,116 @@ int sqlite3Fts3ExprParse( n = (int)strlen(z); } rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed); + assert( rc==SQLITE_OK || *ppExpr==0 ); /* Check for mismatched parenthesis */ if( rc==SQLITE_OK && sParse.nNest ){ rc = SQLITE_ERROR; + } + + return rc; +} + +/* +** Parameters z and n contain a pointer to and length of a buffer containing +** an fts3 query expression, respectively. This function attempts to parse the +** query expression and create a tree of Fts3Expr structures representing the +** parsed expression. If successful, *ppExpr is set to point to the head +** of the parsed expression tree and SQLITE_OK is returned. If an error +** occurs, either SQLITE_NOMEM (out-of-memory error) or SQLITE_ERROR (parse +** error) is returned and *ppExpr is set to 0. +** +** If parameter n is a negative number, then z is assumed to point to a +** nul-terminated string and the length is determined using strlen(). +** +** The first parameter, pTokenizer, is passed the fts3 tokenizer module to +** use to normalize query tokens while parsing the expression. The azCol[] +** array, which is assumed to contain nCol entries, should contain the names +** of each column in the target fts3 table, in order from left to right. +** Column names must be nul-terminated strings. +** +** The iDefaultCol parameter should be passed the index of the table column +** that appears on the left-hand-side of the MATCH operator (the default +** column to match against for tokens for which a column name is not explicitly +** specified as part of the query string), or -1 if tokens may by default +** match any table column. +*/ +int sqlite3Fts3ExprParse( + sqlite3_tokenizer *pTokenizer, /* Tokenizer module */ + int iLangid, /* Language id for tokenizer */ + char **azCol, /* Array of column names for fts3 table */ + int bFts4, /* True to allow FTS4-only syntax */ + int nCol, /* Number of entries in azCol[] */ + int iDefaultCol, /* Default column to query */ + const char *z, int n, /* Text of MATCH query */ + Fts3Expr **ppExpr, /* OUT: Parsed query structure */ + char **pzErr /* OUT: Error message (sqlite3_malloc) */ +){ + static const int MAX_EXPR_DEPTH = 12; + int rc = fts3ExprParseUnbalanced( + pTokenizer, iLangid, azCol, bFts4, nCol, iDefaultCol, z, n, ppExpr + ); + + /* Rebalance the expression. And check that its depth does not exceed + ** MAX_EXPR_DEPTH. */ + if( rc==SQLITE_OK && *ppExpr ){ + rc = fts3ExprBalance(ppExpr, MAX_EXPR_DEPTH); + if( rc==SQLITE_OK ){ + rc = fts3ExprCheckDepth(*ppExpr, MAX_EXPR_DEPTH); + } + } + + if( rc!=SQLITE_OK ){ sqlite3Fts3ExprFree(*ppExpr); *ppExpr = 0; + if( rc==SQLITE_TOOBIG ){ + *pzErr = sqlite3_mprintf( + "FTS expression tree is too large (maximum depth %d)", MAX_EXPR_DEPTH + ); + rc = SQLITE_ERROR; + }else if( rc==SQLITE_ERROR ){ + *pzErr = sqlite3_mprintf("malformed MATCH expression: [%s]", z); + } } return rc; } +/* +** Free a single node of an expression tree. +*/ +static void fts3FreeExprNode(Fts3Expr *p){ + assert( p->eType==FTSQUERY_PHRASE || p->pPhrase==0 ); + sqlite3Fts3EvalPhraseCleanup(p->pPhrase); + sqlite3_free(p->aMI); + sqlite3_free(p); +} + /* ** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse(). +** +** This function would be simpler if it recursively called itself. But +** that would mean passing a sufficiently large expression to ExprParse() +** could cause a stack overflow. */ -void sqlite3Fts3ExprFree(Fts3Expr *p){ - if( p ){ - assert( p->eType==FTSQUERY_PHRASE || p->pPhrase==0 ); - sqlite3Fts3ExprFree(p->pLeft); - sqlite3Fts3ExprFree(p->pRight); - sqlite3Fts3EvalPhraseCleanup(p->pPhrase); - sqlite3_free(p->aMI); - sqlite3_free(p); +void sqlite3Fts3ExprFree(Fts3Expr *pDel){ + Fts3Expr *p; + assert( pDel==0 || pDel->pParent==0 ); + for(p=pDel; p && (p->pLeft||p->pRight); p=(p->pLeft ? p->pLeft : p->pRight)){ + assert( p->pParent==0 || p==p->pParent->pRight || p==p->pParent->pLeft ); + } + while( p ){ + Fts3Expr *pParent = p->pParent; + fts3FreeExprNode(p); + if( pParent && p==pParent->pLeft && pParent->pRight ){ + p = pParent->pRight; + while( p && (p->pLeft || p->pRight) ){ + assert( p==p->pParent->pRight || p==p->pParent->pLeft ); + p = (p->pLeft ? p->pLeft : p->pRight); + } + }else{ + p = pParent; + } } } @@ -871,6 +1116,9 @@ static int queryTestTokenizer( ** the returned expression text and then freed using sqlite3_free(). */ static char *exprToString(Fts3Expr *pExpr, char *zBuf){ + if( pExpr==0 ){ + return sqlite3_mprintf(""); + } switch( pExpr->eType ){ case FTSQUERY_PHRASE: { Fts3Phrase *pPhrase = pExpr->pPhrase; @@ -978,10 +1226,21 @@ static void fts3ExprTest( azCol[ii] = (char *)sqlite3_value_text(argv[ii+2]); } - rc = sqlite3Fts3ExprParse( - pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr - ); + if( sqlite3_user_data(context) ){ + char *zDummy = 0; + rc = sqlite3Fts3ExprParse( + pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr, &zDummy + ); + assert( rc==SQLITE_OK || pExpr==0 ); + sqlite3_free(zDummy); + }else{ + rc = fts3ExprParseUnbalanced( + pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr + ); + } + if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){ + sqlite3Fts3ExprFree(pExpr); sqlite3_result_error(context, "Error parsing expression", -1); }else if( rc==SQLITE_NOMEM || !(zBuf = exprToString(pExpr, 0)) ){ sqlite3_result_error_nomem(context); @@ -1004,9 +1263,15 @@ exprtest_out: ** with database connection db. */ int sqlite3Fts3ExprInitTestInterface(sqlite3* db){ - return sqlite3_create_function( + int rc = sqlite3_create_function( db, "fts3_exprtest", -1, SQLITE_UTF8, 0, fts3ExprTest, 0, 0 ); + if( rc==SQLITE_OK ){ + rc = sqlite3_create_function(db, "fts3_exprtest_rebalance", + -1, SQLITE_UTF8, (void *)1, fts3ExprTest, 0, 0 + ); + } + return rc; } #endif diff --git a/ext/fts3/fts3_hash.h b/ext/fts3/fts3_hash.h index 399f515..dc3fcf8 100644 --- a/ext/fts3/fts3_hash.h +++ b/ext/fts3/fts3_hash.h @@ -9,7 +9,7 @@ ** May you share freely, never taking more than you give. ** ************************************************************************* -** This is the header file for the generic hash-table implemenation +** This is the header file for the generic hash-table implementation ** used in SQLite. We've modified it slightly to serve as a standalone ** hash table implementation for the full-text indexing module. ** diff --git a/ext/fts3/fts3_icu.c b/ext/fts3/fts3_icu.c index 18b7948..52df8c7 100644 --- a/ext/fts3/fts3_icu.c +++ b/ext/fts3/fts3_icu.c @@ -119,7 +119,7 @@ static int icuOpen( nChar = nInput+1; pCsr = (IcuCursor *)sqlite3_malloc( sizeof(IcuCursor) + /* IcuCursor */ - nChar * sizeof(UChar) + /* IcuCursor.aChar[] */ + ((nChar+3)&~3) * sizeof(UChar) + /* IcuCursor.aChar[] */ (nChar+1) * sizeof(int) /* IcuCursor.aOffset[] */ ); if( !pCsr ){ @@ -127,7 +127,7 @@ static int icuOpen( } memset(pCsr, 0, sizeof(IcuCursor)); pCsr->aChar = (UChar *)&pCsr[1]; - pCsr->aOffset = (int *)&pCsr->aChar[nChar]; + pCsr->aOffset = (int *)&pCsr->aChar[(nChar+3)&~3]; pCsr->aOffset[iOut] = iInput; U8_NEXT(zInput, iInput, nInput, c); diff --git a/ext/fts3/fts3_snippet.c b/ext/fts3/fts3_snippet.c index 6fce3d0..d54a787 100644 --- a/ext/fts3/fts3_snippet.c +++ b/ext/fts3/fts3_snippet.c @@ -389,9 +389,9 @@ static int fts3SnippetFindPositions(Fts3Expr *pExpr, int iPhrase, void *ctx){ ** is the snippet with the highest score, where scores are calculated ** by adding: ** -** (a) +1 point for each occurence of a matchable phrase in the snippet. +** (a) +1 point for each occurrence of a matchable phrase in the snippet. ** -** (b) +1000 points for the first occurence of each matchable phrase in +** (b) +1000 points for the first occurrence of each matchable phrase in ** the snippet for which the corresponding mCovered bit is not set. ** ** The selected snippet parameters are stored in structure *pFragment before @@ -576,7 +576,7 @@ static int fts3SnippetShift( return rc; } while( rc==SQLITE_OK && iCurrent<(nSnippet+nDesired) ){ - const char *ZDUMMY; int DUMMY1, DUMMY2, DUMMY3; + const char *ZDUMMY; int DUMMY1 = 0, DUMMY2 = 0, DUMMY3 = 0; rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent); } pMod->xClose(pC); @@ -620,8 +620,6 @@ static int fts3SnippetText( int iCol = pFragment->iCol+1; /* Query column to extract text from */ sqlite3_tokenizer_module *pMod; /* Tokenizer module methods object */ sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor open on zDoc/nDoc */ - const char *ZDUMMY; /* Dummy argument used with tokenizer */ - int DUMMY1; /* Dummy argument used with tokenizer */ zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol); if( zDoc==0 ){ @@ -640,10 +638,23 @@ static int fts3SnippetText( } while( rc==SQLITE_OK ){ - int iBegin; /* Offset in zDoc of start of token */ - int iFin; /* Offset in zDoc of end of token */ - int isHighlight; /* True for highlighted terms */ - + const char *ZDUMMY; /* Dummy argument used with tokenizer */ + int DUMMY1 = -1; /* Dummy argument used with tokenizer */ + int iBegin = 0; /* Offset in zDoc of start of token */ + int iFin = 0; /* Offset in zDoc of end of token */ + int isHighlight = 0; /* True for highlighted terms */ + + /* Variable DUMMY1 is initialized to a negative value above. Elsewhere + ** in the FTS code the variable that the third argument to xNext points to + ** is initialized to zero before the first (*but not necessarily + ** subsequent*) call to xNext(). This is done for a particular application + ** that needs to know whether or not the tokenizer is being used for + ** snippet generation or for some other purpose. + ** + ** Extreme care is required when writing code to depend on this + ** initialization. It is not a documented part of the tokenizer interface. + ** If a tokenizer is used directly by any code outside of FTS, this + ** convention might not be respected. */ rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent); if( rc!=SQLITE_OK ){ if( rc==SQLITE_DONE ){ @@ -1333,8 +1344,6 @@ void sqlite3Fts3Offsets( ){ Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; sqlite3_tokenizer_module const *pMod = pTab->pTokenizer->pModule; - const char *ZDUMMY; /* Dummy argument used with xNext() */ - int NDUMMY; /* Dummy argument used with xNext() */ int rc; /* Return Code */ int nToken; /* Number of tokens in query */ int iCol; /* Column currently being processed */ @@ -1367,9 +1376,11 @@ void sqlite3Fts3Offsets( */ for(iCol=0; iColnColumn; iCol++){ sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor */ - int iStart; - int iEnd; - int iCurrent; + const char *ZDUMMY; /* Dummy argument used with xNext() */ + int NDUMMY = 0; /* Dummy argument used with xNext() */ + int iStart = 0; + int iEnd = 0; + int iCurrent = 0; const char *zDoc; int nDoc; diff --git a/ext/fts3/fts3_test.c b/ext/fts3/fts3_test.c index 4da0b8f..75ec6bd 100644 --- a/ext/fts3/fts3_test.c +++ b/ext/fts3/fts3_test.c @@ -267,7 +267,7 @@ static int fts3_near_match_cmd( ** ** Whether or not the arguments are present, this command returns a list of ** two integers - the initial chunksize and threshold when the command is -** invoked. This can be used to restore the default behaviour after running +** invoked. This can be used to restore the default behavior after running ** tests. For example: ** ** # Override incr-load settings for testing: diff --git a/ext/fts3/fts3_tokenize_vtab.c b/ext/fts3/fts3_tokenize_vtab.c new file mode 100644 index 0000000..364852e --- /dev/null +++ b/ext/fts3/fts3_tokenize_vtab.c @@ -0,0 +1,454 @@ +/* +** 2013 Apr 22 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This file contains code for the "fts3tokenize" virtual table module. +** An fts3tokenize virtual table is created as follows: +** +** CREATE VIRTUAL TABLE USING fts3tokenize( +** , , ... +** ); +** +** The table created has the following schema: +** +** CREATE TABLE (input, token, start, end, position) +** +** When queried, the query must include a WHERE clause of type: +** +** input = +** +** The virtual table module tokenizes this , using the FTS3 +** tokenizer specified by the arguments to the CREATE VIRTUAL TABLE +** statement and returns one row for each token in the result. With +** fields set as follows: +** +** input: Always set to a copy of +** token: A token from the input. +** start: Byte offset of the token within the input . +** end: Byte offset of the byte immediately following the end of the +** token within the input string. +** pos: Token offset of token within input. +** +*/ +#include "fts3Int.h" +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) + +#include +#include + +typedef struct Fts3tokTable Fts3tokTable; +typedef struct Fts3tokCursor Fts3tokCursor; + +/* +** Virtual table structure. +*/ +struct Fts3tokTable { + sqlite3_vtab base; /* Base class used by SQLite core */ + const sqlite3_tokenizer_module *pMod; + sqlite3_tokenizer *pTok; +}; + +/* +** Virtual table cursor structure. +*/ +struct Fts3tokCursor { + sqlite3_vtab_cursor base; /* Base class used by SQLite core */ + char *zInput; /* Input string */ + sqlite3_tokenizer_cursor *pCsr; /* Cursor to iterate through zInput */ + int iRowid; /* Current 'rowid' value */ + const char *zToken; /* Current 'token' value */ + int nToken; /* Size of zToken in bytes */ + int iStart; /* Current 'start' value */ + int iEnd; /* Current 'end' value */ + int iPos; /* Current 'pos' value */ +}; + +/* +** Query FTS for the tokenizer implementation named zName. +*/ +static int fts3tokQueryTokenizer( + Fts3Hash *pHash, + const char *zName, + const sqlite3_tokenizer_module **pp, + char **pzErr +){ + sqlite3_tokenizer_module *p; + int nName = (int)strlen(zName); + + p = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash, zName, nName+1); + if( !p ){ + *pzErr = sqlite3_mprintf("unknown tokenizer: %s", zName); + return SQLITE_ERROR; + } + + *pp = p; + return SQLITE_OK; +} + +/* +** The second argument, argv[], is an array of pointers to nul-terminated +** strings. This function makes a copy of the array and strings into a +** single block of memory. It then dequotes any of the strings that appear +** to be quoted. +** +** If successful, output parameter *pazDequote is set to point at the +** array of dequoted strings and SQLITE_OK is returned. The caller is +** responsible for eventually calling sqlite3_free() to free the array +** in this case. Or, if an error occurs, an SQLite error code is returned. +** The final value of *pazDequote is undefined in this case. +*/ +static int fts3tokDequoteArray( + int argc, /* Number of elements in argv[] */ + const char * const *argv, /* Input array */ + char ***pazDequote /* Output array */ +){ + int rc = SQLITE_OK; /* Return code */ + if( argc==0 ){ + *pazDequote = 0; + }else{ + int i; + int nByte = 0; + char **azDequote; + + for(i=0; ixCreate((nDequote>1 ? nDequote-1 : 0), azArg, &pTok); + } + + if( rc==SQLITE_OK ){ + pTab = (Fts3tokTable *)sqlite3_malloc(sizeof(Fts3tokTable)); + if( pTab==0 ){ + rc = SQLITE_NOMEM; + } + } + + if( rc==SQLITE_OK ){ + memset(pTab, 0, sizeof(Fts3tokTable)); + pTab->pMod = pMod; + pTab->pTok = pTok; + *ppVtab = &pTab->base; + }else{ + if( pTok ){ + pMod->xDestroy(pTok); + } + } + + sqlite3_free(azDequote); + return rc; +} + +/* +** This function does the work for both the xDisconnect and xDestroy methods. +** These tables have no persistent representation of their own, so xDisconnect +** and xDestroy are identical operations. +*/ +static int fts3tokDisconnectMethod(sqlite3_vtab *pVtab){ + Fts3tokTable *pTab = (Fts3tokTable *)pVtab; + + pTab->pMod->xDestroy(pTab->pTok); + sqlite3_free(pTab); + return SQLITE_OK; +} + +/* +** xBestIndex - Analyze a WHERE and ORDER BY clause. +*/ +static int fts3tokBestIndexMethod( + sqlite3_vtab *pVTab, + sqlite3_index_info *pInfo +){ + int i; + UNUSED_PARAMETER(pVTab); + + for(i=0; inConstraint; i++){ + if( pInfo->aConstraint[i].usable + && pInfo->aConstraint[i].iColumn==0 + && pInfo->aConstraint[i].op==SQLITE_INDEX_CONSTRAINT_EQ + ){ + pInfo->idxNum = 1; + pInfo->aConstraintUsage[i].argvIndex = 1; + pInfo->aConstraintUsage[i].omit = 1; + pInfo->estimatedCost = 1; + return SQLITE_OK; + } + } + + pInfo->idxNum = 0; + assert( pInfo->estimatedCost>1000000.0 ); + + return SQLITE_OK; +} + +/* +** xOpen - Open a cursor. +*/ +static int fts3tokOpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){ + Fts3tokCursor *pCsr; + UNUSED_PARAMETER(pVTab); + + pCsr = (Fts3tokCursor *)sqlite3_malloc(sizeof(Fts3tokCursor)); + if( pCsr==0 ){ + return SQLITE_NOMEM; + } + memset(pCsr, 0, sizeof(Fts3tokCursor)); + + *ppCsr = (sqlite3_vtab_cursor *)pCsr; + return SQLITE_OK; +} + +/* +** Reset the tokenizer cursor passed as the only argument. As if it had +** just been returned by fts3tokOpenMethod(). +*/ +static void fts3tokResetCursor(Fts3tokCursor *pCsr){ + if( pCsr->pCsr ){ + Fts3tokTable *pTab = (Fts3tokTable *)(pCsr->base.pVtab); + pTab->pMod->xClose(pCsr->pCsr); + pCsr->pCsr = 0; + } + sqlite3_free(pCsr->zInput); + pCsr->zInput = 0; + pCsr->zToken = 0; + pCsr->nToken = 0; + pCsr->iStart = 0; + pCsr->iEnd = 0; + pCsr->iPos = 0; + pCsr->iRowid = 0; +} + +/* +** xClose - Close a cursor. +*/ +static int fts3tokCloseMethod(sqlite3_vtab_cursor *pCursor){ + Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; + + fts3tokResetCursor(pCsr); + sqlite3_free(pCsr); + return SQLITE_OK; +} + +/* +** xNext - Advance the cursor to the next row, if any. +*/ +static int fts3tokNextMethod(sqlite3_vtab_cursor *pCursor){ + Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; + Fts3tokTable *pTab = (Fts3tokTable *)(pCursor->pVtab); + int rc; /* Return code */ + + pCsr->iRowid++; + rc = pTab->pMod->xNext(pCsr->pCsr, + &pCsr->zToken, &pCsr->nToken, + &pCsr->iStart, &pCsr->iEnd, &pCsr->iPos + ); + + if( rc!=SQLITE_OK ){ + fts3tokResetCursor(pCsr); + if( rc==SQLITE_DONE ) rc = SQLITE_OK; + } + + return rc; +} + +/* +** xFilter - Initialize a cursor to point at the start of its data. +*/ +static int fts3tokFilterMethod( + sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */ + int idxNum, /* Strategy index */ + const char *idxStr, /* Unused */ + int nVal, /* Number of elements in apVal */ + sqlite3_value **apVal /* Arguments for the indexing scheme */ +){ + int rc = SQLITE_ERROR; + Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; + Fts3tokTable *pTab = (Fts3tokTable *)(pCursor->pVtab); + UNUSED_PARAMETER(idxStr); + UNUSED_PARAMETER(nVal); + + fts3tokResetCursor(pCsr); + if( idxNum==1 ){ + const char *zByte = (const char *)sqlite3_value_text(apVal[0]); + int nByte = sqlite3_value_bytes(apVal[0]); + pCsr->zInput = sqlite3_malloc(nByte+1); + if( pCsr->zInput==0 ){ + rc = SQLITE_NOMEM; + }else{ + memcpy(pCsr->zInput, zByte, nByte); + pCsr->zInput[nByte] = 0; + rc = pTab->pMod->xOpen(pTab->pTok, pCsr->zInput, nByte, &pCsr->pCsr); + if( rc==SQLITE_OK ){ + pCsr->pCsr->pTokenizer = pTab->pTok; + } + } + } + + if( rc!=SQLITE_OK ) return rc; + return fts3tokNextMethod(pCursor); +} + +/* +** xEof - Return true if the cursor is at EOF, or false otherwise. +*/ +static int fts3tokEofMethod(sqlite3_vtab_cursor *pCursor){ + Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; + return (pCsr->zToken==0); +} + +/* +** xColumn - Return a column value. +*/ +static int fts3tokColumnMethod( + sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ + sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */ + int iCol /* Index of column to read value from */ +){ + Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; + + /* CREATE TABLE x(input, token, start, end, position) */ + switch( iCol ){ + case 0: + sqlite3_result_text(pCtx, pCsr->zInput, -1, SQLITE_TRANSIENT); + break; + case 1: + sqlite3_result_text(pCtx, pCsr->zToken, pCsr->nToken, SQLITE_TRANSIENT); + break; + case 2: + sqlite3_result_int(pCtx, pCsr->iStart); + break; + case 3: + sqlite3_result_int(pCtx, pCsr->iEnd); + break; + default: + assert( iCol==4 ); + sqlite3_result_int(pCtx, pCsr->iPos); + break; + } + return SQLITE_OK; +} + +/* +** xRowid - Return the current rowid for the cursor. +*/ +static int fts3tokRowidMethod( + sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ + sqlite_int64 *pRowid /* OUT: Rowid value */ +){ + Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; + *pRowid = (sqlite3_int64)pCsr->iRowid; + return SQLITE_OK; +} + +/* +** Register the fts3tok module with database connection db. Return SQLITE_OK +** if successful or an error code if sqlite3_create_module() fails. +*/ +int sqlite3Fts3InitTok(sqlite3 *db, Fts3Hash *pHash){ + static const sqlite3_module fts3tok_module = { + 0, /* iVersion */ + fts3tokConnectMethod, /* xCreate */ + fts3tokConnectMethod, /* xConnect */ + fts3tokBestIndexMethod, /* xBestIndex */ + fts3tokDisconnectMethod, /* xDisconnect */ + fts3tokDisconnectMethod, /* xDestroy */ + fts3tokOpenMethod, /* xOpen */ + fts3tokCloseMethod, /* xClose */ + fts3tokFilterMethod, /* xFilter */ + fts3tokNextMethod, /* xNext */ + fts3tokEofMethod, /* xEof */ + fts3tokColumnMethod, /* xColumn */ + fts3tokRowidMethod, /* xRowid */ + 0, /* xUpdate */ + 0, /* xBegin */ + 0, /* xSync */ + 0, /* xCommit */ + 0, /* xRollback */ + 0, /* xFindFunction */ + 0, /* xRename */ + 0, /* xSavepoint */ + 0, /* xRelease */ + 0 /* xRollbackTo */ + }; + int rc; /* Return code */ + + rc = sqlite3_create_module(db, "fts3tokenize", &fts3tok_module, (void*)pHash); + return rc; +} + +#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ diff --git a/ext/fts3/fts3_tokenizer.c b/ext/fts3/fts3_tokenizer.c index 4a7a175..04f8446 100644 --- a/ext/fts3/fts3_tokenizer.c +++ b/ext/fts3/fts3_tokenizer.c @@ -251,10 +251,10 @@ static void testFunc( const char *azArg[64]; const char *zToken; - int nToken; - int iStart; - int iEnd; - int iPos; + int nToken = 0; + int iStart = 0; + int iEnd = 0; + int iPos = 0; int i; Tcl_Obj *pRet; @@ -428,7 +428,7 @@ static void intTestFunc( /* ** Set up SQL objects in database db used to access the contents of ** the hash table pointed to by argument pHash. The hash table must -** been initialised to use string keys, and to take a private copy +** been initialized to use string keys, and to take a private copy ** of the key when a value is inserted. i.e. by a call similar to: ** ** sqlite3Fts3HashInit(pHash, FTS3_HASH_STRING, 1); diff --git a/ext/fts3/fts3_tokenizer.h b/ext/fts3/fts3_tokenizer.h index c91c7ed..4a40b2b 100644 --- a/ext/fts3/fts3_tokenizer.h +++ b/ext/fts3/fts3_tokenizer.h @@ -70,7 +70,7 @@ struct sqlite3_tokenizer_module { ** This method should return either SQLITE_OK (0), or an SQLite error ** code. If SQLITE_OK is returned, then *ppTokenizer should be set ** to point at the newly created tokenizer structure. The generic - ** sqlite3_tokenizer.pModule variable should not be initialised by + ** sqlite3_tokenizer.pModule variable should not be initialized by ** this callback. The caller will do so. */ int (*xCreate)( diff --git a/ext/fts3/fts3_unicode.c b/ext/fts3/fts3_unicode.c index 79941ed..188358e 100644 --- a/ext/fts3/fts3_unicode.c +++ b/ext/fts3/fts3_unicode.c @@ -125,7 +125,7 @@ static int unicodeDestroy(sqlite3_tokenizer *pTokenizer){ ** ** If a standalone diacritic mark (one that sqlite3FtsUnicodeIsdiacritic() ** identifies as a diacritic) occurs in the zIn/nIn string it is ignored. -** It is not possible to change the behaviour of the tokenizer with respect +** It is not possible to change the behavior of the tokenizer with respect ** to these codepoints. */ static int unicodeAddExceptions( diff --git a/ext/fts3/fts3_write.c b/ext/fts3/fts3_write.c index c9f1743..269d1dd 100644 --- a/ext/fts3/fts3_write.c +++ b/ext/fts3/fts3_write.c @@ -776,16 +776,16 @@ static int fts3PendingTermsAdd( int iLangid, /* Language id to use */ const char *zText, /* Text of document to be inserted */ int iCol, /* Column into which text is being inserted */ - u32 *pnWord /* OUT: Number of tokens inserted */ + u32 *pnWord /* IN/OUT: Incr. by number tokens inserted */ ){ int rc; - int iStart; - int iEnd; - int iPos; + int iStart = 0; + int iEnd = 0; + int iPos = 0; int nWord = 0; char const *zToken; - int nToken; + int nToken = 0; sqlite3_tokenizer *pTokenizer = p->pTokenizer; sqlite3_tokenizer_module const *pModule = pTokenizer->pModule; @@ -840,7 +840,7 @@ static int fts3PendingTermsAdd( } pModule->xClose(pCsr); - *pnWord = nWord; + *pnWord += nWord; return (rc==SQLITE_DONE ? SQLITE_OK : rc); } @@ -1044,11 +1044,13 @@ static void fts3DeleteTerms( int *pRC, /* Result code */ Fts3Table *p, /* The FTS table to delete from */ sqlite3_value *pRowid, /* The docid to be deleted */ - u32 *aSz /* Sizes of deleted document written here */ + u32 *aSz, /* Sizes of deleted document written here */ + int *pbFound /* OUT: Set to true if row really does exist */ ){ int rc; sqlite3_stmt *pSelect; + assert( *pbFound==0 ); if( *pRC ) return; rc = fts3SqlStmt(p, SQL_SELECT_CONTENT_BY_ROWID, &pSelect, &pRowid); if( rc==SQLITE_OK ){ @@ -1066,6 +1068,7 @@ static void fts3DeleteTerms( *pRC = rc; return; } + *pbFound = 1; } rc = sqlite3_reset(pSelect); }else{ @@ -1479,6 +1482,7 @@ static int fts3SegReaderNextDocid( *pnOffsetList = (int)(p - pReader->pOffsetList - 1); } + /* List may have been edited in place by fts3EvalNearTrim() */ while( papSegment, nMerge, j, xCmp); + if( nList>0 && fts3SegReaderIsPending(apSegment[0]) ){ + rc = fts3MsrBufferData(pMsr, pList, nList+1); + if( rc!=SQLITE_OK ) return rc; + assert( (pMsr->aBuffer[nList] & 0xFE)==0x00 ); + pList = pMsr->aBuffer; + } + if( pMsr->iColFilter>=0 ){ - fts3ColumnFilter(pMsr->iColFilter, &pList, &nList); + fts3ColumnFilter(pMsr->iColFilter, 1, &pList, &nList); } if( nList>0 ){ - if( fts3SegReaderIsPending(apSegment[0]) ){ - rc = fts3MsrBufferData(pMsr, pList, nList+1); - if( rc!=SQLITE_OK ) return rc; - *paPoslist = pMsr->aBuffer; - assert( (pMsr->aBuffer[nList] & 0xFE)==0x00 ); - }else{ - *paPoslist = pList; - } + *paPoslist = pList; *piDocid = iDocid; *pnPoslist = nList; break; @@ -2853,7 +2864,7 @@ int sqlite3Fts3SegReaderStep( } if( isColFilter ){ - fts3ColumnFilter(pFilter->iCol, &pList, &nList); + fts3ColumnFilter(pFilter->iCol, 0, &pList, &nList); } if( !isIgnoreEmpty || nList>0 ){ @@ -3290,7 +3301,7 @@ static int fts3DoRebuild(Fts3Table *p){ int iCol; int iLangid = langidFromSelect(p, pStmt); rc = fts3PendingTermsDocid(p, iLangid, sqlite3_column_int64(pStmt, 0)); - aSz[p->nColumn] = 0; + memset(aSz, 0, sizeof(aSz[0]) * (p->nColumn+1)); for(iCol=0; rc==SQLITE_OK && iColnColumn; iCol++){ const char *z = (const char *) sqlite3_column_text(pStmt, iCol+1); rc = fts3PendingTermsAdd(p, iLangid, z, iCol, &aSz[iCol]); @@ -4934,9 +4945,9 @@ static int fts3IntegrityCheck(Fts3Table *p, int *pbOk){ rc = sqlite3Fts3OpenTokenizer(p->pTokenizer, iLang, zText, nText, &pT); while( rc==SQLITE_OK ){ char const *zToken; /* Buffer containing token */ - int nToken; /* Number of bytes in token */ - int iDum1, iDum2; /* Dummy variables */ - int iPos; /* Position of token in zText */ + int nToken = 0; /* Number of bytes in token */ + int iDum1 = 0, iDum2 = 0; /* Dummy variables */ + int iPos = 0; /* Position of token in zText */ rc = pModule->xNext(pT, &zToken, &nToken, &iDum1, &iDum2, &iPos); if( rc==SQLITE_OK ){ @@ -5103,9 +5114,9 @@ int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *pCsr){ rc = sqlite3Fts3OpenTokenizer(pT, pCsr->iLangid, zText, -1, &pTC); while( rc==SQLITE_OK ){ char const *zToken; /* Buffer containing token */ - int nToken; /* Number of bytes in token */ - int iDum1, iDum2; /* Dummy variables */ - int iPos; /* Position of token in zText */ + int nToken = 0; /* Number of bytes in token */ + int iDum1 = 0, iDum2 = 0; /* Dummy variables */ + int iPos = 0; /* Position of token in zText */ rc = pModule->xNext(pTC, &zToken, &nToken, &iDum1, &iDum2, &iPos); for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){ @@ -5194,28 +5205,32 @@ int sqlite3Fts3DeferToken( static int fts3DeleteByRowid( Fts3Table *p, sqlite3_value *pRowid, - int *pnDoc, + int *pnChng, /* IN/OUT: Decrement if row is deleted */ u32 *aSzDel ){ - int isEmpty = 0; - int rc = fts3IsEmpty(p, pRowid, &isEmpty); - if( rc==SQLITE_OK ){ - if( isEmpty ){ - /* Deleting this row means the whole table is empty. In this case - ** delete the contents of all three tables and throw away any - ** data in the pendingTerms hash table. */ - rc = fts3DeleteAll(p, 1); - *pnDoc = *pnDoc - 1; - }else{ - fts3DeleteTerms(&rc, p, pRowid, aSzDel); - if( p->zContentTbl==0 ){ - fts3SqlExec(&rc, p, SQL_DELETE_CONTENT, &pRowid); - if( sqlite3_changes(p->db) ) *pnDoc = *pnDoc - 1; + int rc = SQLITE_OK; /* Return code */ + int bFound = 0; /* True if *pRowid really is in the table */ + + fts3DeleteTerms(&rc, p, pRowid, aSzDel, &bFound); + if( bFound && rc==SQLITE_OK ){ + int isEmpty = 0; /* Deleting *pRowid leaves the table empty */ + rc = fts3IsEmpty(p, pRowid, &isEmpty); + if( rc==SQLITE_OK ){ + if( isEmpty ){ + /* Deleting this row means the whole table is empty. In this case + ** delete the contents of all three tables and throw away any + ** data in the pendingTerms hash table. */ + rc = fts3DeleteAll(p, 1); + *pnChng = 0; + memset(aSzDel, 0, sizeof(u32) * (p->nColumn+1) * 2); }else{ - *pnDoc = *pnDoc - 1; - } - if( p->bHasDocsize ){ - fts3SqlExec(&rc, p, SQL_DELETE_DOCSIZE, &pRowid); + *pnChng = *pnChng - 1; + if( p->zContentTbl==0 ){ + fts3SqlExec(&rc, p, SQL_DELETE_CONTENT, &pRowid); + } + if( p->bHasDocsize ){ + fts3SqlExec(&rc, p, SQL_DELETE_DOCSIZE, &pRowid); + } } } } @@ -5246,7 +5261,7 @@ int sqlite3Fts3UpdateMethod( int rc = SQLITE_OK; /* Return Code */ int isRemove = 0; /* True for an UPDATE or DELETE */ u32 *aSzIns = 0; /* Sizes of inserted documents */ - u32 *aSzDel; /* Sizes of deleted documents */ + u32 *aSzDel = 0; /* Sizes of deleted documents */ int nChng = 0; /* Net change in number of documents */ int bInsertDone = 0; @@ -5274,13 +5289,13 @@ int sqlite3Fts3UpdateMethod( } /* Allocate space to hold the change in document sizes */ - aSzIns = sqlite3_malloc( sizeof(aSzIns[0])*(p->nColumn+1)*2 ); - if( aSzIns==0 ){ + aSzDel = sqlite3_malloc( sizeof(aSzDel[0])*(p->nColumn+1)*2 ); + if( aSzDel==0 ){ rc = SQLITE_NOMEM; goto update_out; } - aSzDel = &aSzIns[p->nColumn+1]; - memset(aSzIns, 0, sizeof(aSzIns[0])*(p->nColumn+1)*2); + aSzIns = &aSzDel[p->nColumn+1]; + memset(aSzDel, 0, sizeof(aSzDel[0])*(p->nColumn+1)*2); /* If this is an INSERT operation, or an UPDATE that modifies the rowid ** value, then this operation requires constraint handling. @@ -5365,7 +5380,7 @@ int sqlite3Fts3UpdateMethod( } update_out: - sqlite3_free(aSzIns); + sqlite3_free(aSzDel); sqlite3Fts3SegmentsClose(p); return rc; } -- cgit v1.2.3