summaryrefslogtreecommitdiff
path: root/ext/fts3
diff options
context:
space:
mode:
Diffstat (limited to 'ext/fts3')
-rw-r--r-- ext/fts3/fts3.c 74
-rw-r--r-- ext/fts3/fts3Int.h 5
-rw-r--r-- ext/fts3/fts3_aux.c 29
-rw-r--r-- ext/fts3/fts3_expr.c 335
-rw-r--r-- ext/fts3/fts3_hash.h 2
-rw-r--r-- ext/fts3/fts3_icu.c 4
-rw-r--r-- ext/fts3/fts3_snippet.c 39
-rw-r--r-- ext/fts3/fts3_test.c 2
-rw-r--r-- ext/fts3/fts3_tokenize_vtab.c 454
-rw-r--r-- ext/fts3/fts3_tokenizer.c 10
-rw-r--r-- ext/fts3/fts3_tokenizer.h 2
-rw-r--r-- ext/fts3/fts3_unicode.c 2
-rw-r--r-- ext/fts3/fts3_write.c 113
13 files changed, 919 insertions, 152 deletions
diff --git a/ext/fts3/fts3.c b/ext/fts3/fts3.c
index 58414f6..c00a13f 100644
--- a/ext/fts3/fts3.c
+++ b/ext/fts3/fts3.c
@@ -1571,7 +1571,7 @@ static int fts3CursorSeek(sqlite3_context *pContext, Fts3Cursor *pCsr){
}else{
rc = sqlite3_reset(pCsr->pStmt);
if( rc==SQLITE_OK && ((Fts3Table *)pCsr->base.pVtab)->zContentTbl==0 ){
- /* If no row was found and no error has occured, then the %_content
+ /* If no row was found and no error has occurred, then the %_content
** table is missing a row that is present in the full-text index.
** The data structures are corrupt. */
rc = FTS_CORRUPT_VTAB;
@@ -2811,7 +2811,7 @@ static void fts3SegReaderCursorFree(Fts3MultiSegReader *pSegcsr){
}
/*
-** This function retreives the doclist for the specified term (or term
+** This function retrieves the doclist for the specified term (or term
** prefix) from the database.
*/
static int fts3TermSelect(
@@ -2975,14 +2975,12 @@ static int fts3FilterMethod(
pCsr->iLangid = 0;
if( nVal==2 ) pCsr->iLangid = sqlite3_value_int(apVal[1]);
+ assert( p->base.zErrMsg==0 );
rc = sqlite3Fts3ExprParse(p->pTokenizer, pCsr->iLangid,
- p->azColumn, p->bFts4, p->nColumn, iCol, zQuery, -1, &pCsr->pExpr
+ p->azColumn, p->bFts4, p->nColumn, iCol, zQuery, -1, &pCsr->pExpr,
+ &p->base.zErrMsg
);
if( rc!=SQLITE_OK ){
- if( rc==SQLITE_ERROR ){
- static const char *zErr = "malformed MATCH expression: [%s]";
- p->base.zErrMsg = sqlite3_mprintf(zErr, zQuery);
- }
return rc;
}
@@ -3562,7 +3560,7 @@ void sqlite3Fts3IcuTokenizerModule(sqlite3_tokenizer_module const**ppModule);
#endif
/*
-** Initialise the fts3 extension. If this extension is built as part
+** Initialize the fts3 extension. If this extension is built as part
** of the sqlite library, then this function is called directly by
** SQLite. If fts3 is built as a dynamically loadable extension, this
** function is called by the sqlite3_extension_init() entry point.
@@ -3596,7 +3594,7 @@ int sqlite3Fts3Init(sqlite3 *db){
sqlite3Fts3SimpleTokenizerModule(&pSimple);
sqlite3Fts3PorterTokenizerModule(&pPorter);
- /* Allocate and initialise the hash-table used to store tokenizers. */
+ /* Allocate and initialize the hash-table used to store tokenizers. */
pHash = sqlite3_malloc(sizeof(Fts3Hash));
if( !pHash ){
rc = SQLITE_NOMEM;
@@ -3646,9 +3644,13 @@ int sqlite3Fts3Init(sqlite3 *db){
db, "fts4", &fts3Module, (void *)pHash, 0
);
}
+ if( rc==SQLITE_OK ){
+ rc = sqlite3Fts3InitTok(db, (void *)pHash);
+ }
return rc;
}
+
/* An error has occurred. Delete the hash table and return the error code. */
assert( rc!=SQLITE_OK );
if( pHash ){
@@ -4743,35 +4745,39 @@ static int fts3EvalNearTest(Fts3Expr *pExpr, int *pRc){
nTmp += p->pRight->pPhrase->doclist.nList;
}
nTmp += p->pPhrase->doclist.nList;
- aTmp = sqlite3_malloc(nTmp*2);
- if( !aTmp ){
- *pRc = SQLITE_NOMEM;
+ if( nTmp==0 ){
res = 0;
}else{
- char *aPoslist = p->pPhrase->doclist.pList;
- int nToken = p->pPhrase->nToken;
+ aTmp = sqlite3_malloc(nTmp*2);
+ if( !aTmp ){
+ *pRc = SQLITE_NOMEM;
+ res = 0;
+ }else{
+ char *aPoslist = p->pPhrase->doclist.pList;
+ int nToken = p->pPhrase->nToken;
- for(p=p->pParent;res && p && p->eType==FTSQUERY_NEAR; p=p->pParent){
- Fts3Phrase *pPhrase = p->pRight->pPhrase;
- int nNear = p->nNear;
- res = fts3EvalNearTrim(nNear, aTmp, &aPoslist, &nToken, pPhrase);
- }
-
- aPoslist = pExpr->pRight->pPhrase->doclist.pList;
- nToken = pExpr->pRight->pPhrase->nToken;
- for(p=pExpr->pLeft; p && res; p=p->pLeft){
- int nNear;
- Fts3Phrase *pPhrase;
- assert( p->pParent && p->pParent->pLeft==p );
- nNear = p->pParent->nNear;
- pPhrase = (
- p->eType==FTSQUERY_NEAR ? p->pRight->pPhrase : p->pPhrase
- );
- res = fts3EvalNearTrim(nNear, aTmp, &aPoslist, &nToken, pPhrase);
+ for(p=p->pParent;res && p && p->eType==FTSQUERY_NEAR; p=p->pParent){
+ Fts3Phrase *pPhrase = p->pRight->pPhrase;
+ int nNear = p->nNear;
+ res = fts3EvalNearTrim(nNear, aTmp, &aPoslist, &nToken, pPhrase);
+ }
+
+ aPoslist = pExpr->pRight->pPhrase->doclist.pList;
+ nToken = pExpr->pRight->pPhrase->nToken;
+ for(p=pExpr->pLeft; p && res; p=p->pLeft){
+ int nNear;
+ Fts3Phrase *pPhrase;
+ assert( p->pParent && p->pParent->pLeft==p );
+ nNear = p->pParent->nNear;
+ pPhrase = (
+ p->eType==FTSQUERY_NEAR ? p->pRight->pPhrase : p->pPhrase
+ );
+ res = fts3EvalNearTrim(nNear, aTmp, &aPoslist, &nToken, pPhrase);
+ }
}
- }
- sqlite3_free(aTmp);
+ sqlite3_free(aTmp);
+ }
}
return res;
@@ -5191,7 +5197,7 @@ int sqlite3Fts3EvalPhraseStats(
** of the current row.
**
** More specifically, the returned buffer contains 1 varint for each
-** occurence of the phrase in the column, stored using the normal (delta+2)
+** occurrence of the phrase in the column, stored using the normal (delta+2)
** compression and is terminated by either an 0x01 or 0x00 byte. For example,
** if the requested column contains "a b X c d X X" and the position-list
** for 'X' is requested, the buffer returned may contain:
diff --git a/ext/fts3/fts3Int.h b/ext/fts3/fts3Int.h
index 77ca470..b19064c 100644
--- a/ext/fts3/fts3Int.h
+++ b/ext/fts3/fts3Int.h
@@ -524,7 +524,7 @@ void sqlite3Fts3Matchinfo(sqlite3_context *, Fts3Cursor *, const char *);
/* fts3_expr.c */
int sqlite3Fts3ExprParse(sqlite3_tokenizer *, int,
- char **, int, int, int, const char *, int, Fts3Expr **
+ char **, int, int, int, const char *, int, Fts3Expr **, char **
);
void sqlite3Fts3ExprFree(Fts3Expr *);
#ifdef SQLITE_TEST
@@ -549,6 +549,9 @@ int sqlite3Fts3EvalPhrasePoslist(Fts3Cursor *, Fts3Expr *, int iCol, char **);
int sqlite3Fts3MsrOvfl(Fts3Cursor *, Fts3MultiSegReader *, int *);
int sqlite3Fts3MsrIncrRestart(Fts3MultiSegReader *pCsr);
+/* fts3_tokenize_vtab.c */
+int sqlite3Fts3InitTok(sqlite3*, Fts3Hash *);
+
/* fts3_unicode2.c (functions generated by parsing unicode text files) */
#ifdef SQLITE_ENABLE_FTS4_UNICODE61
int sqlite3FtsUnicodeFold(int, int);
diff --git a/ext/fts3/fts3_aux.c b/ext/fts3/fts3_aux.c
index a2bff2e..9b582fc 100644
--- a/ext/fts3/fts3_aux.c
+++ b/ext/fts3/fts3_aux.c
@@ -70,17 +70,26 @@ static int fts3auxConnectMethod(
UNUSED_PARAMETER(pUnused);
- /* The user should specify a single argument - the name of an fts3 table. */
- if( argc!=4 ){
- *pzErr = sqlite3_mprintf(
- "wrong number of arguments to fts4aux constructor"
- );
- return SQLITE_ERROR;
- }
+ /* The user should invoke this in one of two forms:
+ **
+ ** CREATE VIRTUAL TABLE xxx USING fts4aux(fts4-table);
+ ** CREATE VIRTUAL TABLE xxx USING fts4aux(fts4-table-db, fts4-table);
+ */
+ if( argc!=4 && argc!=5 ) goto bad_args;
zDb = argv[1];
nDb = (int)strlen(zDb);
- zFts3 = argv[3];
+ if( argc==5 ){
+ if( nDb==4 && 0==sqlite3_strnicmp("temp", zDb, 4) ){
+ zDb = argv[3];
+ nDb = (int)strlen(zDb);
+ zFts3 = argv[4];
+ }else{
+ goto bad_args;
+ }
+ }else{
+ zFts3 = argv[3];
+ }
nFts3 = (int)strlen(zFts3);
rc = sqlite3_declare_vtab(db, FTS3_TERMS_SCHEMA);
@@ -103,6 +112,10 @@ static int fts3auxConnectMethod(
*ppVtab = (sqlite3_vtab *)p;
return SQLITE_OK;
+
+ bad_args:
+ *pzErr = sqlite3_mprintf("invalid arguments to fts4aux constructor");
+ return SQLITE_ERROR;
}
/*
diff --git a/ext/fts3/fts3_expr.c b/ext/fts3/fts3_expr.c
index a6e3492..c046d7d 100644
--- a/ext/fts3/fts3_expr.c
+++ b/ext/fts3/fts3_expr.c
@@ -106,7 +106,7 @@ struct ParseContext {
** This function is equivalent to the standard isspace() function.
**
** The standard isspace() can be awkward to use safely, because although it
-** is defined to accept an argument of type int, its behaviour when passed
+** is defined to accept an argument of type int, its behavior when passed
** an integer that falls outside of the range of the unsigned char type
** is undefined (and sometimes, "undefined" means segfault). This wrapper
** is defined to accept an argument of type char, and always returns 0 for
@@ -185,7 +185,7 @@ static int getNextToken(
rc = sqlite3Fts3OpenTokenizer(pTokenizer, pParse->iLangid, z, n, &pCursor);
if( rc==SQLITE_OK ){
const char *zToken;
- int nToken, iStart, iEnd, iPosition;
+ int nToken = 0, iStart = 0, iEnd = 0, iPosition = 0;
int nByte; /* total space to allocate */
rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition);
@@ -300,7 +300,7 @@ static int getNextString(
int ii;
for(ii=0; rc==SQLITE_OK; ii++){
const char *zByte;
- int nByte, iBegin, iEnd, iPos;
+ int nByte = 0, iBegin = 0, iEnd = 0, iPos = 0;
rc = pModule->xNext(pCursor, &zByte, &nByte, &iBegin, &iEnd, &iPos);
if( rc==SQLITE_OK ){
Fts3PhraseToken *pToken;
@@ -640,8 +640,10 @@ static int fts3ExprParse(
}
pNot->eType = FTSQUERY_NOT;
pNot->pRight = p;
+ p->pParent = pNot;
if( pNotBranch ){
pNot->pLeft = pNotBranch;
+ pNotBranch->pParent = pNot;
}
pNotBranch = pNot;
p = pPrev;
@@ -729,6 +731,7 @@ static int fts3ExprParse(
pIter = pIter->pLeft;
}
pIter->pLeft = pRet;
+ pRet->pParent = pIter;
pRet = pNotBranch;
}
}
@@ -746,30 +749,184 @@ exprparse_out:
}
/*
-** Parameters z and n contain a pointer to and length of a buffer containing
-** an fts3 query expression, respectively. This function attempts to parse the
-** query expression and create a tree of Fts3Expr structures representing the
-** parsed expression. If successful, *ppExpr is set to point to the head
-** of the parsed expression tree and SQLITE_OK is returned. If an error
-** occurs, either SQLITE_NOMEM (out-of-memory error) or SQLITE_ERROR (parse
-** error) is returned and *ppExpr is set to 0.
+** Return SQLITE_TOOBIG if the maximum depth of the expression tree passed
+** as the first argument is more than nMaxDepth. Otherwise, return SQLITE_OK.
+*/
+static int fts3ExprCheckDepth(Fts3Expr *p, int nMaxDepth){
+ int rc = SQLITE_OK;
+ if( p ){
+ if( nMaxDepth<0 ){
+ rc = SQLITE_TOOBIG;
+ }else{
+ rc = fts3ExprCheckDepth(p->pLeft, nMaxDepth-1);
+ if( rc==SQLITE_OK ){
+ rc = fts3ExprCheckDepth(p->pRight, nMaxDepth-1);
+ }
+ }
+ }
+ return rc;
+}
+
+/*
+** This function attempts to transform the expression tree at (*pp) to
+** an equivalent but more balanced form. The tree is modified in place.
+** If successful, SQLITE_OK is returned and (*pp) set to point to the
+** new root expression node.
**
-** If parameter n is a negative number, then z is assumed to point to a
-** nul-terminated string and the length is determined using strlen().
+** nMaxDepth is the maximum allowable depth of the balanced sub-tree.
**
-** The first parameter, pTokenizer, is passed the fts3 tokenizer module to
-** use to normalize query tokens while parsing the expression. The azCol[]
-** array, which is assumed to contain nCol entries, should contain the names
-** of each column in the target fts3 table, in order from left to right.
-** Column names must be nul-terminated strings.
+** Otherwise, if an error occurs, an SQLite error code is returned, the
+** expression (*pp) is freed and (*pp) is set to 0.
+*/
+static int fts3ExprBalance(Fts3Expr **pp, int nMaxDepth){
+ int rc = SQLITE_OK; /* Return code */
+ Fts3Expr *pRoot = *pp; /* Initial root node */
+ Fts3Expr *pFree = 0; /* List of free nodes. Linked by pParent. */
+ int eType = pRoot->eType; /* Type of node in this tree */
+
+ if( nMaxDepth==0 ){
+ rc = SQLITE_ERROR;
+ }
+
+ if( rc==SQLITE_OK && (eType==FTSQUERY_AND || eType==FTSQUERY_OR) ){
+ Fts3Expr **apLeaf;
+ apLeaf = (Fts3Expr **)sqlite3_malloc(sizeof(Fts3Expr *) * nMaxDepth);
+ if( 0==apLeaf ){
+ rc = SQLITE_NOMEM;
+ }else{
+ memset(apLeaf, 0, sizeof(Fts3Expr *) * nMaxDepth);
+ }
+
+ if( rc==SQLITE_OK ){
+ int i;
+ Fts3Expr *p;
+
+ /* Set $p to point to the left-most leaf in the tree of eType nodes. */
+ for(p=pRoot; p->eType==eType; p=p->pLeft){
+ assert( p->pParent==0 || p->pParent->pLeft==p );
+ assert( p->pLeft && p->pRight );
+ }
+
+ /* This loop runs once for each leaf in the tree of eType nodes. */
+ while( 1 ){
+ int iLvl;
+ Fts3Expr *pParent = p->pParent; /* Current parent of p */
+
+ assert( pParent==0 || pParent->pLeft==p );
+ p->pParent = 0;
+ if( pParent ){
+ pParent->pLeft = 0;
+ }else{
+ pRoot = 0;
+ }
+ rc = fts3ExprBalance(&p, nMaxDepth-1);
+ if( rc!=SQLITE_OK ) break;
+
+ for(iLvl=0; p && iLvl<nMaxDepth; iLvl++){
+ if( apLeaf[iLvl]==0 ){
+ apLeaf[iLvl] = p;
+ p = 0;
+ }else{
+ assert( pFree );
+ pFree->pLeft = apLeaf[iLvl];
+ pFree->pRight = p;
+ pFree->pLeft->pParent = pFree;
+ pFree->pRight->pParent = pFree;
+
+ p = pFree;
+ pFree = pFree->pParent;
+ p->pParent = 0;
+ apLeaf[iLvl] = 0;
+ }
+ }
+ if( p ){
+ sqlite3Fts3ExprFree(p);
+ rc = SQLITE_TOOBIG;
+ break;
+ }
+
+ /* If that was the last leaf node, break out of the loop */
+ if( pParent==0 ) break;
+
+ /* Set $p to point to the next leaf in the tree of eType nodes */
+ for(p=pParent->pRight; p->eType==eType; p=p->pLeft);
+
+ /* Remove pParent from the original tree. */
+ assert( pParent->pParent==0 || pParent->pParent->pLeft==pParent );
+ pParent->pRight->pParent = pParent->pParent;
+ if( pParent->pParent ){
+ pParent->pParent->pLeft = pParent->pRight;
+ }else{
+ assert( pParent==pRoot );
+ pRoot = pParent->pRight;
+ }
+
+ /* Link pParent into the free node list. It will be used as an
+ ** internal node of the new tree. */
+ pParent->pParent = pFree;
+ pFree = pParent;
+ }
+
+ if( rc==SQLITE_OK ){
+ p = 0;
+ for(i=0; i<nMaxDepth; i++){
+ if( apLeaf[i] ){
+ if( p==0 ){
+ p = apLeaf[i];
+ p->pParent = 0;
+ }else{
+ assert( pFree!=0 );
+ pFree->pRight = p;
+ pFree->pLeft = apLeaf[i];
+ pFree->pLeft->pParent = pFree;
+ pFree->pRight->pParent = pFree;
+
+ p = pFree;
+ pFree = pFree->pParent;
+ p->pParent = 0;
+ }
+ }
+ }
+ pRoot = p;
+ }else{
+ /* An error occurred. Delete the contents of the apLeaf[] array
+ ** and pFree list. Everything else is cleaned up by the call to
+ ** sqlite3Fts3ExprFree(pRoot) below. */
+ Fts3Expr *pDel;
+ for(i=0; i<nMaxDepth; i++){
+ sqlite3Fts3ExprFree(apLeaf[i]);
+ }
+ while( (pDel=pFree)!=0 ){
+ pFree = pDel->pParent;
+ sqlite3_free(pDel);
+ }
+ }
+
+ assert( pFree==0 );
+ sqlite3_free( apLeaf );
+ }
+ }
+
+ if( rc!=SQLITE_OK ){
+ sqlite3Fts3ExprFree(pRoot);
+ pRoot = 0;
+ }
+ *pp = pRoot;
+ return rc;
+}
+
+/*
+** This function is similar to sqlite3Fts3ExprParse(), with the following
+** differences:
**
-** The iDefaultCol parameter should be passed the index of the table column
-** that appears on the left-hand-side of the MATCH operator (the default
-** column to match against for tokens for which a column name is not explicitly
-** specified as part of the query string), or -1 if tokens may by default
-** match any table column.
+** 1. It does not do expression rebalancing.
+** 2. It does not check that the expression does not exceed the
+** maximum allowable depth.
+** 3. Even if it fails, *ppExpr may still be set to point to an
+** expression tree. It should be deleted using sqlite3Fts3ExprFree()
+** in this case.
*/
-int sqlite3Fts3ExprParse(
+static int fts3ExprParseUnbalanced(
sqlite3_tokenizer *pTokenizer, /* Tokenizer module */
int iLangid, /* Language id for tokenizer */
char **azCol, /* Array of column names for fts3 table */
@@ -798,28 +955,116 @@ int sqlite3Fts3ExprParse(
n = (int)strlen(z);
}
rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed);
+ assert( rc==SQLITE_OK || *ppExpr==0 );
/* Check for mismatched parenthesis */
if( rc==SQLITE_OK && sParse.nNest ){
rc = SQLITE_ERROR;
+ }
+
+ return rc;
+}
+
+/*
+** Parameters z and n contain a pointer to and length of a buffer containing
+** an fts3 query expression, respectively. This function attempts to parse the
+** query expression and create a tree of Fts3Expr structures representing the
+** parsed expression. If successful, *ppExpr is set to point to the head
+** of the parsed expression tree and SQLITE_OK is returned. If an error
+** occurs, either SQLITE_NOMEM (out-of-memory error) or SQLITE_ERROR (parse
+** error) is returned and *ppExpr is set to 0.
+**
+** If parameter n is a negative number, then z is assumed to point to a
+** nul-terminated string and the length is determined using strlen().
+**
+** The first parameter, pTokenizer, is passed the fts3 tokenizer module to
+** use to normalize query tokens while parsing the expression. The azCol[]
+** array, which is assumed to contain nCol entries, should contain the names
+** of each column in the target fts3 table, in order from left to right.
+** Column names must be nul-terminated strings.
+**
+** The iDefaultCol parameter should be passed the index of the table column
+** that appears on the left-hand-side of the MATCH operator (the default
+** column to match against for tokens for which a column name is not explicitly
+** specified as part of the query string), or -1 if tokens may by default
+** match any table column.
+*/
+int sqlite3Fts3ExprParse(
+ sqlite3_tokenizer *pTokenizer, /* Tokenizer module */
+ int iLangid, /* Language id for tokenizer */
+ char **azCol, /* Array of column names for fts3 table */
+ int bFts4, /* True to allow FTS4-only syntax */
+ int nCol, /* Number of entries in azCol[] */
+ int iDefaultCol, /* Default column to query */
+ const char *z, int n, /* Text of MATCH query */
+ Fts3Expr **ppExpr, /* OUT: Parsed query structure */
+ char **pzErr /* OUT: Error message (sqlite3_malloc) */
+){
+ static const int MAX_EXPR_DEPTH = 12;
+ int rc = fts3ExprParseUnbalanced(
+ pTokenizer, iLangid, azCol, bFts4, nCol, iDefaultCol, z, n, ppExpr
+ );
+
+ /* Rebalance the expression. And check that its depth does not exceed
+ ** MAX_EXPR_DEPTH. */
+ if( rc==SQLITE_OK && *ppExpr ){
+ rc = fts3ExprBalance(ppExpr, MAX_EXPR_DEPTH);
+ if( rc==SQLITE_OK ){
+ rc = fts3ExprCheckDepth(*ppExpr, MAX_EXPR_DEPTH);
+ }
+ }
+
+ if( rc!=SQLITE_OK ){
sqlite3Fts3ExprFree(*ppExpr);
*ppExpr = 0;
+ if( rc==SQLITE_TOOBIG ){
+ *pzErr = sqlite3_mprintf(
+ "FTS expression tree is too large (maximum depth %d)", MAX_EXPR_DEPTH
+ );
+ rc = SQLITE_ERROR;
+ }else if( rc==SQLITE_ERROR ){
+ *pzErr = sqlite3_mprintf("malformed MATCH expression: [%s]", z);
+ }
}
return rc;
}
/*
+** Free a single node of an expression tree.
+*/
+static void fts3FreeExprNode(Fts3Expr *p){
+ assert( p->eType==FTSQUERY_PHRASE || p->pPhrase==0 );
+ sqlite3Fts3EvalPhraseCleanup(p->pPhrase);
+ sqlite3_free(p->aMI);
+ sqlite3_free(p);
+}
+
+/*
** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse().
+**
+** This function would be simpler if it recursively called itself. But
+** that would mean passing a sufficiently large expression to ExprParse()
+** could cause a stack overflow.
*/
-void sqlite3Fts3ExprFree(Fts3Expr *p){
- if( p ){
- assert( p->eType==FTSQUERY_PHRASE || p->pPhrase==0 );
- sqlite3Fts3ExprFree(p->pLeft);
- sqlite3Fts3ExprFree(p->pRight);
- sqlite3Fts3EvalPhraseCleanup(p->pPhrase);
- sqlite3_free(p->aMI);
- sqlite3_free(p);
+void sqlite3Fts3ExprFree(Fts3Expr *pDel){
+ Fts3Expr *p;
+ assert( pDel==0 || pDel->pParent==0 );
+ for(p=pDel; p && (p->pLeft||p->pRight); p=(p->pLeft ? p->pLeft : p->pRight)){
+ assert( p->pParent==0 || p==p->pParent->pRight || p==p->pParent->pLeft );
+ }
+ while( p ){
+ Fts3Expr *pParent = p->pParent;
+ fts3FreeExprNode(p);
+ if( pParent && p==pParent->pLeft && pParent->pRight ){
+ p = pParent->pRight;
+ while( p && (p->pLeft || p->pRight) ){
+ assert( p==p->pParent->pRight || p==p->pParent->pLeft );
+ p = (p->pLeft ? p->pLeft : p->pRight);
+ }
+ }else{
+ p = pParent;
+ }
}
}
@@ -871,6 +1116,9 @@ static int queryTestTokenizer(
** the returned expression text and then freed using sqlite3_free().
*/
static char *exprToString(Fts3Expr *pExpr, char *zBuf){
+ if( pExpr==0 ){
+ return sqlite3_mprintf("");
+ }
switch( pExpr->eType ){
case FTSQUERY_PHRASE: {
Fts3Phrase *pPhrase = pExpr->pPhrase;
@@ -978,10 +1226,21 @@ static void fts3ExprTest(
azCol[ii] = (char *)sqlite3_value_text(argv[ii+2]);
}
- rc = sqlite3Fts3ExprParse(
- pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr
- );
+ if( sqlite3_user_data(context) ){
+ char *zDummy = 0;
+ rc = sqlite3Fts3ExprParse(
+ pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr, &zDummy
+ );
+ assert( rc==SQLITE_OK || pExpr==0 );
+ sqlite3_free(zDummy);
+ }else{
+ rc = fts3ExprParseUnbalanced(
+ pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr
+ );
+ }
+
if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){
+ sqlite3Fts3ExprFree(pExpr);
sqlite3_result_error(context, "Error parsing expression", -1);
}else if( rc==SQLITE_NOMEM || !(zBuf = exprToString(pExpr, 0)) ){
sqlite3_result_error_nomem(context);
@@ -1004,9 +1263,15 @@ exprtest_out:
** with database connection db.
*/
int sqlite3Fts3ExprInitTestInterface(sqlite3* db){
- return sqlite3_create_function(
+ int rc = sqlite3_create_function(
db, "fts3_exprtest", -1, SQLITE_UTF8, 0, fts3ExprTest, 0, 0
);
+ if( rc==SQLITE_OK ){
+ rc = sqlite3_create_function(db, "fts3_exprtest_rebalance",
+ -1, SQLITE_UTF8, (void *)1, fts3ExprTest, 0, 0
+ );
+ }
+ return rc;
}
#endif
diff --git a/ext/fts3/fts3_hash.h b/ext/fts3/fts3_hash.h
index 399f515..dc3fcf8 100644
--- a/ext/fts3/fts3_hash.h
+++ b/ext/fts3/fts3_hash.h
@@ -9,7 +9,7 @@
** May you share freely, never taking more than you give.
**
*************************************************************************
-** This is the header file for the generic hash-table implemenation
+** This is the header file for the generic hash-table implementation
** used in SQLite. We've modified it slightly to serve as a standalone
** hash table implementation for the full-text indexing module.
**
diff --git a/ext/fts3/fts3_icu.c b/ext/fts3/fts3_icu.c
index 18b7948..52df8c7 100644
--- a/ext/fts3/fts3_icu.c
+++ b/ext/fts3/fts3_icu.c
@@ -119,7 +119,7 @@ static int icuOpen(
nChar = nInput+1;
pCsr = (IcuCursor *)sqlite3_malloc(
sizeof(IcuCursor) + /* IcuCursor */
- nChar * sizeof(UChar) + /* IcuCursor.aChar[] */
+ ((nChar+3)&~3) * sizeof(UChar) + /* IcuCursor.aChar[] */
(nChar+1) * sizeof(int) /* IcuCursor.aOffset[] */
);
if( !pCsr ){
@@ -127,7 +127,7 @@ static int icuOpen(
}
memset(pCsr, 0, sizeof(IcuCursor));
pCsr->aChar = (UChar *)&pCsr[1];
- pCsr->aOffset = (int *)&pCsr->aChar[nChar];
+ pCsr->aOffset = (int *)&pCsr->aChar[(nChar+3)&~3];
pCsr->aOffset[iOut] = iInput;
U8_NEXT(zInput, iInput, nInput, c);
diff --git a/ext/fts3/fts3_snippet.c b/ext/fts3/fts3_snippet.c
index 6fce3d0..d54a787 100644
--- a/ext/fts3/fts3_snippet.c
+++ b/ext/fts3/fts3_snippet.c
@@ -389,9 +389,9 @@ static int fts3SnippetFindPositions(Fts3Expr *pExpr, int iPhrase, void *ctx){
** is the snippet with the highest score, where scores are calculated
** by adding:
**
-** (a) +1 point for each occurence of a matchable phrase in the snippet.
+** (a) +1 point for each occurrence of a matchable phrase in the snippet.
**
-** (b) +1000 points for the first occurence of each matchable phrase in
+** (b) +1000 points for the first occurrence of each matchable phrase in
** the snippet for which the corresponding mCovered bit is not set.
**
** The selected snippet parameters are stored in structure *pFragment before
@@ -576,7 +576,7 @@ static int fts3SnippetShift(
return rc;
}
while( rc==SQLITE_OK && iCurrent<(nSnippet+nDesired) ){
- const char *ZDUMMY; int DUMMY1, DUMMY2, DUMMY3;
+ const char *ZDUMMY; int DUMMY1 = 0, DUMMY2 = 0, DUMMY3 = 0;
rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent);
}
pMod->xClose(pC);
@@ -620,8 +620,6 @@ static int fts3SnippetText(
int iCol = pFragment->iCol+1; /* Query column to extract text from */
sqlite3_tokenizer_module *pMod; /* Tokenizer module methods object */
sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor open on zDoc/nDoc */
- const char *ZDUMMY; /* Dummy argument used with tokenizer */
- int DUMMY1; /* Dummy argument used with tokenizer */
zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol);
if( zDoc==0 ){
@@ -640,10 +638,23 @@ static int fts3SnippetText(
}
while( rc==SQLITE_OK ){
- int iBegin; /* Offset in zDoc of start of token */
- int iFin; /* Offset in zDoc of end of token */
- int isHighlight; /* True for highlighted terms */
-
+ const char *ZDUMMY; /* Dummy argument used with tokenizer */
+ int DUMMY1 = -1; /* Dummy argument used with tokenizer */
+ int iBegin = 0; /* Offset in zDoc of start of token */
+ int iFin = 0; /* Offset in zDoc of end of token */
+ int isHighlight = 0; /* True for highlighted terms */
+
+ /* Variable DUMMY1 is initialized to a negative value above. Elsewhere
+ ** in the FTS code the variable that the third argument to xNext points to
+ ** is initialized to zero before the first (*but not necessarily
+ ** subsequent*) call to xNext(). This is done for a particular application
+ ** that needs to know whether or not the tokenizer is being used for
+ ** snippet generation or for some other purpose.
+ **
+ ** Extreme care is required when writing code to depend on this
+ ** initialization. It is not a documented part of the tokenizer interface.
+ ** If a tokenizer is used directly by any code outside of FTS, this
+ ** convention might not be respected. */
rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent);
if( rc!=SQLITE_OK ){
if( rc==SQLITE_DONE ){
@@ -1333,8 +1344,6 @@ void sqlite3Fts3Offsets(
){
Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
sqlite3_tokenizer_module const *pMod = pTab->pTokenizer->pModule;
- const char *ZDUMMY; /* Dummy argument used with xNext() */
- int NDUMMY; /* Dummy argument used with xNext() */
int rc; /* Return Code */
int nToken; /* Number of tokens in query */
int iCol; /* Column currently being processed */
@@ -1367,9 +1376,11 @@ void sqlite3Fts3Offsets(
*/
for(iCol=0; iCol<pTab->nColumn; iCol++){
sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor */
- int iStart;
- int iEnd;
- int iCurrent;
+ const char *ZDUMMY; /* Dummy argument used with xNext() */
+ int NDUMMY = 0; /* Dummy argument used with xNext() */
+ int iStart = 0;
+ int iEnd = 0;
+ int iCurrent = 0;
const char *zDoc;
int nDoc;
diff --git a/ext/fts3/fts3_test.c b/ext/fts3/fts3_test.c
index 4da0b8f..75ec6bd 100644
--- a/ext/fts3/fts3_test.c
+++ b/ext/fts3/fts3_test.c
@@ -267,7 +267,7 @@ static int fts3_near_match_cmd(
**
** Whether or not the arguments are present, this command returns a list of
** two integers - the initial chunksize and threshold when the command is
-** invoked. This can be used to restore the default behaviour after running
+** invoked. This can be used to restore the default behavior after running
** tests. For example:
**
** # Override incr-load settings for testing:
diff --git a/ext/fts3/fts3_tokenize_vtab.c b/ext/fts3/fts3_tokenize_vtab.c
new file mode 100644
index 0000000..364852e
--- /dev/null
+++ b/ext/fts3/fts3_tokenize_vtab.c
@@ -0,0 +1,454 @@
+/*
+** 2013 Apr 22
+**
+** The author disclaims copyright to this source code. In place of
+** a legal notice, here is a blessing:
+**
+** May you do good and not evil.
+** May you find forgiveness for yourself and forgive others.
+** May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+** This file contains code for the "fts3tokenize" virtual table module.
+** An fts3tokenize virtual table is created as follows:
+**
+** CREATE VIRTUAL TABLE <tbl> USING fts3tokenize(
+** <tokenizer-name>, <arg-1>, ...
+** );
+**
+** The table created has the following schema:
+**
+** CREATE TABLE <tbl>(input, token, start, end, position)
+**
+** When queried, the query must include a WHERE clause of type:
+**
+** input = <string>
+**
+** The virtual table module tokenizes this <string>, using the FTS3
+** tokenizer specified by the arguments to the CREATE VIRTUAL TABLE
+** statement and returns one row for each token in the result, with
+** fields set as follows:
+**
+** input: Always set to a copy of <string>
+** token: A token from the input.
+** start: Byte offset of the token within the input <string>.
+** end: Byte offset of the byte immediately following the end of the
+** token within the input string.
+** position: Token offset of token within input.
+**
+*/
+#include "fts3Int.h"
+#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
+
+#include <string.h>
+#include <assert.h>
+
+typedef struct Fts3tokTable Fts3tokTable;
+typedef struct Fts3tokCursor Fts3tokCursor;
+
+/*
+** Virtual table structure. One instance is allocated by
+** fts3tokConnectMethod() for each fts3tokenize table and released again
+** by fts3tokDisconnectMethod().
+*/
+struct Fts3tokTable {
+ sqlite3_vtab base; /* Base class used by SQLite core */
+ const sqlite3_tokenizer_module *pMod; /* Methods of the tokenizer in use */
+ sqlite3_tokenizer *pTok; /* Tokenizer instance created by pMod->xCreate() */
+};
+
+/*
+** Virtual table cursor structure. The token fields (zToken, nToken, iStart,
+** iEnd and iPos) are filled in by the tokenizer's xNext() method each time
+** the cursor is advanced, and are cleared by fts3tokResetCursor().
+*/
+struct Fts3tokCursor {
+ sqlite3_vtab_cursor base; /* Base class used by SQLite core */
+ char *zInput; /* Input string (nul-terminated private copy) */
+ sqlite3_tokenizer_cursor *pCsr; /* Cursor to iterate through zInput */
+ int iRowid; /* Current 'rowid' value */
+ const char *zToken; /* Current 'token' value (NULL when at EOF) */
+ int nToken; /* Size of zToken in bytes */
+ int iStart; /* Current 'start' value */
+ int iEnd; /* Current 'end' value */
+ int iPos; /* Current 'pos' value */
+};
+
+/*
+** Look up the tokenizer module registered under the name zName in the
+** hash table pHash.
+**
+** If the module is found, *pp is set to point at it and SQLITE_OK is
+** returned. Otherwise, *pzErr is set to an sqlite3_mprintf() allocated
+** error message and SQLITE_ERROR is returned. *pp is left untouched in
+** that case.
+*/
+static int fts3tokQueryTokenizer(
+  Fts3Hash *pHash,
+  const char *zName,
+  const sqlite3_tokenizer_module **pp,
+  char **pzErr
+){
+  /* The key passed to the hash lookup includes the nul-terminator. */
+  int nKey = (int)strlen(zName) + 1;
+  sqlite3_tokenizer_module *pFound;
+
+  pFound = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash, zName, nKey);
+  if( pFound ){
+    *pp = pFound;
+    return SQLITE_OK;
+  }
+
+  *pzErr = sqlite3_mprintf("unknown tokenizer: %s", zName);
+  return SQLITE_ERROR;
+}
+
+/*
+** Make a dequoted copy of the argc nul-terminated strings in argv[].
+**
+** The copy lives in a single allocation: an array of argc pointers
+** followed immediately by the string data those pointers refer to. Each
+** string is copied and then passed to sqlite3Fts3Dequote().
+**
+** If successful, *pazDequote is set to point at the new array and
+** SQLITE_OK is returned. The caller is responsible for eventually passing
+** the array to sqlite3_free(). If argc is zero, *pazDequote is set to NULL
+** and SQLITE_OK is returned. If the allocation fails, SQLITE_NOMEM is
+** returned; callers should treat the final value of *pazDequote as
+** undefined in that case.
+*/
+static int fts3tokDequoteArray(
+  int argc,                       /* Number of elements in argv[] */
+  const char * const *argv,       /* Input array */
+  char ***pazDequote              /* Output array */
+){
+  char **azCopy;                  /* Pointer array at start of allocation */
+  char *zOut;                     /* Next free byte in the string area */
+  int nText = 0;                  /* Bytes of string data, nuls included */
+  int ii;                         /* Iterator variable */
+
+  if( argc==0 ){
+    *pazDequote = 0;
+    return SQLITE_OK;
+  }
+
+  /* First pass: work out how much space the strings require. */
+  for(ii=0; ii<argc; ii++){
+    nText += (int)(strlen(argv[ii]) + 1);
+  }
+
+  azCopy = sqlite3_malloc(sizeof(char *)*argc + nText);
+  *pazDequote = azCopy;
+  if( azCopy==0 ){
+    return SQLITE_NOMEM;
+  }
+
+  /* Second pass: copy each string into place and dequote the copy. */
+  zOut = (char *)&azCopy[argc];
+  for(ii=0; ii<argc; ii++){
+    int nCopy = (int)strlen(argv[ii]) + 1;
+    azCopy[ii] = zOut;
+    memcpy(zOut, argv[ii], nCopy);
+    sqlite3Fts3Dequote(zOut);
+    zOut += nCopy;
+  }
+
+  return SQLITE_OK;
+}
+
+/*
+** Schema of the tokenizer table. This is the declaration handed to
+** sqlite3_declare_vtab() by fts3tokConnectMethod(). The order of the
+** columns here determines the column indexes (0 to 4) tested by
+** fts3tokBestIndexMethod() and fts3tokColumnMethod().
+*/
+#define FTS3_TOK_SCHEMA "CREATE TABLE x(input, token, start, end, position)"
+
+/*
+** This function does all the work for both the xConnect and xCreate methods.
+** These tables have no persistent representation of their own, so xConnect
+** and xCreate are identical operations.
+**
+**   argv[0]: module name
+**   argv[1]: database name
+**   argv[2]: table name
+**   argv[3]: first argument (tokenizer name)
+**
+** If no tokenizer name is supplied, the "simple" tokenizer is used. Any
+** arguments following the tokenizer name are dequoted and handed to the
+** xCreate() method of the tokenizer module.
+**
+** If successful, *ppVtab is set to point at the new table object and
+** SQLITE_OK is returned. Otherwise an SQLite error code is returned, any
+** tokenizer instance created along the way is destroyed and *ppVtab is
+** not written. If the named tokenizer does not exist, *pzErr is set to
+** an sqlite3_malloc'd error message.
+*/
+static int fts3tokConnectMethod(
+  sqlite3 *db,                    /* Database connection */
+  void *pHash,                    /* Hash table of tokenizers */
+  int argc,                       /* Number of elements in argv array */
+  const char * const *argv,       /* xCreate/xConnect argument array */
+  sqlite3_vtab **ppVtab,          /* OUT: New sqlite3_vtab object */
+  char **pzErr                    /* OUT: sqlite3_malloc'd error message */
+){
+  Fts3tokTable *pTab = 0;
+  const sqlite3_tokenizer_module *pMod = 0;
+  sqlite3_tokenizer *pTok = 0;
+  int rc;
+  char **azDequote = 0;
+  int nDequote;
+
+  rc = sqlite3_declare_vtab(db, FTS3_TOK_SCHEMA);
+  if( rc!=SQLITE_OK ) return rc;
+
+  /* Everything after the three fixed arguments belongs to the tokenizer. */
+  nDequote = argc-3;
+  rc = fts3tokDequoteArray(nDequote, &argv[3], &azDequote);
+
+  if( rc==SQLITE_OK ){
+    const char *zModule;
+    if( nDequote<1 ){
+      zModule = "simple";
+    }else{
+      zModule = azDequote[0];
+    }
+    rc = fts3tokQueryTokenizer((Fts3Hash*)pHash, zModule, &pMod, pzErr);
+  }
+
+  assert( (rc==SQLITE_OK)==(pMod!=0) );
+  if( rc==SQLITE_OK ){
+    /* azDequote is NULL when no module arguments were supplied, so the
+    ** address of azDequote[1] must only be formed when there really is at
+    ** least one argument following the tokenizer name. In all other cases
+    ** xCreate() is invoked with zero arguments and a NULL array. */
+    const char * const *azArg = 0;
+    if( nDequote>1 ){
+      azArg = (const char * const *)&azDequote[1];
+    }
+    rc = pMod->xCreate((nDequote>1 ? nDequote-1 : 0), azArg, &pTok);
+  }
+
+  if( rc==SQLITE_OK ){
+    pTab = (Fts3tokTable *)sqlite3_malloc(sizeof(Fts3tokTable));
+    if( pTab==0 ){
+      rc = SQLITE_NOMEM;
+    }
+  }
+
+  if( rc==SQLITE_OK ){
+    memset(pTab, 0, sizeof(Fts3tokTable));
+    pTab->pMod = pMod;
+    pTab->pTok = pTok;
+    *ppVtab = &pTab->base;
+  }else{
+    /* Do not leak the tokenizer if a later step failed. */
+    if( pTok ){
+      pMod->xDestroy(pTok);
+    }
+  }
+
+  sqlite3_free(azDequote);
+  return rc;
+}
+
+/*
+** Implementation of both xDisconnect and xDestroy. As these tables store
+** nothing persistently, the two operations are the same: destroy the
+** tokenizer instance owned by the table, then free the table object.
+*/
+static int fts3tokDisconnectMethod(sqlite3_vtab *pVtab){
+  Fts3tokTable *pTokTab = (Fts3tokTable *)pVtab;
+  const sqlite3_tokenizer_module *pModule = pTokTab->pMod;
+
+  pModule->xDestroy(pTokTab->pTok);
+  sqlite3_free(pTokTab);
+  return SQLITE_OK;
+}
+
+/*
+** xBestIndex - Analyze a WHERE and ORDER BY clause.
+**
+** The first usable equality constraint on column 0 ("input") selects
+** strategy idxNum==1: its right-hand side is passed to xFilter as the
+** first argument, the constraint is marked "omit" and the estimated cost
+** is set to 1. If there is no such constraint, idxNum is set to 0 and the
+** cost estimate supplied by the caller is left as it is.
+*/
+static int fts3tokBestIndexMethod(
+  sqlite3_vtab *pVTab,
+  sqlite3_index_info *pInfo
+){
+  int iCons;
+  UNUSED_PARAMETER(pVTab);
+
+  for(iCons=0; iCons<pInfo->nConstraint; iCons++){
+    const struct sqlite3_index_constraint *pCons = &pInfo->aConstraint[iCons];
+    if( !pCons->usable ) continue;
+    if( pCons->iColumn!=0 ) continue;
+    if( pCons->op!=SQLITE_INDEX_CONSTRAINT_EQ ) continue;
+
+    pInfo->idxNum = 1;
+    pInfo->aConstraintUsage[iCons].argvIndex = 1;
+    pInfo->aConstraintUsage[iCons].omit = 1;
+    pInfo->estimatedCost = 1;
+    return SQLITE_OK;
+  }
+
+  /* No "input = ?" constraint available. */
+  pInfo->idxNum = 0;
+  assert( pInfo->estimatedCost>1000000.0 );
+
+  return SQLITE_OK;
+}
+
+/*
+** xOpen - Open a cursor.
+**
+** Allocate a zeroed Fts3tokCursor and return it via *ppCsr. SQLITE_NOMEM
+** is returned, and *ppCsr left unchanged, if the allocation fails.
+*/
+static int fts3tokOpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){
+  Fts3tokCursor *pNew = (Fts3tokCursor *)sqlite3_malloc(sizeof(Fts3tokCursor));
+  UNUSED_PARAMETER(pVTab);
+
+  if( pNew ){
+    memset(pNew, 0, sizeof(Fts3tokCursor));
+    *ppCsr = (sqlite3_vtab_cursor *)pNew;
+    return SQLITE_OK;
+  }
+  return SQLITE_NOMEM;
+}
+
+/*
+** Return the cursor passed as the only argument to the state it was in
+** when first returned by fts3tokOpenMethod(): close any open tokenizer
+** cursor, free the copy of the input string and zero the current token
+** fields and the rowid.
+*/
+static void fts3tokResetCursor(Fts3tokCursor *pCsr){
+  sqlite3_tokenizer_cursor *pTokCsr = pCsr->pCsr;
+
+  if( pTokCsr ){
+    Fts3tokTable *pTokTab = (Fts3tokTable *)(pCsr->base.pVtab);
+    pTokTab->pMod->xClose(pTokCsr);
+    pCsr->pCsr = 0;
+  }
+
+  sqlite3_free(pCsr->zInput);
+  pCsr->zInput = 0;
+
+  /* Clear the fields describing the current row. */
+  pCsr->iRowid = 0;
+  pCsr->zToken = 0;
+  pCsr->nToken = 0;
+  pCsr->iStart = 0;
+  pCsr->iEnd = 0;
+  pCsr->iPos = 0;
+}
+
+/*
+** xClose - Close a cursor.
+**
+** Release everything the cursor holds, then the cursor object itself.
+*/
+static int fts3tokCloseMethod(sqlite3_vtab_cursor *pCursor){
+  fts3tokResetCursor((Fts3tokCursor *)pCursor);
+  sqlite3_free(pCursor);
+  return SQLITE_OK;
+}
+
+/*
+** xNext - Advance the cursor to the next row, if any.
+**
+** The rowid is incremented and the tokenizer asked for its next token.
+** If the tokenizer returns anything other than SQLITE_OK the cursor is
+** reset, which leaves it at EOF. SQLITE_DONE from the tokenizer is
+** reported to the caller as SQLITE_OK; any other error code is returned
+** as is.
+*/
+static int fts3tokNextMethod(sqlite3_vtab_cursor *pCursor){
+  Fts3tokCursor *pTokCsr = (Fts3tokCursor *)pCursor;
+  Fts3tokTable *pTokTab = (Fts3tokTable *)(pCursor->pVtab);
+  const sqlite3_tokenizer_module *pModule = pTokTab->pMod;
+  int rc;                         /* Return code */
+
+  pTokCsr->iRowid += 1;
+  rc = pModule->xNext(pTokCsr->pCsr,
+      &pTokCsr->zToken, &pTokCsr->nToken,
+      &pTokCsr->iStart, &pTokCsr->iEnd, &pTokCsr->iPos
+  );
+  if( rc==SQLITE_OK ){
+    return SQLITE_OK;
+  }
+
+  fts3tokResetCursor(pTokCsr);
+  return (rc==SQLITE_DONE) ? SQLITE_OK : rc;
+}
+
+/*
+** xFilter - Initialize a cursor to point at the start of its data.
+**
+** Only strategy idxNum==1 (an "input = ?" constraint, see
+** fts3tokBestIndexMethod()) is supported. In that case apVal[0] holds the
+** string to tokenize. A private nul-terminated copy of it is stored in
+** the cursor, a tokenizer cursor is opened on the copy and the cursor is
+** advanced to the first token. For any other strategy SQLITE_ERROR is
+** returned.
+**
+** Returns SQLITE_OK if successful, SQLITE_NOMEM if the copy of the input
+** cannot be allocated, or the error code returned by the tokenizer.
+*/
+static int fts3tokFilterMethod(
+  sqlite3_vtab_cursor *pCursor,   /* The cursor used for this query */
+  int idxNum,                     /* Strategy index */
+  const char *idxStr,             /* Unused */
+  int nVal,                       /* Number of elements in apVal */
+  sqlite3_value **apVal           /* Arguments for the indexing scheme */
+){
+  int rc = SQLITE_ERROR;
+  Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor;
+  Fts3tokTable *pTab = (Fts3tokTable *)(pCursor->pVtab);
+  UNUSED_PARAMETER(idxStr);
+  UNUSED_PARAMETER(nVal);
+
+  /* Discard any state left over from a previous scan. */
+  fts3tokResetCursor(pCsr);
+  if( idxNum==1 ){
+    const char *zByte = (const char *)sqlite3_value_text(apVal[0]);
+    int nByte = sqlite3_value_bytes(apVal[0]);
+    pCsr->zInput = sqlite3_malloc(nByte+1);
+    if( pCsr->zInput==0 ){
+      rc = SQLITE_NOMEM;
+    }else{
+      /* sqlite3_value_text() returns NULL for an SQL NULL, in which case
+      ** nByte is 0. Passing a NULL source pointer to memcpy() is undefined
+      ** behaviour even for a zero-length copy, so skip the call then. The
+      ** input is treated as an empty string. */
+      if( nByte>0 ) memcpy(pCsr->zInput, zByte, nByte);
+      pCsr->zInput[nByte] = 0;
+      rc = pTab->pMod->xOpen(pTab->pTok, pCsr->zInput, nByte, &pCsr->pCsr);
+      if( rc==SQLITE_OK ){
+        pCsr->pCsr->pTokenizer = pTab->pTok;
+      }
+    }
+  }
+
+  if( rc!=SQLITE_OK ) return rc;
+  return fts3tokNextMethod(pCursor);
+}
+
+/*
+** xEof - Return true if the cursor is at EOF, or false otherwise.
+**
+** The cursor is at EOF whenever it has no current token.
+*/
+static int fts3tokEofMethod(sqlite3_vtab_cursor *pCursor){
+  const Fts3tokCursor *pTokCsr = (const Fts3tokCursor *)pCursor;
+  return pTokCsr->zToken ? 0 : 1;
+}
+
+/*
+** xColumn - Return a column value.
+**
+** Column indexes follow the declared schema:
+**
+**   CREATE TABLE x(input, token, start, end, position)
+**
+** Columns 0 and 1 are returned as text, the remaining three as integers.
+*/
+static int fts3tokColumnMethod(
+  sqlite3_vtab_cursor *pCursor,   /* Cursor to retrieve value from */
+  sqlite3_context *pCtx,          /* Context for sqlite3_result_xxx() calls */
+  int iCol                        /* Index of column to read value from */
+){
+  Fts3tokCursor *pTokCsr = (Fts3tokCursor *)pCursor;
+
+  if( iCol==0 ){
+    sqlite3_result_text(pCtx, pTokCsr->zInput, -1, SQLITE_TRANSIENT);
+  }else if( iCol==1 ){
+    sqlite3_result_text(
+        pCtx, pTokCsr->zToken, pTokCsr->nToken, SQLITE_TRANSIENT
+    );
+  }else{
+    int iVal;                     /* Integer value to return */
+    if( iCol==2 ){
+      iVal = pTokCsr->iStart;
+    }else if( iCol==3 ){
+      iVal = pTokCsr->iEnd;
+    }else{
+      assert( iCol==4 );
+      iVal = pTokCsr->iPos;
+    }
+    sqlite3_result_int(pCtx, iVal);
+  }
+  return SQLITE_OK;
+}
+
+/*
+** xRowid - Return the current rowid for the cursor.
+**
+** The rowid is the counter that fts3tokNextMethod() increments each time
+** the cursor is advanced.
+*/
+static int fts3tokRowidMethod(
+  sqlite3_vtab_cursor *pCursor,   /* Cursor to retrieve value from */
+  sqlite_int64 *pRowid            /* OUT: Rowid value */
+){
+  const Fts3tokCursor *pTokCsr = (const Fts3tokCursor *)pCursor;
+  sqlite3_int64 iRow = (sqlite3_int64)pTokCsr->iRowid;
+
+  *pRowid = iRow;
+  return SQLITE_OK;
+}
+
+/*
+** Register the "fts3tokenize" virtual table module with database
+** connection db. The tokenizer hash table pHash is supplied as the module
+** client data, which is how fts3tokConnectMethod() gets hold of it.
+**
+** Return SQLITE_OK if successful or an error code if
+** sqlite3_create_module() fails.
+*/
+int sqlite3Fts3InitTok(sqlite3 *db, Fts3Hash *pHash){
+  /* Read-only module: xCreate/xConnect and xDisconnect/xDestroy share an
+  ** implementation, and none of the write or transaction methods are
+  ** provided. */
+  static const sqlite3_module fts3tok_module = {
+     0,                           /* iVersion      */
+     fts3tokConnectMethod,        /* xCreate       */
+     fts3tokConnectMethod,        /* xConnect      */
+     fts3tokBestIndexMethod,      /* xBestIndex    */
+     fts3tokDisconnectMethod,     /* xDisconnect   */
+     fts3tokDisconnectMethod,     /* xDestroy      */
+     fts3tokOpenMethod,           /* xOpen         */
+     fts3tokCloseMethod,          /* xClose        */
+     fts3tokFilterMethod,         /* xFilter       */
+     fts3tokNextMethod,           /* xNext         */
+     fts3tokEofMethod,            /* xEof          */
+     fts3tokColumnMethod,         /* xColumn       */
+     fts3tokRowidMethod,          /* xRowid        */
+     0,                           /* xUpdate       */
+     0,                           /* xBegin        */
+     0,                           /* xSync         */
+     0,                           /* xCommit       */
+     0,                           /* xRollback     */
+     0,                           /* xFindFunction */
+     0,                           /* xRename       */
+     0,                           /* xSavepoint    */
+     0,                           /* xRelease      */
+     0                            /* xRollbackTo   */
+  };
+
+  return sqlite3_create_module(
+      db, "fts3tokenize", &fts3tok_module, (void*)pHash
+  );
+}
+
+#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
diff --git a/ext/fts3/fts3_tokenizer.c b/ext/fts3/fts3_tokenizer.c
index 4a7a175..04f8446 100644
--- a/ext/fts3/fts3_tokenizer.c
+++ b/ext/fts3/fts3_tokenizer.c
@@ -251,10 +251,10 @@ static void testFunc(
const char *azArg[64];
const char *zToken;
- int nToken;
- int iStart;
- int iEnd;
- int iPos;
+ int nToken = 0;
+ int iStart = 0;
+ int iEnd = 0;
+ int iPos = 0;
int i;
Tcl_Obj *pRet;
@@ -428,7 +428,7 @@ static void intTestFunc(
/*
** Set up SQL objects in database db used to access the contents of
** the hash table pointed to by argument pHash. The hash table must
-** been initialised to use string keys, and to take a private copy
+** been initialized to use string keys, and to take a private copy
** of the key when a value is inserted. i.e. by a call similar to:
**
** sqlite3Fts3HashInit(pHash, FTS3_HASH_STRING, 1);
diff --git a/ext/fts3/fts3_tokenizer.h b/ext/fts3/fts3_tokenizer.h
index c91c7ed..4a40b2b 100644
--- a/ext/fts3/fts3_tokenizer.h
+++ b/ext/fts3/fts3_tokenizer.h
@@ -70,7 +70,7 @@ struct sqlite3_tokenizer_module {
** This method should return either SQLITE_OK (0), or an SQLite error
** code. If SQLITE_OK is returned, then *ppTokenizer should be set
** to point at the newly created tokenizer structure. The generic
- ** sqlite3_tokenizer.pModule variable should not be initialised by
+ ** sqlite3_tokenizer.pModule variable should not be initialized by
** this callback. The caller will do so.
*/
int (*xCreate)(
diff --git a/ext/fts3/fts3_unicode.c b/ext/fts3/fts3_unicode.c
index 79941ed..188358e 100644
--- a/ext/fts3/fts3_unicode.c
+++ b/ext/fts3/fts3_unicode.c
@@ -125,7 +125,7 @@ static int unicodeDestroy(sqlite3_tokenizer *pTokenizer){
**
** If a standalone diacritic mark (one that sqlite3FtsUnicodeIsdiacritic()
** identifies as a diacritic) occurs in the zIn/nIn string it is ignored.
-** It is not possible to change the behaviour of the tokenizer with respect
+** It is not possible to change the behavior of the tokenizer with respect
** to these codepoints.
*/
static int unicodeAddExceptions(
diff --git a/ext/fts3/fts3_write.c b/ext/fts3/fts3_write.c
index c9f1743..269d1dd 100644
--- a/ext/fts3/fts3_write.c
+++ b/ext/fts3/fts3_write.c
@@ -776,16 +776,16 @@ static int fts3PendingTermsAdd(
int iLangid, /* Language id to use */
const char *zText, /* Text of document to be inserted */
int iCol, /* Column into which text is being inserted */
- u32 *pnWord /* OUT: Number of tokens inserted */
+ u32 *pnWord /* IN/OUT: Incr. by number tokens inserted */
){
int rc;
- int iStart;
- int iEnd;
- int iPos;
+ int iStart = 0;
+ int iEnd = 0;
+ int iPos = 0;
int nWord = 0;
char const *zToken;
- int nToken;
+ int nToken = 0;
sqlite3_tokenizer *pTokenizer = p->pTokenizer;
sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
@@ -840,7 +840,7 @@ static int fts3PendingTermsAdd(
}
pModule->xClose(pCsr);
- *pnWord = nWord;
+ *pnWord += nWord;
return (rc==SQLITE_DONE ? SQLITE_OK : rc);
}
@@ -1044,11 +1044,13 @@ static void fts3DeleteTerms(
int *pRC, /* Result code */
Fts3Table *p, /* The FTS table to delete from */
sqlite3_value *pRowid, /* The docid to be deleted */
- u32 *aSz /* Sizes of deleted document written here */
+ u32 *aSz, /* Sizes of deleted document written here */
+ int *pbFound /* OUT: Set to true if row really does exist */
){
int rc;
sqlite3_stmt *pSelect;
+ assert( *pbFound==0 );
if( *pRC ) return;
rc = fts3SqlStmt(p, SQL_SELECT_CONTENT_BY_ROWID, &pSelect, &pRowid);
if( rc==SQLITE_OK ){
@@ -1066,6 +1068,7 @@ static void fts3DeleteTerms(
*pRC = rc;
return;
}
+ *pbFound = 1;
}
rc = sqlite3_reset(pSelect);
}else{
@@ -1479,6 +1482,7 @@ static int fts3SegReaderNextDocid(
*pnOffsetList = (int)(p - pReader->pOffsetList - 1);
}
+ /* List may have been edited in place by fts3EvalNearTrim() */
while( p<pEnd && *p==0 ) p++;
/* If there are no more entries in the doclist, set pOffsetList to
@@ -2494,9 +2498,13 @@ static int fts3DeleteSegdir(
**
** If there are no entries in the input position list for column iCol, then
** *pnList is set to zero before returning.
+**
+** If parameter bZero is non-zero, then any part of the input list following
+** the end of the output list is zeroed before returning.
*/
static void fts3ColumnFilter(
int iCol, /* Column to filter on */
+ int bZero, /* Zero out anything following *ppList */
char **ppList, /* IN/OUT: Pointer to position list */
int *pnList /* IN/OUT: Size of buffer *ppList in bytes */
){
@@ -2525,6 +2533,9 @@ static void fts3ColumnFilter(
p += sqlite3Fts3GetVarint32(p, &iCurrent);
}
+ if( bZero && &pList[nList]!=pEnd ){
+ memset(&pList[nList], 0, pEnd - &pList[nList]);
+ }
*ppList = pList;
*pnList = nList;
}
@@ -2598,19 +2609,19 @@ int sqlite3Fts3MsrIncrNext(
if( rc!=SQLITE_OK ) return rc;
fts3SegReaderSort(pMsr->apSegment, nMerge, j, xCmp);
+ if( nList>0 && fts3SegReaderIsPending(apSegment[0]) ){
+ rc = fts3MsrBufferData(pMsr, pList, nList+1);
+ if( rc!=SQLITE_OK ) return rc;
+ assert( (pMsr->aBuffer[nList] & 0xFE)==0x00 );
+ pList = pMsr->aBuffer;
+ }
+
if( pMsr->iColFilter>=0 ){
- fts3ColumnFilter(pMsr->iColFilter, &pList, &nList);
+ fts3ColumnFilter(pMsr->iColFilter, 1, &pList, &nList);
}
if( nList>0 ){
- if( fts3SegReaderIsPending(apSegment[0]) ){
- rc = fts3MsrBufferData(pMsr, pList, nList+1);
- if( rc!=SQLITE_OK ) return rc;
- *paPoslist = pMsr->aBuffer;
- assert( (pMsr->aBuffer[nList] & 0xFE)==0x00 );
- }else{
- *paPoslist = pList;
- }
+ *paPoslist = pList;
*piDocid = iDocid;
*pnPoslist = nList;
break;
@@ -2853,7 +2864,7 @@ int sqlite3Fts3SegReaderStep(
}
if( isColFilter ){
- fts3ColumnFilter(pFilter->iCol, &pList, &nList);
+ fts3ColumnFilter(pFilter->iCol, 0, &pList, &nList);
}
if( !isIgnoreEmpty || nList>0 ){
@@ -3290,7 +3301,7 @@ static int fts3DoRebuild(Fts3Table *p){
int iCol;
int iLangid = langidFromSelect(p, pStmt);
rc = fts3PendingTermsDocid(p, iLangid, sqlite3_column_int64(pStmt, 0));
- aSz[p->nColumn] = 0;
+ memset(aSz, 0, sizeof(aSz[0]) * (p->nColumn+1));
for(iCol=0; rc==SQLITE_OK && iCol<p->nColumn; iCol++){
const char *z = (const char *) sqlite3_column_text(pStmt, iCol+1);
rc = fts3PendingTermsAdd(p, iLangid, z, iCol, &aSz[iCol]);
@@ -4934,9 +4945,9 @@ static int fts3IntegrityCheck(Fts3Table *p, int *pbOk){
rc = sqlite3Fts3OpenTokenizer(p->pTokenizer, iLang, zText, nText, &pT);
while( rc==SQLITE_OK ){
char const *zToken; /* Buffer containing token */
- int nToken; /* Number of bytes in token */
- int iDum1, iDum2; /* Dummy variables */
- int iPos; /* Position of token in zText */
+ int nToken = 0; /* Number of bytes in token */
+ int iDum1 = 0, iDum2 = 0; /* Dummy variables */
+ int iPos = 0; /* Position of token in zText */
rc = pModule->xNext(pT, &zToken, &nToken, &iDum1, &iDum2, &iPos);
if( rc==SQLITE_OK ){
@@ -5103,9 +5114,9 @@ int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *pCsr){
rc = sqlite3Fts3OpenTokenizer(pT, pCsr->iLangid, zText, -1, &pTC);
while( rc==SQLITE_OK ){
char const *zToken; /* Buffer containing token */
- int nToken; /* Number of bytes in token */
- int iDum1, iDum2; /* Dummy variables */
- int iPos; /* Position of token in zText */
+ int nToken = 0; /* Number of bytes in token */
+ int iDum1 = 0, iDum2 = 0; /* Dummy variables */
+ int iPos = 0; /* Position of token in zText */
rc = pModule->xNext(pTC, &zToken, &nToken, &iDum1, &iDum2, &iPos);
for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){
@@ -5194,28 +5205,32 @@ int sqlite3Fts3DeferToken(
static int fts3DeleteByRowid(
Fts3Table *p,
sqlite3_value *pRowid,
- int *pnDoc,
+ int *pnChng, /* IN/OUT: Decrement if row is deleted */
u32 *aSzDel
){
- int isEmpty = 0;
- int rc = fts3IsEmpty(p, pRowid, &isEmpty);
- if( rc==SQLITE_OK ){
- if( isEmpty ){
- /* Deleting this row means the whole table is empty. In this case
- ** delete the contents of all three tables and throw away any
- ** data in the pendingTerms hash table. */
- rc = fts3DeleteAll(p, 1);
- *pnDoc = *pnDoc - 1;
- }else{
- fts3DeleteTerms(&rc, p, pRowid, aSzDel);
- if( p->zContentTbl==0 ){
- fts3SqlExec(&rc, p, SQL_DELETE_CONTENT, &pRowid);
- if( sqlite3_changes(p->db) ) *pnDoc = *pnDoc - 1;
+ int rc = SQLITE_OK; /* Return code */
+ int bFound = 0; /* True if *pRowid really is in the table */
+
+ fts3DeleteTerms(&rc, p, pRowid, aSzDel, &bFound);
+ if( bFound && rc==SQLITE_OK ){
+ int isEmpty = 0; /* Deleting *pRowid leaves the table empty */
+ rc = fts3IsEmpty(p, pRowid, &isEmpty);
+ if( rc==SQLITE_OK ){
+ if( isEmpty ){
+ /* Deleting this row means the whole table is empty. In this case
+ ** delete the contents of all three tables and throw away any
+ ** data in the pendingTerms hash table. */
+ rc = fts3DeleteAll(p, 1);
+ *pnChng = 0;
+ memset(aSzDel, 0, sizeof(u32) * (p->nColumn+1) * 2);
}else{
- *pnDoc = *pnDoc - 1;
- }
- if( p->bHasDocsize ){
- fts3SqlExec(&rc, p, SQL_DELETE_DOCSIZE, &pRowid);
+ *pnChng = *pnChng - 1;
+ if( p->zContentTbl==0 ){
+ fts3SqlExec(&rc, p, SQL_DELETE_CONTENT, &pRowid);
+ }
+ if( p->bHasDocsize ){
+ fts3SqlExec(&rc, p, SQL_DELETE_DOCSIZE, &pRowid);
+ }
}
}
}
@@ -5246,7 +5261,7 @@ int sqlite3Fts3UpdateMethod(
int rc = SQLITE_OK; /* Return Code */
int isRemove = 0; /* True for an UPDATE or DELETE */
u32 *aSzIns = 0; /* Sizes of inserted documents */
- u32 *aSzDel; /* Sizes of deleted documents */
+ u32 *aSzDel = 0; /* Sizes of deleted documents */
int nChng = 0; /* Net change in number of documents */
int bInsertDone = 0;
@@ -5274,13 +5289,13 @@ int sqlite3Fts3UpdateMethod(
}
/* Allocate space to hold the change in document sizes */
- aSzIns = sqlite3_malloc( sizeof(aSzIns[0])*(p->nColumn+1)*2 );
- if( aSzIns==0 ){
+ aSzDel = sqlite3_malloc( sizeof(aSzDel[0])*(p->nColumn+1)*2 );
+ if( aSzDel==0 ){
rc = SQLITE_NOMEM;
goto update_out;
}
- aSzDel = &aSzIns[p->nColumn+1];
- memset(aSzIns, 0, sizeof(aSzIns[0])*(p->nColumn+1)*2);
+ aSzIns = &aSzDel[p->nColumn+1];
+ memset(aSzDel, 0, sizeof(aSzDel[0])*(p->nColumn+1)*2);
/* If this is an INSERT operation, or an UPDATE that modifies the rowid
** value, then this operation requires constraint handling.
@@ -5365,7 +5380,7 @@ int sqlite3Fts3UpdateMethod(
}
update_out:
- sqlite3_free(aSzIns);
+ sqlite3_free(aSzDel);
sqlite3Fts3SegmentsClose(p);
return rc;
}