summaryrefslogtreecommitdiff
path: root/ext/fts3/fts3_expr.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/fts3/fts3_expr.c')
-rw-r--r--ext/fts3/fts3_expr.c335
1 files changed, 300 insertions, 35 deletions
diff --git a/ext/fts3/fts3_expr.c b/ext/fts3/fts3_expr.c
index a6e3492..c046d7d 100644
--- a/ext/fts3/fts3_expr.c
+++ b/ext/fts3/fts3_expr.c
@@ -106,7 +106,7 @@ struct ParseContext {
** This function is equivalent to the standard isspace() function.
**
** The standard isspace() can be awkward to use safely, because although it
-** is defined to accept an argument of type int, its behaviour when passed
+** is defined to accept an argument of type int, its behavior when passed
** an integer that falls outside of the range of the unsigned char type
** is undefined (and sometimes, "undefined" means segfault). This wrapper
** is defined to accept an argument of type char, and always returns 0 for
@@ -185,7 +185,7 @@ static int getNextToken(
rc = sqlite3Fts3OpenTokenizer(pTokenizer, pParse->iLangid, z, n, &pCursor);
if( rc==SQLITE_OK ){
const char *zToken;
- int nToken, iStart, iEnd, iPosition;
+ int nToken = 0, iStart = 0, iEnd = 0, iPosition = 0;
int nByte; /* total space to allocate */
rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition);
@@ -300,7 +300,7 @@ static int getNextString(
int ii;
for(ii=0; rc==SQLITE_OK; ii++){
const char *zByte;
- int nByte, iBegin, iEnd, iPos;
+ int nByte = 0, iBegin = 0, iEnd = 0, iPos = 0;
rc = pModule->xNext(pCursor, &zByte, &nByte, &iBegin, &iEnd, &iPos);
if( rc==SQLITE_OK ){
Fts3PhraseToken *pToken;
@@ -640,8 +640,10 @@ static int fts3ExprParse(
}
pNot->eType = FTSQUERY_NOT;
pNot->pRight = p;
+ p->pParent = pNot;
if( pNotBranch ){
pNot->pLeft = pNotBranch;
+ pNotBranch->pParent = pNot;
}
pNotBranch = pNot;
p = pPrev;
@@ -729,6 +731,7 @@ static int fts3ExprParse(
pIter = pIter->pLeft;
}
pIter->pLeft = pRet;
+ pRet->pParent = pIter;
pRet = pNotBranch;
}
}
@@ -746,30 +749,184 @@ exprparse_out:
}
/*
-** Parameters z and n contain a pointer to and length of a buffer containing
-** an fts3 query expression, respectively. This function attempts to parse the
-** query expression and create a tree of Fts3Expr structures representing the
-** parsed expression. If successful, *ppExpr is set to point to the head
-** of the parsed expression tree and SQLITE_OK is returned. If an error
-** occurs, either SQLITE_NOMEM (out-of-memory error) or SQLITE_ERROR (parse
-** error) is returned and *ppExpr is set to 0.
+** Return SQLITE_ERROR if the maximum depth of the expression tree passed
+** as the only argument is more than nMaxDepth.
+*/
+static int fts3ExprCheckDepth(Fts3Expr *p, int nMaxDepth){
+ int rc = SQLITE_OK;
+ if( p ){
+ if( nMaxDepth<0 ){
+ rc = SQLITE_TOOBIG;
+ }else{
+ rc = fts3ExprCheckDepth(p->pLeft, nMaxDepth-1);
+ if( rc==SQLITE_OK ){
+ rc = fts3ExprCheckDepth(p->pRight, nMaxDepth-1);
+ }
+ }
+ }
+ return rc;
+}
+
+/*
+** This function attempts to transform the expression tree at (*pp) to
+** an equivalent but more balanced form. The tree is modified in place.
+** If successful, SQLITE_OK is returned and (*pp) set to point to the
+** new root expression node.
**
-** If parameter n is a negative number, then z is assumed to point to a
-** nul-terminated string and the length is determined using strlen().
+** nMaxDepth is the maximum allowable depth of the balanced sub-tree.
**
-** The first parameter, pTokenizer, is passed the fts3 tokenizer module to
-** use to normalize query tokens while parsing the expression. The azCol[]
-** array, which is assumed to contain nCol entries, should contain the names
-** of each column in the target fts3 table, in order from left to right.
-** Column names must be nul-terminated strings.
+** Otherwise, if an error occurs, an SQLite error code is returned and
+** expression (*pp) freed.
+*/
+static int fts3ExprBalance(Fts3Expr **pp, int nMaxDepth){
+ int rc = SQLITE_OK; /* Return code */
+ Fts3Expr *pRoot = *pp; /* Initial root node */
+ Fts3Expr *pFree = 0; /* List of free nodes. Linked by pParent. */
+ int eType = pRoot->eType; /* Type of node in this tree */
+
+ if( nMaxDepth==0 ){
+ rc = SQLITE_ERROR;
+ }
+
+ if( rc==SQLITE_OK && (eType==FTSQUERY_AND || eType==FTSQUERY_OR) ){
+ Fts3Expr **apLeaf;
+ apLeaf = (Fts3Expr **)sqlite3_malloc(sizeof(Fts3Expr *) * nMaxDepth);
+ if( 0==apLeaf ){
+ rc = SQLITE_NOMEM;
+ }else{
+ memset(apLeaf, 0, sizeof(Fts3Expr *) * nMaxDepth);
+ }
+
+ if( rc==SQLITE_OK ){
+ int i;
+ Fts3Expr *p;
+
+ /* Set $p to point to the left-most leaf in the tree of eType nodes. */
+ for(p=pRoot; p->eType==eType; p=p->pLeft){
+ assert( p->pParent==0 || p->pParent->pLeft==p );
+ assert( p->pLeft && p->pRight );
+ }
+
+ /* This loop runs once for each leaf in the tree of eType nodes. */
+ while( 1 ){
+ int iLvl;
+ Fts3Expr *pParent = p->pParent; /* Current parent of p */
+
+ assert( pParent==0 || pParent->pLeft==p );
+ p->pParent = 0;
+ if( pParent ){
+ pParent->pLeft = 0;
+ }else{
+ pRoot = 0;
+ }
+ rc = fts3ExprBalance(&p, nMaxDepth-1);
+ if( rc!=SQLITE_OK ) break;
+
+ for(iLvl=0; p && iLvl<nMaxDepth; iLvl++){
+ if( apLeaf[iLvl]==0 ){
+ apLeaf[iLvl] = p;
+ p = 0;
+ }else{
+ assert( pFree );
+ pFree->pLeft = apLeaf[iLvl];
+ pFree->pRight = p;
+ pFree->pLeft->pParent = pFree;
+ pFree->pRight->pParent = pFree;
+
+ p = pFree;
+ pFree = pFree->pParent;
+ p->pParent = 0;
+ apLeaf[iLvl] = 0;
+ }
+ }
+ if( p ){
+ sqlite3Fts3ExprFree(p);
+ rc = SQLITE_TOOBIG;
+ break;
+ }
+
+ /* If that was the last leaf node, break out of the loop */
+ if( pParent==0 ) break;
+
+ /* Set $p to point to the next leaf in the tree of eType nodes */
+ for(p=pParent->pRight; p->eType==eType; p=p->pLeft);
+
+ /* Remove pParent from the original tree. */
+ assert( pParent->pParent==0 || pParent->pParent->pLeft==pParent );
+ pParent->pRight->pParent = pParent->pParent;
+ if( pParent->pParent ){
+ pParent->pParent->pLeft = pParent->pRight;
+ }else{
+ assert( pParent==pRoot );
+ pRoot = pParent->pRight;
+ }
+
+ /* Link pParent into the free node list. It will be used as an
+ ** internal node of the new tree. */
+ pParent->pParent = pFree;
+ pFree = pParent;
+ }
+
+ if( rc==SQLITE_OK ){
+ p = 0;
+ for(i=0; i<nMaxDepth; i++){
+ if( apLeaf[i] ){
+ if( p==0 ){
+ p = apLeaf[i];
+ p->pParent = 0;
+ }else{
+ assert( pFree!=0 );
+ pFree->pRight = p;
+ pFree->pLeft = apLeaf[i];
+ pFree->pLeft->pParent = pFree;
+ pFree->pRight->pParent = pFree;
+
+ p = pFree;
+ pFree = pFree->pParent;
+ p->pParent = 0;
+ }
+ }
+ }
+ pRoot = p;
+ }else{
+ /* An error occurred. Delete the contents of the apLeaf[] array
+ ** and pFree list. Everything else is cleaned up by the call to
+ ** sqlite3Fts3ExprFree(pRoot) below. */
+ Fts3Expr *pDel;
+ for(i=0; i<nMaxDepth; i++){
+ sqlite3Fts3ExprFree(apLeaf[i]);
+ }
+ while( (pDel=pFree)!=0 ){
+ pFree = pDel->pParent;
+ sqlite3_free(pDel);
+ }
+ }
+
+ assert( pFree==0 );
+ sqlite3_free( apLeaf );
+ }
+ }
+
+ if( rc!=SQLITE_OK ){
+ sqlite3Fts3ExprFree(pRoot);
+ pRoot = 0;
+ }
+ *pp = pRoot;
+ return rc;
+}
+
+/*
+** This function is similar to sqlite3Fts3ExprParse(), with the following
+** differences:
**
-** The iDefaultCol parameter should be passed the index of the table column
-** that appears on the left-hand-side of the MATCH operator (the default
-** column to match against for tokens for which a column name is not explicitly
-** specified as part of the query string), or -1 if tokens may by default
-** match any table column.
+** 1. It does not do expression rebalancing.
+** 2. It does not check that the expression does not exceed the
+** maximum allowable depth.
+** 3. Even if it fails, *ppExpr may still be set to point to an
+** expression tree. It should be deleted using sqlite3Fts3ExprFree()
+** in this case.
*/
-int sqlite3Fts3ExprParse(
+static int fts3ExprParseUnbalanced(
sqlite3_tokenizer *pTokenizer, /* Tokenizer module */
int iLangid, /* Language id for tokenizer */
char **azCol, /* Array of column names for fts3 table */
@@ -798,28 +955,116 @@ int sqlite3Fts3ExprParse(
n = (int)strlen(z);
}
rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed);
+ assert( rc==SQLITE_OK || *ppExpr==0 );
/* Check for mismatched parenthesis */
if( rc==SQLITE_OK && sParse.nNest ){
rc = SQLITE_ERROR;
+ }
+
+ return rc;
+}
+
+/*
+** Parameters z and n contain a pointer to and length of a buffer containing
+** an fts3 query expression, respectively. This function attempts to parse the
+** query expression and create a tree of Fts3Expr structures representing the
+** parsed expression. If successful, *ppExpr is set to point to the head
+** of the parsed expression tree and SQLITE_OK is returned. If an error
+** occurs, either SQLITE_NOMEM (out-of-memory error) or SQLITE_ERROR (parse
+** error) is returned and *ppExpr is set to 0.
+**
+** If parameter n is a negative number, then z is assumed to point to a
+** nul-terminated string and the length is determined using strlen().
+**
+** The first parameter, pTokenizer, is passed the fts3 tokenizer module to
+** use to normalize query tokens while parsing the expression. The azCol[]
+** array, which is assumed to contain nCol entries, should contain the names
+** of each column in the target fts3 table, in order from left to right.
+** Column names must be nul-terminated strings.
+**
+** The iDefaultCol parameter should be passed the index of the table column
+** that appears on the left-hand-side of the MATCH operator (the default
+** column to match against for tokens for which a column name is not explicitly
+** specified as part of the query string), or -1 if tokens may by default
+** match any table column.
+*/
+int sqlite3Fts3ExprParse(
+ sqlite3_tokenizer *pTokenizer, /* Tokenizer module */
+ int iLangid, /* Language id for tokenizer */
+ char **azCol, /* Array of column names for fts3 table */
+ int bFts4, /* True to allow FTS4-only syntax */
+ int nCol, /* Number of entries in azCol[] */
+ int iDefaultCol, /* Default column to query */
+ const char *z, int n, /* Text of MATCH query */
+ Fts3Expr **ppExpr, /* OUT: Parsed query structure */
+ char **pzErr /* OUT: Error message (sqlite3_malloc) */
+){
+ static const int MAX_EXPR_DEPTH = 12;
+ int rc = fts3ExprParseUnbalanced(
+ pTokenizer, iLangid, azCol, bFts4, nCol, iDefaultCol, z, n, ppExpr
+ );
+
+ /* Rebalance the expression. And check that its depth does not exceed
+ ** MAX_EXPR_DEPTH. */
+ if( rc==SQLITE_OK && *ppExpr ){
+ rc = fts3ExprBalance(ppExpr, MAX_EXPR_DEPTH);
+ if( rc==SQLITE_OK ){
+ rc = fts3ExprCheckDepth(*ppExpr, MAX_EXPR_DEPTH);
+ }
+ }
+
+ if( rc!=SQLITE_OK ){
sqlite3Fts3ExprFree(*ppExpr);
*ppExpr = 0;
+ if( rc==SQLITE_TOOBIG ){
+ *pzErr = sqlite3_mprintf(
+ "FTS expression tree is too large (maximum depth %d)", MAX_EXPR_DEPTH
+ );
+ rc = SQLITE_ERROR;
+ }else if( rc==SQLITE_ERROR ){
+ *pzErr = sqlite3_mprintf("malformed MATCH expression: [%s]", z);
+ }
}
return rc;
}
/*
+** Free a single node of an expression tree.
+*/
+static void fts3FreeExprNode(Fts3Expr *p){
+ assert( p->eType==FTSQUERY_PHRASE || p->pPhrase==0 );
+ sqlite3Fts3EvalPhraseCleanup(p->pPhrase);
+ sqlite3_free(p->aMI);
+ sqlite3_free(p);
+}
+
+/*
** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse().
+**
+** This function would be simpler if it recursively called itself. But
+** that would mean passing a sufficiently large expression to ExprParse()
+** could cause a stack overflow.
*/
-void sqlite3Fts3ExprFree(Fts3Expr *p){
- if( p ){
- assert( p->eType==FTSQUERY_PHRASE || p->pPhrase==0 );
- sqlite3Fts3ExprFree(p->pLeft);
- sqlite3Fts3ExprFree(p->pRight);
- sqlite3Fts3EvalPhraseCleanup(p->pPhrase);
- sqlite3_free(p->aMI);
- sqlite3_free(p);
+void sqlite3Fts3ExprFree(Fts3Expr *pDel){
+ Fts3Expr *p;
+ assert( pDel==0 || pDel->pParent==0 );
+ for(p=pDel; p && (p->pLeft||p->pRight); p=(p->pLeft ? p->pLeft : p->pRight)){
+ assert( p->pParent==0 || p==p->pParent->pRight || p==p->pParent->pLeft );
+ }
+ while( p ){
+ Fts3Expr *pParent = p->pParent;
+ fts3FreeExprNode(p);
+ if( pParent && p==pParent->pLeft && pParent->pRight ){
+ p = pParent->pRight;
+ while( p && (p->pLeft || p->pRight) ){
+ assert( p==p->pParent->pRight || p==p->pParent->pLeft );
+ p = (p->pLeft ? p->pLeft : p->pRight);
+ }
+ }else{
+ p = pParent;
+ }
}
}
@@ -871,6 +1116,9 @@ static int queryTestTokenizer(
** the returned expression text and then freed using sqlite3_free().
*/
static char *exprToString(Fts3Expr *pExpr, char *zBuf){
+ if( pExpr==0 ){
+ return sqlite3_mprintf("");
+ }
switch( pExpr->eType ){
case FTSQUERY_PHRASE: {
Fts3Phrase *pPhrase = pExpr->pPhrase;
@@ -978,10 +1226,21 @@ static void fts3ExprTest(
azCol[ii] = (char *)sqlite3_value_text(argv[ii+2]);
}
- rc = sqlite3Fts3ExprParse(
- pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr
- );
+ if( sqlite3_user_data(context) ){
+ char *zDummy = 0;
+ rc = sqlite3Fts3ExprParse(
+ pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr, &zDummy
+ );
+ assert( rc==SQLITE_OK || pExpr==0 );
+ sqlite3_free(zDummy);
+ }else{
+ rc = fts3ExprParseUnbalanced(
+ pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr
+ );
+ }
+
if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){
+ sqlite3Fts3ExprFree(pExpr);
sqlite3_result_error(context, "Error parsing expression", -1);
}else if( rc==SQLITE_NOMEM || !(zBuf = exprToString(pExpr, 0)) ){
sqlite3_result_error_nomem(context);
@@ -1004,9 +1263,15 @@ exprtest_out:
** with database connection db.
*/
int sqlite3Fts3ExprInitTestInterface(sqlite3* db){
- return sqlite3_create_function(
+ int rc = sqlite3_create_function(
db, "fts3_exprtest", -1, SQLITE_UTF8, 0, fts3ExprTest, 0, 0
);
+ if( rc==SQLITE_OK ){
+ rc = sqlite3_create_function(db, "fts3_exprtest_rebalance",
+ -1, SQLITE_UTF8, (void *)1, fts3ExprTest, 0, 0
+ );
+ }
+ return rc;
}
#endif