From 08119c361d1181b3e8f1abb429236e488a664753 Mon Sep 17 00:00:00 2001 From: Hans-Christoph Steiner Date: Tue, 13 Aug 2013 15:42:54 -0400 Subject: Imported Upstream version 2.2.1 --- ext/fts3/fts3_expr.c | 335 +++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 300 insertions(+), 35 deletions(-) (limited to 'ext/fts3/fts3_expr.c') diff --git a/ext/fts3/fts3_expr.c b/ext/fts3/fts3_expr.c index a6e3492..c046d7d 100644 --- a/ext/fts3/fts3_expr.c +++ b/ext/fts3/fts3_expr.c @@ -106,7 +106,7 @@ struct ParseContext { ** This function is equivalent to the standard isspace() function. ** ** The standard isspace() can be awkward to use safely, because although it -** is defined to accept an argument of type int, its behaviour when passed +** is defined to accept an argument of type int, its behavior when passed ** an integer that falls outside of the range of the unsigned char type ** is undefined (and sometimes, "undefined" means segfault). This wrapper ** is defined to accept an argument of type char, and always returns 0 for @@ -185,7 +185,7 @@ static int getNextToken( rc = sqlite3Fts3OpenTokenizer(pTokenizer, pParse->iLangid, z, n, &pCursor); if( rc==SQLITE_OK ){ const char *zToken; - int nToken, iStart, iEnd, iPosition; + int nToken = 0, iStart = 0, iEnd = 0, iPosition = 0; int nByte; /* total space to allocate */ rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition); @@ -300,7 +300,7 @@ static int getNextString( int ii; for(ii=0; rc==SQLITE_OK; ii++){ const char *zByte; - int nByte, iBegin, iEnd, iPos; + int nByte = 0, iBegin = 0, iEnd = 0, iPos = 0; rc = pModule->xNext(pCursor, &zByte, &nByte, &iBegin, &iEnd, &iPos); if( rc==SQLITE_OK ){ Fts3PhraseToken *pToken; @@ -640,8 +640,10 @@ static int fts3ExprParse( } pNot->eType = FTSQUERY_NOT; pNot->pRight = p; + p->pParent = pNot; if( pNotBranch ){ pNot->pLeft = pNotBranch; + pNotBranch->pParent = pNot; } pNotBranch = pNot; p = pPrev; @@ -729,6 +731,7 @@ static int fts3ExprParse( pIter = pIter->pLeft; } pIter->pLeft = pRet; + pRet->pParent = pIter; pRet = pNotBranch; } } @@ -746,30 +749,184 @@ exprparse_out: } /* -** Parameters z and n contain a pointer to and length of a buffer containing -** an fts3 query expression, respectively. This function attempts to parse the -** query expression and create a tree of Fts3Expr structures representing the -** parsed expression. If successful, *ppExpr is set to point to the head -** of the parsed expression tree and SQLITE_OK is returned. If an error -** occurs, either SQLITE_NOMEM (out-of-memory error) or SQLITE_ERROR (parse -** error) is returned and *ppExpr is set to 0. +** Return SQLITE_ERROR if the maximum depth of the expression tree passed +** as the only argument is more than nMaxDepth. +*/ +static int fts3ExprCheckDepth(Fts3Expr *p, int nMaxDepth){ + int rc = SQLITE_OK; + if( p ){ + if( nMaxDepth<0 ){ + rc = SQLITE_TOOBIG; + }else{ + rc = fts3ExprCheckDepth(p->pLeft, nMaxDepth-1); + if( rc==SQLITE_OK ){ + rc = fts3ExprCheckDepth(p->pRight, nMaxDepth-1); + } + } + } + return rc; +} + +/* +** This function attempts to transform the expression tree at (*pp) to +** an equivalent but more balanced form. The tree is modified in place. +** If successful, SQLITE_OK is returned and (*pp) set to point to the +** new root expression node. ** -** If parameter n is a negative number, then z is assumed to point to a -** nul-terminated string and the length is determined using strlen(). +** nMaxDepth is the maximum allowable depth of the balanced sub-tree. ** -** The first parameter, pTokenizer, is passed the fts3 tokenizer module to -** use to normalize query tokens while parsing the expression. The azCol[] -** array, which is assumed to contain nCol entries, should contain the names -** of each column in the target fts3 table, in order from left to right. -** Column names must be nul-terminated strings. +** Otherwise, if an error occurs, an SQLite error code is returned and +** expression (*pp) freed. +*/ +static int fts3ExprBalance(Fts3Expr **pp, int nMaxDepth){ + int rc = SQLITE_OK; /* Return code */ + Fts3Expr *pRoot = *pp; /* Initial root node */ + Fts3Expr *pFree = 0; /* List of free nodes. Linked by pParent. */ + int eType = pRoot->eType; /* Type of node in this tree */ + + if( nMaxDepth==0 ){ + rc = SQLITE_ERROR; + } + + if( rc==SQLITE_OK && (eType==FTSQUERY_AND || eType==FTSQUERY_OR) ){ + Fts3Expr **apLeaf; + apLeaf = (Fts3Expr **)sqlite3_malloc(sizeof(Fts3Expr *) * nMaxDepth); + if( 0==apLeaf ){ + rc = SQLITE_NOMEM; + }else{ + memset(apLeaf, 0, sizeof(Fts3Expr *) * nMaxDepth); + } + + if( rc==SQLITE_OK ){ + int i; + Fts3Expr *p; + + /* Set $p to point to the left-most leaf in the tree of eType nodes. */ + for(p=pRoot; p->eType==eType; p=p->pLeft){ + assert( p->pParent==0 || p->pParent->pLeft==p ); + assert( p->pLeft && p->pRight ); + } + + /* This loop runs once for each leaf in the tree of eType nodes. */ + while( 1 ){ + int iLvl; + Fts3Expr *pParent = p->pParent; /* Current parent of p */ + + assert( pParent==0 || pParent->pLeft==p ); + p->pParent = 0; + if( pParent ){ + pParent->pLeft = 0; + }else{ + pRoot = 0; + } + rc = fts3ExprBalance(&p, nMaxDepth-1); + if( rc!=SQLITE_OK ) break; + + for(iLvl=0; p && iLvlpLeft = apLeaf[iLvl]; + pFree->pRight = p; + pFree->pLeft->pParent = pFree; + pFree->pRight->pParent = pFree; + + p = pFree; + pFree = pFree->pParent; + p->pParent = 0; + apLeaf[iLvl] = 0; + } + } + if( p ){ + sqlite3Fts3ExprFree(p); + rc = SQLITE_TOOBIG; + break; + } + + /* If that was the last leaf node, break out of the loop */ + if( pParent==0 ) break; + + /* Set $p to point to the next leaf in the tree of eType nodes */ + for(p=pParent->pRight; p->eType==eType; p=p->pLeft); + + /* Remove pParent from the original tree. */ + assert( pParent->pParent==0 || pParent->pParent->pLeft==pParent ); + pParent->pRight->pParent = pParent->pParent; + if( pParent->pParent ){ + pParent->pParent->pLeft = pParent->pRight; + }else{ + assert( pParent==pRoot ); + pRoot = pParent->pRight; + } + + /* Link pParent into the free node list. It will be used as an + ** internal node of the new tree. */ + pParent->pParent = pFree; + pFree = pParent; + } + + if( rc==SQLITE_OK ){ + p = 0; + for(i=0; ipParent = 0; + }else{ + assert( pFree!=0 ); + pFree->pRight = p; + pFree->pLeft = apLeaf[i]; + pFree->pLeft->pParent = pFree; + pFree->pRight->pParent = pFree; + + p = pFree; + pFree = pFree->pParent; + p->pParent = 0; + } + } + } + pRoot = p; + }else{ + /* An error occurred. Delete the contents of the apLeaf[] array + ** and pFree list. Everything else is cleaned up by the call to + ** sqlite3Fts3ExprFree(pRoot) below. */ + Fts3Expr *pDel; + for(i=0; ipParent; + sqlite3_free(pDel); + } + } + + assert( pFree==0 ); + sqlite3_free( apLeaf ); + } + } + + if( rc!=SQLITE_OK ){ + sqlite3Fts3ExprFree(pRoot); + pRoot = 0; + } + *pp = pRoot; + return rc; +} + +/* +** This function is similar to sqlite3Fts3ExprParse(), with the following +** differences: ** -** The iDefaultCol parameter should be passed the index of the table column -** that appears on the left-hand-side of the MATCH operator (the default -** column to match against for tokens for which a column name is not explicitly -** specified as part of the query string), or -1 if tokens may by default -** match any table column. +** 1. It does not do expression rebalancing. +** 2. It does not check that the expression does not exceed the +** maximum allowable depth. +** 3. Even if it fails, *ppExpr may still be set to point to an +** expression tree. It should be deleted using sqlite3Fts3ExprFree() +** in this case. */ -int sqlite3Fts3ExprParse( +static int fts3ExprParseUnbalanced( sqlite3_tokenizer *pTokenizer, /* Tokenizer module */ int iLangid, /* Language id for tokenizer */ char **azCol, /* Array of column names for fts3 table */ @@ -798,28 +955,116 @@ int sqlite3Fts3ExprParse( n = (int)strlen(z); } rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed); + assert( rc==SQLITE_OK || *ppExpr==0 ); /* Check for mismatched parenthesis */ if( rc==SQLITE_OK && sParse.nNest ){ rc = SQLITE_ERROR; + } + + return rc; +} + +/* +** Parameters z and n contain a pointer to and length of a buffer containing +** an fts3 query expression, respectively. This function attempts to parse the +** query expression and create a tree of Fts3Expr structures representing the +** parsed expression. If successful, *ppExpr is set to point to the head +** of the parsed expression tree and SQLITE_OK is returned. If an error +** occurs, either SQLITE_NOMEM (out-of-memory error) or SQLITE_ERROR (parse +** error) is returned and *ppExpr is set to 0. +** +** If parameter n is a negative number, then z is assumed to point to a +** nul-terminated string and the length is determined using strlen(). +** +** The first parameter, pTokenizer, is passed the fts3 tokenizer module to +** use to normalize query tokens while parsing the expression. The azCol[] +** array, which is assumed to contain nCol entries, should contain the names +** of each column in the target fts3 table, in order from left to right. +** Column names must be nul-terminated strings. +** +** The iDefaultCol parameter should be passed the index of the table column +** that appears on the left-hand-side of the MATCH operator (the default +** column to match against for tokens for which a column name is not explicitly +** specified as part of the query string), or -1 if tokens may by default +** match any table column. +*/ +int sqlite3Fts3ExprParse( + sqlite3_tokenizer *pTokenizer, /* Tokenizer module */ + int iLangid, /* Language id for tokenizer */ + char **azCol, /* Array of column names for fts3 table */ + int bFts4, /* True to allow FTS4-only syntax */ + int nCol, /* Number of entries in azCol[] */ + int iDefaultCol, /* Default column to query */ + const char *z, int n, /* Text of MATCH query */ + Fts3Expr **ppExpr, /* OUT: Parsed query structure */ + char **pzErr /* OUT: Error message (sqlite3_malloc) */ +){ + static const int MAX_EXPR_DEPTH = 12; + int rc = fts3ExprParseUnbalanced( + pTokenizer, iLangid, azCol, bFts4, nCol, iDefaultCol, z, n, ppExpr + ); + + /* Rebalance the expression. And check that its depth does not exceed + ** MAX_EXPR_DEPTH. */ + if( rc==SQLITE_OK && *ppExpr ){ + rc = fts3ExprBalance(ppExpr, MAX_EXPR_DEPTH); + if( rc==SQLITE_OK ){ + rc = fts3ExprCheckDepth(*ppExpr, MAX_EXPR_DEPTH); + } + } + + if( rc!=SQLITE_OK ){ sqlite3Fts3ExprFree(*ppExpr); *ppExpr = 0; + if( rc==SQLITE_TOOBIG ){ + *pzErr = sqlite3_mprintf( + "FTS expression tree is too large (maximum depth %d)", MAX_EXPR_DEPTH + ); + rc = SQLITE_ERROR; + }else if( rc==SQLITE_ERROR ){ + *pzErr = sqlite3_mprintf("malformed MATCH expression: [%s]", z); + } } return rc; } +/* +** Free a single node of an expression tree. +*/ +static void fts3FreeExprNode(Fts3Expr *p){ + assert( p->eType==FTSQUERY_PHRASE || p->pPhrase==0 ); + sqlite3Fts3EvalPhraseCleanup(p->pPhrase); + sqlite3_free(p->aMI); + sqlite3_free(p); +} + /* ** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse(). +** +** This function would be simpler if it recursively called itself. But +** that would mean passing a sufficiently large expression to ExprParse() +** could cause a stack overflow. */ -void sqlite3Fts3ExprFree(Fts3Expr *p){ - if( p ){ - assert( p->eType==FTSQUERY_PHRASE || p->pPhrase==0 ); - sqlite3Fts3ExprFree(p->pLeft); - sqlite3Fts3ExprFree(p->pRight); - sqlite3Fts3EvalPhraseCleanup(p->pPhrase); - sqlite3_free(p->aMI); - sqlite3_free(p); +void sqlite3Fts3ExprFree(Fts3Expr *pDel){ + Fts3Expr *p; + assert( pDel==0 || pDel->pParent==0 ); + for(p=pDel; p && (p->pLeft||p->pRight); p=(p->pLeft ? p->pLeft : p->pRight)){ + assert( p->pParent==0 || p==p->pParent->pRight || p==p->pParent->pLeft ); + } + while( p ){ + Fts3Expr *pParent = p->pParent; + fts3FreeExprNode(p); + if( pParent && p==pParent->pLeft && pParent->pRight ){ + p = pParent->pRight; + while( p && (p->pLeft || p->pRight) ){ + assert( p==p->pParent->pRight || p==p->pParent->pLeft ); + p = (p->pLeft ? p->pLeft : p->pRight); + } + }else{ + p = pParent; + } } } @@ -871,6 +1116,9 @@ static int queryTestTokenizer( ** the returned expression text and then freed using sqlite3_free(). */ static char *exprToString(Fts3Expr *pExpr, char *zBuf){ + if( pExpr==0 ){ + return sqlite3_mprintf(""); + } switch( pExpr->eType ){ case FTSQUERY_PHRASE: { Fts3Phrase *pPhrase = pExpr->pPhrase; @@ -978,10 +1226,21 @@ static void fts3ExprTest( azCol[ii] = (char *)sqlite3_value_text(argv[ii+2]); } - rc = sqlite3Fts3ExprParse( - pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr - ); + if( sqlite3_user_data(context) ){ + char *zDummy = 0; + rc = sqlite3Fts3ExprParse( + pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr, &zDummy + ); + assert( rc==SQLITE_OK || pExpr==0 ); + sqlite3_free(zDummy); + }else{ + rc = fts3ExprParseUnbalanced( + pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr + ); + } + if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){ + sqlite3Fts3ExprFree(pExpr); sqlite3_result_error(context, "Error parsing expression", -1); }else if( rc==SQLITE_NOMEM || !(zBuf = exprToString(pExpr, 0)) ){ sqlite3_result_error_nomem(context); @@ -1004,9 +1263,15 @@ exprtest_out: ** with database connection db. */ int sqlite3Fts3ExprInitTestInterface(sqlite3* db){ - return sqlite3_create_function( + int rc = sqlite3_create_function( db, "fts3_exprtest", -1, SQLITE_UTF8, 0, fts3ExprTest, 0, 0 ); + if( rc==SQLITE_OK ){ + rc = sqlite3_create_function(db, "fts3_exprtest_rebalance", + -1, SQLITE_UTF8, (void *)1, fts3ExprTest, 0, 0 + ); + } + return rc; } #endif -- cgit v1.2.3