diff options
Diffstat (limited to 'ext')
36 files changed, 8940 insertions, 171 deletions
| diff --git a/ext/async/README.txt b/ext/async/README.txt index 05acffe..f62fa2f 100644 --- a/ext/async/README.txt +++ b/ext/async/README.txt @@ -1,3 +1,10 @@ +NOTE (2012-11-29): + +The functionality implemented by this extension has been superseded +by WAL-mode.  This module is no longer supported or maintained.  The +code is retained for historical reference only. + +------------------------------------------------------------------------------  Normally, when SQLite writes to a database file, it waits until the write  operation is finished before returning control to the calling application. @@ -161,4 +168,3 @@ the database, eliminating the bottleneck.    The functionality required of each of the above functions is described    in comments in sqlite3async.c. - diff --git a/ext/async/sqlite3async.c b/ext/async/sqlite3async.c index 0814da7..4ab39ca 100644 --- a/ext/async/sqlite3async.c +++ b/ext/async/sqlite3async.c @@ -1510,6 +1510,7 @@ static void asyncWriterThread(void){        case ASYNC_DELETE:          ASYNC_TRACE(("DELETE %s\n", p->zBuf));          rc = pVfs->xDelete(pVfs, p->zBuf, (int)p->iOffset); +        if( rc==SQLITE_IOERR_DELETE_NOENT ) rc = SQLITE_OK;          break;        case ASYNC_OPENEXCLUSIVE: { diff --git a/ext/async/sqlite3async.h b/ext/async/sqlite3async.h index 143cdc7..5b20d71 100644 --- a/ext/async/sqlite3async.h +++ b/ext/async/sqlite3async.h @@ -75,7 +75,7 @@ int sqlite3async_initialize(const char *zParent, int isDefault);  ** On win32 platforms, this function also releases the small number of   ** critical section and event objects created by sqlite3async_initialize().  */  -void sqlite3async_shutdown(); +void sqlite3async_shutdown(void);  /*  ** This function may only be called when the asynchronous IO VFS is  @@ -94,7 +94,7 @@ void sqlite3async_shutdown();  ** If multiple simultaneous calls are made to sqlite3async_run() from two  ** or more threads, then the calls are serialized internally.  
*/ -void sqlite3async_run(); +void sqlite3async_run(void);  /*  ** This function may only be called when the asynchronous IO VFS is  diff --git a/ext/fts1/ft_hash.h b/ext/fts1/ft_hash.h index 93b6dcf..95871a4 100644 --- a/ext/fts1/ft_hash.h +++ b/ext/fts1/ft_hash.h @@ -9,7 +9,7 @@  **    May you share freely, never taking more than you give.  **  ************************************************************************* -** This is the header file for the generic hash-table implemenation +** This is the header file for the generic hash-table implementation  ** used in SQLite.  We've modified it slightly to serve as a standalone  ** hash table implementation for the full-text indexing module.  ** diff --git a/ext/fts1/fts1_hash.h b/ext/fts1/fts1_hash.h index c31c430..9001152 100644 --- a/ext/fts1/fts1_hash.h +++ b/ext/fts1/fts1_hash.h @@ -9,7 +9,7 @@  **    May you share freely, never taking more than you give.  **  ************************************************************************* -** This is the header file for the generic hash-table implemenation +** This is the header file for the generic hash-table implementation  ** used in SQLite.  We've modified it slightly to serve as a standalone  ** hash table implementation for the full-text indexing module.  ** diff --git a/ext/fts2/fts2.c b/ext/fts2/fts2.c index 93e03cd..f008ce6 100644 --- a/ext/fts2/fts2.c +++ b/ext/fts2/fts2.c @@ -6779,7 +6779,7 @@ void sqlite3Fts2IcuTokenizerModule(sqlite3_tokenizer_module const**ppModule);  int sqlite3Fts2InitHashTable(sqlite3 *, fts2Hash *, const char *);  /* -** Initialise the fts2 extension. If this extension is built as part +** Initialize the fts2 extension. If this extension is built as part  ** of the sqlite library, then this function is called directly by  ** SQLite. If fts2 is built as a dynamically loadable extension, this  ** function is called by the sqlite3_extension_init() entry point. 
@@ -6797,7 +6797,7 @@ int sqlite3Fts2Init(sqlite3 *db){    sqlite3Fts2IcuTokenizerModule(&pIcu);  #endif -  /* Allocate and initialise the hash-table used to store tokenizers. */ +  /* Allocate and initialize the hash-table used to store tokenizers. */    pHash = sqlite3_malloc(sizeof(fts2Hash));    if( !pHash ){      rc = SQLITE_NOMEM; diff --git a/ext/fts2/fts2_hash.h b/ext/fts2/fts2_hash.h index 571aa2c..02936f1 100644 --- a/ext/fts2/fts2_hash.h +++ b/ext/fts2/fts2_hash.h @@ -9,7 +9,7 @@  **    May you share freely, never taking more than you give.  **  ************************************************************************* -** This is the header file for the generic hash-table implemenation +** This is the header file for the generic hash-table implementation  ** used in SQLite.  We've modified it slightly to serve as a standalone  ** hash table implementation for the full-text indexing module.  ** diff --git a/ext/fts2/fts2_icu.c b/ext/fts2/fts2_icu.c index de8e116..2670301 100644 --- a/ext/fts2/fts2_icu.c +++ b/ext/fts2/fts2_icu.c @@ -118,7 +118,7 @@ static int icuOpen(    nChar = nInput+1;    pCsr = (IcuCursor *)sqlite3_malloc(        sizeof(IcuCursor) +                /* IcuCursor */ -      nChar * sizeof(UChar) +            /* IcuCursor.aChar[] */ +      ((nChar+3)&~3) * sizeof(UChar) +   /* IcuCursor.aChar[] */        (nChar+1) * sizeof(int)            /* IcuCursor.aOffset[] */    );    if( !pCsr ){ @@ -126,7 +126,7 @@ static int icuOpen(    }    memset(pCsr, 0, sizeof(IcuCursor));    pCsr->aChar = (UChar *)&pCsr[1]; -  pCsr->aOffset = (int *)&pCsr->aChar[nChar]; +  pCsr->aOffset = (int *)&pCsr->aChar[(nChar+3)&~3];    pCsr->aOffset[iOut] = iInput;    U8_NEXT(zInput, iInput, nInput, c);  diff --git a/ext/fts2/fts2_tokenizer.c b/ext/fts2/fts2_tokenizer.c index f8b0663..a93790c 100644 --- a/ext/fts2/fts2_tokenizer.c +++ b/ext/fts2/fts2_tokenizer.c @@ -319,7 +319,7 @@ static void intTestFunc(  /*  ** Set up SQL objects in database db used to access the 
contents of  ** the hash table pointed to by argument pHash. The hash table must -** been initialised to use string keys, and to take a private copy  +** been initialized to use string keys, and to take a private copy   ** of the key when a value is inserted. i.e. by a call similar to:  **  **    sqlite3Fts2HashInit(pHash, FTS2_HASH_STRING, 1); diff --git a/ext/fts2/fts2_tokenizer.h b/ext/fts2/fts2_tokenizer.h index 8c256b2..8db2048 100644 --- a/ext/fts2/fts2_tokenizer.h +++ b/ext/fts2/fts2_tokenizer.h @@ -70,7 +70,7 @@ struct sqlite3_tokenizer_module {    ** This method should return either SQLITE_OK (0), or an SQLite error     ** code. If SQLITE_OK is returned, then *ppTokenizer should be set    ** to point at the newly created tokenizer structure. The generic -  ** sqlite3_tokenizer.pModule variable should not be initialised by +  ** sqlite3_tokenizer.pModule variable should not be initialized by    ** this callback. The caller will do so.    */    int (*xCreate)( diff --git a/ext/fts3/fts3.c b/ext/fts3/fts3.c index 58414f6..c00a13f 100644 --- a/ext/fts3/fts3.c +++ b/ext/fts3/fts3.c @@ -1571,7 +1571,7 @@ static int fts3CursorSeek(sqlite3_context *pContext, Fts3Cursor *pCsr){        }else{          rc = sqlite3_reset(pCsr->pStmt);          if( rc==SQLITE_OK && ((Fts3Table *)pCsr->base.pVtab)->zContentTbl==0 ){ -          /* If no row was found and no error has occured, then the %_content +          /* If no row was found and no error has occurred, then the %_content            ** table is missing a row that is present in the full-text index.            ** The data structures are corrupt.  */            rc = FTS_CORRUPT_VTAB; @@ -2811,7 +2811,7 @@ static void fts3SegReaderCursorFree(Fts3MultiSegReader *pSegcsr){  }  /* -** This function retreives the doclist for the specified term (or term +** This function retrieves the doclist for the specified term (or term  ** prefix) from the database.  
*/  static int fts3TermSelect( @@ -2975,14 +2975,12 @@ static int fts3FilterMethod(      pCsr->iLangid = 0;      if( nVal==2 ) pCsr->iLangid = sqlite3_value_int(apVal[1]); +    assert( p->base.zErrMsg==0 );      rc = sqlite3Fts3ExprParse(p->pTokenizer, pCsr->iLangid, -        p->azColumn, p->bFts4, p->nColumn, iCol, zQuery, -1, &pCsr->pExpr +        p->azColumn, p->bFts4, p->nColumn, iCol, zQuery, -1, &pCsr->pExpr,  +        &p->base.zErrMsg      );      if( rc!=SQLITE_OK ){ -      if( rc==SQLITE_ERROR ){ -        static const char *zErr = "malformed MATCH expression: [%s]"; -        p->base.zErrMsg = sqlite3_mprintf(zErr, zQuery); -      }        return rc;      } @@ -3562,7 +3560,7 @@ void sqlite3Fts3IcuTokenizerModule(sqlite3_tokenizer_module const**ppModule);  #endif  /* -** Initialise the fts3 extension. If this extension is built as part +** Initialize the fts3 extension. If this extension is built as part  ** of the sqlite library, then this function is called directly by  ** SQLite. If fts3 is built as a dynamically loadable extension, this  ** function is called by the sqlite3_extension_init() entry point. @@ -3596,7 +3594,7 @@ int sqlite3Fts3Init(sqlite3 *db){    sqlite3Fts3SimpleTokenizerModule(&pSimple);    sqlite3Fts3PorterTokenizerModule(&pPorter); -  /* Allocate and initialise the hash-table used to store tokenizers. */ +  /* Allocate and initialize the hash-table used to store tokenizers. */    pHash = sqlite3_malloc(sizeof(Fts3Hash));    if( !pHash ){      rc = SQLITE_NOMEM; @@ -3646,9 +3644,13 @@ int sqlite3Fts3Init(sqlite3 *db){            db, "fts4", &fts3Module, (void *)pHash, 0        );      } +    if( rc==SQLITE_OK ){ +      rc = sqlite3Fts3InitTok(db, (void *)pHash); +    }      return rc;    } +    /* An error has occurred. Delete the hash table and return the error code. 
*/    assert( rc!=SQLITE_OK );    if( pHash ){ @@ -4743,35 +4745,39 @@ static int fts3EvalNearTest(Fts3Expr *pExpr, int *pRc){        nTmp += p->pRight->pPhrase->doclist.nList;      }      nTmp += p->pPhrase->doclist.nList; -    aTmp = sqlite3_malloc(nTmp*2); -    if( !aTmp ){ -      *pRc = SQLITE_NOMEM; +    if( nTmp==0 ){        res = 0;      }else{ -      char *aPoslist = p->pPhrase->doclist.pList; -      int nToken = p->pPhrase->nToken; +      aTmp = sqlite3_malloc(nTmp*2); +      if( !aTmp ){ +        *pRc = SQLITE_NOMEM; +        res = 0; +      }else{ +        char *aPoslist = p->pPhrase->doclist.pList; +        int nToken = p->pPhrase->nToken; -      for(p=p->pParent;res && p && p->eType==FTSQUERY_NEAR; p=p->pParent){ -        Fts3Phrase *pPhrase = p->pRight->pPhrase; -        int nNear = p->nNear; -        res = fts3EvalNearTrim(nNear, aTmp, &aPoslist, &nToken, pPhrase); -      } -   -      aPoslist = pExpr->pRight->pPhrase->doclist.pList; -      nToken = pExpr->pRight->pPhrase->nToken; -      for(p=pExpr->pLeft; p && res; p=p->pLeft){ -        int nNear; -        Fts3Phrase *pPhrase; -        assert( p->pParent && p->pParent->pLeft==p ); -        nNear = p->pParent->nNear; -        pPhrase = ( -            p->eType==FTSQUERY_NEAR ? 
p->pRight->pPhrase : p->pPhrase -        ); -        res = fts3EvalNearTrim(nNear, aTmp, &aPoslist, &nToken, pPhrase); +        for(p=p->pParent;res && p && p->eType==FTSQUERY_NEAR; p=p->pParent){ +          Fts3Phrase *pPhrase = p->pRight->pPhrase; +          int nNear = p->nNear; +          res = fts3EvalNearTrim(nNear, aTmp, &aPoslist, &nToken, pPhrase); +        } + +        aPoslist = pExpr->pRight->pPhrase->doclist.pList; +        nToken = pExpr->pRight->pPhrase->nToken; +        for(p=pExpr->pLeft; p && res; p=p->pLeft){ +          int nNear; +          Fts3Phrase *pPhrase; +          assert( p->pParent && p->pParent->pLeft==p ); +          nNear = p->pParent->nNear; +          pPhrase = ( +              p->eType==FTSQUERY_NEAR ? p->pRight->pPhrase : p->pPhrase +              ); +          res = fts3EvalNearTrim(nNear, aTmp, &aPoslist, &nToken, pPhrase); +        }        } -    } -    sqlite3_free(aTmp); +      sqlite3_free(aTmp); +    }    }    return res; @@ -5191,7 +5197,7 @@ int sqlite3Fts3EvalPhraseStats(  ** of the current row.   **  ** More specifically, the returned buffer contains 1 varint for each  -** occurence of the phrase in the column, stored using the normal (delta+2)  +** occurrence of the phrase in the column, stored using the normal (delta+2)   ** compression and is terminated by either an 0x01 or 0x00 byte. 
For example,  ** if the requested column contains "a b X c d X X" and the position-list  ** for 'X' is requested, the buffer returned may contain: diff --git a/ext/fts3/fts3Int.h b/ext/fts3/fts3Int.h index 77ca470..b19064c 100644 --- a/ext/fts3/fts3Int.h +++ b/ext/fts3/fts3Int.h @@ -524,7 +524,7 @@ void sqlite3Fts3Matchinfo(sqlite3_context *, Fts3Cursor *, const char *);  /* fts3_expr.c */  int sqlite3Fts3ExprParse(sqlite3_tokenizer *, int, -  char **, int, int, int, const char *, int, Fts3Expr ** +  char **, int, int, int, const char *, int, Fts3Expr **, char **  );  void sqlite3Fts3ExprFree(Fts3Expr *);  #ifdef SQLITE_TEST @@ -549,6 +549,9 @@ int sqlite3Fts3EvalPhrasePoslist(Fts3Cursor *, Fts3Expr *, int iCol, char **);  int sqlite3Fts3MsrOvfl(Fts3Cursor *, Fts3MultiSegReader *, int *);  int sqlite3Fts3MsrIncrRestart(Fts3MultiSegReader *pCsr); +/* fts3_tokenize_vtab.c */ +int sqlite3Fts3InitTok(sqlite3*, Fts3Hash *); +  /* fts3_unicode2.c (functions generated by parsing unicode text files) */  #ifdef SQLITE_ENABLE_FTS4_UNICODE61  int sqlite3FtsUnicodeFold(int, int); diff --git a/ext/fts3/fts3_aux.c b/ext/fts3/fts3_aux.c index a2bff2e..9b582fc 100644 --- a/ext/fts3/fts3_aux.c +++ b/ext/fts3/fts3_aux.c @@ -70,17 +70,26 @@ static int fts3auxConnectMethod(    UNUSED_PARAMETER(pUnused); -  /* The user should specify a single argument - the name of an fts3 table. 
*/ -  if( argc!=4 ){ -    *pzErr = sqlite3_mprintf( -        "wrong number of arguments to fts4aux constructor" -    ); -    return SQLITE_ERROR; -  } +  /* The user should invoke this in one of two forms: +  ** +  **     CREATE VIRTUAL TABLE xxx USING fts4aux(fts4-table); +  **     CREATE VIRTUAL TABLE xxx USING fts4aux(fts4-table-db, fts4-table); +  */ +  if( argc!=4 && argc!=5 ) goto bad_args;    zDb = argv[1];     nDb = (int)strlen(zDb); -  zFts3 = argv[3]; +  if( argc==5 ){ +    if( nDb==4 && 0==sqlite3_strnicmp("temp", zDb, 4) ){ +      zDb = argv[3];  +      nDb = (int)strlen(zDb); +      zFts3 = argv[4]; +    }else{ +      goto bad_args; +    } +  }else{ +    zFts3 = argv[3]; +  }    nFts3 = (int)strlen(zFts3);    rc = sqlite3_declare_vtab(db, FTS3_TERMS_SCHEMA); @@ -103,6 +112,10 @@ static int fts3auxConnectMethod(    *ppVtab = (sqlite3_vtab *)p;    return SQLITE_OK; + + bad_args: +  *pzErr = sqlite3_mprintf("invalid arguments to fts4aux constructor"); +  return SQLITE_ERROR;  }  /* diff --git a/ext/fts3/fts3_expr.c b/ext/fts3/fts3_expr.c index a6e3492..c046d7d 100644 --- a/ext/fts3/fts3_expr.c +++ b/ext/fts3/fts3_expr.c @@ -106,7 +106,7 @@ struct ParseContext {  ** This function is equivalent to the standard isspace() function.   **  ** The standard isspace() can be awkward to use safely, because although it -** is defined to accept an argument of type int, its behaviour when passed +** is defined to accept an argument of type int, its behavior when passed  ** an integer that falls outside of the range of the unsigned char type  ** is undefined (and sometimes, "undefined" means segfault). 
This wrapper  ** is defined to accept an argument of type char, and always returns 0 for @@ -185,7 +185,7 @@ static int getNextToken(    rc = sqlite3Fts3OpenTokenizer(pTokenizer, pParse->iLangid, z, n, &pCursor);    if( rc==SQLITE_OK ){      const char *zToken; -    int nToken, iStart, iEnd, iPosition; +    int nToken = 0, iStart = 0, iEnd = 0, iPosition = 0;      int nByte;                               /* total space to allocate */      rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition); @@ -300,7 +300,7 @@ static int getNextString(      int ii;      for(ii=0; rc==SQLITE_OK; ii++){        const char *zByte; -      int nByte, iBegin, iEnd, iPos; +      int nByte = 0, iBegin = 0, iEnd = 0, iPos = 0;        rc = pModule->xNext(pCursor, &zByte, &nByte, &iBegin, &iEnd, &iPos);        if( rc==SQLITE_OK ){          Fts3PhraseToken *pToken; @@ -640,8 +640,10 @@ static int fts3ExprParse(          }          pNot->eType = FTSQUERY_NOT;          pNot->pRight = p; +        p->pParent = pNot;          if( pNotBranch ){            pNot->pLeft = pNotBranch; +          pNotBranch->pParent = pNot;          }          pNotBranch = pNot;          p = pPrev; @@ -729,6 +731,7 @@ static int fts3ExprParse(            pIter = pIter->pLeft;          }          pIter->pLeft = pRet; +        pRet->pParent = pIter;          pRet = pNotBranch;        }      } @@ -746,30 +749,184 @@ exprparse_out:  }  /* -** Parameters z and n contain a pointer to and length of a buffer containing -** an fts3 query expression, respectively. This function attempts to parse the -** query expression and create a tree of Fts3Expr structures representing the -** parsed expression. If successful, *ppExpr is set to point to the head -** of the parsed expression tree and SQLITE_OK is returned. If an error -** occurs, either SQLITE_NOMEM (out-of-memory error) or SQLITE_ERROR (parse -** error) is returned and *ppExpr is set to 0. 
+** Return SQLITE_ERROR if the maximum depth of the expression tree passed  +** as the only argument is more than nMaxDepth. +*/ +static int fts3ExprCheckDepth(Fts3Expr *p, int nMaxDepth){ +  int rc = SQLITE_OK; +  if( p ){ +    if( nMaxDepth<0 ){  +      rc = SQLITE_TOOBIG; +    }else{ +      rc = fts3ExprCheckDepth(p->pLeft, nMaxDepth-1); +      if( rc==SQLITE_OK ){ +        rc = fts3ExprCheckDepth(p->pRight, nMaxDepth-1); +      } +    } +  } +  return rc; +} + +/* +** This function attempts to transform the expression tree at (*pp) to +** an equivalent but more balanced form. The tree is modified in place. +** If successful, SQLITE_OK is returned and (*pp) set to point to the  +** new root expression node.   ** -** If parameter n is a negative number, then z is assumed to point to a -** nul-terminated string and the length is determined using strlen(). +** nMaxDepth is the maximum allowable depth of the balanced sub-tree.  ** -** The first parameter, pTokenizer, is passed the fts3 tokenizer module to -** use to normalize query tokens while parsing the expression. The azCol[] -** array, which is assumed to contain nCol entries, should contain the names -** of each column in the target fts3 table, in order from left to right.  -** Column names must be nul-terminated strings. +** Otherwise, if an error occurs, an SQLite error code is returned and  +** expression (*pp) freed. +*/ +static int fts3ExprBalance(Fts3Expr **pp, int nMaxDepth){ +  int rc = SQLITE_OK;             /* Return code */ +  Fts3Expr *pRoot = *pp;          /* Initial root node */ +  Fts3Expr *pFree = 0;            /* List of free nodes. Linked by pParent. 
*/ +  int eType = pRoot->eType;       /* Type of node in this tree */ + +  if( nMaxDepth==0 ){ +    rc = SQLITE_ERROR; +  } + +  if( rc==SQLITE_OK && (eType==FTSQUERY_AND || eType==FTSQUERY_OR) ){ +    Fts3Expr **apLeaf; +    apLeaf = (Fts3Expr **)sqlite3_malloc(sizeof(Fts3Expr *) * nMaxDepth); +    if( 0==apLeaf ){ +      rc = SQLITE_NOMEM; +    }else{ +      memset(apLeaf, 0, sizeof(Fts3Expr *) * nMaxDepth); +    } + +    if( rc==SQLITE_OK ){ +      int i; +      Fts3Expr *p; + +      /* Set $p to point to the left-most leaf in the tree of eType nodes. */ +      for(p=pRoot; p->eType==eType; p=p->pLeft){ +        assert( p->pParent==0 || p->pParent->pLeft==p ); +        assert( p->pLeft && p->pRight ); +      } + +      /* This loop runs once for each leaf in the tree of eType nodes. */ +      while( 1 ){ +        int iLvl; +        Fts3Expr *pParent = p->pParent;     /* Current parent of p */ + +        assert( pParent==0 || pParent->pLeft==p ); +        p->pParent = 0; +        if( pParent ){ +          pParent->pLeft = 0; +        }else{ +          pRoot = 0; +        } +        rc = fts3ExprBalance(&p, nMaxDepth-1); +        if( rc!=SQLITE_OK ) break; + +        for(iLvl=0; p && iLvl<nMaxDepth; iLvl++){ +          if( apLeaf[iLvl]==0 ){ +            apLeaf[iLvl] = p; +            p = 0; +          }else{ +            assert( pFree ); +            pFree->pLeft = apLeaf[iLvl]; +            pFree->pRight = p; +            pFree->pLeft->pParent = pFree; +            pFree->pRight->pParent = pFree; + +            p = pFree; +            pFree = pFree->pParent; +            p->pParent = 0; +            apLeaf[iLvl] = 0; +          } +        } +        if( p ){ +          sqlite3Fts3ExprFree(p); +          rc = SQLITE_TOOBIG; +          break; +        } + +        /* If that was the last leaf node, break out of the loop */ +        if( pParent==0 ) break; + +        /* Set $p to point to the next leaf in the tree of eType nodes */ +        for(p=pParent->pRight; 
p->eType==eType; p=p->pLeft); + +        /* Remove pParent from the original tree. */ +        assert( pParent->pParent==0 || pParent->pParent->pLeft==pParent ); +        pParent->pRight->pParent = pParent->pParent; +        if( pParent->pParent ){ +          pParent->pParent->pLeft = pParent->pRight; +        }else{ +          assert( pParent==pRoot ); +          pRoot = pParent->pRight; +        } + +        /* Link pParent into the free node list. It will be used as an +        ** internal node of the new tree.  */ +        pParent->pParent = pFree; +        pFree = pParent; +      } + +      if( rc==SQLITE_OK ){ +        p = 0; +        for(i=0; i<nMaxDepth; i++){ +          if( apLeaf[i] ){ +            if( p==0 ){ +              p = apLeaf[i]; +              p->pParent = 0; +            }else{ +              assert( pFree!=0 ); +              pFree->pRight = p; +              pFree->pLeft = apLeaf[i]; +              pFree->pLeft->pParent = pFree; +              pFree->pRight->pParent = pFree; + +              p = pFree; +              pFree = pFree->pParent; +              p->pParent = 0; +            } +          } +        } +        pRoot = p; +      }else{ +        /* An error occurred. Delete the contents of the apLeaf[] array  +        ** and pFree list. Everything else is cleaned up by the call to +        ** sqlite3Fts3ExprFree(pRoot) below.  
*/ +        Fts3Expr *pDel; +        for(i=0; i<nMaxDepth; i++){ +          sqlite3Fts3ExprFree(apLeaf[i]); +        } +        while( (pDel=pFree)!=0 ){ +          pFree = pDel->pParent; +          sqlite3_free(pDel); +        } +      } + +      assert( pFree==0 ); +      sqlite3_free( apLeaf ); +    } +  } + +  if( rc!=SQLITE_OK ){ +    sqlite3Fts3ExprFree(pRoot); +    pRoot = 0; +  } +  *pp = pRoot; +  return rc; +} + +/* +** This function is similar to sqlite3Fts3ExprParse(), with the following +** differences:  ** -** The iDefaultCol parameter should be passed the index of the table column -** that appears on the left-hand-side of the MATCH operator (the default -** column to match against for tokens for which a column name is not explicitly -** specified as part of the query string), or -1 if tokens may by default -** match any table column. +**   1. It does not do expression rebalancing. +**   2. It does not check that the expression does not exceed the  +**      maximum allowable depth. +**   3. Even if it fails, *ppExpr may still be set to point to an  +**      expression tree. It should be deleted using sqlite3Fts3ExprFree() +**      in this case.  */ -int sqlite3Fts3ExprParse( +static int fts3ExprParseUnbalanced(    sqlite3_tokenizer *pTokenizer,      /* Tokenizer module */    int iLangid,                        /* Language id for tokenizer */    char **azCol,                       /* Array of column names for fts3 table */ @@ -798,28 +955,116 @@ int sqlite3Fts3ExprParse(      n = (int)strlen(z);    }    rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed); +  assert( rc==SQLITE_OK || *ppExpr==0 );    /* Check for mismatched parenthesis */    if( rc==SQLITE_OK && sParse.nNest ){      rc = SQLITE_ERROR; +  } +   +  return rc; +} + +/* +** Parameters z and n contain a pointer to and length of a buffer containing +** an fts3 query expression, respectively. 
This function attempts to parse the +** query expression and create a tree of Fts3Expr structures representing the +** parsed expression. If successful, *ppExpr is set to point to the head +** of the parsed expression tree and SQLITE_OK is returned. If an error +** occurs, either SQLITE_NOMEM (out-of-memory error) or SQLITE_ERROR (parse +** error) is returned and *ppExpr is set to 0. +** +** If parameter n is a negative number, then z is assumed to point to a +** nul-terminated string and the length is determined using strlen(). +** +** The first parameter, pTokenizer, is passed the fts3 tokenizer module to +** use to normalize query tokens while parsing the expression. The azCol[] +** array, which is assumed to contain nCol entries, should contain the names +** of each column in the target fts3 table, in order from left to right.  +** Column names must be nul-terminated strings. +** +** The iDefaultCol parameter should be passed the index of the table column +** that appears on the left-hand-side of the MATCH operator (the default +** column to match against for tokens for which a column name is not explicitly +** specified as part of the query string), or -1 if tokens may by default +** match any table column. 
+*/ +int sqlite3Fts3ExprParse( +  sqlite3_tokenizer *pTokenizer,      /* Tokenizer module */ +  int iLangid,                        /* Language id for tokenizer */ +  char **azCol,                       /* Array of column names for fts3 table */ +  int bFts4,                          /* True to allow FTS4-only syntax */ +  int nCol,                           /* Number of entries in azCol[] */ +  int iDefaultCol,                    /* Default column to query */ +  const char *z, int n,               /* Text of MATCH query */ +  Fts3Expr **ppExpr,                  /* OUT: Parsed query structure */ +  char **pzErr                        /* OUT: Error message (sqlite3_malloc) */ +){ +  static const int MAX_EXPR_DEPTH = 12; +  int rc = fts3ExprParseUnbalanced( +      pTokenizer, iLangid, azCol, bFts4, nCol, iDefaultCol, z, n, ppExpr +  ); +   +  /* Rebalance the expression. And check that its depth does not exceed +  ** MAX_EXPR_DEPTH.  */ +  if( rc==SQLITE_OK && *ppExpr ){ +    rc = fts3ExprBalance(ppExpr, MAX_EXPR_DEPTH); +    if( rc==SQLITE_OK ){ +      rc = fts3ExprCheckDepth(*ppExpr, MAX_EXPR_DEPTH); +    } +  } + +  if( rc!=SQLITE_OK ){      sqlite3Fts3ExprFree(*ppExpr);      *ppExpr = 0; +    if( rc==SQLITE_TOOBIG ){ +      *pzErr = sqlite3_mprintf( +          "FTS expression tree is too large (maximum depth %d)", MAX_EXPR_DEPTH +      ); +      rc = SQLITE_ERROR; +    }else if( rc==SQLITE_ERROR ){ +      *pzErr = sqlite3_mprintf("malformed MATCH expression: [%s]", z); +    }    }    return rc;  }  /* +** Free a single node of an expression tree. +*/ +static void fts3FreeExprNode(Fts3Expr *p){ +  assert( p->eType==FTSQUERY_PHRASE || p->pPhrase==0 ); +  sqlite3Fts3EvalPhraseCleanup(p->pPhrase); +  sqlite3_free(p->aMI); +  sqlite3_free(p); +} + +/*  ** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse(). +** +** This function would be simpler if it recursively called itself. 
But +** that would mean passing a sufficiently large expression to ExprParse() +** could cause a stack overflow.  */ -void sqlite3Fts3ExprFree(Fts3Expr *p){ -  if( p ){ -    assert( p->eType==FTSQUERY_PHRASE || p->pPhrase==0 ); -    sqlite3Fts3ExprFree(p->pLeft); -    sqlite3Fts3ExprFree(p->pRight); -    sqlite3Fts3EvalPhraseCleanup(p->pPhrase); -    sqlite3_free(p->aMI); -    sqlite3_free(p); +void sqlite3Fts3ExprFree(Fts3Expr *pDel){ +  Fts3Expr *p; +  assert( pDel==0 || pDel->pParent==0 ); +  for(p=pDel; p && (p->pLeft||p->pRight); p=(p->pLeft ? p->pLeft : p->pRight)){ +    assert( p->pParent==0 || p==p->pParent->pRight || p==p->pParent->pLeft ); +  } +  while( p ){ +    Fts3Expr *pParent = p->pParent; +    fts3FreeExprNode(p); +    if( pParent && p==pParent->pLeft && pParent->pRight ){ +      p = pParent->pRight; +      while( p && (p->pLeft || p->pRight) ){ +        assert( p==p->pParent->pRight || p==p->pParent->pLeft ); +        p = (p->pLeft ? p->pLeft : p->pRight); +      } +    }else{ +      p = pParent; +    }    }  } @@ -871,6 +1116,9 @@ static int queryTestTokenizer(  ** the returned expression text and then freed using sqlite3_free().  
*/  static char *exprToString(Fts3Expr *pExpr, char *zBuf){ +  if( pExpr==0 ){ +    return sqlite3_mprintf(""); +  }    switch( pExpr->eType ){      case FTSQUERY_PHRASE: {        Fts3Phrase *pPhrase = pExpr->pPhrase; @@ -978,10 +1226,21 @@ static void fts3ExprTest(      azCol[ii] = (char *)sqlite3_value_text(argv[ii+2]);    } -  rc = sqlite3Fts3ExprParse( -      pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr -  ); +  if( sqlite3_user_data(context) ){ +    char *zDummy = 0; +    rc = sqlite3Fts3ExprParse( +        pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr, &zDummy +    ); +    assert( rc==SQLITE_OK || pExpr==0 ); +    sqlite3_free(zDummy); +  }else{ +    rc = fts3ExprParseUnbalanced( +        pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr +    ); +  } +    if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){ +    sqlite3Fts3ExprFree(pExpr);      sqlite3_result_error(context, "Error parsing expression", -1);    }else if( rc==SQLITE_NOMEM || !(zBuf = exprToString(pExpr, 0)) ){      sqlite3_result_error_nomem(context); @@ -1004,9 +1263,15 @@ exprtest_out:  ** with database connection db.   */  int sqlite3Fts3ExprInitTestInterface(sqlite3* db){ -  return sqlite3_create_function( +  int rc = sqlite3_create_function(        db, "fts3_exprtest", -1, SQLITE_UTF8, 0, fts3ExprTest, 0, 0    ); +  if( rc==SQLITE_OK ){ +    rc = sqlite3_create_function(db, "fts3_exprtest_rebalance",  +        -1, SQLITE_UTF8, (void *)1, fts3ExprTest, 0, 0 +    ); +  } +  return rc;  }  #endif diff --git a/ext/fts3/fts3_hash.h b/ext/fts3/fts3_hash.h index 399f515..dc3fcf8 100644 --- a/ext/fts3/fts3_hash.h +++ b/ext/fts3/fts3_hash.h @@ -9,7 +9,7 @@  **    May you share freely, never taking more than you give.  **  ************************************************************************* -** This is the header file for the generic hash-table implemenation +** This is the header file for the generic hash-table implementation  ** used in SQLite.  
We've modified it slightly to serve as a standalone  ** hash table implementation for the full-text indexing module.  ** diff --git a/ext/fts3/fts3_icu.c b/ext/fts3/fts3_icu.c index 18b7948..52df8c7 100644 --- a/ext/fts3/fts3_icu.c +++ b/ext/fts3/fts3_icu.c @@ -119,7 +119,7 @@ static int icuOpen(    nChar = nInput+1;    pCsr = (IcuCursor *)sqlite3_malloc(        sizeof(IcuCursor) +                /* IcuCursor */ -      nChar * sizeof(UChar) +            /* IcuCursor.aChar[] */ +      ((nChar+3)&~3) * sizeof(UChar) +   /* IcuCursor.aChar[] */        (nChar+1) * sizeof(int)            /* IcuCursor.aOffset[] */    );    if( !pCsr ){ @@ -127,7 +127,7 @@ static int icuOpen(    }    memset(pCsr, 0, sizeof(IcuCursor));    pCsr->aChar = (UChar *)&pCsr[1]; -  pCsr->aOffset = (int *)&pCsr->aChar[nChar]; +  pCsr->aOffset = (int *)&pCsr->aChar[(nChar+3)&~3];    pCsr->aOffset[iOut] = iInput;    U8_NEXT(zInput, iInput, nInput, c);  diff --git a/ext/fts3/fts3_snippet.c b/ext/fts3/fts3_snippet.c index 6fce3d0..d54a787 100644 --- a/ext/fts3/fts3_snippet.c +++ b/ext/fts3/fts3_snippet.c @@ -389,9 +389,9 @@ static int fts3SnippetFindPositions(Fts3Expr *pExpr, int iPhrase, void *ctx){  ** is the snippet with the highest score, where scores are calculated  ** by adding:  ** -**   (a) +1 point for each occurence of a matchable phrase in the snippet. +**   (a) +1 point for each occurrence of a matchable phrase in the snippet.  ** -**   (b) +1000 points for the first occurence of each matchable phrase in  +**   (b) +1000 points for the first occurrence of each matchable phrase in   **       the snippet for which the corresponding mCovered bit is not set.  
**  ** The selected snippet parameters are stored in structure *pFragment before @@ -576,7 +576,7 @@ static int fts3SnippetShift(          return rc;        }        while( rc==SQLITE_OK && iCurrent<(nSnippet+nDesired) ){ -        const char *ZDUMMY; int DUMMY1, DUMMY2, DUMMY3; +        const char *ZDUMMY; int DUMMY1 = 0, DUMMY2 = 0, DUMMY3 = 0;          rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent);        }        pMod->xClose(pC); @@ -620,8 +620,6 @@ static int fts3SnippetText(    int iCol = pFragment->iCol+1;   /* Query column to extract text from */    sqlite3_tokenizer_module *pMod; /* Tokenizer module methods object */    sqlite3_tokenizer_cursor *pC;   /* Tokenizer cursor open on zDoc/nDoc */ -  const char *ZDUMMY;             /* Dummy argument used with tokenizer */ -  int DUMMY1;                     /* Dummy argument used with tokenizer */    zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol);    if( zDoc==0 ){ @@ -640,10 +638,23 @@ static int fts3SnippetText(    }    while( rc==SQLITE_OK ){ -    int iBegin;                   /* Offset in zDoc of start of token */ -    int iFin;                     /* Offset in zDoc of end of token */ -    int isHighlight;              /* True for highlighted terms */ - +    const char *ZDUMMY;           /* Dummy argument used with tokenizer */ +    int DUMMY1 = -1;              /* Dummy argument used with tokenizer */ +    int iBegin = 0;               /* Offset in zDoc of start of token */ +    int iFin = 0;                 /* Offset in zDoc of end of token */ +    int isHighlight = 0;          /* True for highlighted terms */ + +    /* Variable DUMMY1 is initialized to a negative value above. Elsewhere +    ** in the FTS code the variable that the third argument to xNext points to +    ** is initialized to zero before the first (*but not necessarily +    ** subsequent*) call to xNext(). 
This is done for a particular application +    ** that needs to know whether or not the tokenizer is being used for +    ** snippet generation or for some other purpose. +    ** +    ** Extreme care is required when writing code to depend on this +    ** initialization. It is not a documented part of the tokenizer interface. +    ** If a tokenizer is used directly by any code outside of FTS, this +    ** convention might not be respected.  */      rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent);      if( rc!=SQLITE_OK ){        if( rc==SQLITE_DONE ){ @@ -1333,8 +1344,6 @@ void sqlite3Fts3Offsets(  ){    Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;    sqlite3_tokenizer_module const *pMod = pTab->pTokenizer->pModule; -  const char *ZDUMMY;             /* Dummy argument used with xNext() */ -  int NDUMMY;                     /* Dummy argument used with xNext() */    int rc;                         /* Return Code */    int nToken;                     /* Number of tokens in query */    int iCol;                       /* Column currently being processed */ @@ -1367,9 +1376,11 @@ void sqlite3Fts3Offsets(    */    for(iCol=0; iCol<pTab->nColumn; iCol++){      sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor */ -    int iStart; -    int iEnd; -    int iCurrent; +    const char *ZDUMMY;           /* Dummy argument used with xNext() */ +    int NDUMMY = 0;               /* Dummy argument used with xNext() */ +    int iStart = 0; +    int iEnd = 0; +    int iCurrent = 0;      const char *zDoc;      int nDoc; diff --git a/ext/fts3/fts3_test.c b/ext/fts3/fts3_test.c index 4da0b8f..75ec6bd 100644 --- a/ext/fts3/fts3_test.c +++ b/ext/fts3/fts3_test.c @@ -267,7 +267,7 @@ static int fts3_near_match_cmd(  **  ** Whether or not the arguments are present, this command returns a list of  ** two integers - the initial chunksize and threshold when the command is -** invoked. This can be used to restore the default behaviour after running +** invoked. 
This can be used to restore the default behavior after running  ** tests. For example:  **  **    # Override incr-load settings for testing: diff --git a/ext/fts3/fts3_tokenize_vtab.c b/ext/fts3/fts3_tokenize_vtab.c new file mode 100644 index 0000000..364852e --- /dev/null +++ b/ext/fts3/fts3_tokenize_vtab.c @@ -0,0 +1,454 @@ +/* +** 2013 Apr 22 +** +** The author disclaims copyright to this source code.  In place of +** a legal notice, here is a blessing: +** +**    May you do good and not evil. +**    May you find forgiveness for yourself and forgive others. +**    May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This file contains code for the "fts3tokenize" virtual table module. +** An fts3tokenize virtual table is created as follows: +** +**   CREATE VIRTUAL TABLE <tbl> USING fts3tokenize( +**       <tokenizer-name>, <arg-1>, ... +**   ); +** +** The table created has the following schema: +** +**   CREATE TABLE <tbl>(input, token, start, end, position) +** +** When queried, the query must include a WHERE clause of type: +** +**   input = <string> +** +** The virtual table module tokenizes this <string>, using the FTS3  +** tokenizer specified by the arguments to the CREATE VIRTUAL TABLE  +** statement and returns one row for each token in the result. With +** fields set as follows: +** +**   input:   Always set to a copy of <string> +**   token:   A token from the input. +**   start:   Byte offset of the token within the input <string>. +**   end:     Byte offset of the byte immediately following the end of the +**            token within the input string. +**   pos:     Token offset of token within input. +** +*/ +#include "fts3Int.h" +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) + +#include <string.h> +#include <assert.h> + +typedef struct Fts3tokTable Fts3tokTable; +typedef struct Fts3tokCursor Fts3tokCursor; + +/* +** Virtual table structure. 
+*/ +struct Fts3tokTable { +  sqlite3_vtab base;              /* Base class used by SQLite core */ +  const sqlite3_tokenizer_module *pMod; +  sqlite3_tokenizer *pTok; +}; + +/* +** Virtual table cursor structure. +*/ +struct Fts3tokCursor { +  sqlite3_vtab_cursor base;       /* Base class used by SQLite core */ +  char *zInput;                   /* Input string */ +  sqlite3_tokenizer_cursor *pCsr; /* Cursor to iterate through zInput */ +  int iRowid;                     /* Current 'rowid' value */ +  const char *zToken;             /* Current 'token' value */ +  int nToken;                     /* Size of zToken in bytes */ +  int iStart;                     /* Current 'start' value */ +  int iEnd;                       /* Current 'end' value */ +  int iPos;                       /* Current 'pos' value */ +}; + +/* +** Query FTS for the tokenizer implementation named zName. +*/ +static int fts3tokQueryTokenizer( +  Fts3Hash *pHash, +  const char *zName, +  const sqlite3_tokenizer_module **pp, +  char **pzErr +){ +  sqlite3_tokenizer_module *p; +  int nName = (int)strlen(zName); + +  p = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash, zName, nName+1); +  if( !p ){ +    *pzErr = sqlite3_mprintf("unknown tokenizer: %s", zName); +    return SQLITE_ERROR; +  } + +  *pp = p; +  return SQLITE_OK; +} + +/* +** The second argument, argv[], is an array of pointers to nul-terminated +** strings. This function makes a copy of the array and strings into a  +** single block of memory. It then dequotes any of the strings that appear +** to be quoted. +** +** If successful, output parameter *pazDequote is set to point at the +** array of dequoted strings and SQLITE_OK is returned. The caller is +** responsible for eventually calling sqlite3_free() to free the array +** in this case. Or, if an error occurs, an SQLite error code is returned. +** The final value of *pazDequote is undefined in this case. 
+*/ +static int fts3tokDequoteArray( +  int argc,                       /* Number of elements in argv[] */ +  const char * const *argv,       /* Input array */ +  char ***pazDequote              /* Output array */ +){ +  int rc = SQLITE_OK;             /* Return code */ +  if( argc==0 ){ +    *pazDequote = 0; +  }else{ +    int i; +    int nByte = 0; +    char **azDequote; + +    for(i=0; i<argc; i++){ +      nByte += (int)(strlen(argv[i]) + 1); +    } + +    *pazDequote = azDequote = sqlite3_malloc(sizeof(char *)*argc + nByte); +    if( azDequote==0 ){ +      rc = SQLITE_NOMEM; +    }else{ +      char *pSpace = (char *)&azDequote[argc]; +      for(i=0; i<argc; i++){ +        int n = (int)strlen(argv[i]); +        azDequote[i] = pSpace; +        memcpy(pSpace, argv[i], n+1); +        sqlite3Fts3Dequote(pSpace); +        pSpace += (n+1); +      } +    } +  } + +  return rc; +} + +/* +** Schema of the tokenizer table. +*/ +#define FTS3_TOK_SCHEMA "CREATE TABLE x(input, token, start, end, position)" + +/* +** This function does all the work for both the xConnect and xCreate methods. +** These tables have no persistent representation of their own, so xConnect +** and xCreate are identical operations. 
+** +**   argv[0]: module name +**   argv[1]: database name  +**   argv[2]: table name +**   argv[3]: first argument (tokenizer name) +*/ +static int fts3tokConnectMethod( +  sqlite3 *db,                    /* Database connection */ +  void *pHash,                    /* Hash table of tokenizers */ +  int argc,                       /* Number of elements in argv array */ +  const char * const *argv,       /* xCreate/xConnect argument array */ +  sqlite3_vtab **ppVtab,          /* OUT: New sqlite3_vtab object */ +  char **pzErr                    /* OUT: sqlite3_malloc'd error message */ +){ +  Fts3tokTable *pTab; +  const sqlite3_tokenizer_module *pMod = 0; +  sqlite3_tokenizer *pTok = 0; +  int rc; +  char **azDequote = 0; +  int nDequote; + +  rc = sqlite3_declare_vtab(db, FTS3_TOK_SCHEMA); +  if( rc!=SQLITE_OK ) return rc; + +  nDequote = argc-3; +  rc = fts3tokDequoteArray(nDequote, &argv[3], &azDequote); + +  if( rc==SQLITE_OK ){ +    const char *zModule; +    if( nDequote<1 ){ +      zModule = "simple"; +    }else{ +      zModule = azDequote[0]; +    } +    rc = fts3tokQueryTokenizer((Fts3Hash*)pHash, zModule, &pMod, pzErr); +  } + +  assert( (rc==SQLITE_OK)==(pMod!=0) ); +  if( rc==SQLITE_OK ){ +    const char * const *azArg = (const char * const *)&azDequote[1]; +    rc = pMod->xCreate((nDequote>1 ? nDequote-1 : 0), azArg, &pTok); +  } + +  if( rc==SQLITE_OK ){ +    pTab = (Fts3tokTable *)sqlite3_malloc(sizeof(Fts3tokTable)); +    if( pTab==0 ){ +      rc = SQLITE_NOMEM; +    } +  } + +  if( rc==SQLITE_OK ){ +    memset(pTab, 0, sizeof(Fts3tokTable)); +    pTab->pMod = pMod; +    pTab->pTok = pTok; +    *ppVtab = &pTab->base; +  }else{ +    if( pTok ){ +      pMod->xDestroy(pTok); +    } +  } + +  sqlite3_free(azDequote); +  return rc; +} + +/* +** This function does the work for both the xDisconnect and xDestroy methods. +** These tables have no persistent representation of their own, so xDisconnect +** and xDestroy are identical operations. 
+*/ +static int fts3tokDisconnectMethod(sqlite3_vtab *pVtab){ +  Fts3tokTable *pTab = (Fts3tokTable *)pVtab; + +  pTab->pMod->xDestroy(pTab->pTok); +  sqlite3_free(pTab); +  return SQLITE_OK; +} + +/* +** xBestIndex - Analyze a WHERE and ORDER BY clause. +*/ +static int fts3tokBestIndexMethod( +  sqlite3_vtab *pVTab,  +  sqlite3_index_info *pInfo +){ +  int i; +  UNUSED_PARAMETER(pVTab); + +  for(i=0; i<pInfo->nConstraint; i++){ +    if( pInfo->aConstraint[i].usable  +     && pInfo->aConstraint[i].iColumn==0  +     && pInfo->aConstraint[i].op==SQLITE_INDEX_CONSTRAINT_EQ  +    ){ +      pInfo->idxNum = 1; +      pInfo->aConstraintUsage[i].argvIndex = 1; +      pInfo->aConstraintUsage[i].omit = 1; +      pInfo->estimatedCost = 1; +      return SQLITE_OK; +    } +  } + +  pInfo->idxNum = 0; +  assert( pInfo->estimatedCost>1000000.0 ); + +  return SQLITE_OK; +} + +/* +** xOpen - Open a cursor. +*/ +static int fts3tokOpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){ +  Fts3tokCursor *pCsr; +  UNUSED_PARAMETER(pVTab); + +  pCsr = (Fts3tokCursor *)sqlite3_malloc(sizeof(Fts3tokCursor)); +  if( pCsr==0 ){ +    return SQLITE_NOMEM; +  } +  memset(pCsr, 0, sizeof(Fts3tokCursor)); + +  *ppCsr = (sqlite3_vtab_cursor *)pCsr; +  return SQLITE_OK; +} + +/* +** Reset the tokenizer cursor passed as the only argument. As if it had +** just been returned by fts3tokOpenMethod(). +*/ +static void fts3tokResetCursor(Fts3tokCursor *pCsr){ +  if( pCsr->pCsr ){ +    Fts3tokTable *pTab = (Fts3tokTable *)(pCsr->base.pVtab); +    pTab->pMod->xClose(pCsr->pCsr); +    pCsr->pCsr = 0; +  } +  sqlite3_free(pCsr->zInput); +  pCsr->zInput = 0; +  pCsr->zToken = 0; +  pCsr->nToken = 0; +  pCsr->iStart = 0; +  pCsr->iEnd = 0; +  pCsr->iPos = 0; +  pCsr->iRowid = 0; +} + +/* +** xClose - Close a cursor. 
+*/ +static int fts3tokCloseMethod(sqlite3_vtab_cursor *pCursor){ +  Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; + +  fts3tokResetCursor(pCsr); +  sqlite3_free(pCsr); +  return SQLITE_OK; +} + +/* +** xNext - Advance the cursor to the next row, if any. +*/ +static int fts3tokNextMethod(sqlite3_vtab_cursor *pCursor){ +  Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; +  Fts3tokTable *pTab = (Fts3tokTable *)(pCursor->pVtab); +  int rc;                         /* Return code */ + +  pCsr->iRowid++; +  rc = pTab->pMod->xNext(pCsr->pCsr, +      &pCsr->zToken, &pCsr->nToken, +      &pCsr->iStart, &pCsr->iEnd, &pCsr->iPos +  ); + +  if( rc!=SQLITE_OK ){ +    fts3tokResetCursor(pCsr); +    if( rc==SQLITE_DONE ) rc = SQLITE_OK; +  } + +  return rc; +} + +/* +** xFilter - Initialize a cursor to point at the start of its data. +*/ +static int fts3tokFilterMethod( +  sqlite3_vtab_cursor *pCursor,   /* The cursor used for this query */ +  int idxNum,                     /* Strategy index */ +  const char *idxStr,             /* Unused */ +  int nVal,                       /* Number of elements in apVal */ +  sqlite3_value **apVal           /* Arguments for the indexing scheme */ +){ +  int rc = SQLITE_ERROR; +  Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; +  Fts3tokTable *pTab = (Fts3tokTable *)(pCursor->pVtab); +  UNUSED_PARAMETER(idxStr); +  UNUSED_PARAMETER(nVal); + +  fts3tokResetCursor(pCsr); +  if( idxNum==1 ){ +    const char *zByte = (const char *)sqlite3_value_text(apVal[0]); +    int nByte = sqlite3_value_bytes(apVal[0]); +    pCsr->zInput = sqlite3_malloc(nByte+1); +    if( pCsr->zInput==0 ){ +      rc = SQLITE_NOMEM; +    }else{ +      memcpy(pCsr->zInput, zByte, nByte); +      pCsr->zInput[nByte] = 0; +      rc = pTab->pMod->xOpen(pTab->pTok, pCsr->zInput, nByte, &pCsr->pCsr); +      if( rc==SQLITE_OK ){ +        pCsr->pCsr->pTokenizer = pTab->pTok; +      } +    } +  } + +  if( rc!=SQLITE_OK ) return rc; +  return fts3tokNextMethod(pCursor); +} + +/* +** 
xEof - Return true if the cursor is at EOF, or false otherwise. +*/ +static int fts3tokEofMethod(sqlite3_vtab_cursor *pCursor){ +  Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; +  return (pCsr->zToken==0); +} + +/* +** xColumn - Return a column value. +*/ +static int fts3tokColumnMethod( +  sqlite3_vtab_cursor *pCursor,   /* Cursor to retrieve value from */ +  sqlite3_context *pCtx,          /* Context for sqlite3_result_xxx() calls */ +  int iCol                        /* Index of column to read value from */ +){ +  Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; + +  /* CREATE TABLE x(input, token, start, end, position) */ +  switch( iCol ){ +    case 0: +      sqlite3_result_text(pCtx, pCsr->zInput, -1, SQLITE_TRANSIENT); +      break; +    case 1: +      sqlite3_result_text(pCtx, pCsr->zToken, pCsr->nToken, SQLITE_TRANSIENT); +      break; +    case 2: +      sqlite3_result_int(pCtx, pCsr->iStart); +      break; +    case 3: +      sqlite3_result_int(pCtx, pCsr->iEnd); +      break; +    default: +      assert( iCol==4 ); +      sqlite3_result_int(pCtx, pCsr->iPos); +      break; +  } +  return SQLITE_OK; +} + +/* +** xRowid - Return the current rowid for the cursor. +*/ +static int fts3tokRowidMethod( +  sqlite3_vtab_cursor *pCursor,   /* Cursor to retrieve value from */ +  sqlite_int64 *pRowid            /* OUT: Rowid value */ +){ +  Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; +  *pRowid = (sqlite3_int64)pCsr->iRowid; +  return SQLITE_OK; +} + +/* +** Register the fts3tok module with database connection db. Return SQLITE_OK +** if successful or an error code if sqlite3_create_module() fails. 
+*/ +int sqlite3Fts3InitTok(sqlite3 *db, Fts3Hash *pHash){ +  static const sqlite3_module fts3tok_module = { +     0,                           /* iVersion      */ +     fts3tokConnectMethod,        /* xCreate       */ +     fts3tokConnectMethod,        /* xConnect      */ +     fts3tokBestIndexMethod,      /* xBestIndex    */ +     fts3tokDisconnectMethod,     /* xDisconnect   */ +     fts3tokDisconnectMethod,     /* xDestroy      */ +     fts3tokOpenMethod,           /* xOpen         */ +     fts3tokCloseMethod,          /* xClose        */ +     fts3tokFilterMethod,         /* xFilter       */ +     fts3tokNextMethod,           /* xNext         */ +     fts3tokEofMethod,            /* xEof          */ +     fts3tokColumnMethod,         /* xColumn       */ +     fts3tokRowidMethod,          /* xRowid        */ +     0,                           /* xUpdate       */ +     0,                           /* xBegin        */ +     0,                           /* xSync         */ +     0,                           /* xCommit       */ +     0,                           /* xRollback     */ +     0,                           /* xFindFunction */ +     0,                           /* xRename       */ +     0,                           /* xSavepoint    */ +     0,                           /* xRelease      */ +     0                            /* xRollbackTo   */ +  }; +  int rc;                         /* Return code */ + +  rc = sqlite3_create_module(db, "fts3tokenize", &fts3tok_module, (void*)pHash); +  return rc; +} + +#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ diff --git a/ext/fts3/fts3_tokenizer.c b/ext/fts3/fts3_tokenizer.c index 4a7a175..04f8446 100644 --- a/ext/fts3/fts3_tokenizer.c +++ b/ext/fts3/fts3_tokenizer.c @@ -251,10 +251,10 @@ static void testFunc(    const char *azArg[64];    const char *zToken; -  int nToken; -  int iStart; -  int iEnd; -  int iPos; +  int nToken = 0; +  int iStart = 0; +  int iEnd = 0; +  int iPos = 0;    int i;    
Tcl_Obj *pRet; @@ -428,7 +428,7 @@ static void intTestFunc(  /*  ** Set up SQL objects in database db used to access the contents of  ** the hash table pointed to by argument pHash. The hash table must -** been initialised to use string keys, and to take a private copy  +** been initialized to use string keys, and to take a private copy   ** of the key when a value is inserted. i.e. by a call similar to:  **  **    sqlite3Fts3HashInit(pHash, FTS3_HASH_STRING, 1); diff --git a/ext/fts3/fts3_tokenizer.h b/ext/fts3/fts3_tokenizer.h index c91c7ed..4a40b2b 100644 --- a/ext/fts3/fts3_tokenizer.h +++ b/ext/fts3/fts3_tokenizer.h @@ -70,7 +70,7 @@ struct sqlite3_tokenizer_module {    ** This method should return either SQLITE_OK (0), or an SQLite error     ** code. If SQLITE_OK is returned, then *ppTokenizer should be set    ** to point at the newly created tokenizer structure. The generic -  ** sqlite3_tokenizer.pModule variable should not be initialised by +  ** sqlite3_tokenizer.pModule variable should not be initialized by    ** this callback. The caller will do so.    */    int (*xCreate)( diff --git a/ext/fts3/fts3_unicode.c b/ext/fts3/fts3_unicode.c index 79941ed..188358e 100644 --- a/ext/fts3/fts3_unicode.c +++ b/ext/fts3/fts3_unicode.c @@ -125,7 +125,7 @@ static int unicodeDestroy(sqlite3_tokenizer *pTokenizer){  **  ** If a standalone diacritic mark (one that sqlite3FtsUnicodeIsdiacritic()  ** identifies as a diacritic) occurs in the zIn/nIn string it is ignored. -** It is not possible to change the behaviour of the tokenizer with respect +** It is not possible to change the behavior of the tokenizer with respect  ** to these codepoints.  
*/  static int unicodeAddExceptions( diff --git a/ext/fts3/fts3_write.c b/ext/fts3/fts3_write.c index c9f1743..269d1dd 100644 --- a/ext/fts3/fts3_write.c +++ b/ext/fts3/fts3_write.c @@ -776,16 +776,16 @@ static int fts3PendingTermsAdd(    int iLangid,                    /* Language id to use */    const char *zText,              /* Text of document to be inserted */    int iCol,                       /* Column into which text is being inserted */ -  u32 *pnWord                     /* OUT: Number of tokens inserted */ +  u32 *pnWord                     /* IN/OUT: Incr. by number tokens inserted */  ){    int rc; -  int iStart; -  int iEnd; -  int iPos; +  int iStart = 0; +  int iEnd = 0; +  int iPos = 0;    int nWord = 0;    char const *zToken; -  int nToken; +  int nToken = 0;    sqlite3_tokenizer *pTokenizer = p->pTokenizer;    sqlite3_tokenizer_module const *pModule = pTokenizer->pModule; @@ -840,7 +840,7 @@ static int fts3PendingTermsAdd(    }    pModule->xClose(pCsr); -  *pnWord = nWord; +  *pnWord += nWord;    return (rc==SQLITE_DONE ? 
SQLITE_OK : rc);  } @@ -1044,11 +1044,13 @@ static void fts3DeleteTerms(    int *pRC,               /* Result code */    Fts3Table *p,           /* The FTS table to delete from */    sqlite3_value *pRowid,  /* The docid to be deleted */ -  u32 *aSz                /* Sizes of deleted document written here */ +  u32 *aSz,               /* Sizes of deleted document written here */ +  int *pbFound            /* OUT: Set to true if row really does exist */  ){    int rc;    sqlite3_stmt *pSelect; +  assert( *pbFound==0 );    if( *pRC ) return;    rc = fts3SqlStmt(p, SQL_SELECT_CONTENT_BY_ROWID, &pSelect, &pRowid);    if( rc==SQLITE_OK ){ @@ -1066,6 +1068,7 @@ static void fts3DeleteTerms(          *pRC = rc;          return;        } +      *pbFound = 1;      }      rc = sqlite3_reset(pSelect);    }else{ @@ -1479,6 +1482,7 @@ static int fts3SegReaderNextDocid(        *pnOffsetList = (int)(p - pReader->pOffsetList - 1);      } +    /* List may have been edited in place by fts3EvalNearTrim() */      while( p<pEnd && *p==0 ) p++;      /* If there are no more entries in the doclist, set pOffsetList to @@ -2494,9 +2498,13 @@ static int fts3DeleteSegdir(  **  ** If there are no entries in the input position list for column iCol, then  ** *pnList is set to zero before returning. +** +** If parameter bZero is non-zero, then any part of the input list following +** the end of the output list is zeroed before returning.  
*/  static void fts3ColumnFilter(    int iCol,                       /* Column to filter on */ +  int bZero,                      /* Zero out anything following *ppList */    char **ppList,                  /* IN/OUT: Pointer to position list */    int *pnList                     /* IN/OUT: Size of buffer *ppList in bytes */  ){ @@ -2525,6 +2533,9 @@ static void fts3ColumnFilter(      p += sqlite3Fts3GetVarint32(p, &iCurrent);    } +  if( bZero && &pList[nList]!=pEnd ){ +    memset(&pList[nList], 0, pEnd - &pList[nList]); +  }    *ppList = pList;    *pnList = nList;  } @@ -2598,19 +2609,19 @@ int sqlite3Fts3MsrIncrNext(        if( rc!=SQLITE_OK ) return rc;        fts3SegReaderSort(pMsr->apSegment, nMerge, j, xCmp); +      if( nList>0 && fts3SegReaderIsPending(apSegment[0]) ){ +        rc = fts3MsrBufferData(pMsr, pList, nList+1); +        if( rc!=SQLITE_OK ) return rc; +        assert( (pMsr->aBuffer[nList] & 0xFE)==0x00 ); +        pList = pMsr->aBuffer; +      } +        if( pMsr->iColFilter>=0 ){ -        fts3ColumnFilter(pMsr->iColFilter, &pList, &nList); +        fts3ColumnFilter(pMsr->iColFilter, 1, &pList, &nList);        }        if( nList>0 ){ -        if( fts3SegReaderIsPending(apSegment[0]) ){ -          rc = fts3MsrBufferData(pMsr, pList, nList+1); -          if( rc!=SQLITE_OK ) return rc; -          *paPoslist = pMsr->aBuffer; -          assert( (pMsr->aBuffer[nList] & 0xFE)==0x00 ); -        }else{ -          *paPoslist = pList; -        } +        *paPoslist = pList;          *piDocid = iDocid;          *pnPoslist = nList;          break; @@ -2853,7 +2864,7 @@ int sqlite3Fts3SegReaderStep(          }          if( isColFilter ){ -          fts3ColumnFilter(pFilter->iCol, &pList, &nList); +          fts3ColumnFilter(pFilter->iCol, 0, &pList, &nList);          }          if( !isIgnoreEmpty || nList>0 ){ @@ -3290,7 +3301,7 @@ static int fts3DoRebuild(Fts3Table *p){        int iCol;        int iLangid = langidFromSelect(p, pStmt);        rc = 
fts3PendingTermsDocid(p, iLangid, sqlite3_column_int64(pStmt, 0)); -      aSz[p->nColumn] = 0; +      memset(aSz, 0, sizeof(aSz[0]) * (p->nColumn+1));        for(iCol=0; rc==SQLITE_OK && iCol<p->nColumn; iCol++){          const char *z = (const char *) sqlite3_column_text(pStmt, iCol+1);          rc = fts3PendingTermsAdd(p, iLangid, z, iCol, &aSz[iCol]); @@ -4934,9 +4945,9 @@ static int fts3IntegrityCheck(Fts3Table *p, int *pbOk){          rc = sqlite3Fts3OpenTokenizer(p->pTokenizer, iLang, zText, nText, &pT);          while( rc==SQLITE_OK ){            char const *zToken;       /* Buffer containing token */ -          int nToken;               /* Number of bytes in token */ -          int iDum1, iDum2;         /* Dummy variables */ -          int iPos;                 /* Position of token in zText */ +          int nToken = 0;           /* Number of bytes in token */ +          int iDum1 = 0, iDum2 = 0; /* Dummy variables */ +          int iPos = 0;             /* Position of token in zText */            rc = pModule->xNext(pT, &zToken, &nToken, &iDum1, &iDum2, &iPos);            if( rc==SQLITE_OK ){ @@ -5103,9 +5114,9 @@ int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *pCsr){        rc = sqlite3Fts3OpenTokenizer(pT, pCsr->iLangid, zText, -1, &pTC);        while( rc==SQLITE_OK ){          char const *zToken;       /* Buffer containing token */ -        int nToken;               /* Number of bytes in token */ -        int iDum1, iDum2;         /* Dummy variables */ -        int iPos;                 /* Position of token in zText */ +        int nToken = 0;           /* Number of bytes in token */ +        int iDum1 = 0, iDum2 = 0; /* Dummy variables */ +        int iPos = 0;             /* Position of token in zText */          rc = pModule->xNext(pTC, &zToken, &nToken, &iDum1, &iDum2, &iPos);          for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){ @@ -5194,28 +5205,32 @@ int sqlite3Fts3DeferToken(  static int fts3DeleteByRowid(    Fts3Table 
*p,     sqlite3_value *pRowid,  -  int *pnDoc, +  int *pnChng,                    /* IN/OUT: Decrement if row is deleted */    u32 *aSzDel  ){ -  int isEmpty = 0; -  int rc = fts3IsEmpty(p, pRowid, &isEmpty); -  if( rc==SQLITE_OK ){ -    if( isEmpty ){ -      /* Deleting this row means the whole table is empty. In this case -      ** delete the contents of all three tables and throw away any -      ** data in the pendingTerms hash table.  */ -      rc = fts3DeleteAll(p, 1); -      *pnDoc = *pnDoc - 1; -    }else{ -      fts3DeleteTerms(&rc, p, pRowid, aSzDel); -      if( p->zContentTbl==0 ){ -        fts3SqlExec(&rc, p, SQL_DELETE_CONTENT, &pRowid); -        if( sqlite3_changes(p->db) ) *pnDoc = *pnDoc - 1; +  int rc = SQLITE_OK;             /* Return code */ +  int bFound = 0;                 /* True if *pRowid really is in the table */ + +  fts3DeleteTerms(&rc, p, pRowid, aSzDel, &bFound); +  if( bFound && rc==SQLITE_OK ){ +    int isEmpty = 0;              /* Deleting *pRowid leaves the table empty */ +    rc = fts3IsEmpty(p, pRowid, &isEmpty); +    if( rc==SQLITE_OK ){ +      if( isEmpty ){ +        /* Deleting this row means the whole table is empty. In this case +        ** delete the contents of all three tables and throw away any +        ** data in the pendingTerms hash table.  
*/ +        rc = fts3DeleteAll(p, 1); +        *pnChng = 0; +        memset(aSzDel, 0, sizeof(u32) * (p->nColumn+1) * 2);        }else{ -        *pnDoc = *pnDoc - 1; -      } -      if( p->bHasDocsize ){ -        fts3SqlExec(&rc, p, SQL_DELETE_DOCSIZE, &pRowid); +        *pnChng = *pnChng - 1; +        if( p->zContentTbl==0 ){ +          fts3SqlExec(&rc, p, SQL_DELETE_CONTENT, &pRowid); +        } +        if( p->bHasDocsize ){ +          fts3SqlExec(&rc, p, SQL_DELETE_DOCSIZE, &pRowid); +        }        }      }    } @@ -5246,7 +5261,7 @@ int sqlite3Fts3UpdateMethod(    int rc = SQLITE_OK;             /* Return Code */    int isRemove = 0;               /* True for an UPDATE or DELETE */    u32 *aSzIns = 0;                /* Sizes of inserted documents */ -  u32 *aSzDel;                    /* Sizes of deleted documents */ +  u32 *aSzDel = 0;                /* Sizes of deleted documents */    int nChng = 0;                  /* Net change in number of documents */    int bInsertDone = 0; @@ -5274,13 +5289,13 @@ int sqlite3Fts3UpdateMethod(    }    /* Allocate space to hold the change in document sizes */ -  aSzIns = sqlite3_malloc( sizeof(aSzIns[0])*(p->nColumn+1)*2 ); -  if( aSzIns==0 ){ +  aSzDel = sqlite3_malloc( sizeof(aSzDel[0])*(p->nColumn+1)*2 ); +  if( aSzDel==0 ){      rc = SQLITE_NOMEM;      goto update_out;    } -  aSzDel = &aSzIns[p->nColumn+1]; -  memset(aSzIns, 0, sizeof(aSzIns[0])*(p->nColumn+1)*2); +  aSzIns = &aSzDel[p->nColumn+1]; +  memset(aSzDel, 0, sizeof(aSzDel[0])*(p->nColumn+1)*2);    /* If this is an INSERT operation, or an UPDATE that modifies the rowid    ** value, then this operation requires constraint handling. @@ -5365,7 +5380,7 @@ int sqlite3Fts3UpdateMethod(    }   update_out: -  sqlite3_free(aSzIns); +  sqlite3_free(aSzDel);    sqlite3Fts3SegmentsClose(p);    return rc;  } diff --git a/ext/icu/README.txt b/ext/icu/README.txt index c5cadb5..d744f74 100644 --- a/ext/icu/README.txt +++ b/ext/icu/README.txt @@ -98,7 +98,7 @@ SQLite. 
Documentation follows.          <string> REGEXP <re-pattern>      This extension uses the ICU defaults for regular expression matching -    behaviour. Specifically, this means that: +    behavior. Specifically, this means that:          * Matching is case-sensitive,          * Regular expression comments are not allowed within patterns, and diff --git a/ext/misc/amatch.c b/ext/misc/amatch.c new file mode 100644 index 0000000..b613080 --- /dev/null +++ b/ext/misc/amatch.c @@ -0,0 +1,1483 @@ +/* +** 2013-03-14 +** +** The author disclaims copyright to this source code.  In place of +** a legal notice, here is a blessing: +** +**    May you do good and not evil. +**    May you find forgiveness for yourself and forgive others. +**    May you share freely, never taking more than you give. +** +************************************************************************* +** +** This file contains code for a demonstration virtual table that finds +** "approximate matches" - strings from a finite set that are nearly the +** same as a single input string.  The virtual table is called "amatch". +** +** An amatch virtual table is created like this: +** +**     CREATE VIRTUAL TABLE f USING approximate_match( +**        vocabulary_table=<tablename>,      -- V +**        vocabulary_word=<columnname>,      -- W +**        vocabulary_language=<columnname>,  -- L +**        edit_distances=<edit-cost-table> +**     ); +** +** When it is created, the new amatch table must be supplied with the +** name of a table V and columns V.W and V.L such that  +** +**     SELECT W FROM V WHERE L=$language +** +** returns the allowed vocabulary for the match.  If the "vocabulary_language" +** or L columnname is left unspecified or is an empty string, then no +** filtering of the vocabulary by language is performed.  
+** +** For efficiency, it is essential that the vocabulary table be indexed: +** +**     CREATE INDEX vocab_index ON V(W) +** +** A separate edit-cost-table provides scoring information that defines  +** what it means for one string to be "close" to another. +** +** The edit-cost-table must contain exactly four columns (more precisely, +** the statement "SELECT * FROM <edit-cost-table>" must return records +** that consist of four columns). It does not matter what the columns are +** named.  +** +** Each row in the edit-cost-table represents a single character +** transformation going from user input to the vocabulary. The leftmost  +** column of the row (column 0) contains an integer identifier of the +** language to which the transformation rule belongs (see "MULTIPLE LANGUAGES" +** below). The second column of the row (column 1) contains the input +** character or characters - the characters of user input. The third  +** column contains characters as they appear in the vocabulary table. +** And the fourth column contains the integer cost of making the +** transformation. For example: +** +**    CREATE TABLE f_data(iLang, cFrom, cTo, Cost); +**    INSERT INTO f_data(iLang, cFrom, cTo, Cost) VALUES(0, '', 'a', 100); +**    INSERT INTO f_data(iLang, cFrom, cTo, Cost) VALUES(0, 'b', '', 87); +**    INSERT INTO f_data(iLang, cFrom, cTo, Cost) VALUES(0, 'o', 'oe', 38); +**    INSERT INTO f_data(iLang, cFrom, cTo, Cost) VALUES(0, 'oe', 'o', 40); +** +** The first row inserted into the edit-cost-table by the SQL script +** above indicates that the cost of having an extra 'a' in the vocabulary +** table that is missing in the user input is 100.  (All costs are integers. +** Overall cost must not exceed 16777216.)  The second INSERT statement  +** creates a rule saying that the cost of having a single letter 'b' in +** user input which is missing in the vocabulary table is 87.  
The third +** INSERT statement means that the cost of matching an 'o' in user input  +** against an 'oe' in the vocabulary table is 38.  And so forth. +** +** The following rules are special: +** +**    INSERT INTO f_data(iLang, cFrom, cTo, Cost) VALUES(0, '?', '', 97); +**    INSERT INTO f_data(iLang, cFrom, cTo, Cost) VALUES(0, '', '?', 98); +**    INSERT INTO f_data(iLang, cFrom, cTo, Cost) VALUES(0, '?', '?', 99); +** +** The '?' to '' rule is the cost of having any single character in the input +** that is not found in the vocabulary.  The '' to '?' rule is the cost of +** having a character in the vocabulary table that is missing from input. +** And the '?' to '?' rule is the cost of doing an arbitrary character +** substitution.  These three generic rules apply across all languages. +** In other words, the iLang field is ignored for the generic substitution +** rules.  If more than one cost is given for a generic substitution rule, +** then the lowest cost is used. +** +** Once it has been created, the amatch virtual table can be queried +** as follows: +** +**    SELECT word, distance FROM f +**     WHERE word MATCH 'abcdefg' +**       AND distance<200; +** +** This query outputs the strings contained in the V.W column that +** are close to "abcdefg" and in order of increasing distance.  No string +** is output more than once.  If there are multiple ways to transform the +** target string ("abcdefg") into a string in the vocabulary table then +** the lowest cost transform is the one that is returned.  In this example, +** the search is limited to strings with a total distance of less than 200. +** +** For efficiency, it is important to put tight bounds on the distance. +** The time and memory space needed to perform this query is exponential +** in the maximum distance.  A good rule of thumb is to limit the distance +** to no more than 1.5 or 2 times the maximum cost of any rule in the +** edit-cost-table. +** +** The amatch is a read-only table.  
Any attempt to DELETE, INSERT, or +** UPDATE on an amatch table will throw an error. +** +** It is important to put some kind of a limit on the amatch output.  This +** can be either in the form of a LIMIT clause at the end of the query, +** or better, a "distance<NNN" constraint where NNN is some number.  The +** running time and memory requirement is exponential in the value of NNN  +** so you want to make sure that NNN is not too big.  A value of NNN that +** is about twice the average transformation cost seems to give good results. +** +** The amatch table can be useful for tasks such as spelling correction. +** Suppose all allowed words are in table vocabulary(w).  Then one would create +** an amatch virtual table like this: +** +**   CREATE VIRTUAL TABLE ex1 USING approximate_match( +**       vocabulary_table=vocabulary, +**       vocabulary_word=w, +**       edit_distances=ec1 +**   ); +** +** Then given an input word $word, look up close spellings this way: +** +**   SELECT word, distance FROM ex1 +**    WHERE word MATCH $word AND distance<200; +** +** MULTIPLE LANGUAGES +** +** Normally, the "iLang" value associated with all character transformations +** in the edit-cost-table is zero. However, if required, the amatch  +** virtual table allows multiple languages to be defined. Each query uses  +** only a single iLang value.   This allows, for example, a single  +** amatch table to support multiple languages. +** +** By default, only the rules with iLang=0 are used. To specify an  +** alternative language, a "language = ?" expression must be added to the +** WHERE clause of a SELECT, where ? is the integer identifier of the desired  +** language. For example: +** +**   SELECT word, distance FROM ex1 +**    WHERE word MATCH $word +**      AND distance<=200 +**      AND language=1 -- Specify use language 1 instead of 0 +** +** If no "language = ?" constraint is specified in the WHERE clause, language +** 0 is used. +** +** LIMITS +** +** The maximum language number is 2147483647.  
The maximum length of either +** of the strings in the second or third column of the amatch data table +** is 50 bytes.  The maximum cost on a rule is 1000. +*/ +#include "sqlite3ext.h" +SQLITE_EXTENSION_INIT1 +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include <stdio.h> +#include <ctype.h> + +#ifndef SQLITE_OMIT_VIRTUALTABLE + +/* +** Forward declaration of objects used by this implementation +*/ +typedef struct amatch_vtab amatch_vtab; +typedef struct amatch_cursor amatch_cursor; +typedef struct amatch_rule amatch_rule; +typedef struct amatch_word amatch_word; +typedef struct amatch_avl amatch_avl; + + +/***************************************************************************** +** AVL Tree implementation +*/ +/* +** Objects that want to be members of the AVL tree should embedded an +** instance of this structure. +*/ +struct amatch_avl { +  amatch_word *pWord;   /* Points to the object being stored in the tree */ +  char *zKey;           /* Key.  zero-terminated string.  Must be unique */ +  amatch_avl *pBefore;  /* Other elements less than zKey */ +  amatch_avl *pAfter;   /* Other elements greater than zKey */ +  amatch_avl *pUp;      /* Parent element */ +  short int height;     /* Height of this node.  Leaf==1 */ +  short int imbalance;  /* Height difference between pBefore and pAfter */ +}; + +/* Recompute the amatch_avl.height and amatch_avl.imbalance fields for p. +** Assume that the children of p have correct heights. +*/ +static void amatchAvlRecomputeHeight(amatch_avl *p){ +  short int hBefore = p->pBefore ? p->pBefore->height : 0; +  short int hAfter = p->pAfter ? p->pAfter->height : 0; +  p->imbalance = hBefore - hAfter;  /* -: pAfter higher.  +: pBefore higher */ +  p->height = (hBefore>hAfter ? 
hBefore : hAfter)+1; +} + +/* +**     P                B +**    / \              / \ +**   B   Z    ==>     X   P +**  / \                  / \ +** X   Y                Y   Z +** +*/ +static amatch_avl *amatchAvlRotateBefore(amatch_avl *pP){ +  amatch_avl *pB = pP->pBefore; +  amatch_avl *pY = pB->pAfter; +  pB->pUp = pP->pUp; +  pB->pAfter = pP; +  pP->pUp = pB; +  pP->pBefore = pY; +  if( pY ) pY->pUp = pP; +  amatchAvlRecomputeHeight(pP); +  amatchAvlRecomputeHeight(pB); +  return pB; +} + +/* +**     P                A +**    / \              / \ +**   X   A    ==>     P   Z +**      / \          / \ +**     Y   Z        X   Y +** +*/ +static amatch_avl *amatchAvlRotateAfter(amatch_avl *pP){ +  amatch_avl *pA = pP->pAfter; +  amatch_avl *pY = pA->pBefore; +  pA->pUp = pP->pUp; +  pA->pBefore = pP; +  pP->pUp = pA; +  pP->pAfter = pY; +  if( pY ) pY->pUp = pP; +  amatchAvlRecomputeHeight(pP); +  amatchAvlRecomputeHeight(pA); +  return pA; +} + +/* +** Return a pointer to the pBefore or pAfter pointer in the parent +** of p that points to p.  Or if p is the root node, return pp. +*/ +static amatch_avl **amatchAvlFromPtr(amatch_avl *p, amatch_avl **pp){ +  amatch_avl *pUp = p->pUp; +  if( pUp==0 ) return pp; +  if( pUp->pAfter==p ) return &pUp->pAfter; +  return &pUp->pBefore; +} + +/* +** Rebalance all nodes starting with p and working up to the root. +** Return the new root. 
+*/ +static amatch_avl *amatchAvlBalance(amatch_avl *p){ +  amatch_avl *pTop = p; +  amatch_avl **pp; +  while( p ){ +    amatchAvlRecomputeHeight(p); +    if( p->imbalance>=2 ){ +      amatch_avl *pB = p->pBefore; +      if( pB->imbalance<0 ) p->pBefore = amatchAvlRotateAfter(pB); +      pp = amatchAvlFromPtr(p,&p); +      p = *pp = amatchAvlRotateBefore(p); +    }else if( p->imbalance<=(-2) ){ +      amatch_avl *pA = p->pAfter; +      if( pA->imbalance>0 ) p->pAfter = amatchAvlRotateBefore(pA); +      pp = amatchAvlFromPtr(p,&p); +      p = *pp = amatchAvlRotateAfter(p); +    } +    pTop = p; +    p = p->pUp; +  } +  return pTop; +} + +/* Search the tree rooted at p for an entry with zKey.  Return a pointer +** to the entry or return NULL. +*/ +static amatch_avl *amatchAvlSearch(amatch_avl *p, const char *zKey){ +  int c; +  while( p && (c = strcmp(zKey, p->zKey))!=0 ){ +    p = (c<0) ? p->pBefore : p->pAfter; +  } +  return p; +} + +/* Find the first node (the one with the smallest key). +*/ +static amatch_avl *amatchAvlFirst(amatch_avl *p){ +  if( p ) while( p->pBefore ) p = p->pBefore; +  return p; +} + +#if 0 /* NOT USED */ +/* Return the node with the next larger key after p. 
+*/ +static amatch_avl *amatchAvlNext(amatch_avl *p){ +  amatch_avl *pPrev = 0; +  while( p && p->pAfter==pPrev ){ +    pPrev = p; +    p = p->pUp; +  } +  if( p && pPrev==0 ){ +    p = amatchAvlFirst(p->pAfter); +  } +  return p; +} +#endif + +#if 0 /* NOT USED */ +/* Verify AVL tree integrity +*/ +static int amatchAvlIntegrity(amatch_avl *pHead){ +  amatch_avl *p; +  if( pHead==0 ) return 1; +  if( (p = pHead->pBefore)!=0 ){ +    assert( p->pUp==pHead ); +    assert( amatchAvlIntegrity(p) ); +    assert( strcmp(p->zKey, pHead->zKey)<0 ); +    while( p->pAfter ) p = p->pAfter; +    assert( strcmp(p->zKey, pHead->zKey)<0 ); +  } +  if( (p = pHead->pAfter)!=0 ){ +    assert( p->pUp==pHead ); +    assert( amatchAvlIntegrity(p) ); +    assert( strcmp(p->zKey, pHead->zKey)>0 ); +    p = amatchAvlFirst(p); +    assert( strcmp(p->zKey, pHead->zKey)>0 ); +  } +  return 1; +} +static int amatchAvlIntegrity2(amatch_avl *pHead){ +  amatch_avl *p, *pNext; +  for(p=amatchAvlFirst(pHead); p; p=pNext){ +    pNext = amatchAvlNext(p); +    if( pNext==0 ) break; +    assert( strcmp(p->zKey, pNext->zKey)<0 ); +  } +  return 1; +} +#endif + +/* Insert a new node pNew.  Return NULL on success.  If the key is not +** unique, then do not perform the insert but instead leave pNew unchanged +** and return a pointer to an existing node with the same key. 
+*/ +static amatch_avl *amatchAvlInsert(amatch_avl **ppHead, amatch_avl *pNew){ +  int c; +  amatch_avl *p = *ppHead; +  if( p==0 ){ +    p = pNew; +    pNew->pUp = 0; +  }else{ +    while( p ){ +      c = strcmp(pNew->zKey, p->zKey); +      if( c<0 ){ +        if( p->pBefore ){ +          p = p->pBefore; +        }else{ +          p->pBefore = pNew; +          pNew->pUp = p; +          break; +        } +      }else if( c>0 ){ +        if( p->pAfter ){ +          p = p->pAfter; +        }else{ +          p->pAfter = pNew; +          pNew->pUp = p; +          break; +        } +      }else{ +        return p; +      } +    } +  } +  pNew->pBefore = 0; +  pNew->pAfter = 0; +  pNew->height = 1; +  pNew->imbalance = 0; +  *ppHead = amatchAvlBalance(p); +  /* assert( amatchAvlIntegrity(*ppHead) ); */ +  /* assert( amatchAvlIntegrity2(*ppHead) ); */ +  return 0; +} + +/* Remove node pOld from the tree.  pOld must be an element of the tree or +** the AVL tree will become corrupt. +*/ +static void amatchAvlRemove(amatch_avl **ppHead, amatch_avl *pOld){ +  amatch_avl **ppParent; +  amatch_avl *pBalance; +  /* assert( amatchAvlSearch(*ppHead, pOld->zKey)==pOld ); */ +  ppParent = amatchAvlFromPtr(pOld, ppHead); +  if( pOld->pBefore==0 && pOld->pAfter==0 ){ +    *ppParent = 0; +    pBalance = pOld->pUp; +  }else if( pOld->pBefore && pOld->pAfter ){ +    amatch_avl *pX, *pY; +    pX = amatchAvlFirst(pOld->pAfter); +    *amatchAvlFromPtr(pX, 0) = pX->pAfter; +    if( pX->pAfter ) pX->pAfter->pUp = pX->pUp; +    pBalance = pX->pUp; +    pX->pAfter = pOld->pAfter; +    if( pX->pAfter ){ +      pX->pAfter->pUp = pX; +    }else{ +      assert( pBalance==pOld ); +      pBalance = pX; +    } +    pX->pBefore = pY = pOld->pBefore; +    if( pY ) pY->pUp = pX; +    pX->pUp = pOld->pUp; +    *ppParent = pX; +  }else if( pOld->pBefore==0 ){ +    *ppParent = pBalance = pOld->pAfter; +    pBalance->pUp = pOld->pUp; +  }else if( pOld->pAfter==0 ){ +    *ppParent = pBalance = pOld->pBefore; 
+    pBalance->pUp = pOld->pUp; +  } +  *ppHead = amatchAvlBalance(pBalance); +  pOld->pUp = 0; +  pOld->pBefore = 0; +  pOld->pAfter = 0; +  /* assert( amatchAvlIntegrity(*ppHead) ); */ +  /* assert( amatchAvlIntegrity2(*ppHead) ); */ +} +/* +** End of the AVL Tree implementation +******************************************************************************/ + + +/* +** Various types. +** +** amatch_cost is the "cost" of an edit operation. +** +** amatch_len is the length of a matching string.   +** +** amatch_langid is an ruleset identifier. +*/ +typedef int amatch_cost; +typedef signed char amatch_len; +typedef int amatch_langid; + +/* +** Limits +*/ +#define AMATCH_MX_LENGTH          50  /* Maximum length of a rule string */ +#define AMATCH_MX_LANGID  2147483647  /* Maximum rule ID */ +#define AMATCH_MX_COST          1000  /* Maximum single-rule cost */ + +/* +** A match or partial match +*/ +struct amatch_word { +  amatch_word *pNext;   /* Next on a list of all amatch_words */ +  amatch_avl sCost;     /* Linkage of this node into the cost tree */ +  amatch_avl sWord;     /* Linkage of this node into the word tree */ +  amatch_cost rCost;    /* Cost of the match so far */ +  int iSeq;             /* Sequence number */ +  char zCost[10];       /* Cost key (text rendering of rCost) */ +  short int nMatch;     /* Input characters matched */ +  char zWord[4];        /* Text of the word.  Extra space appended as needed */ +}; + +/* +** Each transformation rule is stored as an instance of this object. +** All rules are kept on a linked list sorted by rCost. 
+*/ +struct amatch_rule { +  amatch_rule *pNext;      /* Next rule in order of increasing rCost */ +  char *zFrom;             /* Transform from (a string from user input) */ +  amatch_cost rCost;       /* Cost of this transformation */ +  amatch_langid iLang;     /* The langauge to which this rule belongs */ +  amatch_len nFrom, nTo;   /* Length of the zFrom and zTo strings */ +  char zTo[4];             /* Tranform to V.W value (extra space appended) */ +}; + +/*  +** A amatch virtual-table object  +*/ +struct amatch_vtab { +  sqlite3_vtab base;         /* Base class - must be first */ +  char *zClassName;          /* Name of this class.  Default: "amatch" */ +  char *zDb;                 /* Name of database.  (ex: "main") */ +  char *zSelf;               /* Name of this virtual table */ +  char *zCostTab;            /* Name of edit-cost-table */ +  char *zVocabTab;           /* Name of vocabulary table */ +  char *zVocabWord;          /* Name of vocabulary table word column */ +  char *zVocabLang;          /* Name of vocabulary table language column */ +  amatch_rule *pRule;        /* All active rules in this amatch */ +  amatch_cost rIns;          /* Generic insertion cost  '' -> ? */ +  amatch_cost rDel;          /* Generic deletion cost  ? -> '' */ +  amatch_cost rSub;          /* Generic substitution cost ? -> ? 
*/ +  sqlite3 *db;               /* The database connection */ +  sqlite3_stmt *pVCheck;     /* Query to check zVocabTab */ +  int nCursor;               /* Number of active cursors */ +}; + +/* A amatch cursor object */ +struct amatch_cursor { +  sqlite3_vtab_cursor base;  /* Base class - must be first */ +  sqlite3_int64 iRowid;      /* The rowid of the current word */ +  amatch_langid iLang;       /* Use this language ID */ +  amatch_cost rLimit;        /* Maximum cost of any term */ +  int nBuf;                  /* Space allocated for zBuf */ +  int oomErr;                /* True following an OOM error */ +  int nWord;                 /* Number of amatch_word objects */ +  char *zBuf;                /* Temp-use buffer space */ +  char *zInput;              /* Input word to match against */ +  amatch_vtab *pVtab;        /* The virtual table this cursor belongs to */ +  amatch_word *pAllWords;    /* List of all amatch_word objects */ +  amatch_word *pCurrent;     /* Most recent solution */ +  amatch_avl *pCost;         /* amatch_word objects keyed by iCost */ +  amatch_avl *pWord;         /* amatch_word objects keyed by zWord */ +}; + +/* +** The two input rule lists are both sorted in order of increasing +** cost.  Merge them together into a single list, sorted by cost, and +** return a pointer to the head of that list. +*/ +static amatch_rule *amatchMergeRules(amatch_rule *pA, amatch_rule *pB){ +  amatch_rule head; +  amatch_rule *pTail; + +  pTail =  &head; +  while( pA && pB ){ +    if( pA->rCost<=pB->rCost ){ +      pTail->pNext = pA; +      pTail = pA; +      pA = pA->pNext; +    }else{ +      pTail->pNext = pB; +      pTail = pB; +      pB = pB->pNext; +    } +  } +  if( pA==0 ){ +    pTail->pNext = pB; +  }else{ +    pTail->pNext = pA; +  } +  return head.pNext; +} + +/* +** Statement pStmt currently points to a row in the amatch data table. This +** function allocates and populates a amatch_rule structure according to +** the content of the row. 
+** +** If successful, *ppRule is set to point to the new object and SQLITE_OK +** is returned. Otherwise, *ppRule is zeroed, *pzErr may be set to point +** to an error message and an SQLite error code returned. +*/ +static int amatchLoadOneRule( +  amatch_vtab *p,                 /* Fuzzer virtual table handle */ +  sqlite3_stmt *pStmt,            /* Base rule on statements current row */ +  amatch_rule **ppRule,           /* OUT: New rule object */ +  char **pzErr                    /* OUT: Error message */ +){ +  sqlite3_int64 iLang = sqlite3_column_int64(pStmt, 0); +  const char *zFrom = (const char *)sqlite3_column_text(pStmt, 1); +  const char *zTo = (const char *)sqlite3_column_text(pStmt, 2); +  amatch_cost rCost = sqlite3_column_int(pStmt, 3); + +  int rc = SQLITE_OK;             /* Return code */ +  int nFrom;                      /* Size of string zFrom, in bytes */ +  int nTo;                        /* Size of string zTo, in bytes */ +  amatch_rule *pRule = 0;         /* New rule object to return */ + +  if( zFrom==0 ) zFrom = ""; +  if( zTo==0 ) zTo = ""; +  nFrom = (int)strlen(zFrom); +  nTo = (int)strlen(zTo); + +  /* Silently ignore null transformations */ +  if( strcmp(zFrom, zTo)==0 ){ +    if( zFrom[0]=='?' 
&& zFrom[1]==0 ){ +      if( p->rSub==0 || p->rSub>rCost ) p->rSub = rCost; +    } +    *ppRule = 0; +    return SQLITE_OK; +  } + +  if( rCost<=0 || rCost>AMATCH_MX_COST ){ +    *pzErr = sqlite3_mprintf("%s: cost must be between 1 and %d",  +        p->zClassName, AMATCH_MX_COST +    ); +    rc = SQLITE_ERROR; +  }else +  if( nFrom>AMATCH_MX_LENGTH || nTo>AMATCH_MX_LENGTH ){ +    *pzErr = sqlite3_mprintf("%s: maximum string length is %d",  +        p->zClassName, AMATCH_MX_LENGTH +    ); +    rc = SQLITE_ERROR;     +  }else +  if( iLang<0 || iLang>AMATCH_MX_LANGID ){ +    *pzErr = sqlite3_mprintf("%s: iLang must be between 0 and %d",  +        p->zClassName, AMATCH_MX_LANGID +    ); +    rc = SQLITE_ERROR;     +  }else +  if( strcmp(zFrom,"")==0 && strcmp(zTo,"?")==0 ){ +    if( p->rIns==0 || p->rIns>rCost ) p->rIns = rCost; +  }else +  if( strcmp(zFrom,"?")==0 && strcmp(zTo,"")==0 ){ +    if( p->rDel==0 || p->rDel>rCost ) p->rDel = rCost; +  }else +  { +    pRule = sqlite3_malloc( sizeof(*pRule) + nFrom + nTo ); +    if( pRule==0 ){ +      rc = SQLITE_NOMEM; +    }else{ +      memset(pRule, 0, sizeof(*pRule)); +      pRule->zFrom = &pRule->zTo[nTo+1]; +      pRule->nFrom = nFrom; +      memcpy(pRule->zFrom, zFrom, nFrom+1); +      memcpy(pRule->zTo, zTo, nTo+1); +      pRule->nTo = nTo; +      pRule->rCost = rCost; +      pRule->iLang = (int)iLang; +    } +  } + +  *ppRule = pRule; +  return rc; +} + +/* +** Free all the content in the edit-cost-table +*/ +static void amatchFreeRules(amatch_vtab *p){ +  while( p->pRule ){ +    amatch_rule *pRule = p->pRule; +    p->pRule = pRule->pNext; +    sqlite3_free(pRule); +  } +  p->pRule = 0; +} + +/* +** Load the content of the amatch data table into memory. 
+*/ +static int amatchLoadRules( +  sqlite3 *db,                    /* Database handle */ +  amatch_vtab *p,                 /* Virtual amatch table to configure */ +  char **pzErr                    /* OUT: Error message */ +){ +  int rc = SQLITE_OK;             /* Return code */ +  char *zSql;                     /* SELECT used to read from rules table */ +  amatch_rule *pHead = 0; + +  zSql = sqlite3_mprintf("SELECT * FROM %Q.%Q", p->zDb, p->zCostTab); +  if( zSql==0 ){ +    rc = SQLITE_NOMEM; +  }else{ +    int rc2;                      /* finalize() return code */ +    sqlite3_stmt *pStmt = 0; +    rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); +    if( rc!=SQLITE_OK ){ +      *pzErr = sqlite3_mprintf("%s: %s", p->zClassName, sqlite3_errmsg(db)); +    }else if( sqlite3_column_count(pStmt)!=4 ){ +      *pzErr = sqlite3_mprintf("%s: %s has %d columns, expected 4", +          p->zClassName, p->zCostTab, sqlite3_column_count(pStmt) +      ); +      rc = SQLITE_ERROR; +    }else{ +      while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){ +        amatch_rule *pRule = 0; +        rc = amatchLoadOneRule(p, pStmt, &pRule, pzErr); +        if( pRule ){ +          pRule->pNext = pHead; +          pHead = pRule; +        } +      } +    } +    rc2 = sqlite3_finalize(pStmt); +    if( rc==SQLITE_OK ) rc = rc2; +  } +  sqlite3_free(zSql); + +  /* All rules are now in a singly linked list starting at pHead. This +  ** block sorts them by cost and then sets amatch_vtab.pRule to point to  +  ** point to the head of the sorted list. 
+  */ +  if( rc==SQLITE_OK ){ +    unsigned int i; +    amatch_rule *pX; +    amatch_rule *a[15]; +    for(i=0; i<sizeof(a)/sizeof(a[0]); i++) a[i] = 0; +    while( (pX = pHead)!=0 ){ +      pHead = pX->pNext; +      pX->pNext = 0; +      for(i=0; a[i] && i<sizeof(a)/sizeof(a[0])-1; i++){ +        pX = amatchMergeRules(a[i], pX); +        a[i] = 0; +      } +      a[i] = amatchMergeRules(a[i], pX); +    } +    for(pX=a[0], i=1; i<sizeof(a)/sizeof(a[0]); i++){ +      pX = amatchMergeRules(a[i], pX); +    } +    p->pRule = amatchMergeRules(p->pRule, pX); +  }else{ +    /* An error has occurred. Setting p->pRule to point to the head of the +    ** allocated list ensures that the list will be cleaned up in this case. +    */ +    assert( p->pRule==0 ); +    p->pRule = pHead; +  } + +  return rc; +} + +/* +** This function converts an SQL quoted string into an unquoted string +** and returns a pointer to a buffer allocated using sqlite3_malloc()  +** containing the result. The caller should eventually free this buffer +** using sqlite3_free. 
+** +** Examples: +** +**     "abc"   becomes   abc +**     'xyz'   becomes   xyz +**     [pqr]   becomes   pqr +**     `mno`   becomes   mno +*/ +static char *amatchDequote(const char *zIn){ +  int nIn;                        /* Size of input string, in bytes */ +  char *zOut;                     /* Output (dequoted) string */ + +  nIn = (int)strlen(zIn); +  zOut = sqlite3_malloc(nIn+1); +  if( zOut ){ +    char q = zIn[0];              /* Quote character (if any ) */ + +    if( q!='[' && q!= '\'' && q!='"' && q!='`' ){ +      memcpy(zOut, zIn, nIn+1); +    }else{ +      int iOut = 0;               /* Index of next byte to write to output */ +      int iIn;                    /* Index of next byte to read from input */ + +      if( q=='[' ) q = ']'; +      for(iIn=1; iIn<nIn; iIn++){ +        if( zIn[iIn]==q ) iIn++; +        zOut[iOut++] = zIn[iIn]; +      } +    } +    assert( (int)strlen(zOut)<=nIn ); +  } +  return zOut; +} + +/* +** Deallocate the pVCheck prepared statement. +*/ +static void amatchVCheckClear(amatch_vtab *p){ +  if( p->pVCheck ){ +    sqlite3_finalize(p->pVCheck); +    p->pVCheck = 0; +  } +} + +/* +** Deallocate an amatch_vtab object +*/ +static void amatchFree(amatch_vtab *p){ +  if( p ){ +    amatchFreeRules(p); +    amatchVCheckClear(p); +    sqlite3_free(p->zClassName); +    sqlite3_free(p->zDb); +    sqlite3_free(p->zCostTab); +    sqlite3_free(p->zVocabTab); +    sqlite3_free(p->zVocabWord); +    sqlite3_free(p->zVocabLang); +    memset(p, 0, sizeof(*p)); +    sqlite3_free(p); +  } +} + +/* +** xDisconnect/xDestroy method for the amatch module. +*/ +static int amatchDisconnect(sqlite3_vtab *pVtab){ +  amatch_vtab *p = (amatch_vtab*)pVtab; +  assert( p->nCursor==0 ); +  amatchFree(p); +  return SQLITE_OK; +} + +/* +** Check to see if the argument is of the form: +** +**       KEY = VALUE +** +** If it is, return a pointer to the first character of VALUE. +** If not, return NULL.  Spaces around the = are ignored. 
+*/ +static const char *amatchValueOfKey(const char *zKey, const char *zStr){ +  int nKey = (int)strlen(zKey); +  int nStr = (int)strlen(zStr); +  int i; +  if( nStr<nKey+1 ) return 0; +  if( memcmp(zStr, zKey, nKey)!=0 ) return 0; +  for(i=nKey; isspace(zStr[i]); i++){} +  if( zStr[i]!='=' ) return 0; +  i++; +  while( isspace(zStr[i]) ){ i++; } +  return zStr+i; +} + +/* +** xConnect/xCreate method for the amatch module. Arguments are: +** +**   argv[0]    -> module name  ("approximate_match") +**   argv[1]    -> database name +**   argv[2]    -> table name +**   argv[3...] -> arguments +*/ +static int amatchConnect( +  sqlite3 *db, +  void *pAux, +  int argc, const char *const*argv, +  sqlite3_vtab **ppVtab, +  char **pzErr +){ +  int rc = SQLITE_OK;             /* Return code */ +  amatch_vtab *pNew = 0;          /* New virtual table */ +  const char *zModule = argv[0]; +  const char *zDb = argv[1]; +  const char *zVal; +  int i; + +  (void)pAux; +  *ppVtab = 0; +  pNew = sqlite3_malloc( sizeof(*pNew) ); +  if( pNew==0 ) return SQLITE_NOMEM; +  rc = SQLITE_NOMEM; +  memset(pNew, 0, sizeof(*pNew)); +  pNew->db = db; +  pNew->zClassName = sqlite3_mprintf("%s", zModule); +  if( pNew->zClassName==0 ) goto amatchConnectError; +  pNew->zDb = sqlite3_mprintf("%s", zDb); +  if( pNew->zDb==0 ) goto amatchConnectError; +  pNew->zSelf = sqlite3_mprintf("%s", argv[2]); +  if( pNew->zSelf==0 ) goto amatchConnectError; +  for(i=3; i<argc; i++){ +    zVal = amatchValueOfKey("vocabulary_table", argv[i]); +    if( zVal ){ +      sqlite3_free(pNew->zVocabTab); +      pNew->zVocabTab = amatchDequote(zVal); +      if( pNew->zVocabTab==0 ) goto amatchConnectError; +      continue; +    } +    zVal = amatchValueOfKey("vocabulary_word", argv[i]); +    if( zVal ){ +      sqlite3_free(pNew->zVocabWord); +      pNew->zVocabWord = amatchDequote(zVal); +      if( pNew->zVocabWord==0 ) goto amatchConnectError; +      continue; +    } +    zVal = amatchValueOfKey("vocabulary_language", 
argv[i]); +    if( zVal ){ +      sqlite3_free(pNew->zVocabLang); +      pNew->zVocabLang = amatchDequote(zVal); +      if( pNew->zVocabLang==0 ) goto amatchConnectError; +      continue; +    } +    zVal = amatchValueOfKey("edit_distances", argv[i]); +    if( zVal ){ +      sqlite3_free(pNew->zCostTab); +      pNew->zCostTab = amatchDequote(zVal); +      if( pNew->zCostTab==0 ) goto amatchConnectError; +      continue; +    } +    *pzErr = sqlite3_mprintf("unrecognized argument: [%s]\n", argv[i]); +    amatchFree(pNew); +    *ppVtab = 0; +    return SQLITE_ERROR; +  } +  rc = SQLITE_OK; +  if( pNew->zCostTab==0 ){ +    *pzErr = sqlite3_mprintf("no edit_distances table specified"); +    rc = SQLITE_ERROR; +  }else{ +    rc = amatchLoadRules(db, pNew, pzErr); +  } +  if( rc==SQLITE_OK ){ +    rc = sqlite3_declare_vtab(db, +           "CREATE TABLE x(word,distance,language," +           "command HIDDEN,nword HIDDEN)" +         ); +#define AMATCH_COL_WORD       0 +#define AMATCH_COL_DISTANCE   1 +#define AMATCH_COL_LANGUAGE   2 +#define AMATCH_COL_COMMAND    3 +#define AMATCH_COL_NWORD      4 +  } +  if( rc!=SQLITE_OK ){ +    amatchFree(pNew); +  } +  *ppVtab = &pNew->base; +  return rc; + +amatchConnectError: +  amatchFree(pNew); +  return rc; +} + +/* +** Open a new amatch cursor. +*/ +static int amatchOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){ +  amatch_vtab *p = (amatch_vtab*)pVTab; +  amatch_cursor *pCur; +  pCur = sqlite3_malloc( sizeof(*pCur) ); +  if( pCur==0 ) return SQLITE_NOMEM; +  memset(pCur, 0, sizeof(*pCur)); +  pCur->pVtab = p; +  *ppCursor = &pCur->base; +  p->nCursor++; +  return SQLITE_OK; +} + +/* +** Free up all the memory allocated by a cursor.  Set it rLimit to 0 +** to indicate that it is at EOF. 
+*/ +static void amatchClearCursor(amatch_cursor *pCur){ +  amatch_word *pWord, *pNextWord; +  for(pWord=pCur->pAllWords; pWord; pWord=pNextWord){ +    pNextWord = pWord->pNext; +    sqlite3_free(pWord); +  } +  pCur->pAllWords = 0; +  sqlite3_free(pCur->zInput); +  pCur->zInput = 0; +  pCur->pCost = 0; +  pCur->pWord = 0; +  pCur->pCurrent = 0; +  pCur->rLimit = 1000000; +  pCur->iLang = 0; +  pCur->nWord = 0; +} + +/* +** Close a amatch cursor. +*/ +static int amatchClose(sqlite3_vtab_cursor *cur){ +  amatch_cursor *pCur = (amatch_cursor *)cur; +  amatchClearCursor(pCur); +  pCur->pVtab->nCursor--; +  sqlite3_free(pCur); +  return SQLITE_OK; +} + +/* +** Render a 24-bit unsigned integer as a 4-byte base-64 number. +*/ +static void amatchEncodeInt(int x, char *z){ +  static const char a[] =  +    "0123456789" +    "ABCDEFGHIJ" +    "KLMNOPQRST" +    "UVWXYZ^abc" +    "defghijklm" +    "nopqrstuvw" +    "xyz~"; +  z[0] = a[(x>>18)&0x3f]; +  z[1] = a[(x>>12)&0x3f]; +  z[2] = a[(x>>6)&0x3f]; +  z[3] = a[x&0x3f]; +} + +/* +** Write the zCost[] field for a amatch_word object +*/ +static void amatchWriteCost(amatch_word *pWord){ +  amatchEncodeInt(pWord->rCost, pWord->zCost); +  amatchEncodeInt(pWord->iSeq, pWord->zCost+4); +  pWord->zCost[8] = 0; +} + +/* +** Add a new amatch_word object to the queue. +** +** If a prior amatch_word object with the same zWord, and nMatch +** already exists, update its rCost (if the new rCost is less) but +** otherwise leave it unchanged.  Do not add a duplicate. +** +** Do nothing if the cost exceeds threshold. 
+*/ +static void amatchAddWord( +  amatch_cursor *pCur, +  amatch_cost rCost, +  int nMatch, +  const char *zWordBase, +  const char *zWordTail +){ +  amatch_word *pWord; +  amatch_avl *pNode; +  amatch_avl *pOther; +  int nBase, nTail; +  char zBuf[4]; +   +  if( rCost>pCur->rLimit ){ +    return; +  } +  nBase = (int)strlen(zWordBase); +  nTail = (int)strlen(zWordTail); +  if( nBase+nTail+3>pCur->nBuf ){ +    pCur->nBuf = nBase+nTail+100; +    pCur->zBuf = sqlite3_realloc(pCur->zBuf, pCur->nBuf); +    if( pCur->zBuf==0 ){ +      pCur->nBuf = 0; +      return; +    } +  } +  amatchEncodeInt(nMatch, zBuf); +  memcpy(pCur->zBuf, zBuf+2, 2); +  memcpy(pCur->zBuf+2, zWordBase, nBase); +  memcpy(pCur->zBuf+2+nBase, zWordTail, nTail+1); +  pNode = amatchAvlSearch(pCur->pWord, pCur->zBuf); +  if( pNode ){ +    pWord = pNode->pWord; +    if( pWord->rCost>rCost ){ +#ifdef AMATCH_TRACE_1 +      printf("UPDATE [%s][%.*s^%s] %d (\"%s\" \"%s\")\n", +             pWord->zWord+2, pWord->nMatch, pCur->zInput, pCur->zInput, +             pWord->rCost, pWord->zWord, pWord->zCost); +#endif +      amatchAvlRemove(&pCur->pCost, &pWord->sCost); +      pWord->rCost = rCost; +      amatchWriteCost(pWord); +#ifdef AMATCH_TRACE_1 +      printf("  ---> %d (\"%s\" \"%s\")\n", +             pWord->rCost, pWord->zWord, pWord->zCost); +#endif +      pOther = amatchAvlInsert(&pCur->pCost, &pWord->sCost); +      assert( pOther==0 ); (void)pOther; +    } +    return; +  } +  pWord = sqlite3_malloc( sizeof(*pWord) + nBase + nTail - 1 ); +  if( pWord==0 ) return; +  memset(pWord, 0, sizeof(*pWord)); +  pWord->rCost = rCost; +  pWord->iSeq = pCur->nWord++; +  amatchWriteCost(pWord); +  pWord->nMatch = nMatch; +  pWord->pNext = pCur->pAllWords; +  pCur->pAllWords = pWord; +  pWord->sCost.zKey = pWord->zCost; +  pWord->sCost.pWord = pWord; +  pOther = amatchAvlInsert(&pCur->pCost, &pWord->sCost); +  assert( pOther==0 ); (void)pOther; +  pWord->sWord.zKey = pWord->zWord; +  pWord->sWord.pWord = pWord; + 
 strcpy(pWord->zWord, pCur->zBuf); +  pOther = amatchAvlInsert(&pCur->pWord, &pWord->sWord); +  assert( pOther==0 ); (void)pOther; +#ifdef AMATCH_TRACE_1 +  printf("INSERT [%s][%.*s^%s] %d (\"%s\" \"%s\")\n", pWord->zWord+2, +       pWord->nMatch, pCur->zInput, pCur->zInput+pWord->nMatch, rCost, +       pWord->zWord, pWord->zCost); +#endif +} + +/* +** Advance a cursor to its next row of output +*/ +static int amatchNext(sqlite3_vtab_cursor *cur){ +  amatch_cursor *pCur = (amatch_cursor*)cur; +  amatch_word *pWord = 0; +  amatch_avl *pNode; +  int isMatch = 0; +  amatch_vtab *p = pCur->pVtab; +  int nWord; +  int rc; +  int i; +  const char *zW; +  amatch_rule *pRule; +  char *zBuf = 0; +  char nBuf = 0; +  char zNext[8]; +  char zNextIn[8]; +  int nNextIn; + +  if( p->pVCheck==0 ){ +    char *zSql; +    if( p->zVocabLang && p->zVocabLang[0] ){ +      zSql = sqlite3_mprintf( +          "SELECT \"%s\" FROM \"%s\"", +          " WHERE \"%w\">=?1 AND \"%w\"=?2" +          " ORDER BY 1", +          p->zVocabWord, p->zVocabTab, +          p->zVocabWord, p->zVocabLang +      ); +    }else{ +      zSql = sqlite3_mprintf( +          "SELECT \"%s\" FROM \"%s\"" +          " WHERE \"%w\">=?1" +          " ORDER BY 1", +          p->zVocabWord, p->zVocabTab, +          p->zVocabWord +      ); +    } +    rc = sqlite3_prepare_v2(p->db, zSql, -1, &p->pVCheck, 0); +    sqlite3_free(zSql); +    if( rc ) return rc; +  } +  sqlite3_bind_int(p->pVCheck, 2, pCur->iLang); + +  do{ +    pNode = amatchAvlFirst(pCur->pCost); +    if( pNode==0 ){ +      pWord = 0; +      break; +    } +    pWord = pNode->pWord; +    amatchAvlRemove(&pCur->pCost, &pWord->sCost); + +#ifdef AMATCH_TRACE_1 +    printf("PROCESS [%s][%.*s^%s] %d (\"%s\" \"%s\")\n", +       pWord->zWord+2, pWord->nMatch, pCur->zInput, pCur->zInput+pWord->nMatch, +       pWord->rCost, pWord->zWord, pWord->zCost); +#endif +    nWord = (int)strlen(pWord->zWord+2); +    if( nWord+20>nBuf ){ +      nBuf = nWord+100; +      zBuf = 
sqlite3_realloc(zBuf, nBuf); +      if( zBuf==0 ) return SQLITE_NOMEM; +    } +    strcpy(zBuf, pWord->zWord+2); +    zNext[0] = 0; +    zNextIn[0] = pCur->zInput[pWord->nMatch]; +    if( zNextIn[0] ){ +      for(i=1; i<=4 && (pCur->zInput[pWord->nMatch+i]&0xc0)==0x80; i++){ +        zNextIn[i] = pCur->zInput[pWord->nMatch+i]; +      } +      zNextIn[i] = 0; +      nNextIn = i; +    }else{ +      nNextIn = 0; +    } + +    if( zNextIn[0] && zNextIn[0]!='*' ){ +      sqlite3_reset(p->pVCheck); +      strcat(zBuf, zNextIn); +      sqlite3_bind_text(p->pVCheck, 1, zBuf, nWord+nNextIn, SQLITE_STATIC); +      rc = sqlite3_step(p->pVCheck); +      if( rc==SQLITE_ROW ){ +        zW = (const char*)sqlite3_column_text(p->pVCheck, 0); +        if( strncmp(zBuf, zW, nWord+nNextIn)==0 ){ +          amatchAddWord(pCur, pWord->rCost, pWord->nMatch+nNextIn, zBuf, ""); +        } +      } +      zBuf[nWord] = 0; +    } + +    while( 1 ){ +      strcpy(zBuf+nWord, zNext); +      sqlite3_reset(p->pVCheck); +      sqlite3_bind_text(p->pVCheck, 1, zBuf, -1, SQLITE_TRANSIENT); +      rc = sqlite3_step(p->pVCheck); +      if( rc!=SQLITE_ROW ) break; +      zW = (const char*)sqlite3_column_text(p->pVCheck, 0); +      strcpy(zBuf+nWord, zNext); +      if( strncmp(zW, zBuf, nWord)!=0 ) break; +      if( (zNextIn[0]=='*' && zNextIn[1]==0) +       || (zNextIn[0]==0 && zW[nWord]==0) +      ){ +        isMatch = 1; +        zNextIn[0] = 0; +        nNextIn = 0; +        break; +      } +      zNext[0] = zW[nWord]; +      for(i=1; i<=4 && (zW[nWord+i]&0xc0)==0x80; i++){ +        zNext[i] = zW[nWord+i]; +      } +      zNext[i] = 0; +      zBuf[nWord] = 0; +      if( p->rIns>0 ){ +        amatchAddWord(pCur, pWord->rCost+p->rIns, pWord->nMatch,  +                      zBuf, zNext); +      } +      if( p->rSub>0 ){ +        amatchAddWord(pCur, pWord->rCost+p->rSub, pWord->nMatch+nNextIn,  +                      zBuf, zNext); +      } +      if( p->rIns<0 && p->rSub<0 ) break; +      zNext[i-1]++; 
 /* FIX ME */ +    } +    sqlite3_reset(p->pVCheck); + +    if( p->rDel>0 ){ +      zBuf[nWord] = 0; +      amatchAddWord(pCur, pWord->rCost+p->rDel, pWord->nMatch+nNextIn, +                    zBuf, ""); +    } + +    for(pRule=p->pRule; pRule; pRule=pRule->pNext){ +      if( pRule->iLang!=pCur->iLang ) continue; +      if( strncmp(pRule->zFrom, pCur->zInput+pWord->nMatch, pRule->nFrom)==0 ){ +        amatchAddWord(pCur, pWord->rCost+pRule->rCost, +                      pWord->nMatch+pRule->nFrom, pWord->zWord+2, pRule->zTo); +      } +    } +  }while( !isMatch ); +  pCur->pCurrent = pWord; +  sqlite3_free(zBuf); +  return SQLITE_OK; +} + +/* +** Called to "rewind" a cursor back to the beginning so that +** it starts its output over again.  Always called at least once +** prior to any amatchColumn, amatchRowid, or amatchEof call. +*/ +static int amatchFilter( +  sqlite3_vtab_cursor *pVtabCursor,  +  int idxNum, const char *idxStr, +  int argc, sqlite3_value **argv +){ +  amatch_cursor *pCur = (amatch_cursor *)pVtabCursor; +  const char *zWord = "*"; +  int idx; + +  amatchClearCursor(pCur); +  idx = 0; +  if( idxNum & 1 ){ +    zWord = (const char*)sqlite3_value_text(argv[0]); +    idx++; +  } +  if( idxNum & 2 ){ +    pCur->rLimit = (amatch_cost)sqlite3_value_int(argv[idx]); +    idx++; +  } +  if( idxNum & 4 ){ +    pCur->iLang = (amatch_cost)sqlite3_value_int(argv[idx]); +    idx++; +  } +  pCur->zInput = sqlite3_mprintf("%s", zWord); +  if( pCur->zInput==0 ) return SQLITE_NOMEM; +  amatchAddWord(pCur, 0, 0, "", ""); +  amatchNext(pVtabCursor); + +  return SQLITE_OK; +} + +/* +** Only the word and distance columns have values.  
All other columns +** return NULL +*/ +static int amatchColumn(sqlite3_vtab_cursor *cur, sqlite3_context *ctx, int i){ +  amatch_cursor *pCur = (amatch_cursor*)cur; +  switch( i ){ +    case AMATCH_COL_WORD: { +      sqlite3_result_text(ctx, pCur->pCurrent->zWord+2, -1, SQLITE_STATIC); +      break; +    } +    case AMATCH_COL_DISTANCE: { +      sqlite3_result_int(ctx, pCur->pCurrent->rCost); +      break; +    } +    case AMATCH_COL_LANGUAGE: { +      sqlite3_result_int(ctx, pCur->iLang); +      break; +    } +    case AMATCH_COL_NWORD: { +      sqlite3_result_int(ctx, pCur->nWord); +      break; +    } +    default: { +      sqlite3_result_null(ctx); +      break; +    } +  } +  return SQLITE_OK; +} + +/* +** The rowid. +*/ +static int amatchRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){ +  amatch_cursor *pCur = (amatch_cursor*)cur; +  *pRowid = pCur->iRowid; +  return SQLITE_OK; +} + +/* +** EOF indicator +*/ +static int amatchEof(sqlite3_vtab_cursor *cur){ +  amatch_cursor *pCur = (amatch_cursor*)cur; +  return pCur->pCurrent==0; +} + +/* +** Search for terms of these forms: +** +**   (A)    word MATCH $str +**   (B1)   distance < $value +**   (B2)   distance <= $value +**   (C)    language == $language +** +** The distance< and distance<= are both treated as distance<=. +** The query plan number is a bit vector: +** +**   bit 1:   Term of the form (A) found +**   bit 2:   Term like (B1) or (B2) found +**   bit 3:   Term like (C) found +** +** If bit-1 is set, $str is always in filter.argv[0].  If bit-2 is set +** then $value is in filter.argv[0] if bit-1 is clear and is in  +** filter.argv[1] if bit-1 is set.  If bit-3 is set, then $ruleid is +** in filter.argv[0] if bit-1 and bit-2 are both zero, is in +** filter.argv[1] if exactly one of bit-1 and bit-2 are set, and is in +** filter.argv[2] if both bit-1 and bit-2 are set. 
+*/ +static int amatchBestIndex( +  sqlite3_vtab *tab, +  sqlite3_index_info *pIdxInfo +){ +  int iPlan = 0; +  int iDistTerm = -1; +  int iLangTerm = -1; +  int i; +  const struct sqlite3_index_constraint *pConstraint; + +  (void)tab; +  pConstraint = pIdxInfo->aConstraint; +  for(i=0; i<pIdxInfo->nConstraint; i++, pConstraint++){ +    if( pConstraint->usable==0 ) continue; +    if( (iPlan & 1)==0  +     && pConstraint->iColumn==0 +     && pConstraint->op==SQLITE_INDEX_CONSTRAINT_MATCH +    ){ +      iPlan |= 1; +      pIdxInfo->aConstraintUsage[i].argvIndex = 1; +      pIdxInfo->aConstraintUsage[i].omit = 1; +    } +    if( (iPlan & 2)==0 +     && pConstraint->iColumn==1 +     && (pConstraint->op==SQLITE_INDEX_CONSTRAINT_LT +           || pConstraint->op==SQLITE_INDEX_CONSTRAINT_LE) +    ){ +      iPlan |= 2; +      iDistTerm = i; +    } +    if( (iPlan & 4)==0 +     && pConstraint->iColumn==2 +     && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ +    ){ +      iPlan |= 4; +      pIdxInfo->aConstraintUsage[i].omit = 1; +      iLangTerm = i; +    } +  } +  if( iPlan & 2 ){ +    pIdxInfo->aConstraintUsage[iDistTerm].argvIndex = 1+((iPlan&1)!=0); +  } +  if( iPlan & 4 ){ +    int idx = 1; +    if( iPlan & 1 ) idx++; +    if( iPlan & 2 ) idx++; +    pIdxInfo->aConstraintUsage[iLangTerm].argvIndex = idx; +  } +  pIdxInfo->idxNum = iPlan; +  if( pIdxInfo->nOrderBy==1 +   && pIdxInfo->aOrderBy[0].iColumn==1 +   && pIdxInfo->aOrderBy[0].desc==0 +  ){ +    pIdxInfo->orderByConsumed = 1; +  } +  pIdxInfo->estimatedCost = (double)10000; +    +  return SQLITE_OK; +} + +/* +** The xUpdate() method.   +** +** This implementation disallows DELETE and UPDATE.  The only thing +** allowed is INSERT into the "command" column. 
+*/ +static int amatchUpdate( +  sqlite3_vtab *pVTab, +  int argc, +  sqlite3_value **argv, +  sqlite_int64 *pRowid +){ +  amatch_vtab *p = (amatch_vtab*)pVTab; +  const unsigned char *zCmd; +  (void)pRowid; +  if( argc==1 ){ +    pVTab->zErrMsg = sqlite3_mprintf("DELETE from %s is not allowed",  +                                      p->zSelf); +    return SQLITE_ERROR; +  } +  if( sqlite3_value_type(argv[0])!=SQLITE_NULL ){ +    pVTab->zErrMsg = sqlite3_mprintf("UPDATE of %s is not allowed",  +                                      p->zSelf); +    return SQLITE_ERROR; +  } +  if( sqlite3_value_type(argv[2+AMATCH_COL_WORD])!=SQLITE_NULL +   || sqlite3_value_type(argv[2+AMATCH_COL_DISTANCE])!=SQLITE_NULL +   || sqlite3_value_type(argv[2+AMATCH_COL_LANGUAGE])!=SQLITE_NULL +  ){ +    pVTab->zErrMsg = sqlite3_mprintf( +            "INSERT INTO %s allowed for column [command] only", p->zSelf); +    return SQLITE_ERROR; +  } +  zCmd = sqlite3_value_text(argv[2+AMATCH_COL_COMMAND]); +  if( zCmd==0 ) return SQLITE_OK; +   +  return SQLITE_OK; +} + +/* +** A virtual table module that implements the "approximate_match". 
+*/ +static sqlite3_module amatchModule = { +  0,                      /* iVersion */ +  amatchConnect,          /* xCreate */ +  amatchConnect,          /* xConnect */ +  amatchBestIndex,        /* xBestIndex */ +  amatchDisconnect,       /* xDisconnect */ +  amatchDisconnect,       /* xDestroy */ +  amatchOpen,             /* xOpen - open a cursor */ +  amatchClose,            /* xClose - close a cursor */ +  amatchFilter,           /* xFilter - configure scan constraints */ +  amatchNext,             /* xNext - advance a cursor */ +  amatchEof,              /* xEof - check for end of scan */ +  amatchColumn,           /* xColumn - read data */ +  amatchRowid,            /* xRowid - read data */ +  amatchUpdate,           /* xUpdate */ +  0,                      /* xBegin */ +  0,                      /* xSync */ +  0,                      /* xCommit */ +  0,                      /* xRollback */ +  0,                      /* xFindMethod */ +  0,                      /* xRename */ +  0,                      /* xSavepoint */ +  0,                      /* xRelease */ +  0                       /* xRollbackTo */ +}; + +#endif /* SQLITE_OMIT_VIRTUALTABLE */ + +/* +** Register the amatch virtual table +*/ +#ifdef _WIN32 +__declspec(dllexport) +#endif +int sqlite3_amatch_init( +  sqlite3 *db,  +  char **pzErrMsg,  +  const sqlite3_api_routines *pApi +){ +  int rc = SQLITE_OK; +  SQLITE_EXTENSION_INIT2(pApi); +  (void)pzErrMsg;  /* Not used */ +#ifndef SQLITE_OMIT_VIRTUALTABLE +  rc = sqlite3_create_module(db, "approximate_match", &amatchModule, 0); +#endif /* SQLITE_OMIT_VIRTUALTABLE */ +  return rc; +} diff --git a/ext/misc/closure.c b/ext/misc/closure.c new file mode 100644 index 0000000..213b763 --- /dev/null +++ b/ext/misc/closure.c @@ -0,0 +1,948 @@ +/* +** 2013-04-16 +** +** The author disclaims copyright to this source code.  In place of +** a legal notice, here is a blessing: +** +**    May you do good and not evil. 
+**    May you find forgiveness for yourself and forgive others. +**    May you share freely, never taking more than you give. +** +************************************************************************* +** +** This file contains code for a virtual table that finds the transitive +** closure of a parent/child relationship in a real table.  The virtual  +** table is called "transitive_closure". +** +** A transitive_closure virtual table is created like this: +** +**     CREATE VIRTUAL TABLE x USING transitive_closure( +**        tablename=<tablename>,      -- T +**        idcolumn=<columnname>,      -- X +**        parentcolumn=<columnname>   -- P +**     ); +** +** When it is created, the new transitive_closure table may be supplied  +** with default values for the name of a table T and columns T.X and T.P. +** The T.X and T.P columns must contain integers.  The ideal case is for  +** T.X to be the INTEGER PRIMARY KEY.  The T.P column should reference +** the T.X column. The row referenced by T.P is the parent of the current row. +** +** The tablename, idcolumn, and parentcolumn supplied by the CREATE VIRTUAL +** TABLE statement may be overridden in individual queries by including +** terms like tablename='newtable', idcolumn='id2', or  +** parentcolumn='parent3' in the WHERE clause of the query. 
+** +** For efficiency, it is essential that there be an index on the P column: +** +**    CREATE INDEX Tidx1 ON T(P) +** +** Suppose a specific instance of the closure table is as follows: +** +**    CREATE VIRTUAL TABLE ct1 USING transitive_closure( +**       tablename='group', +**       idcolumn='groupId', +**       parentcolumn='parentId' +**    ); +** +** Such an instance of the transitive_closure virtual table would be +** appropriate for walking a tree defined using a table like this, for example: +** +**    CREATE TABLE group( +**      groupId INTEGER PRIMARY KEY, +**      parentId INTEGER REFERENCES group +**    ); +**    CREATE INDEX group_idx1 ON group(parentId); +** +** The group table above would presumably have other application-specific +** fields.  The key point here is that rows of the group table form a +** tree.  The purpose of the ct1 virtual table is to easily extract +** branches of that tree. +** +** Once it has been created, the ct1 virtual table can be queried +** as follows: +** +**    SELECT * FROM element +**     WHERE element.groupId IN (SELECT id FROM ct1 WHERE root=?1); +** +** The above query will return all elements that are part of group ?1 +** or children of group ?1 or grand-children of ?1 and so forth for all +** descendants of group ?1.  The same query can be formulated as a join: +** +**    SELECT element.* FROM element, ct1 +**     WHERE element.groupid=ct1.id +**       AND ct1.root=?1; +** +** The depth of the transitive_closure (the number of generations of +** parent/child relations to follow) can be limited by constraining the +** "depth" column in the WHERE clause.  
So, for example, the following query +** finds only children and grandchildren but no further descendants: +** +**    SELECT element.* FROM element, ct1 +**     WHERE element.groupid=ct1.id +**       AND ct1.root=?1 +**       AND ct1.depth<=2; +** +** The "ct1.depth<=2" term could be a strict equality "ct1.depth=2" in +** order to find only the grandchildren of ?1, not ?1 itself or the +** children of ?1. +**  +** The root=?1 term must be supplied in the WHERE clause or else the query +** of the ct1 virtual table will return an empty set.  The tablename, +** idcolumn, and parentcolumn attributes can be overridden in the WHERE +** clause if desired.  So, for example, the ct1 table could be repurposed +** to find ancestors rather than descendants by inverting the roles of +** the idcolumn and parentcolumn: +** +**    SELECT element.* FROM element, ct1 +**     WHERE element.groupid=ct1.id +**       AND ct1.root=?1 +**       AND ct1.idcolumn='parentId' +**       AND ct1.parentcolumn='groupId'; +** +** Multiple calls to ct1 could be combined.  For example, the following +** query finds all elements that are "cousins" of groupId ?1.  That is to say +** elements where the groupId is a grandchild of the grandparent of ?1. +** (This definition of "cousins" also includes siblings and self.) +** +**    SELECT element.* FROM element, ct1 +**     WHERE element.groupId=ct1.id +**       AND ct1.depth=2 +**       AND ct1.root IN (SELECT id FROM ct1 +**                         WHERE root=?1 +**                           AND depth=2 +**                           AND idcolumn='parentId' +**                           AND parentcolumn='groupId'); +** +** In our example, the group.groupId column is unique and thus the +** subquery will return exactly one row.  For that reason, the IN +** operator could be replaced by "=" to get the same result.  But +** in the general case where the idcolumn is not unique, an IN operator +** would be required for this kind of query. 
+** +** Note that because the tablename, idcolumn, and parentcolumn can +** all be specified in the query, it is possible for an application +** to define a single transitive_closure virtual table for use on lots +** of different hierarchy tables.  One might say: +** +**     CREATE VIRTUAL TABLE temp.closure USING transitive_closure; +** +** As each database connection is being opened.  Then the application +** would always have a "closure" virtual table handy to use for querying. +** +**    SELECT element.* FROM element, closure +**     WHERE element.groupid=ct1.id +**       AND closure.root=?1 +**       AND closure.tablename='group' +**       AND closure.idname='groupId' +**       AND closure.parentname='parentId'; +** +** See the documentation at http://www.sqlite.org/loadext.html for information +** on how to compile and use loadable extensions such as this one. +*/ +#include "sqlite3ext.h" +SQLITE_EXTENSION_INIT1 +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include <stdio.h> +#include <ctype.h> + +#ifndef SQLITE_OMIT_VIRTUALTABLE + +/* +** Forward declaration of objects used by this implementation +*/ +typedef struct closure_vtab closure_vtab; +typedef struct closure_cursor closure_cursor; +typedef struct closure_queue closure_queue; +typedef struct closure_avl closure_avl; + +/***************************************************************************** +** AVL Tree implementation +*/ +/* +** Objects that want to be members of the AVL tree should embedded an +** instance of this structure. +*/ +struct closure_avl { +  sqlite3_int64 id;     /* Id of this entry in the table */ +  int iGeneration;      /* Which generation is this entry part of */ +  closure_avl *pList;   /* A linked list of nodes */ +  closure_avl *pBefore; /* Other elements less than id */ +  closure_avl *pAfter;  /* Other elements greater than id */ +  closure_avl *pUp;     /* Parent element */ +  short int height;     /* Height of this node.  
Leaf==1 */ +  short int imbalance;  /* Height difference between pBefore and pAfter */ +}; + +/* Recompute the closure_avl.height and closure_avl.imbalance fields for p. +** Assume that the children of p have correct heights. +*/ +static void closureAvlRecomputeHeight(closure_avl *p){ +  short int hBefore = p->pBefore ? p->pBefore->height : 0; +  short int hAfter = p->pAfter ? p->pAfter->height : 0; +  p->imbalance = hBefore - hAfter;  /* -: pAfter higher.  +: pBefore higher */ +  p->height = (hBefore>hAfter ? hBefore : hAfter)+1; +} + +/* +**     P                B +**    / \              / \ +**   B   Z    ==>     X   P +**  / \                  / \ +** X   Y                Y   Z +** +*/ +static closure_avl *closureAvlRotateBefore(closure_avl *pP){ +  closure_avl *pB = pP->pBefore; +  closure_avl *pY = pB->pAfter; +  pB->pUp = pP->pUp; +  pB->pAfter = pP; +  pP->pUp = pB; +  pP->pBefore = pY; +  if( pY ) pY->pUp = pP; +  closureAvlRecomputeHeight(pP); +  closureAvlRecomputeHeight(pB); +  return pB; +} + +/* +**     P                A +**    / \              / \ +**   X   A    ==>     P   Z +**      / \          / \ +**     Y   Z        X   Y +** +*/ +static closure_avl *closureAvlRotateAfter(closure_avl *pP){ +  closure_avl *pA = pP->pAfter; +  closure_avl *pY = pA->pBefore; +  pA->pUp = pP->pUp; +  pA->pBefore = pP; +  pP->pUp = pA; +  pP->pAfter = pY; +  if( pY ) pY->pUp = pP; +  closureAvlRecomputeHeight(pP); +  closureAvlRecomputeHeight(pA); +  return pA; +} + +/* +** Return a pointer to the pBefore or pAfter pointer in the parent +** of p that points to p.  Or if p is the root node, return pp. +*/ +static closure_avl **closureAvlFromPtr(closure_avl *p, closure_avl **pp){ +  closure_avl *pUp = p->pUp; +  if( pUp==0 ) return pp; +  if( pUp->pAfter==p ) return &pUp->pAfter; +  return &pUp->pBefore; +} + +/* +** Rebalance all nodes starting with p and working up to the root. +** Return the new root. 
+*/ +static closure_avl *closureAvlBalance(closure_avl *p){ +  closure_avl *pTop = p; +  closure_avl **pp; +  while( p ){ +    closureAvlRecomputeHeight(p); +    if( p->imbalance>=2 ){ +      closure_avl *pB = p->pBefore; +      if( pB->imbalance<0 ) p->pBefore = closureAvlRotateAfter(pB); +      pp = closureAvlFromPtr(p,&p); +      p = *pp = closureAvlRotateBefore(p); +    }else if( p->imbalance<=(-2) ){ +      closure_avl *pA = p->pAfter; +      if( pA->imbalance>0 ) p->pAfter = closureAvlRotateBefore(pA); +      pp = closureAvlFromPtr(p,&p); +      p = *pp = closureAvlRotateAfter(p); +    } +    pTop = p; +    p = p->pUp; +  } +  return pTop; +} + +/* Search the tree rooted at p for an entry with id.  Return a pointer +** to the entry or return NULL. +*/ +static closure_avl *closureAvlSearch(closure_avl *p, sqlite3_int64 id){ +  while( p && id!=p->id ){ +    p = (id<p->id) ? p->pBefore : p->pAfter; +  } +  return p; +} + +/* Find the first node (the one with the smallest key). +*/ +static closure_avl *closureAvlFirst(closure_avl *p){ +  if( p ) while( p->pBefore ) p = p->pBefore; +  return p; +} + +/* Return the node with the next larger key after p. +*/ +closure_avl *closureAvlNext(closure_avl *p){ +  closure_avl *pPrev = 0; +  while( p && p->pAfter==pPrev ){ +    pPrev = p; +    p = p->pUp; +  } +  if( p && pPrev==0 ){ +    p = closureAvlFirst(p->pAfter); +  } +  return p; +} + +/* Insert a new node pNew.  Return NULL on success.  If the key is not +** unique, then do not perform the insert but instead leave pNew unchanged +** and return a pointer to an existing node with the same key. 
+*/ +static closure_avl *closureAvlInsert( +  closure_avl **ppHead,  /* Head of the tree */ +  closure_avl *pNew      /* New node to be inserted */ +){ +  closure_avl *p = *ppHead; +  if( p==0 ){ +    p = pNew; +    pNew->pUp = 0; +  }else{ +    while( p ){ +      if( pNew->id<p->id ){ +        if( p->pBefore ){ +          p = p->pBefore; +        }else{ +          p->pBefore = pNew; +          pNew->pUp = p; +          break; +        } +      }else if( pNew->id>p->id ){ +        if( p->pAfter ){ +          p = p->pAfter; +        }else{ +          p->pAfter = pNew; +          pNew->pUp = p; +          break; +        } +      }else{ +        return p; +      } +    } +  } +  pNew->pBefore = 0; +  pNew->pAfter = 0; +  pNew->height = 1; +  pNew->imbalance = 0; +  *ppHead = closureAvlBalance(p); +  return 0; +} + +/* Walk the tree can call xDestroy on each node +*/ +static void closureAvlDestroy(closure_avl *p, void (*xDestroy)(closure_avl*)){ +  if( p ){ +    closureAvlDestroy(p->pBefore, xDestroy); +    closureAvlDestroy(p->pAfter, xDestroy); +    xDestroy(p); +  } +} +/* +** End of the AVL Tree implementation +******************************************************************************/ + +/*  +** A closure virtual-table object  +*/ +struct closure_vtab { +  sqlite3_vtab base;         /* Base class - must be first */ +  char *zDb;                 /* Name of database.  
(ex: "main") */ +  char *zSelf;               /* Name of this virtual table */ +  char *zTableName;          /* Name of table holding parent/child relation */ +  char *zIdColumn;           /* Name of ID column of zTableName */ +  char *zParentColumn;       /* Name of PARENT column in zTableName */ +  sqlite3 *db;               /* The database connection */ +  int nCursor;               /* Number of pending cursors */ +}; + +/* A closure cursor object */ +struct closure_cursor { +  sqlite3_vtab_cursor base;  /* Base class - must be first */ +  closure_vtab *pVtab;       /* The virtual table this cursor belongs to */ +  char *zTableName;          /* Name of table holding parent/child relation */ +  char *zIdColumn;           /* Name of ID column of zTableName */ +  char *zParentColumn;       /* Name of PARENT column in zTableName */ +  closure_avl *pCurrent;     /* Current element of output */ +  closure_avl *pClosure;     /* The complete closure tree */ +}; + +/* A queue of AVL nodes */ +struct closure_queue { +  closure_avl *pFirst;       /* Oldest node on the queue */ +  closure_avl *pLast;        /* Youngest node on the queue */ +}; + +/* +** Add a node to the end of the queue +*/ +static void queuePush(closure_queue *pQueue, closure_avl *pNode){ +  pNode->pList = 0; +  if( pQueue->pLast ){ +    pQueue->pLast->pList = pNode; +  }else{ +    pQueue->pFirst = pNode; +  } +  pQueue->pLast = pNode; +} + +/* +** Extract the oldest element (the front element) from the queue. +*/ +static closure_avl *queuePull(closure_queue *pQueue){ +  closure_avl *p = pQueue->pFirst; +  if( p ){ +    pQueue->pFirst = p->pList; +    if( pQueue->pFirst==0 ) pQueue->pLast = 0; +  } +  return p; +} + +/* +** This function converts an SQL quoted string into an unquoted string +** and returns a pointer to a buffer allocated using sqlite3_malloc()  +** containing the result. The caller should eventually free this buffer +** using sqlite3_free. 
+** +** Examples: +** +**     "abc"   becomes   abc +**     'xyz'   becomes   xyz +**     [pqr]   becomes   pqr +**     `mno`   becomes   mno +*/ +static char *closureDequote(const char *zIn){ +  int nIn;                        /* Size of input string, in bytes */ +  char *zOut;                     /* Output (dequoted) string */ + +  nIn = (int)strlen(zIn); +  zOut = sqlite3_malloc(nIn+1); +  if( zOut ){ +    char q = zIn[0];              /* Quote character (if any ) */ + +    if( q!='[' && q!= '\'' && q!='"' && q!='`' ){ +      memcpy(zOut, zIn, nIn+1); +    }else{ +      int iOut = 0;               /* Index of next byte to write to output */ +      int iIn;                    /* Index of next byte to read from input */ + +      if( q=='[' ) q = ']'; +      for(iIn=1; iIn<nIn; iIn++){ +        if( zIn[iIn]==q ) iIn++; +        zOut[iOut++] = zIn[iIn]; +      } +    } +    assert( (int)strlen(zOut)<=nIn ); +  } +  return zOut; +} + +/* +** Deallocate an closure_vtab object +*/ +static void closureFree(closure_vtab *p){ +  if( p ){ +    sqlite3_free(p->zDb); +    sqlite3_free(p->zSelf); +    sqlite3_free(p->zTableName); +    sqlite3_free(p->zIdColumn); +    sqlite3_free(p->zParentColumn); +    memset(p, 0, sizeof(*p)); +    sqlite3_free(p); +  } +} + +/* +** xDisconnect/xDestroy method for the closure module. +*/ +static int closureDisconnect(sqlite3_vtab *pVtab){ +  closure_vtab *p = (closure_vtab*)pVtab; +  assert( p->nCursor==0 ); +  closureFree(p); +  return SQLITE_OK; +} + +/* +** Check to see if the argument is of the form: +** +**       KEY = VALUE +** +** If it is, return a pointer to the first character of VALUE. +** If not, return NULL.  Spaces around the = are ignored. 
+*/ +static const char *closureValueOfKey(const char *zKey, const char *zStr){ +  int nKey = (int)strlen(zKey); +  int nStr = (int)strlen(zStr); +  int i; +  if( nStr<nKey+1 ) return 0; +  if( memcmp(zStr, zKey, nKey)!=0 ) return 0; +  for(i=nKey; isspace(zStr[i]); i++){} +  if( zStr[i]!='=' ) return 0; +  i++; +  while( isspace(zStr[i]) ){ i++; } +  return zStr+i; +} + +/* +** xConnect/xCreate method for the closure module. Arguments are: +** +**   argv[0]    -> module name  ("approximate_match") +**   argv[1]    -> database name +**   argv[2]    -> table name +**   argv[3...] -> arguments +*/ +static int closureConnect( +  sqlite3 *db, +  void *pAux, +  int argc, const char *const*argv, +  sqlite3_vtab **ppVtab, +  char **pzErr +){ +  int rc = SQLITE_OK;              /* Return code */ +  closure_vtab *pNew = 0;          /* New virtual table */ +  const char *zDb = argv[1]; +  const char *zVal; +  int i; + +  (void)pAux; +  *ppVtab = 0; +  pNew = sqlite3_malloc( sizeof(*pNew) ); +  if( pNew==0 ) return SQLITE_NOMEM; +  rc = SQLITE_NOMEM; +  memset(pNew, 0, sizeof(*pNew)); +  pNew->db = db; +  pNew->zDb = sqlite3_mprintf("%s", zDb); +  if( pNew->zDb==0 ) goto closureConnectError; +  pNew->zSelf = sqlite3_mprintf("%s", argv[2]); +  if( pNew->zSelf==0 ) goto closureConnectError; +  for(i=3; i<argc; i++){ +    zVal = closureValueOfKey("tablename", argv[i]); +    if( zVal ){ +      sqlite3_free(pNew->zTableName); +      pNew->zTableName = closureDequote(zVal); +      if( pNew->zTableName==0 ) goto closureConnectError; +      continue; +    } +    zVal = closureValueOfKey("idcolumn", argv[i]); +    if( zVal ){ +      sqlite3_free(pNew->zIdColumn); +      pNew->zIdColumn = closureDequote(zVal); +      if( pNew->zIdColumn==0 ) goto closureConnectError; +      continue; +    } +    zVal = closureValueOfKey("parentcolumn", argv[i]); +    if( zVal ){ +      sqlite3_free(pNew->zParentColumn); +      pNew->zParentColumn = closureDequote(zVal); +      if( pNew->zParentColumn==0 
) goto closureConnectError; +      continue; +    } +    *pzErr = sqlite3_mprintf("unrecognized argument: [%s]\n", argv[i]); +    closureFree(pNew); +    *ppVtab = 0; +    return SQLITE_ERROR; +  } +  rc = sqlite3_declare_vtab(db, +         "CREATE TABLE x(id,depth,root HIDDEN,tablename HIDDEN," +                        "idcolumn HIDDEN,parentcolumn HIDDEN)" +       ); +#define CLOSURE_COL_ID              0 +#define CLOSURE_COL_DEPTH           1 +#define CLOSURE_COL_ROOT            2 +#define CLOSURE_COL_TABLENAME       3 +#define CLOSURE_COL_IDCOLUMN        4 +#define CLOSURE_COL_PARENTCOLUMN    5 +  if( rc!=SQLITE_OK ){ +    closureFree(pNew); +  } +  *ppVtab = &pNew->base; +  return rc; + +closureConnectError: +  closureFree(pNew); +  return rc; +} + +/* +** Open a new closure cursor. +*/ +static int closureOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){ +  closure_vtab *p = (closure_vtab*)pVTab; +  closure_cursor *pCur; +  pCur = sqlite3_malloc( sizeof(*pCur) ); +  if( pCur==0 ) return SQLITE_NOMEM; +  memset(pCur, 0, sizeof(*pCur)); +  pCur->pVtab = p; +  *ppCursor = &pCur->base; +  p->nCursor++; +  return SQLITE_OK; +} + +/* +** Free up all the memory allocated by a cursor.  Set it rLimit to 0 +** to indicate that it is at EOF. +*/ +static void closureClearCursor(closure_cursor *pCur){ +  closureAvlDestroy(pCur->pClosure, (void(*)(closure_avl*))sqlite3_free); +  sqlite3_free(pCur->zTableName); +  sqlite3_free(pCur->zIdColumn); +  sqlite3_free(pCur->zParentColumn); +  pCur->zTableName = 0; +  pCur->zIdColumn = 0; +  pCur->zParentColumn = 0; +  pCur->pCurrent = 0; +  pCur->pClosure = 0; +} + +/* +** Close a closure cursor. 
+*/ +static int closureClose(sqlite3_vtab_cursor *cur){ +  closure_cursor *pCur = (closure_cursor *)cur; +  closureClearCursor(pCur); +  pCur->pVtab->nCursor--; +  sqlite3_free(pCur); +  return SQLITE_OK; +} + +/* +** Advance a cursor to its next row of output +*/ +static int closureNext(sqlite3_vtab_cursor *cur){ +  closure_cursor *pCur = (closure_cursor*)cur; +  pCur->pCurrent = closureAvlNext(pCur->pCurrent); +  return SQLITE_OK; +} + +/* +** Allocate and insert a node +*/ +static int closureInsertNode( +  closure_queue *pQueue,  /* Add new node to this queue */ +  closure_cursor *pCur,   /* The cursor into which to add the node */ +  sqlite3_int64 id,       /* The node ID */ +  int iGeneration         /* The generation number for this node */ +){ +  closure_avl *pNew = sqlite3_malloc( sizeof(*pNew) ); +  if( pNew==0 ) return SQLITE_NOMEM; +  memset(pNew, 0, sizeof(*pNew)); +  pNew->id = id; +  pNew->iGeneration = iGeneration; +  closureAvlInsert(&pCur->pClosure, pNew); +  queuePush(pQueue, pNew); +  return SQLITE_OK; +} + +/* +** Called to "rewind" a cursor back to the beginning so that +** it starts its output over again.  Always called at least once +** prior to any closureColumn, closureRowid, or closureEof call. +** +** This routine actually computes the closure. +** +** See the comment at the beginning of closureBestIndex() for a  +** description of the meaning of idxNum.  The idxStr parameter is +** not used. 
+*/ +static int closureFilter( +  sqlite3_vtab_cursor *pVtabCursor,  +  int idxNum, const char *idxStr, +  int argc, sqlite3_value **argv +){ +  closure_cursor *pCur = (closure_cursor *)pVtabCursor; +  closure_vtab *pVtab = pCur->pVtab; +  sqlite3_int64 iRoot; +  int mxGen = 999999999; +  char *zSql; +  sqlite3_stmt *pStmt; +  closure_avl *pAvl; +  int rc = SQLITE_OK; +  const char *zTableName = pVtab->zTableName; +  const char *zIdColumn = pVtab->zIdColumn; +  const char *zParentColumn = pVtab->zParentColumn; +  closure_queue sQueue; + +  (void)idxStr;  /* Unused parameter */ +  (void)argc;    /* Unused parameter */ +  closureClearCursor(pCur); +  memset(&sQueue, 0, sizeof(sQueue)); +  if( (idxNum & 1)==0 ){ +    /* No root=$root in the WHERE clause.  Return an empty set */ +    return SQLITE_OK; +  } +  iRoot = sqlite3_value_int64(argv[0]); +  if( (idxNum & 0x000f0)!=0 ){ +    mxGen = sqlite3_value_int(argv[(idxNum>>4)&0x0f]); +    if( (idxNum & 0x00002)!=0 ) mxGen--; +  } +  if( (idxNum & 0x00f00)!=0 ){ +    zTableName = (const char*)sqlite3_value_text(argv[(idxNum>>8)&0x0f]); +    pCur->zTableName = sqlite3_mprintf("%s", zTableName); +  } +  if( (idxNum & 0x0f000)!=0 ){ +    zIdColumn = (const char*)sqlite3_value_text(argv[(idxNum>>12)&0x0f]); +    pCur->zIdColumn = sqlite3_mprintf("%s", zIdColumn); +  } +  if( (idxNum & 0x0f0000)!=0 ){ +    zParentColumn = (const char*)sqlite3_value_text(argv[(idxNum>>16)&0x0f]); +    pCur->zParentColumn = sqlite3_mprintf("%s", zParentColumn); +  } + +  zSql = sqlite3_mprintf( +       "SELECT \"%w\".\"%w\" FROM \"%w\" WHERE \"%w\".\"%w\"=?1", +       zTableName, zIdColumn, zTableName, zTableName, zParentColumn); +  if( zSql==0 ){ +    return SQLITE_NOMEM; +  }else{ +    rc = sqlite3_prepare_v2(pVtab->db, zSql, -1, &pStmt, 0); +    sqlite3_free(zSql); +    if( rc ){ +      sqlite3_free(pVtab->base.zErrMsg); +      pVtab->base.zErrMsg = sqlite3_mprintf("%s", sqlite3_errmsg(pVtab->db)); +      return rc; +    } +  } +  if( 
rc==SQLITE_OK ){ +    rc = closureInsertNode(&sQueue, pCur, iRoot, 0); +  } +  while( (pAvl = queuePull(&sQueue))!=0 ){ +    if( pAvl->iGeneration>=mxGen ) continue; +    sqlite3_bind_int64(pStmt, 1, pAvl->id); +    while( rc==SQLITE_OK && sqlite3_step(pStmt)==SQLITE_ROW ){ +      if( sqlite3_column_type(pStmt,0)==SQLITE_INTEGER ){ +        sqlite3_int64 iNew = sqlite3_column_int64(pStmt, 0); +        if( closureAvlSearch(pCur->pClosure, iNew)==0 ){ +          rc = closureInsertNode(&sQueue, pCur, iNew, pAvl->iGeneration+1); +        } +      } +    } +    sqlite3_reset(pStmt); +  } +  sqlite3_finalize(pStmt); +  if( rc==SQLITE_OK ){ +    pCur->pCurrent = closureAvlFirst(pCur->pClosure); +  } + +  return rc; +} + +/* +** Only the word and distance columns have values.  All other columns +** return NULL +*/ +static int closureColumn(sqlite3_vtab_cursor *cur, sqlite3_context *ctx, int i){ +  closure_cursor *pCur = (closure_cursor*)cur; +  switch( i ){ +    case CLOSURE_COL_ID: { +      sqlite3_result_int64(ctx, pCur->pCurrent->id); +      break; +    } +    case CLOSURE_COL_DEPTH: { +      sqlite3_result_int(ctx, pCur->pCurrent->iGeneration); +      break; +    } +    case CLOSURE_COL_ROOT: { +      sqlite3_result_null(ctx); +      break; +    } +    case CLOSURE_COL_TABLENAME: { +      sqlite3_result_text(ctx, +         pCur->zTableName ? pCur->zTableName : pCur->pVtab->zTableName, +         -1, SQLITE_TRANSIENT); +      break; +    } +    case CLOSURE_COL_IDCOLUMN: { +      sqlite3_result_text(ctx, +         pCur->zIdColumn ? pCur->zIdColumn : pCur->pVtab->zIdColumn, +         -1, SQLITE_TRANSIENT); +      break; +    } +    case CLOSURE_COL_PARENTCOLUMN: { +      sqlite3_result_text(ctx, +         pCur->zParentColumn ? pCur->zParentColumn : pCur->pVtab->zParentColumn, +         -1, SQLITE_TRANSIENT); +      break; +    } +  } +  return SQLITE_OK; +} + +/* +** The rowid.  For the closure table, this is the same as the "id" column. 
+*/ +static int closureRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){ +  closure_cursor *pCur = (closure_cursor*)cur; +  *pRowid = pCur->pCurrent->id; +  return SQLITE_OK; +} + +/* +** EOF indicator +*/ +static int closureEof(sqlite3_vtab_cursor *cur){ +  closure_cursor *pCur = (closure_cursor*)cur; +  return pCur->pCurrent==0; +} + +/* +** Search for terms of these forms: +** +**   (A)    root = $root +**   (B1)   depth < $depth +**   (B2)   depth <= $depth +**   (B3)   depth = $depth +**   (C)    tablename = $tablename +**   (D)    idcolumn = $idcolumn +**   (E)    parentcolumn = $parentcolumn +** +**  +** +**   idxNum       meaning +**   ----------   ------------------------------------------------------ +**   0x00000001   Term of the form (A) found +**   0x00000002   The term of bit-2 is like (B1) +**   0x000000f0   Index in filter.argv[] of $depth.  0 if not used. +**   0x00000f00   Index in filter.argv[] of $tablename.  0 if not used. +**   0x0000f000   Index in filter.argv[] of $idcolumn.  0 if not used +**   0x000f0000   Index in filter.argv[] of $parentcolumn.  0 if not used. +** +** There must be a term of type (A).  If there is not, then the index type +** is 0 and the query will return an empty set. 
+*/ +static int closureBestIndex( +  sqlite3_vtab *pTab,             /* The virtual table */ +  sqlite3_index_info *pIdxInfo    /* Information about the query */ +){ +  int iPlan = 0; +  int i; +  int idx = 1; +  const struct sqlite3_index_constraint *pConstraint; +  closure_vtab *pVtab = (closure_vtab*)pTab; + +  pConstraint = pIdxInfo->aConstraint; +  for(i=0; i<pIdxInfo->nConstraint; i++, pConstraint++){ +    if( pConstraint->usable==0 ) continue; +    if( (iPlan & 1)==0  +     && pConstraint->iColumn==CLOSURE_COL_ROOT +     && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ +    ){ +      iPlan |= 1; +      pIdxInfo->aConstraintUsage[i].argvIndex = 1; +      pIdxInfo->aConstraintUsage[i].omit = 1; +    } +    if( (iPlan & 0x0000f0)==0 +     && pConstraint->iColumn==CLOSURE_COL_DEPTH +     && (pConstraint->op==SQLITE_INDEX_CONSTRAINT_LT +           || pConstraint->op==SQLITE_INDEX_CONSTRAINT_LE +           || pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ) +    ){ +      iPlan |= idx<<4; +      pIdxInfo->aConstraintUsage[i].argvIndex = ++idx; +      if( pConstraint->op==SQLITE_INDEX_CONSTRAINT_LT ) iPlan |= 0x000002; +    } +    if( (iPlan & 0x000f00)==0 +     && pConstraint->iColumn==CLOSURE_COL_TABLENAME +     && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ +    ){ +      iPlan |= idx<<8; +      pIdxInfo->aConstraintUsage[i].argvIndex = ++idx; +      pIdxInfo->aConstraintUsage[i].omit = 1; +    } +    if( (iPlan & 0x00f000)==0 +     && pConstraint->iColumn==CLOSURE_COL_IDCOLUMN +     && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ +    ){ +      iPlan |= idx<<12; +      pIdxInfo->aConstraintUsage[i].argvIndex = ++idx; +      pIdxInfo->aConstraintUsage[i].omit = 1; +    } +    if( (iPlan & 0x0f0000)==0 +     && pConstraint->iColumn==CLOSURE_COL_PARENTCOLUMN +     && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ +    ){ +      iPlan |= idx<<16; +      pIdxInfo->aConstraintUsage[i].argvIndex = ++idx; +      pIdxInfo->aConstraintUsage[i].omit = 1; +    } +  } +  if( 
(pVtab->zTableName==0    && (iPlan & 0x000f00)==0) +   || (pVtab->zIdColumn==0     && (iPlan & 0x00f000)==0) +   || (pVtab->zParentColumn==0 && (iPlan & 0x0f0000)==0) +  ){ +    /* All of tablename, idcolumn, and parentcolumn must be specified +    ** in either the CREATE VIRTUAL TABLE or in the WHERE clause constraints +    ** or else the result is an empty set. */ +    iPlan = 0; +  } +  pIdxInfo->idxNum = iPlan; +  if( pIdxInfo->nOrderBy==1 +   && pIdxInfo->aOrderBy[0].iColumn==CLOSURE_COL_ID +   && pIdxInfo->aOrderBy[0].desc==0 +  ){ +    pIdxInfo->orderByConsumed = 1; +  } +  pIdxInfo->estimatedCost = (double)10000; +    +  return SQLITE_OK; +} + +/* +** A virtual table module that implements the "approximate_match". +*/ +static sqlite3_module closureModule = { +  0,                      /* iVersion */ +  closureConnect,         /* xCreate */ +  closureConnect,         /* xConnect */ +  closureBestIndex,       /* xBestIndex */ +  closureDisconnect,      /* xDisconnect */ +  closureDisconnect,      /* xDestroy */ +  closureOpen,            /* xOpen - open a cursor */ +  closureClose,           /* xClose - close a cursor */ +  closureFilter,          /* xFilter - configure scan constraints */ +  closureNext,            /* xNext - advance a cursor */ +  closureEof,             /* xEof - check for end of scan */ +  closureColumn,          /* xColumn - read data */ +  closureRowid,           /* xRowid - read data */ +  0,                      /* xUpdate */ +  0,                      /* xBegin */ +  0,                      /* xSync */ +  0,                      /* xCommit */ +  0,                      /* xRollback */ +  0,                      /* xFindMethod */ +  0,                      /* xRename */ +  0,                      /* xSavepoint */ +  0,                      /* xRelease */ +  0                       /* xRollbackTo */ +}; + +#endif /* SQLITE_OMIT_VIRTUALTABLE */ + +/* +** Register the closure virtual table +*/ +#ifdef _WIN32 +__declspec(dllexport) 
+#endif +int sqlite3_closure_init( +  sqlite3 *db,  +  char **pzErrMsg,  +  const sqlite3_api_routines *pApi +){ +  int rc = SQLITE_OK;  /* Stays SQLITE_OK when virtual tables are compiled out */ +  SQLITE_EXTENSION_INIT2(pApi); +  (void)pzErrMsg;  /* Unused parameter */ +#ifndef SQLITE_OMIT_VIRTUALTABLE +  rc = sqlite3_create_module(db, "transitive_closure", &closureModule, 0);  /* SQL-visible module name is "transitive_closure" */ +#endif /* SQLITE_OMIT_VIRTUALTABLE */ +  return rc; +} diff --git a/ext/misc/fuzzer.c b/ext/misc/fuzzer.c new file mode 100644 index 0000000..642b8f9 --- /dev/null +++ b/ext/misc/fuzzer.c @@ -0,0 +1,1173 @@ +/* +** 2011 March 24 +** +** The author disclaims copyright to this source code.  In place of +** a legal notice, here is a blessing: +** +**    May you do good and not evil. +**    May you find forgiveness for yourself and forgive others. +**    May you share freely, never taking more than you give. +** +************************************************************************* +** +** Code for a demonstration virtual table that generates variations +** on an input word at increasing edit distances from the original. +** +** A fuzzer virtual table is created like this: +** +**     CREATE VIRTUAL TABLE f USING fuzzer(<fuzzer-data-table>); +** +** When it is created, the new fuzzer table must be supplied with the +** name of a "fuzzer data table", which must reside in the same database +** file as the new fuzzer table. The fuzzer data table contains the various +** transformations and their costs that the fuzzer logic uses to generate +** variations. +** +** The fuzzer data table must contain exactly four columns (more precisely, +** the statement "SELECT * FROM <fuzzer_data_table>" must return records +** that consist of four columns). It does not matter what the columns are +** named.  +** +** Each row in the fuzzer data table represents a single character +** transformation. The left most column of the row (column 0) contains an +** integer value - the identifier of the ruleset to which the transformation +** rule belongs (see "MULTIPLE RULE SETS" below). 
The second column of the +** row (column 1) contains the input character or characters. The third  +** column contains the output character or characters. And the fourth column +** contains the integer cost of making the transformation. For example: +** +**    CREATE TABLE f_data(ruleset, cFrom, cTo, Cost); +**    INSERT INTO f_data(ruleset, cFrom, cTo, Cost) VALUES(0, '', 'a', 100); +**    INSERT INTO f_data(ruleset, cFrom, cTo, Cost) VALUES(0, 'b', '', 87); +**    INSERT INTO f_data(ruleset, cFrom, cTo, Cost) VALUES(0, 'o', 'oe', 38); +**    INSERT INTO f_data(ruleset, cFrom, cTo, Cost) VALUES(0, 'oe', 'o', 40); +** +** The first row inserted into the fuzzer data table by the SQL script +** above indicates that the cost of inserting a letter 'a' is 100.  (All  +** costs are integers.  We recommend that costs be scaled so that the  +** average cost is around 100.) The second INSERT statement creates a rule +** saying that the cost of deleting a single letter 'b' is 87.  The third +** and fourth INSERT statements mean that the cost of transforming a +** single letter "o" into the two-letter sequence "oe" is 38 and that the +** cost of transforming "oe" back into "o" is 40. +** +** The contents of the fuzzer data table are loaded into main memory when +** a fuzzer table is first created, and may be internally reloaded by the +** system at any subsequent time. Therefore, the fuzzer data table should be  +** populated before the fuzzer table is created and not modified thereafter. +** If you do need to modify the contents of the fuzzer data table, it is +** recommended that the associated fuzzer table be dropped, the fuzzer data +** table edited, and the fuzzer table recreated within a single transaction. +** Alternatively, the fuzzer data table can be edited then the database +** connection can be closed and reopened. 
+** +** Once it has been created, the fuzzer table can be queried as follows: +** +**    SELECT word, distance FROM f +**     WHERE word MATCH 'abcdefg' +**       AND distance<200; +** +** This first query outputs the string "abcdefg" and all strings that +** can be derived from that string by applying the specified transformations. +** The strings are output together with their total transformation cost +** (called "distance") and appear in order of increasing cost.  No string +** is output more than once.  If there are multiple ways to transform the +** target string into the output string then the lowest cost transform is +** the one that is returned.  In the example, the search is limited to  +** strings with a total distance of less than 200. +** +** The fuzzer is a read-only table.  Any attempt to DELETE, INSERT, or +** UPDATE on a fuzzer table will throw an error. +** +** It is important to put some kind of a limit on the fuzzer output.  This +** can be either in the form of a LIMIT clause at the end of the query, +** or better, a "distance<NNN" constraint where NNN is some number.  The +** running time and memory requirement is exponential in the value of NNN  +** so you want to make sure that NNN is not too big.  A value of NNN that +** is about twice the average transformation cost seems to give good results. +** +** The fuzzer table can be useful for tasks such as spelling correction. +** Suppose there is a second table vocabulary(w) where the w column contains +** all correctly spelled words.   Let $word be a word you want to look up. +** +**   SELECT vocabulary.w FROM f, vocabulary +**    WHERE f.word MATCH $word +**      AND f.distance<=200 +**      AND f.word=vocabulary.w +**    LIMIT 20 +** +** The query above gives the 20 closest words to the $word being tested. +** (Note that for good performance, the vocabulary.w column should be +** indexed.) 
+** +** A similar query can be used to find all words in the dictionary that +** begin with some prefix $prefix: +** +**   SELECT vocabulary.w FROM f, vocabulary +**    WHERE f.word MATCH $prefix +**      AND f.distance<=200 +**      AND vocabulary.w BETWEEN f.word AND (f.word || x'F7BFBFBF') +**    LIMIT 50 +** +** This last query will show up to 50 words out of the vocabulary that +** match or nearly match the $prefix. +** +** MULTIPLE RULE SETS +** +** Normally, the "ruleset" value associated with all character transformations +** in the fuzzer data table is zero. However, if required, the fuzzer table +** allows multiple rulesets to be defined. Each query uses only a single +** ruleset. This allows, for example, a single fuzzer table to support  +** multiple languages. +** +** By default, only the rules from ruleset 0 are used. To specify an  +** alternative ruleset, a "ruleset = ?" expression must be added to the +** WHERE clause of a SELECT, where ? is the identifier of the desired  +** ruleset. For example: +** +**   SELECT vocabulary.w FROM f, vocabulary +**    WHERE f.word MATCH $word +**      AND f.distance<=200 +**      AND f.word=vocabulary.w +**      AND f.ruleset=1  -- Specify the ruleset to use here +**    LIMIT 20 +** +** If no "ruleset = ?" constraint is specified in the WHERE clause, ruleset  +** 0 is used. +** +** LIMITS +** +** The maximum ruleset number is 2147483647.  The maximum length of either +** of the strings in the second or third column of the fuzzer data table +** is 50 bytes.  The maximum cost on a rule is 1000. +*/ +#include "sqlite3ext.h" +SQLITE_EXTENSION_INIT1 + +/* If SQLITE_DEBUG is not defined, disable assert statements. 
*/ +#if !defined(NDEBUG) && !defined(SQLITE_DEBUG) +# define NDEBUG +#endif + +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include <stdio.h> + +#ifndef SQLITE_OMIT_VIRTUALTABLE + +/* +** Forward declaration of objects used by this implementation +*/ +typedef struct fuzzer_vtab fuzzer_vtab; +typedef struct fuzzer_cursor fuzzer_cursor; +typedef struct fuzzer_rule fuzzer_rule; +typedef struct fuzzer_seen fuzzer_seen; +typedef struct fuzzer_stem fuzzer_stem; + +/* +** Various types. +** +** fuzzer_cost is the "cost" of an edit operation. +** +** fuzzer_len is the length of a matching string.   +** +** fuzzer_ruleid is an ruleset identifier. +*/ +typedef int fuzzer_cost; +typedef signed char fuzzer_len; +typedef int fuzzer_ruleid; + +/* +** Limits +*/ +#define FUZZER_MX_LENGTH           50   /* Maximum length of a rule string */ +#define FUZZER_MX_RULEID   2147483647   /* Maximum rule ID */ +#define FUZZER_MX_COST           1000   /* Maximum single-rule cost */ +#define FUZZER_MX_OUTPUT_LENGTH   100   /* Maximum length of an output string */ + + +/* +** Each transformation rule is stored as an instance of this object. +** All rules are kept on a linked list sorted by rCost. +*/ +struct fuzzer_rule { +  fuzzer_rule *pNext;         /* Next rule in order of increasing rCost */ +  char *zFrom;                /* Transform from */ +  fuzzer_cost rCost;          /* Cost of this transformation */ +  fuzzer_len nFrom, nTo;      /* Length of the zFrom and zTo strings */ +  fuzzer_ruleid iRuleset;     /* The rule set to which this rule belongs */ +  char zTo[4];                /* Transform to (extra space appended) */ +}; + +/* +** A stem object is used to generate variants.  It is also used to record +** previously generated outputs. +** +** Every stem is added to a hash table as it is output.  Generation of +** duplicate stems is suppressed. +** +** Active stems (those that might generate new outputs) are kepts on a linked +** list sorted by increasing cost.  
The cost is the sum of rBaseCost and +** pRule->rCost. +*/ +struct fuzzer_stem { +  char *zBasis;              /* Word being fuzzed */ +  const fuzzer_rule *pRule;  /* Current rule to apply */ +  fuzzer_stem *pNext;        /* Next stem in rCost order */ +  fuzzer_stem *pHash;        /* Next stem with same hash on zBasis */ +  fuzzer_cost rBaseCost;     /* Base cost of getting to zBasis */ +  fuzzer_cost rCostX;        /* Precomputed rBaseCost + pRule->rCost */ +  fuzzer_len nBasis;         /* Length of the zBasis string */ +  fuzzer_len n;              /* Apply pRule at this character offset */ +}; + +/*  +** A fuzzer virtual-table object  +*/ +struct fuzzer_vtab { +  sqlite3_vtab base;         /* Base class - must be first */ +  char *zClassName;          /* Name of this class.  Default: "fuzzer" */ +  fuzzer_rule *pRule;        /* All active rules in this fuzzer */ +  int nCursor;               /* Number of active cursors */ +}; + +#define FUZZER_HASH  4001    /* Hash table size */ +#define FUZZER_NQUEUE  20    /* Number of slots on the stem queue */ + +/* A fuzzer cursor object */ +struct fuzzer_cursor { +  sqlite3_vtab_cursor base;  /* Base class - must be first */ +  sqlite3_int64 iRowid;      /* The rowid of the current word */ +  fuzzer_vtab *pVtab;        /* The virtual table this cursor belongs to */ +  fuzzer_cost rLimit;        /* Maximum cost of any term */ +  fuzzer_stem *pStem;        /* Stem with smallest rCostX */ +  fuzzer_stem *pDone;        /* Stems already processed to completion */ +  fuzzer_stem *aQueue[FUZZER_NQUEUE];  /* Queue of stems with higher rCostX */ +  int mxQueue;               /* Largest used index in aQueue[] */ +  char *zBuf;                /* Temporary use buffer */ +  int nBuf;                  /* Bytes allocated for zBuf */ +  int nStem;                 /* Number of stems allocated */ +  int iRuleset;              /* Only process rules from this ruleset */ +  fuzzer_rule nullRule;      /* Null rule used first */ +  fuzzer_stem 
*apHash[FUZZER_HASH]; /* Hash of previously generated terms */ +}; + +/* +** The two input rule lists are both sorted in order of increasing +** cost.  Merge them together into a single list, sorted by cost, and +** return a pointer to the head of that list. +*/ +static fuzzer_rule *fuzzerMergeRules(fuzzer_rule *pA, fuzzer_rule *pB){ +  fuzzer_rule head; +  fuzzer_rule *pTail; + +  pTail =  &head; +  while( pA && pB ){ +    if( pA->rCost<=pB->rCost ){ +      pTail->pNext = pA; +      pTail = pA; +      pA = pA->pNext; +    }else{ +      pTail->pNext = pB; +      pTail = pB; +      pB = pB->pNext; +    } +  } +  if( pA==0 ){ +    pTail->pNext = pB; +  }else{ +    pTail->pNext = pA; +  } +  return head.pNext; +} + +/* +** Statement pStmt currently points to a row in the fuzzer data table. This +** function allocates and populates a fuzzer_rule structure according to +** the content of the row. +** +** If successful, *ppRule is set to point to the new object and SQLITE_OK +** is returned. Otherwise, *ppRule is zeroed, *pzErr may be set to point +** to an error message and an SQLite error code returned. 
+*/ +static int fuzzerLoadOneRule( +  fuzzer_vtab *p,                 /* Fuzzer virtual table handle */ +  sqlite3_stmt *pStmt,            /* Base rule on statements current row */ +  fuzzer_rule **ppRule,           /* OUT: New rule object */ +  char **pzErr                    /* OUT: Error message */ +){ +  sqlite3_int64 iRuleset = sqlite3_column_int64(pStmt, 0); +  const char *zFrom = (const char *)sqlite3_column_text(pStmt, 1); +  const char *zTo = (const char *)sqlite3_column_text(pStmt, 2); +  int nCost = sqlite3_column_int(pStmt, 3); + +  int rc = SQLITE_OK;             /* Return code */ +  int nFrom;                      /* Size of string zFrom, in bytes */ +  int nTo;                        /* Size of string zTo, in bytes */ +  fuzzer_rule *pRule = 0;         /* New rule object to return */ + +  if( zFrom==0 ) zFrom = ""; +  if( zTo==0 ) zTo = ""; +  nFrom = (int)strlen(zFrom); +  nTo = (int)strlen(zTo); + +  /* Silently ignore null transformations */ +  if( strcmp(zFrom, zTo)==0 ){ +    *ppRule = 0; +    return SQLITE_OK; +  } + +  if( nCost<=0 || nCost>FUZZER_MX_COST ){ +    *pzErr = sqlite3_mprintf("%s: cost must be between 1 and %d",  +        p->zClassName, FUZZER_MX_COST +    ); +    rc = SQLITE_ERROR; +  }else +  if( nFrom>FUZZER_MX_LENGTH || nTo>FUZZER_MX_LENGTH ){ +    *pzErr = sqlite3_mprintf("%s: maximum string length is %d",  +        p->zClassName, FUZZER_MX_LENGTH +    ); +    rc = SQLITE_ERROR;     +  }else +  if( iRuleset<0 || iRuleset>FUZZER_MX_RULEID ){ +    *pzErr = sqlite3_mprintf("%s: ruleset must be between 0 and %d",  +        p->zClassName, FUZZER_MX_RULEID +    ); +    rc = SQLITE_ERROR;     +  }else{ + +    pRule = sqlite3_malloc( sizeof(*pRule) + nFrom + nTo ); +    if( pRule==0 ){ +      rc = SQLITE_NOMEM; +    }else{ +      memset(pRule, 0, sizeof(*pRule)); +      pRule->zFrom = &pRule->zTo[nTo+1]; +      pRule->nFrom = nFrom; +      memcpy(pRule->zFrom, zFrom, nFrom+1); +      memcpy(pRule->zTo, zTo, nTo+1); +      pRule->nTo = 
nTo; +      pRule->rCost = nCost; +      pRule->iRuleset = (int)iRuleset; +    } +  } + +  *ppRule = pRule; +  return rc; +} + +/* +** Load the content of the fuzzer data table into memory. +*/ +static int fuzzerLoadRules( +  sqlite3 *db,                    /* Database handle */ +  fuzzer_vtab *p,                 /* Virtual fuzzer table to configure */ +  const char *zDb,                /* Database containing rules data */ +  const char *zData,              /* Table containing rules data */ +  char **pzErr                    /* OUT: Error message */ +){ +  int rc = SQLITE_OK;             /* Return code */ +  char *zSql;                     /* SELECT used to read from rules table */ +  fuzzer_rule *pHead = 0; + +  zSql = sqlite3_mprintf("SELECT * FROM %Q.%Q", zDb, zData); +  if( zSql==0 ){ +    rc = SQLITE_NOMEM; +  }else{ +    int rc2;                      /* finalize() return code */ +    sqlite3_stmt *pStmt = 0; +    rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); +    if( rc!=SQLITE_OK ){ +      *pzErr = sqlite3_mprintf("%s: %s", p->zClassName, sqlite3_errmsg(db)); +    }else if( sqlite3_column_count(pStmt)!=4 ){ +      *pzErr = sqlite3_mprintf("%s: %s has %d columns, expected 4", +          p->zClassName, zData, sqlite3_column_count(pStmt) +      ); +      rc = SQLITE_ERROR; +    }else{ +      while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){ +        fuzzer_rule *pRule = 0; +        rc = fuzzerLoadOneRule(p, pStmt, &pRule, pzErr); +        if( pRule ){ +          pRule->pNext = pHead; +          pHead = pRule; +        } +      } +    } +    rc2 = sqlite3_finalize(pStmt); +    if( rc==SQLITE_OK ) rc = rc2; +  } +  sqlite3_free(zSql); + +  /* All rules are now in a singly linked list starting at pHead. This +  ** block sorts them by cost and then sets fuzzer_vtab.pRule to point to  +  ** point to the head of the sorted list. 
+  */ +  if( rc==SQLITE_OK ){ +    unsigned int i; +    fuzzer_rule *pX; +    fuzzer_rule *a[15]; +    for(i=0; i<sizeof(a)/sizeof(a[0]); i++) a[i] = 0; +    while( (pX = pHead)!=0 ){ +      pHead = pX->pNext; +      pX->pNext = 0; +      for(i=0; a[i] && i<sizeof(a)/sizeof(a[0])-1; i++){ +        pX = fuzzerMergeRules(a[i], pX); +        a[i] = 0; +      } +      a[i] = fuzzerMergeRules(a[i], pX); +    } +    for(pX=a[0], i=1; i<sizeof(a)/sizeof(a[0]); i++){ +      pX = fuzzerMergeRules(a[i], pX); +    } +    p->pRule = fuzzerMergeRules(p->pRule, pX); +  }else{ +    /* An error has occurred. Setting p->pRule to point to the head of the +    ** allocated list ensures that the list will be cleaned up in this case. +    */ +    assert( p->pRule==0 ); +    p->pRule = pHead; +  } + +  return rc; +} + +/* +** This function converts an SQL quoted string into an unquoted string +** and returns a pointer to a buffer allocated using sqlite3_malloc()  +** containing the result. The caller should eventually free this buffer +** using sqlite3_free. 
+** +** Examples: +** +**     "abc"   becomes   abc +**     'xyz'   becomes   xyz +**     [pqr]   becomes   pqr +**     `mno`   becomes   mno +*/ +static char *fuzzerDequote(const char *zIn){ +  int nIn;                        /* Size of input string, in bytes */ +  char *zOut;                     /* Output (dequoted) string */ + +  nIn = (int)strlen(zIn); +  zOut = sqlite3_malloc(nIn+1); +  if( zOut ){ +    char q = zIn[0];              /* Quote character (if any ) */ + +    if( q!='[' && q!= '\'' && q!='"' && q!='`' ){ +      memcpy(zOut, zIn, nIn+1); +    }else{ +      int iOut = 0;               /* Index of next byte to write to output */ +      int iIn;                    /* Index of next byte to read from input */ + +      if( q=='[' ) q = ']'; +      for(iIn=1; iIn<nIn; iIn++){ +        if( zIn[iIn]==q ) iIn++; +        zOut[iOut++] = zIn[iIn]; +      } +    } +    assert( (int)strlen(zOut)<=nIn ); +  } +  return zOut; +} + +/* +** xDisconnect/xDestroy method for the fuzzer module. +*/ +static int fuzzerDisconnect(sqlite3_vtab *pVtab){ +  fuzzer_vtab *p = (fuzzer_vtab*)pVtab; +  assert( p->nCursor==0 ); +  while( p->pRule ){ +    fuzzer_rule *pRule = p->pRule; +    p->pRule = pRule->pNext; +    sqlite3_free(pRule); +  } +  sqlite3_free(p); +  return SQLITE_OK; +} + +/* +** xConnect/xCreate method for the fuzzer module. 
Arguments are:
**
**   argv[0]   -> module name  ("fuzzer")
**   argv[1]   -> database name
**   argv[2]   -> table name
**   argv[3]   -> fuzzer rule table name
**
** On success *ppVtab receives the new table and SQLITE_OK is returned.
** On failure *ppVtab is set to NULL, an error code is returned and *pzErr
** may point to an error message.
*/
static int fuzzerConnect(
  sqlite3 *db,
  void *pAux,
  int argc, const char *const*argv,
  sqlite3_vtab **ppVtab,
  char **pzErr
){
  int rc;                         /* Return code */
  int nName;                      /* Length of the module name, in bytes */
  fuzzer_vtab *pTab;              /* New virtual table */
  char *zRuleTab;                 /* Dequoted name of fuzzer data table */

  (void)pAux;
  *ppVtab = 0;

  if( argc!=4 ){
    *pzErr = sqlite3_mprintf(
        "%s: wrong number of CREATE VIRTUAL TABLE arguments", argv[0]
    );
    return SQLITE_ERROR;
  }

  /* The class name is stored immediately after the table object, in the
  ** same allocation. */
  nName = (int)strlen(argv[0]);
  pTab = sqlite3_malloc( sizeof(*pTab) + nName + 1);
  if( pTab==0 ) return SQLITE_NOMEM;
  memset(pTab, 0, sizeof(*pTab));
  pTab->zClassName = (char*)&pTab[1];
  memcpy(pTab->zClassName, argv[0], nName+1);

  /* Load the rules from the (possibly quoted) data table named by argv[3] */
  zRuleTab = fuzzerDequote(argv[3]);
  if( zRuleTab ){
    rc = fuzzerLoadRules(db, pTab, argv[1], zRuleTab, pzErr);
    sqlite3_free(zRuleTab);
  }else{
    rc = SQLITE_NOMEM;
  }

  if( rc==SQLITE_OK ){
    rc = sqlite3_declare_vtab(db, "CREATE TABLE x(word,distance,ruleset)");
  }
  if( rc!=SQLITE_OK ){
    /* Releases the table together with any rules already loaded */
    fuzzerDisconnect(&pTab->base);
    return rc;
  }

  *ppVtab = &pTab->base;
  return SQLITE_OK;
}

/*
** Open a new fuzzer cursor.
+*/ +static int fuzzerOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){ +  fuzzer_vtab *p = (fuzzer_vtab*)pVTab; +  fuzzer_cursor *pCur; +  pCur = sqlite3_malloc( sizeof(*pCur) ); +  if( pCur==0 ) return SQLITE_NOMEM; +  memset(pCur, 0, sizeof(*pCur)); +  pCur->pVtab = p; +  *ppCursor = &pCur->base; +  p->nCursor++; +  return SQLITE_OK; +} + +/* +** Free all stems in a list. +*/ +static void fuzzerClearStemList(fuzzer_stem *pStem){ +  while( pStem ){ +    fuzzer_stem *pNext = pStem->pNext; +    sqlite3_free(pStem); +    pStem = pNext; +  } +} + +/* +** Free up all the memory allocated by a cursor.  Set it rLimit to 0 +** to indicate that it is at EOF. +*/ +static void fuzzerClearCursor(fuzzer_cursor *pCur, int clearHash){ +  int i; +  fuzzerClearStemList(pCur->pStem); +  fuzzerClearStemList(pCur->pDone); +  for(i=0; i<FUZZER_NQUEUE; i++) fuzzerClearStemList(pCur->aQueue[i]); +  pCur->rLimit = (fuzzer_cost)0; +  if( clearHash && pCur->nStem ){ +    pCur->mxQueue = 0; +    pCur->pStem = 0; +    pCur->pDone = 0; +    memset(pCur->aQueue, 0, sizeof(pCur->aQueue)); +    memset(pCur->apHash, 0, sizeof(pCur->apHash)); +  } +  pCur->nStem = 0; +} + +/* +** Close a fuzzer cursor. +*/ +static int fuzzerClose(sqlite3_vtab_cursor *cur){ +  fuzzer_cursor *pCur = (fuzzer_cursor *)cur; +  fuzzerClearCursor(pCur, 0); +  sqlite3_free(pCur->zBuf); +  pCur->pVtab->nCursor--; +  sqlite3_free(pCur); +  return SQLITE_OK; +} + +/* +** Compute the current output term for a fuzzer_stem. +*/ +static int fuzzerRender( +  fuzzer_stem *pStem,   /* The stem to be rendered */ +  char **pzBuf,         /* Write results into this buffer.  
realloc if needed */ +  int *pnBuf            /* Size of the buffer */ +){ +  const fuzzer_rule *pRule = pStem->pRule; +  int n;                          /* Size of output term without nul-term */ +  char *z;                        /* Buffer to assemble output term in */ + +  n = pStem->nBasis + pRule->nTo - pRule->nFrom; +  if( (*pnBuf)<n+1 ){ +    (*pzBuf) = sqlite3_realloc((*pzBuf), n+100); +    if( (*pzBuf)==0 ) return SQLITE_NOMEM; +    (*pnBuf) = n+100; +  } +  n = pStem->n; +  z = *pzBuf; +  if( n<0 ){ +    memcpy(z, pStem->zBasis, pStem->nBasis+1); +  }else{ +    memcpy(z, pStem->zBasis, n); +    memcpy(&z[n], pRule->zTo, pRule->nTo); +    memcpy(&z[n+pRule->nTo], &pStem->zBasis[n+pRule->nFrom],  +           pStem->nBasis-n-pRule->nFrom+1); +  } + +  assert( z[pStem->nBasis + pRule->nTo - pRule->nFrom]==0 ); +  return SQLITE_OK; +} + +/* +** Compute a hash on zBasis. +*/ +static unsigned int fuzzerHash(const char *z){ +  unsigned int h = 0; +  while( *z ){ h = (h<<3) ^ (h>>29) ^ *(z++); } +  return h % FUZZER_HASH; +} + +/* +** Current cost of a stem +*/ +static fuzzer_cost fuzzerCost(fuzzer_stem *pStem){ +  return pStem->rCostX = pStem->rBaseCost + pStem->pRule->rCost; +} + +#if 0 +/* +** Print a description of a fuzzer_stem on stderr. +*/ +static void fuzzerStemPrint( +  const char *zPrefix, +  fuzzer_stem *pStem, +  const char *zSuffix +){ +  if( pStem->n<0 ){ +    fprintf(stderr, "%s[%s](%d)-->self%s", +       zPrefix, +       pStem->zBasis, pStem->rBaseCost, +       zSuffix +    ); +  }else{ +    char *zBuf = 0; +    int nBuf = 0; +    if( fuzzerRender(pStem, &zBuf, &nBuf)!=SQLITE_OK ) return; +    fprintf(stderr, "%s[%s](%d)-->{%s}(%d)%s", +      zPrefix, +      pStem->zBasis, pStem->rBaseCost, zBuf, pStem->, +      zSuffix +    ); +    sqlite3_free(zBuf); +  } +} +#endif + +/* +** Return 1 if the string to which the cursor is point has already +** been emitted.  Return 0 if not.  Return -1 on a memory allocation +** failures. 
+*/ +static int fuzzerSeen(fuzzer_cursor *pCur, fuzzer_stem *pStem){ +  unsigned int h; +  fuzzer_stem *pLookup; + +  if( fuzzerRender(pStem, &pCur->zBuf, &pCur->nBuf)==SQLITE_NOMEM ){ +    return -1; +  } +  h = fuzzerHash(pCur->zBuf); +  pLookup = pCur->apHash[h]; +  while( pLookup && strcmp(pLookup->zBasis, pCur->zBuf)!=0 ){ +    pLookup = pLookup->pHash; +  } +  return pLookup!=0; +} + +/* +** If argument pRule is NULL, this function returns false. +** +** Otherwise, it returns true if rule pRule should be skipped. A rule  +** should be skipped if it does not belong to rule-set iRuleset, or if +** applying it to stem pStem would create a string longer than  +** FUZZER_MX_OUTPUT_LENGTH bytes. +*/ +static int fuzzerSkipRule( +  const fuzzer_rule *pRule,       /* Determine whether or not to skip this */ +  fuzzer_stem *pStem,             /* Stem rule may be applied to */ +  int iRuleset                    /* Rule-set used by the current query */ +){ +  return pRule && ( +      (pRule->iRuleset!=iRuleset) +   || (pStem->nBasis + pRule->nTo - pRule->nFrom)>FUZZER_MX_OUTPUT_LENGTH +  ); +} + +/* +** Advance a fuzzer_stem to its next value.   Return 0 if there are +** no more values that can be generated by this fuzzer_stem.  Return +** -1 on a memory allocation failure. +*/ +static int fuzzerAdvance(fuzzer_cursor *pCur, fuzzer_stem *pStem){ +  const fuzzer_rule *pRule; +  while( (pRule = pStem->pRule)!=0 ){ +    assert( pRule==&pCur->nullRule || pRule->iRuleset==pCur->iRuleset ); +    while( pStem->n < pStem->nBasis - pRule->nFrom ){ +      pStem->n++; +      if( pRule->nFrom==0 +       || memcmp(&pStem->zBasis[pStem->n], pRule->zFrom, pRule->nFrom)==0 +      ){ +        /* Found a rewrite case.  
Make sure it is not a duplicate */ +        int rc = fuzzerSeen(pCur, pStem); +        if( rc<0 ) return -1; +        if( rc==0 ){ +          fuzzerCost(pStem); +          return 1; +        } +      } +    } +    pStem->n = -1; +    do{ +      pRule = pRule->pNext; +    }while( fuzzerSkipRule(pRule, pStem, pCur->iRuleset) ); +    pStem->pRule = pRule; +    if( pRule && fuzzerCost(pStem)>pCur->rLimit ) pStem->pRule = 0; +  } +  return 0; +} + +/* +** The two input stem lists are both sorted in order of increasing +** rCostX.  Merge them together into a single list, sorted by rCostX, and +** return a pointer to the head of that new list. +*/ +static fuzzer_stem *fuzzerMergeStems(fuzzer_stem *pA, fuzzer_stem *pB){ +  fuzzer_stem head; +  fuzzer_stem *pTail; + +  pTail =  &head; +  while( pA && pB ){ +    if( pA->rCostX<=pB->rCostX ){ +      pTail->pNext = pA; +      pTail = pA; +      pA = pA->pNext; +    }else{ +      pTail->pNext = pB; +      pTail = pB; +      pB = pB->pNext; +    } +  } +  if( pA==0 ){ +    pTail->pNext = pB; +  }else{ +    pTail->pNext = pA; +  } +  return head.pNext; +} + +/* +** Load pCur->pStem with the lowest-cost stem.  Return a pointer +** to the lowest-cost stem. +*/ +static fuzzer_stem *fuzzerLowestCostStem(fuzzer_cursor *pCur){ +  fuzzer_stem *pBest, *pX; +  int iBest; +  int i; + +  if( pCur->pStem==0 ){ +    iBest = -1; +    pBest = 0; +    for(i=0; i<=pCur->mxQueue; i++){ +      pX = pCur->aQueue[i]; +      if( pX==0 ) continue; +      if( pBest==0 || pBest->rCostX>pX->rCostX ){ +        pBest = pX; +        iBest = i; +      } +    }  +    if( pBest ){ +      pCur->aQueue[iBest] = pBest->pNext; +      pBest->pNext = 0; +      pCur->pStem = pBest; +    } +  } +  return pCur->pStem; +} + +/* +** Insert pNew into queue of pending stems.  Then find the stem +** with the lowest rCostX and move it into pCur->pStem. +** list.  
The insert is done such that pNew is in the correct order
** according to fuzzer_stem.rBaseCost+fuzzer_stem.pRule->rCost.
*/
static fuzzer_stem *fuzzerInsert(fuzzer_cursor *pCur, fuzzer_stem *pNew){
  fuzzer_stem *pTop = pCur->pStem;  /* Stem currently at the front, if any */
  fuzzer_stem *pRun;                /* Sorted run being carried upward */
  int iSlot;

  /* If the front stem is more expensive than pNew, pNew takes its place
  ** and the old front stem is the one that gets queued. */
  if( pTop!=0 && pTop->rCostX>pNew->rCostX ){
    pNew->pNext = 0;
    pCur->pStem = pNew;
    pNew = pTop;
  }

  /* Carry the run through occupied slots, merging as we go, until an
  ** empty slot is found or the slots in use run out. */
  pNew->pNext = 0;
  pRun = pNew;
  iSlot = 0;
  while( iSlot<=pCur->mxQueue && pCur->aQueue[iSlot]!=0 ){
    pRun = fuzzerMergeStems(pRun, pCur->aQueue[iSlot]);
    pCur->aQueue[iSlot] = 0;
    iSlot++;
  }

  if( iSlot<=pCur->mxQueue ){
    pCur->aQueue[iSlot] = pRun;             /* Landed in an empty slot */
  }else if( iSlot<FUZZER_NQUEUE ){
    pCur->mxQueue = iSlot;                  /* Open one more slot */
    pCur->aQueue[iSlot] = pRun;
  }else{
    /* All slots were in use: everything ends up in the last one. */
    assert( pCur->mxQueue==FUZZER_NQUEUE-1 );
    pRun = fuzzerMergeStems(pRun, pCur->aQueue[FUZZER_NQUEUE-1]);
    pCur->aQueue[FUZZER_NQUEUE-1] = pRun;
  }

  return fuzzerLowestCostStem(pCur);
}

/*
** Allocate a new fuzzer_stem.  Add it to the hash table but do not
** link it into either the pCur->pStem or pCur->pDone lists.
+*/ +static fuzzer_stem *fuzzerNewStem( +  fuzzer_cursor *pCur, +  const char *zWord, +  fuzzer_cost rBaseCost +){ +  fuzzer_stem *pNew; +  fuzzer_rule *pRule; +  unsigned int h; + +  pNew = sqlite3_malloc( sizeof(*pNew) + (int)strlen(zWord) + 1 ); +  if( pNew==0 ) return 0; +  memset(pNew, 0, sizeof(*pNew)); +  pNew->zBasis = (char*)&pNew[1]; +  pNew->nBasis = (int)strlen(zWord); +  memcpy(pNew->zBasis, zWord, pNew->nBasis+1); +  pRule = pCur->pVtab->pRule; +  while( fuzzerSkipRule(pRule, pNew, pCur->iRuleset) ){ +    pRule = pRule->pNext; +  } +  pNew->pRule = pRule; +  pNew->n = -1; +  pNew->rBaseCost = pNew->rCostX = rBaseCost; +  h = fuzzerHash(pNew->zBasis); +  pNew->pHash = pCur->apHash[h]; +  pCur->apHash[h] = pNew; +  pCur->nStem++; +  return pNew; +} + + +/* +** Advance a cursor to its next row of output +*/ +static int fuzzerNext(sqlite3_vtab_cursor *cur){ +  fuzzer_cursor *pCur = (fuzzer_cursor*)cur; +  int rc; +  fuzzer_stem *pStem, *pNew; + +  pCur->iRowid++; + +  /* Use the element the cursor is currently point to to create +  ** a new stem and insert the new stem into the priority queue. +  */ +  pStem = pCur->pStem; +  if( pStem->rCostX>0 ){ +    rc = fuzzerRender(pStem, &pCur->zBuf, &pCur->nBuf); +    if( rc==SQLITE_NOMEM ) return SQLITE_NOMEM; +    pNew = fuzzerNewStem(pCur, pCur->zBuf, pStem->rCostX); +    if( pNew ){ +      if( fuzzerAdvance(pCur, pNew)==0 ){ +        pNew->pNext = pCur->pDone; +        pCur->pDone = pNew; +      }else{ +        if( fuzzerInsert(pCur, pNew)==pNew ){ +          return SQLITE_OK; +        } +      } +    }else{ +      return SQLITE_NOMEM; +    } +  } + +  /* Adjust the priority queue so that the first element of the +  ** stem list is the next lowest cost word. 
+  */ +  while( (pStem = pCur->pStem)!=0 ){ +    int res = fuzzerAdvance(pCur, pStem); +    if( res<0 ){ +      return SQLITE_NOMEM; +    }else if( res>0 ){ +      pCur->pStem = 0; +      pStem = fuzzerInsert(pCur, pStem); +      if( (rc = fuzzerSeen(pCur, pStem))!=0 ){ +        if( rc<0 ) return SQLITE_NOMEM; +        continue; +      } +      return SQLITE_OK;  /* New word found */ +    } +    pCur->pStem = 0; +    pStem->pNext = pCur->pDone; +    pCur->pDone = pStem; +    if( fuzzerLowestCostStem(pCur) ){ +      rc = fuzzerSeen(pCur, pCur->pStem); +      if( rc<0 ) return SQLITE_NOMEM; +      if( rc==0 ){ +        return SQLITE_OK; +      } +    } +  } + +  /* Reach this point only if queue has been exhausted and there is +  ** nothing left to be output. */ +  pCur->rLimit = (fuzzer_cost)0; +  return SQLITE_OK; +} + +/* +** Called to "rewind" a cursor back to the beginning so that +** it starts its output over again.  Always called at least once +** prior to any fuzzerColumn, fuzzerRowid, or fuzzerEof call. +*/ +static int fuzzerFilter( +  sqlite3_vtab_cursor *pVtabCursor,  +  int idxNum, const char *idxStr, +  int argc, sqlite3_value **argv +){ +  fuzzer_cursor *pCur = (fuzzer_cursor *)pVtabCursor; +  const char *zWord = ""; +  fuzzer_stem *pStem; +  int idx; + +  fuzzerClearCursor(pCur, 1); +  pCur->rLimit = 2147483647; +  idx = 0; +  if( idxNum & 1 ){ +    zWord = (const char*)sqlite3_value_text(argv[0]); +    idx++; +  } +  if( idxNum & 2 ){ +    pCur->rLimit = (fuzzer_cost)sqlite3_value_int(argv[idx]); +    idx++; +  } +  if( idxNum & 4 ){ +    pCur->iRuleset = (fuzzer_cost)sqlite3_value_int(argv[idx]); +    idx++; +  } +  pCur->nullRule.pNext = pCur->pVtab->pRule; +  pCur->nullRule.rCost = 0; +  pCur->nullRule.nFrom = 0; +  pCur->nullRule.nTo = 0; +  pCur->nullRule.zFrom = ""; +  pCur->iRowid = 1; +  assert( pCur->pStem==0 ); + +  /* If the query term is longer than FUZZER_MX_OUTPUT_LENGTH bytes, this +  ** query will return zero rows.  
*/ +  if( (int)strlen(zWord)<FUZZER_MX_OUTPUT_LENGTH ){ +    pCur->pStem = pStem = fuzzerNewStem(pCur, zWord, (fuzzer_cost)0); +    if( pStem==0 ) return SQLITE_NOMEM; +    pStem->pRule = &pCur->nullRule; +    pStem->n = pStem->nBasis; +  }else{ +    pCur->rLimit = 0; +  } + +  return SQLITE_OK; +} + +/* +** Only the word and distance columns have values.  All other columns +** return NULL +*/ +static int fuzzerColumn(sqlite3_vtab_cursor *cur, sqlite3_context *ctx, int i){ +  fuzzer_cursor *pCur = (fuzzer_cursor*)cur; +  if( i==0 ){ +    /* the "word" column */ +    if( fuzzerRender(pCur->pStem, &pCur->zBuf, &pCur->nBuf)==SQLITE_NOMEM ){ +      return SQLITE_NOMEM; +    } +    sqlite3_result_text(ctx, pCur->zBuf, -1, SQLITE_TRANSIENT); +  }else if( i==1 ){ +    /* the "distance" column */ +    sqlite3_result_int(ctx, pCur->pStem->rCostX); +  }else{ +    /* All other columns are NULL */ +    sqlite3_result_null(ctx); +  } +  return SQLITE_OK; +} + +/* +** The rowid. +*/ +static int fuzzerRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){ +  fuzzer_cursor *pCur = (fuzzer_cursor*)cur; +  *pRowid = pCur->iRowid; +  return SQLITE_OK; +} + +/* +** When the fuzzer_cursor.rLimit value is 0 or less, that is a signal +** that the cursor has nothing more to output. +*/ +static int fuzzerEof(sqlite3_vtab_cursor *cur){ +  fuzzer_cursor *pCur = (fuzzer_cursor*)cur; +  return pCur->rLimit<=(fuzzer_cost)0; +} + +/* +** Search for terms of these forms: +** +**   (A)    word MATCH $str +**   (B1)   distance < $value +**   (B2)   distance <= $value +**   (C)    ruleid == $ruleid +** +** The distance< and distance<= are both treated as distance<=. +** The query plan number is a bit vector: +** +**   bit 1:   Term of the form (A) found +**   bit 2:   Term like (B1) or (B2) found +**   bit 3:   Term like (C) found +** +** If bit-1 is set, $str is always in filter.argv[0].  
If bit-2 is set +** then $value is in filter.argv[0] if bit-1 is clear and is in  +** filter.argv[1] if bit-1 is set.  If bit-3 is set, then $ruleid is +** in filter.argv[0] if bit-1 and bit-2 are both zero, is in +** filter.argv[1] if exactly one of bit-1 and bit-2 are set, and is in +** filter.argv[2] if both bit-1 and bit-2 are set. +*/ +static int fuzzerBestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){ +  int iPlan = 0; +  int iDistTerm = -1; +  int iRulesetTerm = -1; +  int i; +  const struct sqlite3_index_constraint *pConstraint; +  pConstraint = pIdxInfo->aConstraint; +  for(i=0; i<pIdxInfo->nConstraint; i++, pConstraint++){ +    if( pConstraint->usable==0 ) continue; +    if( (iPlan & 1)==0  +     && pConstraint->iColumn==0 +     && pConstraint->op==SQLITE_INDEX_CONSTRAINT_MATCH +    ){ +      iPlan |= 1; +      pIdxInfo->aConstraintUsage[i].argvIndex = 1; +      pIdxInfo->aConstraintUsage[i].omit = 1; +    } +    if( (iPlan & 2)==0 +     && pConstraint->iColumn==1 +     && (pConstraint->op==SQLITE_INDEX_CONSTRAINT_LT +           || pConstraint->op==SQLITE_INDEX_CONSTRAINT_LE) +    ){ +      iPlan |= 2; +      iDistTerm = i; +    } +    if( (iPlan & 4)==0 +     && pConstraint->iColumn==2 +     && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ +    ){ +      iPlan |= 4; +      pIdxInfo->aConstraintUsage[i].omit = 1; +      iRulesetTerm = i; +    } +  } +  if( iPlan & 2 ){ +    pIdxInfo->aConstraintUsage[iDistTerm].argvIndex = 1+((iPlan&1)!=0); +  } +  if( iPlan & 4 ){ +    int idx = 1; +    if( iPlan & 1 ) idx++; +    if( iPlan & 2 ) idx++; +    pIdxInfo->aConstraintUsage[iRulesetTerm].argvIndex = idx; +  } +  pIdxInfo->idxNum = iPlan; +  if( pIdxInfo->nOrderBy==1 +   && pIdxInfo->aOrderBy[0].iColumn==1 +   && pIdxInfo->aOrderBy[0].desc==0 +  ){ +    pIdxInfo->orderByConsumed = 1; +  } +  pIdxInfo->estimatedCost = (double)10000; +    +  return SQLITE_OK; +} + +/* +** A virtual table module that implements the "fuzzer". 
+*/ +static sqlite3_module fuzzerModule = { +  0,                           /* iVersion */ +  fuzzerConnect, +  fuzzerConnect, +  fuzzerBestIndex, +  fuzzerDisconnect,  +  fuzzerDisconnect, +  fuzzerOpen,                  /* xOpen - open a cursor */ +  fuzzerClose,                 /* xClose - close a cursor */ +  fuzzerFilter,                /* xFilter - configure scan constraints */ +  fuzzerNext,                  /* xNext - advance a cursor */ +  fuzzerEof,                   /* xEof - check for end of scan */ +  fuzzerColumn,                /* xColumn - read data */ +  fuzzerRowid,                 /* xRowid - read data */ +  0,                           /* xUpdate */ +  0,                           /* xBegin */ +  0,                           /* xSync */ +  0,                           /* xCommit */ +  0,                           /* xRollback */ +  0,                           /* xFindMethod */ +  0,                           /* xRename */ +}; + +#endif /* SQLITE_OMIT_VIRTUALTABLE */ + + +#ifdef _WIN32 +__declspec(dllexport) +#endif +int sqlite3_fuzzer_init( +  sqlite3 *db,  +  char **pzErrMsg,  +  const sqlite3_api_routines *pApi +){ +  int rc = SQLITE_OK; +  SQLITE_EXTENSION_INIT2(pApi); +#ifndef SQLITE_OMIT_VIRTUALTABLE +  rc = sqlite3_create_module(db, "fuzzer", &fuzzerModule, 0); +#endif +  return rc; +} diff --git a/ext/misc/ieee754.c b/ext/misc/ieee754.c new file mode 100644 index 0000000..436b11e --- /dev/null +++ b/ext/misc/ieee754.c @@ -0,0 +1,131 @@ +/* +** 2013-04-17 +** +** The author disclaims copyright to this source code.  In place of +** a legal notice, here is a blessing: +** +**    May you do good and not evil. +**    May you find forgiveness for yourself and forgive others. +**    May you share freely, never taking more than you give. 
+** +****************************************************************************** +** +** This SQLite extension implements functions for the exact display +** and input of IEEE754 Binary64 floating-point numbers. +** +**   ieee754(X) +**   ieee754(Y,Z) +** +** In the first form, the value X should be a floating-point number. +** The function will return a string of the form 'ieee754(Y,Z)' where +** Y and Z are integers such that X==Y*pow(w.0,Z). +** +** In the second form, Y and Z are integers which are the mantissa and +** base-2 exponent of a new floating point number.  The function returns +** a floating-point value equal to Y*pow(2.0,Z). +** +** Examples: +** +**     ieee754(2.0)       ->     'ieee754(2,0)' +**     ieee754(45.25)     ->     'ieee754(181,-2)' +**     ieee754(2, 0)      ->     2.0 +**     ieee754(181, -2)   ->     45.25 +*/ +#include "sqlite3ext.h" +SQLITE_EXTENSION_INIT1 +#include <assert.h> +#include <string.h> + +/* +** Implementation of the ieee754() function +*/ +static void ieee754func( +  sqlite3_context *context, +  int argc, +  sqlite3_value **argv +){ +  if( argc==1 ){ +    sqlite3_int64 m, a; +    double r; +    int e; +    int isNeg; +    char zResult[100]; +    assert( sizeof(m)==sizeof(r) ); +    if( sqlite3_value_type(argv[0])!=SQLITE_FLOAT ) return; +    r = sqlite3_value_double(argv[0]); +    if( r<0.0 ){ +      isNeg = 1; +      r = -r; +    }else{ +      isNeg = 0; +    } +    memcpy(&a,&r,sizeof(a)); +    if( a==0 ){ +      e = 0; +      m = 0; +    }else{ +      e = a>>52; +      m = a & ((((sqlite3_int64)1)<<52)-1); +      m |= ((sqlite3_int64)1)<<52; +      while( e<1075 && m>0 && (m&1)==0 ){ +        m >>= 1; +        e++; +      } +      if( isNeg ) m = -m; +    } +    sqlite3_snprintf(sizeof(zResult), zResult, "ieee754(%lld,%d)", +                     m, e-1075); +    sqlite3_result_text(context, zResult, -1, SQLITE_TRANSIENT); +  }else if( argc==2 ){ +    sqlite3_int64 m, e, a; +    double r; +    int isNeg = 0; +    
m = sqlite3_value_int64(argv[0]); +    e = sqlite3_value_int64(argv[1]); +    if( m<0 ){ +      isNeg = 1; +      m = -m; +      if( m<0 ) return; +    }else if( m==0 && e>1000 && e<1000 ){ +      sqlite3_result_double(context, 0.0); +      return; +    } +    while( (m>>32)&0xffe00000 ){ +      m >>= 1; +      e++; +    } +    while( ((m>>32)&0xfff00000)==0 ){ +      m <<= 1; +      e--; +    } +    e += 1075; +    if( e<0 ) e = m = 0; +    if( e>0x7ff ) m = 0; +    a = m & ((((sqlite3_int64)1)<<52)-1); +    a |= e<<52; +    if( isNeg ) a |= ((sqlite3_int64)1)<<63; +    memcpy(&r, &a, sizeof(r)); +    sqlite3_result_double(context, r); +  } +} + + +#ifdef _WIN32 +__declspec(dllexport) +#endif +int sqlite3_ieee_init( +  sqlite3 *db,  +  char **pzErrMsg,  +  const sqlite3_api_routines *pApi +){ +  int rc = SQLITE_OK; +  SQLITE_EXTENSION_INIT2(pApi); +  (void)pzErrMsg;  /* Unused parameter */ +  rc = sqlite3_create_function(db, "ieee754", 1, SQLITE_UTF8, 0, +                               ieee754func, 0, 0); +  if( rc==SQLITE_OK ){ +    rc = sqlite3_create_function(db, "ieee754", 2, SQLITE_UTF8, 0, +                                 ieee754func, 0, 0); +  } +  return rc; +} diff --git a/ext/misc/nextchar.c b/ext/misc/nextchar.c new file mode 100644 index 0000000..e063043 --- /dev/null +++ b/ext/misc/nextchar.c @@ -0,0 +1,265 @@ +/* +** 2013-02-28 +** +** The author disclaims copyright to this source code.  In place of +** a legal notice, here is a blessing: +** +**    May you do good and not evil. +**    May you find forgiveness for yourself and forgive others. +**    May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This file contains code to implement the next_char(A,T,F,W) SQL function. +** +** The next_char(A,T,F,H) function finds all valid "next" characters for +** string A given the vocabulary in T.F.  The T.F field should be indexed. 
** If the W value exists and is a non-empty string, then it is an SQL
** expression that limits the entries in T.F that will be considered.
**
** For example, suppose an application has a dictionary like this:
**
**   CREATE TABLE dictionary(word TEXT UNIQUE);
**
** Further suppose that for user keypad entry, it is desired to disable
** (gray out) keys that are not valid as the next character.  If the
** user has previously entered (say) 'cha' then to find all allowed
** next characters (and thereby determine when keys should not be grayed
** out) run the following query:
**
**   SELECT next_char('cha','dictionary','word');
*/
#include "sqlite3ext.h"
SQLITE_EXTENSION_INIT1
#include <string.h>

/*
** A structure to hold context of the next_char() computation across
** nested function calls.
*/
typedef struct nextCharContext nextCharContext;
struct nextCharContext {
  sqlite3 *db;                      /* Database connection */
  sqlite3_stmt *pStmt;              /* Prepared statement used to query */
  const unsigned char *zPrefix;     /* Prefix to scan */
  int nPrefix;                      /* Size of zPrefix in bytes */
  int nAlloc;                       /* Space allocated to aResult */
  int nUsed;                        /* Space used in aResult */
  unsigned int *aResult;            /* Array of next characters */
  int mallocFailed;                 /* True if malloc fails */
  int otherError;                   /* True for any other failure */
};

/*
** Append a result character if the character is not already in the
** result.
+*/ +static void nextCharAppend(nextCharContext *p, unsigned c){ +  int i; +  for(i=0; i<p->nUsed; i++){ +    if( p->aResult[i]==c ) return; +  } +  if( p->nUsed+1 > p->nAlloc ){ +    unsigned int *aNew; +    int n = p->nAlloc*2 + 30; +    aNew = sqlite3_realloc(p->aResult, n*sizeof(unsigned int)); +    if( aNew==0 ){ +      p->mallocFailed = 1; +      return; +    }else{ +      p->aResult = aNew; +      p->nAlloc = n; +    } +  } +  p->aResult[p->nUsed++] = c; +} + +/* +** Write a character into z[] as UTF8.  Return the number of bytes needed +** to hold the character +*/ +static int writeUtf8(unsigned char *z, unsigned c){ +  if( c<0x00080 ){ +    z[0] = (unsigned char)(c&0xff); +    return 1; +  } +  if( c<0x00800 ){ +    z[0] = 0xC0 + (unsigned char)((c>>6)&0x1F); +    z[1] = 0x80 + (unsigned char)(c & 0x3F); +    return 2; +  } +  if( c<0x10000 ){ +    z[0] = 0xE0 + (unsigned char)((c>>12)&0x0F); +    z[1] = 0x80 + (unsigned char)((c>>6) & 0x3F); +    z[2] = 0x80 + (unsigned char)(c & 0x3F); +    return 3; +  } +  z[0] = 0xF0 + (unsigned char)((c>>18) & 0x07); +  z[1] = 0x80 + (unsigned char)((c>>12) & 0x3F); +  z[2] = 0x80 + (unsigned char)((c>>6) & 0x3F); +  z[3] = 0x80 + (unsigned char)(c & 0x3F); +  return 4; +} + +/* +** Read a UTF8 character out of z[] and write it into *pOut.  Return +** the number of bytes in z[] that were used to construct the character. 
+*/ +static int readUtf8(const unsigned char *z, unsigned *pOut){ +  static const unsigned char validBits[] = { +    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, +    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, +    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, +    0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, +    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, +    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, +    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, +    0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00, +  }; +  unsigned c = z[0]; +  if( c<0xc0 ){ +    *pOut = c; +    return 1; +  }else{ +    int n = 1; +    c = validBits[c-0xc0]; +    while( (z[n] & 0xc0)==0x80 ){ +      c = (c<<6) + (0x3f & z[n++]); +    } +    if( c<0x80 || (c&0xFFFFF800)==0xD800 || (c&0xFFFFFFFE)==0xFFFE ){ +      c = 0xFFFD; +    } +    *pOut = c; +    return n; +  } +} + +/* +** The nextCharContext structure has been set up.  Add all "next" characters +** to the result set. +*/ +static void findNextChars(nextCharContext *p){ +  unsigned cPrev = 0; +  unsigned char zPrev[8]; +  int n, rc; +   +  for(;;){ +    sqlite3_bind_text(p->pStmt, 1, (char*)p->zPrefix, p->nPrefix, +                      SQLITE_STATIC); +    n = writeUtf8(zPrev, cPrev+1); +    sqlite3_bind_text(p->pStmt, 2, (char*)zPrev, n, SQLITE_STATIC); +    rc = sqlite3_step(p->pStmt); +    if( rc==SQLITE_DONE ){ +      sqlite3_reset(p->pStmt); +      return; +    }else if( rc!=SQLITE_ROW ){ +      p->otherError = rc; +      return; +    }else{ +      const unsigned char *zOut = sqlite3_column_text(p->pStmt, 0); +      unsigned cNext; +      n = readUtf8(zOut+p->nPrefix, &cNext); +      sqlite3_reset(p->pStmt); +      nextCharAppend(p, cNext); +      cPrev = cNext; +      if( p->mallocFailed ) return; +    } +  } +} + + +/* +** next_character(A,T,F,W) +** +** Return a string composted of all next possible characters after +** A for elements of T.F.  
If W is supplied, then it is an SQL expression +** that limits the elements in T.F that are considered. +*/ +static void nextCharFunc( +  sqlite3_context *context, +  int argc, +  sqlite3_value **argv +){ +  nextCharContext c; +  const unsigned char *zTable = sqlite3_value_text(argv[1]); +  const unsigned char *zField = sqlite3_value_text(argv[2]); +  const unsigned char *zWhere; +  char *zSql; +  int rc; + +  memset(&c, 0, sizeof(c)); +  c.db = sqlite3_context_db_handle(context); +  c.zPrefix = sqlite3_value_text(argv[0]); +  c.nPrefix = sqlite3_value_bytes(argv[0]); +  if( zTable==0 || zField==0 || c.zPrefix==0 ) return; +  if( argc<4 +   || (zWhere = sqlite3_value_text(argv[3]))==0 +   || zWhere[0]==0 +  ){ +    zSql = sqlite3_mprintf( +        "SELECT \"%w\" FROM \"%w\"" +        " WHERE \"%w\">=(?1 || ?2)" +        "   AND \"%w\"<=(?1 || char(1114111))" /* 1114111 == 0x10ffff */ +        " ORDER BY 1 ASC LIMIT 1", +        zField, zTable, zField, zField); +  }else{ +    zSql = sqlite3_mprintf( +        "SELECT \"%w\" FROM \"%w\"" +        " WHERE \"%w\">=(?1 || ?2)" +        "   AND \"%w\"<=(?1 || char(1114111))" /* 1114111 == 0x10ffff */ +        "   AND (%s)" +        " ORDER BY 1 ASC LIMIT 1", +        zField, zTable, zField, zField, zWhere); +  } +  if( zSql==0 ){ +    sqlite3_result_error_nomem(context); +    return; +  } + +  rc = sqlite3_prepare_v2(c.db, zSql, -1, &c.pStmt, 0); +  sqlite3_free(zSql); +  if( rc ){ +    sqlite3_result_error(context, sqlite3_errmsg(c.db), -1); +    return; +  } +  findNextChars(&c); +  if( c.mallocFailed ){ +    sqlite3_result_error_nomem(context); +  }else{ +    unsigned char *pRes; +    pRes = sqlite3_malloc( c.nUsed*4 + 1 ); +    if( pRes==0 ){ +      sqlite3_result_error_nomem(context); +    }else{ +      int i; +      int n = 0; +      for(i=0; i<c.nUsed; i++){ +        n += writeUtf8(pRes+n, c.aResult[i]); +      } +      pRes[n] = 0; +      sqlite3_result_text(context, (const char*)pRes, n, sqlite3_free); +    } +  
} +  sqlite3_finalize(c.pStmt); +  sqlite3_free(c.aResult); +} + +#ifdef _WIN32 +__declspec(dllexport) +#endif +int sqlite3_nextchar_init( +  sqlite3 *db,  +  char **pzErrMsg,  +  const sqlite3_api_routines *pApi +){ +  int rc = SQLITE_OK; +  SQLITE_EXTENSION_INIT2(pApi); +  (void)pzErrMsg;  /* Unused parameter */ +  rc = sqlite3_create_function(db, "next_char", 3, SQLITE_UTF8, 0, +                               nextCharFunc, 0, 0); +  if( rc==SQLITE_OK ){ +    rc = sqlite3_create_function(db, "next_char", 4, SQLITE_UTF8, 0, +                                 nextCharFunc, 0, 0); +  } +  return rc; +} diff --git a/ext/misc/regexp.c b/ext/misc/regexp.c new file mode 100644 index 0000000..16fa7d0 --- /dev/null +++ b/ext/misc/regexp.c @@ -0,0 +1,756 @@ +/* +** 2012-11-13 +** +** The author disclaims copyright to this source code.  In place of +** a legal notice, here is a blessing: +** +**    May you do good and not evil. +**    May you find forgiveness for yourself and forgive others. +**    May you share freely, never taking more than you give. +** +****************************************************************************** +** +** The code in this file implements a compact but reasonably +** efficient regular-expression matcher for posix extended regular +** expressions against UTF8 text. +** +** This file is an SQLite extension.  It registers a single function +** named "regexp(A,B)" where A is the regular expression and B is the +** string to be matched.  By registering this function, SQLite will also +** then implement the "B regexp A" operator.  Note that with the function +** the regular expression comes first, but with the operator it comes +** second. +** +**  The following regular expression syntax is supported: +** +**     X*      zero or more occurrences of X +**     X+      one or more occurrences of X +**     X?      
zero or one occurrences of X +**     X{p,q}  between p and q occurrences of X +**     (X)     match X +**     X|Y     X or Y +**     ^X      X occurring at the beginning of the string +**     X$      X occurring at the end of the string +**     .       Match any single character +**     \c      Character c where c is one of \{}()[]|*+?. +**     \c      C-language escapes for c in afnrtv.  ex: \t or \n +**     \uXXXX  Where XXXX is exactly 4 hex digits, unicode value XXXX +**     \xXX    Where XX is exactly 2 hex digits, unicode value XX +**     [abc]   Any single character from the set abc +**     [^abc]  Any single character not in the set abc +**     [a-z]   Any single character in the range a-z +**     [^a-z]  Any single character not in the range a-z +**     \b      Word boundary +**     \w      Word character.  [A-Za-z0-9_] +**     \W      Non-word character +**     \d      Digit +**     \D      Non-digit +**     \s      Whitespace character +**     \S      Non-whitespace character +** +** A nondeterministic finite automaton (NFA) is used for matching, so the +** performance is bounded by O(N*M) where N is the size of the regular +** expression and M is the size of the input string.  The matcher never +** exhibits exponential behavior.  Note that the X{p,q} operator expands +** to p copies of X following by q-p copies of X? and that the size of the +** regular expression in the O(N*M) performance bound is computed after +** this expansion. +*/ +#include <string.h> +#include <stdlib.h> +#include "sqlite3ext.h" +SQLITE_EXTENSION_INIT1 + +/* +** The following #defines change the names of some functions implemented in +** this file to prevent name collisions with C-library functions of the +** same name. 
*/
#define re_match   sqlite3re_match
#define re_compile sqlite3re_compile
#define re_free    sqlite3re_free

/* The end-of-input character */
#define RE_EOF            0    /* End of input */

/* The NFA is implemented as a sequence of opcodes taken from the following
** set.  Each opcode has a single integer argument.
*/
#define RE_OP_MATCH       1    /* Match the one character in the argument */
#define RE_OP_ANY         2    /* Match any one character.  (Implements ".") */
#define RE_OP_ANYSTAR     3    /* Special optimized version of .* */
#define RE_OP_FORK        4    /* Continue to both next and opcode at iArg */
#define RE_OP_GOTO        5    /* Jump to opcode at iArg */
#define RE_OP_ACCEPT      6    /* Halt and indicate a successful match */
#define RE_OP_CC_INC      7    /* Beginning of a [...] character class */
#define RE_OP_CC_EXC      8    /* Beginning of a [^...] character class */
#define RE_OP_CC_VALUE    9    /* Single value in a character class */
#define RE_OP_CC_RANGE   10    /* Range of values in a character class */
#define RE_OP_WORD       11    /* Perl word character [A-Za-z0-9_] */
#define RE_OP_NOTWORD    12    /* Not a perl word character */
#define RE_OP_DIGIT      13    /* digit:  [0-9] */
#define RE_OP_NOTDIGIT   14    /* Not a digit */
#define RE_OP_SPACE      15    /* space:  [ \t\n\r\v\f] */
#define RE_OP_NOTSPACE   16    /* Not a space */
#define RE_OP_BOUNDARY   17    /* Boundary between word and non-word */

/* Each opcode is a "state" in the NFA */
typedef unsigned short ReStateNumber;

/* Because this is an NFA and not a DFA, multiple states can be active at
** once.  An instance of the following object records all active states in
** the NFA.  The implementation is optimized for the common case where the
** number of active states is small.
+*/ +typedef struct ReStateSet { +  unsigned nState;            /* Number of current states */ +  ReStateNumber *aState;      /* Current states */ +} ReStateSet; + +/* An input string read one character at a time. +*/ +typedef struct ReInput ReInput; +struct ReInput { +  const unsigned char *z;  /* All text */ +  int i;                   /* Next byte to read */ +  int mx;                  /* EOF when i>=mx */ +}; + +/* A compiled NFA (or an NFA that is in the process of being compiled) is +** an instance of the following object. +*/ +typedef struct ReCompiled ReCompiled; +struct ReCompiled { +  ReInput sIn;                /* Regular expression text */ +  const char *zErr;           /* Error message to return */ +  char *aOp;                  /* Operators for the virtual machine */ +  int *aArg;                  /* Arguments to each operator */ +  unsigned (*xNextChar)(ReInput*);  /* Next character function */ +  unsigned char zInit[12];    /* Initial text to match */ +  int nInit;                  /* Number of characters in zInit */ +  unsigned nState;            /* Number of entries in aOp[] and aArg[] */ +  unsigned nAlloc;            /* Slots allocated for aOp[] and aArg[] */ +}; + +/* Add a state to the given state set if it is not already there */ +static void re_add_state(ReStateSet *pSet, int newState){ +  unsigned i; +  for(i=0; i<pSet->nState; i++) if( pSet->aState[i]==newState ) return; +  pSet->aState[pSet->nState++] = newState; +} + +/* Extract the next unicode character from *pzIn and return it.  Advance +** *pzIn to the first byte past the end of the character returned.  To +** be clear:  this routine converts utf8 to unicode.  This routine is  +** optimized for the common case where the next character is a single byte. 
+*/ +static unsigned re_next_char(ReInput *p){ +  unsigned c; +  if( p->i>=p->mx ) return 0; +  c = p->z[p->i++]; +  if( c>=0x80 ){ +    if( (c&0xe0)==0xc0 && p->i<p->mx && (p->z[p->i]&0xc0)==0x80 ){ +      c = (c&0x1f)<<6 | (p->z[p->i++]&0x3f); +      if( c<0x80 ) c = 0xfffd; +    }else if( (c&0xf0)==0xe0 && p->i+1<p->mx && (p->z[p->i]&0xc0)==0x80 +           && (p->z[p->i+1]&0xc0)==0x80 ){ +      c = (c&0x0f)<<12 | ((p->z[p->i]&0x3f)<<6) | (p->z[p->i+1]&0x3f); +      p->i += 2; +      if( c<=0x3ff || (c>=0xd800 && c<=0xdfff) ) c = 0xfffd; +    }else if( (c&0xf8)==0xf0 && p->i+3<p->mx && (p->z[p->i]&0xc0)==0x80 +           && (p->z[p->i+1]&0xc0)==0x80 && (p->z[p->i+2]&0xc0)==0x80 ){ +      c = (c&0x07)<<18 | ((p->z[p->i]&0x3f)<<12) | ((p->z[p->i+1]&0x3f)<<6) +                       | (p->z[p->i+2]&0x3f); +      p->i += 3; +      if( c<=0xffff || c>0x10ffff ) c = 0xfffd; +    }else{ +      c = 0xfffd; +    } +  } +  return c; +} +static unsigned re_next_char_nocase(ReInput *p){ +  unsigned c = re_next_char(p); +  if( c>='A' && c<='Z' ) c += 'a' - 'A'; +  return c; +} + +/* Return true if c is a perl "word" character:  [A-Za-z0-9_] */ +static int re_word_char(int c){ +  return (c>='0' && c<='9') || (c>='a' && c<='z') +      || (c>='A' && c<='Z') || c=='_'; +} + +/* Return true if c is a "digit" character:  [0-9] */ +static int re_digit_char(int c){ +  return (c>='0' && c<='9'); +} + +/* Return true if c is a perl "space" character:  [ \t\r\n\v\f] */ +static int re_space_char(int c){ +  return c==' ' || c=='\t' || c=='\n' || c=='\r' || c=='\v' || c=='\f'; +} + +/* Run a compiled regular expression on the zero-terminated input +** string zIn[].  Return true on a match and false if there is no match. 
+*/ +static int re_match(ReCompiled *pRe, const unsigned char *zIn, int nIn){ +  ReStateSet aStateSet[2], *pThis, *pNext; +  ReStateNumber aSpace[100]; +  ReStateNumber *pToFree; +  unsigned int i = 0; +  unsigned int iSwap = 0; +  int c = RE_EOF+1; +  int cPrev = 0; +  int rc = 0; +  ReInput in; + +  in.z = zIn; +  in.i = 0; +  in.mx = nIn>=0 ? nIn : (int)strlen((char const*)zIn); + +  /* Look for the initial prefix match, if there is one. */ +  if( pRe->nInit ){ +    unsigned char x = pRe->zInit[0]; +    while( in.i+pRe->nInit<=in.mx  +     && (zIn[in.i]!=x || +         strncmp((const char*)zIn+in.i, (const char*)pRe->zInit, pRe->nInit)!=0) +    ){ +      in.i++; +    } +    if( in.i+pRe->nInit>in.mx ) return 0; +  } + +  if( pRe->nState<=(sizeof(aSpace)/(sizeof(aSpace[0])*2)) ){ +    pToFree = 0; +    aStateSet[0].aState = aSpace; +  }else{ +    pToFree = sqlite3_malloc( sizeof(ReStateNumber)*2*pRe->nState ); +    if( pToFree==0 ) return -1; +    aStateSet[0].aState = pToFree; +  } +  aStateSet[1].aState = &aStateSet[0].aState[pRe->nState]; +  pNext = &aStateSet[1]; +  pNext->nState = 0; +  re_add_state(pNext, 0); +  while( c!=RE_EOF && pNext->nState>0 ){ +    cPrev = c; +    c = pRe->xNextChar(&in); +    pThis = pNext; +    pNext = &aStateSet[iSwap]; +    iSwap = 1 - iSwap; +    pNext->nState = 0; +    for(i=0; i<pThis->nState; i++){ +      int x = pThis->aState[i]; +      switch( pRe->aOp[x] ){ +        case RE_OP_MATCH: { +          if( pRe->aArg[x]==c ) re_add_state(pNext, x+1); +          break; +        } +        case RE_OP_ANY: { +          re_add_state(pNext, x+1); +          break; +        } +        case RE_OP_WORD: { +          if( re_word_char(c) ) re_add_state(pNext, x+1); +          break; +        } +        case RE_OP_NOTWORD: { +          if( !re_word_char(c) ) re_add_state(pNext, x+1); +          break; +        } +        case RE_OP_DIGIT: { +          if( re_digit_char(c) ) re_add_state(pNext, x+1); +          break; +        } +        
case RE_OP_NOTDIGIT: { +          if( !re_digit_char(c) ) re_add_state(pNext, x+1); +          break; +        } +        case RE_OP_SPACE: { +          if( re_space_char(c) ) re_add_state(pNext, x+1); +          break; +        } +        case RE_OP_NOTSPACE: { +          if( !re_space_char(c) ) re_add_state(pNext, x+1); +          break; +        } +        case RE_OP_BOUNDARY: { +          if( re_word_char(c)!=re_word_char(cPrev) ) re_add_state(pThis, x+1); +          break; +        } +        case RE_OP_ANYSTAR: { +          re_add_state(pNext, x); +          re_add_state(pThis, x+1); +          break; +        } +        case RE_OP_FORK: { +          re_add_state(pThis, x+pRe->aArg[x]); +          re_add_state(pThis, x+1); +          break; +        } +        case RE_OP_GOTO: { +          re_add_state(pThis, x+pRe->aArg[x]); +          break; +        } +        case RE_OP_ACCEPT: { +          rc = 1; +          goto re_match_end; +        } +        case RE_OP_CC_INC: +        case RE_OP_CC_EXC: { +          int j = 1; +          int n = pRe->aArg[x]; +          int hit = 0; +          for(j=1; j>0 && j<n; j++){ +            if( pRe->aOp[x+j]==RE_OP_CC_VALUE ){ +              if( pRe->aArg[x+j]==c ){ +                hit = 1; +                j = -1; +              } +            }else{ +              if( pRe->aArg[x+j]<=c && pRe->aArg[x+j+1]>=c ){ +                hit = 1; +                j = -1; +              }else{ +                j++; +              } +            } +          } +          if( pRe->aOp[x]==RE_OP_CC_EXC ) hit = !hit; +          if( hit ) re_add_state(pNext, x+n); +          break;             +        } +      } +    } +  } +  for(i=0; i<pNext->nState; i++){ +    if( pRe->aOp[pNext->aState[i]]==RE_OP_ACCEPT ){ rc = 1; break; } +  } +re_match_end: +  sqlite3_free(pToFree); +  return rc; +} + +/* Resize the opcode and argument arrays for an RE under construction. 
+*/ +static int re_resize(ReCompiled *p, int N){ +  char *aOp; +  int *aArg; +  aOp = sqlite3_realloc(p->aOp, N*sizeof(p->aOp[0])); +  if( aOp==0 ) return 1; +  p->aOp = aOp; +  aArg = sqlite3_realloc(p->aArg, N*sizeof(p->aArg[0])); +  if( aArg==0 ) return 1; +  p->aArg = aArg; +  p->nAlloc = N; +  return 0; +} + +/* Insert a new opcode and argument into an RE under construction.  The +** insertion point is just prior to existing opcode iBefore. +*/ +static int re_insert(ReCompiled *p, int iBefore, int op, int arg){ +  int i; +  if( p->nAlloc<=p->nState && re_resize(p, p->nAlloc*2) ) return 0; +  for(i=p->nState; i>iBefore; i--){ +    p->aOp[i] = p->aOp[i-1]; +    p->aArg[i] = p->aArg[i-1]; +  } +  p->nState++; +  p->aOp[iBefore] = op; +  p->aArg[iBefore] = arg; +  return iBefore; +} + +/* Append a new opcode and argument to the end of the RE under construction. +*/ +static int re_append(ReCompiled *p, int op, int arg){ +  return re_insert(p, p->nState, op, arg); +} + +/* Make a copy of N opcodes starting at iStart onto the end of the RE +** under construction. +*/ +static void re_copy(ReCompiled *p, int iStart, int N){ +  if( p->nState+N>=p->nAlloc && re_resize(p, p->nAlloc*2+N) ) return; +  memcpy(&p->aOp[p->nState], &p->aOp[iStart], N*sizeof(p->aOp[0])); +  memcpy(&p->aArg[p->nState], &p->aArg[iStart], N*sizeof(p->aArg[0])); +  p->nState += N; +} + +/* Return true if c is a hexadecimal digit character:  [0-9a-fA-F] +** If c is a hex digit, also set *pV = (*pV)*16 + valueof(c).  If +** c is not a hex digit *pV is unchanged. +*/ +static int re_hex(int c, int *pV){ +  if( c>='0' && c<='9' ){ +    c -= '0'; +  }else if( c>='a' && c<='f' ){ +    c -= 'a' - 10; +  }else if( c>='A' && c<='F' ){ +    c -= 'A' - 10; +  }else{ +    return 0; +  } +  *pV = (*pV)*16 + (c & 0xff); +  return 1; +} + +/* A backslash character has been seen, read the next character and +** return its interpretation. 
+*/ +static unsigned re_esc_char(ReCompiled *p){ +  static const char zEsc[] = "afnrtv\\()*.+?[$^{|}]"; +  static const char zTrans[] = "\a\f\n\r\t\v"; +  int i, v = 0; +  char c; +  if( p->sIn.i>=p->sIn.mx ) return 0; +  c = p->sIn.z[p->sIn.i]; +  if( c=='u' && p->sIn.i+4<p->sIn.mx ){ +    const unsigned char *zIn = p->sIn.z + p->sIn.i; +    if( re_hex(zIn[1],&v) +     && re_hex(zIn[2],&v) +     && re_hex(zIn[3],&v) +     && re_hex(zIn[4],&v) +    ){ +      p->sIn.i += 5; +      return v; +    } +  } +  if( c=='x' && p->sIn.i+2<p->sIn.mx ){ +    const unsigned char *zIn = p->sIn.z + p->sIn.i; +    if( re_hex(zIn[1],&v) +     && re_hex(zIn[2],&v) +    ){ +      p->sIn.i += 3; +      return v; +    } +  } +  for(i=0; zEsc[i] && zEsc[i]!=c; i++){} +  if( zEsc[i] ){ +    if( i<6 ) c = zTrans[i]; +    p->sIn.i++; +  }else{ +    p->zErr = "unknown \\ escape"; +  } +  return c; +} + +/* Forward declaration */ +static const char *re_subcompile_string(ReCompiled*); + +/* Peek at the next byte of input */ +static unsigned char rePeek(ReCompiled *p){ +  return p->sIn.i<p->sIn.mx ? p->sIn.z[p->sIn.i] : 0; +} + +/* Compile RE text into a sequence of opcodes.  Continue up to the +** first unmatched ")" character, then return.  If an error is found, +** return a pointer to the error message string. +*/ +static const char *re_subcompile_re(ReCompiled *p){ +  const char *zErr; +  int iStart, iEnd, iGoto; +  iStart = p->nState; +  zErr = re_subcompile_string(p); +  if( zErr ) return zErr; +  while( rePeek(p)=='|' ){ +    iEnd = p->nState; +    re_insert(p, iStart, RE_OP_FORK, iEnd + 2 - iStart); +    iGoto = re_append(p, RE_OP_GOTO, 0); +    p->sIn.i++; +    zErr = re_subcompile_string(p); +    if( zErr ) return zErr; +    p->aArg[iGoto] = p->nState - iGoto; +  } +  return 0; +} + +/* Compile an element of regular expression text (anything that can be +** an operand to the "|" operator).  Return NULL on success or a pointer +** to the error message if there is a problem. 
+*/ +static const char *re_subcompile_string(ReCompiled *p){ +  int iPrev = -1; +  int iStart; +  unsigned c; +  const char *zErr; +  while( (c = p->xNextChar(&p->sIn))!=0 ){ +    iStart = p->nState; +    switch( c ){ +      case '|': +      case '$':  +      case ')': { +        p->sIn.i--; +        return 0; +      } +      case '(': { +        zErr = re_subcompile_re(p); +        if( zErr ) return zErr; +        if( rePeek(p)!=')' ) return "unmatched '('"; +        p->sIn.i++; +        break; +      } +      case '.': { +        if( rePeek(p)=='*' ){ +          re_append(p, RE_OP_ANYSTAR, 0); +          p->sIn.i++; +        }else{  +          re_append(p, RE_OP_ANY, 0); +        } +        break; +      } +      case '*': { +        if( iPrev<0 ) return "'*' without operand"; +        re_insert(p, iPrev, RE_OP_GOTO, p->nState - iPrev + 1); +        re_append(p, RE_OP_FORK, iPrev - p->nState + 1); +        break; +      } +      case '+': { +        if( iPrev<0 ) return "'+' without operand"; +        re_append(p, RE_OP_FORK, iPrev - p->nState); +        break; +      } +      case '?': { +        if( iPrev<0 ) return "'?' 
without operand"; +        re_insert(p, iPrev, RE_OP_FORK, p->nState - iPrev+1); +        break; +      } +      case '{': { +        int m = 0, n = 0; +        int sz, j; +        if( iPrev<0 ) return "'{m,n}' without operand"; +        while( (c=rePeek(p))>='0' && c<='9' ){ m = m*10 + c - '0'; p->sIn.i++; } +        n = m; +        if( c==',' ){ +          p->sIn.i++; +          n = 0; +          while( (c=rePeek(p))>='0' && c<='9' ){ n = n*10 + c-'0'; p->sIn.i++; } +        } +        if( c!='}' ) return "unmatched '{'"; +        if( n>0 && n<m ) return "n less than m in '{m,n}'"; +        p->sIn.i++; +        sz = p->nState - iPrev; +        if( m==0 ){ +          if( n==0 ) return "both m and n are zero in '{m,n}'"; +          re_insert(p, iPrev, RE_OP_FORK, sz+1); +          n--; +        }else{ +          for(j=1; j<m; j++) re_copy(p, iPrev, sz); +        } +        for(j=m; j<n; j++){ +          re_append(p, RE_OP_FORK, sz+1); +          re_copy(p, iPrev, sz); +        } +        if( n==0 && m>0 ){ +          re_append(p, RE_OP_FORK, -sz); +        } +        break; +      } +      case '[': { +        int iFirst = p->nState; +        if( rePeek(p)=='^' ){ +          re_append(p, RE_OP_CC_EXC, 0); +          p->sIn.i++; +        }else{ +          re_append(p, RE_OP_CC_INC, 0); +        } +        while( (c = p->xNextChar(&p->sIn))!=0 ){ +          if( c=='[' && rePeek(p)==':' ){ +            return "POSIX character classes not supported"; +          } +          if( c=='\\' ) c = re_esc_char(p); +          if( rePeek(p)=='-' ){ +            re_append(p, RE_OP_CC_RANGE, c); +            p->sIn.i++; +            c = p->xNextChar(&p->sIn); +            if( c=='\\' ) c = re_esc_char(p); +            re_append(p, RE_OP_CC_RANGE, c); +          }else{ +            re_append(p, RE_OP_CC_VALUE, c); +          } +          if( rePeek(p)==']' ){ p->sIn.i++; break; } +        } +        if( c==0 ) return "unclosed '['"; +        p->aArg[iFirst] = p->nState - iFirst; + 
       break; +      } +      case '\\': { +        int specialOp = 0; +        switch( rePeek(p) ){ +          case 'b': specialOp = RE_OP_BOUNDARY;   break; +          case 'd': specialOp = RE_OP_DIGIT;      break; +          case 'D': specialOp = RE_OP_NOTDIGIT;   break; +          case 's': specialOp = RE_OP_SPACE;      break; +          case 'S': specialOp = RE_OP_NOTSPACE;   break; +          case 'w': specialOp = RE_OP_WORD;       break; +          case 'W': specialOp = RE_OP_NOTWORD;    break; +        } +        if( specialOp ){ +          p->sIn.i++; +          re_append(p, specialOp, 0); +        }else{ +          c = re_esc_char(p); +          re_append(p, RE_OP_MATCH, c); +        } +        break; +      } +      default: { +        re_append(p, RE_OP_MATCH, c); +        break; +      } +    } +    iPrev = iStart; +  } +  return 0; +} + +/* Free and reclaim all the memory used by a previously compiled +** regular expression.  Applications should invoke this routine once +** for every call to re_compile() to avoid memory leaks. +*/ +void re_free(ReCompiled *pRe){ +  if( pRe ){ +    sqlite3_free(pRe->aOp); +    sqlite3_free(pRe->aArg); +    sqlite3_free(pRe); +  } +} + +/* +** Compile a textual regular expression in zIn[] into a compiled regular +** expression suitable for us by re_match() and return a pointer to the +** compiled regular expression in *ppRe.  Return NULL on success or an +** error message if something goes wrong. +*/ +const char *re_compile(ReCompiled **ppRe, const char *zIn, int noCase){ +  ReCompiled *pRe; +  const char *zErr; +  int i, j; + +  *ppRe = 0; +  pRe = sqlite3_malloc( sizeof(*pRe) ); +  if( pRe==0 ){ +    return "out of memory"; +  } +  memset(pRe, 0, sizeof(*pRe)); +  pRe->xNextChar = noCase ? 
re_next_char_nocase : re_next_char; +  if( re_resize(pRe, 30) ){ +    re_free(pRe); +    return "out of memory"; +  } +  if( zIn[0]=='^' ){ +    zIn++; +  }else{ +    re_append(pRe, RE_OP_ANYSTAR, 0); +  } +  pRe->sIn.z = (unsigned char*)zIn; +  pRe->sIn.i = 0; +  pRe->sIn.mx = (int)strlen(zIn); +  zErr = re_subcompile_re(pRe); +  if( zErr ){ +    re_free(pRe); +    return zErr; +  } +  if( rePeek(pRe)=='$' && pRe->sIn.i+1>=pRe->sIn.mx ){ +    re_append(pRe, RE_OP_MATCH, RE_EOF); +    re_append(pRe, RE_OP_ACCEPT, 0); +    *ppRe = pRe; +  }else if( pRe->sIn.i>=pRe->sIn.mx ){ +    re_append(pRe, RE_OP_ACCEPT, 0); +    *ppRe = pRe; +  }else{ +    re_free(pRe); +    return "unrecognized character"; +  } + +  /* The following is a performance optimization.  If the regex begins with +  ** ".*" (if the input regex lacks an initial "^") and afterwards there are +  ** one or more matching characters, enter those matching characters into +  ** zInit[].  The re_match() routine can then search ahead in the input  +  ** string looking for the initial match without having to run the whole +  ** regex engine over the string.  Do not worry able trying to match +  ** unicode characters beyond plane 0 - those are very rare and this is +  ** just an optimization. */ +  if( pRe->aOp[0]==RE_OP_ANYSTAR ){ +    for(j=0, i=1; j<sizeof(pRe->zInit)-2 && pRe->aOp[i]==RE_OP_MATCH; i++){ +      unsigned x = pRe->aArg[i]; +      if( x<=127 ){ +        pRe->zInit[j++] = x; +      }else if( x<=0xfff ){ +        pRe->zInit[j++] = 0xc0 | (x>>6); +        pRe->zInit[j++] = 0x80 | (x&0x3f); +      }else if( x<=0xffff ){ +        pRe->zInit[j++] = 0xd0 | (x>>12); +        pRe->zInit[j++] = 0x80 | ((x>>6)&0x3f); +        pRe->zInit[j++] = 0x80 | (x&0x3f); +      }else{ +        break; +      } +    } +    if( j>0 && pRe->zInit[j-1]==0 ) j--; +    pRe->nInit = j; +  } +  return pRe->zErr; +} + +/* +** Implementation of the regexp() SQL function.  
This function implements +** the build-in REGEXP operator.  The first argument to the function is the +** pattern and the second argument is the string.  So, the SQL statements: +** +**       A REGEXP B +** +** is implemented as regexp(B,A). +*/ +static void re_sql_func( +  sqlite3_context *context,  +  int argc,  +  sqlite3_value **argv +){ +  ReCompiled *pRe;          /* Compiled regular expression */ +  const char *zPattern;     /* The regular expression */ +  const unsigned char *zStr;/* String being searched */ +  const char *zErr;         /* Compile error message */ + +  pRe = sqlite3_get_auxdata(context, 0); +  if( pRe==0 ){ +    zPattern = (const char*)sqlite3_value_text(argv[0]); +    if( zPattern==0 ) return; +    zErr = re_compile(&pRe, zPattern, 0); +    if( zErr ){ +      re_free(pRe); +      sqlite3_result_error(context, zErr, -1); +      return; +    } +    if( pRe==0 ){ +      sqlite3_result_error_nomem(context); +      return; +    } +    sqlite3_set_auxdata(context, 0, pRe, (void(*)(void*))re_free); +  } +  zStr = (const unsigned char*)sqlite3_value_text(argv[1]); +  if( zStr!=0 ){ +    sqlite3_result_int(context, re_match(pRe, zStr, -1)); +  } +} + +/* +** Invoke this routine to register the regexp() function with the +** SQLite database connection. +*/ +#ifdef _WIN32 +__declspec(dllexport) +#endif +int sqlite3_regexp_init( +  sqlite3 *db,  +  char **pzErrMsg,  +  const sqlite3_api_routines *pApi +){ +  int rc = SQLITE_OK; +  SQLITE_EXTENSION_INIT2(pApi); +  rc = sqlite3_create_function(db, "regexp", 2, SQLITE_UTF8, 0, +                                 re_sql_func, 0, 0); +  return rc; +} diff --git a/ext/misc/rot13.c b/ext/misc/rot13.c new file mode 100644 index 0000000..68fdf60 --- /dev/null +++ b/ext/misc/rot13.c @@ -0,0 +1,114 @@ +/* +** 2013-05-15 +** +** The author disclaims copyright to this source code.  In place of +** a legal notice, here is a blessing: +** +**    May you do good and not evil. 
+**    May you find forgiveness for yourself and forgive others. +**    May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This SQLite extension implements a rot13() function and a rot13 +** collating sequence. +*/ +#include "sqlite3ext.h" +SQLITE_EXTENSION_INIT1 +#include <assert.h> +#include <string.h> + +/* +** Perform rot13 encoding on a single ASCII character. +*/ +static unsigned char rot13(unsigned char c){ +  if( c>='a' && c<='z' ){ +    c += 13; +    if( c>'z' ) c -= 26; +  }else if( c>='A' && c<='Z' ){ +    c += 13; +    if( c>'Z' ) c -= 26; +  } +  return c; +} + +/* +** Implementation of the rot13() function. +** +** Rotate ASCII alphabetic characters by 13 character positions.   +** Non-ASCII characters are unchanged.  rot13(rot13(X)) should always +** equal X. +*/ +static void rot13func( +  sqlite3_context *context, +  int argc, +  sqlite3_value **argv +){ +  const unsigned char *zIn; +  int nIn; +  unsigned char *zOut; +  char *zToFree = 0; +  int i; +  char zTemp[100]; +  assert( argc==1 ); +  if( sqlite3_value_type(argv[0])==SQLITE_NULL ) return; +  zIn = (const unsigned char*)sqlite3_value_text(argv[0]); +  nIn = sqlite3_value_bytes(argv[0]); +  if( nIn<sizeof(zTemp)-1 ){ +    zOut = zTemp; +  }else{ +    zOut = zToFree = sqlite3_malloc( nIn+1 ); +    if( zOut==0 ){ +      sqlite3_result_error_nomem(context); +      return; +    } +  } +  for(i=0; i<nIn; i++) zOut[i] = rot13(zIn[i]); +  zOut[i] = 0; +  sqlite3_result_text(context, (char*)zOut, i, SQLITE_TRANSIENT); +  sqlite3_free(zToFree); +} + +/* +** Implement the rot13 collating sequence so that if +** +**      x=y COLLATE rot13 +** +** Then  +** +**      rot13(x)=rot13(y) COLLATE binary +*/ +static int rot13CollFunc( +  void *notUsed, +  int nKey1, const void *pKey1, +  int nKey2, const void *pKey2 +){ +  const char *zA = (const char*)pKey1; +  const char *zB = (const char*)pKey2; +  int i, x; +  
for(i=0; i<nKey1 && i<nKey2; i++){ +    x = (int)rot13(zA[i]) - (int)rot13(zB[i]); +    if( x!=0 ) return x; +  } +  return nKey1 - nKey2; +} + + +#ifdef _WIN32 +__declspec(dllexport) +#endif +int sqlite3_rot_init( +  sqlite3 *db,  +  char **pzErrMsg,  +  const sqlite3_api_routines *pApi +){ +  int rc = SQLITE_OK; +  SQLITE_EXTENSION_INIT2(pApi); +  (void)pzErrMsg;  /* Unused parameter */ +  rc = sqlite3_create_function(db, "rot13", 1, SQLITE_UTF8, 0, +                               rot13func, 0, 0); +  if( rc==SQLITE_OK ){ +    rc = sqlite3_create_collation(db, "rot13", SQLITE_UTF8, 0, rot13CollFunc); +  } +  return rc; +} diff --git a/ext/misc/spellfix.c b/ext/misc/spellfix.c new file mode 100644 index 0000000..eb5442e --- /dev/null +++ b/ext/misc/spellfix.c @@ -0,0 +1,2844 @@ +/* +** 2012 April 10 +** +** The author disclaims copyright to this source code.  In place of +** a legal notice, here is a blessing: +** +**    May you do good and not evil. +**    May you find forgiveness for yourself and forgive others. +**    May you share freely, never taking more than you give. +** +************************************************************************* +** +** This module implements the spellfix1 VIRTUAL TABLE that can be used +** to search a large vocabulary for close matches.  See separate +** documentation (http://www.sqlite.org/spellfix1.html) for details. 
*/
#include "sqlite3ext.h"
SQLITE_EXTENSION_INIT1

#ifndef SQLITE_AMALGAMATION
# include <string.h>
# include <stdio.h>
# include <stdlib.h>
# include <assert.h>
# define ALWAYS(X)  1
# define NEVER(X)   0
  typedef unsigned char u8;
  typedef unsigned short u16;
# include <ctype.h>
#endif

#ifndef SQLITE_OMIT_VIRTUALTABLE

/*
** Character classes for ASCII characters:
**
**   0   ''        Silent letters:   H W
**   1   'A'       Any vowel:   A E I O U (Y)
**   2   'B'       A bilabial stop or fricative:  B F P V W
**   3   'C'       Other fricatives or back stops:  C G J K Q S X Z
**   4   'D'       Alveolar stops:  D T
**   5   'H'       Letter H at the beginning of a word
**   6   'L'       Glide:  L
**   7   'R'       Semivowel:  R
**   8   'M'       Nasals:  M N
**   9   'Y'       Letter Y at the beginning of a word.
**   10  '9'       Digits: 0 1 2 3 4 5 6 7 8 9
**   11  ' '       White space
**   12  '?'       Other.
*/
#define CCLASS_SILENT         0
#define CCLASS_VOWEL          1
#define CCLASS_B              2
#define CCLASS_C              3
#define CCLASS_D              4
#define CCLASS_H              5
#define CCLASS_L              6
#define CCLASS_R              7
#define CCLASS_M              8
#define CCLASS_Y              9
#define CCLASS_DIGIT         10
#define CCLASS_SPACE         11
#define CCLASS_OTHER         12

/*
** The following table gives the character class for non-initial ASCII
** characters.
*/
/* The table has 128 entries and is indexed by the 7-bit ASCII code. */
static const unsigned char midClass[] = {
 /* NUL */ CCLASS_OTHER,  /* SOH */ CCLASS_OTHER, /* STX */ CCLASS_OTHER,
 /* ETX */ CCLASS_OTHER,  /* EOT */ CCLASS_OTHER, /* ENQ */ CCLASS_OTHER,
 /* ACK */ CCLASS_OTHER,  /* BEL */ CCLASS_OTHER, /* BS  */ CCLASS_OTHER,
 /* HT  */ CCLASS_SPACE,  /* LF  */ CCLASS_OTHER, /* VT  */ CCLASS_OTHER,
 /* FF  */ CCLASS_SPACE,  /* CR  */ CCLASS_SPACE, /* SO  */ CCLASS_OTHER,
 /* SI  */ CCLASS_OTHER,  /* DLE */ CCLASS_OTHER, /* DC1 */ CCLASS_OTHER,
 /* DC2 */ CCLASS_OTHER,  /* DC3 */ CCLASS_OTHER, /* DC4 */ CCLASS_OTHER,
 /* NAK */ CCLASS_OTHER,  /* SYN */ CCLASS_OTHER, /* ETB */ CCLASS_OTHER,
 /* CAN */ CCLASS_OTHER,  /* EM  */ CCLASS_OTHER, /* SUB */ CCLASS_OTHER,
 /* ESC */ CCLASS_OTHER,  /* FS  */ CCLASS_OTHER, /* GS  */ CCLASS_OTHER,
 /* RS  */ CCLASS_OTHER,  /* US  */ CCLASS_OTHER, /* SP  */ CCLASS_SPACE,
 /* ! */ CCLASS_OTHER,    /* " */ CCLASS_OTHER,   /* # */ CCLASS_OTHER,
 /* $ */ CCLASS_OTHER,    /* % */ CCLASS_OTHER,   /* & */ CCLASS_OTHER,
 /* ' */ CCLASS_SILENT,   /* ( */ CCLASS_OTHER,   /* ) */ CCLASS_OTHER,
 /* * */ CCLASS_OTHER,    /* + */ CCLASS_OTHER,   /* , */ CCLASS_OTHER,
 /* - */ CCLASS_OTHER,    /* . */ CCLASS_OTHER,   /* / */ CCLASS_OTHER,
 /* 0 */ CCLASS_DIGIT,    /* 1 */ CCLASS_DIGIT,   /* 2 */ CCLASS_DIGIT,
 /* 3 */ CCLASS_DIGIT,    /* 4 */ CCLASS_DIGIT,   /* 5 */ CCLASS_DIGIT,
 /* 6 */ CCLASS_DIGIT,    /* 7 */ CCLASS_DIGIT,   /* 8 */ CCLASS_DIGIT,
 /* 9 */ CCLASS_DIGIT,    /* : */ CCLASS_OTHER,   /* ; */ CCLASS_OTHER,
 /* < */ CCLASS_OTHER,    /* = */ CCLASS_OTHER,   /* > */ CCLASS_OTHER,
 /* ? */ CCLASS_OTHER,    /* @ */ CCLASS_OTHER,   /* A */ CCLASS_VOWEL,
 /* B */ CCLASS_B,        /* C */ CCLASS_C,       /* D */ CCLASS_D,
 /* E */ CCLASS_VOWEL,    /* F */ CCLASS_B,       /* G */ CCLASS_C,
 /* H */ CCLASS_SILENT,   /* I */ CCLASS_VOWEL,   /* J */ CCLASS_C,
 /* K */ CCLASS_C,        /* L */ CCLASS_L,       /* M */ CCLASS_M,
 /* N */ CCLASS_M,        /* O */ CCLASS_VOWEL,   /* P */ CCLASS_B,
 /* Q */ CCLASS_C,        /* R */ CCLASS_R,       /* S */ CCLASS_C,
 /* T */ CCLASS_D,        /* U */ CCLASS_VOWEL,   /* V */ CCLASS_B,
 /* W */ CCLASS_B,        /* X */ CCLASS_C,       /* Y */ CCLASS_VOWEL,
 /* Z */ CCLASS_C,        /* [ */ CCLASS_OTHER,   /* \ */ CCLASS_OTHER,
 /* ] */ CCLASS_OTHER,    /* ^ */ CCLASS_OTHER,   /* _ */ CCLASS_OTHER,
 /* ` */ CCLASS_OTHER,    /* a */ CCLASS_VOWEL,   /* b */ CCLASS_B,
 /* c */ CCLASS_C,        /* d */ CCLASS_D,       /* e */ CCLASS_VOWEL,
 /* f */ CCLASS_B,        /* g */ CCLASS_C,       /* h */ CCLASS_SILENT,
 /* i */ CCLASS_VOWEL,    /* j */ CCLASS_C,       /* k */ CCLASS_C,
 /* l */ CCLASS_L,        /* m */ CCLASS_M,       /* n */ CCLASS_M,
 /* o */ CCLASS_VOWEL,    /* p */ CCLASS_B,       /* q */ CCLASS_C,
 /* r */ CCLASS_R,        /* s */ CCLASS_C,       /* t */ CCLASS_D,
 /* u */ CCLASS_VOWEL,    /* v */ CCLASS_B,       /* w */ CCLASS_B,
 /* x */ CCLASS_C,        /* y */ CCLASS_VOWEL,   /* z */ CCLASS_C,
 /* { */ CCLASS_OTHER,    /* | */ CCLASS_OTHER,   /* } */ CCLASS_OTHER,
 /* ~ */ CCLASS_OTHER,    /* DEL */ CCLASS_OTHER,
};
/*
** This table gives the character class for ASCII characters that form the
** initial character of a word.  As written, it differs from midClass only
** for the apostrophe (CCLASS_OTHER rather than CCLASS_SILENT) and for the
** letter Y (CCLASS_Y rather than CCLASS_VOWEL).
*/
/* The table has 128 entries and is indexed by the 7-bit ASCII code. */
static const unsigned char initClass[] = {
 /* NUL */ CCLASS_OTHER,  /* SOH */ CCLASS_OTHER, /* STX */ CCLASS_OTHER,
 /* ETX */ CCLASS_OTHER,  /* EOT */ CCLASS_OTHER, /* ENQ */ CCLASS_OTHER,
 /* ACK */ CCLASS_OTHER,  /* BEL */ CCLASS_OTHER, /* BS  */ CCLASS_OTHER,
 /* HT  */ CCLASS_SPACE,  /* LF  */ CCLASS_OTHER, /* VT  */ CCLASS_OTHER,
 /* FF  */ CCLASS_SPACE,  /* CR  */ CCLASS_SPACE, /* SO  */ CCLASS_OTHER,
 /* SI  */ CCLASS_OTHER,  /* DLE */ CCLASS_OTHER, /* DC1 */ CCLASS_OTHER,
 /* DC2 */ CCLASS_OTHER,  /* DC3 */ CCLASS_OTHER, /* DC4 */ CCLASS_OTHER,
 /* NAK */ CCLASS_OTHER,  /* SYN */ CCLASS_OTHER, /* ETB */ CCLASS_OTHER,
 /* CAN */ CCLASS_OTHER,  /* EM  */ CCLASS_OTHER, /* SUB */ CCLASS_OTHER,
 /* ESC */ CCLASS_OTHER,  /* FS  */ CCLASS_OTHER, /* GS  */ CCLASS_OTHER,
 /* RS  */ CCLASS_OTHER,  /* US  */ CCLASS_OTHER, /* SP  */ CCLASS_SPACE,
 /* ! */ CCLASS_OTHER,    /* " */ CCLASS_OTHER,   /* # */ CCLASS_OTHER,
 /* $ */ CCLASS_OTHER,    /* % */ CCLASS_OTHER,   /* & */ CCLASS_OTHER,
 /* ' */ CCLASS_OTHER,    /* ( */ CCLASS_OTHER,   /* ) */ CCLASS_OTHER,
 /* * */ CCLASS_OTHER,    /* + */ CCLASS_OTHER,   /* , */ CCLASS_OTHER,
 /* - */ CCLASS_OTHER,    /* . */ CCLASS_OTHER,   /* / */ CCLASS_OTHER,
 /* 0 */ CCLASS_DIGIT,    /* 1 */ CCLASS_DIGIT,   /* 2 */ CCLASS_DIGIT,
 /* 3 */ CCLASS_DIGIT,    /* 4 */ CCLASS_DIGIT,   /* 5 */ CCLASS_DIGIT,
 /* 6 */ CCLASS_DIGIT,    /* 7 */ CCLASS_DIGIT,   /* 8 */ CCLASS_DIGIT,
 /* 9 */ CCLASS_DIGIT,    /* : */ CCLASS_OTHER,   /* ; */ CCLASS_OTHER,
 /* < */ CCLASS_OTHER,    /* = */ CCLASS_OTHER,   /* > */ CCLASS_OTHER,
 /* ? */ CCLASS_OTHER,    /* @ */ CCLASS_OTHER,   /* A */ CCLASS_VOWEL,
 /* B */ CCLASS_B,        /* C */ CCLASS_C,       /* D */ CCLASS_D,
 /* E */ CCLASS_VOWEL,    /* F */ CCLASS_B,       /* G */ CCLASS_C,
 /* H */ CCLASS_SILENT,   /* I */ CCLASS_VOWEL,   /* J */ CCLASS_C,
 /* K */ CCLASS_C,        /* L */ CCLASS_L,       /* M */ CCLASS_M,
 /* N */ CCLASS_M,        /* O */ CCLASS_VOWEL,   /* P */ CCLASS_B,
 /* Q */ CCLASS_C,        /* R */ CCLASS_R,       /* S */ CCLASS_C,
 /* T */ CCLASS_D,        /* U */ CCLASS_VOWEL,   /* V */ CCLASS_B,
 /* W */ CCLASS_B,        /* X */ CCLASS_C,       /* Y */ CCLASS_Y,
 /* Z */ CCLASS_C,        /* [ */ CCLASS_OTHER,   /* \ */ CCLASS_OTHER,
 /* ] */ CCLASS_OTHER,    /* ^ */ CCLASS_OTHER,   /* _ */ CCLASS_OTHER,
 /* ` */ CCLASS_OTHER,    /* a */ CCLASS_VOWEL,   /* b */ CCLASS_B,
 /* c */ CCLASS_C,        /* d */ CCLASS_D,       /* e */ CCLASS_VOWEL,
 /* f */ CCLASS_B,        /* g */ CCLASS_C,       /* h */ CCLASS_SILENT,
 /* i */ CCLASS_VOWEL,    /* j */ CCLASS_C,       /* k */ CCLASS_C,
 /* l */ CCLASS_L,        /* m */ CCLASS_M,       /* n */ CCLASS_M,
 /* o */ CCLASS_VOWEL,    /* p */ CCLASS_B,       /* q */ CCLASS_C,
 /* r */ CCLASS_R,        /* s */ CCLASS_C,       /* t */ CCLASS_D,
 /* u */ CCLASS_VOWEL,    /* v */ CCLASS_B,       /* w */ CCLASS_B,
 /* x */ CCLASS_C,        /* y */ CCLASS_Y,       /* z */ CCLASS_C,
 /* { */ CCLASS_OTHER,    /* | */ CCLASS_OTHER,   /* } */ CCLASS_OTHER,
 /* ~ */ CCLASS_OTHER,    /* DEL */ CCLASS_OTHER,
};

/*
** Mapping from the character class number (0-12) to a symbol for each
** character class.  For most of the symbols, initClass[] maps the symbol
** back into its class number.  NOTE(review): with the tables as written
** this does not hold for '.' (maps to CCLASS_OTHER) or for 'H' (maps to
** CCLASS_SILENT) - confirm whether any caller relies on the reverse
** mapping.
*/
static const unsigned char className[] = ".ABCDHLRMY9 ?";

/*
** Generate a "phonetic hash" from a string of ASCII characters
** in zIn[0..nIn-1].
**
**   * Map characters by character class as defined above.
+**   * Omit double-letters +**   * Omit vowels beside R and L +**   * Omit T when followed by CH +**   * Omit W when followed by R +**   * Omit D when followed by J or G +**   * Omit K in KN or G in GN at the beginning of a word +** +** Space to hold the result is obtained from sqlite3_malloc() +** +** Return NULL if memory allocation fails.   +*/ +static unsigned char *phoneticHash(const unsigned char *zIn, int nIn){ +  unsigned char *zOut = sqlite3_malloc( nIn + 1 ); +  int i; +  int nOut = 0; +  char cPrev = 0x77; +  char cPrevX = 0x77; +  const unsigned char *aClass = initClass; + +  if( zOut==0 ) return 0; +  if( nIn>2 ){ +    switch( zIn[0] ){ +      case 'g':  +      case 'k': { +        if( zIn[1]=='n' ){ zIn++; nIn--; } +        break; +      } +    } +  } +  for(i=0; i<nIn; i++){ +    unsigned char c = zIn[i]; +    if( i+1<nIn ){ +      if( c=='w' && zIn[i+1]=='r' ) continue; +      if( c=='d' && (zIn[i+1]=='j' || zIn[i+1]=='g') ) continue; +      if( i+2<nIn ){ +        if( c=='t' && zIn[i+1]=='c' && zIn[i+2]=='h' ) continue; +      } +    } +    c = aClass[c&0x7f]; +    if( c==CCLASS_SPACE ) continue; +    if( c==CCLASS_OTHER && cPrev!=CCLASS_DIGIT ) continue; +    aClass = midClass; +    if( c==CCLASS_VOWEL && (cPrevX==CCLASS_R || cPrevX==CCLASS_L) ){ +       continue; /* No vowels beside L or R */  +    } +    if( (c==CCLASS_R || c==CCLASS_L) && cPrevX==CCLASS_VOWEL ){ +       nOut--;   /* No vowels beside L or R */ +    } +    cPrev = c; +    if( c==CCLASS_SILENT ) continue; +    cPrevX = c; +    c = className[c]; +    assert( nOut>=0 ); +    if( nOut==0 || c!=zOut[nOut-1] ) zOut[nOut++] = c; +  } +  zOut[nOut] = 0; +  return zOut; +} + +/* +** This is an SQL function wrapper around phoneticHash().  See +** the description of phoneticHash() for additional information. 
+*/ +static void phoneticHashSqlFunc( +  sqlite3_context *context, +  int argc, +  sqlite3_value **argv +){ +  const unsigned char *zIn; +  unsigned char *zOut; + +  zIn = sqlite3_value_text(argv[0]); +  if( zIn==0 ) return; +  zOut = phoneticHash(zIn, sqlite3_value_bytes(argv[0])); +  if( zOut==0 ){ +    sqlite3_result_error_nomem(context); +  }else{ +    sqlite3_result_text(context, (char*)zOut, -1, sqlite3_free); +  } +} + +/* +** Return the character class number for a character given its +** context. +*/ +static char characterClass(char cPrev, char c){ +  return cPrev==0 ? initClass[c&0x7f] : midClass[c&0x7f]; +} + +/* +** Return the cost of inserting or deleting character c immediately +** following character cPrev.  If cPrev==0, that means c is the first +** character of the word. +*/ +static int insertOrDeleteCost(char cPrev, char c, char cNext){ +  char classC = characterClass(cPrev, c); +  char classCprev; + +  if( classC==CCLASS_SILENT ){ +    /* Insert or delete "silent" characters such as H or W */ +    return 1; +  } +  if( cPrev==c ){ +    /* Repeated characters, or miss a repeat */ +    return 10; +  } +  if( classC==CCLASS_VOWEL && (cPrev=='r' || cNext=='r') ){ +    return 20;  /* Insert a vowel before or after 'r' */ +  } +  classCprev = characterClass(cPrev, cPrev); +  if( classC==classCprev ){ +    if( classC==CCLASS_VOWEL ){ +      /* Remove or add a new vowel to a vowel cluster */ +      return 15; +    }else{ +      /* Remove or add a consonant not in the same class */ +      return 50; +    } +  } + +  /* any other character insertion or deletion */ +  return 100; +} + +/* +** Divide the insertion cost by this factor when appending to the +** end of the word. +*/ +#define FINAL_INS_COST_DIV  4 + +/* +** Return the cost of substituting cTo in place of cFrom assuming +** the previous character is cPrev.  If cPrev==0 then cTo is the first +** character of the word. 
+*/
+static int substituteCost(char cPrev, char cFrom, char cTo){
+  char clsFrom, clsTo;
+  int isLetterTo;
+
+  /* Identical characters cost nothing */
+  if( cFrom==cTo ) return 0;
+
+  /* Neither do two ASCII letters that differ only in case (bit 0x20) */
+  isLetterTo = (cTo>='A' && cTo<='Z') || (cTo>='a' && cTo<='z');
+  if( isLetterTo && cFrom==(cTo^0x20) ) return 0;
+
+  clsFrom = characterClass(cPrev, cFrom);
+  clsTo = characterClass(cPrev, cTo);
+
+  /* Two different characters of the same class */
+  if( clsFrom==clsTo ) return 40;
+
+  /* At least one side lies outside the CCLASS_B..CCLASS_Y range:
+  ** this is the generic, most expensive substitution */
+  if( clsFrom<CCLASS_B || clsFrom>CCLASS_Y
+   || clsTo<CCLASS_B || clsTo>CCLASS_Y ){
+    return 100;
+  }
+
+  /* One consonant replaced by a consonant of a different class */
+  return 75;
+}
+
+/*
+** Given two strings zA and zB which are pure ASCII, return the cost
+** of transforming zA into zB.  If zA ends with '*' assume that it is
+** a prefix of zB and give only minimal penalty for extra characters
+** on the end of zB.
+**
+** Smaller numbers mean a closer match.
+**
+** Negative values indicate an error:
+**    -1  One of the inputs is NULL
+**    -2  Non-ASCII characters on input
+**    -3  Unable to allocate memory
+**
+** If pnMatch is not NULL, then *pnMatch is set to the number of bytes
+** of zB that matched the pattern in zA. If zA does not end with a '*',
+** then this value is always the number of bytes in zB (i.e. strlen(zB)).
+** If zA does end in a '*', then it is the number of bytes in the prefix
+** of zB that was deemed to match zA.
+*/
+static int editdist1(const char *zA, const char *zB, int *pnMatch){
+  int nA, nB;            /* Number of characters in zA[] and zB[] */
+  int xA, xB;            /* Loop counters for zA[] and zB[] */
+  char cA, cB;           /* Current character of zA and zB */
+  char cAprev, cBprev;   /* Previous character of zA and zB */
+  char cAnext, cBnext;   /* Next character in zA and zB */
+  int d;                 /* North-west cost value */
+  int dc = 0;            /* North-west character value */
+  int res;               /* Final result */
+  int *m;                /* The cost matrix (a single rolling row) */
+  char *cx;              /* Corresponding character values */
+  int *toFree = 0;       /* Malloced space */
+  int mStack[60+15];     /* Stack space to use if not too much is needed */
+  int nMatch = 0;        /* Length of the common prefix skipped below */
+
+  /* Early out if either input is NULL */
+  if( zA==0 || zB==0 ) return -1;
+
+  /* Skip any common prefix.  dc remembers the last character skipped so
+  ** that it can serve as the "previous character" for the cost functions
+  ** further down. */
+  while( zA[0] && zA[0]==zB[0] ){ dc = zA[0]; zA++; zB++; nMatch++; }
+  if( pnMatch ) *pnMatch = nMatch;
+  if( zA[0]==0 && zB[0]==0 ) return 0;
+
+#if 0
+  printf("A=\"%s\" B=\"%s\" dc=%c\n", zA, zB, dc?dc:' ');
+#endif
+
+  /* Verify input strings and measure their lengths.  Any byte with the
+  ** high bit set is rejected with error code -2. */
+  for(nA=0; zA[nA]; nA++){
+    if( zA[nA]&0x80 ) return -2;
+  }
+  for(nB=0; zB[nB]; nB++){
+    if( zB[nB]&0x80 ) return -2;
+  }
+
+  /* Special processing if either string is empty.  If A is used up, the
+  ** rest of B is priced as insertions at the reduced end-of-word rate
+  ** (divided by FINAL_INS_COST_DIV).  If B is used up, the rest of A is
+  ** priced as deletions at the full rate. */
+  if( nA==0 ){
+    cBprev = dc;
+    for(xB=res=0; (cB = zB[xB])!=0; xB++){
+      res += insertOrDeleteCost(cBprev, cB, zB[xB+1])/FINAL_INS_COST_DIV;
+      cBprev = cB;
+    }
+    return res;
+  }
+  if( nB==0 ){
+    cAprev = dc;
+    for(xA=res=0; (cA = zA[xA])!=0; xA++){
+      res += insertOrDeleteCost(cAprev, cA, zA[xA+1]);
+      cAprev = cA;
+    }
+    return res;
+  }
+
+  /* A is a prefix of B: once the common prefix has been removed, all
+  ** that is left of the pattern is the "*" wildcard */
+  if( zA[0]=='*' && zA[1]==0 ) return 0;
+
+  /* Allocate and initialize the Wagner matrix.  The buffer holds nB+1
+  ** int costs (m[]) immediately followed by the cx[] character array.
+  ** mStack[] is used when nB is less than 60 (the value of the size
+  ** expression below); otherwise the space comes from sqlite3_malloc()
+  ** and is released through toFree at the end. */
+  if( nB<(sizeof(mStack)*4)/(sizeof(mStack[0])*5) ){
+    m = mStack;
+  }else{
+    m = toFree = sqlite3_malloc( (nB+1)*5*sizeof(m[0])/4 );
+    if( m==0 ) return -3;
+  }
+  cx = (char*)&m[nB+1];
+
+  /* Compute the Wagner edit distance.  The first loop fills in the row
+  ** for an empty A (every character of B inserted).  Each pass of the
+  ** outer loop that follows overwrites that row in place with the row
+  ** for one more character of A; d and dc carry the north-west cost and
+  ** character that the overwrite would otherwise destroy. */
+  m[0] = 0;
+  cx[0] = dc;
+  cBprev = dc;
+  for(xB=1; xB<=nB; xB++){
+    cBnext = zB[xB];
+    cB = zB[xB-1];
+    cx[xB] = cB;
+    m[xB] = m[xB-1] + insertOrDeleteCost(cBprev, cB, cBnext);
+    cBprev = cB;
+  }
+  cAprev = dc;
+  for(xA=1; xA<=nA; xA++){
+    int lastA = (xA==nA);   /* True on the final character of A */
+    cA = zA[xA-1];
+    cAnext = zA[xA];
+    if( cA=='*' && lastA ) break;   /* Trailing '*' is not a real character */
+    d = m[0];
+    dc = cx[0];
+    m[0] = d + insertOrDeleteCost(cAprev, cA, cAnext);
+    cBprev = 0;
+    for(xB=1; xB<=nB; xB++){
+      int totalCost, insCost, delCost, subCost, ncx;
+      cB = zB[xB-1];
+      cBnext = zB[xB];
+
+      /* Cost to insert cB.  Insertions made while on the last character
+      ** of A get the reduced end-of-word rate. */
+      insCost = insertOrDeleteCost(cx[xB-1], cB, cBnext);
+      if( lastA ) insCost /= FINAL_INS_COST_DIV;
+
+      /* Cost to delete cA */
+      delCost = insertOrDeleteCost(cx[xB], cA, cBnext);
+
+      /* Cost to substitute cA->cB */
+      subCost = substituteCost(cx[xB-1], cA, cB);
+
+      /* Best cost.  ncx is the character recorded for this cell: cB
+      ** unless deletion is strictly cheaper than insertion, in which
+      ** case it is cA. */
+      totalCost = insCost + m[xB-1];
+      ncx = cB;
+      if( (delCost + m[xB])<totalCost ){
+        totalCost = delCost + m[xB];
+        ncx = cA;
+      }
+      if( (subCost + d)<totalCost ){
+        totalCost = subCost + d;
+      }
+
+#if 0
+      printf("%d,%d d=%4d u=%4d r=%4d dc=%c cA=%c cB=%c"
+             " ins=%4d del=%4d sub=%4d t=%4d ncx=%c\n",
+             xA, xB, d, m[xB], m[xB-1], dc?dc:' ', cA, cB,
+             insCost, delCost, subCost, totalCost, ncx?ncx:' ');
+#endif
+
+      /* Update the matrix, saving the old cell first because it is the
+      ** north-west neighbor of the next cell to be computed */
+      d = m[xB];
+      dc = cx[xB];
+      m[xB] = totalCost;
+      cx[xB] = ncx;
+      cBprev = cB;
+    }
+    cAprev = cA;
+  }
+
+  /* Free the wagner matrix and return the result.  When the pattern
+  ** ended in '*', the answer is the smallest entry of the final row and
+  ** *pnMatch is only advanced when an entry strictly smaller than m[1]
+  ** is found; otherwise it keeps the common-prefix length stored
+  ** earlier. */
+  if( cA=='*' ){
+    res = m[1];
+    for(xB=1; xB<=nB; xB++){
+      if( m[xB]<res ){
+        res = m[xB];
+        if( pnMatch ) *pnMatch = xB+nMatch;
+      }
+    }
+  }else{
+    res = m[nB];
+    /* In the current implementation, pnMatch is always NULL if zA does
+    ** not end in "*" */
+    assert( pnMatch==0 );
+  }
+  sqlite3_free(toFree);
+  return res;
+}
+
+/*
+** Function:    editdist(A,B)
+**
+** Return the cost of transforming string A into string B.  Both strings
+** must be pure ASCII text.  If A ends with '*' then it is assumed to be
+** a prefix of B and extra characters on the end of B have minimal additional
+** cost.
+**
+** The negative error codes of editdist1() are turned into SQL errors:
+** -3 becomes an out-of-memory error, -2 "non-ASCII input to editdist()"
+** and anything else "NULL input to editdist()".
+*/
+static void editdistSqlFunc(
+  sqlite3_context *context,
+  int argc,
+  sqlite3_value **argv
+){
+  int res = editdist1(
+                    (const char*)sqlite3_value_text(argv[0]),
+                    (const char*)sqlite3_value_text(argv[1]),
+                    0);
+  if( res<0 ){
+    if( res==(-3) ){
+      sqlite3_result_error_nomem(context);
+    }else if( res==(-2) ){
+      sqlite3_result_error(context, "non-ASCII input to editdist()", -1);
+    }else{
+      sqlite3_result_error(context, "NULL input to editdist()", -1);
+    }
+  }else{ 
+    sqlite3_result_int(context, res);
+  }
+}
+
+/* End of the fixed-cost edit distance implementation
+******************************************************************************
+*****************************************************************************
+** Begin: Configurable cost unicode edit distance routines
+*/
+/* Forward declaration of structures */
+typedef struct EditDist3Cost EditDist3Cost;
+typedef struct EditDist3Config EditDist3Config;
+typedef struct EditDist3Point EditDist3Point;
+typedef struct EditDist3From EditDist3From;
+typedef struct EditDist3FromString EditDist3FromString;
+typedef struct EditDist3To EditDist3To;
+typedef struct EditDist3ToString EditDist3ToString;
+typedef struct EditDist3Lang EditDist3Lang;
+
+
+/*
+** An entry in the edit cost table.  Entries for one language ID form a
+** singly-linked list through pNext.
+*/
+struct EditDist3Cost {
+  EditDist3Cost *pNext;     /* Next cost element */
+  u8 nFrom;                 /* Number of bytes in the FROM part of a[] */
+  u8 nTo;                   /* Number of bytes in the TO part of a[] */
+  u16 iCost;                /* Cost of this transformation */
+  char a[4]    ;            /* FROM string followed by TO string */
+  /* Additional FROM and TO string bytes appended as necessary: the
+  ** object is allocated larger when nFrom+nTo exceeds 4 */
+};
+
+/*
+** Edit costs for a particular language ID
+*/
+struct EditDist3Lang {
+  int iLang;             /* Language ID */
+  int iInsCost;          /* Default insertion cost */
+  int iDelCost;          /* Default deletion cost */
+  int iSubCost;          /* Default substitution cost */
+  EditDist3Cost *pCost;  /* Costs */
+};
+
+
+/*
+** The default EditDist3Lang object, with default costs: language 0,
+** insertion 100, deletion 100, substitution 150 and no cost entries.
+*/
+static const EditDist3Lang editDist3Lang = { 0, 100, 100, 150, 0 };
+
+/*
+** Complete configuration
+*/
+struct EditDist3Config {
+  int nLang;             /* Number of language IDs.  Size of a[] */
+  EditDist3Lang *a;      /* One for each distinct language ID */
+};
+
+/*
+** Extra information about each character in the FROM string.
+*/
+struct EditDist3From {
+  int nSubst;              /* Number of substitution cost entries */
+  int nDel;                /* Number of deletion cost entries */
+  int nByte;               /* Number of bytes in this character */
+  EditDist3Cost **apSubst; /* Array of substitution costs for this element */
+  EditDist3Cost **apDel;   /* Array of deletion cost entries */
+};
+
+/*
+** A precompiled FROM string.
+**
+** In the common case we expect the FROM string to be reused multiple times.
+** In other words, the common case will be to measure the edit distance
+** from a single origin string to multiple target strings.
+*/ +struct EditDist3FromString { +  char *z;                 /* The complete text of the FROM string */ +  int n;                   /* Number of characters in the FROM string */ +  int isPrefix;            /* True if ends with '*' character */ +  EditDist3From *a;        /* Extra info about each char of the FROM string */ +}; + +/* +** Extra information about each character in the TO string. +*/ +struct EditDist3To { +  int nIns;                /* Number of insertion cost entries */ +  int nByte;               /* Number of bytes in this character */ +  EditDist3Cost **apIns;   /* Array of deletion cost entries */ +}; + +/* +** A precompiled FROM string +*/ +struct EditDist3ToString { +  char *z;                 /* The complete text of the TO string */ +  int n;                   /* Number of characters in the TO string */ +  EditDist3To *a;          /* Extra info about each char of the TO string */ +}; + +/* +** Clear or delete an instance of the object that records all edit-distance +** weights. +*/ +static void editDist3ConfigClear(EditDist3Config *p){ +  int i; +  if( p==0 ) return; +  for(i=0; i<p->nLang; i++){ +    EditDist3Cost *pCost, *pNext; +    pCost = p->a[i].pCost; +    while( pCost ){ +      pNext = pCost->pNext; +      sqlite3_free(pCost); +      pCost = pNext; +    } +  } +  sqlite3_free(p->a); +  memset(p, 0, sizeof(*p)); +} +static void editDist3ConfigDelete(void *pIn){ +  EditDist3Config *p = (EditDist3Config*)pIn; +  editDist3ConfigClear(p); +  sqlite3_free(p); +} + +/* +** Load all edit-distance weights from a table. 
+*/ +static int editDist3ConfigLoad( +  EditDist3Config *p,      /* The edit distance configuration to load */ +  sqlite3 *db,            /* Load from this database */ +  const char *zTable      /* Name of the table from which to load */ +){ +  sqlite3_stmt *pStmt; +  int rc, rc2; +  char *zSql; +  int iLangPrev = -9999; +  EditDist3Lang *pLang = 0; + +  zSql = sqlite3_mprintf("SELECT iLang, cFrom, cTo, iCost" +                         " FROM \"%w\" WHERE iLang>=0 ORDER BY iLang", zTable); +  if( zSql==0 ) return SQLITE_NOMEM; +  rc = sqlite3_prepare(db, zSql, -1, &pStmt, 0); +  sqlite3_free(zSql); +  if( rc ) return rc; +  editDist3ConfigClear(p); +  while( sqlite3_step(pStmt)==SQLITE_ROW ){ +    int iLang = sqlite3_column_int(pStmt, 0); +    const char *zFrom = (const char*)sqlite3_column_text(pStmt, 1); +    int nFrom = zFrom ? sqlite3_column_bytes(pStmt, 1) : 0; +    const char *zTo = (const char*)sqlite3_column_text(pStmt, 2); +    int nTo = zTo ? sqlite3_column_bytes(pStmt, 2) : 0; +    int iCost = sqlite3_column_int(pStmt, 3); + +    assert( zFrom!=0 || nFrom==0 ); +    assert( zTo!=0 || nTo==0 ); +    if( nFrom>100 || nTo>100 ) continue; +    if( iCost<0 ) continue; +    if( pLang==0 || iLang!=iLangPrev ){ +      EditDist3Lang *pNew; +      pNew = sqlite3_realloc(p->a, (p->nLang+1)*sizeof(p->a[0])); +      if( pNew==0 ){ rc = SQLITE_NOMEM; break; } +      p->a = pNew; +      pLang = &p->a[p->nLang]; +      p->nLang++; +      pLang->iLang = iLang; +      pLang->iInsCost = 100; +      pLang->iDelCost = 100; +      pLang->iSubCost = 150; +      pLang->pCost = 0; +      iLangPrev = iLang; +    } +    if( nFrom==1 && zFrom[0]=='?' && nTo==0 ){ +      pLang->iDelCost = iCost; +    }else if( nFrom==0 && nTo==1 && zTo[0]=='?' ){ +      pLang->iInsCost = iCost; +    }else if( nFrom==1 && nTo==1 && zFrom[0]=='?' && zTo[0]=='?' 
){ +      pLang->iSubCost = iCost; +    }else{ +      EditDist3Cost *pCost; +      int nExtra = nFrom + nTo - 4; +      if( nExtra<0 ) nExtra = 0; +      pCost = sqlite3_malloc( sizeof(*pCost) + nExtra ); +      if( pCost==0 ){ rc = SQLITE_NOMEM; break; } +      pCost->nFrom = nFrom; +      pCost->nTo = nTo; +      pCost->iCost = iCost; +      memcpy(pCost->a, zFrom, nFrom); +      memcpy(pCost->a + nFrom, zTo, nTo); +      pCost->pNext = pLang->pCost; +      pLang->pCost = pCost;  +    } +  } +  rc2 = sqlite3_finalize(pStmt); +  if( rc==SQLITE_OK ) rc = rc2; +  return rc; +} + +/* +** Return the length (in bytes) of a utf-8 character.  Or return a maximum +** of N. +*/ +static int utf8Len(unsigned char c, int N){ +  int len = 1; +  if( c>0x7f ){ +    if( (c&0xe0)==0xc0 ){ +      len = 2; +    }else if( (c&0xf0)==0xe0 ){ +      len = 3; +    }else{ +      len = 4; +    } +  } +  if( len>N ) len = N; +  return len; +} + +/* +** Return TRUE (non-zero) if the To side of the given cost matches +** the given string. +*/ +static int matchTo(EditDist3Cost *p, const char *z, int n){ +  if( p->nTo>n ) return 0; +  if( strncmp(p->a+p->nFrom, z, p->nTo)!=0 ) return 0; +  return 1; +} + +/* +** Return TRUE (non-zero) if the From side of the given cost matches +** the given string. +*/ +static int matchFrom(EditDist3Cost *p, const char *z, int n){ +  assert( p->nFrom<=n ); +  if( strncmp(p->a, z, p->nFrom)!=0 ) return 0; +  return 1; +} + +/* +** Return TRUE (non-zero) of the next FROM character and the next TO +** character are the same. 
+*/
+static int matchFromTo(
+  EditDist3FromString *pStr,  /* Left hand string */
+  int n1,                     /* Index of comparison character on the left */
+  const char *z2,             /* Right-hand comparison character */
+  int n2                      /* Bytes remaining in z2[] */
+){
+  int b1 = pStr->a[n1].nByte;   /* Size in bytes of the left character */
+  if( b1>n2 ) return 0;
+  if( memcmp(pStr->z+n1, z2, b1)!=0 ) return 0;
+  return 1;
+}
+
+/*
+** Delete an EditDist3FromString object: the per-character apDel[] and
+** apSubst[] arrays first, then the object itself.  A NULL pointer is a
+** harmless no-op.
+*/
+static void editDist3FromStringDelete(EditDist3FromString *p){
+  int i;
+  if( p ){
+    for(i=0; i<p->n; i++){
+      sqlite3_free(p->a[i].apDel);
+      sqlite3_free(p->a[i].apSubst);
+    }
+    sqlite3_free(p);
+  }
+}
+
+/*
+** Create a EditDist3FromString object for the n-byte string z[], using
+** the cost entries of pLang.  If n is negative, strlen(z) is used.
+**
+** A single allocation holds the object header, the a[] array with one
+** element per byte of z[], and a private zero-terminated copy of the
+** text.  A trailing '*' is removed from the copy and recorded in
+** isPrefix.  For every byte offset, a[] lists the cost entries whose
+** FROM side matches at that offset: entries with an empty TO side go
+** into apDel[], all others into apSubst[].
+**
+** Returns NULL if z is NULL or if a memory allocation fails.  The
+** result must be released with editDist3FromStringDelete().
+*/
+static EditDist3FromString *editDist3FromStringNew(
+  const EditDist3Lang *pLang,
+  const char *z,
+  int n
+){
+  EditDist3FromString *pStr;
+  EditDist3Cost *p;
+  int i;
+
+  if( z==0 ) return 0;
+  if( n<0 ) n = (int)strlen(z);
+  pStr = sqlite3_malloc( sizeof(*pStr) + sizeof(pStr->a[0])*n + n + 1 );
+  if( pStr==0 ) return 0;
+  pStr->a = (EditDist3From*)&pStr[1];   /* a[] directly follows the header */
+  memset(pStr->a, 0, sizeof(pStr->a[0])*n);
+  pStr->n = n;
+  pStr->z = (char*)&pStr->a[n];         /* text copy follows a[] */
+  memcpy(pStr->z, z, n+1);
+  if( n && z[n-1]=='*' ){
+    pStr->isPrefix = 1;
+    n--;
+    pStr->n--;
+    pStr->z[n] = 0;
+  }else{
+    pStr->isPrefix = 0;
+  }
+
+  for(i=0; i<n; i++){
+    EditDist3From *pFrom = &pStr->a[i];
+    memset(pFrom, 0, sizeof(*pFrom));
+    pFrom->nByte = utf8Len((unsigned char)z[i], n-i);
+    for(p=pLang->pCost; p; p=p->pNext){
+      EditDist3Cost **apNew;
+      if( i+p->nFrom>n ) continue;   /* FROM side would run past the end */
+      if( matchFrom(p, z+i, n-i)==0 ) continue;
+      if( p->nTo==0 ){
+        apNew = sqlite3_realloc(pFrom->apDel,
+                                sizeof(*apNew)*(pFrom->nDel+1));
+        if( apNew==0 ) break;
+        pFrom->apDel = apNew;
+        apNew[pFrom->nDel++] = p;
+      }else{
+        apNew = sqlite3_realloc(pFrom->apSubst,
+                                sizeof(*apNew)*(pFrom->nSubst+1));
+        if( apNew==0 ) break;
+        pFrom->apSubst = apNew;
+        apNew[pFrom->nSubst++] = p;
+      }
+    }
+    if( p ){   /* Inner loop stopped early: an allocation failed */
+      editDist3FromStringDelete(pStr);
+      pStr = 0;
+      break;
+    }
+  }
+  return pStr;
+}
+
+/*
+** Update entry m[i] such that it is the minimum of its current value
+** and m[j]+iCost.
+**
+** If the iCost is 10,000 or greater, then consider the cost to be
+** infinite and skip the update.
+*/
+static void updateCost(
+  unsigned int *m,
+  int i,
+  int j,
+  int iCost
+){
+  assert( iCost>=0 );
+  if( iCost<10000 ){
+    unsigned int b = m[j] + iCost;
+    if( b<m[i] ) m[i] = b;
+  }
+}
+
+/* Compute the edit distance between two strings.
+**
+** If an error occurs, return a negative number which is the error code.
+**
+** If pnMatch is not NULL, then *pnMatch is set to the number of characters
+** (not bytes) in z2 that matched the search pattern in *pFrom. If pFrom does
+** not contain the pattern for a prefix-search, then this is always the number
+** of characters in z2. If pFrom does contain a prefix search pattern, then
+** it is the number of characters in the prefix of z2 that was deemed to
+** match pFrom.
+*/ +static int editDist3Core( +  EditDist3FromString *pFrom,  /* The FROM string */ +  const char *z2,              /* The TO string */ +  int n2,                      /* Length of the TO string */ +  const EditDist3Lang *pLang,  /* Edit weights for a particular language ID */ +  int *pnMatch                 /* OUT: Characters in matched prefix */ +){ +  int k, n; +  int i1, b1; +  int i2, b2; +  EditDist3FromString f = *pFrom; +  EditDist3To *a2; +  unsigned int *m; +  int szRow; +  EditDist3Cost *p; +  int res; + +  /* allocate the Wagner matrix and the aTo[] array for the TO string */ +  n = (f.n+1)*(n2+1); +  n = (n+1)&~1; +  m = sqlite3_malloc( n*sizeof(m[0]) + sizeof(a2[0])*n2 ); +  if( m==0 ) return -1;            /* Out of memory */ +  a2 = (EditDist3To*)&m[n]; +  memset(a2, 0, sizeof(a2[0])*n2); + +  /* Fill in the a1[] matrix for all characters of the TO string */ +  for(i2=0; i2<n2; i2++){ +    a2[i2].nByte = utf8Len((unsigned char)z2[i2], n2-i2); +    for(p=pLang->pCost; p; p=p->pNext){ +      EditDist3Cost **apNew; +      if( p->nFrom>0 ) continue; +      if( i2+p->nTo>n2 ) continue; +      if( matchTo(p, z2+i2, n2-i2)==0 ) continue; +      a2[i2].nIns++; +      apNew = sqlite3_realloc(a2[i2].apIns, sizeof(*apNew)*a2[i2].nIns); +      if( apNew==0 ){ +        res = -1;  /* Out of memory */ +        goto editDist3Abort; +      } +      a2[i2].apIns = apNew; +      a2[i2].apIns[a2[i2].nIns-1] = p; +    } +  } + +  /* Prepare to compute the minimum edit distance */ +  szRow = f.n+1; +  memset(m, 0x01, (n2+1)*szRow*sizeof(m[0])); +  m[0] = 0; + +  /* First fill in the top-row of the matrix with FROM deletion costs */ +  for(i1=0; i1<f.n; i1 += b1){ +    b1 = f.a[i1].nByte; +    updateCost(m, i1+b1, i1, pLang->iDelCost); +    for(k=0; k<f.a[i1].nDel; k++){ +      p = f.a[i1].apDel[k]; +      updateCost(m, i1+p->nFrom, i1, p->iCost); +    } +  } + +  /* Fill in all subsequent rows, top-to-bottom, left-to-right */ +  for(i2=0; i2<n2; i2 += b2){ +    int rx;  
    /* Starting index for current row */ +    int rxp;     /* Starting index for previous row */ +    b2 = a2[i2].nByte; +    rx = szRow*(i2+b2); +    rxp = szRow*i2; +    updateCost(m, rx, rxp, pLang->iInsCost); +    for(k=0; k<a2[i2].nIns; k++){ +      p = a2[i2].apIns[k]; +      updateCost(m, szRow*(i2+p->nTo), rxp, p->iCost); +    } +    for(i1=0; i1<f.n; i1+=b1){ +      int cx;    /* Index of current cell */ +      int cxp;   /* Index of cell immediately to the left */ +      int cxd;   /* Index of cell to the left and one row above */ +      int cxu;   /* Index of cell immediately above */ +      b1 = f.a[i1].nByte; +      cxp = rx + i1; +      cx = cxp + b1; +      cxd = rxp + i1; +      cxu = cxd + b1; +      updateCost(m, cx, cxp, pLang->iDelCost); +      for(k=0; k<f.a[i1].nDel; k++){ +        p = f.a[i1].apDel[k]; +        updateCost(m, cxp+p->nFrom, cxp, p->iCost); +      } +      updateCost(m, cx, cxu, pLang->iInsCost); +      if( matchFromTo(&f, i1, z2+i2, n2-i2) ){ +        updateCost(m, cx, cxd, 0); +      } +      updateCost(m, cx, cxd, pLang->iSubCost); +      for(k=0; k<f.a[i1].nSubst; k++){ +        p = f.a[i1].apSubst[k]; +        if( matchTo(p, z2+i2, n2-i2) ){ +          updateCost(m, cxd+p->nFrom+szRow*p->nTo, cxd, p->iCost); +        } +      } +    } +  } + +#if 0  /* Enable for debugging */ +  printf("         ^"); +  for(i1=0; i1<f.n; i1++) printf(" %c-%2x", f.z[i1], f.z[i1]&0xff); +  printf("\n   ^:"); +  for(i1=0; i1<szRow; i1++){ +    int v = m[i1]; +    if( v>9999 ) printf(" ****"); +    else         printf(" %4d", v); +  } +  printf("\n"); +  for(i2=0; i2<n2; i2++){ +    printf("%c-%02x:", z2[i2], z2[i2]&0xff); +    for(i1=0; i1<szRow; i1++){ +      int v = m[(i2+1)*szRow+i1]; +      if( v>9999 ) printf(" ****"); +      else         printf(" %4d", v); +    } +    printf("\n"); +  } +#endif + +  /* Free memory allocations and return the result */ +  res = (int)m[szRow*(n2+1)-1]; +  n = n2; +  if( f.isPrefix ){ +    for(i2=1; i2<=n2; 
i2++){ +      int b = m[szRow*i2-1]; +      if( b<=res ){  +        res = b; +        n = i2 - 1; +      } +    } +  } +  if( pnMatch ){ +    int nExtra = 0; +    for(k=0; k<n; k++){ +      if( (z2[k] & 0xc0)==0x80 ) nExtra++; +    } +    *pnMatch = n - nExtra; +  } + +editDist3Abort: +  for(i2=0; i2<n2; i2++) sqlite3_free(a2[i2].apIns); +  sqlite3_free(m); +  return res; +} + +/* +** Get an appropriate EditDist3Lang object. +*/ +static const EditDist3Lang *editDist3FindLang( +  EditDist3Config *pConfig, +  int iLang +){ +  int i; +  for(i=0; i<pConfig->nLang; i++){ +    if( pConfig->a[i].iLang==iLang ) return &pConfig->a[i]; +  } +  return &editDist3Lang; +} + +/* +** Function:    editdist3(A,B,iLang) +**              editdist3(tablename) +** +** Return the cost of transforming string A into string B using edit +** weights for iLang. +** +** The second form loads edit weights into memory from a table. +*/ +static void editDist3SqlFunc( +  sqlite3_context *context, +  int argc, +  sqlite3_value **argv +){ +  EditDist3Config *pConfig = (EditDist3Config*)sqlite3_user_data(context); +  sqlite3 *db = sqlite3_context_db_handle(context); +  int rc; +  if( argc==1 ){ +    const char *zTable = (const char*)sqlite3_value_text(argv[0]); +    rc = editDist3ConfigLoad(pConfig, db, zTable); +    if( rc ) sqlite3_result_error_code(context, rc); +  }else{ +    const char *zA = (const char*)sqlite3_value_text(argv[0]); +    const char *zB = (const char*)sqlite3_value_text(argv[1]); +    int nA = sqlite3_value_bytes(argv[0]); +    int nB = sqlite3_value_bytes(argv[1]); +    int iLang = argc==3 ? 
sqlite3_value_int(argv[2]) : 0; +    const EditDist3Lang *pLang = editDist3FindLang(pConfig, iLang); +    EditDist3FromString *pFrom; +    int dist; + +    pFrom = editDist3FromStringNew(pLang, zA, nA); +    if( pFrom==0 ){ +      sqlite3_result_error_nomem(context); +      return; +    } +    dist = editDist3Core(pFrom, zB, nB, pLang, 0); +    editDist3FromStringDelete(pFrom); +    if( dist==(-1) ){ +      sqlite3_result_error_nomem(context); +    }else{ +      sqlite3_result_int(context, dist); +    } +  }  +} + +/* +** Register the editDist3 function with SQLite +*/ +static int editDist3Install(sqlite3 *db){ +  int rc; +  EditDist3Config *pConfig = sqlite3_malloc( sizeof(*pConfig) ); +  if( pConfig==0 ) return SQLITE_NOMEM; +  memset(pConfig, 0, sizeof(*pConfig)); +  rc = sqlite3_create_function_v2(db, "editdist3", +              2, SQLITE_UTF8, pConfig, editDist3SqlFunc, 0, 0, 0); +  if( rc==SQLITE_OK ){ +    rc = sqlite3_create_function_v2(db, "editdist3", +                3, SQLITE_UTF8, pConfig, editDist3SqlFunc, 0, 0, 0); +  } +  if( rc==SQLITE_OK ){ +    rc = sqlite3_create_function_v2(db, "editdist3", +                1, SQLITE_UTF8, pConfig, editDist3SqlFunc, 0, 0, +                editDist3ConfigDelete); +  }else{ +    sqlite3_free(pConfig); +  } +  return rc; +} +/* End configurable cost unicode edit distance routines +****************************************************************************** +****************************************************************************** +** Begin transliterate unicode-to-ascii implementation +*/ + +#if !SQLITE_AMALGAMATION +/* +** This lookup table is used to help decode the first byte of +** a multi-byte UTF8 character. 
+*/ +static const unsigned char sqlite3Utf8Trans1[] = { +  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, +  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, +  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, +  0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, +  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, +  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, +  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, +  0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00, +}; +#endif + +/* +** Return the value of the first UTF-8 character in the string. +*/ +static int utf8Read(const unsigned char *z, int n, int *pSize){ +  int c, i; + +  /* All callers to this routine (in the current implementation) +  ** always have n>0. */ +  if( NEVER(n==0) ){ +    c = i = 0; +  }else{ +    c = z[0]; +    i = 1; +    if( c>=0xc0 ){ +      c = sqlite3Utf8Trans1[c-0xc0]; +      while( i<n && (z[i] & 0xc0)==0x80 ){ +        c = (c<<6) + (0x3f & z[i++]); +      } +    } +  } +  *pSize = i; +  return c; +} + +/* +** Return the number of characters in the utf-8 string in the nIn byte +** buffer pointed to by zIn. +*/ +static int utf8Charlen(const char *zIn, int nIn){ +  int i; +  int nChar = 0; +  for(i=0; i<nIn; nChar++){ +    int sz; +    utf8Read((const unsigned char *)&zIn[i], nIn-i, &sz); +    i += sz; +  } +  return nChar; +} + +/* +** Table of translations from unicode characters into ASCII. 
+*/ +static const struct { + unsigned short int cFrom; + unsigned char cTo0, cTo1; +} translit[] = { +  { 0x00A0,  0x20, 0x00 },  /*   to   */ +  { 0x00B5,  0x75, 0x00 },  /* µ to u */ +  { 0x00C0,  0x41, 0x00 },  /* À to A */ +  { 0x00C1,  0x41, 0x00 },  /* Á to A */ +  { 0x00C2,  0x41, 0x00 },  /*  to A */ +  { 0x00C3,  0x41, 0x00 },  /* à to A */ +  { 0x00C4,  0x41, 0x65 },  /* Ä to Ae */ +  { 0x00C5,  0x41, 0x61 },  /* Å to Aa */ +  { 0x00C6,  0x41, 0x45 },  /* Æ to AE */ +  { 0x00C7,  0x43, 0x00 },  /* Ç to C */ +  { 0x00C8,  0x45, 0x00 },  /* È to E */ +  { 0x00C9,  0x45, 0x00 },  /* É to E */ +  { 0x00CA,  0x45, 0x00 },  /* Ê to E */ +  { 0x00CB,  0x45, 0x00 },  /* Ë to E */ +  { 0x00CC,  0x49, 0x00 },  /* Ì to I */ +  { 0x00CD,  0x49, 0x00 },  /* Í to I */ +  { 0x00CE,  0x49, 0x00 },  /* Î to I */ +  { 0x00CF,  0x49, 0x00 },  /* Ï to I */ +  { 0x00D0,  0x44, 0x00 },  /* Ð to D */ +  { 0x00D1,  0x4E, 0x00 },  /* Ñ to N */ +  { 0x00D2,  0x4F, 0x00 },  /* Ò to O */ +  { 0x00D3,  0x4F, 0x00 },  /* Ó to O */ +  { 0x00D4,  0x4F, 0x00 },  /* Ô to O */ +  { 0x00D5,  0x4F, 0x00 },  /* Õ to O */ +  { 0x00D6,  0x4F, 0x65 },  /* Ö to Oe */ +  { 0x00D7,  0x78, 0x00 },  /* × to x */ +  { 0x00D8,  0x4F, 0x00 },  /* Ø to O */ +  { 0x00D9,  0x55, 0x00 },  /* Ù to U */ +  { 0x00DA,  0x55, 0x00 },  /* Ú to U */ +  { 0x00DB,  0x55, 0x00 },  /* Û to U */ +  { 0x00DC,  0x55, 0x65 },  /* Ü to Ue */ +  { 0x00DD,  0x59, 0x00 },  /* Ý to Y */ +  { 0x00DE,  0x54, 0x68 },  /* Þ to Th */ +  { 0x00DF,  0x73, 0x73 },  /* ß to ss */ +  { 0x00E0,  0x61, 0x00 },  /* à to a */ +  { 0x00E1,  0x61, 0x00 },  /* á to a */ +  { 0x00E2,  0x61, 0x00 },  /* â to a */ +  { 0x00E3,  0x61, 0x00 },  /* ã to a */ +  { 0x00E4,  0x61, 0x65 },  /* ä to ae */ +  { 0x00E5,  0x61, 0x61 },  /* å to aa */ +  { 0x00E6,  0x61, 0x65 },  /* æ to ae */ +  { 0x00E7,  0x63, 0x00 },  /* ç to c */ +  { 0x00E8,  0x65, 0x00 },  /* è to e */ +  { 0x00E9,  0x65, 0x00 },  /* é to e */ +  { 0x00EA,  0x65, 0x00 },  /* ê to e */ 
+  { 0x00EB,  0x65, 0x00 },  /* ë to e */ +  { 0x00EC,  0x69, 0x00 },  /* ì to i */ +  { 0x00ED,  0x69, 0x00 },  /* í to i */ +  { 0x00EE,  0x69, 0x00 },  /* î to i */ +  { 0x00EF,  0x69, 0x00 },  /* ï to i */ +  { 0x00F0,  0x64, 0x00 },  /* ð to d */ +  { 0x00F1,  0x6E, 0x00 },  /* ñ to n */ +  { 0x00F2,  0x6F, 0x00 },  /* ò to o */ +  { 0x00F3,  0x6F, 0x00 },  /* ó to o */ +  { 0x00F4,  0x6F, 0x00 },  /* ô to o */ +  { 0x00F5,  0x6F, 0x00 },  /* õ to o */ +  { 0x00F6,  0x6F, 0x65 },  /* ö to oe */ +  { 0x00F7,  0x3A, 0x00 },  /* ÷ to : */ +  { 0x00F8,  0x6F, 0x00 },  /* ø to o */ +  { 0x00F9,  0x75, 0x00 },  /* ù to u */ +  { 0x00FA,  0x75, 0x00 },  /* ú to u */ +  { 0x00FB,  0x75, 0x00 },  /* û to u */ +  { 0x00FC,  0x75, 0x65 },  /* ü to ue */ +  { 0x00FD,  0x79, 0x00 },  /* ý to y */ +  { 0x00FE,  0x74, 0x68 },  /* þ to th */ +  { 0x00FF,  0x79, 0x00 },  /* ÿ to y */ +  { 0x0100,  0x41, 0x00 },  /* Ā to A */ +  { 0x0101,  0x61, 0x00 },  /* ā to a */ +  { 0x0102,  0x41, 0x00 },  /* Ă to A */ +  { 0x0103,  0x61, 0x00 },  /* ă to a */ +  { 0x0104,  0x41, 0x00 },  /* Ą to A */ +  { 0x0105,  0x61, 0x00 },  /* ą to a */ +  { 0x0106,  0x43, 0x00 },  /* Ć to C */ +  { 0x0107,  0x63, 0x00 },  /* ć to c */ +  { 0x0108,  0x43, 0x68 },  /* Ĉ to Ch */ +  { 0x0109,  0x63, 0x68 },  /* ĉ to ch */ +  { 0x010A,  0x43, 0x00 },  /* Ċ to C */ +  { 0x010B,  0x63, 0x00 },  /* ċ to c */ +  { 0x010C,  0x43, 0x00 },  /* Č to C */ +  { 0x010D,  0x63, 0x00 },  /* č to c */ +  { 0x010E,  0x44, 0x00 },  /* Ď to D */ +  { 0x010F,  0x64, 0x00 },  /* ď to d */ +  { 0x0110,  0x44, 0x00 },  /* Đ to D */ +  { 0x0111,  0x64, 0x00 },  /* đ to d */ +  { 0x0112,  0x45, 0x00 },  /* Ē to E */ +  { 0x0113,  0x65, 0x00 },  /* ē to e */ +  { 0x0114,  0x45, 0x00 },  /* Ĕ to E */ +  { 0x0115,  0x65, 0x00 },  /* ĕ to e */ +  { 0x0116,  0x45, 0x00 },  /* Ė to E */ +  { 0x0117,  0x65, 0x00 },  /* ė to e */ +  { 0x0118,  0x45, 0x00 },  /* Ę to E */ +  { 0x0119,  0x65, 0x00 },  /* ę to e */ +  { 0x011A,  0x45, 
0x00 },  /* Ě to E */ +  { 0x011B,  0x65, 0x00 },  /* ě to e */ +  { 0x011C,  0x47, 0x68 },  /* Ĝ to Gh */ +  { 0x011D,  0x67, 0x68 },  /* ĝ to gh */ +  { 0x011E,  0x47, 0x00 },  /* Ğ to G */ +  { 0x011F,  0x67, 0x00 },  /* ğ to g */ +  { 0x0120,  0x47, 0x00 },  /* Ġ to G */ +  { 0x0121,  0x67, 0x00 },  /* ġ to g */ +  { 0x0122,  0x47, 0x00 },  /* Ģ to G */ +  { 0x0123,  0x67, 0x00 },  /* ģ to g */ +  { 0x0124,  0x48, 0x68 },  /* Ĥ to Hh */ +  { 0x0125,  0x68, 0x68 },  /* ĥ to hh */ +  { 0x0126,  0x48, 0x00 },  /* Ħ to H */ +  { 0x0127,  0x68, 0x00 },  /* ħ to h */ +  { 0x0128,  0x49, 0x00 },  /* Ĩ to I */ +  { 0x0129,  0x69, 0x00 },  /* ĩ to i */ +  { 0x012A,  0x49, 0x00 },  /* Ī to I */ +  { 0x012B,  0x69, 0x00 },  /* ī to i */ +  { 0x012C,  0x49, 0x00 },  /* Ĭ to I */ +  { 0x012D,  0x69, 0x00 },  /* ĭ to i */ +  { 0x012E,  0x49, 0x00 },  /* Į to I */ +  { 0x012F,  0x69, 0x00 },  /* į to i */ +  { 0x0130,  0x49, 0x00 },  /* İ to I */ +  { 0x0131,  0x69, 0x00 },  /* ı to i */ +  { 0x0132,  0x49, 0x4A },  /* IJ to IJ */ +  { 0x0133,  0x69, 0x6A },  /* ij to ij */ +  { 0x0134,  0x4A, 0x68 },  /* Ĵ to Jh */ +  { 0x0135,  0x6A, 0x68 },  /* ĵ to jh */ +  { 0x0136,  0x4B, 0x00 },  /* Ķ to K */ +  { 0x0137,  0x6B, 0x00 },  /* ķ to k */ +  { 0x0138,  0x6B, 0x00 },  /* ĸ to k */ +  { 0x0139,  0x4C, 0x00 },  /* Ĺ to L */ +  { 0x013A,  0x6C, 0x00 },  /* ĺ to l */ +  { 0x013B,  0x4C, 0x00 },  /* Ļ to L */ +  { 0x013C,  0x6C, 0x00 },  /* ļ to l */ +  { 0x013D,  0x4C, 0x00 },  /* Ľ to L */ +  { 0x013E,  0x6C, 0x00 },  /* ľ to l */ +  { 0x013F,  0x4C, 0x2E },  /* Ŀ to L. */ +  { 0x0140,  0x6C, 0x2E },  /* ŀ to l. 
*/ +  { 0x0141,  0x4C, 0x00 },  /* Ł to L */ +  { 0x0142,  0x6C, 0x00 },  /* ł to l */ +  { 0x0143,  0x4E, 0x00 },  /* Ń to N */ +  { 0x0144,  0x6E, 0x00 },  /* ń to n */ +  { 0x0145,  0x4E, 0x00 },  /* Ņ to N */ +  { 0x0146,  0x6E, 0x00 },  /* ņ to n */ +  { 0x0147,  0x4E, 0x00 },  /* Ň to N */ +  { 0x0148,  0x6E, 0x00 },  /* ň to n */ +  { 0x0149,  0x27, 0x6E },  /* ʼn to 'n */ +  { 0x014A,  0x4E, 0x47 },  /* Ŋ to NG */ +  { 0x014B,  0x6E, 0x67 },  /* ŋ to ng */ +  { 0x014C,  0x4F, 0x00 },  /* Ō to O */ +  { 0x014D,  0x6F, 0x00 },  /* ō to o */ +  { 0x014E,  0x4F, 0x00 },  /* Ŏ to O */ +  { 0x014F,  0x6F, 0x00 },  /* ŏ to o */ +  { 0x0150,  0x4F, 0x00 },  /* Ő to O */ +  { 0x0151,  0x6F, 0x00 },  /* ő to o */ +  { 0x0152,  0x4F, 0x45 },  /* Œ to OE */ +  { 0x0153,  0x6F, 0x65 },  /* œ to oe */ +  { 0x0154,  0x52, 0x00 },  /* Ŕ to R */ +  { 0x0155,  0x72, 0x00 },  /* ŕ to r */ +  { 0x0156,  0x52, 0x00 },  /* Ŗ to R */ +  { 0x0157,  0x72, 0x00 },  /* ŗ to r */ +  { 0x0158,  0x52, 0x00 },  /* Ř to R */ +  { 0x0159,  0x72, 0x00 },  /* ř to r */ +  { 0x015A,  0x53, 0x00 },  /* Ś to S */ +  { 0x015B,  0x73, 0x00 },  /* ś to s */ +  { 0x015C,  0x53, 0x68 },  /* Ŝ to Sh */ +  { 0x015D,  0x73, 0x68 },  /* ŝ to sh */ +  { 0x015E,  0x53, 0x00 },  /* Ş to S */ +  { 0x015F,  0x73, 0x00 },  /* ş to s */ +  { 0x0160,  0x53, 0x00 },  /* Š to S */ +  { 0x0161,  0x73, 0x00 },  /* š to s */ +  { 0x0162,  0x54, 0x00 },  /* Ţ to T */ +  { 0x0163,  0x74, 0x00 },  /* ţ to t */ +  { 0x0164,  0x54, 0x00 },  /* Ť to T */ +  { 0x0165,  0x74, 0x00 },  /* ť to t */ +  { 0x0166,  0x54, 0x00 },  /* Ŧ to T */ +  { 0x0167,  0x74, 0x00 },  /* ŧ to t */ +  { 0x0168,  0x55, 0x00 },  /* Ũ to U */ +  { 0x0169,  0x75, 0x00 },  /* ũ to u */ +  { 0x016A,  0x55, 0x00 },  /* Ū to U */ +  { 0x016B,  0x75, 0x00 },  /* ū to u */ +  { 0x016C,  0x55, 0x00 },  /* Ŭ to U */ +  { 0x016D,  0x75, 0x00 },  /* ŭ to u */ +  { 0x016E,  0x55, 0x00 },  /* Ů to U */ +  { 0x016F,  0x75, 0x00 },  /* ů to u */ +  { 0x0170,  
0x55, 0x00 },  /* Ű to U */ +  { 0x0171,  0x75, 0x00 },  /* ű to u */ +  { 0x0172,  0x55, 0x00 },  /* Ų to U */ +  { 0x0173,  0x75, 0x00 },  /* ų to u */ +  { 0x0174,  0x57, 0x00 },  /* Ŵ to W */ +  { 0x0175,  0x77, 0x00 },  /* ŵ to w */ +  { 0x0176,  0x59, 0x00 },  /* Ŷ to Y */ +  { 0x0177,  0x79, 0x00 },  /* ŷ to y */ +  { 0x0178,  0x59, 0x00 },  /* Ÿ to Y */ +  { 0x0179,  0x5A, 0x00 },  /* Ź to Z */ +  { 0x017A,  0x7A, 0x00 },  /* ź to z */ +  { 0x017B,  0x5A, 0x00 },  /* Ż to Z */ +  { 0x017C,  0x7A, 0x00 },  /* ż to z */ +  { 0x017D,  0x5A, 0x00 },  /* Ž to Z */ +  { 0x017E,  0x7A, 0x00 },  /* ž to z */ +  { 0x017F,  0x73, 0x00 },  /* ſ to s */ +  { 0x0192,  0x66, 0x00 },  /* ƒ to f */ +  { 0x0218,  0x53, 0x00 },  /* Ș to S */ +  { 0x0219,  0x73, 0x00 },  /* ș to s */ +  { 0x021A,  0x54, 0x00 },  /* Ț to T */ +  { 0x021B,  0x74, 0x00 },  /* ț to t */ +  { 0x0386,  0x41, 0x00 },  /* Ά to A */ +  { 0x0388,  0x45, 0x00 },  /* Έ to E */ +  { 0x0389,  0x49, 0x00 },  /* Ή to I */ +  { 0x038A,  0x49, 0x00 },  /* Ί to I */ +  { 0x038C,  0x4f, 0x00 },  /* Ό to O */ +  { 0x038E,  0x59, 0x00 },  /* Ύ to Y */ +  { 0x038F,  0x4f, 0x00 },  /* Ώ to O */ +  { 0x0390,  0x69, 0x00 },  /* ΐ to i */ +  { 0x0391,  0x41, 0x00 },  /* Α to A */ +  { 0x0392,  0x42, 0x00 },  /* Β to B */ +  { 0x0393,  0x47, 0x00 },  /* Γ to G */ +  { 0x0394,  0x44, 0x00 },  /* Δ to D */ +  { 0x0395,  0x45, 0x00 },  /* Ε to E */ +  { 0x0396,  0x5a, 0x00 },  /* Ζ to Z */ +  { 0x0397,  0x49, 0x00 },  /* Η to I */ +  { 0x0398,  0x54, 0x68 },  /* Θ to Th */ +  { 0x0399,  0x49, 0x00 },  /* Ι to I */ +  { 0x039A,  0x4b, 0x00 },  /* Κ to K */ +  { 0x039B,  0x4c, 0x00 },  /* Λ to L */ +  { 0x039C,  0x4d, 0x00 },  /* Μ to M */ +  { 0x039D,  0x4e, 0x00 },  /* Ν to N */ +  { 0x039E,  0x58, 0x00 },  /* Ξ to X */ +  { 0x039F,  0x4f, 0x00 },  /* Ο to O */ +  { 0x03A0,  0x50, 0x00 },  /* Π to P */ +  { 0x03A1,  0x52, 0x00 },  /* Ρ to R */ +  { 0x03A3,  0x53, 0x00 },  /* Σ to S */ +  { 0x03A4,  0x54, 0x00 },  /* Τ to T 
*/ +  { 0x03A5,  0x59, 0x00 },  /* Υ to Y */ +  { 0x03A6,  0x46, 0x00 },  /* Φ to F */ +  { 0x03A7,  0x43, 0x68 },  /* Χ to Ch */ +  { 0x03A8,  0x50, 0x73 },  /* Ψ to Ps */ +  { 0x03A9,  0x4f, 0x00 },  /* Ω to O */ +  { 0x03AA,  0x49, 0x00 },  /* Ϊ to I */ +  { 0x03AB,  0x59, 0x00 },  /* Ϋ to Y */ +  { 0x03AC,  0x61, 0x00 },  /* ά to a */ +  { 0x03AD,  0x65, 0x00 },  /* έ to e */ +  { 0x03AE,  0x69, 0x00 },  /* ή to i */ +  { 0x03AF,  0x69, 0x00 },  /* ί to i */ +  { 0x03B1,  0x61, 0x00 },  /* α to a */ +  { 0x03B2,  0x62, 0x00 },  /* β to b */ +  { 0x03B3,  0x67, 0x00 },  /* γ to g */ +  { 0x03B4,  0x64, 0x00 },  /* δ to d */ +  { 0x03B5,  0x65, 0x00 },  /* ε to e */ +  { 0x03B6,  0x7a, 0x00 },  /* ζ to z */ +  { 0x03B7,  0x69, 0x00 },  /* η to i */ +  { 0x03B8,  0x74, 0x68 },  /* θ to th */ +  { 0x03B9,  0x69, 0x00 },  /* ι to i */ +  { 0x03BA,  0x6b, 0x00 },  /* κ to k */ +  { 0x03BB,  0x6c, 0x00 },  /* λ to l */ +  { 0x03BC,  0x6d, 0x00 },  /* μ to m */ +  { 0x03BD,  0x6e, 0x00 },  /* ν to n */ +  { 0x03BE,  0x78, 0x00 },  /* ξ to x */ +  { 0x03BF,  0x6f, 0x00 },  /* ο to o */ +  { 0x03C0,  0x70, 0x00 },  /* π to p */ +  { 0x03C1,  0x72, 0x00 },  /* ρ to r */ +  { 0x03C3,  0x73, 0x00 },  /* σ to s */ +  { 0x03C4,  0x74, 0x00 },  /* τ to t */ +  { 0x03C5,  0x79, 0x00 },  /* υ to y */ +  { 0x03C6,  0x66, 0x00 },  /* φ to f */ +  { 0x03C7,  0x63, 0x68 },  /* χ to ch */ +  { 0x03C8,  0x70, 0x73 },  /* ψ to ps */ +  { 0x03C9,  0x6f, 0x00 },  /* ω to o */ +  { 0x03CA,  0x69, 0x00 },  /* ϊ to i */ +  { 0x03CB,  0x79, 0x00 },  /* ϋ to y */ +  { 0x03CC,  0x6f, 0x00 },  /* ό to o */ +  { 0x03CD,  0x79, 0x00 },  /* ύ to y */ +  { 0x03CE,  0x69, 0x00 },  /* ώ to i */ +  { 0x0400,  0x45, 0x00 },  /* Ѐ to E */ +  { 0x0401,  0x45, 0x00 },  /* Ё to E */ +  { 0x0402,  0x44, 0x00 },  /* Ђ to D */ +  { 0x0403,  0x47, 0x00 },  /* Ѓ to G */ +  { 0x0404,  0x45, 0x00 },  /* Є to E */ +  { 0x0405,  0x5a, 0x00 },  /* Ѕ to Z */ +  { 0x0406,  0x49, 0x00 },  /* І to I */ +  { 0x0407,  
0x49, 0x00 },  /* Ї to I */ +  { 0x0408,  0x4a, 0x00 },  /* Ј to J */ +  { 0x0409,  0x49, 0x00 },  /* Љ to I */ +  { 0x040A,  0x4e, 0x00 },  /* Њ to N */ +  { 0x040B,  0x44, 0x00 },  /* Ћ to D */ +  { 0x040C,  0x4b, 0x00 },  /* Ќ to K */ +  { 0x040D,  0x49, 0x00 },  /* Ѝ to I */ +  { 0x040E,  0x55, 0x00 },  /* Ў to U */ +  { 0x040F,  0x44, 0x00 },  /* Џ to D */ +  { 0x0410,  0x41, 0x00 },  /* А to A */ +  { 0x0411,  0x42, 0x00 },  /* Б to B */ +  { 0x0412,  0x56, 0x00 },  /* В to V */ +  { 0x0413,  0x47, 0x00 },  /* Г to G */ +  { 0x0414,  0x44, 0x00 },  /* Д to D */ +  { 0x0415,  0x45, 0x00 },  /* Е to E */ +  { 0x0416,  0x5a, 0x68 },  /* Ж to Zh */ +  { 0x0417,  0x5a, 0x00 },  /* З to Z */ +  { 0x0418,  0x49, 0x00 },  /* И to I */ +  { 0x0419,  0x49, 0x00 },  /* Й to I */ +  { 0x041A,  0x4b, 0x00 },  /* К to K */ +  { 0x041B,  0x4c, 0x00 },  /* Л to L */ +  { 0x041C,  0x4d, 0x00 },  /* М to M */ +  { 0x041D,  0x4e, 0x00 },  /* Н to N */ +  { 0x041E,  0x4f, 0x00 },  /* О to O */ +  { 0x041F,  0x50, 0x00 },  /* П to P */ +  { 0x0420,  0x52, 0x00 },  /* Р to R */ +  { 0x0421,  0x53, 0x00 },  /* С to S */ +  { 0x0422,  0x54, 0x00 },  /* Т to T */ +  { 0x0423,  0x55, 0x00 },  /* У to U */ +  { 0x0424,  0x46, 0x00 },  /* Ф to F */ +  { 0x0425,  0x4b, 0x68 },  /* Х to Kh */ +  { 0x0426,  0x54, 0x63 },  /* Ц to Tc */ +  { 0x0427,  0x43, 0x68 },  /* Ч to Ch */ +  { 0x0428,  0x53, 0x68 },  /* Ш to Sh */ +  { 0x0429,  0x53, 0x68 },  /* Щ to Shch */ +  { 0x042A,  0x61, 0x00 },  /*  to A */ +  { 0x042B,  0x59, 0x00 },  /* Ы to Y */ +  { 0x042C,  0x59, 0x00 },  /*  to Y */ +  { 0x042D,  0x45, 0x00 },  /* Э to E */ +  { 0x042E,  0x49, 0x75 },  /* Ю to Iu */ +  { 0x042F,  0x49, 0x61 },  /* Я to Ia */ +  { 0x0430,  0x61, 0x00 },  /* а to a */ +  { 0x0431,  0x62, 0x00 },  /* б to b */ +  { 0x0432,  0x76, 0x00 },  /* в to v */ +  { 0x0433,  0x67, 0x00 },  /* г to g */ +  { 0x0434,  0x64, 0x00 },  /* д to d */ +  { 0x0435,  0x65, 0x00 },  /* е to e */ +  { 0x0436,  0x7a, 0x68 },  /* 
ж to zh */ +  { 0x0437,  0x7a, 0x00 },  /* з to z */ +  { 0x0438,  0x69, 0x00 },  /* и to i */ +  { 0x0439,  0x69, 0x00 },  /* й to i */ +  { 0x043A,  0x6b, 0x00 },  /* к to k */ +  { 0x043B,  0x6c, 0x00 },  /* л to l */ +  { 0x043C,  0x6d, 0x00 },  /* м to m */ +  { 0x043D,  0x6e, 0x00 },  /* н to n */ +  { 0x043E,  0x6f, 0x00 },  /* о to o */ +  { 0x043F,  0x70, 0x00 },  /* п to p */ +  { 0x0440,  0x72, 0x00 },  /* р to r */ +  { 0x0441,  0x73, 0x00 },  /* с to s */ +  { 0x0442,  0x74, 0x00 },  /* т to t */ +  { 0x0443,  0x75, 0x00 },  /* у to u */ +  { 0x0444,  0x66, 0x00 },  /* ф to f */ +  { 0x0445,  0x6b, 0x68 },  /* х to kh */ +  { 0x0446,  0x74, 0x63 },  /* ц to tc */ +  { 0x0447,  0x63, 0x68 },  /* ч to ch */ +  { 0x0448,  0x73, 0x68 },  /* ш to sh */ +  { 0x0449,  0x73, 0x68 },  /* щ to shch */ +  { 0x044A,  0x61, 0x00 },  /*  to a */ +  { 0x044B,  0x79, 0x00 },  /* ы to y */ +  { 0x044C,  0x79, 0x00 },  /*  to y */ +  { 0x044D,  0x65, 0x00 },  /* э to e */ +  { 0x044E,  0x69, 0x75 },  /* ю to iu */ +  { 0x044F,  0x69, 0x61 },  /* я to ia */ +  { 0x0450,  0x65, 0x00 },  /* ѐ to e */ +  { 0x0451,  0x65, 0x00 },  /* ё to e */ +  { 0x0452,  0x64, 0x00 },  /* ђ to d */ +  { 0x0453,  0x67, 0x00 },  /* ѓ to g */ +  { 0x0454,  0x65, 0x00 },  /* є to e */ +  { 0x0455,  0x7a, 0x00 },  /* ѕ to z */ +  { 0x0456,  0x69, 0x00 },  /* і to i */ +  { 0x0457,  0x69, 0x00 },  /* ї to i */ +  { 0x0458,  0x6a, 0x00 },  /* ј to j */ +  { 0x0459,  0x69, 0x00 },  /* љ to i */ +  { 0x045A,  0x6e, 0x00 },  /* њ to n */ +  { 0x045B,  0x64, 0x00 },  /* ћ to d */ +  { 0x045C,  0x6b, 0x00 },  /* ќ to k */ +  { 0x045D,  0x69, 0x00 },  /* ѝ to i */ +  { 0x045E,  0x75, 0x00 },  /* ў to u */ +  { 0x045F,  0x64, 0x00 },  /* џ to d */ +  { 0x1E02,  0x42, 0x00 },  /* Ḃ to B */ +  { 0x1E03,  0x62, 0x00 },  /* ḃ to b */ +  { 0x1E0A,  0x44, 0x00 },  /* Ḋ to D */ +  { 0x1E0B,  0x64, 0x00 },  /* ḋ to d */ +  { 0x1E1E,  0x46, 0x00 },  /* Ḟ to F */ +  { 0x1E1F,  0x66, 0x00 },  /* ḟ to f */ +  { 
0x1E40,  0x4D, 0x00 },  /* Ṁ to M */ +  { 0x1E41,  0x6D, 0x00 },  /* ṁ to m */ +  { 0x1E56,  0x50, 0x00 },  /* Ṗ to P */ +  { 0x1E57,  0x70, 0x00 },  /* ṗ to p */ +  { 0x1E60,  0x53, 0x00 },  /* Ṡ to S */ +  { 0x1E61,  0x73, 0x00 },  /* ṡ to s */ +  { 0x1E6A,  0x54, 0x00 },  /* Ṫ to T */ +  { 0x1E6B,  0x74, 0x00 },  /* ṫ to t */ +  { 0x1E80,  0x57, 0x00 },  /* Ẁ to W */ +  { 0x1E81,  0x77, 0x00 },  /* ẁ to w */ +  { 0x1E82,  0x57, 0x00 },  /* Ẃ to W */ +  { 0x1E83,  0x77, 0x00 },  /* ẃ to w */ +  { 0x1E84,  0x57, 0x00 },  /* Ẅ to W */ +  { 0x1E85,  0x77, 0x00 },  /* ẅ to w */ +  { 0x1EF2,  0x59, 0x00 },  /* Ỳ to Y */ +  { 0x1EF3,  0x79, 0x00 },  /* ỳ to y */ +  { 0xFB00,  0x66, 0x66 },  /* ff to ff */ +  { 0xFB01,  0x66, 0x69 },  /* fi to fi */ +  { 0xFB02,  0x66, 0x6C },  /* fl to fl */ +  { 0xFB05,  0x73, 0x74 },  /* ſt to st */ +  { 0xFB06,  0x73, 0x74 },  /* st to st */ +}; + +/* +** Convert the input string from UTF-8 into pure ASCII by converting +** all non-ASCII characters to some combination of characters in the +** ASCII subset. +** +** The returned string might contain more characters than the input. +** +** Space to hold the returned string comes from sqlite3_malloc() and +** should be freed by the caller. 
+*/ +static unsigned char *transliterate(const unsigned char *zIn, int nIn){ +  unsigned char *zOut = sqlite3_malloc( nIn*4 + 1 ); +  int c, sz, nOut; +  if( zOut==0 ) return 0; +  nOut = 0; +  while( nIn>0 ){ +    c = utf8Read(zIn, nIn, &sz); +    zIn += sz; +    nIn -= sz; +    if( c<=127 ){ +      zOut[nOut++] = c; +    }else{ +      int xTop, xBtm, x; +      xTop = sizeof(translit)/sizeof(translit[0]) - 1; +      xBtm = 0; +      while( xTop>=xBtm ){ +        x = (xTop + xBtm)/2; +        if( translit[x].cFrom==c ){ +          zOut[nOut++] = translit[x].cTo0; +          if( translit[x].cTo1 ){ +            zOut[nOut++] = translit[x].cTo1; +            /* Add an extra "ch" after the "sh" for Щ and щ */ +            if( c==0x0429 || c== 0x0449 ){ +              zOut[nOut++] = 'c'; +              zOut[nOut++] = 'h'; +            } +          } +          c = 0; +          break; +        }else if( translit[x].cFrom>c ){ +          xTop = x-1; +        }else{ +          xBtm = x+1; +        } +      } +      if( c ) zOut[nOut++] = '?'; +    } +  } +  zOut[nOut] = 0; +  return zOut; +} + +/* +** Return the number of characters in the shortest prefix of the input +** string that transliterates to an ASCII string nTrans bytes or longer. +** Or, if the transliteration of the input string is less than nTrans +** bytes in size, return the number of characters in the input string. 
+*/ +static int translen_to_charlen(const char *zIn, int nIn, int nTrans){ +  int i, c, sz, nOut; +  int nChar; + +  i = nOut = 0; +  for(nChar=0; i<nIn && nOut<nTrans; nChar++){ +    c = utf8Read((const unsigned char *)&zIn[i], nIn-i, &sz); +    i += sz; + +    nOut++; +    if( c>=128 ){ +      int xTop, xBtm, x; +      xTop = sizeof(translit)/sizeof(translit[0]) - 1; +      xBtm = 0; +      while( xTop>=xBtm ){ +        x = (xTop + xBtm)/2; +        if( translit[x].cFrom==c ){ +          if( translit[x].cTo1 ) nOut++; +          if( c==0x0429 || c== 0x0449 ) nOut += 2; +          break; +        }else if( translit[x].cFrom>c ){ +          xTop = x-1; +        }else{ +          xBtm = x+1; +        } +      } +    } +  } + +  return nChar; +} + + +/* +**    spellfix1_translit(X) +** +** Convert a string that contains non-ASCII Roman characters into  +** pure ASCII. +*/ +static void transliterateSqlFunc( +  sqlite3_context *context, +  int argc, +  sqlite3_value **argv +){ +  const unsigned char *zIn = sqlite3_value_text(argv[0]); +  int nIn = sqlite3_value_bytes(argv[0]); +  unsigned char *zOut = transliterate(zIn, nIn); +  if( zOut==0 ){ +    sqlite3_result_error_nomem(context); +  }else{ +    sqlite3_result_text(context, (char*)zOut, -1, sqlite3_free); +  } +} + +/* +**    spellfix1_scriptcode(X) +** +** Try to determine the dominant script used by the word X and return +** its ISO 15924 numeric code. +** +** The current implementation only understands the following scripts: +** +**    215  (Latin) +**    220  (Cyrillic) +**    200  (Greek) +** +** This routine will return 998 if the input X contains characters from +** two or more of the above scripts or 999 if X contains no characters +** from any of the above scripts. 
+*/ +static void scriptCodeSqlFunc( +  sqlite3_context *context, +  int argc, +  sqlite3_value **argv +){ +  const unsigned char *zIn = sqlite3_value_text(argv[0]); +  int nIn = sqlite3_value_bytes(argv[0]); +  int c, sz; +  int scriptMask = 0; +  int res; +# define SCRIPT_LATIN       0x0001 +# define SCRIPT_CYRILLIC    0x0002 +# define SCRIPT_GREEK       0x0004 + +  while( nIn>0 ){ +    c = utf8Read(zIn, nIn, &sz); +    zIn += sz; +    nIn -= sz; +    if( c<0x02af ){ +      scriptMask |= SCRIPT_LATIN; +    }else if( c>=0x0400 && c<=0x04ff ){ +      scriptMask |= SCRIPT_CYRILLIC; +    }else if( c>=0x0386 && c<=0x03ce ){ +      scriptMask |= SCRIPT_GREEK; +    } +  } +  switch( scriptMask ){ +    case 0:                res = 999; break; +    case SCRIPT_LATIN:     res = 215; break; +    case SCRIPT_CYRILLIC:  res = 220; break; +    case SCRIPT_GREEK:     res = 200; break; +    default:               res = 998; break; +  } +  sqlite3_result_int(context, res); +} + +/* End transliterate +****************************************************************************** +****************************************************************************** +** Begin spellfix1 virtual table. 
+*/ + +/* Maximum length of a phonehash used for querying the shadow table */ +#define SPELLFIX_MX_HASH  8 + +/* Maximum number of hash strings to examine per query */ +#define SPELLFIX_MX_RUN   1 + +typedef struct spellfix1_vtab spellfix1_vtab; +typedef struct spellfix1_cursor spellfix1_cursor; + +/* Fuzzy-search virtual table object */ +struct spellfix1_vtab { +  sqlite3_vtab base;         /* Base class - must be first */ +  sqlite3 *db;               /* Database connection */ +  char *zDbName;             /* Name of database holding this table */ +  char *zTableName;          /* Name of the virtual table */ +  char *zCostTable;          /* Table holding edit-distance cost numbers */ +  EditDist3Config *pConfig3; /* Parsed edit distance costs */ +}; + +/* Fuzzy-search cursor object */ +struct spellfix1_cursor { +  sqlite3_vtab_cursor base;    /* Base class - must be first */ +  spellfix1_vtab *pVTab;       /* The table to which this cursor belongs */ +  char *zPattern;              /* rhs of MATCH clause */ +  int nRow;                    /* Number of rows of content */ +  int nAlloc;                  /* Number of allocated rows */ +  int iRow;                    /* Current row of content */ +  int iLang;                   /* Value of the langid= constraint */ +  int iTop;                    /* Value of the top= constraint */ +  int iScope;                  /* Value of the scope= constraint */ +  int nSearch;                 /* Number of vocabulary items checked */ +  sqlite3_stmt *pFullScan;     /* Shadow query for a full table scan */ +  struct spellfix1_row {       /* For each row of content */ +    sqlite3_int64 iRowid;         /* Rowid for this row */ +    char *zWord;                  /* Text for this row */ +    int iRank;                    /* Rank for this row */ +    int iDistance;                /* Distance from pattern for this row */ +    int iScore;                   /* Score for sorting */ +    int iMatchlen;                /* Value of matchlen 
column (or -1) */ +    char zHash[SPELLFIX_MX_HASH]; /* the phonehash used for this match */ +  } *a;  +}; + +/* +** Construct one or more SQL statements from the format string given +** and then evaluate those statements. The success code is written +** into *pRc. +** +** If *pRc is initially non-zero then this routine is a no-op. +*/ +static void spellfix1DbExec( +  int *pRc,              /* Success code */ +  sqlite3 *db,           /* Database in which to run SQL */ +  const char *zFormat,   /* Format string for SQL */ +  ...                    /* Arguments to the format string */ +){ +  va_list ap; +  char *zSql; +  if( *pRc ) return; +  va_start(ap, zFormat); +  zSql = sqlite3_vmprintf(zFormat, ap); +  va_end(ap); +  if( zSql==0 ){ +    *pRc = SQLITE_NOMEM; +  }else{ +    *pRc = sqlite3_exec(db, zSql, 0, 0, 0); +    sqlite3_free(zSql); +  } +} + +/* +** xDisconnect/xDestroy method for the fuzzy-search module. +*/ +static int spellfix1Uninit(int isDestroy, sqlite3_vtab *pVTab){ +  spellfix1_vtab *p = (spellfix1_vtab*)pVTab; +  int rc = SQLITE_OK; +  if( isDestroy ){ +    sqlite3 *db = p->db; +    spellfix1DbExec(&rc, db, "DROP TABLE IF EXISTS \"%w\".\"%w_vocab\"", +                  p->zDbName, p->zTableName); +  } +  if( rc==SQLITE_OK ){ +    sqlite3_free(p->zTableName); +    editDist3ConfigDelete(p->pConfig3); +    sqlite3_free(p->zCostTable); +    sqlite3_free(p); +  } +  return rc; +} +static int spellfix1Disconnect(sqlite3_vtab *pVTab){ +  return spellfix1Uninit(0, pVTab); +} +static int spellfix1Destroy(sqlite3_vtab *pVTab){ +  return spellfix1Uninit(1, pVTab); +} + +/* +** Make a copy of a string.  Remove leading and trailing whitespace +** and dequote it. 
+*/ +static char *spellfix1Dequote(const char *zIn){ +  char *zOut; +  int i, j; +  char c; +  while( isspace(zIn[0]) ) zIn++; +  zOut = sqlite3_mprintf("%s", zIn); +  if( zOut==0 ) return 0; +  i = (int)strlen(zOut); +#if 0  /* The parser will never leave spaces at the end */ +  while( i>0 && isspace(zOut[i-1]) ){ i--; } +#endif +  zOut[i] = 0; +  c = zOut[0]; +  if( c=='\'' || c=='"' ){ +    for(i=1, j=0; ALWAYS(zOut[i]); i++){ +      zOut[j++] = zOut[i]; +      if( zOut[i]==c ){ +        if( zOut[i+1]==c ){ +          i++; +        }else{ +          zOut[j-1] = 0; +          break; +        } +      } +    } +  } +  return zOut; +} + + +/* +** xConnect/xCreate method for the spellfix1 module. Arguments are: +** +**   argv[0]   -> module name  ("spellfix1") +**   argv[1]   -> database name +**   argv[2]   -> table name +**   argv[3].. -> optional arguments (i.e. "edit_cost_table" parameter) +*/ +static int spellfix1Init( +  int isCreate, +  sqlite3 *db, +  void *pAux, +  int argc, const char *const*argv, +  sqlite3_vtab **ppVTab, +  char **pzErr +){ +  spellfix1_vtab *pNew = 0; +  const char *zModule = argv[0]; +  const char *zDbName = argv[1]; +  const char *zTableName = argv[2]; +  int nDbName; +  int rc = SQLITE_OK; +  int i; + +  nDbName = (int)strlen(zDbName); +  pNew = sqlite3_malloc( sizeof(*pNew) + nDbName + 1); +  if( pNew==0 ){ +    rc = SQLITE_NOMEM; +  }else{ +    memset(pNew, 0, sizeof(*pNew)); +    pNew->zDbName = (char*)&pNew[1]; +    memcpy(pNew->zDbName, zDbName, nDbName+1); +    pNew->zTableName = sqlite3_mprintf("%s", zTableName); +    pNew->db = db; +    if( pNew->zTableName==0 ){ +      rc = SQLITE_NOMEM; +    }else{ +      rc = sqlite3_declare_vtab(db,  +           "CREATE TABLE x(word,rank,distance,langid, " +           "score, matchlen, phonehash HIDDEN, " +           "top HIDDEN, scope HIDDEN, srchcnt HIDDEN, " +           "soundslike HIDDEN, command HIDDEN)" +      ); +#define SPELLFIX_COL_WORD            0 +#define SPELLFIX_COL_RANK     
       1 +#define SPELLFIX_COL_DISTANCE        2 +#define SPELLFIX_COL_LANGID          3 +#define SPELLFIX_COL_SCORE           4 +#define SPELLFIX_COL_MATCHLEN        5 +#define SPELLFIX_COL_PHONEHASH       6 +#define SPELLFIX_COL_TOP             7 +#define SPELLFIX_COL_SCOPE           8 +#define SPELLFIX_COL_SRCHCNT         9 +#define SPELLFIX_COL_SOUNDSLIKE     10 +#define SPELLFIX_COL_COMMAND        11 +    } +    if( rc==SQLITE_OK && isCreate ){ +      sqlite3_uint64 r; +      spellfix1DbExec(&rc, db, +         "CREATE TABLE IF NOT EXISTS \"%w\".\"%w_vocab\"(\n" +         "  id INTEGER PRIMARY KEY,\n" +         "  rank INT,\n" +         "  langid INT,\n" +         "  word TEXT,\n" +         "  k1 TEXT,\n" +         "  k2 TEXT\n" +         ");\n", +         zDbName, zTableName +      ); +      sqlite3_randomness(sizeof(r), &r); +      spellfix1DbExec(&rc, db, +         "CREATE INDEX IF NOT EXISTS \"%w\".\"%w_index_%llx\" " +            "ON \"%w_vocab\"(langid,k2);", +         zDbName, zModule, r, zTableName +      ); +    } +    for(i=3; rc==SQLITE_OK && i<argc; i++){ +      if( strncmp(argv[i],"edit_cost_table=",16)==0 && pNew->zCostTable==0 ){ +        pNew->zCostTable = spellfix1Dequote(&argv[i][16]); +        if( pNew->zCostTable==0 ) rc = SQLITE_NOMEM; +        continue; +      } +      *pzErr = sqlite3_mprintf("bad argument to spellfix1(): \"%s\"", argv[i]); +      rc = SQLITE_ERROR;  +    } +  } + +  if( rc && pNew ){ +    *ppVTab = 0; +    spellfix1Uninit(0, &pNew->base); +  }else{ +    *ppVTab = (sqlite3_vtab *)pNew; +  } +  return rc; +} + +/* +** The xConnect and xCreate methods +*/ +static int spellfix1Connect( +  sqlite3 *db, +  void *pAux, +  int argc, const char *const*argv, +  sqlite3_vtab **ppVTab, +  char **pzErr +){ +  return spellfix1Init(0, db, pAux, argc, argv, ppVTab, pzErr); +} +static int spellfix1Create( +  sqlite3 *db, +  void *pAux, +  int argc, const char *const*argv, +  sqlite3_vtab **ppVTab, +  char **pzErr +){ +  return 
spellfix1Init(1, db, pAux, argc, argv, ppVTab, pzErr); +} + +/* +** Clear all of the content from a cursor. +*/ +static void spellfix1ResetCursor(spellfix1_cursor *pCur){ +  int i; +  for(i=0; i<pCur->nRow; i++){ +    sqlite3_free(pCur->a[i].zWord); +  } +  pCur->nRow = 0; +  pCur->iRow = 0; +  pCur->nSearch = 0; +  if( pCur->pFullScan ){ +    sqlite3_finalize(pCur->pFullScan); +    pCur->pFullScan = 0; +  } +} + +/* +** Resize the cursor to hold up to N rows of content +*/ +static void spellfix1ResizeCursor(spellfix1_cursor *pCur, int N){ +  struct spellfix1_row *aNew; +  assert( N>=pCur->nRow ); +  aNew = sqlite3_realloc(pCur->a, sizeof(pCur->a[0])*N); +  if( aNew==0 && N>0 ){ +    spellfix1ResetCursor(pCur); +    sqlite3_free(pCur->a); +    pCur->nAlloc = 0; +    pCur->a = 0; +  }else{ +    pCur->nAlloc = N; +    pCur->a = aNew; +  } +} + + +/* +** Close a fuzzy-search cursor. +*/ +static int spellfix1Close(sqlite3_vtab_cursor *cur){ +  spellfix1_cursor *pCur = (spellfix1_cursor *)cur; +  spellfix1ResetCursor(pCur); +  spellfix1ResizeCursor(pCur, 0); +  sqlite3_free(pCur->zPattern); +  sqlite3_free(pCur); +  return SQLITE_OK; +} + +/* +** Search for terms of these forms: +** +**   (A)    word MATCH $str +**   (B)    langid == $langid +**   (C)    top = $top +**   (D)    scope = $scope +**   (E)    distance < $distance +**   (F)    distance <= $distance +** +** The plan number is a bit mask formed with these bits: +** +**   0x01   (A) is found +**   0x02   (B) is found +**   0x04   (C) is found +**   0x08   (D) is found +**   0x10   (E) is found +**   0x20   (F) is found +** +** filter.argv[*] values contains $str, $langid, $top, and $scope, +** if specified and in that order. 
+*/ +static int spellfix1BestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){ +  int iPlan = 0; +  int iLangTerm = -1; +  int iTopTerm = -1; +  int iScopeTerm = -1; +  int iDistTerm = -1; +  int i; +  const struct sqlite3_index_constraint *pConstraint; +  pConstraint = pIdxInfo->aConstraint; +  for(i=0; i<pIdxInfo->nConstraint; i++, pConstraint++){ +    if( pConstraint->usable==0 ) continue; + +    /* Terms of the form:  word MATCH $str */ +    if( (iPlan & 1)==0  +     && pConstraint->iColumn==SPELLFIX_COL_WORD +     && pConstraint->op==SQLITE_INDEX_CONSTRAINT_MATCH +    ){ +      iPlan |= 1; +      pIdxInfo->aConstraintUsage[i].argvIndex = 1; +      pIdxInfo->aConstraintUsage[i].omit = 1; +    } + +    /* Terms of the form:  langid = $langid  */ +    if( (iPlan & 2)==0 +     && pConstraint->iColumn==SPELLFIX_COL_LANGID +     && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ +    ){ +      iPlan |= 2; +      iLangTerm = i; +    } + +    /* Terms of the form:  top = $top */ +    if( (iPlan & 4)==0 +     && pConstraint->iColumn==SPELLFIX_COL_TOP +     && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ +    ){ +      iPlan |= 4; +      iTopTerm = i; +    } + +    /* Terms of the form:  scope = $scope */ +    if( (iPlan & 8)==0 +     && pConstraint->iColumn==SPELLFIX_COL_SCOPE +     && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ +    ){ +      iPlan |= 8; +      iScopeTerm = i; +    } + +    /* Terms of the form:  distance < $dist or distance <= $dist */ +    if( (iPlan & (16|32))==0 +     && pConstraint->iColumn==SPELLFIX_COL_DISTANCE +     && (pConstraint->op==SQLITE_INDEX_CONSTRAINT_LT +          || pConstraint->op==SQLITE_INDEX_CONSTRAINT_LE) +    ){ +      iPlan |= pConstraint->op==SQLITE_INDEX_CONSTRAINT_LT ? 
16 : 32; +      iDistTerm = i; +    } +  } +  if( iPlan&1 ){ +    int idx = 2; +    pIdxInfo->idxNum = iPlan; +    if( pIdxInfo->nOrderBy==1 +     && pIdxInfo->aOrderBy[0].iColumn==SPELLFIX_COL_SCORE +     && pIdxInfo->aOrderBy[0].desc==0 +    ){ +      pIdxInfo->orderByConsumed = 1;  /* Default order by iScore */ +    } +    if( iPlan&2 ){ +      pIdxInfo->aConstraintUsage[iLangTerm].argvIndex = idx++; +      pIdxInfo->aConstraintUsage[iLangTerm].omit = 1; +    } +    if( iPlan&4 ){ +      pIdxInfo->aConstraintUsage[iTopTerm].argvIndex = idx++; +      pIdxInfo->aConstraintUsage[iTopTerm].omit = 1; +    } +    if( iPlan&8 ){ +      pIdxInfo->aConstraintUsage[iScopeTerm].argvIndex = idx++; +      pIdxInfo->aConstraintUsage[iScopeTerm].omit = 1; +    } +    if( iPlan&(16|32) ){ +      pIdxInfo->aConstraintUsage[iDistTerm].argvIndex = idx++; +      pIdxInfo->aConstraintUsage[iDistTerm].omit = 1; +    } +    pIdxInfo->estimatedCost = (double)10000; +  }else{ +    pIdxInfo->idxNum = 0; +    pIdxInfo->estimatedCost = (double)10000000; +  } +  return SQLITE_OK; +} + +/* +** Open a new fuzzy-search cursor. +*/ +static int spellfix1Open(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){ +  spellfix1_vtab *p = (spellfix1_vtab*)pVTab; +  spellfix1_cursor *pCur; +  pCur = sqlite3_malloc( sizeof(*pCur) ); +  if( pCur==0 ) return SQLITE_NOMEM; +  memset(pCur, 0, sizeof(*pCur)); +  pCur->pVTab = p; +  *ppCursor = &pCur->base; +  return SQLITE_OK; +} + +/* +** Adjust a distance measurement by the words rank in order to show +** preference to common words. +*/ +static int spellfix1Score(int iDistance, int iRank){ +  int iLog2; +  for(iLog2=0; iRank>0; iLog2++, iRank>>=1){} +  return iDistance + 32 - iLog2; +} + +/* +** Compare two spellfix1_row objects for sorting purposes in qsort() such +** that they sort in order of increasing distance. 
+*/ +static int spellfix1RowCompare(const void *A, const void *B){ +  const struct spellfix1_row *a = (const struct spellfix1_row*)A; +  const struct spellfix1_row *b = (const struct spellfix1_row*)B; +  return a->iScore - b->iScore; +} + +/* +** A structure used to pass information from spellfix1FilterForMatch() +** into spellfix1RunQuery(). +*/ +typedef struct MatchQuery { +  spellfix1_cursor *pCur;          /* The cursor being queried */ +  sqlite3_stmt *pStmt;             /* shadow table query statment */ +  char zHash[SPELLFIX_MX_HASH];    /* The current phonehash for zPattern */ +  const char *zPattern;            /* Transliterated input string */ +  int nPattern;                    /* Length of zPattern */ +  EditDist3FromString *pMatchStr3; /* Original unicode string */ +  EditDist3Config *pConfig3;       /* Edit-distance cost coefficients */ +  const EditDist3Lang *pLang;      /* The selected language coefficients */ +  int iLang;                       /* The language id */ +  int iScope;                      /* Default scope */ +  int iMaxDist;                    /* Maximum allowed edit distance, or -1 */ +  int rc;                          /* Error code */ +  int nRun;                  /* Number of prior runs for the same zPattern */ +  char azPrior[SPELLFIX_MX_RUN][SPELLFIX_MX_HASH];  /* Prior hashes */ +} MatchQuery; + +/* +** Run a query looking for the best matches against zPattern using +** zHash as the character class seed hash. 
+*/ +static void spellfix1RunQuery(MatchQuery *p, const char *zQuery, int nQuery){ +  const char *zK1; +  const char *zWord; +  int iDist; +  int iRank; +  int iScore; +  int iWorst = 0; +  int idx; +  int idxWorst = -1; +  int i; +  int iScope = p->iScope; +  spellfix1_cursor *pCur = p->pCur; +  sqlite3_stmt *pStmt = p->pStmt; +  char zHash1[SPELLFIX_MX_HASH]; +  char zHash2[SPELLFIX_MX_HASH]; +  char *zClass; +  int nClass; +  int rc; + +  if( pCur->a==0 || p->rc ) return;   /* Prior memory allocation failure */ +  zClass = (char*)phoneticHash((unsigned char*)zQuery, nQuery); +  if( zClass==0 ){ +    p->rc = SQLITE_NOMEM; +    return; +  } +  nClass = (int)strlen(zClass); +  if( nClass>SPELLFIX_MX_HASH-2 ){ +    nClass = SPELLFIX_MX_HASH-2; +    zClass[nClass] = 0; +  } +  if( nClass<=iScope ){ +    if( nClass>2 ){ +      iScope = nClass-1; +    }else{ +      iScope = nClass; +    } +  } +  memcpy(zHash1, zClass, iScope); +  sqlite3_free(zClass); +  zHash1[iScope] = 0; +  memcpy(zHash2, zHash1, iScope); +  zHash2[iScope] = 'Z'; +  zHash2[iScope+1] = 0; +#if SPELLFIX_MX_RUN>1 +  for(i=0; i<p->nRun; i++){ +    if( strcmp(p->azPrior[i], zHash1)==0 ) return; +  } +#endif +  assert( p->nRun<SPELLFIX_MX_RUN ); +  memcpy(p->azPrior[p->nRun++], zHash1, iScope+1); +  if( sqlite3_bind_text(pStmt, 1, zHash1, -1, SQLITE_STATIC)==SQLITE_NOMEM +   || sqlite3_bind_text(pStmt, 2, zHash2, -1, SQLITE_STATIC)==SQLITE_NOMEM +  ){ +    p->rc = SQLITE_NOMEM; +    return; +  } +#if SPELLFIX_MX_RUN>1 +  for(i=0; i<pCur->nRow; i++){ +    if( pCur->a[i].iScore>iWorst ){ +      iWorst = pCur->a[i].iScore; +      idxWorst = i; +    } +  } +#endif +  while( sqlite3_step(pStmt)==SQLITE_ROW ){ +    int iMatchlen = -1; +    iRank = sqlite3_column_int(pStmt, 2); +    if( p->pMatchStr3 ){ +      int nWord = sqlite3_column_bytes(pStmt, 1); +      zWord = (const char*)sqlite3_column_text(pStmt, 1); +      iDist = editDist3Core(p->pMatchStr3, zWord, nWord, p->pLang, &iMatchlen); +    }else{ +      
zK1 = (const char*)sqlite3_column_text(pStmt, 3); +      if( zK1==0 ) continue; +      iDist = editdist1(p->zPattern, zK1, 0); +    } +    if( iDist<0 ){ +      p->rc = SQLITE_NOMEM; +      break; +    } +    pCur->nSearch++; +    iScore = spellfix1Score(iDist,iRank); +    if( p->iMaxDist>=0 ){ +      if( iDist>p->iMaxDist ) continue; +      if( pCur->nRow>=pCur->nAlloc-1 ){ +        spellfix1ResizeCursor(pCur, pCur->nAlloc*2 + 10); +        if( pCur->a==0 ) break; +      } +      idx = pCur->nRow; +    }else if( pCur->nRow<pCur->nAlloc ){ +      idx = pCur->nRow; +    }else if( iScore<iWorst ){ +      idx = idxWorst; +      sqlite3_free(pCur->a[idx].zWord); +    }else{ +      continue; +    } +    pCur->a[idx].zWord = sqlite3_mprintf("%s", sqlite3_column_text(pStmt, 1)); +    if( pCur->a[idx].zWord==0 ){ +      p->rc = SQLITE_NOMEM; +      break; +    } +    pCur->a[idx].iRowid = sqlite3_column_int64(pStmt, 0); +    pCur->a[idx].iRank = iRank; +    pCur->a[idx].iDistance = iDist; +    pCur->a[idx].iScore = iScore; +    pCur->a[idx].iMatchlen = iMatchlen; +    memcpy(pCur->a[idx].zHash, zHash1, iScope+1); +    if( pCur->nRow<pCur->nAlloc ) pCur->nRow++; +    if( pCur->nRow==pCur->nAlloc ){ +      iWorst = pCur->a[0].iScore; +      idxWorst = 0; +      for(i=1; i<pCur->nRow; i++){ +        iScore = pCur->a[i].iScore; +        if( iWorst<iScore ){ +          iWorst = iScore; +          idxWorst = i; +        } +      } +    } +  } +  rc = sqlite3_reset(pStmt); +  if( rc ) p->rc = rc; +} + +/* +** This version of the xFilter method work if the MATCH term is present +** and we are doing a scan. 
+*/ +static int spellfix1FilterForMatch( +  spellfix1_cursor *pCur, +  int idxNum, +  int argc, +  sqlite3_value **argv +){ +  const unsigned char *zMatchThis;   /* RHS of the MATCH operator */ +  EditDist3FromString *pMatchStr3 = 0; /* zMatchThis as an editdist string */ +  char *zPattern;                    /* Transliteration of zMatchThis */ +  int nPattern;                      /* Length of zPattern */ +  int iLimit = 20;                   /* Max number of rows of output */ +  int iScope = 3;                    /* Use this many characters of zClass */ +  int iLang = 0;                     /* Language code */ +  char *zSql;                        /* SQL of shadow table query */ +  sqlite3_stmt *pStmt = 0;           /* Shadow table query */ +  int rc;                            /* Result code */ +  int idx = 1;                       /* Next available filter parameter */ +  spellfix1_vtab *p = pCur->pVTab;   /* The virtual table that owns pCur */ +  MatchQuery x;                      /* For passing info to RunQuery() */ + +  /* Load the cost table if we have not already done so */ +  if( p->zCostTable!=0 && p->pConfig3==0 ){ +    p->pConfig3 = sqlite3_malloc( sizeof(p->pConfig3[0]) ); +    if( p->pConfig3==0 ) return SQLITE_NOMEM; +    memset(p->pConfig3, 0, sizeof(p->pConfig3[0])); +    rc = editDist3ConfigLoad(p->pConfig3, p->db, p->zCostTable); +    if( rc ) return rc; +  } +  memset(&x, 0, sizeof(x)); +  x.iScope = 3;  /* Default scope if none specified by "WHERE scope=N" */ +  x.iMaxDist = -1;   /* Maximum allowed edit distance */ + +  if( idxNum&2 ){ +    iLang = sqlite3_value_int(argv[idx++]); +  } +  if( idxNum&4 ){ +    iLimit = sqlite3_value_int(argv[idx++]); +    if( iLimit<1 ) iLimit = 1; +  } +  if( idxNum&8 ){ +    x.iScope = sqlite3_value_int(argv[idx++]); +    if( x.iScope<1 ) x.iScope = 1; +    if( x.iScope>SPELLFIX_MX_HASH-2 ) x.iScope = SPELLFIX_MX_HASH-2; +  } +  if( idxNum&(16|32) ){ +    x.iMaxDist = sqlite3_value_int(argv[idx++]); +    if( 
idxNum&16 ) x.iMaxDist--; +    if( x.iMaxDist<0 ) x.iMaxDist = 0; +  } +  spellfix1ResetCursor(pCur); +  spellfix1ResizeCursor(pCur, iLimit); +  zMatchThis = sqlite3_value_text(argv[0]); +  if( zMatchThis==0 ) return SQLITE_OK; +  if( p->pConfig3 ){ +    x.pLang = editDist3FindLang(p->pConfig3, iLang); +    pMatchStr3 = editDist3FromStringNew(x.pLang, (const char*)zMatchThis, -1); +    if( pMatchStr3==0 ){ +      x.rc = SQLITE_NOMEM; +      goto filter_exit; +    } +  }else{ +    x.pLang = 0; +  } +  zPattern = (char*)transliterate(zMatchThis, sqlite3_value_bytes(argv[0])); +  sqlite3_free(pCur->zPattern); +  pCur->zPattern = zPattern; +  if( zPattern==0 ){ +    x.rc = SQLITE_NOMEM; +    goto filter_exit; +  } +  nPattern = (int)strlen(zPattern); +  if( zPattern[nPattern-1]=='*' ) nPattern--; +  zSql = sqlite3_mprintf( +     "SELECT id, word, rank, k1" +     "  FROM \"%w\".\"%w_vocab\"" +     " WHERE langid=%d AND k2>=?1 AND k2<?2", +     p->zDbName, p->zTableName, iLang +  ); +  if( zSql==0 ){ +    x.rc = SQLITE_NOMEM; +    pStmt = 0; +    goto filter_exit; +  } +  rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0); +  sqlite3_free(zSql); +  pCur->iLang = iLang; +  x.pCur = pCur; +  x.pStmt = pStmt; +  x.zPattern = zPattern; +  x.nPattern = nPattern; +  x.pMatchStr3 = pMatchStr3; +  x.iLang = iLang; +  x.rc = rc; +  x.pConfig3 = p->pConfig3; +  if( x.rc==SQLITE_OK ){ +    spellfix1RunQuery(&x, zPattern, nPattern); +  } + +  if( pCur->a ){ +    qsort(pCur->a, pCur->nRow, sizeof(pCur->a[0]), spellfix1RowCompare); +    pCur->iTop = iLimit; +    pCur->iScope = iScope; +  }else{ +    x.rc = SQLITE_NOMEM; +  } + +filter_exit: +  sqlite3_finalize(pStmt); +  editDist3FromStringDelete(pMatchStr3); +  return x.rc; +} + +/* +** This version of xFilter handles a full-table scan case +*/ +static int spellfix1FilterForFullScan( +  spellfix1_cursor *pCur, +  int idxNum, +  int argc, +  sqlite3_value **argv +){ +  int rc; +  char *zSql; +  spellfix1_vtab *pVTab = pCur->pVTab; +  
spellfix1ResetCursor(pCur); +  zSql = sqlite3_mprintf( +     "SELECT word, rank, NULL, langid, id FROM \"%w\".\"%w_vocab\"", +     pVTab->zDbName, pVTab->zTableName); +  if( zSql==0 ) return SQLITE_NOMEM; +  rc = sqlite3_prepare_v2(pVTab->db, zSql, -1, &pCur->pFullScan, 0); +  sqlite3_free(zSql); +  pCur->nRow = pCur->iRow = 0; +  if( rc==SQLITE_OK ){ +    rc = sqlite3_step(pCur->pFullScan); +    if( rc==SQLITE_ROW ){ pCur->iRow = -1; rc = SQLITE_OK; } +    if( rc==SQLITE_DONE ){ rc = SQLITE_OK; } +  }else{ +    pCur->iRow = 0; +  } +  return rc; +} + + +/* +** Called to "rewind" a cursor back to the beginning so that +** it starts its output over again.  Always called at least once +** prior to any spellfix1Column, spellfix1Rowid, or spellfix1Eof call. +*/ +static int spellfix1Filter( +  sqlite3_vtab_cursor *cur,  +  int idxNum, const char *idxStr, +  int argc, sqlite3_value **argv +){ +  spellfix1_cursor *pCur = (spellfix1_cursor *)cur; +  int rc; +  if( idxNum & 1 ){ +    rc = spellfix1FilterForMatch(pCur, idxNum, argc, argv); +  }else{ +    rc = spellfix1FilterForFullScan(pCur, idxNum, argc, argv); +  } +  return rc; +} + + +/* +** Advance a cursor to its next row of output +*/ +static int spellfix1Next(sqlite3_vtab_cursor *cur){ +  spellfix1_cursor *pCur = (spellfix1_cursor *)cur; +  int rc = SQLITE_OK; +  if( pCur->iRow < pCur->nRow ){ +    if( pCur->pFullScan ){ +      rc = sqlite3_step(pCur->pFullScan); +      if( rc!=SQLITE_ROW ) pCur->iRow = pCur->nRow; +      if( rc==SQLITE_ROW || rc==SQLITE_DONE ) rc = SQLITE_OK; +    }else{ +      pCur->iRow++; +    } +  } +  return rc; +} + +/* +** Return TRUE if we are at the end-of-file +*/ +static int spellfix1Eof(sqlite3_vtab_cursor *cur){ +  spellfix1_cursor *pCur = (spellfix1_cursor *)cur; +  return pCur->iRow>=pCur->nRow; +} + +/* +** Return columns from the current row. 
+*/ +static int spellfix1Column( +  sqlite3_vtab_cursor *cur, +  sqlite3_context *ctx, +  int i +){ +  spellfix1_cursor *pCur = (spellfix1_cursor*)cur; +  if( pCur->pFullScan ){ +    if( i<=SPELLFIX_COL_LANGID ){ +      sqlite3_result_value(ctx, sqlite3_column_value(pCur->pFullScan, i)); +    }else{ +      sqlite3_result_null(ctx); +    } +    return SQLITE_OK; +  } +  switch( i ){ +    case SPELLFIX_COL_WORD: { +      sqlite3_result_text(ctx, pCur->a[pCur->iRow].zWord, -1, SQLITE_STATIC); +      break; +    } +    case SPELLFIX_COL_RANK: { +      sqlite3_result_int(ctx, pCur->a[pCur->iRow].iRank); +      break; +    } +    case SPELLFIX_COL_DISTANCE: { +      sqlite3_result_int(ctx, pCur->a[pCur->iRow].iDistance); +      break; +    } +    case SPELLFIX_COL_LANGID: { +      sqlite3_result_int(ctx, pCur->iLang); +      break; +    } +    case SPELLFIX_COL_SCORE: { +      sqlite3_result_int(ctx, pCur->a[pCur->iRow].iScore); +      break; +    } +    case SPELLFIX_COL_MATCHLEN: { +      int iMatchlen = pCur->a[pCur->iRow].iMatchlen; +      if( iMatchlen<0 ){ +        int nPattern = (int)strlen(pCur->zPattern); +        char *zWord = pCur->a[pCur->iRow].zWord; +        int nWord = (int)strlen(zWord); + +        if( nPattern>0 && pCur->zPattern[nPattern-1]=='*' ){ +          char *zTranslit; +          int res; +          zTranslit = (char *)transliterate((unsigned char *)zWord, nWord); +          if( !zTranslit ) return SQLITE_NOMEM; +          res = editdist1(pCur->zPattern, zTranslit, &iMatchlen); +          sqlite3_free(zTranslit); +          if( res<0 ) return SQLITE_NOMEM; +          iMatchlen = translen_to_charlen(zWord, nWord, iMatchlen); +        }else{ +          iMatchlen = utf8Charlen(zWord, nWord); +        } +      } + +      sqlite3_result_int(ctx, iMatchlen); +      break; +    } +    case SPELLFIX_COL_PHONEHASH: { +      sqlite3_result_text(ctx, pCur->a[pCur->iRow].zHash, -1, SQLITE_STATIC); +      break; +    } +    case SPELLFIX_COL_TOP: { +      
sqlite3_result_int(ctx, pCur->iTop); +      break; +    } +    case SPELLFIX_COL_SCOPE: { +      sqlite3_result_int(ctx, pCur->iScope); +      break; +    } +    case SPELLFIX_COL_SRCHCNT: { +      sqlite3_result_int(ctx, pCur->nSearch); +      break; +    } +    default: { +      sqlite3_result_null(ctx); +      break; +    } +  } +  return SQLITE_OK; +} + +/* +** The rowid. +*/ +static int spellfix1Rowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){ +  spellfix1_cursor *pCur = (spellfix1_cursor*)cur; +  if( pCur->pFullScan ){ +    *pRowid = sqlite3_column_int64(pCur->pFullScan, 4); +  }else{ +    *pRowid = pCur->a[pCur->iRow].iRowid; +  } +  return SQLITE_OK; +} + +/* +** The xUpdate() method. +*/ +static int spellfix1Update( +  sqlite3_vtab *pVTab, +  int argc, +  sqlite3_value **argv, +  sqlite_int64 *pRowid +){ +  int rc = SQLITE_OK; +  sqlite3_int64 rowid, newRowid; +  spellfix1_vtab *p = (spellfix1_vtab*)pVTab; +  sqlite3 *db = p->db; + +  if( argc==1 ){ +    /* A delete operation on the rowid given by argv[0] */ +    rowid = *pRowid = sqlite3_value_int64(argv[0]); +    spellfix1DbExec(&rc, db, "DELETE FROM \"%w\".\"%w_vocab\" " +                           " WHERE id=%lld", +                  p->zDbName, p->zTableName, rowid); +  }else{ +    const unsigned char *zWord = sqlite3_value_text(argv[SPELLFIX_COL_WORD+2]); +    int nWord = sqlite3_value_bytes(argv[SPELLFIX_COL_WORD+2]); +    int iLang = sqlite3_value_int(argv[SPELLFIX_COL_LANGID+2]); +    int iRank = sqlite3_value_int(argv[SPELLFIX_COL_RANK+2]); +    const unsigned char *zSoundslike = +           sqlite3_value_text(argv[SPELLFIX_COL_SOUNDSLIKE+2]); +    int nSoundslike = sqlite3_value_bytes(argv[SPELLFIX_COL_SOUNDSLIKE+2]); +    char *zK1, *zK2; +    int i; +    char c; + +    if( zWord==0 ){ +      /* Inserts of the form:  INSERT INTO table(command) VALUES('xyzzy'); +      ** cause zWord to be NULL, so we look at the "command" column to see +      ** what special actions to take */ +      const 
char *zCmd =  +         (const char*)sqlite3_value_text(argv[SPELLFIX_COL_COMMAND+2]); +      if( zCmd==0 ){ +        pVTab->zErrMsg = sqlite3_mprintf("%s.word may not be NULL", +                                         p->zTableName); +        return SQLITE_CONSTRAINT_NOTNULL; +      } +      if( strcmp(zCmd,"reset")==0 ){ +        /* Reset the  edit cost table (if there is one). */ +        editDist3ConfigDelete(p->pConfig3); +        p->pConfig3 = 0; +        return SQLITE_OK; +      } +      if( strncmp(zCmd,"edit_cost_table=",16)==0 ){ +        editDist3ConfigDelete(p->pConfig3); +        p->pConfig3 = 0; +        sqlite3_free(p->zCostTable); +        p->zCostTable = spellfix1Dequote(zCmd+16); +        if( p->zCostTable==0 ) return SQLITE_NOMEM; +        if( p->zCostTable[0]==0 || sqlite3_stricmp(p->zCostTable,"null")==0 ){ +          sqlite3_free(p->zCostTable); +          p->zCostTable = 0; +        } +        return SQLITE_OK; +      } +      pVTab->zErrMsg = sqlite3_mprintf("unknown value for %s.command: \"%w\"", +                                       p->zTableName, zCmd); +      return SQLITE_ERROR; +    } +    if( iRank<1 ) iRank = 1; +    if( zSoundslike ){ +      zK1 = (char*)transliterate(zSoundslike, nSoundslike); +    }else{ +      zK1 = (char*)transliterate(zWord, nWord); +    } +    if( zK1==0 ) return SQLITE_NOMEM; +    for(i=0; (c = zK1[i])!=0; i++){ +       if( c>='A' && c<='Z' ) zK1[i] += 'a' - 'A'; +    } +    zK2 = (char*)phoneticHash((const unsigned char*)zK1, i); +    if( zK2==0 ){ +      sqlite3_free(zK1); +      return SQLITE_NOMEM; +    } +    if( sqlite3_value_type(argv[0])==SQLITE_NULL ){ +      spellfix1DbExec(&rc, db, +             "INSERT INTO \"%w\".\"%w_vocab\"(rank,langid,word,k1,k2) " +             "VALUES(%d,%d,%Q,%Q,%Q)", +             p->zDbName, p->zTableName, +             iRank, iLang, zWord, zK1, zK2 +      ); +      *pRowid = sqlite3_last_insert_rowid(db); +    }else{ +      rowid = sqlite3_value_int64(argv[0]); +      
newRowid = *pRowid = sqlite3_value_int64(argv[1]); +      spellfix1DbExec(&rc, db, +             "UPDATE \"%w\".\"%w_vocab\" SET id=%lld, rank=%d, langid=%d," +             " word=%Q, k1=%Q, k2=%Q WHERE id=%lld", +             p->zDbName, p->zTableName, newRowid, iRank, iLang, +             zWord, zK1, zK2, rowid +      ); +    } +    sqlite3_free(zK1); +    sqlite3_free(zK2); +  } +  return rc; +} + +/* +** Rename the spellfix1 table. +*/ +static int spellfix1Rename(sqlite3_vtab *pVTab, const char *zNew){ +  spellfix1_vtab *p = (spellfix1_vtab*)pVTab; +  sqlite3 *db = p->db; +  int rc = SQLITE_OK; +  char *zNewName = sqlite3_mprintf("%s", zNew); +  if( zNewName==0 ){ +    return SQLITE_NOMEM; +  } +  spellfix1DbExec(&rc, db,  +     "ALTER TABLE \"%w\".\"%w_vocab\" RENAME TO \"%w_vocab\"", +     p->zDbName, p->zTableName, zNewName +  ); +  if( rc==SQLITE_OK ){ +    sqlite3_free(p->zTableName); +    p->zTableName = zNewName; +  }else{ +    sqlite3_free(zNewName); +  } +  return rc; +} + + +/* +** A virtual table module that provides fuzzy search. 
+*/ +static sqlite3_module spellfix1Module = { +  0,                       /* iVersion */ +  spellfix1Create,         /* xCreate - handle CREATE VIRTUAL TABLE */ +  spellfix1Connect,        /* xConnect - reconnected to an existing table */ +  spellfix1BestIndex,      /* xBestIndex - figure out how to do a query */ +  spellfix1Disconnect,     /* xDisconnect - close a connection */ +  spellfix1Destroy,        /* xDestroy - handle DROP TABLE */ +  spellfix1Open,           /* xOpen - open a cursor */ +  spellfix1Close,          /* xClose - close a cursor */ +  spellfix1Filter,         /* xFilter - configure scan constraints */ +  spellfix1Next,           /* xNext - advance a cursor */ +  spellfix1Eof,            /* xEof - check for end of scan */ +  spellfix1Column,         /* xColumn - read data */ +  spellfix1Rowid,          /* xRowid - read data */ +  spellfix1Update,         /* xUpdate */ +  0,                       /* xBegin */ +  0,                       /* xSync */ +  0,                       /* xCommit */ +  0,                       /* xRollback */ +  0,                       /* xFindMethod */ +  spellfix1Rename,         /* xRename */ +}; + +/* +** Register the various functions and the virtual table. 
+*/ +static int spellfix1Register(sqlite3 *db){ +  int rc = SQLITE_OK; +  int i; +  rc = sqlite3_create_function(db, "spellfix1_translit", 1, SQLITE_UTF8, 0, +                                  transliterateSqlFunc, 0, 0); +  if( rc==SQLITE_OK ){ +    rc = sqlite3_create_function(db, "spellfix1_editdist", 2, SQLITE_UTF8, 0, +                                  editdistSqlFunc, 0, 0); +  } +  if( rc==SQLITE_OK ){ +    rc = sqlite3_create_function(db, "spellfix1_phonehash", 1, SQLITE_UTF8, 0, +                                  phoneticHashSqlFunc, 0, 0); +  } +  if( rc==SQLITE_OK ){ +    rc = sqlite3_create_function(db, "spellfix1_scriptcode", 1, SQLITE_UTF8, 0, +                                  scriptCodeSqlFunc, 0, 0); +  } +  if( rc==SQLITE_OK ){ +    rc = sqlite3_create_module(db, "spellfix1", &spellfix1Module, 0); +  } +  if( rc==SQLITE_OK ){ +    rc = editDist3Install(db); +  } + +  /* Verify sanity of the translit[] table */ +  for(i=0; i<sizeof(translit)/sizeof(translit[0])-1; i++){ +    assert( translit[i].cFrom<translit[i+1].cFrom ); +  } + +  return rc; +} + +#endif /* SQLITE_OMIT_VIRTUALTABLE */ + +/* +** Extension load function. +*/ +#ifdef _WIN32 +__declspec(dllexport) +#endif +int sqlite3_spellfix_init( +  sqlite3 *db,  +  char **pzErrMsg,  +  const sqlite3_api_routines *pApi +){ +  SQLITE_EXTENSION_INIT2(pApi); +#ifndef SQLITE_OMIT_VIRTUALTABLE +  return spellfix1Register(db); +#endif +  return SQLITE_OK; +} diff --git a/ext/misc/wholenumber.c b/ext/misc/wholenumber.c new file mode 100644 index 0000000..63369c6 --- /dev/null +++ b/ext/misc/wholenumber.c @@ -0,0 +1,274 @@ +/* +** 2011 April 02 +** +** The author disclaims copyright to this source code.  In place of +** a legal notice, here is a blessing: +** +**    May you do good and not evil. +**    May you find forgiveness for yourself and forgive others. +**    May you share freely, never taking more than you give. 
+** +************************************************************************* +** +** This file implements a virtual table that returns the whole numbers +** between 1 and 4294967295, inclusive. +** +** Example: +** +**     CREATE VIRTUAL TABLE nums USING wholenumber; +**     SELECT value FROM nums WHERE value<10; +** +** Results in: +** +**     1 2 3 4 5 6 7 8 9 +*/ +#include "sqlite3ext.h" +SQLITE_EXTENSION_INIT1 +#include <assert.h> +#include <string.h> + +#ifndef SQLITE_OMIT_VIRTUALTABLE + + +/* A wholenumber cursor object */ +typedef struct wholenumber_cursor wholenumber_cursor; +struct wholenumber_cursor { +  sqlite3_vtab_cursor base;  /* Base class - must be first */ +  sqlite3_int64 iValue;      /* Current value */ +  sqlite3_int64 mxValue;     /* Maximum value */ +}; + +/* Methods for the wholenumber module */ +static int wholenumberConnect( +  sqlite3 *db, +  void *pAux, +  int argc, const char *const*argv, +  sqlite3_vtab **ppVtab, +  char **pzErr +){ +  sqlite3_vtab *pNew; +  pNew = *ppVtab = sqlite3_malloc( sizeof(*pNew) ); +  if( pNew==0 ) return SQLITE_NOMEM; +  sqlite3_declare_vtab(db, "CREATE TABLE x(value)"); +  memset(pNew, 0, sizeof(*pNew)); +  return SQLITE_OK; +} +/* Note that for this virtual table, the xCreate and xConnect +** methods are identical. */ + +static int wholenumberDisconnect(sqlite3_vtab *pVtab){ +  sqlite3_free(pVtab); +  return SQLITE_OK; +} +/* The xDisconnect and xDestroy methods are also the same */ + + +/* +** Open a new wholenumber cursor. +*/ +static int wholenumberOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){ +  wholenumber_cursor *pCur; +  pCur = sqlite3_malloc( sizeof(*pCur) ); +  if( pCur==0 ) return SQLITE_NOMEM; +  memset(pCur, 0, sizeof(*pCur)); +  *ppCursor = &pCur->base; +  return SQLITE_OK; +} + +/* +** Close a wholenumber cursor. 
+*/ +static int wholenumberClose(sqlite3_vtab_cursor *cur){ +  sqlite3_free(cur); +  return SQLITE_OK; +} + + +/* +** Advance a cursor to its next row of output +*/ +static int wholenumberNext(sqlite3_vtab_cursor *cur){ +  wholenumber_cursor *pCur = (wholenumber_cursor*)cur; +  pCur->iValue++; +  return SQLITE_OK; +} + +/* +** Return the value associated with a wholenumber. +*/ +static int wholenumberColumn( +  sqlite3_vtab_cursor *cur, +  sqlite3_context *ctx, +  int i +){ +  wholenumber_cursor *pCur = (wholenumber_cursor*)cur; +  sqlite3_result_int64(ctx, pCur->iValue); +  return SQLITE_OK; +} + +/* +** The rowid. +*/ +static int wholenumberRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){ +  wholenumber_cursor *pCur = (wholenumber_cursor*)cur; +  *pRowid = pCur->iValue; +  return SQLITE_OK; +} + +/* +** When the wholenumber_cursor.rLimit value is 0 or less, that is a signal +** that the cursor has nothing more to output. +*/ +static int wholenumberEof(sqlite3_vtab_cursor *cur){ +  wholenumber_cursor *pCur = (wholenumber_cursor*)cur; +  return pCur->iValue>pCur->mxValue || pCur->iValue==0; +} + +/* +** Called to "rewind" a cursor back to the beginning so that +** it starts its output over again.  Always called at least once +** prior to any wholenumberColumn, wholenumberRowid, or wholenumberEof call. 
+** +**    idxNum   Constraints +**    ------   --------------------- +**      0      (none) +**      1      value > $argv0 +**      2      value >= $argv0 +**      4      value < $argv0 +**      8      value <= $argv0 +** +**      5      value > $argv0 AND value < $argv1 +**      6      value >= $argv0 AND value < $argv1 +**      9      value > $argv0 AND value <= $argv1 +**     10      value >= $argv0 AND value <= $argv1 +*/ +static int wholenumberFilter( +  sqlite3_vtab_cursor *pVtabCursor,  +  int idxNum, const char *idxStr, +  int argc, sqlite3_value **argv +){ +  wholenumber_cursor *pCur = (wholenumber_cursor *)pVtabCursor; +  sqlite3_int64 v; +  int i = 0; +  pCur->iValue = 1; +  pCur->mxValue = 0xffffffff;  /* 4294967295 */ +  if( idxNum & 3 ){ +    v = sqlite3_value_int64(argv[0]) + (idxNum&1); +    if( v>pCur->iValue && v<=pCur->mxValue ) pCur->iValue = v; +    i++; +  } +  if( idxNum & 12 ){ +    v = sqlite3_value_int64(argv[i]) - ((idxNum>>2)&1); +    if( v>=pCur->iValue && v<pCur->mxValue ) pCur->mxValue = v; +  } +  return SQLITE_OK; +} + +/* +** Search for terms of these forms: +** +**  (1)  value > $value +**  (2)  value >= $value +**  (4)  value < $value +**  (8)  value <= $value +** +** idxNum is an ORed combination of 1 or 2 with 4 or 8. 
+*/ +static int wholenumberBestIndex( +  sqlite3_vtab *tab, +  sqlite3_index_info *pIdxInfo +){ +  int i; +  int idxNum = 0; +  int argvIdx = 1; +  int ltIdx = -1; +  int gtIdx = -1; +  const struct sqlite3_index_constraint *pConstraint; +  pConstraint = pIdxInfo->aConstraint; +  for(i=0; i<pIdxInfo->nConstraint; i++, pConstraint++){ +    if( pConstraint->usable==0 ) continue; +    if( (idxNum & 3)==0 && pConstraint->op==SQLITE_INDEX_CONSTRAINT_GT ){ +      idxNum |= 1; +      ltIdx = i; +    } +    if( (idxNum & 3)==0 && pConstraint->op==SQLITE_INDEX_CONSTRAINT_GE ){ +      idxNum |= 2; +      ltIdx = i; +    } +    if( (idxNum & 12)==0 && pConstraint->op==SQLITE_INDEX_CONSTRAINT_LT ){ +      idxNum |= 4; +      gtIdx = i; +    } +    if( (idxNum & 12)==0 && pConstraint->op==SQLITE_INDEX_CONSTRAINT_LE ){ +      idxNum |= 8; +      gtIdx = i; +    } +  } +  pIdxInfo->idxNum = idxNum; +  if( ltIdx>=0 ){ +    pIdxInfo->aConstraintUsage[ltIdx].argvIndex = argvIdx++; +    pIdxInfo->aConstraintUsage[ltIdx].omit = 1; +  } +  if( gtIdx>=0 ){ +    pIdxInfo->aConstraintUsage[gtIdx].argvIndex = argvIdx; +    pIdxInfo->aConstraintUsage[gtIdx].omit = 1; +  } +  if( pIdxInfo->nOrderBy==1 +   && pIdxInfo->aOrderBy[0].desc==0 +  ){ +    pIdxInfo->orderByConsumed = 1; +  } +  if( (idxNum & 12)==0 ){ +    pIdxInfo->estimatedCost = (double)100000000; +  }else if( (idxNum & 3)==0 ){ +    pIdxInfo->estimatedCost = (double)5; +  }else{ +    pIdxInfo->estimatedCost = (double)1; +  } +  return SQLITE_OK; +} + +/* +** A virtual table module that provides read-only access to a +** Tcl global variable namespace. 
+*/ +static sqlite3_module wholenumberModule = { +  0,                         /* iVersion */ +  wholenumberConnect, +  wholenumberConnect, +  wholenumberBestIndex, +  wholenumberDisconnect,  +  wholenumberDisconnect, +  wholenumberOpen,           /* xOpen - open a cursor */ +  wholenumberClose,          /* xClose - close a cursor */ +  wholenumberFilter,         /* xFilter - configure scan constraints */ +  wholenumberNext,           /* xNext - advance a cursor */ +  wholenumberEof,            /* xEof - check for end of scan */ +  wholenumberColumn,         /* xColumn - read data */ +  wholenumberRowid,          /* xRowid - read data */ +  0,                         /* xUpdate */ +  0,                         /* xBegin */ +  0,                         /* xSync */ +  0,                         /* xCommit */ +  0,                         /* xRollback */ +  0,                         /* xFindMethod */ +  0,                         /* xRename */ +}; + +#endif /* SQLITE_OMIT_VIRTUALTABLE */ + +#ifdef _WIN32 +__declspec(dllexport) +#endif +int sqlite3_wholenumber_init( +  sqlite3 *db,  +  char **pzErrMsg,  +  const sqlite3_api_routines *pApi +){ +  int rc = SQLITE_OK; +  SQLITE_EXTENSION_INIT2(pApi); +#ifndef SQLITE_OMIT_VIRTUALTABLE +  rc = sqlite3_create_module(db, "wholenumber", &wholenumberModule, 0); +#endif +  return rc; +} diff --git a/ext/rtree/rtree.c b/ext/rtree/rtree.c index 66da481..16a316f 100644 --- a/ext/rtree/rtree.c +++ b/ext/rtree/rtree.c @@ -2660,12 +2660,12 @@ static int newRowid(Rtree *pRtree, i64 *piRowid){  */  static int rtreeDeleteRowid(Rtree *pRtree, sqlite3_int64 iDelete){    int rc;                         /* Return code */ -  RtreeNode *pLeaf;               /* Leaf node containing record iDelete */ +  RtreeNode *pLeaf = 0;           /* Leaf node containing record iDelete */    int iCell;                      /* Index of iDelete cell in pLeaf */    RtreeNode *pRoot;               /* Root node of rtree structure */ -  /* Obtain a reference to 
the root node to initialise Rtree.iDepth */ +  /* Obtain a reference to the root node to initialize Rtree.iDepth */    rc = nodeAcquire(pRtree, 1, 0, &pRoot);    /* Obtain a reference to the leaf node that contains the entry  @@ -2863,7 +2863,7 @@ static int rtreeUpdate(    */    if( rc==SQLITE_OK && nData>1 ){      /* Insert the new record into the r-tree */ -    RtreeNode *pLeaf; +    RtreeNode *pLeaf = 0;      /* Figure out the rowid of the new row. */      if( bHaveRowid==0 ){ @@ -3049,7 +3049,8 @@ static int getIntFromStmt(sqlite3 *db, const char *zSql, int *piVal){  static int getNodeSize(    sqlite3 *db,                    /* Database handle */    Rtree *pRtree,                  /* Rtree handle */ -  int isCreate                    /* True for xCreate, false for xConnect */ +  int isCreate,                   /* True for xCreate, false for xConnect */ +  char **pzErr                    /* OUT: Error message, if any */  ){    int rc;    char *zSql; @@ -3062,6 +3063,8 @@ static int getNodeSize(        if( (4+pRtree->nBytesPerCell*RTREE_MAXCELLS)<pRtree->iNodeSize ){          pRtree->iNodeSize = 4+pRtree->nBytesPerCell*RTREE_MAXCELLS;        } +    }else{ +      *pzErr = sqlite3_mprintf("%s", sqlite3_errmsg(db));      }    }else{      zSql = sqlite3_mprintf( @@ -3069,6 +3072,9 @@ static int getNodeSize(          pRtree->zDb, pRtree->zName      );      rc = getIntFromStmt(db, zSql, &pRtree->iNodeSize); +    if( rc!=SQLITE_OK ){ +      *pzErr = sqlite3_mprintf("%s", sqlite3_errmsg(db)); +    }    }    sqlite3_free(zSql); @@ -3132,7 +3138,7 @@ static int rtreeInit(    memcpy(pRtree->zName, argv[2], nName);    /* Figure out the node size to use. */ -  rc = getNodeSize(db, pRtree, isCreate); +  rc = getNodeSize(db, pRtree, isCreate, pzErr);    /* Create/Connect to the underlying relational database schema. 
If    ** that is successful, call sqlite3_declare_vtab() to configure diff --git a/ext/rtree/rtree1.test b/ext/rtree/rtree1.test index e3c7d68..275b132 100644 --- a/ext/rtree/rtree1.test +++ b/ext/rtree/rtree1.test @@ -17,6 +17,7 @@ if {![info exists testdir]} {  }  source [file join [file dirname [info script]] rtree_util.tcl]  source $testdir/tester.tcl +set testprefix rtree1  # Test plan:  # diff --git a/ext/rtree/rtree5.test b/ext/rtree/rtree5.test index 8990772..8ff90b0 100644 --- a/ext/rtree/rtree5.test +++ b/ext/rtree/rtree5.test @@ -61,7 +61,7 @@ do_test rtree5-1.9 {  do_test rtree5-1.10 {     execsql { SELECT (1<<31)-5, (1<<31)-1, -1*(1<<31), -1*(1<<31)+5 }  } {2147483643 2147483647 -2147483648 -2147483643} -do_test rtree5-1.10 {  +do_test rtree5-1.11 {     execsql {       INSERT INTO t1 VALUES(2, (1<<31)-5, (1<<31)-1, -1*(1<<31), -1*(1<<31)+5)     } | 
