diff options
author | Hans-Christoph Steiner <hans@eds.org> | 2012-09-20 18:34:38 -0400 |
---|---|---|
committer | Hans-Christoph Steiner <hans@eds.org> | 2012-09-20 18:34:38 -0400 |
commit | 487e15dc239ccdb3344d1c99ce120e872bab4a74 (patch) | |
tree | c986d492f6092ca7b4401d91515f74daed17fae2 /src/test_fuzzer.c | |
parent | 7bb481fda9ecb134804b49c2ce77ca28f7eea583 (diff) |
Imported Upstream version 2.0.6
Diffstat (limited to 'src/test_fuzzer.c')
-rw-r--r-- | src/test_fuzzer.c | 663 |
1 files changed, 467 insertions, 196 deletions
diff --git a/src/test_fuzzer.c b/src/test_fuzzer.c index cf59257..10496f2 100644 --- a/src/test_fuzzer.c +++ b/src/test_fuzzer.c @@ -10,43 +10,58 @@ ** ************************************************************************* ** -** Code for demonstartion virtual table that generates variations +** Code for a demonstration virtual table that generates variations ** on an input word at increasing edit distances from the original. ** ** A fuzzer virtual table is created like this: ** -** CREATE VIRTUAL TABLE temp.f USING fuzzer; +** CREATE VIRTUAL TABLE f USING fuzzer(<fuzzer-data-table>); ** -** The name of the new virtual table in the example above is "f". -** Note that all fuzzer virtual tables must be TEMP tables. The -** "temp." prefix in front of the table name is required when the -** table is being created. The "temp." prefix can be omitted when -** using the table as long as the name is unambiguous. +** When it is created, the new fuzzer table must be supplied with the +** name of a "fuzzer data table", which must reside in the same database +** file as the new fuzzer table. The fuzzer data table contains the various +** transformations and their costs that the fuzzer logic uses to generate +** variations. ** -** Before being used, the fuzzer needs to be programmed by giving it -** character transformations and a cost associated with each transformation. -** Examples: -** -** INSERT INTO f(cFrom,cTo,Cost) VALUES('','a',100); -** -** The above statement says that the cost of inserting a letter 'a' is -** 100. (All costs are integers. We recommend that costs be scaled so -** that the average cost is around 100.) -** -** INSERT INTO f(cFrom,cTo,Cost) VALUES('b','',87); +** The fuzzer data table must contain exactly four columns (more precisely, +** the statement "SELECT * FROM <fuzzer_data_table>" must return records +** that consist of four columns). It does not matter what the columns are +** named. ** -** The above statement says that the cost of deleting a single letter -** 'b' is 87. +** Each row in the fuzzer data table represents a single character +** transformation. The left most column of the row (column 0) contains an +** integer value - the identifier of the ruleset to which the transformation +** rule belongs (see "MULTIPLE RULE SETS" below). The second column of the +** row (column 0) contains the input character or characters. The third +** column contains the output character or characters. And the fourth column +** contains the integer cost of making the transformation. For example: ** -** INSERT INTO f(cFrom,cTo,Cost) VALUES('o','oe',38); -** INSERT INTO f(cFrom,cTo,Cost) VALUES('oe','o',40); +** CREATE TABLE f_data(ruleset, cFrom, cTo, Cost); +** INSERT INTO f_data(ruleset, cFrom, cTo, Cost) VALUES(0, '', 'a', 100); +** INSERT INTO f_data(ruleset, cFrom, cTo, Cost) VALUES(0, 'b', '', 87); +** INSERT INTO f_data(ruleset, cFrom, cTo, Cost) VALUES(0, 'o', 'oe', 38); +** INSERT INTO f_data(ruleset, cFrom, cTo, Cost) VALUES(0, 'oe', 'o', 40); ** -** This third example says that the cost of transforming the single -** letter "o" into the two-letter sequence "oe" is 38 and that the +** The first row inserted into the fuzzer data table by the SQL script +** above indicates that the cost of inserting a letter 'a' is 100. (All +** costs are integers. We recommend that costs be scaled so that the +** average cost is around 100.) The second INSERT statement creates a rule +** saying that the cost of deleting a single letter 'b' is 87. The third +** and fourth INSERT statements mean that the cost of transforming a +** single letter "o" into the two-letter sequence "oe" is 38 and that the ** cost of transforming "oe" back into "o" is 40. ** -** After all the transformation costs have been set, the fuzzer table -** can be queried as follows: +** The contents of the fuzzer data table are loaded into main memory when +** a fuzzer table is first created, and may be internally reloaded by the +** system at any subsequent time. Therefore, the fuzzer data table should be +** populated before the fuzzer table is created and not modified thereafter. +** If you do need to modify the contents of the fuzzer data table, it is +** recommended that the associated fuzzer table be dropped, the fuzzer data +** table edited, and the fuzzer table recreated within a single transaction. +** Alternatively, the fuzzer data table can be edited then the database +** connection can be closed and reopened. +** +** Once it has been created, the fuzzer table can be queried as follows: ** ** SELECT word, distance FROM f ** WHERE word MATCH 'abcdefg' @@ -61,6 +76,9 @@ ** the one that is returned. In the example, the search is limited to ** strings with a total distance of less than 200. ** +** The fuzzer is a read-only table. Any attempt to DELETE, INSERT, or +** UPDATE on a fuzzer table will throw an error. +** ** It is important to put some kind of a limit on the fuzzer output. This ** can be either in the form of a LIMIT clause at the end of the query, ** or better, a "distance<NNN" constraint where NNN is some number. The @@ -93,7 +111,42 @@ ** ** This last query will show up to 50 words out of the vocabulary that ** match or nearly match the $prefix. +** +** MULTIPLE RULE SETS +** +** Normally, the "ruleset" value associated with all character transformations +** in the fuzzer data table is zero. However, if required, the fuzzer table +** allows multiple rulesets to be defined. Each query uses only a single +** ruleset. This allows, for example, a single fuzzer table to support +** multiple languages. +** +** By default, only the rules from ruleset 0 are used. To specify an +** alternative ruleset, a "ruleset = ?" expression must be added to the +** WHERE clause of a SELECT, where ? is the identifier of the desired +** ruleset. For example: +** +** SELECT vocabulary.w FROM f, vocabulary +** WHERE f.word MATCH $word +** AND f.distance<=200 +** AND f.word=vocabulary.w +** AND f.ruleset=1 -- Specify the ruleset to use here +** LIMIT 20 +** +** If no "ruleset = ?" constraint is specified in the WHERE clause, ruleset +** 0 is used. +** +** LIMITS +** +** The maximum ruleset number is 2147483647. The maximum length of either +** of the strings in the second or third column of the fuzzer data table +** is 50 bytes. The maximum cost on a rule is 1000. */ + +/* If SQLITE_DEBUG is not defined, disable assert statements. */ +#if !defined(NDEBUG) && !defined(SQLITE_DEBUG) +# define NDEBUG +#endif + #include "sqlite3.h" #include <stdlib.h> #include <string.h> @@ -112,10 +165,25 @@ typedef struct fuzzer_seen fuzzer_seen; typedef struct fuzzer_stem fuzzer_stem; /* -** Type of the "cost" of an edit operation. Might be changed to -** "float" or "double" or "sqlite3_int64" in the future. +** Various types. +** +** fuzzer_cost is the "cost" of an edit operation. +** +** fuzzer_len is the length of a matching string. +** +** fuzzer_ruleid is an ruleset identifier. */ typedef int fuzzer_cost; +typedef signed char fuzzer_len; +typedef int fuzzer_ruleid; + +/* +** Limits +*/ +#define FUZZER_MX_LENGTH 50 /* Maximum length of a rule string */ +#define FUZZER_MX_RULEID 2147483647 /* Maximum rule ID */ +#define FUZZER_MX_COST 1000 /* Maximum single-rule cost */ +#define FUZZER_MX_OUTPUT_LENGTH 100 /* Maximum length of an output string */ /* @@ -123,11 +191,12 @@ typedef int fuzzer_cost; ** All rules are kept on a linked list sorted by rCost. */ struct fuzzer_rule { - fuzzer_rule *pNext; /* Next rule in order of increasing rCost */ - fuzzer_cost rCost; /* Cost of this transformation */ - int nFrom, nTo; /* Length of the zFrom and zTo strings */ - char *zFrom; /* Transform from */ - char zTo[4]; /* Transform to (extra space appended) */ + fuzzer_rule *pNext; /* Next rule in order of increasing rCost */ + char *zFrom; /* Transform from */ + fuzzer_cost rCost; /* Cost of this transformation */ + fuzzer_len nFrom, nTo; /* Length of the zFrom and zTo strings */ + fuzzer_ruleid iRuleset; /* The rule set to which this rule belongs */ + char zTo[4]; /* Transform to (extra space appended) */ }; /* @@ -143,13 +212,13 @@ struct fuzzer_rule { */ struct fuzzer_stem { char *zBasis; /* Word being fuzzed */ - int nBasis; /* Length of the zBasis string */ const fuzzer_rule *pRule; /* Current rule to apply */ - int n; /* Apply pRule at this character offset */ - fuzzer_cost rBaseCost; /* Base cost of getting to zBasis */ - fuzzer_cost rCostX; /* Precomputed rBaseCost + pRule->rCost */ fuzzer_stem *pNext; /* Next stem in rCost order */ fuzzer_stem *pHash; /* Next stem with same hash on zBasis */ + fuzzer_cost rBaseCost; /* Base cost of getting to zBasis */ + fuzzer_cost rCostX; /* Precomputed rBaseCost + pRule->rCost */ + fuzzer_len nBasis; /* Length of the zBasis string */ + fuzzer_len n; /* Apply pRule at this character offset */ }; /* @@ -159,7 +228,6 @@ struct fuzzer_vtab { sqlite3_vtab base; /* Base class - must be first */ char *zClassName; /* Name of this class. Default: "fuzzer" */ fuzzer_rule *pRule; /* All active rules in this fuzzer */ - fuzzer_rule *pNewRule; /* New rules to add when last cursor expires */ int nCursor; /* Number of active cursors */ }; @@ -179,54 +247,11 @@ struct fuzzer_cursor { char *zBuf; /* Temporary use buffer */ int nBuf; /* Bytes allocated for zBuf */ int nStem; /* Number of stems allocated */ + int iRuleset; /* Only process rules from this ruleset */ fuzzer_rule nullRule; /* Null rule used first */ fuzzer_stem *apHash[FUZZER_HASH]; /* Hash of previously generated terms */ }; -/* Methods for the fuzzer module */ -static int fuzzerConnect( - sqlite3 *db, - void *pAux, - int argc, const char *const*argv, - sqlite3_vtab **ppVtab, - char **pzErr -){ - fuzzer_vtab *pNew; - int n; - if( strcmp(argv[1],"temp")!=0 ){ - *pzErr = sqlite3_mprintf("%s virtual tables must be TEMP", argv[0]); - return SQLITE_ERROR; - } - n = strlen(argv[0]) + 1; - pNew = sqlite3_malloc( sizeof(*pNew) + n ); - if( pNew==0 ) return SQLITE_NOMEM; - pNew->zClassName = (char*)&pNew[1]; - memcpy(pNew->zClassName, argv[0], n); - sqlite3_declare_vtab(db, "CREATE TABLE x(word,distance,cFrom,cTo,cost)"); - memset(pNew, 0, sizeof(*pNew)); - *ppVtab = &pNew->base; - return SQLITE_OK; -} -/* Note that for this virtual table, the xCreate and xConnect -** methods are identical. */ - -static int fuzzerDisconnect(sqlite3_vtab *pVtab){ - fuzzer_vtab *p = (fuzzer_vtab*)pVtab; - assert( p->nCursor==0 ); - do{ - while( p->pRule ){ - fuzzer_rule *pRule = p->pRule; - p->pRule = pRule->pNext; - sqlite3_free(pRule); - } - p->pRule = p->pNewRule; - p->pNewRule = 0; - }while( p->pRule ); - sqlite3_free(p); - return SQLITE_OK; -} -/* The xDisconnect and xDestroy methods are also the same */ - /* ** The two input rule lists are both sorted in order of increasing ** cost. Merge them together into a single list, sorted by cost, and @@ -256,25 +281,134 @@ static fuzzer_rule *fuzzerMergeRules(fuzzer_rule *pA, fuzzer_rule *pB){ return head.pNext; } +/* +** Statement pStmt currently points to a row in the fuzzer data table. This +** function allocates and populates a fuzzer_rule structure according to +** the content of the row. +** +** If successful, *ppRule is set to point to the new object and SQLITE_OK +** is returned. Otherwise, *ppRule is zeroed, *pzErr may be set to point +** to an error message and an SQLite error code returned. +*/ +static int fuzzerLoadOneRule( + fuzzer_vtab *p, /* Fuzzer virtual table handle */ + sqlite3_stmt *pStmt, /* Base rule on statements current row */ + fuzzer_rule **ppRule, /* OUT: New rule object */ + char **pzErr /* OUT: Error message */ +){ + sqlite3_int64 iRuleset = sqlite3_column_int64(pStmt, 0); + const char *zFrom = (const char *)sqlite3_column_text(pStmt, 1); + const char *zTo = (const char *)sqlite3_column_text(pStmt, 2); + int nCost = sqlite3_column_int(pStmt, 3); + + int rc = SQLITE_OK; /* Return code */ + int nFrom; /* Size of string zFrom, in bytes */ + int nTo; /* Size of string zTo, in bytes */ + fuzzer_rule *pRule = 0; /* New rule object to return */ + + if( zFrom==0 ) zFrom = ""; + if( zTo==0 ) zTo = ""; + nFrom = (int)strlen(zFrom); + nTo = (int)strlen(zTo); + + /* Silently ignore null transformations */ + if( strcmp(zFrom, zTo)==0 ){ + *ppRule = 0; + return SQLITE_OK; + } + + if( nCost<=0 || nCost>FUZZER_MX_COST ){ + *pzErr = sqlite3_mprintf("%s: cost must be between 1 and %d", + p->zClassName, FUZZER_MX_COST + ); + rc = SQLITE_ERROR; + }else + if( nFrom>FUZZER_MX_LENGTH || nTo>FUZZER_MX_LENGTH ){ + *pzErr = sqlite3_mprintf("%s: maximum string length is %d", + p->zClassName, FUZZER_MX_LENGTH + ); + rc = SQLITE_ERROR; + }else + if( iRuleset<0 || iRuleset>FUZZER_MX_RULEID ){ + *pzErr = sqlite3_mprintf("%s: ruleset must be between 0 and %d", + p->zClassName, FUZZER_MX_RULEID + ); + rc = SQLITE_ERROR; + }else{ + + pRule = sqlite3_malloc( sizeof(*pRule) + nFrom + nTo ); + if( pRule==0 ){ + rc = SQLITE_NOMEM; + }else{ + memset(pRule, 0, sizeof(*pRule)); + pRule->zFrom = &pRule->zTo[nTo+1]; + pRule->nFrom = nFrom; + memcpy(pRule->zFrom, zFrom, nFrom+1); + memcpy(pRule->zTo, zTo, nTo+1); + pRule->nTo = nTo; + pRule->rCost = nCost; + pRule->iRuleset = (int)iRuleset; + } + } + + *ppRule = pRule; + return rc; +} /* -** Open a new fuzzer cursor. +** Load the content of the fuzzer data table into memory. */ -static int fuzzerOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){ - fuzzer_vtab *p = (fuzzer_vtab*)pVTab; - fuzzer_cursor *pCur; - pCur = sqlite3_malloc( sizeof(*pCur) ); - if( pCur==0 ) return SQLITE_NOMEM; - memset(pCur, 0, sizeof(*pCur)); - pCur->pVtab = p; - *ppCursor = &pCur->base; - if( p->nCursor==0 && p->pNewRule ){ +static int fuzzerLoadRules( + sqlite3 *db, /* Database handle */ + fuzzer_vtab *p, /* Virtual fuzzer table to configure */ + const char *zDb, /* Database containing rules data */ + const char *zData, /* Table containing rules data */ + char **pzErr /* OUT: Error message */ +){ + int rc = SQLITE_OK; /* Return code */ + char *zSql; /* SELECT used to read from rules table */ + fuzzer_rule *pHead = 0; + + zSql = sqlite3_mprintf("SELECT * FROM %Q.%Q", zDb, zData); + if( zSql==0 ){ + rc = SQLITE_NOMEM; + }else{ + int rc2; /* finalize() return code */ + sqlite3_stmt *pStmt = 0; + rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); + if( rc!=SQLITE_OK ){ + *pzErr = sqlite3_mprintf("%s: %s", p->zClassName, sqlite3_errmsg(db)); + }else if( sqlite3_column_count(pStmt)!=4 ){ + *pzErr = sqlite3_mprintf("%s: %s has %d columns, expected 4", + p->zClassName, zData, sqlite3_column_count(pStmt) + ); + rc = SQLITE_ERROR; + }else{ + while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){ + fuzzer_rule *pRule = 0; + rc = fuzzerLoadOneRule(p, pStmt, &pRule, pzErr); + if( pRule ){ + pRule->pNext = pHead; + pHead = pRule; + } + } + } + rc2 = sqlite3_finalize(pStmt); + if( rc==SQLITE_OK ) rc = rc2; + } + sqlite3_free(zSql); + + /* All rules are now in a singly linked list starting at pHead. This + ** block sorts them by cost and then sets fuzzer_vtab.pRule to point to + ** point to the head of the sorted list. + */ + if( rc==SQLITE_OK ){ unsigned int i; fuzzer_rule *pX; fuzzer_rule *a[15]; for(i=0; i<sizeof(a)/sizeof(a[0]); i++) a[i] = 0; - while( (pX = p->pNewRule)!=0 ){ - p->pNewRule = pX->pNext; + while( (pX = pHead)!=0 ){ + pHead = pX->pNext; pX->pNext = 0; for(i=0; a[i] && i<sizeof(a)/sizeof(a[0])-1; i++){ pX = fuzzerMergeRules(a[i], pX); @@ -286,7 +420,143 @@ static int fuzzerOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){ pX = fuzzerMergeRules(a[i], pX); } p->pRule = fuzzerMergeRules(p->pRule, pX); + }else{ + /* An error has occurred. Setting p->pRule to point to the head of the + ** allocated list ensures that the list will be cleaned up in this case. + */ + assert( p->pRule==0 ); + p->pRule = pHead; } + + return rc; +} + +/* +** This function converts an SQL quoted string into an unquoted string +** and returns a pointer to a buffer allocated using sqlite3_malloc() +** containing the result. The caller should eventually free this buffer +** using sqlite3_free. +** +** Examples: +** +** "abc" becomes abc +** 'xyz' becomes xyz +** [pqr] becomes pqr +** `mno` becomes mno +*/ +static char *fuzzerDequote(const char *zIn){ + int nIn; /* Size of input string, in bytes */ + char *zOut; /* Output (dequoted) string */ + + nIn = (int)strlen(zIn); + zOut = sqlite3_malloc(nIn+1); + if( zOut ){ + char q = zIn[0]; /* Quote character (if any ) */ + + if( q!='[' && q!= '\'' && q!='"' && q!='`' ){ + memcpy(zOut, zIn, nIn+1); + }else{ + int iOut = 0; /* Index of next byte to write to output */ + int iIn; /* Index of next byte to read from input */ + + if( q=='[' ) q = ']'; + for(iIn=1; iIn<nIn; iIn++){ + if( zIn[iIn]==q ) iIn++; + zOut[iOut++] = zIn[iIn]; + } + } + assert( (int)strlen(zOut)<=nIn ); + } + return zOut; +} + +/* +** xDisconnect/xDestroy method for the fuzzer module. +*/ +static int fuzzerDisconnect(sqlite3_vtab *pVtab){ + fuzzer_vtab *p = (fuzzer_vtab*)pVtab; + assert( p->nCursor==0 ); + while( p->pRule ){ + fuzzer_rule *pRule = p->pRule; + p->pRule = pRule->pNext; + sqlite3_free(pRule); + } + sqlite3_free(p); + return SQLITE_OK; +} + +/* +** xConnect/xCreate method for the fuzzer module. Arguments are: +** +** argv[0] -> module name ("fuzzer") +** argv[1] -> database name +** argv[2] -> table name +** argv[3] -> fuzzer rule table name +*/ +static int fuzzerConnect( + sqlite3 *db, + void *pAux, + int argc, const char *const*argv, + sqlite3_vtab **ppVtab, + char **pzErr +){ + int rc = SQLITE_OK; /* Return code */ + fuzzer_vtab *pNew = 0; /* New virtual table */ + const char *zModule = argv[0]; + const char *zDb = argv[1]; + + if( argc!=4 ){ + *pzErr = sqlite3_mprintf( + "%s: wrong number of CREATE VIRTUAL TABLE arguments", zModule + ); + rc = SQLITE_ERROR; + }else{ + int nModule; /* Length of zModule, in bytes */ + + nModule = (int)strlen(zModule); + pNew = sqlite3_malloc( sizeof(*pNew) + nModule + 1); + if( pNew==0 ){ + rc = SQLITE_NOMEM; + }else{ + char *zTab; /* Dequoted name of fuzzer data table */ + + memset(pNew, 0, sizeof(*pNew)); + pNew->zClassName = (char*)&pNew[1]; + memcpy(pNew->zClassName, zModule, nModule+1); + + zTab = fuzzerDequote(argv[3]); + if( zTab==0 ){ + rc = SQLITE_NOMEM; + }else{ + rc = fuzzerLoadRules(db, pNew, zDb, zTab, pzErr); + sqlite3_free(zTab); + } + + if( rc==SQLITE_OK ){ + rc = sqlite3_declare_vtab(db, "CREATE TABLE x(word,distance,ruleset)"); + } + if( rc!=SQLITE_OK ){ + fuzzerDisconnect((sqlite3_vtab *)pNew); + pNew = 0; + } + } + } + + *ppVtab = (sqlite3_vtab *)pNew; + return rc; +} + +/* +** Open a new fuzzer cursor. +*/ +static int fuzzerOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){ + fuzzer_vtab *p = (fuzzer_vtab*)pVTab; + fuzzer_cursor *pCur; + pCur = sqlite3_malloc( sizeof(*pCur) ); + if( pCur==0 ) return SQLITE_NOMEM; + memset(pCur, 0, sizeof(*pCur)); + pCur->pVtab = p; + *ppCursor = &pCur->base; p->nCursor++; return SQLITE_OK; } @@ -343,8 +613,8 @@ static int fuzzerRender( int *pnBuf /* Size of the buffer */ ){ const fuzzer_rule *pRule = pStem->pRule; - int n; - char *z; + int n; /* Size of output term without nul-term */ + char *z; /* Buffer to assemble output term in */ n = pStem->nBasis + pRule->nTo - pRule->nFrom; if( (*pnBuf)<n+1 ){ @@ -362,6 +632,8 @@ static int fuzzerRender( memcpy(&z[n+pRule->nTo], &pStem->zBasis[n+pRule->nFrom], pStem->nBasis-n-pRule->nFrom+1); } + + assert( z[pStem->nBasis + pRule->nTo - pRule->nFrom]==0 ); return SQLITE_OK; } @@ -424,13 +696,32 @@ static int fuzzerSeen(fuzzer_cursor *pCur, fuzzer_stem *pStem){ } h = fuzzerHash(pCur->zBuf); pLookup = pCur->apHash[h]; - while( pLookup && strcmp(pLookup->zBasis, pCur->zBuf)!=0 ){ + while( pLookup && strcmp(pLookup->zBasis, pCur->zBuf)!=0 ){ pLookup = pLookup->pHash; } return pLookup!=0; } /* +** If argument pRule is NULL, this function returns false. +** +** Otherwise, it returns true if rule pRule should be skipped. A rule +** should be skipped if it does not belong to rule-set iRuleset, or if +** applying it to stem pStem would create a string longer than +** FUZZER_MX_OUTPUT_LENGTH bytes. +*/ +static int fuzzerSkipRule( + const fuzzer_rule *pRule, /* Determine whether or not to skip this */ + fuzzer_stem *pStem, /* Stem rule may be applied to */ + int iRuleset /* Rule-set used by the current query */ +){ + return pRule && ( + (pRule->iRuleset!=iRuleset) + || (pStem->nBasis + pRule->nTo - pRule->nFrom)>FUZZER_MX_OUTPUT_LENGTH + ); +} + +/* ** Advance a fuzzer_stem to its next value. Return 0 if there are ** no more values that can be generated by this fuzzer_stem. Return ** -1 on a memory allocation failure. @@ -438,6 +729,7 @@ static int fuzzerSeen(fuzzer_cursor *pCur, fuzzer_stem *pStem){ static int fuzzerAdvance(fuzzer_cursor *pCur, fuzzer_stem *pStem){ const fuzzer_rule *pRule; while( (pRule = pStem->pRule)!=0 ){ + assert( pRule==&pCur->nullRule || pRule->iRuleset==pCur->iRuleset ); while( pStem->n < pStem->nBasis - pRule->nFrom ){ pStem->n++; if( pRule->nFrom==0 @@ -453,8 +745,11 @@ static int fuzzerAdvance(fuzzer_cursor *pCur, fuzzer_stem *pStem){ } } pStem->n = -1; - pStem->pRule = pRule->pNext; - if( pStem->pRule && fuzzerCost(pStem)>pCur->rLimit ) pStem->pRule = 0; + do{ + pRule = pRule->pNext; + }while( fuzzerSkipRule(pRule, pStem, pCur->iRuleset) ); + pStem->pRule = pRule; + if( pRule && fuzzerCost(pStem)>pCur->rLimit ) pStem->pRule = 0; } return 0; } @@ -572,15 +867,20 @@ static fuzzer_stem *fuzzerNewStem( fuzzer_cost rBaseCost ){ fuzzer_stem *pNew; + fuzzer_rule *pRule; unsigned int h; - pNew = sqlite3_malloc( sizeof(*pNew) + strlen(zWord) + 1 ); + pNew = sqlite3_malloc( sizeof(*pNew) + (int)strlen(zWord) + 1 ); if( pNew==0 ) return 0; memset(pNew, 0, sizeof(*pNew)); pNew->zBasis = (char*)&pNew[1]; - pNew->nBasis = strlen(zWord); + pNew->nBasis = (int)strlen(zWord); memcpy(pNew->zBasis, zWord, pNew->nBasis+1); - pNew->pRule = pCur->pVtab->pRule; + pRule = pCur->pVtab->pRule; + while( fuzzerSkipRule(pRule, pNew, pCur->iRuleset) ){ + pRule = pRule->pNext; + } + pNew->pRule = pRule; pNew->n = -1; pNew->rBaseCost = pNew->rCostX = rBaseCost; h = fuzzerHash(pNew->zBasis); @@ -627,7 +927,10 @@ static int fuzzerNext(sqlite3_vtab_cursor *cur){ ** stem list is the next lowest cost word. */ while( (pStem = pCur->pStem)!=0 ){ - if( fuzzerAdvance(pCur, pStem) ){ + int res = fuzzerAdvance(pCur, pStem); + if( res<0 ){ + return SQLITE_NOMEM; + }else if( res>0 ){ pCur->pStem = 0; pStem = fuzzerInsert(pCur, pStem); if( (rc = fuzzerSeen(pCur, pStem))!=0 ){ @@ -665,30 +968,44 @@ static int fuzzerFilter( int argc, sqlite3_value **argv ){ fuzzer_cursor *pCur = (fuzzer_cursor *)pVtabCursor; - const char *zWord = 0; + const char *zWord = ""; fuzzer_stem *pStem; + int idx; fuzzerClearCursor(pCur, 1); pCur->rLimit = 2147483647; - if( idxNum==1 ){ - zWord = (const char*)sqlite3_value_text(argv[0]); - }else if( idxNum==2 ){ - pCur->rLimit = (fuzzer_cost)sqlite3_value_int(argv[0]); - }else if( idxNum==3 ){ + idx = 0; + if( idxNum & 1 ){ zWord = (const char*)sqlite3_value_text(argv[0]); - pCur->rLimit = (fuzzer_cost)sqlite3_value_int(argv[1]); + idx++; + } + if( idxNum & 2 ){ + pCur->rLimit = (fuzzer_cost)sqlite3_value_int(argv[idx]); + idx++; + } + if( idxNum & 4 ){ + pCur->iRuleset = (fuzzer_cost)sqlite3_value_int(argv[idx]); + idx++; } - if( zWord==0 ) zWord = ""; - pCur->pStem = pStem = fuzzerNewStem(pCur, zWord, (fuzzer_cost)0); - if( pStem==0 ) return SQLITE_NOMEM; pCur->nullRule.pNext = pCur->pVtab->pRule; pCur->nullRule.rCost = 0; pCur->nullRule.nFrom = 0; pCur->nullRule.nTo = 0; pCur->nullRule.zFrom = ""; - pStem->pRule = &pCur->nullRule; - pStem->n = pStem->nBasis; pCur->iRowid = 1; + assert( pCur->pStem==0 ); + + /* If the query term is longer than FUZZER_MX_OUTPUT_LENGTH bytes, this + ** query will return zero rows. */ + if( (int)strlen(zWord)<FUZZER_MX_OUTPUT_LENGTH ){ + pCur->pStem = pStem = fuzzerNewStem(pCur, zWord, (fuzzer_cost)0); + if( pStem==0 ) return SQLITE_NOMEM; + pStem->pRule = &pCur->nullRule; + pStem->n = pStem->nBasis; + }else{ + pCur->rLimit = 0; + } + return SQLITE_OK; } @@ -735,22 +1052,29 @@ static int fuzzerEof(sqlite3_vtab_cursor *cur){ /* ** Search for terms of these forms: ** -** word MATCH $str -** distance < $value -** distance <= $value +** (A) word MATCH $str +** (B1) distance < $value +** (B2) distance <= $value +** (C) ruleid == $ruleid ** ** The distance< and distance<= are both treated as distance<=. -** The query plan number is as follows: +** The query plan number is a bit vector: ** -** 0: None of the terms above are found -** 1: There is a "word MATCH" term with $str in filter.argv[0]. -** 2: There is a "distance<" term with $value in filter.argv[0]. -** 3: Both "word MATCH" and "distance<" with $str in argv[0] and -** $value in argv[1]. +** bit 1: Term of the form (A) found +** bit 2: Term like (B1) or (B2) found +** bit 3: Term like (C) found +** +** If bit-1 is set, $str is always in filter.argv[0]. If bit-2 is set +** then $value is in filter.argv[0] if bit-1 is clear and is in +** filter.argv[1] if bit-1 is set. If bit-3 is set, then $ruleid is +** in filter.argv[0] if bit-1 and bit-2 are both zero, is in +** filter.argv[1] if exactly one of bit-1 and bit-2 are set, and is in +** filter.argv[2] if both bit-1 and bit-2 are set. */ static int fuzzerBestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){ int iPlan = 0; int iDistTerm = -1; + int iRulesetTerm = -1; int i; const struct sqlite3_index_constraint *pConstraint; pConstraint = pIdxInfo->aConstraint; @@ -772,11 +1096,23 @@ static int fuzzerBestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){ iPlan |= 2; iDistTerm = i; } + if( (iPlan & 4)==0 + && pConstraint->iColumn==2 + && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ + ){ + iPlan |= 4; + pIdxInfo->aConstraintUsage[i].omit = 1; + iRulesetTerm = i; + } + } + if( iPlan & 2 ){ + pIdxInfo->aConstraintUsage[iDistTerm].argvIndex = 1+((iPlan&1)!=0); } - if( iPlan==2 ){ - pIdxInfo->aConstraintUsage[iDistTerm].argvIndex = 1; - }else if( iPlan==3 ){ - pIdxInfo->aConstraintUsage[iDistTerm].argvIndex = 2; + if( iPlan & 4 ){ + int idx = 1; + if( iPlan & 1 ) idx++; + if( iPlan & 2 ) idx++; + pIdxInfo->aConstraintUsage[iRulesetTerm].argvIndex = idx; } pIdxInfo->idxNum = iPlan; if( pIdxInfo->nOrderBy==1 @@ -791,72 +1127,7 @@ static int fuzzerBestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){ } /* -** Disallow all attempts to DELETE or UPDATE. Only INSERTs are allowed. -** -** On an insert, the cFrom, cTo, and cost columns are used to construct -** a new rule. All other columns are ignored. The rule is ignored -** if cFrom and cTo are identical. A NULL value for cFrom or cTo is -** interpreted as an empty string. The cost must be positive. -*/ -static int fuzzerUpdate( - sqlite3_vtab *pVTab, - int argc, - sqlite3_value **argv, - sqlite_int64 *pRowid -){ - fuzzer_vtab *p = (fuzzer_vtab*)pVTab; - fuzzer_rule *pRule; - const char *zFrom; - int nFrom; - const char *zTo; - int nTo; - fuzzer_cost rCost; - if( argc!=7 ){ - sqlite3_free(pVTab->zErrMsg); - pVTab->zErrMsg = sqlite3_mprintf("cannot delete from a %s virtual table", - p->zClassName); - return SQLITE_CONSTRAINT; - } - if( sqlite3_value_type(argv[0])!=SQLITE_NULL ){ - sqlite3_free(pVTab->zErrMsg); - pVTab->zErrMsg = sqlite3_mprintf("cannot update a %s virtual table", - p->zClassName); - return SQLITE_CONSTRAINT; - } - zFrom = (char*)sqlite3_value_text(argv[4]); - if( zFrom==0 ) zFrom = ""; - zTo = (char*)sqlite3_value_text(argv[5]); - if( zTo==0 ) zTo = ""; - if( strcmp(zFrom,zTo)==0 ){ - /* Silently ignore null transformations */ - return SQLITE_OK; - } - rCost = sqlite3_value_int(argv[6]); - if( rCost<=0 ){ - sqlite3_free(pVTab->zErrMsg); - pVTab->zErrMsg = sqlite3_mprintf("cost must be positive"); - return SQLITE_CONSTRAINT; - } - nFrom = strlen(zFrom); - nTo = strlen(zTo); - pRule = sqlite3_malloc( sizeof(*pRule) + nFrom + nTo ); - if( pRule==0 ){ - return SQLITE_NOMEM; - } - pRule->zFrom = &pRule->zTo[nTo+1]; - pRule->nFrom = nFrom; - memcpy(pRule->zFrom, zFrom, nFrom+1); - memcpy(pRule->zTo, zTo, nTo+1); - pRule->nTo = nTo; - pRule->rCost = rCost; - pRule->pNext = p->pNewRule; - p->pNewRule = pRule; - return SQLITE_OK; -} - -/* -** A virtual table module that provides read-only access to a -** Tcl global variable namespace. +** A virtual table module that implements the "fuzzer". */ static sqlite3_module fuzzerModule = { 0, /* iVersion */ @@ -872,7 +1143,7 @@ static sqlite3_module fuzzerModule = { fuzzerEof, /* xEof - check for end of scan */ fuzzerColumn, /* xColumn - read data */ fuzzerRowid, /* xRowid - read data */ - fuzzerUpdate, /* xUpdate - INSERT */ + 0, /* xUpdate */ 0, /* xBegin */ 0, /* xSync */ 0, /* xCommit */ @@ -916,7 +1187,7 @@ static int register_fuzzer_module( Tcl_WrongNumArgs(interp, 1, objv, "DB"); return TCL_ERROR; } - if( getDbPointer(interp, Tcl_GetString(objv[1]), &db) ) return TCL_ERROR; + getDbPointer(interp, Tcl_GetString(objv[1]), &db); fuzzer_register(db); return TCL_OK; } |