From 08119c361d1181b3e8f1abb429236e488a664753 Mon Sep 17 00:00:00 2001 From: Hans-Christoph Steiner Date: Tue, 13 Aug 2013 15:42:54 -0400 Subject: Imported Upstream version 2.2.1 --- ext/misc/amatch.c | 1483 +++++++++++++++++++++++++ ext/misc/closure.c | 948 ++++++++++++++++ ext/misc/fuzzer.c | 1173 ++++++++++++++++++++ ext/misc/ieee754.c | 131 +++ ext/misc/nextchar.c | 265 +++++ ext/misc/regexp.c | 756 +++++++++++++ ext/misc/rot13.c | 114 ++ ext/misc/spellfix.c | 2844 ++++++++++++++++++++++++++++++++++++++++++++++++ ext/misc/wholenumber.c | 274 +++++ 9 files changed, 7988 insertions(+) create mode 100644 ext/misc/amatch.c create mode 100644 ext/misc/closure.c create mode 100644 ext/misc/fuzzer.c create mode 100644 ext/misc/ieee754.c create mode 100644 ext/misc/nextchar.c create mode 100644 ext/misc/regexp.c create mode 100644 ext/misc/rot13.c create mode 100644 ext/misc/spellfix.c create mode 100644 ext/misc/wholenumber.c (limited to 'ext/misc') diff --git a/ext/misc/amatch.c b/ext/misc/amatch.c new file mode 100644 index 0000000..b613080 --- /dev/null +++ b/ext/misc/amatch.c @@ -0,0 +1,1483 @@ +/* +** 2013-03-14 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +** This file contains code for a demonstration virtual table that finds +** "approximate matches" - strings from a finite set that are nearly the +** same as a single input string. The virtual table is called "amatch". 
+**
+** An amatch virtual table is created like this:
+**
+**     CREATE VIRTUAL TABLE f USING approximate_match(
+**        vocabulary_table=<tablename>,      -- V
+**        vocabulary_word=<columnname>,      -- W
+**        vocabulary_language=<columnname>,  -- L
+**        edit_distances=<edit-cost-table>
+**     );
+**
+** When it is created, the new amatch table must be supplied with
+** the name of a table V and columns V.W and V.L such that
+**
+**     SELECT W FROM V WHERE L=$language
+**
+** returns the allowed vocabulary for the match.  If the "vocabulary_language"
+** or L columnname is left unspecified or is an empty string, then no
+** filtering of the vocabulary by language is performed.
+**
+** For efficiency, it is essential that the vocabulary table be indexed:
+**
+**     CREATE INDEX vocab_index ON V(W)
+**
+** A separate edit-cost-table provides scoring information that defines
+** what it means for one string to be "close" to another.
+**
+** The edit-cost-table must contain exactly four columns (more precisely,
+** the statement "SELECT * FROM <edit-cost-table>" must return records
+** that consist of four columns).  It does not matter what the columns are
+** named.
+**
+** Each row in the edit-cost-table represents a single character
+** transformation going from user input to the vocabulary.  The leftmost
+** column of the row (column 0) contains an integer identifier of the
+** language to which the transformation rule belongs (see "MULTIPLE LANGUAGES"
+** below).  The second column of the row (column 1) contains the input
+** character or characters - the characters of user input.  The third
+** column contains characters as they appear in the vocabulary table.
+** And the fourth column contains the integer cost of making the
+** transformation.
For example:
+**
+**    CREATE TABLE f_data(iLang, cFrom, cTo, Cost);
+**    INSERT INTO f_data(iLang, cFrom, cTo, Cost) VALUES(0, '', 'a', 100);
+**    INSERT INTO f_data(iLang, cFrom, cTo, Cost) VALUES(0, 'b', '', 87);
+**    INSERT INTO f_data(iLang, cFrom, cTo, Cost) VALUES(0, 'o', 'oe', 38);
+**    INSERT INTO f_data(iLang, cFrom, cTo, Cost) VALUES(0, 'oe', 'o', 40);
+**
+** The first row inserted into the edit-cost-table by the SQL script
+** above indicates that the cost of having an extra 'a' in the vocabulary
+** table that is missing in the user input is 100.  (All costs are integers.
+** Overall cost must not exceed 16777216.)  The second INSERT statement
+** creates a rule saying that the cost of having a single letter 'b' in
+** user input which is missing in the vocabulary table is 87.  The third
+** INSERT statement means that the cost of matching an 'o' in user input
+** against an 'oe' in the vocabulary table is 38.  And so forth.
+**
+** The following rules are special:
+**
+**    INSERT INTO f_data(iLang, cFrom, cTo, Cost) VALUES(0, '?', '', 97);
+**    INSERT INTO f_data(iLang, cFrom, cTo, Cost) VALUES(0, '', '?', 98);
+**    INSERT INTO f_data(iLang, cFrom, cTo, Cost) VALUES(0, '?', '?', 99);
+**
+** The '?' to '' rule is the cost of having any single character in the input
+** that is not found in the vocabulary.  The '' to '?' rule is the cost of
+** having a character in the vocabulary table that is missing from input.
+** And the '?' to '?' rule is the cost of doing an arbitrary character
+** substitution.  These three generic rules apply across all languages.
+** In other words, the iLang field is ignored for the generic substitution
+** rules.  If more than one cost is given for a generic substitution rule,
+** then the lowest cost is used.
+** +** Once it has been created, the amatch virtual table can be queried +** as follows: +** +** SELECT word, distance FROM f +** WHERE word MATCH 'abcdefg' +** AND distance<200; +** +** This query outputs the strings contained in the T(F) field that +** are close to "abcdefg" and in order of increasing distance. No string +** is output more than once. If there are multiple ways to transform the +** target string ("abcdefg") into a string in the vocabulary table then +** the lowest cost transform is the one that is returned. In this example, +** the search is limited to strings with a total distance of less than 200. +** +** For efficiency, it is important to put tight bounds on the distance. +** The time and memory space needed to perform this query is exponential +** in the maximum distance. A good rule of thumb is to limit the distance +** to no more than 1.5 or 2 times the maximum cost of any rule in the +** edit-cost-table. +** +** The amatch is a read-only table. Any attempt to DELETE, INSERT, or +** UPDATE on a amatch table will throw an error. +** +** It is important to put some kind of a limit on the amatch output. This +** can be either in the form of a LIMIT clause at the end of the query, +** or better, a "distance +#include +#include +#include +#include + +#ifndef SQLITE_OMIT_VIRTUALTABLE + +/* +** Forward declaration of objects used by this implementation +*/ +typedef struct amatch_vtab amatch_vtab; +typedef struct amatch_cursor amatch_cursor; +typedef struct amatch_rule amatch_rule; +typedef struct amatch_word amatch_word; +typedef struct amatch_avl amatch_avl; + + +/***************************************************************************** +** AVL Tree implementation +*/ +/* +** Objects that want to be members of the AVL tree should embedded an +** instance of this structure. +*/ +struct amatch_avl { + amatch_word *pWord; /* Points to the object being stored in the tree */ + char *zKey; /* Key. zero-terminated string. 
Must be unique */
+  amatch_avl *pBefore; /* Other elements less than zKey */
+  amatch_avl *pAfter;  /* Other elements greater than zKey */
+  amatch_avl *pUp;     /* Parent element.  NULL for the root */
+  short int height;    /* Height of this node.  Leaf==1 */
+  short int imbalance; /* Height difference between pBefore and pAfter */
+};
+
+/* Recompute the amatch_avl.height and amatch_avl.imbalance fields for p.
+** Assume that the children of p have correct heights.
+**
+** A missing child counts as height 0, so a leaf ends up with height 1
+** and imbalance 0.
+*/
+static void amatchAvlRecomputeHeight(amatch_avl *p){
+  short int hBefore = p->pBefore ? p->pBefore->height : 0;
+  short int hAfter = p->pAfter ? p->pAfter->height : 0;
+  p->imbalance = hBefore - hAfter;  /* -: pAfter higher.  +: pBefore higher */
+  p->height = (hBefore>hAfter ? hBefore : hAfter)+1;
+}
+
+/*
+**     P                B
+**    / \              / \
+**   B   Z    ==>     X   P
+**  / \                  / \
+** X   Y                Y   Z
+**
+** Rotate the pBefore child B of pP up into pP's position and return B.
+** B inherits pP's old parent pointer, but the parent's child link is NOT
+** updated here; the caller must store the returned pointer there.
+** Heights are recomputed bottom-up (pP first, then B).
+*/
+static amatch_avl *amatchAvlRotateBefore(amatch_avl *pP){
+  amatch_avl *pB = pP->pBefore;
+  amatch_avl *pY = pB->pAfter;
+  pB->pUp = pP->pUp;
+  pB->pAfter = pP;
+  pP->pUp = pB;
+  pP->pBefore = pY;
+  if( pY ) pY->pUp = pP;
+  amatchAvlRecomputeHeight(pP);
+  amatchAvlRecomputeHeight(pB);
+  return pB;
+}
+
+/*
+**     P                A
+**    / \              / \
+**   X   A    ==>     P   Z
+**      / \          / \
+**     Y   Z        X   Y
+**
+** Mirror image of amatchAvlRotateBefore(): rotate the pAfter child A of
+** pP up into pP's position and return A.  As above, the caller must
+** store the returned pointer in the parent's child link.
+*/
+static amatch_avl *amatchAvlRotateAfter(amatch_avl *pP){
+  amatch_avl *pA = pP->pAfter;
+  amatch_avl *pY = pA->pBefore;
+  pA->pUp = pP->pUp;
+  pA->pBefore = pP;
+  pP->pUp = pA;
+  pP->pAfter = pY;
+  if( pY ) pY->pUp = pP;
+  amatchAvlRecomputeHeight(pP);
+  amatchAvlRecomputeHeight(pA);
+  return pA;
+}
+
+/*
+** Return a pointer to the pBefore or pAfter pointer in the parent
+** of p that points to p.  Or if p is the root node, return pp.
+**
+** Note that when p has a parent and is not that parent's pAfter child,
+** the parent's pBefore slot is returned without checking that it really
+** points at p.
+*/
+static amatch_avl **amatchAvlFromPtr(amatch_avl *p, amatch_avl **pp){
+  amatch_avl *pUp = p->pUp;
+  if( pUp==0 ) return pp;
+  if( pUp->pAfter==p ) return &pUp->pAfter;
+  return &pUp->pBefore;
+}
+
+/*
+** Rebalance all nodes starting with p and working up to the root.
+** Return the new root.
+**
+** At every node on the way up the height/imbalance fields are recomputed
+** first.  An imbalance of +2 or more (pBefore side too tall) is repaired
+** with a RotateBefore, preceded by a RotateAfter of the pBefore child when
+** that child leans the other way.  An imbalance of -2 or less is handled
+** by the mirror-image rotations.  If p is NULL, NULL is returned.
+*/
+static amatch_avl *amatchAvlBalance(amatch_avl *p){
+  amatch_avl *pTop = p;
+  amatch_avl **pp;
+  while( p ){
+    amatchAvlRecomputeHeight(p);
+    if( p->imbalance>=2 ){
+      amatch_avl *pB = p->pBefore;
+      if( pB->imbalance<0 ) p->pBefore = amatchAvlRotateAfter(pB);
+      /* pp is the parent's link to p, or &p itself when p is the root */
+      pp = amatchAvlFromPtr(p,&p);
+      p = *pp = amatchAvlRotateBefore(p);
+    }else if( p->imbalance<=(-2) ){
+      amatch_avl *pA = p->pAfter;
+      if( pA->imbalance>0 ) p->pAfter = amatchAvlRotateBefore(pA);
+      pp = amatchAvlFromPtr(p,&p);
+      p = *pp = amatchAvlRotateAfter(p);
+    }
+    pTop = p;      /* Remember the highest node visited so far */
+    p = p->pUp;
+  }
+  return pTop;
+}
+
+/* Search the tree rooted at p for an entry with zKey.  Return a pointer
+** to the entry or return NULL.  Keys are compared with strcmp().
+*/
+static amatch_avl *amatchAvlSearch(amatch_avl *p, const char *zKey){
+  int c;
+  while( p && (c = strcmp(zKey, p->zKey))!=0 ){
+    p = (c<0) ? p->pBefore : p->pAfter;
+  }
+  return p;
+}
+
+/* Find the first node (the one with the smallest key).
+** Returns NULL if p is NULL.
+*/
+static amatch_avl *amatchAvlFirst(amatch_avl *p){
+  if( p ) while( p->pBefore ) p = p->pBefore;
+  return p;
+}
+
+#if 0 /* NOT USED */
+/* Return the node with the next larger key after p.
+** This routine is compiled out by the enclosing "#if 0".
+*/
+static amatch_avl *amatchAvlNext(amatch_avl *p){
+  amatch_avl *pPrev = 0;
+  /* Climb for as long as we arrive at p from its pAfter side (or, on the
+  ** first pass, for as long as p has no pAfter child at all). */
+  while( p && p->pAfter==pPrev ){
+    pPrev = p;
+    p = p->pUp;
+  }
+  /* pPrev==0 means no climbing happened, so the starting node has a pAfter
+  ** subtree and the successor is the smallest key in that subtree. */
+  if( p && pPrev==0 ){
+    p = amatchAvlFirst(p->pAfter);
+  }
+  return p;
+}
+#endif
+
+#if 0 /* NOT USED */
+/* Verify AVL tree integrity
+**
+** Debugging aids only (compiled out).  Both routines always return 1;
+** any inconsistency is reported through assert().
+*/
+static int amatchAvlIntegrity(amatch_avl *pHead){
+  amatch_avl *p;
+  if( pHead==0 ) return 1;
+  if( (p = pHead->pBefore)!=0 ){
+    assert( p->pUp==pHead );
+    assert( amatchAvlIntegrity(p) );
+    assert( strcmp(p->zKey, pHead->zKey)<0 );
+    /* Largest key of the pBefore subtree must still sort before pHead */
+    while( p->pAfter ) p = p->pAfter;
+    assert( strcmp(p->zKey, pHead->zKey)<0 );
+  }
+  if( (p = pHead->pAfter)!=0 ){
+    assert( p->pUp==pHead );
+    assert( amatchAvlIntegrity(p) );
+    assert( strcmp(p->zKey, pHead->zKey)>0 );
+    /* Smallest key of the pAfter subtree must still sort after pHead */
+    p = amatchAvlFirst(p);
+    assert( strcmp(p->zKey, pHead->zKey)>0 );
+  }
+  return 1;
+}
+/* Walk the tree in key order and assert that keys strictly increase. */
+static int amatchAvlIntegrity2(amatch_avl *pHead){
+  amatch_avl *p, *pNext;
+  for(p=amatchAvlFirst(pHead); p; p=pNext){
+    pNext = amatchAvlNext(p);
+    if( pNext==0 ) break;
+    assert( strcmp(p->zKey, pNext->zKey)<0 );
+  }
+  return 1;
+}
+#endif
+
+/* Insert a new node pNew.  Return NULL on success.  If the key is not
+** unique, then do not perform the insert but instead leave pNew unchanged
+** and return a pointer to an existing node with the same key.
+*/ +static amatch_avl *amatchAvlInsert(amatch_avl **ppHead, amatch_avl *pNew){ + int c; + amatch_avl *p = *ppHead; + if( p==0 ){ + p = pNew; + pNew->pUp = 0; + }else{ + while( p ){ + c = strcmp(pNew->zKey, p->zKey); + if( c<0 ){ + if( p->pBefore ){ + p = p->pBefore; + }else{ + p->pBefore = pNew; + pNew->pUp = p; + break; + } + }else if( c>0 ){ + if( p->pAfter ){ + p = p->pAfter; + }else{ + p->pAfter = pNew; + pNew->pUp = p; + break; + } + }else{ + return p; + } + } + } + pNew->pBefore = 0; + pNew->pAfter = 0; + pNew->height = 1; + pNew->imbalance = 0; + *ppHead = amatchAvlBalance(p); + /* assert( amatchAvlIntegrity(*ppHead) ); */ + /* assert( amatchAvlIntegrity2(*ppHead) ); */ + return 0; +} + +/* Remove node pOld from the tree. pOld must be an element of the tree or +** the AVL tree will become corrupt. +*/ +static void amatchAvlRemove(amatch_avl **ppHead, amatch_avl *pOld){ + amatch_avl **ppParent; + amatch_avl *pBalance; + /* assert( amatchAvlSearch(*ppHead, pOld->zKey)==pOld ); */ + ppParent = amatchAvlFromPtr(pOld, ppHead); + if( pOld->pBefore==0 && pOld->pAfter==0 ){ + *ppParent = 0; + pBalance = pOld->pUp; + }else if( pOld->pBefore && pOld->pAfter ){ + amatch_avl *pX, *pY; + pX = amatchAvlFirst(pOld->pAfter); + *amatchAvlFromPtr(pX, 0) = pX->pAfter; + if( pX->pAfter ) pX->pAfter->pUp = pX->pUp; + pBalance = pX->pUp; + pX->pAfter = pOld->pAfter; + if( pX->pAfter ){ + pX->pAfter->pUp = pX; + }else{ + assert( pBalance==pOld ); + pBalance = pX; + } + pX->pBefore = pY = pOld->pBefore; + if( pY ) pY->pUp = pX; + pX->pUp = pOld->pUp; + *ppParent = pX; + }else if( pOld->pBefore==0 ){ + *ppParent = pBalance = pOld->pAfter; + pBalance->pUp = pOld->pUp; + }else if( pOld->pAfter==0 ){ + *ppParent = pBalance = pOld->pBefore; + pBalance->pUp = pOld->pUp; + } + *ppHead = amatchAvlBalance(pBalance); + pOld->pUp = 0; + pOld->pBefore = 0; + pOld->pAfter = 0; + /* assert( amatchAvlIntegrity(*ppHead) ); */ + /* assert( amatchAvlIntegrity2(*ppHead) ); */ +} +/* +** End of the 
AVL Tree implementation
+******************************************************************************/
+
+
+/*
+** Various types.
+**
+** amatch_cost is the "cost" of an edit operation.
+**
+** amatch_len is the length of a matching string.
+**
+** amatch_langid is a ruleset identifier.
+*/
+typedef int amatch_cost;
+typedef signed char amatch_len;
+typedef int amatch_langid;
+
+/*
+** Limits
+*/
+#define AMATCH_MX_LENGTH          50  /* Maximum length of a rule string */
+#define AMATCH_MX_LANGID  2147483647  /* Maximum rule ID */
+#define AMATCH_MX_COST          1000  /* Maximum single-rule cost */
+
+/*
+** A match or partial match
+*/
+struct amatch_word {
+  amatch_word *pNext;   /* Next on a list of all amatch_words */
+  amatch_avl sCost;     /* Linkage of this node into the cost tree */
+  amatch_avl sWord;     /* Linkage of this node into the word tree */
+  amatch_cost rCost;    /* Cost of the match so far */
+  int iSeq;             /* Sequence number */
+  char zCost[10];       /* Cost key (text rendering of rCost, then iSeq) */
+  short int nMatch;     /* Input characters matched */
+  char zWord[4];        /* Word-tree key: 2 bytes of encoded nMatch followed */
+                        /* by the word text.  Extra space appended as needed */
+};
+
+/*
+** Each transformation rule is stored as an instance of this object.
+** All rules are kept on a linked list sorted by rCost.
+*/
+struct amatch_rule {
+  amatch_rule *pNext;      /* Next rule in order of increasing rCost */
+  char *zFrom;             /* Transform from (a string from user input) */
+  amatch_cost rCost;       /* Cost of this transformation */
+  amatch_langid iLang;     /* The language to which this rule belongs */
+  amatch_len nFrom, nTo;   /* Length of the zFrom and zTo strings */
+  char zTo[4];             /* Transform to V.W value (extra space appended) */
+};
+
+/*
+** An amatch virtual-table object
+*/
+struct amatch_vtab {
+  sqlite3_vtab base;         /* Base class - must be first */
+  char *zClassName;          /* Name of this class.  Default: "amatch" */
+  char *zDb;                 /* Name of database.  (ex: "main") */
+  char *zSelf;               /* Name of this virtual table */
+  char *zCostTab;            /* Name of edit-cost-table */
+  char *zVocabTab;           /* Name of vocabulary table */
+  char *zVocabWord;          /* Name of vocabulary table word column */
+  char *zVocabLang;          /* Name of vocabulary table language column */
+  amatch_rule *pRule;        /* All active rules in this amatch */
+  amatch_cost rIns;          /* Generic insertion cost  '' -> ? */
+  amatch_cost rDel;          /* Generic deletion cost  ? -> '' */
+  amatch_cost rSub;          /* Generic substitution cost ? -> ? */
+  sqlite3 *db;               /* The database connection */
+  sqlite3_stmt *pVCheck;     /* Query to check zVocabTab */
+  int nCursor;               /* Number of active cursors */
+};
+
+/* An amatch cursor object */
+struct amatch_cursor {
+  sqlite3_vtab_cursor base;  /* Base class - must be first */
+  sqlite3_int64 iRowid;      /* The rowid of the current word */
+  amatch_langid iLang;       /* Use this language ID */
+  amatch_cost rLimit;        /* Maximum cost of any term */
+  int nBuf;                  /* Space allocated for zBuf */
+  int oomErr;                /* True following an OOM error */
+  int nWord;                 /* Number of amatch_word objects */
+  char *zBuf;                /* Temp-use buffer space */
+  char *zInput;              /* Input word to match against */
+  amatch_vtab *pVtab;        /* The virtual table this cursor belongs to */
+  amatch_word *pAllWords;    /* List of all amatch_word objects */
+  amatch_word *pCurrent;     /* Most recent solution */
+  amatch_avl *pCost;         /* amatch_word objects keyed by zCost */
+  amatch_avl *pWord;         /* amatch_word objects keyed by zWord */
+};
+
+/*
+** The two input rule lists are both sorted in order of increasing
+** cost.  Merge them together into a single list, sorted by cost, and
+** return a pointer to the head of that list.
+*/ +static amatch_rule *amatchMergeRules(amatch_rule *pA, amatch_rule *pB){ + amatch_rule head; + amatch_rule *pTail; + + pTail = &head; + while( pA && pB ){ + if( pA->rCost<=pB->rCost ){ + pTail->pNext = pA; + pTail = pA; + pA = pA->pNext; + }else{ + pTail->pNext = pB; + pTail = pB; + pB = pB->pNext; + } + } + if( pA==0 ){ + pTail->pNext = pB; + }else{ + pTail->pNext = pA; + } + return head.pNext; +} + +/* +** Statement pStmt currently points to a row in the amatch data table. This +** function allocates and populates a amatch_rule structure according to +** the content of the row. +** +** If successful, *ppRule is set to point to the new object and SQLITE_OK +** is returned. Otherwise, *ppRule is zeroed, *pzErr may be set to point +** to an error message and an SQLite error code returned. +*/ +static int amatchLoadOneRule( + amatch_vtab *p, /* Fuzzer virtual table handle */ + sqlite3_stmt *pStmt, /* Base rule on statements current row */ + amatch_rule **ppRule, /* OUT: New rule object */ + char **pzErr /* OUT: Error message */ +){ + sqlite3_int64 iLang = sqlite3_column_int64(pStmt, 0); + const char *zFrom = (const char *)sqlite3_column_text(pStmt, 1); + const char *zTo = (const char *)sqlite3_column_text(pStmt, 2); + amatch_cost rCost = sqlite3_column_int(pStmt, 3); + + int rc = SQLITE_OK; /* Return code */ + int nFrom; /* Size of string zFrom, in bytes */ + int nTo; /* Size of string zTo, in bytes */ + amatch_rule *pRule = 0; /* New rule object to return */ + + if( zFrom==0 ) zFrom = ""; + if( zTo==0 ) zTo = ""; + nFrom = (int)strlen(zFrom); + nTo = (int)strlen(zTo); + + /* Silently ignore null transformations */ + if( strcmp(zFrom, zTo)==0 ){ + if( zFrom[0]=='?' 
&& zFrom[1]==0 ){ + if( p->rSub==0 || p->rSub>rCost ) p->rSub = rCost; + } + *ppRule = 0; + return SQLITE_OK; + } + + if( rCost<=0 || rCost>AMATCH_MX_COST ){ + *pzErr = sqlite3_mprintf("%s: cost must be between 1 and %d", + p->zClassName, AMATCH_MX_COST + ); + rc = SQLITE_ERROR; + }else + if( nFrom>AMATCH_MX_LENGTH || nTo>AMATCH_MX_LENGTH ){ + *pzErr = sqlite3_mprintf("%s: maximum string length is %d", + p->zClassName, AMATCH_MX_LENGTH + ); + rc = SQLITE_ERROR; + }else + if( iLang<0 || iLang>AMATCH_MX_LANGID ){ + *pzErr = sqlite3_mprintf("%s: iLang must be between 0 and %d", + p->zClassName, AMATCH_MX_LANGID + ); + rc = SQLITE_ERROR; + }else + if( strcmp(zFrom,"")==0 && strcmp(zTo,"?")==0 ){ + if( p->rIns==0 || p->rIns>rCost ) p->rIns = rCost; + }else + if( strcmp(zFrom,"?")==0 && strcmp(zTo,"")==0 ){ + if( p->rDel==0 || p->rDel>rCost ) p->rDel = rCost; + }else + { + pRule = sqlite3_malloc( sizeof(*pRule) + nFrom + nTo ); + if( pRule==0 ){ + rc = SQLITE_NOMEM; + }else{ + memset(pRule, 0, sizeof(*pRule)); + pRule->zFrom = &pRule->zTo[nTo+1]; + pRule->nFrom = nFrom; + memcpy(pRule->zFrom, zFrom, nFrom+1); + memcpy(pRule->zTo, zTo, nTo+1); + pRule->nTo = nTo; + pRule->rCost = rCost; + pRule->iLang = (int)iLang; + } + } + + *ppRule = pRule; + return rc; +} + +/* +** Free all the content in the edit-cost-table +*/ +static void amatchFreeRules(amatch_vtab *p){ + while( p->pRule ){ + amatch_rule *pRule = p->pRule; + p->pRule = pRule->pNext; + sqlite3_free(pRule); + } + p->pRule = 0; +} + +/* +** Load the content of the amatch data table into memory. 
+*/ +static int amatchLoadRules( + sqlite3 *db, /* Database handle */ + amatch_vtab *p, /* Virtual amatch table to configure */ + char **pzErr /* OUT: Error message */ +){ + int rc = SQLITE_OK; /* Return code */ + char *zSql; /* SELECT used to read from rules table */ + amatch_rule *pHead = 0; + + zSql = sqlite3_mprintf("SELECT * FROM %Q.%Q", p->zDb, p->zCostTab); + if( zSql==0 ){ + rc = SQLITE_NOMEM; + }else{ + int rc2; /* finalize() return code */ + sqlite3_stmt *pStmt = 0; + rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); + if( rc!=SQLITE_OK ){ + *pzErr = sqlite3_mprintf("%s: %s", p->zClassName, sqlite3_errmsg(db)); + }else if( sqlite3_column_count(pStmt)!=4 ){ + *pzErr = sqlite3_mprintf("%s: %s has %d columns, expected 4", + p->zClassName, p->zCostTab, sqlite3_column_count(pStmt) + ); + rc = SQLITE_ERROR; + }else{ + while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){ + amatch_rule *pRule = 0; + rc = amatchLoadOneRule(p, pStmt, &pRule, pzErr); + if( pRule ){ + pRule->pNext = pHead; + pHead = pRule; + } + } + } + rc2 = sqlite3_finalize(pStmt); + if( rc==SQLITE_OK ) rc = rc2; + } + sqlite3_free(zSql); + + /* All rules are now in a singly linked list starting at pHead. This + ** block sorts them by cost and then sets amatch_vtab.pRule to point to + ** point to the head of the sorted list. + */ + if( rc==SQLITE_OK ){ + unsigned int i; + amatch_rule *pX; + amatch_rule *a[15]; + for(i=0; ipNext; + pX->pNext = 0; + for(i=0; a[i] && ipRule = amatchMergeRules(p->pRule, pX); + }else{ + /* An error has occurred. Setting p->pRule to point to the head of the + ** allocated list ensures that the list will be cleaned up in this case. + */ + assert( p->pRule==0 ); + p->pRule = pHead; + } + + return rc; +} + +/* +** This function converts an SQL quoted string into an unquoted string +** and returns a pointer to a buffer allocated using sqlite3_malloc() +** containing the result. The caller should eventually free this buffer +** using sqlite3_free. 
+**
+** Examples:
+**
+**     "abc"   becomes   abc
+**     'xyz'   becomes   xyz
+**     [pqr]   becomes   pqr
+**     `mno`   becomes   mno
+*/
+static char *amatchDequote(const char *zIn){
+  int nIn;                        /* Size of input string, in bytes */
+  char *zOut;                     /* Output (dequoted) string */
+
+  nIn = (int)strlen(zIn);
+  zOut = sqlite3_malloc(nIn+1);
+  if( zOut ){
+    char q = zIn[0];              /* Quote character (if any ) */
+
+    if( q!='[' && q!= '\'' && q!='"' && q!='`' ){
+      memcpy(zOut, zIn, nIn+1);
+    }else{
+      int iOut = 0;               /* Index of next byte to write to output */
+      int iIn;                    /* Index of next byte to read from input */
+
+      if( q=='[' ) q = ']';
+      for(iIn=1; iInpVCheck ){
+    sqlite3_finalize(p->pVCheck);
+    p->pVCheck = 0;
+  }
+}
+
+/*
+** Deallocate an amatch_vtab object
+**
+** Releases the rule list, the cached vocabulary-check statement and every
+** string owned by the object, then the object itself.  A NULL argument is
+** a harmless no-op.
+*/
+static void amatchFree(amatch_vtab *p){
+  if( p ){
+    amatchFreeRules(p);
+    amatchVCheckClear(p);
+    sqlite3_free(p->zClassName);
+    sqlite3_free(p->zDb);
+    sqlite3_free(p->zSelf);       /* Allocated by amatchConnect() */
+    sqlite3_free(p->zCostTab);
+    sqlite3_free(p->zVocabTab);
+    sqlite3_free(p->zVocabWord);
+    sqlite3_free(p->zVocabLang);
+    memset(p, 0, sizeof(*p));
+    sqlite3_free(p);
+  }
+}
+
+/*
+** xDisconnect/xDestroy method for the amatch module.
+*/
+static int amatchDisconnect(sqlite3_vtab *pVtab){
+  amatch_vtab *p = (amatch_vtab*)pVtab;
+  assert( p->nCursor==0 );
+  amatchFree(p);
+  return SQLITE_OK;
+}
+
+/*
+** Check to see if the argument is of the form:
+**
+**       KEY = VALUE
+**
+** If it is, return a pointer to the first character of VALUE.
+** If not, return NULL.  Spaces around the = are ignored.
+*/
+static const char *amatchValueOfKey(const char *zKey, const char *zStr){
+  int nKey = (int)strlen(zKey);
+  int nStr = (int)strlen(zStr);
+  int i;
+  if( nStr module name ("approximate_match")
+** argv[1] -> database name
+** argv[2] -> table name
+** argv[3...]
-> arguments +*/ +static int amatchConnect( + sqlite3 *db, + void *pAux, + int argc, const char *const*argv, + sqlite3_vtab **ppVtab, + char **pzErr +){ + int rc = SQLITE_OK; /* Return code */ + amatch_vtab *pNew = 0; /* New virtual table */ + const char *zModule = argv[0]; + const char *zDb = argv[1]; + const char *zVal; + int i; + + (void)pAux; + *ppVtab = 0; + pNew = sqlite3_malloc( sizeof(*pNew) ); + if( pNew==0 ) return SQLITE_NOMEM; + rc = SQLITE_NOMEM; + memset(pNew, 0, sizeof(*pNew)); + pNew->db = db; + pNew->zClassName = sqlite3_mprintf("%s", zModule); + if( pNew->zClassName==0 ) goto amatchConnectError; + pNew->zDb = sqlite3_mprintf("%s", zDb); + if( pNew->zDb==0 ) goto amatchConnectError; + pNew->zSelf = sqlite3_mprintf("%s", argv[2]); + if( pNew->zSelf==0 ) goto amatchConnectError; + for(i=3; izVocabTab); + pNew->zVocabTab = amatchDequote(zVal); + if( pNew->zVocabTab==0 ) goto amatchConnectError; + continue; + } + zVal = amatchValueOfKey("vocabulary_word", argv[i]); + if( zVal ){ + sqlite3_free(pNew->zVocabWord); + pNew->zVocabWord = amatchDequote(zVal); + if( pNew->zVocabWord==0 ) goto amatchConnectError; + continue; + } + zVal = amatchValueOfKey("vocabulary_language", argv[i]); + if( zVal ){ + sqlite3_free(pNew->zVocabLang); + pNew->zVocabLang = amatchDequote(zVal); + if( pNew->zVocabLang==0 ) goto amatchConnectError; + continue; + } + zVal = amatchValueOfKey("edit_distances", argv[i]); + if( zVal ){ + sqlite3_free(pNew->zCostTab); + pNew->zCostTab = amatchDequote(zVal); + if( pNew->zCostTab==0 ) goto amatchConnectError; + continue; + } + *pzErr = sqlite3_mprintf("unrecognized argument: [%s]\n", argv[i]); + amatchFree(pNew); + *ppVtab = 0; + return SQLITE_ERROR; + } + rc = SQLITE_OK; + if( pNew->zCostTab==0 ){ + *pzErr = sqlite3_mprintf("no edit_distances table specified"); + rc = SQLITE_ERROR; + }else{ + rc = amatchLoadRules(db, pNew, pzErr); + } + if( rc==SQLITE_OK ){ + rc = sqlite3_declare_vtab(db, + "CREATE TABLE x(word,distance,language," + 
"command HIDDEN,nword HIDDEN)" + ); +#define AMATCH_COL_WORD 0 +#define AMATCH_COL_DISTANCE 1 +#define AMATCH_COL_LANGUAGE 2 +#define AMATCH_COL_COMMAND 3 +#define AMATCH_COL_NWORD 4 + } + if( rc!=SQLITE_OK ){ + amatchFree(pNew); + } + *ppVtab = &pNew->base; + return rc; + +amatchConnectError: + amatchFree(pNew); + return rc; +} + +/* +** Open a new amatch cursor. +*/ +static int amatchOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){ + amatch_vtab *p = (amatch_vtab*)pVTab; + amatch_cursor *pCur; + pCur = sqlite3_malloc( sizeof(*pCur) ); + if( pCur==0 ) return SQLITE_NOMEM; + memset(pCur, 0, sizeof(*pCur)); + pCur->pVtab = p; + *ppCursor = &pCur->base; + p->nCursor++; + return SQLITE_OK; +} + +/* +** Free up all the memory allocated by a cursor. Set it rLimit to 0 +** to indicate that it is at EOF. +*/ +static void amatchClearCursor(amatch_cursor *pCur){ + amatch_word *pWord, *pNextWord; + for(pWord=pCur->pAllWords; pWord; pWord=pNextWord){ + pNextWord = pWord->pNext; + sqlite3_free(pWord); + } + pCur->pAllWords = 0; + sqlite3_free(pCur->zInput); + pCur->zInput = 0; + pCur->pCost = 0; + pCur->pWord = 0; + pCur->pCurrent = 0; + pCur->rLimit = 1000000; + pCur->iLang = 0; + pCur->nWord = 0; +} + +/* +** Close a amatch cursor. +*/ +static int amatchClose(sqlite3_vtab_cursor *cur){ + amatch_cursor *pCur = (amatch_cursor *)cur; + amatchClearCursor(pCur); + pCur->pVtab->nCursor--; + sqlite3_free(pCur); + return SQLITE_OK; +} + +/* +** Render a 24-bit unsigned integer as a 4-byte base-64 number. 
+*/ +static void amatchEncodeInt(int x, char *z){ + static const char a[] = + "0123456789" + "ABCDEFGHIJ" + "KLMNOPQRST" + "UVWXYZ^abc" + "defghijklm" + "nopqrstuvw" + "xyz~"; + z[0] = a[(x>>18)&0x3f]; + z[1] = a[(x>>12)&0x3f]; + z[2] = a[(x>>6)&0x3f]; + z[3] = a[x&0x3f]; +} + +/* +** Write the zCost[] field for a amatch_word object +*/ +static void amatchWriteCost(amatch_word *pWord){ + amatchEncodeInt(pWord->rCost, pWord->zCost); + amatchEncodeInt(pWord->iSeq, pWord->zCost+4); + pWord->zCost[8] = 0; +} + +/* +** Add a new amatch_word object to the queue. +** +** If a prior amatch_word object with the same zWord, and nMatch +** already exists, update its rCost (if the new rCost is less) but +** otherwise leave it unchanged. Do not add a duplicate. +** +** Do nothing if the cost exceeds threshold. +*/ +static void amatchAddWord( + amatch_cursor *pCur, + amatch_cost rCost, + int nMatch, + const char *zWordBase, + const char *zWordTail +){ + amatch_word *pWord; + amatch_avl *pNode; + amatch_avl *pOther; + int nBase, nTail; + char zBuf[4]; + + if( rCost>pCur->rLimit ){ + return; + } + nBase = (int)strlen(zWordBase); + nTail = (int)strlen(zWordTail); + if( nBase+nTail+3>pCur->nBuf ){ + pCur->nBuf = nBase+nTail+100; + pCur->zBuf = sqlite3_realloc(pCur->zBuf, pCur->nBuf); + if( pCur->zBuf==0 ){ + pCur->nBuf = 0; + return; + } + } + amatchEncodeInt(nMatch, zBuf); + memcpy(pCur->zBuf, zBuf+2, 2); + memcpy(pCur->zBuf+2, zWordBase, nBase); + memcpy(pCur->zBuf+2+nBase, zWordTail, nTail+1); + pNode = amatchAvlSearch(pCur->pWord, pCur->zBuf); + if( pNode ){ + pWord = pNode->pWord; + if( pWord->rCost>rCost ){ +#ifdef AMATCH_TRACE_1 + printf("UPDATE [%s][%.*s^%s] %d (\"%s\" \"%s\")\n", + pWord->zWord+2, pWord->nMatch, pCur->zInput, pCur->zInput, + pWord->rCost, pWord->zWord, pWord->zCost); +#endif + amatchAvlRemove(&pCur->pCost, &pWord->sCost); + pWord->rCost = rCost; + amatchWriteCost(pWord); +#ifdef AMATCH_TRACE_1 + printf(" ---> %d (\"%s\" \"%s\")\n", + pWord->rCost, 
pWord->zWord, pWord->zCost); +#endif + pOther = amatchAvlInsert(&pCur->pCost, &pWord->sCost); + assert( pOther==0 ); (void)pOther; + } + return; + } + pWord = sqlite3_malloc( sizeof(*pWord) + nBase + nTail - 1 ); + if( pWord==0 ) return; + memset(pWord, 0, sizeof(*pWord)); + pWord->rCost = rCost; + pWord->iSeq = pCur->nWord++; + amatchWriteCost(pWord); + pWord->nMatch = nMatch; + pWord->pNext = pCur->pAllWords; + pCur->pAllWords = pWord; + pWord->sCost.zKey = pWord->zCost; + pWord->sCost.pWord = pWord; + pOther = amatchAvlInsert(&pCur->pCost, &pWord->sCost); + assert( pOther==0 ); (void)pOther; + pWord->sWord.zKey = pWord->zWord; + pWord->sWord.pWord = pWord; + strcpy(pWord->zWord, pCur->zBuf); + pOther = amatchAvlInsert(&pCur->pWord, &pWord->sWord); + assert( pOther==0 ); (void)pOther; +#ifdef AMATCH_TRACE_1 + printf("INSERT [%s][%.*s^%s] %d (\"%s\" \"%s\")\n", pWord->zWord+2, + pWord->nMatch, pCur->zInput, pCur->zInput+pWord->nMatch, rCost, + pWord->zWord, pWord->zCost); +#endif +} + +/* +** Advance a cursor to its next row of output +*/ +static int amatchNext(sqlite3_vtab_cursor *cur){ + amatch_cursor *pCur = (amatch_cursor*)cur; + amatch_word *pWord = 0; + amatch_avl *pNode; + int isMatch = 0; + amatch_vtab *p = pCur->pVtab; + int nWord; + int rc; + int i; + const char *zW; + amatch_rule *pRule; + char *zBuf = 0; + char nBuf = 0; + char zNext[8]; + char zNextIn[8]; + int nNextIn; + + if( p->pVCheck==0 ){ + char *zSql; + if( p->zVocabLang && p->zVocabLang[0] ){ + zSql = sqlite3_mprintf( + "SELECT \"%s\" FROM \"%s\"", + " WHERE \"%w\">=?1 AND \"%w\"=?2" + " ORDER BY 1", + p->zVocabWord, p->zVocabTab, + p->zVocabWord, p->zVocabLang + ); + }else{ + zSql = sqlite3_mprintf( + "SELECT \"%s\" FROM \"%s\"" + " WHERE \"%w\">=?1" + " ORDER BY 1", + p->zVocabWord, p->zVocabTab, + p->zVocabWord + ); + } + rc = sqlite3_prepare_v2(p->db, zSql, -1, &p->pVCheck, 0); + sqlite3_free(zSql); + if( rc ) return rc; + } + sqlite3_bind_int(p->pVCheck, 2, pCur->iLang); + + do{ + pNode = 
amatchAvlFirst(pCur->pCost); + if( pNode==0 ){ + pWord = 0; + break; + } + pWord = pNode->pWord; + amatchAvlRemove(&pCur->pCost, &pWord->sCost); + +#ifdef AMATCH_TRACE_1 + printf("PROCESS [%s][%.*s^%s] %d (\"%s\" \"%s\")\n", + pWord->zWord+2, pWord->nMatch, pCur->zInput, pCur->zInput+pWord->nMatch, + pWord->rCost, pWord->zWord, pWord->zCost); +#endif + nWord = (int)strlen(pWord->zWord+2); + if( nWord+20>nBuf ){ + nBuf = nWord+100; + zBuf = sqlite3_realloc(zBuf, nBuf); + if( zBuf==0 ) return SQLITE_NOMEM; + } + strcpy(zBuf, pWord->zWord+2); + zNext[0] = 0; + zNextIn[0] = pCur->zInput[pWord->nMatch]; + if( zNextIn[0] ){ + for(i=1; i<=4 && (pCur->zInput[pWord->nMatch+i]&0xc0)==0x80; i++){ + zNextIn[i] = pCur->zInput[pWord->nMatch+i]; + } + zNextIn[i] = 0; + nNextIn = i; + }else{ + nNextIn = 0; + } + + if( zNextIn[0] && zNextIn[0]!='*' ){ + sqlite3_reset(p->pVCheck); + strcat(zBuf, zNextIn); + sqlite3_bind_text(p->pVCheck, 1, zBuf, nWord+nNextIn, SQLITE_STATIC); + rc = sqlite3_step(p->pVCheck); + if( rc==SQLITE_ROW ){ + zW = (const char*)sqlite3_column_text(p->pVCheck, 0); + if( strncmp(zBuf, zW, nWord+nNextIn)==0 ){ + amatchAddWord(pCur, pWord->rCost, pWord->nMatch+nNextIn, zBuf, ""); + } + } + zBuf[nWord] = 0; + } + + while( 1 ){ + strcpy(zBuf+nWord, zNext); + sqlite3_reset(p->pVCheck); + sqlite3_bind_text(p->pVCheck, 1, zBuf, -1, SQLITE_TRANSIENT); + rc = sqlite3_step(p->pVCheck); + if( rc!=SQLITE_ROW ) break; + zW = (const char*)sqlite3_column_text(p->pVCheck, 0); + strcpy(zBuf+nWord, zNext); + if( strncmp(zW, zBuf, nWord)!=0 ) break; + if( (zNextIn[0]=='*' && zNextIn[1]==0) + || (zNextIn[0]==0 && zW[nWord]==0) + ){ + isMatch = 1; + zNextIn[0] = 0; + nNextIn = 0; + break; + } + zNext[0] = zW[nWord]; + for(i=1; i<=4 && (zW[nWord+i]&0xc0)==0x80; i++){ + zNext[i] = zW[nWord+i]; + } + zNext[i] = 0; + zBuf[nWord] = 0; + if( p->rIns>0 ){ + amatchAddWord(pCur, pWord->rCost+p->rIns, pWord->nMatch, + zBuf, zNext); + } + if( p->rSub>0 ){ + amatchAddWord(pCur, 
pWord->rCost+p->rSub, pWord->nMatch+nNextIn, + zBuf, zNext); + } + if( p->rIns<0 && p->rSub<0 ) break; + zNext[i-1]++; /* FIX ME */ + } + sqlite3_reset(p->pVCheck); + + if( p->rDel>0 ){ + zBuf[nWord] = 0; + amatchAddWord(pCur, pWord->rCost+p->rDel, pWord->nMatch+nNextIn, + zBuf, ""); + } + + for(pRule=p->pRule; pRule; pRule=pRule->pNext){ + if( pRule->iLang!=pCur->iLang ) continue; + if( strncmp(pRule->zFrom, pCur->zInput+pWord->nMatch, pRule->nFrom)==0 ){ + amatchAddWord(pCur, pWord->rCost+pRule->rCost, + pWord->nMatch+pRule->nFrom, pWord->zWord+2, pRule->zTo); + } + } + }while( !isMatch ); + pCur->pCurrent = pWord; + sqlite3_free(zBuf); + return SQLITE_OK; +} + +/* +** Called to "rewind" a cursor back to the beginning so that +** it starts its output over again. Always called at least once +** prior to any amatchColumn, amatchRowid, or amatchEof call. +*/ +static int amatchFilter( + sqlite3_vtab_cursor *pVtabCursor, + int idxNum, const char *idxStr, + int argc, sqlite3_value **argv +){ + amatch_cursor *pCur = (amatch_cursor *)pVtabCursor; + const char *zWord = "*"; + int idx; + + amatchClearCursor(pCur); + idx = 0; + if( idxNum & 1 ){ + zWord = (const char*)sqlite3_value_text(argv[0]); + idx++; + } + if( idxNum & 2 ){ + pCur->rLimit = (amatch_cost)sqlite3_value_int(argv[idx]); + idx++; + } + if( idxNum & 4 ){ + pCur->iLang = (amatch_cost)sqlite3_value_int(argv[idx]); + idx++; + } + pCur->zInput = sqlite3_mprintf("%s", zWord); + if( pCur->zInput==0 ) return SQLITE_NOMEM; + amatchAddWord(pCur, 0, 0, "", ""); + amatchNext(pVtabCursor); + + return SQLITE_OK; +} + +/* +** Only the word and distance columns have values. 
All other columns +** return NULL +*/ +static int amatchColumn(sqlite3_vtab_cursor *cur, sqlite3_context *ctx, int i){ + amatch_cursor *pCur = (amatch_cursor*)cur; + switch( i ){ + case AMATCH_COL_WORD: { + sqlite3_result_text(ctx, pCur->pCurrent->zWord+2, -1, SQLITE_STATIC); + break; + } + case AMATCH_COL_DISTANCE: { + sqlite3_result_int(ctx, pCur->pCurrent->rCost); + break; + } + case AMATCH_COL_LANGUAGE: { + sqlite3_result_int(ctx, pCur->iLang); + break; + } + case AMATCH_COL_NWORD: { + sqlite3_result_int(ctx, pCur->nWord); + break; + } + default: { + sqlite3_result_null(ctx); + break; + } + } + return SQLITE_OK; +} + +/* +** The rowid. +*/ +static int amatchRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){ + amatch_cursor *pCur = (amatch_cursor*)cur; + *pRowid = pCur->iRowid; + return SQLITE_OK; +} + +/* +** EOF indicator +*/ +static int amatchEof(sqlite3_vtab_cursor *cur){ + amatch_cursor *pCur = (amatch_cursor*)cur; + return pCur->pCurrent==0; +} + +/* +** Search for terms of these forms: +** +** (A) word MATCH $str +** (B1) distance < $value +** (B2) distance <= $value +** (C) language == $language +** +** The distance< and distance<= are both treated as distance<=. +** The query plan number is a bit vector: +** +** bit 1: Term of the form (A) found +** bit 2: Term like (B1) or (B2) found +** bit 3: Term like (C) found +** +** If bit-1 is set, $str is always in filter.argv[0]. If bit-2 is set +** then $value is in filter.argv[0] if bit-1 is clear and is in +** filter.argv[1] if bit-1 is set. If bit-3 is set, then $ruleid is +** in filter.argv[0] if bit-1 and bit-2 are both zero, is in +** filter.argv[1] if exactly one of bit-1 and bit-2 are set, and is in +** filter.argv[2] if both bit-1 and bit-2 are set. 
+*/ +static int amatchBestIndex( + sqlite3_vtab *tab, + sqlite3_index_info *pIdxInfo +){ + int iPlan = 0; + int iDistTerm = -1; + int iLangTerm = -1; + int i; + const struct sqlite3_index_constraint *pConstraint; + + (void)tab; + pConstraint = pIdxInfo->aConstraint; + for(i=0; inConstraint; i++, pConstraint++){ + if( pConstraint->usable==0 ) continue; + if( (iPlan & 1)==0 + && pConstraint->iColumn==0 + && pConstraint->op==SQLITE_INDEX_CONSTRAINT_MATCH + ){ + iPlan |= 1; + pIdxInfo->aConstraintUsage[i].argvIndex = 1; + pIdxInfo->aConstraintUsage[i].omit = 1; + } + if( (iPlan & 2)==0 + && pConstraint->iColumn==1 + && (pConstraint->op==SQLITE_INDEX_CONSTRAINT_LT + || pConstraint->op==SQLITE_INDEX_CONSTRAINT_LE) + ){ + iPlan |= 2; + iDistTerm = i; + } + if( (iPlan & 4)==0 + && pConstraint->iColumn==2 + && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ + ){ + iPlan |= 4; + pIdxInfo->aConstraintUsage[i].omit = 1; + iLangTerm = i; + } + } + if( iPlan & 2 ){ + pIdxInfo->aConstraintUsage[iDistTerm].argvIndex = 1+((iPlan&1)!=0); + } + if( iPlan & 4 ){ + int idx = 1; + if( iPlan & 1 ) idx++; + if( iPlan & 2 ) idx++; + pIdxInfo->aConstraintUsage[iLangTerm].argvIndex = idx; + } + pIdxInfo->idxNum = iPlan; + if( pIdxInfo->nOrderBy==1 + && pIdxInfo->aOrderBy[0].iColumn==1 + && pIdxInfo->aOrderBy[0].desc==0 + ){ + pIdxInfo->orderByConsumed = 1; + } + pIdxInfo->estimatedCost = (double)10000; + + return SQLITE_OK; +} + +/* +** The xUpdate() method. +** +** This implementation disallows DELETE and UPDATE. The only thing +** allowed is INSERT into the "command" column. 
+*/ +static int amatchUpdate( + sqlite3_vtab *pVTab, + int argc, + sqlite3_value **argv, + sqlite_int64 *pRowid +){ + amatch_vtab *p = (amatch_vtab*)pVTab; + const unsigned char *zCmd; + (void)pRowid; + if( argc==1 ){ + pVTab->zErrMsg = sqlite3_mprintf("DELETE from %s is not allowed", + p->zSelf); + return SQLITE_ERROR; + } + if( sqlite3_value_type(argv[0])!=SQLITE_NULL ){ + pVTab->zErrMsg = sqlite3_mprintf("UPDATE of %s is not allowed", + p->zSelf); + return SQLITE_ERROR; + } + if( sqlite3_value_type(argv[2+AMATCH_COL_WORD])!=SQLITE_NULL + || sqlite3_value_type(argv[2+AMATCH_COL_DISTANCE])!=SQLITE_NULL + || sqlite3_value_type(argv[2+AMATCH_COL_LANGUAGE])!=SQLITE_NULL + ){ + pVTab->zErrMsg = sqlite3_mprintf( + "INSERT INTO %s allowed for column [command] only", p->zSelf); + return SQLITE_ERROR; + } + zCmd = sqlite3_value_text(argv[2+AMATCH_COL_COMMAND]); + if( zCmd==0 ) return SQLITE_OK; + + return SQLITE_OK; +} + +/* +** A virtual table module that implements the "approximate_match". +*/ +static sqlite3_module amatchModule = { + 0, /* iVersion */ + amatchConnect, /* xCreate */ + amatchConnect, /* xConnect */ + amatchBestIndex, /* xBestIndex */ + amatchDisconnect, /* xDisconnect */ + amatchDisconnect, /* xDestroy */ + amatchOpen, /* xOpen - open a cursor */ + amatchClose, /* xClose - close a cursor */ + amatchFilter, /* xFilter - configure scan constraints */ + amatchNext, /* xNext - advance a cursor */ + amatchEof, /* xEof - check for end of scan */ + amatchColumn, /* xColumn - read data */ + amatchRowid, /* xRowid - read data */ + amatchUpdate, /* xUpdate */ + 0, /* xBegin */ + 0, /* xSync */ + 0, /* xCommit */ + 0, /* xRollback */ + 0, /* xFindMethod */ + 0, /* xRename */ + 0, /* xSavepoint */ + 0, /* xRelease */ + 0 /* xRollbackTo */ +}; + +#endif /* SQLITE_OMIT_VIRTUALTABLE */ + +/* +** Register the amatch virtual table +*/ +#ifdef _WIN32 +__declspec(dllexport) +#endif +int sqlite3_amatch_init( + sqlite3 *db, + char **pzErrMsg, + const sqlite3_api_routines 
*pApi +){ + int rc = SQLITE_OK; + SQLITE_EXTENSION_INIT2(pApi); + (void)pzErrMsg; /* Not used */ +#ifndef SQLITE_OMIT_VIRTUALTABLE + rc = sqlite3_create_module(db, "approximate_match", &amatchModule, 0); +#endif /* SQLITE_OMIT_VIRTUALTABLE */ + return rc; +} diff --git a/ext/misc/closure.c b/ext/misc/closure.c new file mode 100644 index 0000000..213b763 --- /dev/null +++ b/ext/misc/closure.c @@ -0,0 +1,948 @@ +/* +** 2013-04-16 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +** This file contains code for a virtual table that finds the transitive +** closure of a parent/child relationship in a real table. The virtual +** table is called "transitive_closure". +** +** A transitive_closure virtual table is created like this: +** +** CREATE VIRTUAL TABLE x USING transitive_closure( +** tablename=, -- T +** idcolumn=, -- X +** parentcolumn= -- P +** ); +** +** When it is created, the new transitive_closure table may be supplied +** with default values for the name of a table T and columns T.X and T.P. +** The T.X and T.P columns must contain integers. The ideal case is for +** T.X to be the INTEGER PRIMARY KEY. The T.P column should reference +** the T.X column. The row referenced by T.P is the parent of the current row. +** +** The tablename, idcolumn, and parentcolumn supplied by the CREATE VIRTUAL +** TABLE statement may be overridden in individual queries by including +** terms like tablename='newtable', idcolumn='id2', or +** parentcolumn='parent3' in the WHERE clause of the query. 
+** +** For efficiency, it is essential that there be an index on the P column: +** +** CREATE INDEX Tidx1 ON T(P) +** +** Suppose a specific instance of the closure table is as follows: +** +** CREATE VIRTUAL TABLE ct1 USING transitive_closure( +** tablename='group', +** idcolumn='groupId', +** parentcolumn='parentId' +** ); +** +** Such an instance of the transitive_closure virtual table would be +** appropriate for walking a tree defined using a table like this, for example: +** +** CREATE TABLE group( +** groupId INTEGER PRIMARY KEY, +** parentId INTEGER REFERENCES group +** ); +** CREATE INDEX group_idx1 ON group(parentId); +** +** The group table above would presumably have other application-specific +** fields. The key point here is that rows of the group table form a +** tree. The purpose of the ct1 virtual table is to easily extract +** branches of that tree. +** +** Once it has been created, the ct1 virtual table can be queried +** as follows: +** +** SELECT * FROM element +** WHERE element.groupId IN (SELECT id FROM ct1 WHERE root=?1); +** +** The above query will return all elements that are part of group ?1 +** or children of group ?1 or grand-children of ?1 and so forth for all +** descendents of group ?1. The same query can be formulated as a join: +** +** SELECT element.* FROM element, ct1 +** WHERE element.groupid=ct1.id +** AND ct1.root=?1; +** +** The depth of the transitive_closure (the number of generations of +** parent/child relations to follow) can be limited by constraining the +** "depth" column in the WHERE clause. So, for example, the following query +** finds only children and grandchildren but no further descendents: +** +** SELECT element.* FROM element, ct1 +** WHERE element.groupid=ct1.id +** AND ct1.root=?1 +** AND ct1.depth<=2; +** +** The "ct1.depth<=2" term could be a strict equality "ct1.depth=2" in +** order to find only the grandchildren of ?1, not ?1 itself or the +** children of ?1. 
+** +** The root=?1 term must be supplied in WHERE clause or else the query +** of the ct1 virtual table will return an empty set. The tablename, +** idcolumn, and parentcolumn attributes can be overridden in the WHERE +** clause if desired. So, for example, the ct1 table could be repurposed +** to find ancestors rather than descendents by inverting the roles of +** the idcolumn and parentcolumn: +** +** SELECT element.* FROM element, ct1 +** WHERE element.groupid=ct1.id +** AND ct1.root=?1 +** AND ct1.idcolumn='parentId' +** AND ct1.parentcolumn='groupId'; +** +** Multiple calls to ct1 could be combined. For example, the following +** query finds all elements that "cousins" of groupId ?1. That is to say +** elements where the groupId is a grandchild of the grandparent of ?1. +** (This definition of "cousins" also includes siblings and self.) +** +** SELECT element.* FROM element, ct1 +** WHERE element.groupId=ct1.id +** AND ct1.depth=2 +** AND ct1.root IN (SELECT id FROM ct1 +** WHERE root=?1 +** AND depth=2 +** AND idcolumn='parentId' +** AND parentcolumn='groupId'); +** +** In our example, the group.groupId column is unique and thus the +** subquery will return exactly one row. For that reason, the IN +** operator could be replaced by "=" to get the same result. But +** in the general case where the idcolumn is not unique, an IN operator +** would be required for this kind of query. +** +** Note that because the tablename, idcolumn, and parentcolumn can +** all be specified in the query, it is possible for an application +** to define a single transitive_closure virtual table for use on lots +** of different hierarchy tables. One might say: +** +** CREATE VIRTUAL TABLE temp.closure USING transitive_closure; +** +** As each database connection is being opened. Then the application +** would always have a "closure" virtual table handy to use for querying. 
+** +** SELECT element.* FROM element, closure +** WHERE element.groupid=ct1.id +** AND closure.root=?1 +** AND closure.tablename='group' +** AND closure.idname='groupId' +** AND closure.parentname='parentId'; +** +** See the documentation at http://www.sqlite.org/loadext.html for information +** on how to compile and use loadable extensions such as this one. +*/ +#include "sqlite3ext.h" +SQLITE_EXTENSION_INIT1 +#include +#include +#include +#include +#include + +#ifndef SQLITE_OMIT_VIRTUALTABLE + +/* +** Forward declaration of objects used by this implementation +*/ +typedef struct closure_vtab closure_vtab; +typedef struct closure_cursor closure_cursor; +typedef struct closure_queue closure_queue; +typedef struct closure_avl closure_avl; + +/***************************************************************************** +** AVL Tree implementation +*/ +/* +** Objects that want to be members of the AVL tree should embedded an +** instance of this structure. +*/ +struct closure_avl { + sqlite3_int64 id; /* Id of this entry in the table */ + int iGeneration; /* Which generation is this entry part of */ + closure_avl *pList; /* A linked list of nodes */ + closure_avl *pBefore; /* Other elements less than id */ + closure_avl *pAfter; /* Other elements greater than id */ + closure_avl *pUp; /* Parent element */ + short int height; /* Height of this node. Leaf==1 */ + short int imbalance; /* Height difference between pBefore and pAfter */ +}; + +/* Recompute the closure_avl.height and closure_avl.imbalance fields for p. +** Assume that the children of p have correct heights. +*/ +static void closureAvlRecomputeHeight(closure_avl *p){ + short int hBefore = p->pBefore ? p->pBefore->height : 0; + short int hAfter = p->pAfter ? p->pAfter->height : 0; + p->imbalance = hBefore - hAfter; /* -: pAfter higher. +: pBefore higher */ + p->height = (hBefore>hAfter ? 
hBefore : hAfter)+1; +} + +/* +** P B +** / \ / \ +** B Z ==> X P +** / \ / \ +** X Y Y Z +** +*/ +static closure_avl *closureAvlRotateBefore(closure_avl *pP){ + closure_avl *pB = pP->pBefore; + closure_avl *pY = pB->pAfter; + pB->pUp = pP->pUp; + pB->pAfter = pP; + pP->pUp = pB; + pP->pBefore = pY; + if( pY ) pY->pUp = pP; + closureAvlRecomputeHeight(pP); + closureAvlRecomputeHeight(pB); + return pB; +} + +/* +** P A +** / \ / \ +** X A ==> P Z +** / \ / \ +** Y Z X Y +** +*/ +static closure_avl *closureAvlRotateAfter(closure_avl *pP){ + closure_avl *pA = pP->pAfter; + closure_avl *pY = pA->pBefore; + pA->pUp = pP->pUp; + pA->pBefore = pP; + pP->pUp = pA; + pP->pAfter = pY; + if( pY ) pY->pUp = pP; + closureAvlRecomputeHeight(pP); + closureAvlRecomputeHeight(pA); + return pA; +} + +/* +** Return a pointer to the pBefore or pAfter pointer in the parent +** of p that points to p. Or if p is the root node, return pp. +*/ +static closure_avl **closureAvlFromPtr(closure_avl *p, closure_avl **pp){ + closure_avl *pUp = p->pUp; + if( pUp==0 ) return pp; + if( pUp->pAfter==p ) return &pUp->pAfter; + return &pUp->pBefore; +} + +/* +** Rebalance all nodes starting with p and working up to the root. +** Return the new root. +*/ +static closure_avl *closureAvlBalance(closure_avl *p){ + closure_avl *pTop = p; + closure_avl **pp; + while( p ){ + closureAvlRecomputeHeight(p); + if( p->imbalance>=2 ){ + closure_avl *pB = p->pBefore; + if( pB->imbalance<0 ) p->pBefore = closureAvlRotateAfter(pB); + pp = closureAvlFromPtr(p,&p); + p = *pp = closureAvlRotateBefore(p); + }else if( p->imbalance<=(-2) ){ + closure_avl *pA = p->pAfter; + if( pA->imbalance>0 ) p->pAfter = closureAvlRotateBefore(pA); + pp = closureAvlFromPtr(p,&p); + p = *pp = closureAvlRotateAfter(p); + } + pTop = p; + p = p->pUp; + } + return pTop; +} + +/* Search the tree rooted at p for an entry with id. Return a pointer +** to the entry or return NULL. 
+*/ +static closure_avl *closureAvlSearch(closure_avl *p, sqlite3_int64 id){ + while( p && id!=p->id ){ + p = (idid) ? p->pBefore : p->pAfter; + } + return p; +} + +/* Find the first node (the one with the smallest key). +*/ +static closure_avl *closureAvlFirst(closure_avl *p){ + if( p ) while( p->pBefore ) p = p->pBefore; + return p; +} + +/* Return the node with the next larger key after p. +*/ +closure_avl *closureAvlNext(closure_avl *p){ + closure_avl *pPrev = 0; + while( p && p->pAfter==pPrev ){ + pPrev = p; + p = p->pUp; + } + if( p && pPrev==0 ){ + p = closureAvlFirst(p->pAfter); + } + return p; +} + +/* Insert a new node pNew. Return NULL on success. If the key is not +** unique, then do not perform the insert but instead leave pNew unchanged +** and return a pointer to an existing node with the same key. +*/ +static closure_avl *closureAvlInsert( + closure_avl **ppHead, /* Head of the tree */ + closure_avl *pNew /* New node to be inserted */ +){ + closure_avl *p = *ppHead; + if( p==0 ){ + p = pNew; + pNew->pUp = 0; + }else{ + while( p ){ + if( pNew->idid ){ + if( p->pBefore ){ + p = p->pBefore; + }else{ + p->pBefore = pNew; + pNew->pUp = p; + break; + } + }else if( pNew->id>p->id ){ + if( p->pAfter ){ + p = p->pAfter; + }else{ + p->pAfter = pNew; + pNew->pUp = p; + break; + } + }else{ + return p; + } + } + } + pNew->pBefore = 0; + pNew->pAfter = 0; + pNew->height = 1; + pNew->imbalance = 0; + *ppHead = closureAvlBalance(p); + return 0; +} + +/* Walk the tree can call xDestroy on each node +*/ +static void closureAvlDestroy(closure_avl *p, void (*xDestroy)(closure_avl*)){ + if( p ){ + closureAvlDestroy(p->pBefore, xDestroy); + closureAvlDestroy(p->pAfter, xDestroy); + xDestroy(p); + } +} +/* +** End of the AVL Tree implementation +******************************************************************************/ + +/* +** A closure virtual-table object +*/ +struct closure_vtab { + sqlite3_vtab base; /* Base class - must be first */ + char *zDb; /* Name of 
database. (ex: "main") */ + char *zSelf; /* Name of this virtual table */ + char *zTableName; /* Name of table holding parent/child relation */ + char *zIdColumn; /* Name of ID column of zTableName */ + char *zParentColumn; /* Name of PARENT column in zTableName */ + sqlite3 *db; /* The database connection */ + int nCursor; /* Number of pending cursors */ +}; + +/* A closure cursor object */ +struct closure_cursor { + sqlite3_vtab_cursor base; /* Base class - must be first */ + closure_vtab *pVtab; /* The virtual table this cursor belongs to */ + char *zTableName; /* Name of table holding parent/child relation */ + char *zIdColumn; /* Name of ID column of zTableName */ + char *zParentColumn; /* Name of PARENT column in zTableName */ + closure_avl *pCurrent; /* Current element of output */ + closure_avl *pClosure; /* The complete closure tree */ +}; + +/* A queue of AVL nodes */ +struct closure_queue { + closure_avl *pFirst; /* Oldest node on the queue */ + closure_avl *pLast; /* Youngest node on the queue */ +}; + +/* +** Add a node to the end of the queue +*/ +static void queuePush(closure_queue *pQueue, closure_avl *pNode){ + pNode->pList = 0; + if( pQueue->pLast ){ + pQueue->pLast->pList = pNode; + }else{ + pQueue->pFirst = pNode; + } + pQueue->pLast = pNode; +} + +/* +** Extract the oldest element (the front element) from the queue. +*/ +static closure_avl *queuePull(closure_queue *pQueue){ + closure_avl *p = pQueue->pFirst; + if( p ){ + pQueue->pFirst = p->pList; + if( pQueue->pFirst==0 ) pQueue->pLast = 0; + } + return p; +} + +/* +** This function converts an SQL quoted string into an unquoted string +** and returns a pointer to a buffer allocated using sqlite3_malloc() +** containing the result. The caller should eventually free this buffer +** using sqlite3_free. 
+** +** Examples: +** +** "abc" becomes abc +** 'xyz' becomes xyz +** [pqr] becomes pqr +** `mno` becomes mno +*/ +static char *closureDequote(const char *zIn){ + int nIn; /* Size of input string, in bytes */ + char *zOut; /* Output (dequoted) string */ + + nIn = (int)strlen(zIn); + zOut = sqlite3_malloc(nIn+1); + if( zOut ){ + char q = zIn[0]; /* Quote character (if any ) */ + + if( q!='[' && q!= '\'' && q!='"' && q!='`' ){ + memcpy(zOut, zIn, nIn+1); + }else{ + int iOut = 0; /* Index of next byte to write to output */ + int iIn; /* Index of next byte to read from input */ + + if( q=='[' ) q = ']'; + for(iIn=1; iInzDb); + sqlite3_free(p->zSelf); + sqlite3_free(p->zTableName); + sqlite3_free(p->zIdColumn); + sqlite3_free(p->zParentColumn); + memset(p, 0, sizeof(*p)); + sqlite3_free(p); + } +} + +/* +** xDisconnect/xDestroy method for the closure module. +*/ +static int closureDisconnect(sqlite3_vtab *pVtab){ + closure_vtab *p = (closure_vtab*)pVtab; + assert( p->nCursor==0 ); + closureFree(p); + return SQLITE_OK; +} + +/* +** Check to see if the argument is of the form: +** +** KEY = VALUE +** +** If it is, return a pointer to the first character of VALUE. +** If not, return NULL. Spaces around the = are ignored. +*/ +static const char *closureValueOfKey(const char *zKey, const char *zStr){ + int nKey = (int)strlen(zKey); + int nStr = (int)strlen(zStr); + int i; + if( nStr module name ("approximate_match") +** argv[1] -> database name +** argv[2] -> table name +** argv[3...] 
-> arguments +*/ +static int closureConnect( + sqlite3 *db, + void *pAux, + int argc, const char *const*argv, + sqlite3_vtab **ppVtab, + char **pzErr +){ + int rc = SQLITE_OK; /* Return code */ + closure_vtab *pNew = 0; /* New virtual table */ + const char *zDb = argv[1]; + const char *zVal; + int i; + + (void)pAux; + *ppVtab = 0; + pNew = sqlite3_malloc( sizeof(*pNew) ); + if( pNew==0 ) return SQLITE_NOMEM; + rc = SQLITE_NOMEM; + memset(pNew, 0, sizeof(*pNew)); + pNew->db = db; + pNew->zDb = sqlite3_mprintf("%s", zDb); + if( pNew->zDb==0 ) goto closureConnectError; + pNew->zSelf = sqlite3_mprintf("%s", argv[2]); + if( pNew->zSelf==0 ) goto closureConnectError; + for(i=3; izTableName); + pNew->zTableName = closureDequote(zVal); + if( pNew->zTableName==0 ) goto closureConnectError; + continue; + } + zVal = closureValueOfKey("idcolumn", argv[i]); + if( zVal ){ + sqlite3_free(pNew->zIdColumn); + pNew->zIdColumn = closureDequote(zVal); + if( pNew->zIdColumn==0 ) goto closureConnectError; + continue; + } + zVal = closureValueOfKey("parentcolumn", argv[i]); + if( zVal ){ + sqlite3_free(pNew->zParentColumn); + pNew->zParentColumn = closureDequote(zVal); + if( pNew->zParentColumn==0 ) goto closureConnectError; + continue; + } + *pzErr = sqlite3_mprintf("unrecognized argument: [%s]\n", argv[i]); + closureFree(pNew); + *ppVtab = 0; + return SQLITE_ERROR; + } + rc = sqlite3_declare_vtab(db, + "CREATE TABLE x(id,depth,root HIDDEN,tablename HIDDEN," + "idcolumn HIDDEN,parentcolumn HIDDEN)" + ); +#define CLOSURE_COL_ID 0 +#define CLOSURE_COL_DEPTH 1 +#define CLOSURE_COL_ROOT 2 +#define CLOSURE_COL_TABLENAME 3 +#define CLOSURE_COL_IDCOLUMN 4 +#define CLOSURE_COL_PARENTCOLUMN 5 + if( rc!=SQLITE_OK ){ + closureFree(pNew); + } + *ppVtab = &pNew->base; + return rc; + +closureConnectError: + closureFree(pNew); + return rc; +} + +/* +** Open a new closure cursor. 
+*/ +static int closureOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){ + closure_vtab *p = (closure_vtab*)pVTab; + closure_cursor *pCur; + pCur = sqlite3_malloc( sizeof(*pCur) ); + if( pCur==0 ) return SQLITE_NOMEM; + memset(pCur, 0, sizeof(*pCur)); + pCur->pVtab = p; + *ppCursor = &pCur->base; + p->nCursor++; + return SQLITE_OK; +} + +/* +** Free up all the memory allocated by a cursor. Set it rLimit to 0 +** to indicate that it is at EOF. +*/ +static void closureClearCursor(closure_cursor *pCur){ + closureAvlDestroy(pCur->pClosure, (void(*)(closure_avl*))sqlite3_free); + sqlite3_free(pCur->zTableName); + sqlite3_free(pCur->zIdColumn); + sqlite3_free(pCur->zParentColumn); + pCur->zTableName = 0; + pCur->zIdColumn = 0; + pCur->zParentColumn = 0; + pCur->pCurrent = 0; + pCur->pClosure = 0; +} + +/* +** Close a closure cursor. +*/ +static int closureClose(sqlite3_vtab_cursor *cur){ + closure_cursor *pCur = (closure_cursor *)cur; + closureClearCursor(pCur); + pCur->pVtab->nCursor--; + sqlite3_free(pCur); + return SQLITE_OK; +} + +/* +** Advance a cursor to its next row of output +*/ +static int closureNext(sqlite3_vtab_cursor *cur){ + closure_cursor *pCur = (closure_cursor*)cur; + pCur->pCurrent = closureAvlNext(pCur->pCurrent); + return SQLITE_OK; +} + +/* +** Allocate and insert a node +*/ +static int closureInsertNode( + closure_queue *pQueue, /* Add new node to this queue */ + closure_cursor *pCur, /* The cursor into which to add the node */ + sqlite3_int64 id, /* The node ID */ + int iGeneration /* The generation number for this node */ +){ + closure_avl *pNew = sqlite3_malloc( sizeof(*pNew) ); + if( pNew==0 ) return SQLITE_NOMEM; + memset(pNew, 0, sizeof(*pNew)); + pNew->id = id; + pNew->iGeneration = iGeneration; + closureAvlInsert(&pCur->pClosure, pNew); + queuePush(pQueue, pNew); + return SQLITE_OK; +} + +/* +** Called to "rewind" a cursor back to the beginning so that +** it starts its output over again. 
Always called at least once +** prior to any closureColumn, closureRowid, or closureEof call. +** +** This routine actually computes the closure. +** +** See the comment at the beginning of closureBestIndex() for a +** description of the meaning of idxNum. The idxStr parameter is +** not used. +*/ +static int closureFilter( + sqlite3_vtab_cursor *pVtabCursor, + int idxNum, const char *idxStr, + int argc, sqlite3_value **argv +){ + closure_cursor *pCur = (closure_cursor *)pVtabCursor; + closure_vtab *pVtab = pCur->pVtab; + sqlite3_int64 iRoot; + int mxGen = 999999999; + char *zSql; + sqlite3_stmt *pStmt; + closure_avl *pAvl; + int rc = SQLITE_OK; + const char *zTableName = pVtab->zTableName; + const char *zIdColumn = pVtab->zIdColumn; + const char *zParentColumn = pVtab->zParentColumn; + closure_queue sQueue; + + (void)idxStr; /* Unused parameter */ + (void)argc; /* Unused parameter */ + closureClearCursor(pCur); + memset(&sQueue, 0, sizeof(sQueue)); + if( (idxNum & 1)==0 ){ + /* No root=$root in the WHERE clause. 
Return an empty set */ + return SQLITE_OK; + } + iRoot = sqlite3_value_int64(argv[0]); + if( (idxNum & 0x000f0)!=0 ){ + mxGen = sqlite3_value_int(argv[(idxNum>>4)&0x0f]); + if( (idxNum & 0x00002)!=0 ) mxGen--; + } + if( (idxNum & 0x00f00)!=0 ){ + zTableName = (const char*)sqlite3_value_text(argv[(idxNum>>8)&0x0f]); + pCur->zTableName = sqlite3_mprintf("%s", zTableName); + } + if( (idxNum & 0x0f000)!=0 ){ + zIdColumn = (const char*)sqlite3_value_text(argv[(idxNum>>12)&0x0f]); + pCur->zIdColumn = sqlite3_mprintf("%s", zIdColumn); + } + if( (idxNum & 0x0f0000)!=0 ){ + zParentColumn = (const char*)sqlite3_value_text(argv[(idxNum>>16)&0x0f]); + pCur->zParentColumn = sqlite3_mprintf("%s", zParentColumn); + } + + zSql = sqlite3_mprintf( + "SELECT \"%w\".\"%w\" FROM \"%w\" WHERE \"%w\".\"%w\"=?1", + zTableName, zIdColumn, zTableName, zTableName, zParentColumn); + if( zSql==0 ){ + return SQLITE_NOMEM; + }else{ + rc = sqlite3_prepare_v2(pVtab->db, zSql, -1, &pStmt, 0); + sqlite3_free(zSql); + if( rc ){ + sqlite3_free(pVtab->base.zErrMsg); + pVtab->base.zErrMsg = sqlite3_mprintf("%s", sqlite3_errmsg(pVtab->db)); + return rc; + } + } + if( rc==SQLITE_OK ){ + rc = closureInsertNode(&sQueue, pCur, iRoot, 0); + } + while( (pAvl = queuePull(&sQueue))!=0 ){ + if( pAvl->iGeneration>=mxGen ) continue; + sqlite3_bind_int64(pStmt, 1, pAvl->id); + while( rc==SQLITE_OK && sqlite3_step(pStmt)==SQLITE_ROW ){ + if( sqlite3_column_type(pStmt,0)==SQLITE_INTEGER ){ + sqlite3_int64 iNew = sqlite3_column_int64(pStmt, 0); + if( closureAvlSearch(pCur->pClosure, iNew)==0 ){ + rc = closureInsertNode(&sQueue, pCur, iNew, pAvl->iGeneration+1); + } + } + } + sqlite3_reset(pStmt); + } + sqlite3_finalize(pStmt); + if( rc==SQLITE_OK ){ + pCur->pCurrent = closureAvlFirst(pCur->pClosure); + } + + return rc; +} + +/* +** Only the word and distance columns have values. 
All other columns +** return NULL +*/ +static int closureColumn(sqlite3_vtab_cursor *cur, sqlite3_context *ctx, int i){ + closure_cursor *pCur = (closure_cursor*)cur; + switch( i ){ + case CLOSURE_COL_ID: { + sqlite3_result_int64(ctx, pCur->pCurrent->id); + break; + } + case CLOSURE_COL_DEPTH: { + sqlite3_result_int(ctx, pCur->pCurrent->iGeneration); + break; + } + case CLOSURE_COL_ROOT: { + sqlite3_result_null(ctx); + break; + } + case CLOSURE_COL_TABLENAME: { + sqlite3_result_text(ctx, + pCur->zTableName ? pCur->zTableName : pCur->pVtab->zTableName, + -1, SQLITE_TRANSIENT); + break; + } + case CLOSURE_COL_IDCOLUMN: { + sqlite3_result_text(ctx, + pCur->zIdColumn ? pCur->zIdColumn : pCur->pVtab->zIdColumn, + -1, SQLITE_TRANSIENT); + break; + } + case CLOSURE_COL_PARENTCOLUMN: { + sqlite3_result_text(ctx, + pCur->zParentColumn ? pCur->zParentColumn : pCur->pVtab->zParentColumn, + -1, SQLITE_TRANSIENT); + break; + } + } + return SQLITE_OK; +} + +/* +** The rowid. For the closure table, this is the same as the "id" column. +*/ +static int closureRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){ + closure_cursor *pCur = (closure_cursor*)cur; + *pRowid = pCur->pCurrent->id; + return SQLITE_OK; +} + +/* +** EOF indicator +*/ +static int closureEof(sqlite3_vtab_cursor *cur){ + closure_cursor *pCur = (closure_cursor*)cur; + return pCur->pCurrent==0; +} + +/* +** Search for terms of these forms: +** +** (A) root = $root +** (B1) depth < $depth +** (B2) depth <= $depth +** (B3) depth = $depth +** (C) tablename = $tablename +** (D) idcolumn = $idcolumn +** (E) parentcolumn = $parentcolumn +** +** +** +** idxNum meaning +** ---------- ------------------------------------------------------ +** 0x00000001 Term of the form (A) found +** 0x00000002 The term of bit-2 is like (B1) +** 0x000000f0 Index in filter.argv[] of $depth. 0 if not used. +** 0x00000f00 Index in filter.argv[] of $tablename. 0 if not used. +** 0x0000f000 Index in filter.argv[] of $idcolumn. 
0 if not used +** 0x000f0000 Index in filter.argv[] of $parentcolumn. 0 if not used. +** +** There must be a term of type (A). If there is not, then the index type +** is 0 and the query will return an empty set. +** +** Each argv index stored in idxNum is zero-based and is one less than the +** argvIndex handed to the core for the same term. Always returns SQLITE_OK. +*/ +static int closureBestIndex( + sqlite3_vtab *pTab, /* The virtual table */ + sqlite3_index_info *pIdxInfo /* Information about the query */ +){ + int iPlan = 0; + int i; + int idx = 1; /* argv[0] is reserved for $root, so other terms start at 1 */ + const struct sqlite3_index_constraint *pConstraint; + closure_vtab *pVtab = (closure_vtab*)pTab; + + pConstraint = pIdxInfo->aConstraint; + /* The loop bound was lost in extraction; it is restored here from the + ** aConstraint[]/aConstraintUsage[] indexing used in the loop body. */ + for(i=0; i<pIdxInfo->nConstraint; i++, pConstraint++){ + if( pConstraint->usable==0 ) continue; + if( (iPlan & 1)==0 + && pConstraint->iColumn==CLOSURE_COL_ROOT + && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ + ){ + iPlan |= 1; + pIdxInfo->aConstraintUsage[i].argvIndex = 1; + pIdxInfo->aConstraintUsage[i].omit = 1; + } + if( (iPlan & 0x0000f0)==0 + && pConstraint->iColumn==CLOSURE_COL_DEPTH + && (pConstraint->op==SQLITE_INDEX_CONSTRAINT_LT + || pConstraint->op==SQLITE_INDEX_CONSTRAINT_LE + || pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ) + ){ + /* omit is left clear for the depth term, unlike the others */ + iPlan |= idx<<4; + pIdxInfo->aConstraintUsage[i].argvIndex = ++idx; + if( pConstraint->op==SQLITE_INDEX_CONSTRAINT_LT ) iPlan |= 0x000002; + } + if( (iPlan & 0x000f00)==0 + && pConstraint->iColumn==CLOSURE_COL_TABLENAME + && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ + ){ + iPlan |= idx<<8; + pIdxInfo->aConstraintUsage[i].argvIndex = ++idx; + pIdxInfo->aConstraintUsage[i].omit = 1; + } + if( (iPlan & 0x00f000)==0 + && pConstraint->iColumn==CLOSURE_COL_IDCOLUMN + && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ + ){ + iPlan |= idx<<12; + pIdxInfo->aConstraintUsage[i].argvIndex = ++idx; + pIdxInfo->aConstraintUsage[i].omit = 1; + } + if( (iPlan & 0x0f0000)==0 + && pConstraint->iColumn==CLOSURE_COL_PARENTCOLUMN + && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ + ){ + iPlan |= idx<<16; + pIdxInfo->aConstraintUsage[i].argvIndex = ++idx; + pIdxInfo->aConstraintUsage[i].omit = 1; + } + } + if( 
(pVtab->zTableName==0 && (iPlan & 0x000f00)==0) + || (pVtab->zIdColumn==0 && (iPlan & 0x00f000)==0) + || (pVtab->zParentColumn==0 && (iPlan & 0x0f0000)==0) + ){ + /* All of tablename, idcolumn, and parentcolumn must be specified + ** in either the CREATE VIRTUAL TABLE or in the WHERE clause constraints + ** or else the result is an empty set. */ + iPlan = 0; + } + if( (iPlan & 1)==0 ){ + /* No usable "root = $root" term: the index type must be 0, as the + ** header comment promises. Release every argv slot claimed above so + ** the core is never handed an argvIndex sequence that skips 1. */ + for(i=0; i<pIdxInfo->nConstraint; i++){ + pIdxInfo->aConstraintUsage[i].argvIndex = 0; + pIdxInfo->aConstraintUsage[i].omit = 0; + } + iPlan = 0; + } + pIdxInfo->idxNum = iPlan; + if( pIdxInfo->nOrderBy==1 + && pIdxInfo->aOrderBy[0].iColumn==CLOSURE_COL_ID + && pIdxInfo->aOrderBy[0].desc==0 + ){ + pIdxInfo->orderByConsumed = 1; + } + pIdxInfo->estimatedCost = (double)10000; + + return SQLITE_OK; +} + +/* +** A virtual table module that implements the "transitive_closure" +** table. The table is read-only: xUpdate and the transaction hooks +** are left unset. +*/ +static sqlite3_module closureModule = { + 0, /* iVersion */ + closureConnect, /* xCreate */ + closureConnect, /* xConnect */ + closureBestIndex, /* xBestIndex */ + closureDisconnect, /* xDisconnect */ + closureDisconnect, /* xDestroy */ + closureOpen, /* xOpen - open a cursor */ + closureClose, /* xClose - close a cursor */ + closureFilter, /* xFilter - configure scan constraints */ + closureNext, /* xNext - advance a cursor */ + closureEof, /* xEof - check for end of scan */ + closureColumn, /* xColumn - read data */ + closureRowid, /* xRowid - read data */ + 0, /* xUpdate */ + 0, /* xBegin */ + 0, /* xSync */ + 0, /* xCommit */ + 0, /* xRollback */ + 0, /* xFindMethod */ + 0, /* xRename */ + 0, /* xSavepoint */ + 0, /* xRelease */ + 0 /* xRollbackTo */ +}; + +#endif /* SQLITE_OMIT_VIRTUALTABLE */ + +/* +** Register the closure virtual table under the module name +** "transitive_closure". Returns the sqlite3_create_module() result, or +** SQLITE_OK when virtual tables are compiled out. +*/ +#ifdef _WIN32 +__declspec(dllexport) +#endif +int sqlite3_closure_init( + sqlite3 *db, + char **pzErrMsg, + const sqlite3_api_routines *pApi +){ + int rc = SQLITE_OK; + SQLITE_EXTENSION_INIT2(pApi); + (void)pzErrMsg; +#ifndef SQLITE_OMIT_VIRTUALTABLE + rc = sqlite3_create_module(db, "transitive_closure", &closureModule, 0); +#endif /* SQLITE_OMIT_VIRTUALTABLE */ + return rc; +} diff --git a/ext/misc/fuzzer.c b/ext/misc/fuzzer.c new file 
mode 100644 index 0000000..642b8f9 --- /dev/null +++ b/ext/misc/fuzzer.c @@ -0,0 +1,1173 @@ +/* +** 2011 March 24 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +** Code for a demonstration virtual table that generates variations +** on an input word at increasing edit distances from the original. +** +** A fuzzer virtual table is created like this: +** +** CREATE VIRTUAL TABLE f USING fuzzer(<fuzzer-data-table>); +** +** When it is created, the new fuzzer table must be supplied with the +** name of a "fuzzer data table", which must reside in the same database +** file as the new fuzzer table. The fuzzer data table contains the various +** transformations and their costs that the fuzzer logic uses to generate +** variations. +** +** The fuzzer data table must contain exactly four columns (more precisely, +** the statement "SELECT * FROM <fuzzer-data-table>" must return records +** that consist of four columns). It does not matter what the columns are +** named. +** +** Each row in the fuzzer data table represents a single character +** transformation. The leftmost column of the row (column 0) contains an +** integer value - the identifier of the ruleset to which the transformation +** rule belongs (see "MULTIPLE RULE SETS" below). The second column of the +** row (column 1) contains the input character or characters. The third +** column contains the output character or characters. And the fourth column +** contains the integer cost of making the transformation. 
For example: +** +** CREATE TABLE f_data(ruleset, cFrom, cTo, Cost); +** INSERT INTO f_data(ruleset, cFrom, cTo, Cost) VALUES(0, '', 'a', 100); +** INSERT INTO f_data(ruleset, cFrom, cTo, Cost) VALUES(0, 'b', '', 87); +** INSERT INTO f_data(ruleset, cFrom, cTo, Cost) VALUES(0, 'o', 'oe', 38); +** INSERT INTO f_data(ruleset, cFrom, cTo, Cost) VALUES(0, 'oe', 'o', 40); +** +** The first row inserted into the fuzzer data table by the SQL script +** above indicates that the cost of inserting a letter 'a' is 100. (All +** costs are integers. We recommend that costs be scaled so that the +** average cost is around 100.) The second INSERT statement creates a rule +** saying that the cost of deleting a single letter 'b' is 87. The third +** and fourth INSERT statements mean that the cost of transforming a +** single letter "o" into the two-letter sequence "oe" is 38 and that the +** cost of transforming "oe" back into "o" is 40. +** +** The contents of the fuzzer data table are loaded into main memory when +** a fuzzer table is first created, and may be internally reloaded by the +** system at any subsequent time. Therefore, the fuzzer data table should be +** populated before the fuzzer table is created and not modified thereafter. +** If you do need to modify the contents of the fuzzer data table, it is +** recommended that the associated fuzzer table be dropped, the fuzzer data +** table edited, and the fuzzer table recreated within a single transaction. +** Alternatively, the fuzzer data table can be edited and then the database +** connection can be closed and reopened. +** +** Once it has been created, the fuzzer table can be queried as follows: +** +** SELECT word, distance FROM f +** WHERE word MATCH 'abcdefg' +** AND distance<200; +** +** This first query outputs the string "abcdefg" and all strings that +** can be derived from that string by applying the specified transformations. 
+** The strings are output together with their total transformation cost +** (called "distance") and appear in order of increasing cost. No string +** is output more than once. If there are multiple ways to transform the +** target string into the output string then the lowest cost transform is +** the one that is returned. In the example, the search is limited to +** strings with a total distance of less than 200. +** +** The fuzzer is a read-only table. Any attempt to DELETE, INSERT, or +** UPDATE on a fuzzer table will throw an error. +** +** It is important to put some kind of a limit on the fuzzer output. This +** can be either in the form of a LIMIT clause at the end of the query, +** or better, a "distance +#include +#include +#include + +#ifndef SQLITE_OMIT_VIRTUALTABLE + +/* +** Forward declaration of objects used by this implementation +*/ +typedef struct fuzzer_vtab fuzzer_vtab; +typedef struct fuzzer_cursor fuzzer_cursor; +typedef struct fuzzer_rule fuzzer_rule; +typedef struct fuzzer_seen fuzzer_seen; +typedef struct fuzzer_stem fuzzer_stem; + +/* +** Various types. +** +** fuzzer_cost is the "cost" of an edit operation. +** +** fuzzer_len is the length of a matching string. +** +** fuzzer_ruleid is an ruleset identifier. +*/ +typedef int fuzzer_cost; +typedef signed char fuzzer_len; +typedef int fuzzer_ruleid; + +/* +** Limits +*/ +#define FUZZER_MX_LENGTH 50 /* Maximum length of a rule string */ +#define FUZZER_MX_RULEID 2147483647 /* Maximum rule ID */ +#define FUZZER_MX_COST 1000 /* Maximum single-rule cost */ +#define FUZZER_MX_OUTPUT_LENGTH 100 /* Maximum length of an output string */ + + +/* +** Each transformation rule is stored as an instance of this object. +** All rules are kept on a linked list sorted by rCost. 
+*/ +struct fuzzer_rule { + fuzzer_rule *pNext; /* Next rule in order of increasing rCost */ + char *zFrom; /* Transform from */ + fuzzer_cost rCost; /* Cost of this transformation */ + fuzzer_len nFrom, nTo; /* Length of the zFrom and zTo strings */ + fuzzer_ruleid iRuleset; /* The rule set to which this rule belongs */ + char zTo[4]; /* Transform to (extra space appended) */ +}; + +/* +** A stem object is used to generate variants. It is also used to record +** previously generated outputs. +** +** Every stem is added to a hash table as it is output. Generation of +** duplicate stems is suppressed. +** +** Active stems (those that might generate new outputs) are kept on a linked +** list sorted by increasing cost. The cost is the sum of rBaseCost and +** pRule->rCost. +*/ +struct fuzzer_stem { + char *zBasis; /* Word being fuzzed */ + const fuzzer_rule *pRule; /* Current rule to apply */ + fuzzer_stem *pNext; /* Next stem in rCost order */ + fuzzer_stem *pHash; /* Next stem with same hash on zBasis */ + fuzzer_cost rBaseCost; /* Base cost of getting to zBasis */ + fuzzer_cost rCostX; /* Precomputed rBaseCost + pRule->rCost */ + fuzzer_len nBasis; /* Length of the zBasis string */ + fuzzer_len n; /* Apply pRule at this character offset */ +}; + +/* +** A fuzzer virtual-table object +*/ +struct fuzzer_vtab { + sqlite3_vtab base; /* Base class - must be first */ + char *zClassName; /* Name of this class. 
Default: "fuzzer" */ + fuzzer_rule *pRule; /* All active rules in this fuzzer */ + int nCursor; /* Number of active cursors */ +}; + +#define FUZZER_HASH 4001 /* Hash table size */ +#define FUZZER_NQUEUE 20 /* Number of slots on the stem queue */ + +/* A fuzzer cursor object. An rLimit of 0 or less marks the cursor as being at EOF. */ +struct fuzzer_cursor { + sqlite3_vtab_cursor base; /* Base class - must be first */ + sqlite3_int64 iRowid; /* The rowid of the current word */ + fuzzer_vtab *pVtab; /* The virtual table this cursor belongs to */ + fuzzer_cost rLimit; /* Maximum cost of any term */ + fuzzer_stem *pStem; /* Stem with smallest rCostX */ + fuzzer_stem *pDone; /* Stems already processed to completion */ + fuzzer_stem *aQueue[FUZZER_NQUEUE]; /* Queue of stems with higher rCostX */ + int mxQueue; /* Largest used index in aQueue[] */ + char *zBuf; /* Temporary use buffer */ + int nBuf; /* Bytes allocated for zBuf */ + int nStem; /* Number of stems allocated */ + int iRuleset; /* Only process rules from this ruleset */ + fuzzer_rule nullRule; /* Null rule used first */ + fuzzer_stem *apHash[FUZZER_HASH]; /* Hash of previously generated terms */ +}; + +/* +** The two input rule lists are both sorted in order of increasing +** cost. Merge them together into a single list, sorted by cost, and +** return a pointer to the head of that list. +** +** Either input may be NULL. The nodes are relinked in place; nothing is +** allocated or freed. +*/ +static fuzzer_rule *fuzzerMergeRules(fuzzer_rule *pA, fuzzer_rule *pB){ + fuzzer_rule head; /* Stack dummy; only head.pNext is ever read */ + fuzzer_rule *pTail; + + pTail = &head; + while( pA && pB ){ + if( pA->rCost<=pB->rCost ){ /* on equal cost the pA node goes first */ + pTail->pNext = pA; + pTail = pA; + pA = pA->pNext; + }else{ + pTail->pNext = pB; + pTail = pB; + pB = pB->pNext; + } + } + if( pA==0 ){ /* append whichever list still has nodes (possibly none) */ + pTail->pNext = pB; + }else{ + pTail->pNext = pA; + } + return head.pNext; +} + +/* +** Statement pStmt currently points to a row in the fuzzer data table. This +** function allocates and populates a fuzzer_rule structure according to +** the content of the row. +** +** If successful, *ppRule is set to point to the new object and SQLITE_OK +** is returned. 
Otherwise, *ppRule is zeroed, *pzErr may be set to point +** to an error message and an SQLite error code returned. +*/ +static int fuzzerLoadOneRule( + fuzzer_vtab *p, /* Fuzzer virtual table handle */ + sqlite3_stmt *pStmt, /* Base rule on statements current row */ + fuzzer_rule **ppRule, /* OUT: New rule object */ + char **pzErr /* OUT: Error message */ +){ + sqlite3_int64 iRuleset = sqlite3_column_int64(pStmt, 0); + const char *zFrom = (const char *)sqlite3_column_text(pStmt, 1); + const char *zTo = (const char *)sqlite3_column_text(pStmt, 2); + int nCost = sqlite3_column_int(pStmt, 3); + + int rc = SQLITE_OK; /* Return code */ + int nFrom; /* Size of string zFrom, in bytes */ + int nTo; /* Size of string zTo, in bytes */ + fuzzer_rule *pRule = 0; /* New rule object to return */ + + if( zFrom==0 ) zFrom = ""; /* a NULL column text is treated as an empty string */ + if( zTo==0 ) zTo = ""; + nFrom = (int)strlen(zFrom); + nTo = (int)strlen(zTo); + + /* Silently ignore null transformations */ + if( strcmp(zFrom, zTo)==0 ){ + *ppRule = 0; + return SQLITE_OK; + } + + /* The checks below run in order; only the first failure sets *pzErr. */ + if( nCost<=0 || nCost>FUZZER_MX_COST ){ + *pzErr = sqlite3_mprintf("%s: cost must be between 1 and %d", + p->zClassName, FUZZER_MX_COST + ); + rc = SQLITE_ERROR; + }else + if( nFrom>FUZZER_MX_LENGTH || nTo>FUZZER_MX_LENGTH ){ + *pzErr = sqlite3_mprintf("%s: maximum string length is %d", + p->zClassName, FUZZER_MX_LENGTH + ); + rc = SQLITE_ERROR; + }else + if( iRuleset<0 || iRuleset>FUZZER_MX_RULEID ){ + *pzErr = sqlite3_mprintf("%s: ruleset must be between 0 and %d", + p->zClassName, FUZZER_MX_RULEID + ); + rc = SQLITE_ERROR; + }else{ + /* One allocation holds the struct with zTo and zFrom stored back to back at its end. The declared zTo[4] covers both nul terminators, so only nFrom+nTo extra bytes are requested. */ + pRule = sqlite3_malloc( sizeof(*pRule) + nFrom + nTo ); + if( pRule==0 ){ + rc = SQLITE_NOMEM; + }else{ + memset(pRule, 0, sizeof(*pRule)); + pRule->zFrom = &pRule->zTo[nTo+1]; /* zFrom starts right after zTo's terminator */ + pRule->nFrom = nFrom; + memcpy(pRule->zFrom, zFrom, nFrom+1); + memcpy(pRule->zTo, zTo, nTo+1); + pRule->nTo = nTo; + pRule->rCost = nCost; + pRule->iRuleset = (int)iRuleset; + } + } + + *ppRule = pRule; + return rc; +} + +/* +** Load the content 
of the fuzzer data table into memory. +*/ +static int fuzzerLoadRules( + sqlite3 *db, /* Database handle */ + fuzzer_vtab *p, /* Virtual fuzzer table to configure */ + const char *zDb, /* Database containing rules data */ + const char *zData, /* Table containing rules data */ + char **pzErr /* OUT: Error message */ +){ + int rc = SQLITE_OK; /* Return code */ + char *zSql; /* SELECT used to read from rules table */ + fuzzer_rule *pHead = 0; + + zSql = sqlite3_mprintf("SELECT * FROM %Q.%Q", zDb, zData); + if( zSql==0 ){ + rc = SQLITE_NOMEM; + }else{ + int rc2; /* finalize() return code */ + sqlite3_stmt *pStmt = 0; + rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); + if( rc!=SQLITE_OK ){ + *pzErr = sqlite3_mprintf("%s: %s", p->zClassName, sqlite3_errmsg(db)); + }else if( sqlite3_column_count(pStmt)!=4 ){ + *pzErr = sqlite3_mprintf("%s: %s has %d columns, expected 4", + p->zClassName, zData, sqlite3_column_count(pStmt) + ); + rc = SQLITE_ERROR; + }else{ + while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){ + fuzzer_rule *pRule = 0; + rc = fuzzerLoadOneRule(p, pStmt, &pRule, pzErr); + if( pRule ){ + pRule->pNext = pHead; + pHead = pRule; + } + } + } + rc2 = sqlite3_finalize(pStmt); + if( rc==SQLITE_OK ) rc = rc2; + } + sqlite3_free(zSql); + + /* All rules are now in a singly linked list starting at pHead. This + ** block sorts them by cost and then sets fuzzer_vtab.pRule to point to + ** point to the head of the sorted list. + */ + if( rc==SQLITE_OK ){ + unsigned int i; + fuzzer_rule *pX; + fuzzer_rule *a[15]; + for(i=0; ipNext; + pX->pNext = 0; + for(i=0; a[i] && ipRule = fuzzerMergeRules(p->pRule, pX); + }else{ + /* An error has occurred. Setting p->pRule to point to the head of the + ** allocated list ensures that the list will be cleaned up in this case. 
+ */ + assert( p->pRule==0 ); + p->pRule = pHead; + } + + return rc; +} + +/* +** This function converts an SQL quoted string into an unquoted string +** and returns a pointer to a buffer allocated using sqlite3_malloc() +** containing the result. The caller should eventually free this buffer +** using sqlite3_free. +** +** Examples: +** +** "abc" becomes abc +** 'xyz' becomes xyz +** [pqr] becomes pqr +** `mno` becomes mno +*/ +static char *fuzzerDequote(const char *zIn){ + int nIn; /* Size of input string, in bytes */ + char *zOut; /* Output (dequoted) string */ + + nIn = (int)strlen(zIn); + zOut = sqlite3_malloc(nIn+1); + if( zOut ){ + char q = zIn[0]; /* Quote character (if any ) */ + + if( q!='[' && q!= '\'' && q!='"' && q!='`' ){ + memcpy(zOut, zIn, nIn+1); + }else{ + int iOut = 0; /* Index of next byte to write to output */ + int iIn; /* Index of next byte to read from input */ + + if( q=='[' ) q = ']'; + for(iIn=1; iInnCursor==0 ); + while( p->pRule ){ + fuzzer_rule *pRule = p->pRule; + p->pRule = pRule->pNext; + sqlite3_free(pRule); + } + sqlite3_free(p); + return SQLITE_OK; +} + +/* +** xConnect/xCreate method for the fuzzer module. 
Arguments are: +** +** argv[0] -> module name ("fuzzer") +** argv[1] -> database name +** argv[2] -> table name +** argv[3] -> fuzzer rule table name +*/ +static int fuzzerConnect( + sqlite3 *db, + void *pAux, + int argc, const char *const*argv, + sqlite3_vtab **ppVtab, + char **pzErr +){ + int rc = SQLITE_OK; /* Return code */ + fuzzer_vtab *pNew = 0; /* New virtual table */ + const char *zModule = argv[0]; + const char *zDb = argv[1]; + + if( argc!=4 ){ + *pzErr = sqlite3_mprintf( + "%s: wrong number of CREATE VIRTUAL TABLE arguments", zModule + ); + rc = SQLITE_ERROR; + }else{ + int nModule; /* Length of zModule, in bytes */ + + nModule = (int)strlen(zModule); + pNew = sqlite3_malloc( sizeof(*pNew) + nModule + 1); /* struct plus a trailing copy of the module name */ + if( pNew==0 ){ + rc = SQLITE_NOMEM; + }else{ + char *zTab; /* Dequoted name of fuzzer data table */ + + memset(pNew, 0, sizeof(*pNew)); + pNew->zClassName = (char*)&pNew[1]; /* name lives in the same allocation, right after the struct */ + memcpy(pNew->zClassName, zModule, nModule+1); + + zTab = fuzzerDequote(argv[3]); + if( zTab==0 ){ + rc = SQLITE_NOMEM; + }else{ + rc = fuzzerLoadRules(db, pNew, zDb, zTab, pzErr); + sqlite3_free(zTab); + } + + if( rc==SQLITE_OK ){ + rc = sqlite3_declare_vtab(db, "CREATE TABLE x(word,distance,ruleset)"); + } + if( rc!=SQLITE_OK ){ + fuzzerDisconnect((sqlite3_vtab *)pNew); /* frees pNew together with any rules already attached to it */ + pNew = 0; + } + } + } + + *ppVtab = (sqlite3_vtab *)pNew; /* NULL on every failure path */ + return rc; +} + +/* +** Open a new fuzzer cursor. +** +** The cursor is allocated zero-filled, linked to its table, and the +** table's count of open cursors is incremented. Returns SQLITE_NOMEM if +** the allocation fails. +*/ +static int fuzzerOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){ + fuzzer_vtab *p = (fuzzer_vtab*)pVTab; + fuzzer_cursor *pCur; + pCur = sqlite3_malloc( sizeof(*pCur) ); + if( pCur==0 ) return SQLITE_NOMEM; + memset(pCur, 0, sizeof(*pCur)); + pCur->pVtab = p; + *ppCursor = &pCur->base; + p->nCursor++; + return SQLITE_OK; +} + +/* +** Free all stems in a list. The list is followed through pNext; a NULL +** argument is a no-op. +*/ +static void fuzzerClearStemList(fuzzer_stem *pStem){ + while( pStem ){ + fuzzer_stem *pNext = pStem->pNext; + sqlite3_free(pStem); + pStem = pNext; + } +} + +/* +** Free up all the memory allocated by a cursor. 
Set its rLimit to 0 +** to indicate that it is at EOF. +** +** If clearHash is non-zero and stems have been allocated, the list heads, +** the queue slots and the hash table are also reset so that the cursor can +** be reused. fuzzerClose() passes 0 because it frees the cursor next. +*/ +static void fuzzerClearCursor(fuzzer_cursor *pCur, int clearHash){ + int i; + fuzzerClearStemList(pCur->pStem); + fuzzerClearStemList(pCur->pDone); + /* The loop bound was lost in extraction; aQueue[] is declared with + ** FUZZER_NQUEUE slots, so every slot is released here. */ + for(i=0; i<FUZZER_NQUEUE; i++) fuzzerClearStemList(pCur->aQueue[i]); + pCur->rLimit = (fuzzer_cost)0; + if( clearHash && pCur->nStem ){ + pCur->mxQueue = 0; + pCur->pStem = 0; + pCur->pDone = 0; + memset(pCur->aQueue, 0, sizeof(pCur->aQueue)); + memset(pCur->apHash, 0, sizeof(pCur->apHash)); + } + pCur->nStem = 0; +} + +/* +** Close a fuzzer cursor. Releases the stems, the scratch buffer and the +** cursor itself, and decrements the owning table's cursor count. +*/ +static int fuzzerClose(sqlite3_vtab_cursor *cur){ + fuzzer_cursor *pCur = (fuzzer_cursor *)cur; + fuzzerClearCursor(pCur, 0); + sqlite3_free(pCur->zBuf); + pCur->pVtab->nCursor--; + sqlite3_free(pCur); + return SQLITE_OK; +} + +/* +** Compute the current output term for a fuzzer_stem. +** +** The term is zBasis with pRule applied at offset pStem->n, or zBasis +** itself when that offset is negative. Returns SQLITE_OK, or SQLITE_NOMEM +** if the buffer cannot be grown; on failure *pzBuf and *pnBuf are left +** untouched. +*/ +static int fuzzerRender( + fuzzer_stem *pStem, /* The stem to be rendered */ + char **pzBuf, /* Write results into this buffer. realloc if needed */ + int *pnBuf /* Size of the buffer */ +){ + const fuzzer_rule *pRule = pStem->pRule; + int n; /* Size of output term without nul-term */ + char *z; /* Buffer to assemble output term in */ + + n = pStem->nBasis + pRule->nTo - pRule->nFrom; + /* NOTE(review): the buffer-growth step was lost in extraction and has + ** been reconstructed from the parameter comments; the 100 bytes of slack + ** are a guess. Confirm against upstream. */ + if( (*pnBuf)<n+1 ){ + char *zNew = sqlite3_realloc((*pzBuf), n+100); + if( zNew==0 ) return SQLITE_NOMEM; + (*pzBuf) = zNew; + (*pnBuf) = n+100; + } + n = pStem->n; /* from here on n is the offset at which the rule applies */ + z = *pzBuf; + if( n<0 ){ + memcpy(z, pStem->zBasis, pStem->nBasis+1); + }else{ + memcpy(z, pStem->zBasis, n); + memcpy(&z[n], pRule->zTo, pRule->nTo); + memcpy(&z[n+pRule->nTo], &pStem->zBasis[n+pRule->nFrom], + pStem->nBasis-n-pRule->nFrom+1); + } + + assert( z[pStem->nBasis + pRule->nTo - pRule->nFrom]==0 ); + return SQLITE_OK; +} + +/* +** Compute a hash on zBasis. The result is in the range 0..FUZZER_HASH-1. +*/ +static unsigned int fuzzerHash(const char *z){ + unsigned int h = 0; + while( *z ){ h = (h<<3) ^ (h>>29) ^ *(z++); } + return h % FUZZER_HASH; +} + +/* +** Current cost of a stem. The value is also cached in pStem->rCostX. +*/ +static fuzzer_cost fuzzerCost(fuzzer_stem *pStem){ + return pStem->rCostX = pStem->rBaseCost + pStem->pRule->rCost; +} + +#if 0 +/* +** Print a description of a fuzzer_stem on stderr. 
+*/ +static void fuzzerStemPrint( + const char *zPrefix, + fuzzer_stem *pStem, + const char *zSuffix +){ + if( pStem->n<0 ){ + fprintf(stderr, "%s[%s](%d)-->self%s", + zPrefix, + pStem->zBasis, pStem->rBaseCost, + zSuffix + ); + }else{ + char *zBuf = 0; + int nBuf = 0; + if( fuzzerRender(pStem, &zBuf, &nBuf)!=SQLITE_OK ) return; + /* The member name was missing after "pStem->". rCostX is the cached + ** total cost of the rendered term - presumably what was meant; verify. */ + fprintf(stderr, "%s[%s](%d)-->{%s}(%d)%s", + zPrefix, + pStem->zBasis, pStem->rBaseCost, zBuf, pStem->rCostX, + zSuffix + ); + sqlite3_free(zBuf); + } +} +#endif + +/* +** Return 1 if the string that pStem currently renders to has already +** been emitted. Return 0 if not. Return -1 on a memory allocation +** failure. +** +** The rendered string is left in pCur->zBuf. +*/ +static int fuzzerSeen(fuzzer_cursor *pCur, fuzzer_stem *pStem){ + unsigned int h; + fuzzer_stem *pLookup; + + if( fuzzerRender(pStem, &pCur->zBuf, &pCur->nBuf)==SQLITE_NOMEM ){ + return -1; + } + h = fuzzerHash(pCur->zBuf); + pLookup = pCur->apHash[h]; + /* Walk the bucket's chain looking for a stem whose basis is this string. */ + while( pLookup && strcmp(pLookup->zBasis, pCur->zBuf)!=0 ){ + pLookup = pLookup->pHash; + } + return pLookup!=0; +} + +/* +** If argument pRule is NULL, this function returns false. +** +** Otherwise, it returns true if rule pRule should be skipped. A rule +** should be skipped if it does not belong to rule-set iRuleset, or if +** applying it to stem pStem would create a string longer than +** FUZZER_MX_OUTPUT_LENGTH bytes. +*/ +static int fuzzerSkipRule( + const fuzzer_rule *pRule, /* Determine whether or not to skip this */ + fuzzer_stem *pStem, /* Stem rule may be applied to */ + int iRuleset /* Rule-set used by the current query */ +){ + return pRule && ( + (pRule->iRuleset!=iRuleset) + || (pStem->nBasis + pRule->nTo - pRule->nFrom)>FUZZER_MX_OUTPUT_LENGTH + ); +} + +/* +** Advance a fuzzer_stem to its next value. Return 0 if there are +** no more values that can be generated by this fuzzer_stem. Return +** -1 on a memory allocation failure. 
+*/ +static int fuzzerAdvance(fuzzer_cursor *pCur, fuzzer_stem *pStem){ + const fuzzer_rule *pRule; + while( (pRule = pStem->pRule)!=0 ){ + assert( pRule==&pCur->nullRule || pRule->iRuleset==pCur->iRuleset ); + while( pStem->n < pStem->nBasis - pRule->nFrom ){ /* try each remaining offset where zFrom could still fit */ + pStem->n++; + if( pRule->nFrom==0 + || memcmp(&pStem->zBasis[pStem->n], pRule->zFrom, pRule->nFrom)==0 + ){ + /* Found a rewrite case. Make sure it is not a duplicate */ + int rc = fuzzerSeen(pCur, pStem); + if( rc<0 ) return -1; + if( rc==0 ){ + fuzzerCost(pStem); + return 1; /* pStem now designates an output that has not been seen */ + } + } + } + pStem->n = -1; /* offsets exhausted for this rule; start over with the next usable rule */ + do{ + pRule = pRule->pNext; + }while( fuzzerSkipRule(pRule, pStem, pCur->iRuleset) ); + pStem->pRule = pRule; + if( pRule && fuzzerCost(pStem)>pCur->rLimit ) pStem->pRule = 0; /* over the cost limit: retire the stem */ + } + return 0; +} + +/* +** The two input stem lists are both sorted in order of increasing +** rCostX. Merge them together into a single list, sorted by rCostX, and +** return a pointer to the head of that new list. +** +** Either input may be NULL. The stems are relinked in place. +*/ +static fuzzer_stem *fuzzerMergeStems(fuzzer_stem *pA, fuzzer_stem *pB){ + fuzzer_stem head; /* Stack dummy; only head.pNext is ever read */ + fuzzer_stem *pTail; + + pTail = &head; + while( pA && pB ){ + if( pA->rCostX<=pB->rCostX ){ /* on equal cost the pA stem goes first */ + pTail->pNext = pA; + pTail = pA; + pA = pA->pNext; + }else{ + pTail->pNext = pB; + pTail = pB; + pB = pB->pNext; + } + } + if( pA==0 ){ + pTail->pNext = pB; + }else{ + pTail->pNext = pA; + } + return head.pNext; +} + +/* +** Load pCur->pStem with the lowest-cost stem. Return a pointer +** to the lowest-cost stem. +** +** If pCur->pStem is already set it is returned unchanged. Otherwise the +** cheapest list head among aQueue[0..mxQueue] is unlinked and installed. +** Returns NULL when every queue slot is empty. +*/ +static fuzzer_stem *fuzzerLowestCostStem(fuzzer_cursor *pCur){ + fuzzer_stem *pBest, *pX; + int iBest; + int i; + + if( pCur->pStem==0 ){ + iBest = -1; + pBest = 0; + for(i=0; i<=pCur->mxQueue; i++){ + pX = pCur->aQueue[i]; + if( pX==0 ) continue; + if( pBest==0 || pBest->rCostX>pX->rCostX ){ + pBest = pX; + iBest = i; + } + } + if( pBest ){ + pCur->aQueue[iBest] = pBest->pNext; /* the rest of that list stays queued */ + pBest->pNext = 0; + pCur->pStem = pBest; + } + } + return pCur->pStem; +} + +/* +** Insert pNew into queue of pending stems. 
Then find the stem +** with the lowest rCostX and move it into pCur->pStem. +** list. The insert is done such the pNew is in the correct order +** according to fuzzer_stem.zBaseCost+fuzzer_stem.pRule->rCost. +*/ +static fuzzer_stem *fuzzerInsert(fuzzer_cursor *pCur, fuzzer_stem *pNew){ + fuzzer_stem *pX; + int i; + + /* If pCur->pStem exists and is greater than pNew, then make pNew + ** the new pCur->pStem and insert the old pCur->pStem instead. + */ + if( (pX = pCur->pStem)!=0 && pX->rCostX>pNew->rCostX ){ + pNew->pNext = 0; + pCur->pStem = pNew; + pNew = pX; + } + + /* Insert the new value */ + pNew->pNext = 0; + pX = pNew; + for(i=0; i<=pCur->mxQueue; i++){ + if( pCur->aQueue[i] ){ + pX = fuzzerMergeStems(pX, pCur->aQueue[i]); + pCur->aQueue[i] = 0; + }else{ + pCur->aQueue[i] = pX; + break; + } + } + if( i>pCur->mxQueue ){ + if( imxQueue = i; + pCur->aQueue[i] = pX; + }else{ + assert( pCur->mxQueue==FUZZER_NQUEUE-1 ); + pX = fuzzerMergeStems(pX, pCur->aQueue[FUZZER_NQUEUE-1]); + pCur->aQueue[FUZZER_NQUEUE-1] = pX; + } + } + + return fuzzerLowestCostStem(pCur); +} + +/* +** Allocate a new fuzzer_stem. Add it to the hash table but do not +** link it into either the pCur->pStem or pCur->pDone lists. 
+*/ +static fuzzer_stem *fuzzerNewStem( + fuzzer_cursor *pCur, + const char *zWord, + fuzzer_cost rBaseCost +){ + fuzzer_stem *pNew; + fuzzer_rule *pRule; + unsigned int h; + + /* The stem and its copy of zWord share a single allocation. */ + pNew = sqlite3_malloc( sizeof(*pNew) + (int)strlen(zWord) + 1 ); + if( pNew==0 ) return 0; + memset(pNew, 0, sizeof(*pNew)); + pNew->zBasis = (char*)&pNew[1]; + pNew->nBasis = (int)strlen(zWord); + memcpy(pNew->zBasis, zWord, pNew->nBasis+1); + /* Start at the first rule that is usable for this stem and ruleset. */ + pRule = pCur->pVtab->pRule; + while( fuzzerSkipRule(pRule, pNew, pCur->iRuleset) ){ + pRule = pRule->pNext; + } + pNew->pRule = pRule; + pNew->n = -1; + pNew->rBaseCost = pNew->rCostX = rBaseCost; + h = fuzzerHash(pNew->zBasis); + pNew->pHash = pCur->apHash[h]; + pCur->apHash[h] = pNew; + pCur->nStem++; + return pNew; +} + + +/* +** Advance a cursor to its next row of output. +** +** Returns SQLITE_OK, or SQLITE_NOMEM on an allocation failure. When +** nothing is left to output, rLimit is set to 0 so that fuzzerEof() +** reports end of scan. +*/ +static int fuzzerNext(sqlite3_vtab_cursor *cur){ + fuzzer_cursor *pCur = (fuzzer_cursor*)cur; + int rc; + fuzzer_stem *pStem, *pNew; + + pCur->iRowid++; + + /* Use the element the cursor currently points to to create + ** a new stem and insert the new stem into the priority queue. + */ + pStem = pCur->pStem; + if( pStem->rCostX>0 ){ + rc = fuzzerRender(pStem, &pCur->zBuf, &pCur->nBuf); + if( rc==SQLITE_NOMEM ) return SQLITE_NOMEM; + pNew = fuzzerNewStem(pCur, pCur->zBuf, pStem->rCostX); + if( pNew ){ + int res = fuzzerAdvance(pCur, pNew); + if( res<=0 ){ + /* Nothing to generate, or out of memory. Either way park the stem + ** on pDone so that it is freed with the cursor. */ + pNew->pNext = pCur->pDone; + pCur->pDone = pNew; + if( res<0 ) return SQLITE_NOMEM; + }else{ + if( fuzzerInsert(pCur, pNew)==pNew ){ + return SQLITE_OK; + } + } + }else{ + return SQLITE_NOMEM; + } + } + + /* Adjust the priority queue so that the first element of the + ** stem list is the next lowest cost word. 
+ */ + while( (pStem = pCur->pStem)!=0 ){ + int res = fuzzerAdvance(pCur, pStem); + if( res<0 ){ + return SQLITE_NOMEM; + }else if( res>0 ){ + pCur->pStem = 0; + pStem = fuzzerInsert(pCur, pStem); + if( (rc = fuzzerSeen(pCur, pStem))!=0 ){ + if( rc<0 ) return SQLITE_NOMEM; + continue; + } + return SQLITE_OK; /* New word found */ + } + /* The current stem is exhausted: retire it and load the next one. */ + pCur->pStem = 0; + pStem->pNext = pCur->pDone; + pCur->pDone = pStem; + if( fuzzerLowestCostStem(pCur) ){ + rc = fuzzerSeen(pCur, pCur->pStem); + if( rc<0 ) return SQLITE_NOMEM; + if( rc==0 ){ + return SQLITE_OK; + } + } + } + + /* Reach this point only if queue has been exhausted and there is + ** nothing left to be output. */ + pCur->rLimit = (fuzzer_cost)0; + return SQLITE_OK; +} + +/* +** Called to "rewind" a cursor back to the beginning so that +** it starts its output over again. Always called at least once +** prior to any fuzzerColumn, fuzzerRowid, or fuzzerEof call. +** +** idxNum is the bit vector produced by fuzzerBestIndex(): bit 1 means a +** MATCH word is present, bit 2 a distance limit, bit 4 a ruleset. The +** values arrive in argv[] in that order. +*/ +static int fuzzerFilter( + sqlite3_vtab_cursor *pVtabCursor, + int idxNum, const char *idxStr, + int argc, sqlite3_value **argv +){ + fuzzer_cursor *pCur = (fuzzer_cursor *)pVtabCursor; + const char *zWord = ""; + fuzzer_stem *pStem; + int idx; + + fuzzerClearCursor(pCur, 1); + pCur->rLimit = 2147483647; + idx = 0; + if( idxNum & 1 ){ + zWord = (const char*)sqlite3_value_text(argv[0]); + /* sqlite3_value_text() returns NULL for an SQL NULL or on OOM; treat + ** that as an empty word instead of handing NULL to strlen(). */ + if( zWord==0 ) zWord = ""; + idx++; + } + if( idxNum & 2 ){ + pCur->rLimit = (fuzzer_cost)sqlite3_value_int(argv[idx]); + idx++; + } + if( idxNum & 4 ){ + pCur->iRuleset = (fuzzer_ruleid)sqlite3_value_int(argv[idx]); + idx++; + } + pCur->nullRule.pNext = pCur->pVtab->pRule; + pCur->nullRule.rCost = 0; + pCur->nullRule.nFrom = 0; + pCur->nullRule.nTo = 0; + pCur->nullRule.zFrom = ""; + pCur->iRowid = 1; + assert( pCur->pStem==0 ); + + /* If the query term is FUZZER_MX_OUTPUT_LENGTH bytes or longer, this + ** query will return zero rows. NOTE(review): the comparison below was + ** lost in extraction and is reconstructed; confirm "<" against upstream. 
*/ + if( (int)strlen(zWord)<FUZZER_MX_OUTPUT_LENGTH ){ + pCur->pStem = pStem = fuzzerNewStem(pCur, zWord, (fuzzer_cost)0); + if( pStem==0 ) return SQLITE_NOMEM; + /* The first row is the word itself, produced by the zero-cost null rule. */ + pStem->pRule = &pCur->nullRule; + pStem->n = pStem->nBasis; + }else{ + pCur->rLimit = 0; + } + + return SQLITE_OK; +} + +/* +** Only the word and distance columns have values. All other columns +** return NULL +*/ +static int fuzzerColumn(sqlite3_vtab_cursor *cur, sqlite3_context *ctx, int i){ + fuzzer_cursor *pCur = (fuzzer_cursor*)cur; + if( i==0 ){ + /* the "word" column */ + if( fuzzerRender(pCur->pStem, &pCur->zBuf, &pCur->nBuf)==SQLITE_NOMEM ){ + return SQLITE_NOMEM; + } + sqlite3_result_text(ctx, pCur->zBuf, -1, SQLITE_TRANSIENT); + }else if( i==1 ){ + /* the "distance" column */ + sqlite3_result_int(ctx, pCur->pStem->rCostX); + }else{ + /* All other columns are NULL */ + sqlite3_result_null(ctx); + } + return SQLITE_OK; +} + +/* +** The rowid. It starts at 1 in fuzzerFilter() and grows by one for each +** fuzzerNext() call. +*/ +static int fuzzerRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){ + fuzzer_cursor *pCur = (fuzzer_cursor*)cur; + *pRowid = pCur->iRowid; + return SQLITE_OK; +} + +/* +** When the fuzzer_cursor.rLimit value is 0 or less, that is a signal +** that the cursor has nothing more to output. +*/ +static int fuzzerEof(sqlite3_vtab_cursor *cur){ + fuzzer_cursor *pCur = (fuzzer_cursor*)cur; + return pCur->rLimit<=(fuzzer_cost)0; +} + +/* +** Search for terms of these forms: +** +** (A) word MATCH $str +** (B1) distance < $value +** (B2) distance <= $value +** (C) ruleset == $ruleid +** +** The distance< and distance<= are both treated as distance<=. +** The query plan number is a bit vector: +** +** bit 1: Term of the form (A) found +** bit 2: Term like (B1) or (B2) found +** bit 3: Term like (C) found +** +** If bit-1 is set, $str is always in filter.argv[0]. If bit-2 is set +** then $value is in filter.argv[0] if bit-1 is clear and is in +** filter.argv[1] if bit-1 is set. 
If bit-3 is set, then $ruleid is +** in filter.argv[0] if bit-1 and bit-2 are both zero, is in +** filter.argv[1] if exactly one of bit-1 and bit-2 are set, and is in +** filter.argv[2] if both bit-1 and bit-2 are set. +*/ +static int fuzzerBestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){ + int iPlan = 0; + int iDistTerm = -1; + int iRulesetTerm = -1; + int i; + const struct sqlite3_index_constraint *pConstraint; + pConstraint = pIdxInfo->aConstraint; + for(i=0; inConstraint; i++, pConstraint++){ + if( pConstraint->usable==0 ) continue; + if( (iPlan & 1)==0 + && pConstraint->iColumn==0 + && pConstraint->op==SQLITE_INDEX_CONSTRAINT_MATCH + ){ + iPlan |= 1; + pIdxInfo->aConstraintUsage[i].argvIndex = 1; + pIdxInfo->aConstraintUsage[i].omit = 1; + } + if( (iPlan & 2)==0 + && pConstraint->iColumn==1 + && (pConstraint->op==SQLITE_INDEX_CONSTRAINT_LT + || pConstraint->op==SQLITE_INDEX_CONSTRAINT_LE) + ){ + iPlan |= 2; + iDistTerm = i; + } + if( (iPlan & 4)==0 + && pConstraint->iColumn==2 + && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ + ){ + iPlan |= 4; + pIdxInfo->aConstraintUsage[i].omit = 1; + iRulesetTerm = i; + } + } + if( iPlan & 2 ){ + pIdxInfo->aConstraintUsage[iDistTerm].argvIndex = 1+((iPlan&1)!=0); + } + if( iPlan & 4 ){ + int idx = 1; + if( iPlan & 1 ) idx++; + if( iPlan & 2 ) idx++; + pIdxInfo->aConstraintUsage[iRulesetTerm].argvIndex = idx; + } + pIdxInfo->idxNum = iPlan; + if( pIdxInfo->nOrderBy==1 + && pIdxInfo->aOrderBy[0].iColumn==1 + && pIdxInfo->aOrderBy[0].desc==0 + ){ + pIdxInfo->orderByConsumed = 1; + } + pIdxInfo->estimatedCost = (double)10000; + + return SQLITE_OK; +} + +/* +** A virtual table module that implements the "fuzzer". 
+*/ +static sqlite3_module fuzzerModule = { + 0, /* iVersion */ + fuzzerConnect, /* xCreate */ + fuzzerConnect, /* xConnect */ + fuzzerBestIndex, /* xBestIndex */ + fuzzerDisconnect, /* xDisconnect */ + fuzzerDisconnect, /* xDestroy */ + fuzzerOpen, /* xOpen - open a cursor */ + fuzzerClose, /* xClose - close a cursor */ + fuzzerFilter, /* xFilter - configure scan constraints */ + fuzzerNext, /* xNext - advance a cursor */ + fuzzerEof, /* xEof - check for end of scan */ + fuzzerColumn, /* xColumn - read data */ + fuzzerRowid, /* xRowid - read data */ + 0, /* xUpdate */ + 0, /* xBegin */ + 0, /* xSync */ + 0, /* xCommit */ + 0, /* xRollback */ + 0, /* xFindMethod */ + 0, /* xRename */ +}; + +#endif /* SQLITE_OMIT_VIRTUALTABLE */ + + /* Extension entry point: registers the module under the name "fuzzer". pzErrMsg is not used here. */ + +#ifdef _WIN32 +__declspec(dllexport) +#endif +int sqlite3_fuzzer_init( + sqlite3 *db, + char **pzErrMsg, + const sqlite3_api_routines *pApi +){ + int rc = SQLITE_OK; + SQLITE_EXTENSION_INIT2(pApi); +#ifndef SQLITE_OMIT_VIRTUALTABLE + rc = sqlite3_create_module(db, "fuzzer", &fuzzerModule, 0); +#endif + return rc; +} diff --git a/ext/misc/ieee754.c b/ext/misc/ieee754.c new file mode 100644 index 0000000..436b11e --- /dev/null +++ b/ext/misc/ieee754.c @@ -0,0 +1,131 @@ +/* +** 2013-04-17 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This SQLite extension implements functions for the exact display +** and input of IEEE754 Binary64 floating-point numbers. +** +** ieee754(X) +** ieee754(Y,Z) +** +** In the first form, the value X should be a floating-point number. +** The function will return a string of the form 'ieee754(Y,Z)' where +** Y and Z are integers such that X==Y*pow(2.0,Z). 
+** +** In the second form, Y and Z are integers which are the mantissa and +** base-2 exponent of a new floating point number. The function returns +** a floating-point value equal to Y*pow(2.0,Z). +** +** Examples: +** +** ieee754(2.0) -> 'ieee754(2,0)' +** ieee754(45.25) -> 'ieee754(181,-2)' +** ieee754(2, 0) -> 2.0 +** ieee754(181, -2) -> 45.25 +*/ +#include "sqlite3ext.h" +SQLITE_EXTENSION_INIT1 +#include +#include + +/* +** Implementation of the ieee754() function +*/ +static void ieee754func( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + if( argc==1 ){ + sqlite3_int64 m, a; + double r; + int e; + int isNeg; + char zResult[100]; + assert( sizeof(m)==sizeof(r) ); + if( sqlite3_value_type(argv[0])!=SQLITE_FLOAT ) return; + r = sqlite3_value_double(argv[0]); + if( r<0.0 ){ + isNeg = 1; + r = -r; + }else{ + isNeg = 0; + } + memcpy(&a,&r,sizeof(a)); + if( a==0 ){ + e = 0; + m = 0; + }else{ + e = a>>52; + m = a & ((((sqlite3_int64)1)<<52)-1); + m |= ((sqlite3_int64)1)<<52; + while( e<1075 && m>0 && (m&1)==0 ){ + m >>= 1; + e++; + } + if( isNeg ) m = -m; + } + sqlite3_snprintf(sizeof(zResult), zResult, "ieee754(%lld,%d)", + m, e-1075); + sqlite3_result_text(context, zResult, -1, SQLITE_TRANSIENT); + }else if( argc==2 ){ + sqlite3_int64 m, e, a; + double r; + int isNeg = 0; + m = sqlite3_value_int64(argv[0]); + e = sqlite3_value_int64(argv[1]); + if( m<0 ){ + isNeg = 1; + m = -m; + if( m<0 ) return; + }else if( m==0 && e>1000 && e<1000 ){ + sqlite3_result_double(context, 0.0); + return; + } + while( (m>>32)&0xffe00000 ){ + m >>= 1; + e++; + } + while( ((m>>32)&0xfff00000)==0 ){ + m <<= 1; + e--; + } + e += 1075; + if( e<0 ) e = m = 0; + if( e>0x7ff ) m = 0; + a = m & ((((sqlite3_int64)1)<<52)-1); + a |= e<<52; + if( isNeg ) a |= ((sqlite3_int64)1)<<63; + memcpy(&r, &a, sizeof(r)); + sqlite3_result_double(context, r); + } +} + + +#ifdef _WIN32 +__declspec(dllexport) +#endif +int sqlite3_ieee_init( + sqlite3 *db, + char **pzErrMsg, + const 
sqlite3_api_routines *pApi +){ + int rc = SQLITE_OK; + SQLITE_EXTENSION_INIT2(pApi); + (void)pzErrMsg; /* Unused parameter */ + rc = sqlite3_create_function(db, "ieee754", 1, SQLITE_UTF8, 0, + ieee754func, 0, 0); + if( rc==SQLITE_OK ){ + rc = sqlite3_create_function(db, "ieee754", 2, SQLITE_UTF8, 0, + ieee754func, 0, 0); + } + return rc; +} diff --git a/ext/misc/nextchar.c b/ext/misc/nextchar.c new file mode 100644 index 0000000..e063043 --- /dev/null +++ b/ext/misc/nextchar.c @@ -0,0 +1,265 @@ +/* +** 2013-02-28 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This file contains code to implement the next_char(A,T,F,W) SQL function. +** +** The next_char(A,T,F,H) function finds all valid "next" characters for +** string A given the vocabulary in T.F. The T.F field should be indexed. +** If the W value exists and is a non-empty string, then it is an SQL +** expression that limits the entries in T.F that will be considered. +** +** For example, suppose an application has a dictionary like this: +** +** CREATE TABLE dictionary(word TEXT UNIQUE); +** +** Further suppose that for user keypad entry, it is desired to disable +** (gray out) keys that are not valid as the next character. If the +** the user has previously entered (say) 'cha' then to find all allowed +** next characters (and thereby determine when keys should not be grayed +** out) run the following query: +** +** SELECT next_char('cha','dictionary','word'); +*/ +#include "sqlite3ext.h" +SQLITE_EXTENSION_INIT1 +#include + +/* +** A structure to hold context of the next_char() computation across +** nested function calls. 
+*/ +typedef struct nextCharContext nextCharContext; +struct nextCharContext { + sqlite3 *db; /* Database connection */ + sqlite3_stmt *pStmt; /* Prepared statement used to query */ + const unsigned char *zPrefix; /* Prefix to scan */ + int nPrefix; /* Size of zPrefix in bytes */ + int nAlloc; /* Space allocated to aResult */ + int nUsed; /* Space used in aResult */ + unsigned int *aResult; /* Array of next characters */ + int mallocFailed; /* True if malloc fails */ + int otherError; /* True for any other failure */ +}; + +/* +** Append a result character if the character is not already in the +** result. +*/ +static void nextCharAppend(nextCharContext *p, unsigned c){ + int i; + for(i=0; inUsed; i++){ + if( p->aResult[i]==c ) return; + } + if( p->nUsed+1 > p->nAlloc ){ + unsigned int *aNew; + int n = p->nAlloc*2 + 30; + aNew = sqlite3_realloc(p->aResult, n*sizeof(unsigned int)); + if( aNew==0 ){ + p->mallocFailed = 1; + return; + }else{ + p->aResult = aNew; + p->nAlloc = n; + } + } + p->aResult[p->nUsed++] = c; +} + +/* +** Write a character into z[] as UTF8. Return the number of bytes needed +** to hold the character +*/ +static int writeUtf8(unsigned char *z, unsigned c){ + if( c<0x00080 ){ + z[0] = (unsigned char)(c&0xff); + return 1; + } + if( c<0x00800 ){ + z[0] = 0xC0 + (unsigned char)((c>>6)&0x1F); + z[1] = 0x80 + (unsigned char)(c & 0x3F); + return 2; + } + if( c<0x10000 ){ + z[0] = 0xE0 + (unsigned char)((c>>12)&0x0F); + z[1] = 0x80 + (unsigned char)((c>>6) & 0x3F); + z[2] = 0x80 + (unsigned char)(c & 0x3F); + return 3; + } + z[0] = 0xF0 + (unsigned char)((c>>18) & 0x07); + z[1] = 0x80 + (unsigned char)((c>>12) & 0x3F); + z[2] = 0x80 + (unsigned char)((c>>6) & 0x3F); + z[3] = 0x80 + (unsigned char)(c & 0x3F); + return 4; +} + +/* +** Read a UTF8 character out of z[] and write it into *pOut. Return +** the number of bytes in z[] that were used to construct the character. 
+*/ +static int readUtf8(const unsigned char *z, unsigned *pOut){ + static const unsigned char validBits[] = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00, + }; + unsigned c = z[0]; + if( c<0xc0 ){ + *pOut = c; + return 1; + }else{ + int n = 1; + c = validBits[c-0xc0]; + while( (z[n] & 0xc0)==0x80 ){ + c = (c<<6) + (0x3f & z[n++]); + } + if( c<0x80 || (c&0xFFFFF800)==0xD800 || (c&0xFFFFFFFE)==0xFFFE ){ + c = 0xFFFD; + } + *pOut = c; + return n; + } +} + +/* +** The nextCharContext structure has been set up. Add all "next" characters +** to the result set. +*/ +static void findNextChars(nextCharContext *p){ + unsigned cPrev = 0; + unsigned char zPrev[8]; + int n, rc; + + for(;;){ + sqlite3_bind_text(p->pStmt, 1, (char*)p->zPrefix, p->nPrefix, + SQLITE_STATIC); + n = writeUtf8(zPrev, cPrev+1); + sqlite3_bind_text(p->pStmt, 2, (char*)zPrev, n, SQLITE_STATIC); + rc = sqlite3_step(p->pStmt); + if( rc==SQLITE_DONE ){ + sqlite3_reset(p->pStmt); + return; + }else if( rc!=SQLITE_ROW ){ + p->otherError = rc; + return; + }else{ + const unsigned char *zOut = sqlite3_column_text(p->pStmt, 0); + unsigned cNext; + n = readUtf8(zOut+p->nPrefix, &cNext); + sqlite3_reset(p->pStmt); + nextCharAppend(p, cNext); + cPrev = cNext; + if( p->mallocFailed ) return; + } + } +} + + +/* +** next_character(A,T,F,W) +** +** Return a string composted of all next possible characters after +** A for elements of T.F. If W is supplied, then it is an SQL expression +** that limits the elements in T.F that are considered. 
+*/ +static void nextCharFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + nextCharContext c; + const unsigned char *zTable = sqlite3_value_text(argv[1]); + const unsigned char *zField = sqlite3_value_text(argv[2]); + const unsigned char *zWhere; + char *zSql; + int rc; + + memset(&c, 0, sizeof(c)); + c.db = sqlite3_context_db_handle(context); + c.zPrefix = sqlite3_value_text(argv[0]); + c.nPrefix = sqlite3_value_bytes(argv[0]); + if( zTable==0 || zField==0 || c.zPrefix==0 ) return; + if( argc<4 + || (zWhere = sqlite3_value_text(argv[3]))==0 + || zWhere[0]==0 + ){ + zSql = sqlite3_mprintf( + "SELECT \"%w\" FROM \"%w\"" + " WHERE \"%w\">=(?1 || ?2)" + " AND \"%w\"<=(?1 || char(1114111))" /* 1114111 == 0x10ffff */ + " ORDER BY 1 ASC LIMIT 1", + zField, zTable, zField, zField); + }else{ + zSql = sqlite3_mprintf( + "SELECT \"%w\" FROM \"%w\"" + " WHERE \"%w\">=(?1 || ?2)" + " AND \"%w\"<=(?1 || char(1114111))" /* 1114111 == 0x10ffff */ + " AND (%s)" + " ORDER BY 1 ASC LIMIT 1", + zField, zTable, zField, zField, zWhere); + } + if( zSql==0 ){ + sqlite3_result_error_nomem(context); + return; + } + + rc = sqlite3_prepare_v2(c.db, zSql, -1, &c.pStmt, 0); + sqlite3_free(zSql); + if( rc ){ + sqlite3_result_error(context, sqlite3_errmsg(c.db), -1); + return; + } + findNextChars(&c); + if( c.mallocFailed ){ + sqlite3_result_error_nomem(context); + }else{ + unsigned char *pRes; + pRes = sqlite3_malloc( c.nUsed*4 + 1 ); + if( pRes==0 ){ + sqlite3_result_error_nomem(context); + }else{ + int i; + int n = 0; + for(i=0; i +#include +#include "sqlite3ext.h" +SQLITE_EXTENSION_INIT1 + +/* +** The following #defines change the names of some functions implemented in +** this file to prevent name collisions with C-library functions of the +** same name. 
+*/ +#define re_match sqlite3re_match +#define re_compile sqlite3re_compile +#define re_free sqlite3re_free + +/* The end-of-input character */ +#define RE_EOF 0 /* End of input */ + +/* The NFA is implemented as sequence of opcodes taken from the following +** set. Each opcode has a single integer argument. +*/ +#define RE_OP_MATCH 1 /* Match the one character in the argument */ +#define RE_OP_ANY 2 /* Match any one character. (Implements ".") */ +#define RE_OP_ANYSTAR 3 /* Special optimized version of .* */ +#define RE_OP_FORK 4 /* Continue to both next and opcode at iArg */ +#define RE_OP_GOTO 5 /* Jump to opcode at iArg */ +#define RE_OP_ACCEPT 6 /* Halt and indicate a successful match */ +#define RE_OP_CC_INC 7 /* Beginning of a [...] character class */ +#define RE_OP_CC_EXC 8 /* Beginning of a [^...] character class */ +#define RE_OP_CC_VALUE 9 /* Single value in a character class */ +#define RE_OP_CC_RANGE 10 /* Range of values in a character class */ +#define RE_OP_WORD 11 /* Perl word character [A-Za-z0-9_] */ +#define RE_OP_NOTWORD 12 /* Not a perl word character */ +#define RE_OP_DIGIT 13 /* digit: [0-9] */ +#define RE_OP_NOTDIGIT 14 /* Not a digit */ +#define RE_OP_SPACE 15 /* space: [ \t\n\r\v\f] */ +#define RE_OP_NOTSPACE 16 /* Not a digit */ +#define RE_OP_BOUNDARY 17 /* Boundary between word and non-word */ + +/* Each opcode is a "state" in the NFA */ +typedef unsigned short ReStateNumber; + +/* Because this is an NFA and not a DFA, multiple states can be active at +** once. An instance of the following object records all active states in +** the NFA. The implementation is optimized for the common case where the +** number of actives states is small. +*/ +typedef struct ReStateSet { + unsigned nState; /* Number of current states */ + ReStateNumber *aState; /* Current states */ +} ReStateSet; + +/* An input string read one character at a time. 
+*/ +typedef struct ReInput ReInput; +struct ReInput { + const unsigned char *z; /* All text */ + int i; /* Next byte to read */ + int mx; /* EOF when i>=mx */ +}; + +/* A compiled NFA (or an NFA that is in the process of being compiled) is +** an instance of the following object. +*/ +typedef struct ReCompiled ReCompiled; +struct ReCompiled { + ReInput sIn; /* Regular expression text */ + const char *zErr; /* Error message to return */ + char *aOp; /* Operators for the virtual machine */ + int *aArg; /* Arguments to each operator */ + unsigned (*xNextChar)(ReInput*); /* Next character function */ + unsigned char zInit[12]; /* Initial text to match */ + int nInit; /* Number of characters in zInit */ + unsigned nState; /* Number of entries in aOp[] and aArg[] */ + unsigned nAlloc; /* Slots allocated for aOp[] and aArg[] */ +}; + +/* Add a state to the given state set if it is not already there */ +static void re_add_state(ReStateSet *pSet, int newState){ + unsigned i; + for(i=0; inState; i++) if( pSet->aState[i]==newState ) return; + pSet->aState[pSet->nState++] = newState; +} + +/* Extract the next unicode character from *pzIn and return it. Advance +** *pzIn to the first byte past the end of the character returned. To +** be clear: this routine converts utf8 to unicode. This routine is +** optimized for the common case where the next character is a single byte. 
+*/ +static unsigned re_next_char(ReInput *p){ + unsigned c; + if( p->i>=p->mx ) return 0; + c = p->z[p->i++]; + if( c>=0x80 ){ + if( (c&0xe0)==0xc0 && p->imx && (p->z[p->i]&0xc0)==0x80 ){ + c = (c&0x1f)<<6 | (p->z[p->i++]&0x3f); + if( c<0x80 ) c = 0xfffd; + }else if( (c&0xf0)==0xe0 && p->i+1mx && (p->z[p->i]&0xc0)==0x80 + && (p->z[p->i+1]&0xc0)==0x80 ){ + c = (c&0x0f)<<12 | ((p->z[p->i]&0x3f)<<6) | (p->z[p->i+1]&0x3f); + p->i += 2; + if( c<=0x3ff || (c>=0xd800 && c<=0xdfff) ) c = 0xfffd; + }else if( (c&0xf8)==0xf0 && p->i+3mx && (p->z[p->i]&0xc0)==0x80 + && (p->z[p->i+1]&0xc0)==0x80 && (p->z[p->i+2]&0xc0)==0x80 ){ + c = (c&0x07)<<18 | ((p->z[p->i]&0x3f)<<12) | ((p->z[p->i+1]&0x3f)<<6) + | (p->z[p->i+2]&0x3f); + p->i += 3; + if( c<=0xffff || c>0x10ffff ) c = 0xfffd; + }else{ + c = 0xfffd; + } + } + return c; +} +static unsigned re_next_char_nocase(ReInput *p){ + unsigned c = re_next_char(p); + if( c>='A' && c<='Z' ) c += 'a' - 'A'; + return c; +} + +/* Return true if c is a perl "word" character: [A-Za-z0-9_] */ +static int re_word_char(int c){ + return (c>='0' && c<='9') || (c>='a' && c<='z') + || (c>='A' && c<='Z') || c=='_'; +} + +/* Return true if c is a "digit" character: [0-9] */ +static int re_digit_char(int c){ + return (c>='0' && c<='9'); +} + +/* Return true if c is a perl "space" character: [ \t\r\n\v\f] */ +static int re_space_char(int c){ + return c==' ' || c=='\t' || c=='\n' || c=='\r' || c=='\v' || c=='\f'; +} + +/* Run a compiled regular expression on the zero-terminated input +** string zIn[]. Return true on a match and false if there is no match. +*/ +static int re_match(ReCompiled *pRe, const unsigned char *zIn, int nIn){ + ReStateSet aStateSet[2], *pThis, *pNext; + ReStateNumber aSpace[100]; + ReStateNumber *pToFree; + unsigned int i = 0; + unsigned int iSwap = 0; + int c = RE_EOF+1; + int cPrev = 0; + int rc = 0; + ReInput in; + + in.z = zIn; + in.i = 0; + in.mx = nIn>=0 ? 
nIn : (int)strlen((char const*)zIn); + + /* Look for the initial prefix match, if there is one. */ + if( pRe->nInit ){ + unsigned char x = pRe->zInit[0]; + while( in.i+pRe->nInit<=in.mx + && (zIn[in.i]!=x || + strncmp((const char*)zIn+in.i, (const char*)pRe->zInit, pRe->nInit)!=0) + ){ + in.i++; + } + if( in.i+pRe->nInit>in.mx ) return 0; + } + + if( pRe->nState<=(sizeof(aSpace)/(sizeof(aSpace[0])*2)) ){ + pToFree = 0; + aStateSet[0].aState = aSpace; + }else{ + pToFree = sqlite3_malloc( sizeof(ReStateNumber)*2*pRe->nState ); + if( pToFree==0 ) return -1; + aStateSet[0].aState = pToFree; + } + aStateSet[1].aState = &aStateSet[0].aState[pRe->nState]; + pNext = &aStateSet[1]; + pNext->nState = 0; + re_add_state(pNext, 0); + while( c!=RE_EOF && pNext->nState>0 ){ + cPrev = c; + c = pRe->xNextChar(&in); + pThis = pNext; + pNext = &aStateSet[iSwap]; + iSwap = 1 - iSwap; + pNext->nState = 0; + for(i=0; inState; i++){ + int x = pThis->aState[i]; + switch( pRe->aOp[x] ){ + case RE_OP_MATCH: { + if( pRe->aArg[x]==c ) re_add_state(pNext, x+1); + break; + } + case RE_OP_ANY: { + re_add_state(pNext, x+1); + break; + } + case RE_OP_WORD: { + if( re_word_char(c) ) re_add_state(pNext, x+1); + break; + } + case RE_OP_NOTWORD: { + if( !re_word_char(c) ) re_add_state(pNext, x+1); + break; + } + case RE_OP_DIGIT: { + if( re_digit_char(c) ) re_add_state(pNext, x+1); + break; + } + case RE_OP_NOTDIGIT: { + if( !re_digit_char(c) ) re_add_state(pNext, x+1); + break; + } + case RE_OP_SPACE: { + if( re_space_char(c) ) re_add_state(pNext, x+1); + break; + } + case RE_OP_NOTSPACE: { + if( !re_space_char(c) ) re_add_state(pNext, x+1); + break; + } + case RE_OP_BOUNDARY: { + if( re_word_char(c)!=re_word_char(cPrev) ) re_add_state(pThis, x+1); + break; + } + case RE_OP_ANYSTAR: { + re_add_state(pNext, x); + re_add_state(pThis, x+1); + break; + } + case RE_OP_FORK: { + re_add_state(pThis, x+pRe->aArg[x]); + re_add_state(pThis, x+1); + break; + } + case RE_OP_GOTO: { + re_add_state(pThis, 
x+pRe->aArg[x]); + break; + } + case RE_OP_ACCEPT: { + rc = 1; + goto re_match_end; + } + case RE_OP_CC_INC: + case RE_OP_CC_EXC: { + int j = 1; + int n = pRe->aArg[x]; + int hit = 0; + for(j=1; j>0 && jaOp[x+j]==RE_OP_CC_VALUE ){ + if( pRe->aArg[x+j]==c ){ + hit = 1; + j = -1; + } + }else{ + if( pRe->aArg[x+j]<=c && pRe->aArg[x+j+1]>=c ){ + hit = 1; + j = -1; + }else{ + j++; + } + } + } + if( pRe->aOp[x]==RE_OP_CC_EXC ) hit = !hit; + if( hit ) re_add_state(pNext, x+n); + break; + } + } + } + } + for(i=0; inState; i++){ + if( pRe->aOp[pNext->aState[i]]==RE_OP_ACCEPT ){ rc = 1; break; } + } +re_match_end: + sqlite3_free(pToFree); + return rc; +} + +/* Resize the opcode and argument arrays for an RE under construction. +*/ +static int re_resize(ReCompiled *p, int N){ + char *aOp; + int *aArg; + aOp = sqlite3_realloc(p->aOp, N*sizeof(p->aOp[0])); + if( aOp==0 ) return 1; + p->aOp = aOp; + aArg = sqlite3_realloc(p->aArg, N*sizeof(p->aArg[0])); + if( aArg==0 ) return 1; + p->aArg = aArg; + p->nAlloc = N; + return 0; +} + +/* Insert a new opcode and argument into an RE under construction. The +** insertion point is just prior to existing opcode iBefore. +*/ +static int re_insert(ReCompiled *p, int iBefore, int op, int arg){ + int i; + if( p->nAlloc<=p->nState && re_resize(p, p->nAlloc*2) ) return 0; + for(i=p->nState; i>iBefore; i--){ + p->aOp[i] = p->aOp[i-1]; + p->aArg[i] = p->aArg[i-1]; + } + p->nState++; + p->aOp[iBefore] = op; + p->aArg[iBefore] = arg; + return iBefore; +} + +/* Append a new opcode and argument to the end of the RE under construction. +*/ +static int re_append(ReCompiled *p, int op, int arg){ + return re_insert(p, p->nState, op, arg); +} + +/* Make a copy of N opcodes starting at iStart onto the end of the RE +** under construction. 
+*/ +static void re_copy(ReCompiled *p, int iStart, int N){ + if( p->nState+N>=p->nAlloc && re_resize(p, p->nAlloc*2+N) ) return; + memcpy(&p->aOp[p->nState], &p->aOp[iStart], N*sizeof(p->aOp[0])); + memcpy(&p->aArg[p->nState], &p->aArg[iStart], N*sizeof(p->aArg[0])); + p->nState += N; +} + +/* Return true if c is a hexadecimal digit character: [0-9a-fA-F] +** If c is a hex digit, also set *pV = (*pV)*16 + valueof(c). If +** c is not a hex digit *pV is unchanged. +*/ +static int re_hex(int c, int *pV){ + if( c>='0' && c<='9' ){ + c -= '0'; + }else if( c>='a' && c<='f' ){ + c -= 'a' - 10; + }else if( c>='A' && c<='F' ){ + c -= 'A' - 10; + }else{ + return 0; + } + *pV = (*pV)*16 + (c & 0xff); + return 1; +} + +/* A backslash character has been seen, read the next character and +** return its interpretation. +*/ +static unsigned re_esc_char(ReCompiled *p){ + static const char zEsc[] = "afnrtv\\()*.+?[$^{|}]"; + static const char zTrans[] = "\a\f\n\r\t\v"; + int i, v = 0; + char c; + if( p->sIn.i>=p->sIn.mx ) return 0; + c = p->sIn.z[p->sIn.i]; + if( c=='u' && p->sIn.i+4sIn.mx ){ + const unsigned char *zIn = p->sIn.z + p->sIn.i; + if( re_hex(zIn[1],&v) + && re_hex(zIn[2],&v) + && re_hex(zIn[3],&v) + && re_hex(zIn[4],&v) + ){ + p->sIn.i += 5; + return v; + } + } + if( c=='x' && p->sIn.i+2sIn.mx ){ + const unsigned char *zIn = p->sIn.z + p->sIn.i; + if( re_hex(zIn[1],&v) + && re_hex(zIn[2],&v) + ){ + p->sIn.i += 3; + return v; + } + } + for(i=0; zEsc[i] && zEsc[i]!=c; i++){} + if( zEsc[i] ){ + if( i<6 ) c = zTrans[i]; + p->sIn.i++; + }else{ + p->zErr = "unknown \\ escape"; + } + return c; +} + +/* Forward declaration */ +static const char *re_subcompile_string(ReCompiled*); + +/* Peek at the next byte of input */ +static unsigned char rePeek(ReCompiled *p){ + return p->sIn.isIn.mx ? p->sIn.z[p->sIn.i] : 0; +} + +/* Compile RE text into a sequence of opcodes. Continue up to the +** first unmatched ")" character, then return. 
If an error is found, +** return a pointer to the error message string. +*/ +static const char *re_subcompile_re(ReCompiled *p){ + const char *zErr; + int iStart, iEnd, iGoto; + iStart = p->nState; + zErr = re_subcompile_string(p); + if( zErr ) return zErr; + while( rePeek(p)=='|' ){ + iEnd = p->nState; + re_insert(p, iStart, RE_OP_FORK, iEnd + 2 - iStart); + iGoto = re_append(p, RE_OP_GOTO, 0); + p->sIn.i++; + zErr = re_subcompile_string(p); + if( zErr ) return zErr; + p->aArg[iGoto] = p->nState - iGoto; + } + return 0; +} + +/* Compile an element of regular expression text (anything that can be +** an operand to the "|" operator). Return NULL on success or a pointer +** to the error message if there is a problem. +*/ +static const char *re_subcompile_string(ReCompiled *p){ + int iPrev = -1; + int iStart; + unsigned c; + const char *zErr; + while( (c = p->xNextChar(&p->sIn))!=0 ){ + iStart = p->nState; + switch( c ){ + case '|': + case '$': + case ')': { + p->sIn.i--; + return 0; + } + case '(': { + zErr = re_subcompile_re(p); + if( zErr ) return zErr; + if( rePeek(p)!=')' ) return "unmatched '('"; + p->sIn.i++; + break; + } + case '.': { + if( rePeek(p)=='*' ){ + re_append(p, RE_OP_ANYSTAR, 0); + p->sIn.i++; + }else{ + re_append(p, RE_OP_ANY, 0); + } + break; + } + case '*': { + if( iPrev<0 ) return "'*' without operand"; + re_insert(p, iPrev, RE_OP_GOTO, p->nState - iPrev + 1); + re_append(p, RE_OP_FORK, iPrev - p->nState + 1); + break; + } + case '+': { + if( iPrev<0 ) return "'+' without operand"; + re_append(p, RE_OP_FORK, iPrev - p->nState); + break; + } + case '?': { + if( iPrev<0 ) return "'?' 
without operand"; + re_insert(p, iPrev, RE_OP_FORK, p->nState - iPrev+1); + break; + } + case '{': { + int m = 0, n = 0; + int sz, j; + if( iPrev<0 ) return "'{m,n}' without operand"; + while( (c=rePeek(p))>='0' && c<='9' ){ m = m*10 + c - '0'; p->sIn.i++; } + n = m; + if( c==',' ){ + p->sIn.i++; + n = 0; + while( (c=rePeek(p))>='0' && c<='9' ){ n = n*10 + c-'0'; p->sIn.i++; } + } + if( c!='}' ) return "unmatched '{'"; + if( n>0 && nsIn.i++; + sz = p->nState - iPrev; + if( m==0 ){ + if( n==0 ) return "both m and n are zero in '{m,n}'"; + re_insert(p, iPrev, RE_OP_FORK, sz+1); + n--; + }else{ + for(j=1; j0 ){ + re_append(p, RE_OP_FORK, -sz); + } + break; + } + case '[': { + int iFirst = p->nState; + if( rePeek(p)=='^' ){ + re_append(p, RE_OP_CC_EXC, 0); + p->sIn.i++; + }else{ + re_append(p, RE_OP_CC_INC, 0); + } + while( (c = p->xNextChar(&p->sIn))!=0 ){ + if( c=='[' && rePeek(p)==':' ){ + return "POSIX character classes not supported"; + } + if( c=='\\' ) c = re_esc_char(p); + if( rePeek(p)=='-' ){ + re_append(p, RE_OP_CC_RANGE, c); + p->sIn.i++; + c = p->xNextChar(&p->sIn); + if( c=='\\' ) c = re_esc_char(p); + re_append(p, RE_OP_CC_RANGE, c); + }else{ + re_append(p, RE_OP_CC_VALUE, c); + } + if( rePeek(p)==']' ){ p->sIn.i++; break; } + } + if( c==0 ) return "unclosed '['"; + p->aArg[iFirst] = p->nState - iFirst; + break; + } + case '\\': { + int specialOp = 0; + switch( rePeek(p) ){ + case 'b': specialOp = RE_OP_BOUNDARY; break; + case 'd': specialOp = RE_OP_DIGIT; break; + case 'D': specialOp = RE_OP_NOTDIGIT; break; + case 's': specialOp = RE_OP_SPACE; break; + case 'S': specialOp = RE_OP_NOTSPACE; break; + case 'w': specialOp = RE_OP_WORD; break; + case 'W': specialOp = RE_OP_NOTWORD; break; + } + if( specialOp ){ + p->sIn.i++; + re_append(p, specialOp, 0); + }else{ + c = re_esc_char(p); + re_append(p, RE_OP_MATCH, c); + } + break; + } + default: { + re_append(p, RE_OP_MATCH, c); + break; + } + } + iPrev = iStart; + } + return 0; +} + +/* Free and reclaim all 
the memory used by a previously compiled +** regular expression. Applications should invoke this routine once +** for every call to re_compile() to avoid memory leaks. +*/ +void re_free(ReCompiled *pRe){ + if( pRe ){ + sqlite3_free(pRe->aOp); + sqlite3_free(pRe->aArg); + sqlite3_free(pRe); + } +} + +/* +** Compile a textual regular expression in zIn[] into a compiled regular +** expression suitable for us by re_match() and return a pointer to the +** compiled regular expression in *ppRe. Return NULL on success or an +** error message if something goes wrong. +*/ +const char *re_compile(ReCompiled **ppRe, const char *zIn, int noCase){ + ReCompiled *pRe; + const char *zErr; + int i, j; + + *ppRe = 0; + pRe = sqlite3_malloc( sizeof(*pRe) ); + if( pRe==0 ){ + return "out of memory"; + } + memset(pRe, 0, sizeof(*pRe)); + pRe->xNextChar = noCase ? re_next_char_nocase : re_next_char; + if( re_resize(pRe, 30) ){ + re_free(pRe); + return "out of memory"; + } + if( zIn[0]=='^' ){ + zIn++; + }else{ + re_append(pRe, RE_OP_ANYSTAR, 0); + } + pRe->sIn.z = (unsigned char*)zIn; + pRe->sIn.i = 0; + pRe->sIn.mx = (int)strlen(zIn); + zErr = re_subcompile_re(pRe); + if( zErr ){ + re_free(pRe); + return zErr; + } + if( rePeek(pRe)=='$' && pRe->sIn.i+1>=pRe->sIn.mx ){ + re_append(pRe, RE_OP_MATCH, RE_EOF); + re_append(pRe, RE_OP_ACCEPT, 0); + *ppRe = pRe; + }else if( pRe->sIn.i>=pRe->sIn.mx ){ + re_append(pRe, RE_OP_ACCEPT, 0); + *ppRe = pRe; + }else{ + re_free(pRe); + return "unrecognized character"; + } + + /* The following is a performance optimization. If the regex begins with + ** ".*" (if the input regex lacks an initial "^") and afterwards there are + ** one or more matching characters, enter those matching characters into + ** zInit[]. The re_match() routine can then search ahead in the input + ** string looking for the initial match without having to run the whole + ** regex engine over the string. 
Do not worry able trying to match + ** unicode characters beyond plane 0 - those are very rare and this is + ** just an optimization. */ + if( pRe->aOp[0]==RE_OP_ANYSTAR ){ + for(j=0, i=1; jzInit)-2 && pRe->aOp[i]==RE_OP_MATCH; i++){ + unsigned x = pRe->aArg[i]; + if( x<=127 ){ + pRe->zInit[j++] = x; + }else if( x<=0xfff ){ + pRe->zInit[j++] = 0xc0 | (x>>6); + pRe->zInit[j++] = 0x80 | (x&0x3f); + }else if( x<=0xffff ){ + pRe->zInit[j++] = 0xd0 | (x>>12); + pRe->zInit[j++] = 0x80 | ((x>>6)&0x3f); + pRe->zInit[j++] = 0x80 | (x&0x3f); + }else{ + break; + } + } + if( j>0 && pRe->zInit[j-1]==0 ) j--; + pRe->nInit = j; + } + return pRe->zErr; +} + +/* +** Implementation of the regexp() SQL function. This function implements +** the build-in REGEXP operator. The first argument to the function is the +** pattern and the second argument is the string. So, the SQL statements: +** +** A REGEXP B +** +** is implemented as regexp(B,A). +*/ +static void re_sql_func( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + ReCompiled *pRe; /* Compiled regular expression */ + const char *zPattern; /* The regular expression */ + const unsigned char *zStr;/* String being searched */ + const char *zErr; /* Compile error message */ + + pRe = sqlite3_get_auxdata(context, 0); + if( pRe==0 ){ + zPattern = (const char*)sqlite3_value_text(argv[0]); + if( zPattern==0 ) return; + zErr = re_compile(&pRe, zPattern, 0); + if( zErr ){ + re_free(pRe); + sqlite3_result_error(context, zErr, -1); + return; + } + if( pRe==0 ){ + sqlite3_result_error_nomem(context); + return; + } + sqlite3_set_auxdata(context, 0, pRe, (void(*)(void*))re_free); + } + zStr = (const unsigned char*)sqlite3_value_text(argv[1]); + if( zStr!=0 ){ + sqlite3_result_int(context, re_match(pRe, zStr, -1)); + } +} + +/* +** Invoke this routine to register the regexp() function with the +** SQLite database connection. 
+*/ +#ifdef _WIN32 +__declspec(dllexport) +#endif +int sqlite3_regexp_init( + sqlite3 *db, + char **pzErrMsg, + const sqlite3_api_routines *pApi +){ + int rc = SQLITE_OK; + SQLITE_EXTENSION_INIT2(pApi); + rc = sqlite3_create_function(db, "regexp", 2, SQLITE_UTF8, 0, + re_sql_func, 0, 0); + return rc; +} diff --git a/ext/misc/rot13.c b/ext/misc/rot13.c new file mode 100644 index 0000000..68fdf60 --- /dev/null +++ b/ext/misc/rot13.c @@ -0,0 +1,114 @@ +/* +** 2013-05-15 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This SQLite extension implements a rot13() function and a rot13 +** collating sequence. +*/ +#include "sqlite3ext.h" +SQLITE_EXTENSION_INIT1 +#include +#include + +/* +** Perform rot13 encoding on a single ASCII character. +*/ +static unsigned char rot13(unsigned char c){ + if( c>='a' && c<='z' ){ + c += 13; + if( c>'z' ) c -= 26; + }else if( c>='A' && c<='Z' ){ + c += 13; + if( c>'Z' ) c -= 26; + } + return c; +} + +/* +** Implementation of the rot13() function. +** +** Rotate ASCII alphabetic characters by 13 character positions. +** Non-ASCII characters are unchanged. rot13(rot13(X)) should always +** equal X. 
+*/ +static void rot13func( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + const unsigned char *zIn; + int nIn; + unsigned char *zOut; + char *zToFree = 0; + int i; + char zTemp[100]; + assert( argc==1 ); + if( sqlite3_value_type(argv[0])==SQLITE_NULL ) return; + zIn = (const unsigned char*)sqlite3_value_text(argv[0]); + nIn = sqlite3_value_bytes(argv[0]); + if( nIn +# include +# include +# include +# define ALWAYS(X) 1 +# define NEVER(X) 0 + typedef unsigned char u8; + typedef unsigned short u16; +# include +#endif + +#ifndef SQLITE_OMIT_VIRTUALTABLE + +/* +** Character classes for ASCII characters: +** +** 0 '' Silent letters: H W +** 1 'A' Any vowel: A E I O U (Y) +** 2 'B' A bilabeal stop or fricative: B F P V W +** 3 'C' Other fricatives or back stops: C G J K Q S X Z +** 4 'D' Alveolar stops: D T +** 5 'H' Letter H at the beginning of a word +** 6 'L' Glide: L +** 7 'R' Semivowel: R +** 8 'M' Nasals: M N +** 9 'Y' Letter Y at the beginning of a word. +** 10 '9' Digits: 0 1 2 3 4 5 6 7 8 9 +** 11 ' ' White space +** 12 '?' Other. +*/ +#define CCLASS_SILENT 0 +#define CCLASS_VOWEL 1 +#define CCLASS_B 2 +#define CCLASS_C 3 +#define CCLASS_D 4 +#define CCLASS_H 5 +#define CCLASS_L 6 +#define CCLASS_R 7 +#define CCLASS_M 8 +#define CCLASS_Y 9 +#define CCLASS_DIGIT 10 +#define CCLASS_SPACE 11 +#define CCLASS_OTHER 12 + +/* +** The following table gives the character class for non-initial ASCII +** characters. 
+*/ +static const unsigned char midClass[] = { + /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, + /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, + /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, + /* */ CCLASS_SPACE, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, + /* */ CCLASS_SPACE, /* */ CCLASS_SPACE, /* */ CCLASS_OTHER, + /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, + /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, + /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, + /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, + /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, + /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_SPACE, + /* ! */ CCLASS_OTHER, /* " */ CCLASS_OTHER, /* # */ CCLASS_OTHER, + /* $ */ CCLASS_OTHER, /* % */ CCLASS_OTHER, /* & */ CCLASS_OTHER, + /* ' */ CCLASS_SILENT, /* ( */ CCLASS_OTHER, /* ) */ CCLASS_OTHER, + /* * */ CCLASS_OTHER, /* + */ CCLASS_OTHER, /* , */ CCLASS_OTHER, + /* - */ CCLASS_OTHER, /* . */ CCLASS_OTHER, /* / */ CCLASS_OTHER, + /* 0 */ CCLASS_DIGIT, /* 1 */ CCLASS_DIGIT, /* 2 */ CCLASS_DIGIT, + /* 3 */ CCLASS_DIGIT, /* 4 */ CCLASS_DIGIT, /* 5 */ CCLASS_DIGIT, + /* 6 */ CCLASS_DIGIT, /* 7 */ CCLASS_DIGIT, /* 8 */ CCLASS_DIGIT, + /* 9 */ CCLASS_DIGIT, /* : */ CCLASS_OTHER, /* ; */ CCLASS_OTHER, + /* < */ CCLASS_OTHER, /* = */ CCLASS_OTHER, /* > */ CCLASS_OTHER, + /* ? 
*/ CCLASS_OTHER, /* @ */ CCLASS_OTHER, /* A */ CCLASS_VOWEL, + /* B */ CCLASS_B, /* C */ CCLASS_C, /* D */ CCLASS_D, + /* E */ CCLASS_VOWEL, /* F */ CCLASS_B, /* G */ CCLASS_C, + /* H */ CCLASS_SILENT, /* I */ CCLASS_VOWEL, /* J */ CCLASS_C, + /* K */ CCLASS_C, /* L */ CCLASS_L, /* M */ CCLASS_M, + /* N */ CCLASS_M, /* O */ CCLASS_VOWEL, /* P */ CCLASS_B, + /* Q */ CCLASS_C, /* R */ CCLASS_R, /* S */ CCLASS_C, + /* T */ CCLASS_D, /* U */ CCLASS_VOWEL, /* V */ CCLASS_B, + /* W */ CCLASS_B, /* X */ CCLASS_C, /* Y */ CCLASS_VOWEL, + /* Z */ CCLASS_C, /* [ */ CCLASS_OTHER, /* \ */ CCLASS_OTHER, + /* ] */ CCLASS_OTHER, /* ^ */ CCLASS_OTHER, /* _ */ CCLASS_OTHER, + /* ` */ CCLASS_OTHER, /* a */ CCLASS_VOWEL, /* b */ CCLASS_B, + /* c */ CCLASS_C, /* d */ CCLASS_D, /* e */ CCLASS_VOWEL, + /* f */ CCLASS_B, /* g */ CCLASS_C, /* h */ CCLASS_SILENT, + /* i */ CCLASS_VOWEL, /* j */ CCLASS_C, /* k */ CCLASS_C, + /* l */ CCLASS_L, /* m */ CCLASS_M, /* n */ CCLASS_M, + /* o */ CCLASS_VOWEL, /* p */ CCLASS_B, /* q */ CCLASS_C, + /* r */ CCLASS_R, /* s */ CCLASS_C, /* t */ CCLASS_D, + /* u */ CCLASS_VOWEL, /* v */ CCLASS_B, /* w */ CCLASS_B, + /* x */ CCLASS_C, /* y */ CCLASS_VOWEL, /* z */ CCLASS_C, + /* { */ CCLASS_OTHER, /* | */ CCLASS_OTHER, /* } */ CCLASS_OTHER, + /* ~ */ CCLASS_OTHER, /* */ CCLASS_OTHER, +}; +/* +** This tables gives the character class for ASCII characters that form the +** initial character of a word. The only difference from midClass is with +** the letters H, W, and Y. 
+*/ +static const unsigned char initClass[] = { + /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, + /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, + /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, + /* */ CCLASS_SPACE, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, + /* */ CCLASS_SPACE, /* */ CCLASS_SPACE, /* */ CCLASS_OTHER, + /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, + /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, + /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, + /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, + /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, + /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_SPACE, + /* ! */ CCLASS_OTHER, /* " */ CCLASS_OTHER, /* # */ CCLASS_OTHER, + /* $ */ CCLASS_OTHER, /* % */ CCLASS_OTHER, /* & */ CCLASS_OTHER, + /* ' */ CCLASS_OTHER, /* ( */ CCLASS_OTHER, /* ) */ CCLASS_OTHER, + /* * */ CCLASS_OTHER, /* + */ CCLASS_OTHER, /* , */ CCLASS_OTHER, + /* - */ CCLASS_OTHER, /* . */ CCLASS_OTHER, /* / */ CCLASS_OTHER, + /* 0 */ CCLASS_DIGIT, /* 1 */ CCLASS_DIGIT, /* 2 */ CCLASS_DIGIT, + /* 3 */ CCLASS_DIGIT, /* 4 */ CCLASS_DIGIT, /* 5 */ CCLASS_DIGIT, + /* 6 */ CCLASS_DIGIT, /* 7 */ CCLASS_DIGIT, /* 8 */ CCLASS_DIGIT, + /* 9 */ CCLASS_DIGIT, /* : */ CCLASS_OTHER, /* ; */ CCLASS_OTHER, + /* < */ CCLASS_OTHER, /* = */ CCLASS_OTHER, /* > */ CCLASS_OTHER, + /* ? 
*/ CCLASS_OTHER, /* @ */ CCLASS_OTHER, /* A */ CCLASS_VOWEL, + /* B */ CCLASS_B, /* C */ CCLASS_C, /* D */ CCLASS_D, + /* E */ CCLASS_VOWEL, /* F */ CCLASS_B, /* G */ CCLASS_C, + /* H */ CCLASS_SILENT, /* I */ CCLASS_VOWEL, /* J */ CCLASS_C, + /* K */ CCLASS_C, /* L */ CCLASS_L, /* M */ CCLASS_M, + /* N */ CCLASS_M, /* O */ CCLASS_VOWEL, /* P */ CCLASS_B, + /* Q */ CCLASS_C, /* R */ CCLASS_R, /* S */ CCLASS_C, + /* T */ CCLASS_D, /* U */ CCLASS_VOWEL, /* V */ CCLASS_B, + /* W */ CCLASS_B, /* X */ CCLASS_C, /* Y */ CCLASS_Y, + /* Z */ CCLASS_C, /* [ */ CCLASS_OTHER, /* \ */ CCLASS_OTHER, + /* ] */ CCLASS_OTHER, /* ^ */ CCLASS_OTHER, /* _ */ CCLASS_OTHER, + /* ` */ CCLASS_OTHER, /* a */ CCLASS_VOWEL, /* b */ CCLASS_B, + /* c */ CCLASS_C, /* d */ CCLASS_D, /* e */ CCLASS_VOWEL, + /* f */ CCLASS_B, /* g */ CCLASS_C, /* h */ CCLASS_SILENT, + /* i */ CCLASS_VOWEL, /* j */ CCLASS_C, /* k */ CCLASS_C, + /* l */ CCLASS_L, /* m */ CCLASS_M, /* n */ CCLASS_M, + /* o */ CCLASS_VOWEL, /* p */ CCLASS_B, /* q */ CCLASS_C, + /* r */ CCLASS_R, /* s */ CCLASS_C, /* t */ CCLASS_D, + /* u */ CCLASS_VOWEL, /* v */ CCLASS_B, /* w */ CCLASS_B, + /* x */ CCLASS_C, /* y */ CCLASS_Y, /* z */ CCLASS_C, + /* { */ CCLASS_OTHER, /* | */ CCLASS_OTHER, /* } */ CCLASS_OTHER, + /* ~ */ CCLASS_OTHER, /* */ CCLASS_OTHER, +}; + +/* +** Mapping from the character class number (0-13) to a symbol for each +** character class. Note that initClass[] can be used to map the class +** symbol back into the class number. +*/ +static const unsigned char className[] = ".ABCDHLRMY9 ?"; + +/* +** Generate a "phonetic hash" from a string of ASCII characters +** in zIn[0..nIn-1]. +** +** * Map characters by character class as defined above. 
+** * Omit double-letters +** * Omit vowels beside R and L +** * Omit T when followed by CH +** * Omit W when followed by R +** * Omit D when followed by J or G +** * Omit K in KN or G in GN at the beginning of a word +** +** Space to hold the result is obtained from sqlite3_malloc() +** +** Return NULL if memory allocation fails. +*/ +static unsigned char *phoneticHash(const unsigned char *zIn, int nIn){ + unsigned char *zOut = sqlite3_malloc( nIn + 1 ); + int i; + int nOut = 0; + char cPrev = 0x77; + char cPrevX = 0x77; + const unsigned char *aClass = initClass; + + if( zOut==0 ) return 0; + if( nIn>2 ){ + switch( zIn[0] ){ + case 'g': + case 'k': { + if( zIn[1]=='n' ){ zIn++; nIn--; } + break; + } + } + } + for(i=0; i=0 ); + if( nOut==0 || c!=zOut[nOut-1] ) zOut[nOut++] = c; + } + zOut[nOut] = 0; + return zOut; +} + +/* +** This is an SQL function wrapper around phoneticHash(). See +** the description of phoneticHash() for additional information. +*/ +static void phoneticHashSqlFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + const unsigned char *zIn; + unsigned char *zOut; + + zIn = sqlite3_value_text(argv[0]); + if( zIn==0 ) return; + zOut = phoneticHash(zIn, sqlite3_value_bytes(argv[0])); + if( zOut==0 ){ + sqlite3_result_error_nomem(context); + }else{ + sqlite3_result_text(context, (char*)zOut, -1, sqlite3_free); + } +} + +/* +** Return the character class number for a character given its +** context. +*/ +static char characterClass(char cPrev, char c){ + return cPrev==0 ? initClass[c&0x7f] : midClass[c&0x7f]; +} + +/* +** Return the cost of inserting or deleting character c immediately +** following character cPrev. If cPrev==0, that means c is the first +** character of the word. 
+*/ +static int insertOrDeleteCost(char cPrev, char c, char cNext){ + char classC = characterClass(cPrev, c); + char classCprev; + + if( classC==CCLASS_SILENT ){ + /* Insert or delete a character of class CCLASS_SILENT, such as H */ + return 1; + } + if( cPrev==c ){ + /* Repeated characters, or miss a repeat */ + return 10; + } + if( classC==CCLASS_VOWEL && (cPrev=='r' || cNext=='r') ){ + return 20; /* Insert a vowel before or after 'r' */ + } + classCprev = characterClass(cPrev, cPrev); + if( classC==classCprev ){ + if( classC==CCLASS_VOWEL ){ + /* Remove or add a new vowel to a vowel cluster */ + return 15; + }else{ + /* Remove or add a non-vowel in the same class as the previous character */ + return 50; + } + } + + /* any other character insertion or deletion */ + return 100; +} + +/* +** Divide the insertion cost by this factor when appending to the +** end of the word. +*/ +#define FINAL_INS_COST_DIV 4 + +/* +** Return the cost of substituting cTo in place of cFrom assuming +** the previous character is cPrev. If cPrev==0 then cTo is the first +** character of the word. +*/ +static int substituteCost(char cPrev, char cFrom, char cTo){ + char classFrom, classTo; + if( cFrom==cTo ){ + /* Exact match */ + return 0; + } + if( cFrom==(cTo^0x20) && ((cTo>='A' && cTo<='Z') || (cTo>='a' && cTo<='z')) ){ + /* differ only in case */ + return 0; + } + classFrom = characterClass(cPrev, cFrom); + classTo = characterClass(cPrev, cTo); + if( classFrom==classTo ){ + /* Same character class */ + return 40; + } + if( classFrom>=CCLASS_B && classFrom<=CCLASS_Y + && classTo>=CCLASS_B && classTo<=CCLASS_Y ){ + /* Convert from one consonant to another, but in a different class */ + return 75; + } + /* Any other substitution */ + return 100; +} + +/* +** Given two strings zA and zB which are pure ASCII, return the cost +** of transforming zA into zB. If zA ends with '*' assume that it is +** a prefix of zB and give only minimal penalty for extra characters +** on the end of zB. 
+** +** Smaller numbers mean a closer match. +** +** Negative values indicate an error: +** -1 One of the inputs is NULL +** -2 Non-ASCII characters on input +** -3 Unable to allocate memory +** +** If pnMatch is not NULL, then *pnMatch is set to the number of bytes +** of zB that matched the pattern in zA. If zA does not end with a '*', +** then this value is always the number of bytes in zB (i.e. strlen(zB)). +** If zA does end in a '*', then it is the number of bytes in the prefix +** of zB that was deemed to match zA. +*/ +static int editdist1(const char *zA, const char *zB, int *pnMatch){ + int nA, nB; /* Number of characters in zA[] and zB[] */ + int xA, xB; /* Loop counters for zA[] and zB[] */ + char cA, cB; /* Current character of zA and zB */ + char cAprev, cBprev; /* Previous character of zA and zB */ + char cAnext, cBnext; /* Next character in zA and zB */ + int d; /* North-west cost value */ + int dc = 0; /* North-west character value */ + int res; /* Final result */ + int *m; /* The cost matrix */ + char *cx; /* Corresponding character values */ + int *toFree = 0; /* Malloced space */ + int mStack[60+15]; /* Stack space to use if not too much is needed */ + int nMatch = 0; + + /* Early out if either input is NULL */ + if( zA==0 || zB==0 ) return -1; + + /* Skip any common prefix */ + while( zA[0] && zA[0]==zB[0] ){ dc = zA[0]; zA++; zB++; nMatch++; } + if( pnMatch ) *pnMatch = nMatch; + if( zA[0]==0 && zB[0]==0 ) return 0; + +#if 0 + printf("A=\"%s\" B=\"%s\" dc=%c\n", zA, zB, dc?dc:' '); +#endif + + /* Verify input strings and measure their lengths */ + for(nA=0; zA[nA]; nA++){ + if( zA[nA]&0x80 ) return -2; + } + for(nB=0; zB[nB]; nB++){ + if( zB[nB]&0x80 ) return -2; + } + + /* Special processing if either string is empty */ + if( nA==0 ){ + cBprev = dc; + for(xB=res=0; (cB = zB[xB])!=0; xB++){ + res += insertOrDeleteCost(cBprev, cB, zB[xB+1])/FINAL_INS_COST_DIV; + cBprev = cB; + } + return res; + } + if( nB==0 ){ + cAprev = dc; + for(xA=res=0; (cA 
= zA[xA])!=0; xA++){ + res += insertOrDeleteCost(cAprev, cA, zA[xA+1]); + cAprev = cA; + } + return res; + } + + /* A is a prefix of B */ + if( zA[0]=='*' && zA[1]==0 ) return 0; + + /* Allocate and initialize the Wagner matrix */ + if( nB<(sizeof(mStack)*4)/(sizeof(mStack[0])*5) ){ + m = mStack; + }else{ + m = toFree = sqlite3_malloc( (nB+1)*5*sizeof(m[0])/4 ); + if( m==0 ) return -3; + } + cx = (char*)&m[nB+1]; + + /* Compute the Wagner edit distance */ + m[0] = 0; + cx[0] = dc; + cBprev = dc; + for(xB=1; xB<=nB; xB++){ + cBnext = zB[xB]; + cB = zB[xB-1]; + cx[xB] = cB; + m[xB] = m[xB-1] + insertOrDeleteCost(cBprev, cB, cBnext); + cBprev = cB; + } + cAprev = dc; + for(xA=1; xA<=nA; xA++){ + int lastA = (xA==nA); + cA = zA[xA-1]; + cAnext = zA[xA]; + if( cA=='*' && lastA ) break; + d = m[0]; + dc = cx[0]; + m[0] = d + insertOrDeleteCost(cAprev, cA, cAnext); + cBprev = 0; + for(xB=1; xB<=nB; xB++){ + int totalCost, insCost, delCost, subCost, ncx; + cB = zB[xB-1]; + cBnext = zB[xB]; + + /* Cost to insert cB */ + insCost = insertOrDeleteCost(cx[xB-1], cB, cBnext); + if( lastA ) insCost /= FINAL_INS_COST_DIV; + + /* Cost to delete cA */ + delCost = insertOrDeleteCost(cx[xB], cA, cBnext); + + /* Cost to substitute cA->cB */ + subCost = substituteCost(cx[xB-1], cA, cB); + + /* Best cost */ + totalCost = insCost + m[xB-1]; + ncx = cB; + if( (delCost + m[xB])nLang; i++){ + EditDist3Cost *pCost, *pNext; + pCost = p->a[i].pCost; + while( pCost ){ + pNext = pCost->pNext; + sqlite3_free(pCost); + pCost = pNext; + } + } + sqlite3_free(p->a); + memset(p, 0, sizeof(*p)); +} +static void editDist3ConfigDelete(void *pIn){ + EditDist3Config *p = (EditDist3Config*)pIn; + editDist3ConfigClear(p); + sqlite3_free(p); +} + +/* +** Load all edit-distance weights from a table. 
+*/ +static int editDist3ConfigLoad( + EditDist3Config *p, /* The edit distance configuration to load */ + sqlite3 *db, /* Load from this database */ + const char *zTable /* Name of the table from which to load */ +){ + sqlite3_stmt *pStmt; + int rc, rc2; + char *zSql; + int iLangPrev = -9999; + EditDist3Lang *pLang = 0; + + zSql = sqlite3_mprintf("SELECT iLang, cFrom, cTo, iCost" + " FROM \"%w\" WHERE iLang>=0 ORDER BY iLang", zTable); + if( zSql==0 ) return SQLITE_NOMEM; + rc = sqlite3_prepare(db, zSql, -1, &pStmt, 0); + sqlite3_free(zSql); + if( rc ) return rc; + editDist3ConfigClear(p); + while( sqlite3_step(pStmt)==SQLITE_ROW ){ + int iLang = sqlite3_column_int(pStmt, 0); + const char *zFrom = (const char*)sqlite3_column_text(pStmt, 1); + int nFrom = zFrom ? sqlite3_column_bytes(pStmt, 1) : 0; + const char *zTo = (const char*)sqlite3_column_text(pStmt, 2); + int nTo = zTo ? sqlite3_column_bytes(pStmt, 2) : 0; + int iCost = sqlite3_column_int(pStmt, 3); + + assert( zFrom!=0 || nFrom==0 ); + assert( zTo!=0 || nTo==0 ); + if( nFrom>100 || nTo>100 ) continue; + if( iCost<0 ) continue; + if( pLang==0 || iLang!=iLangPrev ){ + EditDist3Lang *pNew; + pNew = sqlite3_realloc(p->a, (p->nLang+1)*sizeof(p->a[0])); + if( pNew==0 ){ rc = SQLITE_NOMEM; break; } + p->a = pNew; + pLang = &p->a[p->nLang]; + p->nLang++; + pLang->iLang = iLang; + pLang->iInsCost = 100; + pLang->iDelCost = 100; + pLang->iSubCost = 150; + pLang->pCost = 0; + iLangPrev = iLang; + } + if( nFrom==1 && zFrom[0]=='?' && nTo==0 ){ + pLang->iDelCost = iCost; + }else if( nFrom==0 && nTo==1 && zTo[0]=='?' ){ + pLang->iInsCost = iCost; + }else if( nFrom==1 && nTo==1 && zFrom[0]=='?' && zTo[0]=='?' 
){ + pLang->iSubCost = iCost; + }else{ + EditDist3Cost *pCost; + int nExtra = nFrom + nTo - 4; + if( nExtra<0 ) nExtra = 0; + pCost = sqlite3_malloc( sizeof(*pCost) + nExtra ); + if( pCost==0 ){ rc = SQLITE_NOMEM; break; } + pCost->nFrom = nFrom; + pCost->nTo = nTo; + pCost->iCost = iCost; + memcpy(pCost->a, zFrom, nFrom); + memcpy(pCost->a + nFrom, zTo, nTo); + pCost->pNext = pLang->pCost; + pLang->pCost = pCost; + } + } + rc2 = sqlite3_finalize(pStmt); + if( rc==SQLITE_OK ) rc = rc2; + return rc; +} + +/* +** Return the length (in bytes) of a utf-8 character. Or return a maximum +** of N. +*/ +static int utf8Len(unsigned char c, int N){ + int len = 1; + if( c>0x7f ){ + if( (c&0xe0)==0xc0 ){ + len = 2; + }else if( (c&0xf0)==0xe0 ){ + len = 3; + }else{ + len = 4; + } + } + if( len>N ) len = N; + return len; +} + +/* +** Return TRUE (non-zero) if the To side of the given cost matches +** the given string. +*/ +static int matchTo(EditDist3Cost *p, const char *z, int n){ + if( p->nTo>n ) return 0; + if( strncmp(p->a+p->nFrom, z, p->nTo)!=0 ) return 0; + return 1; +} + +/* +** Return TRUE (non-zero) if the From side of the given cost matches +** the given string. +*/ +static int matchFrom(EditDist3Cost *p, const char *z, int n){ + assert( p->nFrom<=n ); + if( strncmp(p->a, z, p->nFrom)!=0 ) return 0; + return 1; +} + +/* +** Return TRUE (non-zero) of the next FROM character and the next TO +** character are the same. 
+*/ +static int matchFromTo( + EditDist3FromString *pStr, /* Left hand string */ + int n1, /* Index of comparison character on the left */ + const char *z2, /* Right-hand comparison character */ + int n2 /* Bytes remaining in z2[] */ +){ + int b1 = pStr->a[n1].nByte; + if( b1>n2 ) return 0; + if( memcmp(pStr->z+n1, z2, b1)!=0 ) return 0; + return 1; +} + +/* +** Delete an EditDist3FromString object, releasing the apDel and apSubst +** arrays attached to p->a[i] before freeing p itself. Passing a NULL +** pointer is a no-op. +*/ +static void editDist3FromStringDelete(EditDist3FromString *p){ + int i; + if( p ){ + for(i=0; in; i++){ + sqlite3_free(p->a[i].apDel); + sqlite3_free(p->a[i].apSubst); + } + sqlite3_free(p); + } +} + +/* +** Create a EditDist3FromString object for the n bytes of string z, or +** for strlen(z) bytes if n is negative. The header, the a[] array and +** a copy of z are carved out of a single allocation. +** +** If the string ends with '*' the object is flagged as a prefix pattern +** (isPrefix=1) and the '*' is dropped from the stored copy. +** +** Cost rules from pLang->pCost whose From side matches at a position are +** recorded for that position: rules with nTo==0 go into apDel and all +** other rules go into apSubst. +** +** Return NULL if z is NULL or if any memory allocation fails. +*/ +static EditDist3FromString *editDist3FromStringNew( + const EditDist3Lang *pLang, + const char *z, + int n +){ + EditDist3FromString *pStr; + EditDist3Cost *p; + int i; + + if( z==0 ) return 0; + if( n<0 ) n = (int)strlen(z); + pStr = sqlite3_malloc( sizeof(*pStr) + sizeof(pStr->a[0])*n + n + 1 ); + if( pStr==0 ) return 0; + pStr->a = (EditDist3From*)&pStr[1]; + memset(pStr->a, 0, sizeof(pStr->a[0])*n); + pStr->n = n; + pStr->z = (char*)&pStr->a[n]; + memcpy(pStr->z, z, n+1); + if( n && z[n-1]=='*' ){ + pStr->isPrefix = 1; + n--; + pStr->n--; + pStr->z[n] = 0; + }else{ + pStr->isPrefix = 0; + } + + for(i=0; ia[i]; + memset(pFrom, 0, sizeof(*pFrom)); + pFrom->nByte = utf8Len((unsigned char)z[i], n-i); + for(p=pLang->pCost; p; p=p->pNext){ + EditDist3Cost **apNew; + if( i+p->nFrom>n ) continue; + if( matchFrom(p, z+i, n-i)==0 ) continue; + if( p->nTo==0 ){ + apNew = sqlite3_realloc(pFrom->apDel, + sizeof(*apNew)*(pFrom->nDel+1)); + if( apNew==0 ) break; + pFrom->apDel = apNew; + apNew[pFrom->nDel++] = p; + }else{ + apNew = sqlite3_realloc(pFrom->apSubst, + sizeof(*apNew)*(pFrom->nSubst+1)); + if( apNew==0 ) break; + pFrom->apSubst = apNew; + apNew[pFrom->nSubst++] = p; + } + } + if( p ){ + /* The rule loop stopped early, which only happens when realloc failed */ + editDist3FromStringDelete(pStr); + pStr = 0; + break; + } + } + return pStr; +} + +/* +** Update entry m[i] such that it is the 
minimum of its current value +** and m[j]+iCost. +** +** If the iCost is 1,000,000 or greater, then consider the cost to be +** infinite and skip the update. +*/ +static void updateCost( + unsigned int *m, + int i, + int j, + int iCost +){ + assert( iCost>=0 ); + if( iCost<10000 ){ + unsigned int b = m[j] + iCost; + if( bpCost; p; p=p->pNext){ + EditDist3Cost **apNew; + if( p->nFrom>0 ) continue; + if( i2+p->nTo>n2 ) continue; + if( matchTo(p, z2+i2, n2-i2)==0 ) continue; + a2[i2].nIns++; + apNew = sqlite3_realloc(a2[i2].apIns, sizeof(*apNew)*a2[i2].nIns); + if( apNew==0 ){ + res = -1; /* Out of memory */ + goto editDist3Abort; + } + a2[i2].apIns = apNew; + a2[i2].apIns[a2[i2].nIns-1] = p; + } + } + + /* Prepare to compute the minimum edit distance */ + szRow = f.n+1; + memset(m, 0x01, (n2+1)*szRow*sizeof(m[0])); + m[0] = 0; + + /* First fill in the top-row of the matrix with FROM deletion costs */ + for(i1=0; i1iDelCost); + for(k=0; knFrom, i1, p->iCost); + } + } + + /* Fill in all subsequent rows, top-to-bottom, left-to-right */ + for(i2=0; i2iInsCost); + for(k=0; knTo), rxp, p->iCost); + } + for(i1=0; i1iDelCost); + for(k=0; knFrom, cxp, p->iCost); + } + updateCost(m, cx, cxu, pLang->iInsCost); + if( matchFromTo(&f, i1, z2+i2, n2-i2) ){ + updateCost(m, cx, cxd, 0); + } + updateCost(m, cx, cxd, pLang->iSubCost); + for(k=0; knFrom+szRow*p->nTo, cxd, p->iCost); + } + } + } + } + +#if 0 /* Enable for debugging */ + printf(" ^"); + for(i1=0; i19999 ) printf(" ****"); + else printf(" %4d", v); + } + printf("\n"); + for(i2=0; i29999 ) printf(" ****"); + else printf(" %4d", v); + } + printf("\n"); + } +#endif + + /* Free memory allocations and return the result */ + res = (int)m[szRow*(n2+1)-1]; + n = n2; + if( f.isPrefix ){ + for(i2=1; i2<=n2; i2++){ + int b = m[szRow*i2-1]; + if( b<=res ){ + res = b; + n = i2 - 1; + } + } + } + if( pnMatch ){ + int nExtra = 0; + for(k=0; knLang; i++){ + if( pConfig->a[i].iLang==iLang ) return &pConfig->a[i]; + } + return 
&editDist3Lang; +} + +/* +** Function: editdist3(A,B,iLang) +** editdist3(tablename) +** +** Return the cost of transforming string A into string B using edit +** weights for iLang. +** +** The second form loads edit weights into memory from a table. +*/ +static void editDist3SqlFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + EditDist3Config *pConfig = (EditDist3Config*)sqlite3_user_data(context); + sqlite3 *db = sqlite3_context_db_handle(context); + int rc; + if( argc==1 ){ + const char *zTable = (const char*)sqlite3_value_text(argv[0]); + rc = editDist3ConfigLoad(pConfig, db, zTable); + if( rc ) sqlite3_result_error_code(context, rc); + }else{ + const char *zA = (const char*)sqlite3_value_text(argv[0]); + const char *zB = (const char*)sqlite3_value_text(argv[1]); + int nA = sqlite3_value_bytes(argv[0]); + int nB = sqlite3_value_bytes(argv[1]); + int iLang = argc==3 ? sqlite3_value_int(argv[2]) : 0; + const EditDist3Lang *pLang = editDist3FindLang(pConfig, iLang); + EditDist3FromString *pFrom; + int dist; + + pFrom = editDist3FromStringNew(pLang, zA, nA); + if( pFrom==0 ){ + sqlite3_result_error_nomem(context); + return; + } + dist = editDist3Core(pFrom, zB, nB, pLang, 0); + editDist3FromStringDelete(pFrom); + if( dist==(-1) ){ + sqlite3_result_error_nomem(context); + }else{ + sqlite3_result_int(context, dist); + } + } +} + +/* +** Register the editDist3 function with SQLite +*/ +static int editDist3Install(sqlite3 *db){ + int rc; + EditDist3Config *pConfig = sqlite3_malloc( sizeof(*pConfig) ); + if( pConfig==0 ) return SQLITE_NOMEM; + memset(pConfig, 0, sizeof(*pConfig)); + rc = sqlite3_create_function_v2(db, "editdist3", + 2, SQLITE_UTF8, pConfig, editDist3SqlFunc, 0, 0, 0); + if( rc==SQLITE_OK ){ + rc = sqlite3_create_function_v2(db, "editdist3", + 3, SQLITE_UTF8, pConfig, editDist3SqlFunc, 0, 0, 0); + } + if( rc==SQLITE_OK ){ + rc = sqlite3_create_function_v2(db, "editdist3", + 1, SQLITE_UTF8, pConfig, editDist3SqlFunc, 0, 0, + 
editDist3ConfigDelete); + }else{ + sqlite3_free(pConfig); + } + return rc; +} +/* End configurable cost unicode edit distance routines +****************************************************************************** +****************************************************************************** +** Begin transliterate unicode-to-ascii implementation +*/ + +#if !SQLITE_AMALGAMATION +/* +** This lookup table is used to help decode the first byte of +** a multi-byte UTF8 character. +*/ +static const unsigned char sqlite3Utf8Trans1[] = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00, +}; +#endif + +/* +** Return the value of the first UTF-8 character in the string. +*/ +static int utf8Read(const unsigned char *z, int n, int *pSize){ + int c, i; + + /* All callers to this routine (in the current implementation) + ** always have n>0. 
*/ + if( NEVER(n==0) ){ + c = i = 0; + }else{ + c = z[0]; + i = 1; + if( c>=0xc0 ){ + c = sqlite3Utf8Trans1[c-0xc0]; + while( i0 ){ + c = utf8Read(zIn, nIn, &sz); + zIn += sz; + nIn -= sz; + if( c<=127 ){ + zOut[nOut++] = c; + }else{ + int xTop, xBtm, x; + xTop = sizeof(translit)/sizeof(translit[0]) - 1; + xBtm = 0; + while( xTop>=xBtm ){ + x = (xTop + xBtm)/2; + if( translit[x].cFrom==c ){ + zOut[nOut++] = translit[x].cTo0; + if( translit[x].cTo1 ){ + zOut[nOut++] = translit[x].cTo1; + /* Add an extra "ch" after the "sh" for Щ and щ */ + if( c==0x0429 || c== 0x0449 ){ + zOut[nOut++] = 'c'; + zOut[nOut++] = 'h'; + } + } + c = 0; + break; + }else if( translit[x].cFrom>c ){ + xTop = x-1; + }else{ + xBtm = x+1; + } + } + if( c ) zOut[nOut++] = '?'; + } + } + zOut[nOut] = 0; + return zOut; +} + +/* +** Return the number of characters in the shortest prefix of the input +** string that transliterates to an ASCII string nTrans bytes or longer. +** Or, if the transliteration of the input string is less than nTrans +** bytes in size, return the number of characters in the input string. +*/ +static int translen_to_charlen(const char *zIn, int nIn, int nTrans){ + int i, c, sz, nOut; + int nChar; + + i = nOut = 0; + for(nChar=0; i=128 ){ + int xTop, xBtm, x; + xTop = sizeof(translit)/sizeof(translit[0]) - 1; + xBtm = 0; + while( xTop>=xBtm ){ + x = (xTop + xBtm)/2; + if( translit[x].cFrom==c ){ + if( translit[x].cTo1 ) nOut++; + if( c==0x0429 || c== 0x0449 ) nOut += 2; + break; + }else if( translit[x].cFrom>c ){ + xTop = x-1; + }else{ + xBtm = x+1; + } + } + } + } + + return nChar; +} + + +/* +** spellfix1_translit(X) +** +** Convert a string that contains non-ASCII Roman characters into +** pure ASCII. 
+*/ +static void transliterateSqlFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + const unsigned char *zIn = sqlite3_value_text(argv[0]); + int nIn = sqlite3_value_bytes(argv[0]); + unsigned char *zOut = transliterate(zIn, nIn); + if( zOut==0 ){ + sqlite3_result_error_nomem(context); + }else{ + sqlite3_result_text(context, (char*)zOut, -1, sqlite3_free); + } +} + +/* +** spellfix1_scriptcode(X) +** +** Try to determine the dominant script used by the word X and return +** its ISO 15924 numeric code. +** +** The current implementation only understands the following scripts: +** +** 215 (Latin) +** 220 (Cyrillic) +** 200 (Greek) +** +** This routine will return 998 if the input X contains characters from +** two or more of the above scripts or 999 if X contains no characters +** from any of the above scripts. +*/ +static void scriptCodeSqlFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + const unsigned char *zIn = sqlite3_value_text(argv[0]); + int nIn = sqlite3_value_bytes(argv[0]); + int c, sz; + int scriptMask = 0; + int res; +# define SCRIPT_LATIN 0x0001 +# define SCRIPT_CYRILLIC 0x0002 +# define SCRIPT_GREEK 0x0004 + + while( nIn>0 ){ + c = utf8Read(zIn, nIn, &sz); + zIn += sz; + nIn -= sz; + if( c<0x02af ){ + scriptMask |= SCRIPT_LATIN; + }else if( c>=0x0400 && c<=0x04ff ){ + scriptMask |= SCRIPT_CYRILLIC; + }else if( c>=0x0386 && c<=0x03ce ){ + scriptMask |= SCRIPT_GREEK; + } + } + switch( scriptMask ){ + case 0: res = 999; break; + case SCRIPT_LATIN: res = 215; break; + case SCRIPT_CYRILLIC: res = 220; break; + case SCRIPT_GREEK: res = 200; break; + default: res = 998; break; + } + sqlite3_result_int(context, res); +} + +/* End transliterate +****************************************************************************** +****************************************************************************** +** Begin spellfix1 virtual table. 
+*/ + +/* Maximum length of a phonehash used for querying the shadow table */ +#define SPELLFIX_MX_HASH 8 + +/* Maximum number of hash strings to examine per query */ +#define SPELLFIX_MX_RUN 1 + +typedef struct spellfix1_vtab spellfix1_vtab; +typedef struct spellfix1_cursor spellfix1_cursor; + +/* Fuzzy-search virtual table object */ +struct spellfix1_vtab { + sqlite3_vtab base; /* Base class - must be first */ + sqlite3 *db; /* Database connection */ + char *zDbName; /* Name of database holding this table */ + char *zTableName; /* Name of the virtual table */ + char *zCostTable; /* Table holding edit-distance cost numbers */ + EditDist3Config *pConfig3; /* Parsed edit distance costs */ +}; + +/* Fuzzy-search cursor object */ +struct spellfix1_cursor { + sqlite3_vtab_cursor base; /* Base class - must be first */ + spellfix1_vtab *pVTab; /* The table to which this cursor belongs */ + char *zPattern; /* rhs of MATCH clause */ + int nRow; /* Number of rows of content */ + int nAlloc; /* Number of allocated rows */ + int iRow; /* Current row of content */ + int iLang; /* Value of the langid= constraint */ + int iTop; /* Value of the top= constraint */ + int iScope; /* Value of the scope= constraint */ + int nSearch; /* Number of vocabulary items checked */ + sqlite3_stmt *pFullScan; /* Shadow query for a full table scan */ + struct spellfix1_row { /* For each row of content */ + sqlite3_int64 iRowid; /* Rowid for this row */ + char *zWord; /* Text for this row */ + int iRank; /* Rank for this row */ + int iDistance; /* Distance from pattern for this row */ + int iScore; /* Score for sorting */ + int iMatchlen; /* Value of matchlen column (or -1) */ + char zHash[SPELLFIX_MX_HASH]; /* the phonehash used for this match */ + } *a; +}; + +/* +** Construct one or more SQL statements from the format string given +** and then evaluate those statements. The success code is written +** into *pRc. +** +** If *pRc is initially non-zero then this routine is a no-op. 
+*/ +static void spellfix1DbExec( + int *pRc, /* Success code */ + sqlite3 *db, /* Database in which to run SQL */ + const char *zFormat, /* Format string for SQL */ + ... /* Arguments to the format string */ +){ + va_list ap; + char *zSql; + if( *pRc ) return; + va_start(ap, zFormat); + zSql = sqlite3_vmprintf(zFormat, ap); + va_end(ap); + if( zSql==0 ){ + *pRc = SQLITE_NOMEM; + }else{ + *pRc = sqlite3_exec(db, zSql, 0, 0, 0); + sqlite3_free(zSql); + } +} + +/* +** xDisconnect/xDestroy method for the fuzzy-search module. +*/ +static int spellfix1Uninit(int isDestroy, sqlite3_vtab *pVTab){ + spellfix1_vtab *p = (spellfix1_vtab*)pVTab; + int rc = SQLITE_OK; + if( isDestroy ){ + sqlite3 *db = p->db; + spellfix1DbExec(&rc, db, "DROP TABLE IF EXISTS \"%w\".\"%w_vocab\"", + p->zDbName, p->zTableName); + } + if( rc==SQLITE_OK ){ + sqlite3_free(p->zTableName); + editDist3ConfigDelete(p->pConfig3); + sqlite3_free(p->zCostTable); + sqlite3_free(p); + } + return rc; +} +static int spellfix1Disconnect(sqlite3_vtab *pVTab){ + return spellfix1Uninit(0, pVTab); +} +static int spellfix1Destroy(sqlite3_vtab *pVTab){ + return spellfix1Uninit(1, pVTab); +} + +/* +** Make a copy of a string. Remove leading and trailing whitespace +** and dequote it. +*/ +static char *spellfix1Dequote(const char *zIn){ + char *zOut; + int i, j; + char c; + while( isspace(zIn[0]) ) zIn++; + zOut = sqlite3_mprintf("%s", zIn); + if( zOut==0 ) return 0; + i = (int)strlen(zOut); +#if 0 /* The parser will never leave spaces at the end */ + while( i>0 && isspace(zOut[i-1]) ){ i--; } +#endif + zOut[i] = 0; + c = zOut[0]; + if( c=='\'' || c=='"' ){ + for(i=1, j=0; ALWAYS(zOut[i]); i++){ + zOut[j++] = zOut[i]; + if( zOut[i]==c ){ + if( zOut[i+1]==c ){ + i++; + }else{ + zOut[j-1] = 0; + break; + } + } + } + } + return zOut; +} + + +/* +** xConnect/xCreate method for the spellfix1 module. 
Arguments are: +** +** argv[0] -> module name ("spellfix1") +** argv[1] -> database name +** argv[2] -> table name +** argv[3].. -> optional arguments (i.e. "edit_cost_table" parameter) +*/ +static int spellfix1Init( + int isCreate, + sqlite3 *db, + void *pAux, + int argc, const char *const*argv, + sqlite3_vtab **ppVTab, + char **pzErr +){ + spellfix1_vtab *pNew = 0; + const char *zModule = argv[0]; + const char *zDbName = argv[1]; + const char *zTableName = argv[2]; + int nDbName; + int rc = SQLITE_OK; + int i; + + nDbName = (int)strlen(zDbName); + pNew = sqlite3_malloc( sizeof(*pNew) + nDbName + 1); + if( pNew==0 ){ + rc = SQLITE_NOMEM; + }else{ + memset(pNew, 0, sizeof(*pNew)); + pNew->zDbName = (char*)&pNew[1]; + memcpy(pNew->zDbName, zDbName, nDbName+1); + pNew->zTableName = sqlite3_mprintf("%s", zTableName); + pNew->db = db; + if( pNew->zTableName==0 ){ + rc = SQLITE_NOMEM; + }else{ + rc = sqlite3_declare_vtab(db, + "CREATE TABLE x(word,rank,distance,langid, " + "score, matchlen, phonehash HIDDEN, " + "top HIDDEN, scope HIDDEN, srchcnt HIDDEN, " + "soundslike HIDDEN, command HIDDEN)" + ); +#define SPELLFIX_COL_WORD 0 +#define SPELLFIX_COL_RANK 1 +#define SPELLFIX_COL_DISTANCE 2 +#define SPELLFIX_COL_LANGID 3 +#define SPELLFIX_COL_SCORE 4 +#define SPELLFIX_COL_MATCHLEN 5 +#define SPELLFIX_COL_PHONEHASH 6 +#define SPELLFIX_COL_TOP 7 +#define SPELLFIX_COL_SCOPE 8 +#define SPELLFIX_COL_SRCHCNT 9 +#define SPELLFIX_COL_SOUNDSLIKE 10 +#define SPELLFIX_COL_COMMAND 11 + } + if( rc==SQLITE_OK && isCreate ){ + sqlite3_uint64 r; + spellfix1DbExec(&rc, db, + "CREATE TABLE IF NOT EXISTS \"%w\".\"%w_vocab\"(\n" + " id INTEGER PRIMARY KEY,\n" + " rank INT,\n" + " langid INT,\n" + " word TEXT,\n" + " k1 TEXT,\n" + " k2 TEXT\n" + ");\n", + zDbName, zTableName + ); + sqlite3_randomness(sizeof(r), &r); + spellfix1DbExec(&rc, db, + "CREATE INDEX IF NOT EXISTS \"%w\".\"%w_index_%llx\" " + "ON \"%w_vocab\"(langid,k2);", + zDbName, zModule, r, zTableName + ); + } + for(i=3; 
rc==SQLITE_OK && ibase); + }else{ + *ppVTab = (sqlite3_vtab *)pNew; + } + return rc; +} + +/* +** The xConnect and xCreate methods +*/ +static int spellfix1Connect( + sqlite3 *db, + void *pAux, + int argc, const char *const*argv, + sqlite3_vtab **ppVTab, + char **pzErr +){ + return spellfix1Init(0, db, pAux, argc, argv, ppVTab, pzErr); +} +static int spellfix1Create( + sqlite3 *db, + void *pAux, + int argc, const char *const*argv, + sqlite3_vtab **ppVTab, + char **pzErr +){ + return spellfix1Init(1, db, pAux, argc, argv, ppVTab, pzErr); +} + +/* +** Clear all of the content from a cursor. +*/ +static void spellfix1ResetCursor(spellfix1_cursor *pCur){ + int i; + for(i=0; inRow; i++){ + sqlite3_free(pCur->a[i].zWord); + } + pCur->nRow = 0; + pCur->iRow = 0; + pCur->nSearch = 0; + if( pCur->pFullScan ){ + sqlite3_finalize(pCur->pFullScan); + pCur->pFullScan = 0; + } +} + +/* +** Resize the cursor to hold up to N rows of content +*/ +static void spellfix1ResizeCursor(spellfix1_cursor *pCur, int N){ + struct spellfix1_row *aNew; + assert( N>=pCur->nRow ); + aNew = sqlite3_realloc(pCur->a, sizeof(pCur->a[0])*N); + if( aNew==0 && N>0 ){ + spellfix1ResetCursor(pCur); + sqlite3_free(pCur->a); + pCur->nAlloc = 0; + pCur->a = 0; + }else{ + pCur->nAlloc = N; + pCur->a = aNew; + } +} + + +/* +** Close a fuzzy-search cursor. 
+*/ +static int spellfix1Close(sqlite3_vtab_cursor *cur){ + spellfix1_cursor *pCur = (spellfix1_cursor *)cur; + spellfix1ResetCursor(pCur); + spellfix1ResizeCursor(pCur, 0); + sqlite3_free(pCur->zPattern); + sqlite3_free(pCur); + return SQLITE_OK; +} + +/* +** Search for terms of these forms: +** +** (A) word MATCH $str +** (B) langid == $langid +** (C) top = $top +** (D) scope = $scope +** (E) distance < $distance +** (F) distance <= $distance +** +** The plan number is a bit mask formed with these bits: +** +** 0x01 (A) is found +** 0x02 (B) is found +** 0x04 (C) is found +** 0x08 (D) is found +** 0x10 (E) is found +** 0x20 (F) is found +** +** filter.argv[*] values contains $str, $langid, $top, and $scope, +** if specified and in that order. +*/ +static int spellfix1BestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){ + int iPlan = 0; + int iLangTerm = -1; + int iTopTerm = -1; + int iScopeTerm = -1; + int iDistTerm = -1; + int i; + const struct sqlite3_index_constraint *pConstraint; + pConstraint = pIdxInfo->aConstraint; + for(i=0; inConstraint; i++, pConstraint++){ + if( pConstraint->usable==0 ) continue; + + /* Terms of the form: word MATCH $str */ + if( (iPlan & 1)==0 + && pConstraint->iColumn==SPELLFIX_COL_WORD + && pConstraint->op==SQLITE_INDEX_CONSTRAINT_MATCH + ){ + iPlan |= 1; + pIdxInfo->aConstraintUsage[i].argvIndex = 1; + pIdxInfo->aConstraintUsage[i].omit = 1; + } + + /* Terms of the form: langid = $langid */ + if( (iPlan & 2)==0 + && pConstraint->iColumn==SPELLFIX_COL_LANGID + && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ + ){ + iPlan |= 2; + iLangTerm = i; + } + + /* Terms of the form: top = $top */ + if( (iPlan & 4)==0 + && pConstraint->iColumn==SPELLFIX_COL_TOP + && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ + ){ + iPlan |= 4; + iTopTerm = i; + } + + /* Terms of the form: scope = $scope */ + if( (iPlan & 8)==0 + && pConstraint->iColumn==SPELLFIX_COL_SCOPE + && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ + ){ + iPlan |= 8; + iScopeTerm = 
i; + } + + /* Terms of the form: distance < $dist or distance <= $dist */ + if( (iPlan & (16|32))==0 + && pConstraint->iColumn==SPELLFIX_COL_DISTANCE + && (pConstraint->op==SQLITE_INDEX_CONSTRAINT_LT + || pConstraint->op==SQLITE_INDEX_CONSTRAINT_LE) + ){ + iPlan |= pConstraint->op==SQLITE_INDEX_CONSTRAINT_LT ? 16 : 32; + iDistTerm = i; + } + } + if( iPlan&1 ){ + int idx = 2; + pIdxInfo->idxNum = iPlan; + if( pIdxInfo->nOrderBy==1 + && pIdxInfo->aOrderBy[0].iColumn==SPELLFIX_COL_SCORE + && pIdxInfo->aOrderBy[0].desc==0 + ){ + pIdxInfo->orderByConsumed = 1; /* Default order by iScore */ + } + if( iPlan&2 ){ + pIdxInfo->aConstraintUsage[iLangTerm].argvIndex = idx++; + pIdxInfo->aConstraintUsage[iLangTerm].omit = 1; + } + if( iPlan&4 ){ + pIdxInfo->aConstraintUsage[iTopTerm].argvIndex = idx++; + pIdxInfo->aConstraintUsage[iTopTerm].omit = 1; + } + if( iPlan&8 ){ + pIdxInfo->aConstraintUsage[iScopeTerm].argvIndex = idx++; + pIdxInfo->aConstraintUsage[iScopeTerm].omit = 1; + } + if( iPlan&(16|32) ){ + pIdxInfo->aConstraintUsage[iDistTerm].argvIndex = idx++; + pIdxInfo->aConstraintUsage[iDistTerm].omit = 1; + } + pIdxInfo->estimatedCost = (double)10000; + }else{ + pIdxInfo->idxNum = 0; + pIdxInfo->estimatedCost = (double)10000000; + } + return SQLITE_OK; +} + +/* +** Open a new fuzzy-search cursor. +*/ +static int spellfix1Open(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){ + spellfix1_vtab *p = (spellfix1_vtab*)pVTab; + spellfix1_cursor *pCur; + pCur = sqlite3_malloc( sizeof(*pCur) ); + if( pCur==0 ) return SQLITE_NOMEM; + memset(pCur, 0, sizeof(*pCur)); + pCur->pVTab = p; + *ppCursor = &pCur->base; + return SQLITE_OK; +} + +/* +** Adjust a distance measurement by the words rank in order to show +** preference to common words. 
+*/ +static int spellfix1Score(int iDistance, int iRank){ + int iLog2; + for(iLog2=0; iRank>0; iLog2++, iRank>>=1){} + return iDistance + 32 - iLog2; +} + +/* +** Compare two spellfix1_row objects for sorting purposes in qsort() such +** that they sort in order of increasing distance. +*/ +static int spellfix1RowCompare(const void *A, const void *B){ + const struct spellfix1_row *a = (const struct spellfix1_row*)A; + const struct spellfix1_row *b = (const struct spellfix1_row*)B; + return a->iScore - b->iScore; +} + +/* +** A structure used to pass information from spellfix1FilterForMatch() +** into spellfix1RunQuery(). +*/ +typedef struct MatchQuery { + spellfix1_cursor *pCur; /* The cursor being queried */ + sqlite3_stmt *pStmt; /* shadow table query statment */ + char zHash[SPELLFIX_MX_HASH]; /* The current phonehash for zPattern */ + const char *zPattern; /* Transliterated input string */ + int nPattern; /* Length of zPattern */ + EditDist3FromString *pMatchStr3; /* Original unicode string */ + EditDist3Config *pConfig3; /* Edit-distance cost coefficients */ + const EditDist3Lang *pLang; /* The selected language coefficients */ + int iLang; /* The language id */ + int iScope; /* Default scope */ + int iMaxDist; /* Maximum allowed edit distance, or -1 */ + int rc; /* Error code */ + int nRun; /* Number of prior runs for the same zPattern */ + char azPrior[SPELLFIX_MX_RUN][SPELLFIX_MX_HASH]; /* Prior hashes */ +} MatchQuery; + +/* +** Run a query looking for the best matches against zPattern using +** zHash as the character class seed hash. 
+*/
+static void spellfix1RunQuery(MatchQuery *p, const char *zQuery, int nQuery){
+  const char *zK1;
+  const char *zWord;
+  int iDist;
+  int iRank;
+  int iScore;
+  int iWorst = 0;                  /* largest iScore found so far in pCur->a[] */
+  int idx;
+  int idxWorst = -1;               /* index in pCur->a[] of the iWorst entry */
+  int i;
+  int iScope = p->iScope;          /* number of hash characters used as the key prefix */
+  spellfix1_cursor *pCur = p->pCur;
+  sqlite3_stmt *pStmt = p->pStmt;
+  char zHash1[SPELLFIX_MX_HASH];   /* hash prefix, bound to parameter 1 */
+  char zHash2[SPELLFIX_MX_HASH];   /* zHash1 with 'Z' appended, bound to parameter 2 */
+  char *zClass;
+  int nClass;
+  int rc;
+
+  if( pCur->a==0 || p->rc ) return;   /* Prior memory allocation failure, or an earlier error is already recorded */
+  zClass = (char*)phoneticHash((unsigned char*)zQuery, nQuery);
+  if( zClass==0 ){
+    p->rc = SQLITE_NOMEM;
+    return;
+  }
+  nClass = (int)strlen(zClass);
+  if( nClass>SPELLFIX_MX_HASH-2 ){   /* keep room for the 'Z' and the terminator in zHash2 */
+    nClass = SPELLFIX_MX_HASH-2;
+    zClass[nClass] = 0;
+  }
+  if( nClass<=iScope ){              /* hash is no longer than the requested scope: shorten the scope */
+    if( nClass>2 ){
+      iScope = nClass-1;
+    }else{
+      iScope = nClass;
+    }
+  }
+  memcpy(zHash1, zClass, iScope);
+  sqlite3_free(zClass);
+  zHash1[iScope] = 0;
+  memcpy(zHash2, zHash1, iScope);
+  zHash2[iScope] = 'Z';
+  zHash2[iScope+1] = 0;
+#if SPELLFIX_MX_RUN>1
+  for(i=0; inRun; i++){   /* NOTE(review): loop bound looks truncated in this copy; presumably i<p->nRun */
+    if( strcmp(p->azPrior[i], zHash1)==0 ) return;   /* this hash prefix was already searched */
+  }
+#endif
+  assert( p->nRunazPrior[p->nRun++], zHash1, iScope+1);   /* NOTE(review): text appears lost here; presumably an assert on p->nRun followed by a memcpy() that records zHash1 in p->azPrior[] */
+  if( sqlite3_bind_text(pStmt, 1, zHash1, -1, SQLITE_STATIC)==SQLITE_NOMEM
+   || sqlite3_bind_text(pStmt, 2, zHash2, -1, SQLITE_STATIC)==SQLITE_NOMEM
+  ){
+    p->rc = SQLITE_NOMEM;
+    return;
+  }
+#if SPELLFIX_MX_RUN>1
+  for(i=0; inRow; i++){   /* NOTE(review): loop bound looks truncated in this copy; presumably i<pCur->nRow */
+    if( pCur->a[i].iScore>iWorst ){
+      iWorst = pCur->a[i].iScore;
+      idxWorst = i;
+    }
+  }
+#endif
+  while( sqlite3_step(pStmt)==SQLITE_ROW ){
+    int iMatchlen = -1;
+    iRank = sqlite3_column_int(pStmt, 2);
+    if( p->pMatchStr3 ){
+      int nWord = sqlite3_column_bytes(pStmt, 1);
+      zWord = (const char*)sqlite3_column_text(pStmt, 1);
+      iDist = editDist3Core(p->pMatchStr3, zWord, nWord, p->pLang, &iMatchlen);
+    }else{
+      zK1 = (const char*)sqlite3_column_text(pStmt, 3);
+      if( zK1==0 ) continue;
+      iDist = editdist1(p->zPattern, zK1, 0);
+    }
+    if( iDist<0 ){                   /* a negative distance is reported as out-of-memory */
+      p->rc = SQLITE_NOMEM;
+      break;
+    }
+    pCur->nSearch++;
+    iScore = spellfix1Score(iDist,iRank);
+    if( p->iMaxDist>=0 ){            /* distance-limited search: keep every row within the limit, growing the array as needed */
+      if( iDist>p->iMaxDist ) continue;
+      if( pCur->nRow>=pCur->nAlloc-1 ){
+        spellfix1ResizeCursor(pCur, pCur->nAlloc*2 + 10);
+        if( pCur->a==0 ) break;
+      }
+      idx = pCur->nRow;
+    }else if( pCur->nRownAlloc ){   /* NOTE(review): condition looks truncated; presumably pCur->nRow<pCur->nAlloc */
+      idx = pCur->nRow;
+    }else if( iScorea[idx].zWord);   /* NOTE(review): text appears lost here; presumably the row replaces the current worst entry when its score is lower */
+    }else{
+      continue;
+    }
+    pCur->a[idx].zWord = sqlite3_mprintf("%s", sqlite3_column_text(pStmt, 1));
+    if( pCur->a[idx].zWord==0 ){
+      p->rc = SQLITE_NOMEM;
+      break;
+    }
+    pCur->a[idx].iRowid = sqlite3_column_int64(pStmt, 0);
+    pCur->a[idx].iRank = iRank;
+    pCur->a[idx].iDistance = iDist;
+    pCur->a[idx].iScore = iScore;
+    pCur->a[idx].iMatchlen = iMatchlen;
+    memcpy(pCur->a[idx].zHash, zHash1, iScope+1);
+    if( pCur->nRownAlloc ) pCur->nRow++;   /* NOTE(review): condition looks truncated; presumably pCur->nRow<pCur->nAlloc */
+    if( pCur->nRow==pCur->nAlloc ){       /* array is full: locate the worst entry again */
+      iWorst = pCur->a[0].iScore;
+      idxWorst = 0;
+      for(i=1; inRow; i++){   /* NOTE(review): loop bound looks truncated in this copy; presumably i<pCur->nRow */
+        iScore = pCur->a[i].iScore;
+        if( iWorstrc = rc;   /* NOTE(review): the end of the scan, the closing braces and the statement that sets rc appear to be lost here; restore from the upstream file */
+}
+
+/*
+** This version of the xFilter method work if the MATCH term is present
+** and we are doing a scan.
+*/ +static int spellfix1FilterForMatch( + spellfix1_cursor *pCur, + int idxNum, + int argc, + sqlite3_value **argv +){ + const unsigned char *zMatchThis; /* RHS of the MATCH operator */ + EditDist3FromString *pMatchStr3 = 0; /* zMatchThis as an editdist string */ + char *zPattern; /* Transliteration of zMatchThis */ + int nPattern; /* Length of zPattern */ + int iLimit = 20; /* Max number of rows of output */ + int iScope = 3; /* Use this many characters of zClass */ + int iLang = 0; /* Language code */ + char *zSql; /* SQL of shadow table query */ + sqlite3_stmt *pStmt = 0; /* Shadow table query */ + int rc; /* Result code */ + int idx = 1; /* Next available filter parameter */ + spellfix1_vtab *p = pCur->pVTab; /* The virtual table that owns pCur */ + MatchQuery x; /* For passing info to RunQuery() */ + + /* Load the cost table if we have not already done so */ + if( p->zCostTable!=0 && p->pConfig3==0 ){ + p->pConfig3 = sqlite3_malloc( sizeof(p->pConfig3[0]) ); + if( p->pConfig3==0 ) return SQLITE_NOMEM; + memset(p->pConfig3, 0, sizeof(p->pConfig3[0])); + rc = editDist3ConfigLoad(p->pConfig3, p->db, p->zCostTable); + if( rc ) return rc; + } + memset(&x, 0, sizeof(x)); + x.iScope = 3; /* Default scope if none specified by "WHERE scope=N" */ + x.iMaxDist = -1; /* Maximum allowed edit distance */ + + if( idxNum&2 ){ + iLang = sqlite3_value_int(argv[idx++]); + } + if( idxNum&4 ){ + iLimit = sqlite3_value_int(argv[idx++]); + if( iLimit<1 ) iLimit = 1; + } + if( idxNum&8 ){ + x.iScope = sqlite3_value_int(argv[idx++]); + if( x.iScope<1 ) x.iScope = 1; + if( x.iScope>SPELLFIX_MX_HASH-2 ) x.iScope = SPELLFIX_MX_HASH-2; + } + if( idxNum&(16|32) ){ + x.iMaxDist = sqlite3_value_int(argv[idx++]); + if( idxNum&16 ) x.iMaxDist--; + if( x.iMaxDist<0 ) x.iMaxDist = 0; + } + spellfix1ResetCursor(pCur); + spellfix1ResizeCursor(pCur, iLimit); + zMatchThis = sqlite3_value_text(argv[0]); + if( zMatchThis==0 ) return SQLITE_OK; + if( p->pConfig3 ){ + x.pLang = 
editDist3FindLang(p->pConfig3, iLang); + pMatchStr3 = editDist3FromStringNew(x.pLang, (const char*)zMatchThis, -1); + if( pMatchStr3==0 ){ + x.rc = SQLITE_NOMEM; + goto filter_exit; + } + }else{ + x.pLang = 0; + } + zPattern = (char*)transliterate(zMatchThis, sqlite3_value_bytes(argv[0])); + sqlite3_free(pCur->zPattern); + pCur->zPattern = zPattern; + if( zPattern==0 ){ + x.rc = SQLITE_NOMEM; + goto filter_exit; + } + nPattern = (int)strlen(zPattern); + if( zPattern[nPattern-1]=='*' ) nPattern--; + zSql = sqlite3_mprintf( + "SELECT id, word, rank, k1" + " FROM \"%w\".\"%w_vocab\"" + " WHERE langid=%d AND k2>=?1 AND k2zDbName, p->zTableName, iLang + ); + if( zSql==0 ){ + x.rc = SQLITE_NOMEM; + pStmt = 0; + goto filter_exit; + } + rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0); + sqlite3_free(zSql); + pCur->iLang = iLang; + x.pCur = pCur; + x.pStmt = pStmt; + x.zPattern = zPattern; + x.nPattern = nPattern; + x.pMatchStr3 = pMatchStr3; + x.iLang = iLang; + x.rc = rc; + x.pConfig3 = p->pConfig3; + if( x.rc==SQLITE_OK ){ + spellfix1RunQuery(&x, zPattern, nPattern); + } + + if( pCur->a ){ + qsort(pCur->a, pCur->nRow, sizeof(pCur->a[0]), spellfix1RowCompare); + pCur->iTop = iLimit; + pCur->iScope = iScope; + }else{ + x.rc = SQLITE_NOMEM; + } + +filter_exit: + sqlite3_finalize(pStmt); + editDist3FromStringDelete(pMatchStr3); + return x.rc; +} + +/* +** This version of xFilter handles a full-table scan case +*/ +static int spellfix1FilterForFullScan( + spellfix1_cursor *pCur, + int idxNum, + int argc, + sqlite3_value **argv +){ + int rc; + char *zSql; + spellfix1_vtab *pVTab = pCur->pVTab; + spellfix1ResetCursor(pCur); + zSql = sqlite3_mprintf( + "SELECT word, rank, NULL, langid, id FROM \"%w\".\"%w_vocab\"", + pVTab->zDbName, pVTab->zTableName); + if( zSql==0 ) return SQLITE_NOMEM; + rc = sqlite3_prepare_v2(pVTab->db, zSql, -1, &pCur->pFullScan, 0); + sqlite3_free(zSql); + pCur->nRow = pCur->iRow = 0; + if( rc==SQLITE_OK ){ + rc = sqlite3_step(pCur->pFullScan); + if( 
rc==SQLITE_ROW ){ pCur->iRow = -1; rc = SQLITE_OK; } + if( rc==SQLITE_DONE ){ rc = SQLITE_OK; } + }else{ + pCur->iRow = 0; + } + return rc; +} + + +/* +** Called to "rewind" a cursor back to the beginning so that +** it starts its output over again. Always called at least once +** prior to any spellfix1Column, spellfix1Rowid, or spellfix1Eof call. +*/ +static int spellfix1Filter( + sqlite3_vtab_cursor *cur, + int idxNum, const char *idxStr, + int argc, sqlite3_value **argv +){ + spellfix1_cursor *pCur = (spellfix1_cursor *)cur; + int rc; + if( idxNum & 1 ){ + rc = spellfix1FilterForMatch(pCur, idxNum, argc, argv); + }else{ + rc = spellfix1FilterForFullScan(pCur, idxNum, argc, argv); + } + return rc; +} + + +/* +** Advance a cursor to its next row of output +*/ +static int spellfix1Next(sqlite3_vtab_cursor *cur){ + spellfix1_cursor *pCur = (spellfix1_cursor *)cur; + int rc = SQLITE_OK; + if( pCur->iRow < pCur->nRow ){ + if( pCur->pFullScan ){ + rc = sqlite3_step(pCur->pFullScan); + if( rc!=SQLITE_ROW ) pCur->iRow = pCur->nRow; + if( rc==SQLITE_ROW || rc==SQLITE_DONE ) rc = SQLITE_OK; + }else{ + pCur->iRow++; + } + } + return rc; +} + +/* +** Return TRUE if we are at the end-of-file +*/ +static int spellfix1Eof(sqlite3_vtab_cursor *cur){ + spellfix1_cursor *pCur = (spellfix1_cursor *)cur; + return pCur->iRow>=pCur->nRow; +} + +/* +** Return columns from the current row. 
+*/
+static int spellfix1Column(
+  sqlite3_vtab_cursor *cur,
+  sqlite3_context *ctx,
+  int i                               /* column number, one of SPELLFIX_COL_* */
+){
+  spellfix1_cursor *pCur = (spellfix1_cursor*)cur;
+  if( pCur->pFullScan ){              /* full scan: read straight from the shadow-table statement */
+    if( i<=SPELLFIX_COL_LANGID ){
+      sqlite3_result_value(ctx, sqlite3_column_value(pCur->pFullScan, i));
+    }else{
+      sqlite3_result_null(ctx);
+    }
+    return SQLITE_OK;
+  }
+  switch( i ){
+    case SPELLFIX_COL_WORD: {
+      sqlite3_result_text(ctx, pCur->a[pCur->iRow].zWord, -1, SQLITE_STATIC);
+      break;
+    }
+    case SPELLFIX_COL_RANK: {
+      sqlite3_result_int(ctx, pCur->a[pCur->iRow].iRank);
+      break;
+    }
+    case SPELLFIX_COL_DISTANCE: {
+      sqlite3_result_int(ctx, pCur->a[pCur->iRow].iDistance);
+      break;
+    }
+    case SPELLFIX_COL_LANGID: {
+      sqlite3_result_int(ctx, pCur->iLang);
+      break;
+    }
+    case SPELLFIX_COL_SCORE: {
+      sqlite3_result_int(ctx, pCur->a[pCur->iRow].iScore);
+      break;
+    }
+    case SPELLFIX_COL_MATCHLEN: {
+      int iMatchlen = pCur->a[pCur->iRow].iMatchlen;
+      if( iMatchlen<0 ){              /* not stored with the row: compute it now */
+        int nPattern = (int)strlen(pCur->zPattern);
+        char *zWord = pCur->a[pCur->iRow].zWord;
+        int nWord = (int)strlen(zWord);
+
+        if( nPattern>0 && pCur->zPattern[nPattern-1]=='*' ){
+          char *zTranslit;
+          int res;
+          zTranslit = (char *)transliterate((unsigned char *)zWord, nWord);
+          if( !zTranslit ) return SQLITE_NOMEM;
+          res = editdist1(pCur->zPattern, zTranslit, &iMatchlen);
+          sqlite3_free(zTranslit);
+          if( res<0 ) return SQLITE_NOMEM;
+          iMatchlen = translen_to_charlen(zWord, nWord, iMatchlen);
+        }else{
+          iMatchlen = utf8Charlen(zWord, nWord);
+        }
+      }
+
+      sqlite3_result_int(ctx, iMatchlen);
+      break;
+    }
+    case SPELLFIX_COL_PHONEHASH: {
+      sqlite3_result_text(ctx, pCur->a[pCur->iRow].zHash, -1, SQLITE_STATIC);
+      break;
+    }
+    case SPELLFIX_COL_TOP: {
+      sqlite3_result_int(ctx, pCur->iTop);
+      break;
+    }
+    case SPELLFIX_COL_SCOPE: {
+      sqlite3_result_int(ctx, pCur->iScope);
+      break;
+    }
+    case SPELLFIX_COL_SRCHCNT: {
+      sqlite3_result_int(ctx, pCur->nSearch);
+      break;
+    }
+    default: {                        /* remaining columns have no stored value */
+      sqlite3_result_null(ctx);
+      break;
+    }
+  }
+  return SQLITE_OK;
+}
+
+/*
+** The rowid.
+**
+** During a full scan this is column 4 of the full-scan statement;
+** otherwise it is the rowid saved with the current cached row.
+*/
+static int spellfix1Rowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){
+  spellfix1_cursor *pCur = (spellfix1_cursor*)cur;
+  if( pCur->pFullScan ){
+    *pRowid = sqlite3_column_int64(pCur->pFullScan, 4);
+  }else{
+    *pRowid = pCur->a[pCur->iRow].iRowid;
+  }
+  return SQLITE_OK;
+}
+
+/*
+** The xUpdate() method.
+**
+** argc==1 deletes the row whose rowid is argv[0].  Otherwise argv[0] is
+** the existing rowid (NULL for an insert), argv[1] is the new rowid and
+** argv[N+2] holds the value for column N.
+**
+** A row whose word is NULL is treated as a command taken from the
+** "command" column: "reset" discards the loaded edit-cost configuration
+** and "edit_cost_table=NAME" selects a different cost table (an empty
+** name or "null" selects none).  Any other command is an error.
+*/
+static int spellfix1Update(
+  sqlite3_vtab *pVTab,
+  int argc,
+  sqlite3_value **argv,
+  sqlite_int64 *pRowid
+){
+  int rc = SQLITE_OK;
+  sqlite3_int64 rowid, newRowid;
+  spellfix1_vtab *p = (spellfix1_vtab*)pVTab;
+  sqlite3 *db = p->db;
+
+  if( argc==1 ){
+    /* A delete operation on the rowid given by argv[0] */
+    rowid = *pRowid = sqlite3_value_int64(argv[0]);
+    spellfix1DbExec(&rc, db, "DELETE FROM \"%w\".\"%w_vocab\" "
+                           " WHERE id=%lld",
+                  p->zDbName, p->zTableName, rowid);
+  }else{
+    const unsigned char *zWord = sqlite3_value_text(argv[SPELLFIX_COL_WORD+2]);
+    int nWord = sqlite3_value_bytes(argv[SPELLFIX_COL_WORD+2]);
+    int iLang = sqlite3_value_int(argv[SPELLFIX_COL_LANGID+2]);
+    int iRank = sqlite3_value_int(argv[SPELLFIX_COL_RANK+2]);
+    const unsigned char *zSoundslike =
+           sqlite3_value_text(argv[SPELLFIX_COL_SOUNDSLIKE+2]);
+    int nSoundslike = sqlite3_value_bytes(argv[SPELLFIX_COL_SOUNDSLIKE+2]);
+    char *zK1, *zK2;                  /* values for the k1 and k2 shadow-table columns */
+    int i;
+    char c;
+
+    if( zWord==0 ){
+      /* Inserts of the form:  INSERT INTO table(command) VALUES('xyzzy');
+      ** cause zWord to be NULL, so we look at the "command" column to see
+      ** what special actions to take */
+      const char *zCmd =
+         (const char*)sqlite3_value_text(argv[SPELLFIX_COL_COMMAND+2]);
+      if( zCmd==0 ){
+        pVTab->zErrMsg = sqlite3_mprintf("%s.word may not be NULL",
+                                         p->zTableName);
+        return SQLITE_CONSTRAINT_NOTNULL;
+      }
+      if( strcmp(zCmd,"reset")==0 ){
+        /* Reset the edit cost table (if there is one). */
+        editDist3ConfigDelete(p->pConfig3);
+        p->pConfig3 = 0;
+        return SQLITE_OK;
+      }
+      if( strncmp(zCmd,"edit_cost_table=",16)==0 ){
+        editDist3ConfigDelete(p->pConfig3);
+        p->pConfig3 = 0;
+        sqlite3_free(p->zCostTable);
+        p->zCostTable = spellfix1Dequote(zCmd+16);
+        if( p->zCostTable==0 ) return SQLITE_NOMEM;
+        if( p->zCostTable[0]==0 || sqlite3_stricmp(p->zCostTable,"null")==0 ){   /* empty name or "null": no cost table */
+          sqlite3_free(p->zCostTable);
+          p->zCostTable = 0;
+        }
+        return SQLITE_OK;
+      }
+      pVTab->zErrMsg = sqlite3_mprintf("unknown value for %s.command: \"%w\"",
+                                       p->zTableName, zCmd);
+      return SQLITE_ERROR;
+    }
+    if( iRank<1 ) iRank = 1;
+    if( zSoundslike ){                /* an explicit soundslike value takes the place of the word when computing k1 */
+      zK1 = (char*)transliterate(zSoundslike, nSoundslike);
+    }else{
+      zK1 = (char*)transliterate(zWord, nWord);
+    }
+    if( zK1==0 ) return SQLITE_NOMEM;
+    for(i=0; (c = zK1[i])!=0; i++){   /* fold ASCII upper case to lower case; i ends up as the length of zK1 */
+       if( c>='A' && c<='Z' ) zK1[i] += 'a' - 'A';
+    }
+    zK2 = (char*)phoneticHash((const unsigned char*)zK1, i);
+    if( zK2==0 ){
+      sqlite3_free(zK1);
+      return SQLITE_NOMEM;
+    }
+    if( sqlite3_value_type(argv[0])==SQLITE_NULL ){   /* no existing rowid: this is an INSERT */
+      spellfix1DbExec(&rc, db,
+             "INSERT INTO \"%w\".\"%w_vocab\"(rank,langid,word,k1,k2) "
+             "VALUES(%d,%d,%Q,%Q,%Q)",
+             p->zDbName, p->zTableName,
+             iRank, iLang, zWord, zK1, zK2
+      );
+      *pRowid = sqlite3_last_insert_rowid(db);
+    }else{
+      rowid = sqlite3_value_int64(argv[0]);
+      newRowid = *pRowid = sqlite3_value_int64(argv[1]);
+      spellfix1DbExec(&rc, db,
+             "UPDATE \"%w\".\"%w_vocab\" SET id=%lld, rank=%d, langid=%d,"
+             " word=%Q, k1=%Q, k2=%Q WHERE id=%lld",
+             p->zDbName, p->zTableName, newRowid, iRank, iLang,
+             zWord, zK1, zK2, rowid
+      );
+    }
+    sqlite3_free(zK1);
+    sqlite3_free(zK2);
+  }
+  return rc;
+}
+
+/*
+** Rename the spellfix1 table.
+*/
+static int spellfix1Rename(sqlite3_vtab *pVTab, const char *zNew){
+  spellfix1_vtab *pTab = (spellfix1_vtab*)pVTab;
+  int rc = SQLITE_OK;
+  char *zCopy;
+
+  /* Take a private copy of the new name before touching the database. */
+  zCopy = sqlite3_mprintf("%s", zNew);
+  if( zCopy==0 ) return SQLITE_NOMEM;
+
+  spellfix1DbExec(&rc, pTab->db,
+     "ALTER TABLE \"%w\".\"%w_vocab\" RENAME TO \"%w_vocab\"",
+     pTab->zDbName, pTab->zTableName, zCopy
+  );
+  if( rc!=SQLITE_OK ){
+    /* The shadow table kept its old name, so keep the old name here too. */
+    sqlite3_free(zCopy);
+    return rc;
+  }
+  sqlite3_free(pTab->zTableName);
+  pTab->zTableName = zCopy;
+  return rc;
+}
+
+
+/*
+** The method table for the spellfix1 fuzzy-search virtual table.
+*/
+static sqlite3_module spellfix1Module = {
+  0,                       /* iVersion */
+  spellfix1Create,         /* xCreate - CREATE VIRTUAL TABLE */
+  spellfix1Connect,        /* xConnect - attach to an existing table */
+  spellfix1BestIndex,      /* xBestIndex - choose a query plan */
+  spellfix1Disconnect,     /* xDisconnect - drop a connection */
+  spellfix1Destroy,        /* xDestroy - DROP TABLE */
+  spellfix1Open,           /* xOpen - create a cursor */
+  spellfix1Close,          /* xClose - dispose of a cursor */
+  spellfix1Filter,         /* xFilter - begin a scan */
+  spellfix1Next,           /* xNext - step to the next row */
+  spellfix1Eof,            /* xEof - end-of-scan test */
+  spellfix1Column,         /* xColumn - fetch a column value */
+  spellfix1Rowid,          /* xRowid - fetch the rowid */
+  spellfix1Update,         /* xUpdate */
+  0,                       /* xBegin */
+  0,                       /* xSync */
+  0,                       /* xCommit */
+  0,                       /* xRollback */
+  0,                       /* xFindMethod */
+  spellfix1Rename,         /* xRename */
+};
+
+/*
+** Register the various functions and the virtual table.
+*/
+static int spellfix1Register(sqlite3 *db){
+  int rc = SQLITE_OK;
+  int i;
+  rc = sqlite3_create_function(db, "spellfix1_translit", 1, SQLITE_UTF8, 0,
+                                  transliterateSqlFunc, 0, 0);
+  if( rc==SQLITE_OK ){
+    rc = sqlite3_create_function(db, "spellfix1_editdist", 2, SQLITE_UTF8, 0,
+                                  editdistSqlFunc, 0, 0);
+  }
+  if( rc==SQLITE_OK ){
+    rc = sqlite3_create_function(db, "spellfix1_phonehash", 1, SQLITE_UTF8, 0,
+                                  phoneticHashSqlFunc, 0, 0);
+  }
+  if( rc==SQLITE_OK ){
+    rc = sqlite3_create_function(db, "spellfix1_scriptcode", 1, SQLITE_UTF8, 0,
+                                  scriptCodeSqlFunc, 0, 0);
+  }
+  if( rc==SQLITE_OK ){
+    rc = sqlite3_create_module(db, "spellfix1", &spellfix1Module, 0);
+  }
+  if( rc==SQLITE_OK ){
+    rc = editDist3Install(db);
+  }
+
+  /* Verify sanity of the translit[] table */
+  for(i=0; i   /* NOTE(review): everything from here up to the second #include below appears to have been lost from this copy: the rest of this function, the end of the spellfix file and the start of the wholenumber file; restore from the upstream files */
+#include
+
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+
+
+/* A wholenumber cursor object */
+typedef struct wholenumber_cursor wholenumber_cursor;
+struct wholenumber_cursor {
+  sqlite3_vtab_cursor base;  /* Base class - must be first */
+  sqlite3_int64 iValue;      /* Current value */
+  sqlite3_int64 mxValue;     /* Maximum value */
+};
+
+/* Methods for the wholenumber module */
+static int wholenumberConnect(
+  sqlite3 *db,
+  void *pAux,
+  int argc, const char *const*argv,
+  sqlite3_vtab **ppVtab,
+  char **pzErr
+){
+  sqlite3_vtab *pNew;
+  pNew = *ppVtab = sqlite3_malloc( sizeof(*pNew) );
+  if( pNew==0 ) return SQLITE_NOMEM;
+  sqlite3_declare_vtab(db, "CREATE TABLE x(value)");   /* NOTE(review): the result code is not checked */
+  memset(pNew, 0, sizeof(*pNew));
+  return SQLITE_OK;
+}
+/* Note that for this virtual table, the xCreate and xConnect
+** methods are identical. */
+
+static int wholenumberDisconnect(sqlite3_vtab *pVtab){
+  sqlite3_free(pVtab);
+  return SQLITE_OK;
+}
+/* The xDisconnect and xDestroy methods are also the same */
+
+
+/*
+** Open a new wholenumber cursor.
+*/ +static int wholenumberOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){ + wholenumber_cursor *pCur; + pCur = sqlite3_malloc( sizeof(*pCur) ); + if( pCur==0 ) return SQLITE_NOMEM; + memset(pCur, 0, sizeof(*pCur)); + *ppCursor = &pCur->base; + return SQLITE_OK; +} + +/* +** Close a wholenumber cursor. +*/ +static int wholenumberClose(sqlite3_vtab_cursor *cur){ + sqlite3_free(cur); + return SQLITE_OK; +} + + +/* +** Advance a cursor to its next row of output +*/ +static int wholenumberNext(sqlite3_vtab_cursor *cur){ + wholenumber_cursor *pCur = (wholenumber_cursor*)cur; + pCur->iValue++; + return SQLITE_OK; +} + +/* +** Return the value associated with a wholenumber. +*/ +static int wholenumberColumn( + sqlite3_vtab_cursor *cur, + sqlite3_context *ctx, + int i +){ + wholenumber_cursor *pCur = (wholenumber_cursor*)cur; + sqlite3_result_int64(ctx, pCur->iValue); + return SQLITE_OK; +} + +/* +** The rowid. +*/ +static int wholenumberRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){ + wholenumber_cursor *pCur = (wholenumber_cursor*)cur; + *pRowid = pCur->iValue; + return SQLITE_OK; +} + +/* +** When the wholenumber_cursor.rLimit value is 0 or less, that is a signal +** that the cursor has nothing more to output. +*/ +static int wholenumberEof(sqlite3_vtab_cursor *cur){ + wholenumber_cursor *pCur = (wholenumber_cursor*)cur; + return pCur->iValue>pCur->mxValue || pCur->iValue==0; +} + +/* +** Called to "rewind" a cursor back to the beginning so that +** it starts its output over again. Always called at least once +** prior to any wholenumberColumn, wholenumberRowid, or wholenumberEof call. 
+**
+**    idxNum       Constraints
+**    ------     ---------------------
+**      0           (none)
+**      1           value > $argv0
+**      2           value >= $argv0
+**      4           value < $argv0
+**      8           value <= $argv0
+**
+**      5           value > $argv0 AND value < $argv1
+**      6           value >= $argv0 AND value < $argv1
+**      9           value > $argv0 AND value <= $argv1
+**     10           value >= $argv0 AND value <= $argv1
+**
+** The scan runs over the range 1 through 4294967295, narrowed by the
+** constraints.  A bound that excludes the whole range, or a NULL bound,
+** produces an empty scan (iValue>mxValue) instead of being ignored.
+**
+** NOTE(review): bounds are read with sqlite3_value_int64(), so a
+** non-integer bound such as 2.5 is truncated before it is applied.
+*/
+static int wholenumberFilter(
+  sqlite3_vtab_cursor *pVtabCursor,
+  int idxNum, const char *idxStr,
+  int argc, sqlite3_value **argv
+){
+  wholenumber_cursor *pCur = (wholenumber_cursor *)pVtabCursor;
+  const sqlite3_int64 mxWhole = 0xffffffff;  /* 4294967295 */
+  sqlite3_int64 v;
+  int i = 0;
+  pCur->iValue = 1;
+  pCur->mxValue = mxWhole;
+  if( idxNum & 3 ){
+    /* Lower bound: "value > v" (bit 1) or "value >= v" (bit 2) */
+    if( sqlite3_value_type(argv[0])==SQLITE_NULL ){
+      pCur->mxValue = 0;            /* a comparison against NULL is never true */
+    }else{
+      v = sqlite3_value_int64(argv[0]);
+      if( v>mxWhole ){
+        v = mxWhole + 1;            /* past the end; also keeps v+1 from overflowing */
+      }else{
+        v += (idxNum&1);            /* "value > v" is "value >= v+1" */
+      }
+      if( v>pCur->iValue ) pCur->iValue = v;
+    }
+    i++;
+  }
+  if( idxNum & 12 ){
+    /* Upper bound: "value < v" (bit 4) or "value <= v" (bit 8) */
+    if( sqlite3_value_type(argv[i])==SQLITE_NULL ){
+      pCur->mxValue = 0;            /* a comparison against NULL is never true */
+    }else{
+      v = sqlite3_value_int64(argv[i]);
+      if( v<1 ){
+        v = 0;                      /* below the range; also keeps v-1 from overflowing */
+      }else{
+        v -= ((idxNum>>2)&1);       /* "value < v" is "value <= v-1" */
+      }
+      if( v<pCur->mxValue ) pCur->mxValue = v;
+    }
+  }
+  return SQLITE_OK;
+}
+
+/*
+** Search for terms of these forms:
+**
+**  (1)  value > $value
+**  (2)  value >= $value
+**  (4)  value < $value
+**  (8)  value <= $value
+**
+** idxNum is an ORed combination of 1 or 2 with 4 or 8.
+**
+** The lower-bound term, if any, is passed to xFilter as the first
+** argument and the upper-bound term as the next one.  Both are marked
+** "omit".
+*/
+static int wholenumberBestIndex(
+  sqlite3_vtab *tab,
+  sqlite3_index_info *pIdxInfo
+){
+  int i;
+  int idxNum = 0;
+  int argvIdx = 1;
+  int iLowerTerm = -1;     /* constraint supplying the ">" or ">=" bound */
+  int iUpperTerm = -1;     /* constraint supplying the "<" or "<=" bound */
+  const struct sqlite3_index_constraint *pConstraint;
+  pConstraint = pIdxInfo->aConstraint;
+  for(i=0; i<pIdxInfo->nConstraint; i++, pConstraint++){
+    if( pConstraint->usable==0 ) continue;
+    if( (idxNum & 3)==0 && pConstraint->op==SQLITE_INDEX_CONSTRAINT_GT ){
+      idxNum |= 1;
+      iLowerTerm = i;
+    }
+    if( (idxNum & 3)==0 && pConstraint->op==SQLITE_INDEX_CONSTRAINT_GE ){
+      idxNum |= 2;
+      iLowerTerm = i;
+    }
+    if( (idxNum & 12)==0 && pConstraint->op==SQLITE_INDEX_CONSTRAINT_LT ){
+      idxNum |= 4;
+      iUpperTerm = i;
+    }
+    if( (idxNum & 12)==0 && pConstraint->op==SQLITE_INDEX_CONSTRAINT_LE ){
+      idxNum |= 8;
+      iUpperTerm = i;
+    }
+  }
+  pIdxInfo->idxNum = idxNum;
+  if( iLowerTerm>=0 ){
+    pIdxInfo->aConstraintUsage[iLowerTerm].argvIndex = argvIdx++;
+    pIdxInfo->aConstraintUsage[iLowerTerm].omit = 1;
+  }
+  if( iUpperTerm>=0 ){
+    pIdxInfo->aConstraintUsage[iUpperTerm].argvIndex = argvIdx;
+    pIdxInfo->aConstraintUsage[iUpperTerm].omit = 1;
+  }
+  if( pIdxInfo->nOrderBy==1
+   && pIdxInfo->aOrderBy[0].desc==0
+  ){
+    pIdxInfo->orderByConsumed = 1;
+  }
+  /* Without an upper bound the scan is very long; with both bounds it is
+  ** cheapest. */
+  if( (idxNum & 12)==0 ){
+    pIdxInfo->estimatedCost = (double)100000000;
+  }else if( (idxNum & 3)==0 ){
+    pIdxInfo->estimatedCost = (double)5;
+  }else{
+    pIdxInfo->estimatedCost = (double)1;
+  }
+  return SQLITE_OK;
+}
+
+/*
+** A virtual table module that provides read-only access to the
+** sequence of whole numbers.
+*/
+static sqlite3_module wholenumberModule = {
+  0,                         /* iVersion */
+  wholenumberConnect,        /* xCreate - same routine as xConnect */
+  wholenumberConnect,        /* xConnect */
+  wholenumberBestIndex,      /* xBestIndex */
+  wholenumberDisconnect,     /* xDisconnect */
+  wholenumberDisconnect,     /* xDestroy - same routine as xDisconnect */
+  wholenumberOpen,           /* xOpen - open a cursor */
+  wholenumberClose,          /* xClose - close a cursor */
+  wholenumberFilter,         /* xFilter - configure scan constraints */
+  wholenumberNext,           /* xNext - advance a cursor */
+  wholenumberEof,            /* xEof - check for end of scan */
+  wholenumberColumn,         /* xColumn - read data */
+  wholenumberRowid,          /* xRowid - read data */
+  0,                         /* xUpdate */
+  0,                         /* xBegin */
+  0,                         /* xSync */
+  0,                         /* xCommit */
+  0,                         /* xRollback */
+  0,                         /* xFindMethod */
+  0,                         /* xRename */
+};
+
+#endif /* SQLITE_OMIT_VIRTUALTABLE */
+
+#ifdef _WIN32
+__declspec(dllexport)
+#endif
+int sqlite3_wholenumber_init(
+  sqlite3 *db,
+  char **pzErrMsg,
+  const sqlite3_api_routines *pApi
+){
+  int rc = SQLITE_OK;
+  SQLITE_EXTENSION_INIT2(pApi);
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+  rc = sqlite3_create_module(db, "wholenumber", &wholenumberModule, 0);   /* registers the table under the module name "wholenumber" */
+#endif
+  return rc;   /* stays SQLITE_OK when virtual tables are compiled out */
+}
-- cgit v1.2.3