diff options
author | Hans-Christoph Steiner <hans@eds.org> | 2012-03-30 20:42:12 -0400 |
---|---|---|
committer | Hans-Christoph Steiner <hans@eds.org> | 2012-03-30 20:42:12 -0400 |
commit | 7bb481fda9ecb134804b49c2ce77ca28f7eea583 (patch) | |
tree | 31b520b9914d3e2453968abe375f2c102772c3dc /ext/fts3/fts3_snippet.c |
Imported Upstream version 2.0.3
Diffstat (limited to 'ext/fts3/fts3_snippet.c')
-rw-r--r-- | ext/fts3/fts3_snippet.c | 1500 |
1 files changed, 1500 insertions, 0 deletions
diff --git a/ext/fts3/fts3_snippet.c b/ext/fts3/fts3_snippet.c new file mode 100644 index 0000000..23ef25c --- /dev/null +++ b/ext/fts3/fts3_snippet.c @@ -0,0 +1,1500 @@ +/* +** 2009 Oct 23 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +*/ + +#include "fts3Int.h" +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) + +#include <string.h> +#include <assert.h> + +/* +** Characters that may appear in the second argument to matchinfo(). +*/ +#define FTS3_MATCHINFO_NPHRASE 'p' /* 1 value */ +#define FTS3_MATCHINFO_NCOL 'c' /* 1 value */ +#define FTS3_MATCHINFO_NDOC 'n' /* 1 value */ +#define FTS3_MATCHINFO_AVGLENGTH 'a' /* nCol values */ +#define FTS3_MATCHINFO_LENGTH 'l' /* nCol values */ +#define FTS3_MATCHINFO_LCS 's' /* nCol values */ +#define FTS3_MATCHINFO_HITS 'x' /* 3*nCol*nPhrase values */ + +/* +** The default value for the second argument to matchinfo(). +*/ +#define FTS3_MATCHINFO_DEFAULT "pcx" + + +/* +** Used as an fts3ExprIterate() context when loading phrase doclists to +** Fts3Expr.aDoclist[]/nDoclist. +*/ +typedef struct LoadDoclistCtx LoadDoclistCtx; +struct LoadDoclistCtx { + Fts3Cursor *pCsr; /* FTS3 Cursor */ + int nPhrase; /* Number of phrases seen so far */ + int nToken; /* Number of tokens seen so far */ +}; + +/* +** The following types are used as part of the implementation of the +** fts3BestSnippet() routine. +*/ +typedef struct SnippetIter SnippetIter; +typedef struct SnippetPhrase SnippetPhrase; +typedef struct SnippetFragment SnippetFragment; + +struct SnippetIter { + Fts3Cursor *pCsr; /* Cursor snippet is being generated from */ + int iCol; /* Extract snippet from this column */ + int nSnippet; /* Requested snippet length (in tokens) */ + int nPhrase; /* Number of phrases in query */ + SnippetPhrase *aPhrase; /* Array of size nPhrase */ + int iCurrent; /* First token of current snippet */ +}; + +struct SnippetPhrase { + int nToken; /* Number of tokens in phrase */ + char *pList; /* Pointer to start of phrase position list */ + int iHead; /* Next value in position list */ + char *pHead; /* Position list data following iHead */ + int iTail; /* Next value in trailing position list */ + char *pTail; /* Position list data following iTail */ +}; + +struct SnippetFragment { + int iCol; /* Column snippet is extracted from */ + int iPos; /* Index of first token in snippet */ + u64 covered; /* Mask of query phrases covered */ + u64 hlmask; /* Mask of snippet terms to highlight */ +}; + +/* +** This type is used as an fts3ExprIterate() context object while +** accumulating the data returned by the matchinfo() function. +*/ +typedef struct MatchInfo MatchInfo; +struct MatchInfo { + Fts3Cursor *pCursor; /* FTS3 Cursor */ + int nCol; /* Number of columns in table */ + int nPhrase; /* Number of matchable phrases in query */ + sqlite3_int64 nDoc; /* Number of docs in database */ + u32 *aMatchinfo; /* Pre-allocated buffer */ +}; + + + +/* +** The snippet() and offsets() functions both return text values. An instance +** of the following structure is used to accumulate those values while the +** functions are running. See fts3StringAppend() for details. +*/ +typedef struct StrBuffer StrBuffer; +struct StrBuffer { + char *z; /* Pointer to buffer containing string */ + int n; /* Length of z in bytes (excl. nul-term) */ + int nAlloc; /* Allocated size of buffer z in bytes */ +}; + + +/* +** This function is used to help iterate through a position-list. A position +** list is a list of unique integers, sorted from smallest to largest. Each +** element of the list is represented by an FTS3 varint that takes the value +** of the difference between the current element and the previous one plus +** two. For example, to store the position-list: +** +** 4 9 113 +** +** the three varints: +** +** 6 7 106 +** +** are encoded. +** +** When this function is called, *pp points to the start of an element of +** the list. *piPos contains the value of the previous entry in the list. +** After it returns, *piPos contains the value of the next element of the +** list and *pp is advanced to the following varint. +*/ +static void fts3GetDeltaPosition(char **pp, int *piPos){ + int iVal; + *pp += sqlite3Fts3GetVarint32(*pp, &iVal); + *piPos += (iVal-2); +} + +/* +** Helper function for fts3ExprIterate() (see below). +*/ +static int fts3ExprIterate2( + Fts3Expr *pExpr, /* Expression to iterate phrases of */ + int *piPhrase, /* Pointer to phrase counter */ + int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */ + void *pCtx /* Second argument to pass to callback */ +){ + int rc; /* Return code */ + int eType = pExpr->eType; /* Type of expression node pExpr */ + + if( eType!=FTSQUERY_PHRASE ){ + assert( pExpr->pLeft && pExpr->pRight ); + rc = fts3ExprIterate2(pExpr->pLeft, piPhrase, x, pCtx); + if( rc==SQLITE_OK && eType!=FTSQUERY_NOT ){ + rc = fts3ExprIterate2(pExpr->pRight, piPhrase, x, pCtx); + } + }else{ + rc = x(pExpr, *piPhrase, pCtx); + (*piPhrase)++; + } + return rc; +} + +/* +** Iterate through all phrase nodes in an FTS3 query, except those that +** are part of a sub-tree that is the right-hand-side of a NOT operator. +** For each phrase node found, the supplied callback function is invoked. +** +** If the callback function returns anything other than SQLITE_OK, +** the iteration is abandoned and the error code returned immediately. +** Otherwise, SQLITE_OK is returned after a callback has been made for +** all eligible phrase nodes. +*/ +static int fts3ExprIterate( + Fts3Expr *pExpr, /* Expression to iterate phrases of */ + int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */ + void *pCtx /* Second argument to pass to callback */ +){ + int iPhrase = 0; /* Variable used as the phrase counter */ + return fts3ExprIterate2(pExpr, &iPhrase, x, pCtx); +} + +/* +** This is an fts3ExprIterate() callback used while loading the doclists +** for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also +** fts3ExprLoadDoclists(). +*/ +static int fts3ExprLoadDoclistsCb(Fts3Expr *pExpr, int iPhrase, void *ctx){ + int rc = SQLITE_OK; + Fts3Phrase *pPhrase = pExpr->pPhrase; + LoadDoclistCtx *p = (LoadDoclistCtx *)ctx; + + UNUSED_PARAMETER(iPhrase); + + p->nPhrase++; + p->nToken += pPhrase->nToken; + + return rc; +} + +/* +** Load the doclists for each phrase in the query associated with FTS3 cursor +** pCsr. +** +** If pnPhrase is not NULL, then *pnPhrase is set to the number of matchable +** phrases in the expression (all phrases except those directly or +** indirectly descended from the right-hand-side of a NOT operator). If +** pnToken is not NULL, then it is set to the number of tokens in all +** matchable phrases of the expression. +*/ +static int fts3ExprLoadDoclists( + Fts3Cursor *pCsr, /* Fts3 cursor for current query */ + int *pnPhrase, /* OUT: Number of phrases in query */ + int *pnToken /* OUT: Number of tokens in query */ +){ + int rc; /* Return Code */ + LoadDoclistCtx sCtx = {0,0,0}; /* Context for fts3ExprIterate() */ + sCtx.pCsr = pCsr; + rc = fts3ExprIterate(pCsr->pExpr, fts3ExprLoadDoclistsCb, (void *)&sCtx); + if( pnPhrase ) *pnPhrase = sCtx.nPhrase; + if( pnToken ) *pnToken = sCtx.nToken; + return rc; +} + +static int fts3ExprPhraseCountCb(Fts3Expr *pExpr, int iPhrase, void *ctx){ + (*(int *)ctx)++; + UNUSED_PARAMETER(pExpr); + UNUSED_PARAMETER(iPhrase); + return SQLITE_OK; +} +static int fts3ExprPhraseCount(Fts3Expr *pExpr){ + int nPhrase = 0; + (void)fts3ExprIterate(pExpr, fts3ExprPhraseCountCb, (void *)&nPhrase); + return nPhrase; +} + +/* +** Advance the position list iterator specified by the first two +** arguments so that it points to the first element with a value greater +** than or equal to parameter iNext. +*/ +static void fts3SnippetAdvance(char **ppIter, int *piIter, int iNext){ + char *pIter = *ppIter; + if( pIter ){ + int iIter = *piIter; + + while( iIter<iNext ){ + if( 0==(*pIter & 0xFE) ){ + iIter = -1; + pIter = 0; + break; + } + fts3GetDeltaPosition(&pIter, &iIter); + } + + *piIter = iIter; + *ppIter = pIter; + } +} + +/* +** Advance the snippet iterator to the next candidate snippet. +*/ +static int fts3SnippetNextCandidate(SnippetIter *pIter){ + int i; /* Loop counter */ + + if( pIter->iCurrent<0 ){ + /* The SnippetIter object has just been initialized. The first snippet + ** candidate always starts at offset 0 (even if this candidate has a + ** score of 0.0). + */ + pIter->iCurrent = 0; + + /* Advance the 'head' iterator of each phrase to the first offset that + ** is greater than or equal to (iNext+nSnippet). + */ + for(i=0; i<pIter->nPhrase; i++){ + SnippetPhrase *pPhrase = &pIter->aPhrase[i]; + fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, pIter->nSnippet); + } + }else{ + int iStart; + int iEnd = 0x7FFFFFFF; + + for(i=0; i<pIter->nPhrase; i++){ + SnippetPhrase *pPhrase = &pIter->aPhrase[i]; + if( pPhrase->pHead && pPhrase->iHead<iEnd ){ + iEnd = pPhrase->iHead; + } + } + if( iEnd==0x7FFFFFFF ){ + return 1; + } + + pIter->iCurrent = iStart = iEnd - pIter->nSnippet + 1; + for(i=0; i<pIter->nPhrase; i++){ + SnippetPhrase *pPhrase = &pIter->aPhrase[i]; + fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, iEnd+1); + fts3SnippetAdvance(&pPhrase->pTail, &pPhrase->iTail, iStart); + } + } + + return 0; +} + +/* +** Retrieve information about the current candidate snippet of snippet +** iterator pIter. +*/ +static void fts3SnippetDetails( + SnippetIter *pIter, /* Snippet iterator */ + u64 mCovered, /* Bitmask of phrases already covered */ + int *piToken, /* OUT: First token of proposed snippet */ + int *piScore, /* OUT: "Score" for this snippet */ + u64 *pmCover, /* OUT: Bitmask of phrases covered */ + u64 *pmHighlight /* OUT: Bitmask of terms to highlight */ +){ + int iStart = pIter->iCurrent; /* First token of snippet */ + int iScore = 0; /* Score of this snippet */ + int i; /* Loop counter */ + u64 mCover = 0; /* Mask of phrases covered by this snippet */ + u64 mHighlight = 0; /* Mask of tokens to highlight in snippet */ + + for(i=0; i<pIter->nPhrase; i++){ + SnippetPhrase *pPhrase = &pIter->aPhrase[i]; + if( pPhrase->pTail ){ + char *pCsr = pPhrase->pTail; + int iCsr = pPhrase->iTail; + + while( iCsr<(iStart+pIter->nSnippet) ){ + int j; + u64 mPhrase = (u64)1 << i; + u64 mPos = (u64)1 << (iCsr - iStart); + assert( iCsr>=iStart ); + if( (mCover|mCovered)&mPhrase ){ + iScore++; + }else{ + iScore += 1000; + } + mCover |= mPhrase; + + for(j=0; j<pPhrase->nToken; j++){ + mHighlight |= (mPos>>j); + } + + if( 0==(*pCsr & 0x0FE) ) break; + fts3GetDeltaPosition(&pCsr, &iCsr); + } + } + } + + /* Set the output variables before returning. */ + *piToken = iStart; + *piScore = iScore; + *pmCover = mCover; + *pmHighlight = mHighlight; +} + +/* +** This function is an fts3ExprIterate() callback used by fts3BestSnippet(). +** Each invocation populates an element of the SnippetIter.aPhrase[] array. +*/ +static int fts3SnippetFindPositions(Fts3Expr *pExpr, int iPhrase, void *ctx){ + SnippetIter *p = (SnippetIter *)ctx; + SnippetPhrase *pPhrase = &p->aPhrase[iPhrase]; + char *pCsr; + + pPhrase->nToken = pExpr->pPhrase->nToken; + + pCsr = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol); + if( pCsr ){ + int iFirst = 0; + pPhrase->pList = pCsr; + fts3GetDeltaPosition(&pCsr, &iFirst); + assert( iFirst>=0 ); + pPhrase->pHead = pCsr; + pPhrase->pTail = pCsr; + pPhrase->iHead = iFirst; + pPhrase->iTail = iFirst; + }else{ + assert( pPhrase->pList==0 && pPhrase->pHead==0 && pPhrase->pTail==0 ); + } + + return SQLITE_OK; +} + +/* +** Select the fragment of text consisting of nFragment contiguous tokens +** from column iCol that represent the "best" snippet. The best snippet +** is the snippet with the highest score, where scores are calculated +** by adding: +** +** (a) +1 point for each occurence of a matchable phrase in the snippet. +** +** (b) +1000 points for the first occurence of each matchable phrase in +** the snippet for which the corresponding mCovered bit is not set. +** +** The selected snippet parameters are stored in structure *pFragment before +** returning. The score of the selected snippet is stored in *piScore +** before returning. +*/ +static int fts3BestSnippet( + int nSnippet, /* Desired snippet length */ + Fts3Cursor *pCsr, /* Cursor to create snippet for */ + int iCol, /* Index of column to create snippet from */ + u64 mCovered, /* Mask of phrases already covered */ + u64 *pmSeen, /* IN/OUT: Mask of phrases seen */ + SnippetFragment *pFragment, /* OUT: Best snippet found */ + int *piScore /* OUT: Score of snippet pFragment */ +){ + int rc; /* Return Code */ + int nList; /* Number of phrases in expression */ + SnippetIter sIter; /* Iterates through snippet candidates */ + int nByte; /* Number of bytes of space to allocate */ + int iBestScore = -1; /* Best snippet score found so far */ + int i; /* Loop counter */ + + memset(&sIter, 0, sizeof(sIter)); + + /* Iterate through the phrases in the expression to count them. The same + ** callback makes sure the doclists are loaded for each phrase. + */ + rc = fts3ExprLoadDoclists(pCsr, &nList, 0); + if( rc!=SQLITE_OK ){ + return rc; + } + + /* Now that it is known how many phrases there are, allocate and zero + ** the required space using malloc(). + */ + nByte = sizeof(SnippetPhrase) * nList; + sIter.aPhrase = (SnippetPhrase *)sqlite3_malloc(nByte); + if( !sIter.aPhrase ){ + return SQLITE_NOMEM; + } + memset(sIter.aPhrase, 0, nByte); + + /* Initialize the contents of the SnippetIter object. Then iterate through + ** the set of phrases in the expression to populate the aPhrase[] array. + */ + sIter.pCsr = pCsr; + sIter.iCol = iCol; + sIter.nSnippet = nSnippet; + sIter.nPhrase = nList; + sIter.iCurrent = -1; + (void)fts3ExprIterate(pCsr->pExpr, fts3SnippetFindPositions, (void *)&sIter); + + /* Set the *pmSeen output variable. */ + for(i=0; i<nList; i++){ + if( sIter.aPhrase[i].pHead ){ + *pmSeen |= (u64)1 << i; + } + } + + /* Loop through all candidate snippets. Store the best snippet in + ** *pFragment. Store its associated 'score' in iBestScore. + */ + pFragment->iCol = iCol; + while( !fts3SnippetNextCandidate(&sIter) ){ + int iPos; + int iScore; + u64 mCover; + u64 mHighlight; + fts3SnippetDetails(&sIter, mCovered, &iPos, &iScore, &mCover, &mHighlight); + assert( iScore>=0 ); + if( iScore>iBestScore ){ + pFragment->iPos = iPos; + pFragment->hlmask = mHighlight; + pFragment->covered = mCover; + iBestScore = iScore; + } + } + + sqlite3_free(sIter.aPhrase); + *piScore = iBestScore; + return SQLITE_OK; +} + + +/* +** Append a string to the string-buffer passed as the first argument. +** +** If nAppend is negative, then the length of the string zAppend is +** determined using strlen(). +*/ +static int fts3StringAppend( + StrBuffer *pStr, /* Buffer to append to */ + const char *zAppend, /* Pointer to data to append to buffer */ + int nAppend /* Size of zAppend in bytes (or -1) */ +){ + if( nAppend<0 ){ + nAppend = (int)strlen(zAppend); + } + + /* If there is insufficient space allocated at StrBuffer.z, use realloc() + ** to grow the buffer until so that it is big enough to accomadate the + ** appended data. + */ + if( pStr->n+nAppend+1>=pStr->nAlloc ){ + int nAlloc = pStr->nAlloc+nAppend+100; + char *zNew = sqlite3_realloc(pStr->z, nAlloc); + if( !zNew ){ + return SQLITE_NOMEM; + } + pStr->z = zNew; + pStr->nAlloc = nAlloc; + } + + /* Append the data to the string buffer. */ + memcpy(&pStr->z[pStr->n], zAppend, nAppend); + pStr->n += nAppend; + pStr->z[pStr->n] = '\0'; + + return SQLITE_OK; +} + +/* +** The fts3BestSnippet() function often selects snippets that end with a +** query term. That is, the final term of the snippet is always a term +** that requires highlighting. For example, if 'X' is a highlighted term +** and '.' is a non-highlighted term, BestSnippet() may select: +** +** ........X.....X +** +** This function "shifts" the beginning of the snippet forward in the +** document so that there are approximately the same number of +** non-highlighted terms to the right of the final highlighted term as there +** are to the left of the first highlighted term. For example, to this: +** +** ....X.....X.... +** +** This is done as part of extracting the snippet text, not when selecting +** the snippet. Snippet selection is done based on doclists only, so there +** is no way for fts3BestSnippet() to know whether or not the document +** actually contains terms that follow the final highlighted term. +*/ +static int fts3SnippetShift( + Fts3Table *pTab, /* FTS3 table snippet comes from */ + int nSnippet, /* Number of tokens desired for snippet */ + const char *zDoc, /* Document text to extract snippet from */ + int nDoc, /* Size of buffer zDoc in bytes */ + int *piPos, /* IN/OUT: First token of snippet */ + u64 *pHlmask /* IN/OUT: Mask of tokens to highlight */ +){ + u64 hlmask = *pHlmask; /* Local copy of initial highlight-mask */ + + if( hlmask ){ + int nLeft; /* Tokens to the left of first highlight */ + int nRight; /* Tokens to the right of last highlight */ + int nDesired; /* Ideal number of tokens to shift forward */ + + for(nLeft=0; !(hlmask & ((u64)1 << nLeft)); nLeft++); + for(nRight=0; !(hlmask & ((u64)1 << (nSnippet-1-nRight))); nRight++); + nDesired = (nLeft-nRight)/2; + + /* Ideally, the start of the snippet should be pushed forward in the + ** document nDesired tokens. This block checks if there are actually + ** nDesired tokens to the right of the snippet. If so, *piPos and + ** *pHlMask are updated to shift the snippet nDesired tokens to the + ** right. Otherwise, the snippet is shifted by the number of tokens + ** available. + */ + if( nDesired>0 ){ + int nShift; /* Number of tokens to shift snippet by */ + int iCurrent = 0; /* Token counter */ + int rc; /* Return Code */ + sqlite3_tokenizer_module *pMod; + sqlite3_tokenizer_cursor *pC; + pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule; + + /* Open a cursor on zDoc/nDoc. Check if there are (nSnippet+nDesired) + ** or more tokens in zDoc/nDoc. + */ + rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC); + if( rc!=SQLITE_OK ){ + return rc; + } + pC->pTokenizer = pTab->pTokenizer; + while( rc==SQLITE_OK && iCurrent<(nSnippet+nDesired) ){ + const char *ZDUMMY; int DUMMY1, DUMMY2, DUMMY3; + rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent); + } + pMod->xClose(pC); + if( rc!=SQLITE_OK && rc!=SQLITE_DONE ){ return rc; } + + nShift = (rc==SQLITE_DONE)+iCurrent-nSnippet; + assert( nShift<=nDesired ); + if( nShift>0 ){ + *piPos += nShift; + *pHlmask = hlmask >> nShift; + } + } + } + return SQLITE_OK; +} + +/* +** Extract the snippet text for fragment pFragment from cursor pCsr and +** append it to string buffer pOut. +*/ +static int fts3SnippetText( + Fts3Cursor *pCsr, /* FTS3 Cursor */ + SnippetFragment *pFragment, /* Snippet to extract */ + int iFragment, /* Fragment number */ + int isLast, /* True for final fragment in snippet */ + int nSnippet, /* Number of tokens in extracted snippet */ + const char *zOpen, /* String inserted before highlighted term */ + const char *zClose, /* String inserted after highlighted term */ + const char *zEllipsis, /* String inserted between snippets */ + StrBuffer *pOut /* Write output here */ +){ + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + int rc; /* Return code */ + const char *zDoc; /* Document text to extract snippet from */ + int nDoc; /* Size of zDoc in bytes */ + int iCurrent = 0; /* Current token number of document */ + int iEnd = 0; /* Byte offset of end of current token */ + int isShiftDone = 0; /* True after snippet is shifted */ + int iPos = pFragment->iPos; /* First token of snippet */ + u64 hlmask = pFragment->hlmask; /* Highlight-mask for snippet */ + int iCol = pFragment->iCol+1; /* Query column to extract text from */ + sqlite3_tokenizer_module *pMod; /* Tokenizer module methods object */ + sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor open on zDoc/nDoc */ + const char *ZDUMMY; /* Dummy argument used with tokenizer */ + int DUMMY1; /* Dummy argument used with tokenizer */ + + zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol); + if( zDoc==0 ){ + if( sqlite3_column_type(pCsr->pStmt, iCol)!=SQLITE_NULL ){ + return SQLITE_NOMEM; + } + return SQLITE_OK; + } + nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol); + + /* Open a token cursor on the document. */ + pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule; + rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC); + if( rc!=SQLITE_OK ){ + return rc; + } + pC->pTokenizer = pTab->pTokenizer; + + while( rc==SQLITE_OK ){ + int iBegin; /* Offset in zDoc of start of token */ + int iFin; /* Offset in zDoc of end of token */ + int isHighlight; /* True for highlighted terms */ + + rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent); + if( rc!=SQLITE_OK ){ + if( rc==SQLITE_DONE ){ + /* Special case - the last token of the snippet is also the last token + ** of the column. Append any punctuation that occurred between the end + ** of the previous token and the end of the document to the output. + ** Then break out of the loop. */ + rc = fts3StringAppend(pOut, &zDoc[iEnd], -1); + } + break; + } + if( iCurrent<iPos ){ continue; } + + if( !isShiftDone ){ + int n = nDoc - iBegin; + rc = fts3SnippetShift(pTab, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask); + isShiftDone = 1; + + /* Now that the shift has been done, check if the initial "..." are + ** required. They are required if (a) this is not the first fragment, + ** or (b) this fragment does not begin at position 0 of its column. + */ + if( rc==SQLITE_OK && (iPos>0 || iFragment>0) ){ + rc = fts3StringAppend(pOut, zEllipsis, -1); + } + if( rc!=SQLITE_OK || iCurrent<iPos ) continue; + } + + if( iCurrent>=(iPos+nSnippet) ){ + if( isLast ){ + rc = fts3StringAppend(pOut, zEllipsis, -1); + } + break; + } + + /* Set isHighlight to true if this term should be highlighted. */ + isHighlight = (hlmask & ((u64)1 << (iCurrent-iPos)))!=0; + + if( iCurrent>iPos ) rc = fts3StringAppend(pOut, &zDoc[iEnd], iBegin-iEnd); + if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zOpen, -1); + if( rc==SQLITE_OK ) rc = fts3StringAppend(pOut, &zDoc[iBegin], iFin-iBegin); + if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zClose, -1); + + iEnd = iFin; + } + + pMod->xClose(pC); + return rc; +} + + +/* +** This function is used to count the entries in a column-list (a +** delta-encoded list of term offsets within a single column of a single +** row). When this function is called, *ppCollist should point to the +** beginning of the first varint in the column-list (the varint that +** contains the position of the first matching term in the column data). +** Before returning, *ppCollist is set to point to the first byte after +** the last varint in the column-list (either the 0x00 signifying the end +** of the position-list, or the 0x01 that precedes the column number of +** the next column in the position-list). +** +** The number of elements in the column-list is returned. +*/ +static int fts3ColumnlistCount(char **ppCollist){ + char *pEnd = *ppCollist; + char c = 0; + int nEntry = 0; + + /* A column-list is terminated by either a 0x01 or 0x00. */ + while( 0xFE & (*pEnd | c) ){ + c = *pEnd++ & 0x80; + if( !c ) nEntry++; + } + + *ppCollist = pEnd; + return nEntry; +} + +/* +** fts3ExprIterate() callback used to collect the "global" matchinfo stats +** for a single query. +** +** fts3ExprIterate() callback to load the 'global' elements of a +** FTS3_MATCHINFO_HITS matchinfo array. The global stats are those elements +** of the matchinfo array that are constant for all rows returned by the +** current query. +** +** Argument pCtx is actually a pointer to a struct of type MatchInfo. This +** function populates Matchinfo.aMatchinfo[] as follows: +** +** for(iCol=0; iCol<nCol; iCol++){ +** aMatchinfo[3*iPhrase*nCol + 3*iCol + 1] = X; +** aMatchinfo[3*iPhrase*nCol + 3*iCol + 2] = Y; +** } +** +** where X is the number of matches for phrase iPhrase is column iCol of all +** rows of the table. Y is the number of rows for which column iCol contains +** at least one instance of phrase iPhrase. +** +** If the phrase pExpr consists entirely of deferred tokens, then all X and +** Y values are set to nDoc, where nDoc is the number of documents in the +** file system. This is done because the full-text index doclist is required +** to calculate these values properly, and the full-text index doclist is +** not available for deferred tokens. +*/ +static int fts3ExprGlobalHitsCb( + Fts3Expr *pExpr, /* Phrase expression node */ + int iPhrase, /* Phrase number (numbered from zero) */ + void *pCtx /* Pointer to MatchInfo structure */ +){ + MatchInfo *p = (MatchInfo *)pCtx; + return sqlite3Fts3EvalPhraseStats( + p->pCursor, pExpr, &p->aMatchinfo[3*iPhrase*p->nCol] + ); +} + +/* +** fts3ExprIterate() callback used to collect the "local" part of the +** FTS3_MATCHINFO_HITS array. The local stats are those elements of the +** array that are different for each row returned by the query. +*/ +static int fts3ExprLocalHitsCb( + Fts3Expr *pExpr, /* Phrase expression node */ + int iPhrase, /* Phrase number */ + void *pCtx /* Pointer to MatchInfo structure */ +){ + MatchInfo *p = (MatchInfo *)pCtx; + int iStart = iPhrase * p->nCol * 3; + int i; + + for(i=0; i<p->nCol; i++){ + char *pCsr; + pCsr = sqlite3Fts3EvalPhrasePoslist(p->pCursor, pExpr, i); + if( pCsr ){ + p->aMatchinfo[iStart+i*3] = fts3ColumnlistCount(&pCsr); + }else{ + p->aMatchinfo[iStart+i*3] = 0; + } + } + + return SQLITE_OK; +} + +static int fts3MatchinfoCheck( + Fts3Table *pTab, + char cArg, + char **pzErr +){ + if( (cArg==FTS3_MATCHINFO_NPHRASE) + || (cArg==FTS3_MATCHINFO_NCOL) + || (cArg==FTS3_MATCHINFO_NDOC && pTab->bHasStat) + || (cArg==FTS3_MATCHINFO_AVGLENGTH && pTab->bHasStat) + || (cArg==FTS3_MATCHINFO_LENGTH && pTab->bHasDocsize) + || (cArg==FTS3_MATCHINFO_LCS) + || (cArg==FTS3_MATCHINFO_HITS) + ){ + return SQLITE_OK; + } + *pzErr = sqlite3_mprintf("unrecognized matchinfo request: %c", cArg); + return SQLITE_ERROR; +} + +static int fts3MatchinfoSize(MatchInfo *pInfo, char cArg){ + int nVal; /* Number of integers output by cArg */ + + switch( cArg ){ + case FTS3_MATCHINFO_NDOC: + case FTS3_MATCHINFO_NPHRASE: + case FTS3_MATCHINFO_NCOL: + nVal = 1; + break; + + case FTS3_MATCHINFO_AVGLENGTH: + case FTS3_MATCHINFO_LENGTH: + case FTS3_MATCHINFO_LCS: + nVal = pInfo->nCol; + break; + + default: + assert( cArg==FTS3_MATCHINFO_HITS ); + nVal = pInfo->nCol * pInfo->nPhrase * 3; + break; + } + + return nVal; +} + +static int fts3MatchinfoSelectDoctotal( + Fts3Table *pTab, + sqlite3_stmt **ppStmt, + sqlite3_int64 *pnDoc, + const char **paLen +){ + sqlite3_stmt *pStmt; + const char *a; + sqlite3_int64 nDoc; + + if( !*ppStmt ){ + int rc = sqlite3Fts3SelectDoctotal(pTab, ppStmt); + if( rc!=SQLITE_OK ) return rc; + } + pStmt = *ppStmt; + assert( sqlite3_data_count(pStmt)==1 ); + + a = sqlite3_column_blob(pStmt, 0); + a += sqlite3Fts3GetVarint(a, &nDoc); + if( nDoc==0 ) return FTS_CORRUPT_VTAB; + *pnDoc = (u32)nDoc; + + if( paLen ) *paLen = a; + return SQLITE_OK; +} + +/* +** An instance of the following structure is used to store state while +** iterating through a multi-column position-list corresponding to the +** hits for a single phrase on a single row in order to calculate the +** values for a matchinfo() FTS3_MATCHINFO_LCS request. +*/ +typedef struct LcsIterator LcsIterator; +struct LcsIterator { + Fts3Expr *pExpr; /* Pointer to phrase expression */ + int iPosOffset; /* Tokens count up to end of this phrase */ + char *pRead; /* Cursor used to iterate through aDoclist */ + int iPos; /* Current position */ +}; + +/* +** If LcsIterator.iCol is set to the following value, the iterator has +** finished iterating through all offsets for all columns. +*/ +#define LCS_ITERATOR_FINISHED 0x7FFFFFFF; + +static int fts3MatchinfoLcsCb( + Fts3Expr *pExpr, /* Phrase expression node */ + int iPhrase, /* Phrase number (numbered from zero) */ + void *pCtx /* Pointer to MatchInfo structure */ +){ + LcsIterator *aIter = (LcsIterator *)pCtx; + aIter[iPhrase].pExpr = pExpr; + return SQLITE_OK; +} + +/* +** Advance the iterator passed as an argument to the next position. Return +** 1 if the iterator is at EOF or if it now points to the start of the +** position list for the next column. +*/ +static int fts3LcsIteratorAdvance(LcsIterator *pIter){ + char *pRead = pIter->pRead; + sqlite3_int64 iRead; + int rc = 0; + + pRead += sqlite3Fts3GetVarint(pRead, &iRead); + if( iRead==0 || iRead==1 ){ + pRead = 0; + rc = 1; + }else{ + pIter->iPos += (int)(iRead-2); + } + + pIter->pRead = pRead; + return rc; +} + +/* +** This function implements the FTS3_MATCHINFO_LCS matchinfo() flag. +** +** If the call is successful, the longest-common-substring lengths for each +** column are written into the first nCol elements of the pInfo->aMatchinfo[] +** array before returning. SQLITE_OK is returned in this case. +** +** Otherwise, if an error occurs, an SQLite error code is returned and the +** data written to the first nCol elements of pInfo->aMatchinfo[] is +** undefined. +*/ +static int fts3MatchinfoLcs(Fts3Cursor *pCsr, MatchInfo *pInfo){ + LcsIterator *aIter; + int i; + int iCol; + int nToken = 0; + + /* Allocate and populate the array of LcsIterator objects. The array + ** contains one element for each matchable phrase in the query. + **/ + aIter = sqlite3_malloc(sizeof(LcsIterator) * pCsr->nPhrase); + if( !aIter ) return SQLITE_NOMEM; + memset(aIter, 0, sizeof(LcsIterator) * pCsr->nPhrase); + (void)fts3ExprIterate(pCsr->pExpr, fts3MatchinfoLcsCb, (void*)aIter); + + for(i=0; i<pInfo->nPhrase; i++){ + LcsIterator *pIter = &aIter[i]; + nToken -= pIter->pExpr->pPhrase->nToken; + pIter->iPosOffset = nToken; + } + + for(iCol=0; iCol<pInfo->nCol; iCol++){ + int nLcs = 0; /* LCS value for this column */ + int nLive = 0; /* Number of iterators in aIter not at EOF */ + + for(i=0; i<pInfo->nPhrase; i++){ + LcsIterator *pIt = &aIter[i]; + pIt->pRead = sqlite3Fts3EvalPhrasePoslist(pCsr, pIt->pExpr, iCol); + if( pIt->pRead ){ + pIt->iPos = pIt->iPosOffset; + fts3LcsIteratorAdvance(&aIter[i]); + nLive++; + } + } + + while( nLive>0 ){ + LcsIterator *pAdv = 0; /* The iterator to advance by one position */ + int nThisLcs = 0; /* LCS for the current iterator positions */ + + for(i=0; i<pInfo->nPhrase; i++){ + LcsIterator *pIter = &aIter[i]; + if( pIter->pRead==0 ){ + /* This iterator is already at EOF for this column. */ + nThisLcs = 0; + }else{ + if( pAdv==0 || pIter->iPos<pAdv->iPos ){ + pAdv = pIter; + } + if( nThisLcs==0 || pIter->iPos==pIter[-1].iPos ){ + nThisLcs++; + }else{ + nThisLcs = 1; + } + if( nThisLcs>nLcs ) nLcs = nThisLcs; + } + } + if( fts3LcsIteratorAdvance(pAdv) ) nLive--; + } + + pInfo->aMatchinfo[iCol] = nLcs; + } + + sqlite3_free(aIter); + return SQLITE_OK; +} + +/* +** Populate the buffer pInfo->aMatchinfo[] with an array of integers to +** be returned by the matchinfo() function. Argument zArg contains the +** format string passed as the second argument to matchinfo (or the +** default value "pcx" if no second argument was specified). The format +** string has already been validated and the pInfo->aMatchinfo[] array +** is guaranteed to be large enough for the output. +** +** If bGlobal is true, then populate all fields of the matchinfo() output. +** If it is false, then assume that those fields that do not change between +** rows (i.e. FTS3_MATCHINFO_NPHRASE, NCOL, NDOC, AVGLENGTH and part of HITS) +** have already been populated. +** +** Return SQLITE_OK if successful, or an SQLite error code if an error +** occurs. If a value other than SQLITE_OK is returned, the state the +** pInfo->aMatchinfo[] buffer is left in is undefined. +*/ +static int fts3MatchinfoValues( + Fts3Cursor *pCsr, /* FTS3 cursor object */ + int bGlobal, /* True to grab the global stats */ + MatchInfo *pInfo, /* Matchinfo context object */ + const char *zArg /* Matchinfo format string */ +){ + int rc = SQLITE_OK; + int i; + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + sqlite3_stmt *pSelect = 0; + + for(i=0; rc==SQLITE_OK && zArg[i]; i++){ + + switch( zArg[i] ){ + case FTS3_MATCHINFO_NPHRASE: + if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nPhrase; + break; + + case FTS3_MATCHINFO_NCOL: + if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nCol; + break; + + case FTS3_MATCHINFO_NDOC: + if( bGlobal ){ + sqlite3_int64 nDoc = 0; + rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, 0); + pInfo->aMatchinfo[0] = (u32)nDoc; + } + break; + + case FTS3_MATCHINFO_AVGLENGTH: + if( bGlobal ){ + sqlite3_int64 nDoc; /* Number of rows in table */ + const char *a; /* Aggregate column length array */ + + rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, &a); + if( rc==SQLITE_OK ){ + int iCol; + for(iCol=0; iCol<pInfo->nCol; iCol++){ + u32 iVal; + sqlite3_int64 nToken; + a += sqlite3Fts3GetVarint(a, &nToken); + iVal = (u32)(((u32)(nToken&0xffffffff)+nDoc/2)/nDoc); + pInfo->aMatchinfo[iCol] = iVal; + } + } + } + break; + + case FTS3_MATCHINFO_LENGTH: { + sqlite3_stmt *pSelectDocsize = 0; + rc = sqlite3Fts3SelectDocsize(pTab, pCsr->iPrevId, &pSelectDocsize); + if( rc==SQLITE_OK ){ + int iCol; + const char *a = sqlite3_column_blob(pSelectDocsize, 0); + for(iCol=0; iCol<pInfo->nCol; iCol++){ + sqlite3_int64 nToken; + a += sqlite3Fts3GetVarint(a, &nToken); + pInfo->aMatchinfo[iCol] = (u32)nToken; + } + } + sqlite3_reset(pSelectDocsize); + break; + } + + case FTS3_MATCHINFO_LCS: + rc = fts3ExprLoadDoclists(pCsr, 0, 0); + if( rc==SQLITE_OK ){ + rc = fts3MatchinfoLcs(pCsr, pInfo); + } + break; + + default: { + Fts3Expr *pExpr; + assert( zArg[i]==FTS3_MATCHINFO_HITS ); + pExpr = pCsr->pExpr; + rc = fts3ExprLoadDoclists(pCsr, 0, 0); + if( rc!=SQLITE_OK ) break; + if( bGlobal ){ + if( pCsr->pDeferred ){ + rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &pInfo->nDoc, 0); + if( rc!=SQLITE_OK ) break; + } + rc = fts3ExprIterate(pExpr, fts3ExprGlobalHitsCb,(void*)pInfo); + if( rc!=SQLITE_OK ) break; + } + (void)fts3ExprIterate(pExpr, fts3ExprLocalHitsCb,(void*)pInfo); + break; + } + } + + pInfo->aMatchinfo += fts3MatchinfoSize(pInfo, zArg[i]); + } + + sqlite3_reset(pSelect); + return rc; +} + + +/* +** Populate pCsr->aMatchinfo[] with data for the current row. The +** 'matchinfo' data is an array of 32-bit unsigned integers (C type u32). +*/ +static int fts3GetMatchinfo( + Fts3Cursor *pCsr, /* FTS3 Cursor object */ + const char *zArg /* Second argument to matchinfo() function */ +){ + MatchInfo sInfo; + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + int rc = SQLITE_OK; + int bGlobal = 0; /* Collect 'global' stats as well as local */ + + memset(&sInfo, 0, sizeof(MatchInfo)); + sInfo.pCursor = pCsr; + sInfo.nCol = pTab->nColumn; + + /* If there is cached matchinfo() data, but the format string for the + ** cache does not match the format string for this request, discard + ** the cached data. */ + if( pCsr->zMatchinfo && strcmp(pCsr->zMatchinfo, zArg) ){ + assert( pCsr->aMatchinfo ); + sqlite3_free(pCsr->aMatchinfo); + pCsr->zMatchinfo = 0; + pCsr->aMatchinfo = 0; + } + + /* If Fts3Cursor.aMatchinfo[] is NULL, then this is the first time the + ** matchinfo function has been called for this query. In this case + ** allocate the array used to accumulate the matchinfo data and + ** initialize those elements that are constant for every row. + */ + if( pCsr->aMatchinfo==0 ){ + int nMatchinfo = 0; /* Number of u32 elements in match-info */ + int nArg; /* Bytes in zArg */ + int i; /* Used to iterate through zArg */ + + /* Determine the number of phrases in the query */ + pCsr->nPhrase = fts3ExprPhraseCount(pCsr->pExpr); + sInfo.nPhrase = pCsr->nPhrase; + + /* Determine the number of integers in the buffer returned by this call. */ + for(i=0; zArg[i]; i++){ + nMatchinfo += fts3MatchinfoSize(&sInfo, zArg[i]); + } + + /* Allocate space for Fts3Cursor.aMatchinfo[] and Fts3Cursor.zMatchinfo. */ + nArg = (int)strlen(zArg); + pCsr->aMatchinfo = (u32 *)sqlite3_malloc(sizeof(u32)*nMatchinfo + nArg + 1); + if( !pCsr->aMatchinfo ) return SQLITE_NOMEM; + + pCsr->zMatchinfo = (char *)&pCsr->aMatchinfo[nMatchinfo]; + pCsr->nMatchinfo = nMatchinfo; + memcpy(pCsr->zMatchinfo, zArg, nArg+1); + memset(pCsr->aMatchinfo, 0, sizeof(u32)*nMatchinfo); + pCsr->isMatchinfoNeeded = 1; + bGlobal = 1; + } + + sInfo.aMatchinfo = pCsr->aMatchinfo; + sInfo.nPhrase = pCsr->nPhrase; + if( pCsr->isMatchinfoNeeded ){ + rc = fts3MatchinfoValues(pCsr, bGlobal, &sInfo, zArg); + pCsr->isMatchinfoNeeded = 0; + } + + return rc; +} + +/* +** Implementation of snippet() function. +*/ +void sqlite3Fts3Snippet( + sqlite3_context *pCtx, /* SQLite function call context */ + Fts3Cursor *pCsr, /* Cursor object */ + const char *zStart, /* Snippet start text - "<b>" */ + const char *zEnd, /* Snippet end text - "</b>" */ + const char *zEllipsis, /* Snippet ellipsis text - "<b>...</b>" */ + int iCol, /* Extract snippet from this column */ + int nToken /* Approximate number of tokens in snippet */ +){ + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + int rc = SQLITE_OK; + int i; + StrBuffer res = {0, 0, 0}; + + /* The returned text includes up to four fragments of text extracted from + ** the data in the current row. The first iteration of the for(...) loop + ** below attempts to locate a single fragment of text nToken tokens in + ** size that contains at least one instance of all phrases in the query + ** expression that appear in the current row. If such a fragment of text + ** cannot be found, the second iteration of the loop attempts to locate + ** a pair of fragments, and so on. + */ + int nSnippet = 0; /* Number of fragments in this snippet */ + SnippetFragment aSnippet[4]; /* Maximum of 4 fragments per snippet */ + int nFToken = -1; /* Number of tokens in each fragment */ + + if( !pCsr->pExpr ){ + sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC); + return; + } + + for(nSnippet=1; 1; nSnippet++){ + + int iSnip; /* Loop counter 0..nSnippet-1 */ + u64 mCovered = 0; /* Bitmask of phrases covered by snippet */ + u64 mSeen = 0; /* Bitmask of phrases seen by BestSnippet() */ + + if( nToken>=0 ){ + nFToken = (nToken+nSnippet-1) / nSnippet; + }else{ + nFToken = -1 * nToken; + } + + for(iSnip=0; iSnip<nSnippet; iSnip++){ + int iBestScore = -1; /* Best score of columns checked so far */ + int iRead; /* Used to iterate through columns */ + SnippetFragment *pFragment = &aSnippet[iSnip]; + + memset(pFragment, 0, sizeof(*pFragment)); + + /* Loop through all columns of the table being considered for snippets. + ** If the iCol argument to this function was negative, this means all + ** columns of the FTS3 table. Otherwise, only column iCol is considered. + */ + for(iRead=0; iRead<pTab->nColumn; iRead++){ + SnippetFragment sF = {0, 0, 0, 0}; + int iS; + if( iCol>=0 && iRead!=iCol ) continue; + + /* Find the best snippet of nFToken tokens in column iRead. */ + rc = fts3BestSnippet(nFToken, pCsr, iRead, mCovered, &mSeen, &sF, &iS); + if( rc!=SQLITE_OK ){ + goto snippet_out; + } + if( iS>iBestScore ){ + *pFragment = sF; + iBestScore = iS; + } + } + + mCovered |= pFragment->covered; + } + + /* If all query phrases seen by fts3BestSnippet() are present in at least + ** one of the nSnippet snippet fragments, break out of the loop. + */ + assert( (mCovered&mSeen)==mCovered ); + if( mSeen==mCovered || nSnippet==SizeofArray(aSnippet) ) break; + } + + assert( nFToken>0 ); + + for(i=0; i<nSnippet && rc==SQLITE_OK; i++){ + rc = fts3SnippetText(pCsr, &aSnippet[i], + i, (i==nSnippet-1), nFToken, zStart, zEnd, zEllipsis, &res + ); + } + + snippet_out: + sqlite3Fts3SegmentsClose(pTab); + if( rc!=SQLITE_OK ){ + sqlite3_result_error_code(pCtx, rc); + sqlite3_free(res.z); + }else{ + sqlite3_result_text(pCtx, res.z, -1, sqlite3_free); + } +} + + +typedef struct TermOffset TermOffset; +typedef struct TermOffsetCtx TermOffsetCtx; + +struct TermOffset { + char *pList; /* Position-list */ + int iPos; /* Position just read from pList */ + int iOff; /* Offset of this term from read positions */ +}; + +struct TermOffsetCtx { + Fts3Cursor *pCsr; + int iCol; /* Column of table to populate aTerm for */ + int iTerm; + sqlite3_int64 iDocid; + TermOffset *aTerm; +}; + +/* +** This function is an fts3ExprIterate() callback used by sqlite3Fts3Offsets(). +*/ +static int fts3ExprTermOffsetInit(Fts3Expr *pExpr, int iPhrase, void *ctx){ + TermOffsetCtx *p = (TermOffsetCtx *)ctx; + int nTerm; /* Number of tokens in phrase */ + int iTerm; /* For looping through nTerm phrase terms */ + char *pList; /* Pointer to position list for phrase */ + int iPos = 0; /* First position in position-list */ + + UNUSED_PARAMETER(iPhrase); + pList = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol); + nTerm = pExpr->pPhrase->nToken; + if( pList ){ + fts3GetDeltaPosition(&pList, &iPos); + assert( iPos>=0 ); + } + + for(iTerm=0; iTerm<nTerm; iTerm++){ + TermOffset *pT = &p->aTerm[p->iTerm++]; + pT->iOff = nTerm-iTerm-1; + pT->pList = pList; + pT->iPos = iPos; + } + + return SQLITE_OK; +} + +/* +** Implementation of offsets() function. +*/ +void sqlite3Fts3Offsets( + sqlite3_context *pCtx, /* SQLite function call context */ + Fts3Cursor *pCsr /* Cursor object */ +){ + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + sqlite3_tokenizer_module const *pMod = pTab->pTokenizer->pModule; + const char *ZDUMMY; /* Dummy argument used with xNext() */ + int NDUMMY; /* Dummy argument used with xNext() */ + int rc; /* Return Code */ + int nToken; /* Number of tokens in query */ + int iCol; /* Column currently being processed */ + StrBuffer res = {0, 0, 0}; /* Result string */ + TermOffsetCtx sCtx; /* Context for fts3ExprTermOffsetInit() */ + + if( !pCsr->pExpr ){ + sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC); + return; + } + + memset(&sCtx, 0, sizeof(sCtx)); + assert( pCsr->isRequireSeek==0 ); + + /* Count the number of terms in the query */ + rc = fts3ExprLoadDoclists(pCsr, 0, &nToken); + if( rc!=SQLITE_OK ) goto offsets_out; + + /* Allocate the array of TermOffset iterators. */ + sCtx.aTerm = (TermOffset *)sqlite3_malloc(sizeof(TermOffset)*nToken); + if( 0==sCtx.aTerm ){ + rc = SQLITE_NOMEM; + goto offsets_out; + } + sCtx.iDocid = pCsr->iPrevId; + sCtx.pCsr = pCsr; + + /* Loop through the table columns, appending offset information to + ** string-buffer res for each column. + */ + for(iCol=0; iCol<pTab->nColumn; iCol++){ + sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor */ + int iStart; + int iEnd; + int iCurrent; + const char *zDoc; + int nDoc; + + /* Initialize the contents of sCtx.aTerm[] for column iCol. There is + ** no way that this operation can fail, so the return code from + ** fts3ExprIterate() can be discarded. + */ + sCtx.iCol = iCol; + sCtx.iTerm = 0; + (void)fts3ExprIterate(pCsr->pExpr, fts3ExprTermOffsetInit, (void *)&sCtx); + + /* Retreive the text stored in column iCol. If an SQL NULL is stored + ** in column iCol, jump immediately to the next iteration of the loop. + ** If an OOM occurs while retrieving the data (this can happen if SQLite + ** needs to transform the data from utf-16 to utf-8), return SQLITE_NOMEM + ** to the caller. + */ + zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol+1); + nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol+1); + if( zDoc==0 ){ + if( sqlite3_column_type(pCsr->pStmt, iCol+1)==SQLITE_NULL ){ + continue; + } + rc = SQLITE_NOMEM; + goto offsets_out; + } + + /* Initialize a tokenizer iterator to iterate through column iCol. */ + rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC); + if( rc!=SQLITE_OK ) goto offsets_out; + pC->pTokenizer = pTab->pTokenizer; + + rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent); + while( rc==SQLITE_OK ){ + int i; /* Used to loop through terms */ + int iMinPos = 0x7FFFFFFF; /* Position of next token */ + TermOffset *pTerm = 0; /* TermOffset associated with next token */ + + for(i=0; i<nToken; i++){ + TermOffset *pT = &sCtx.aTerm[i]; + if( pT->pList && (pT->iPos-pT->iOff)<iMinPos ){ + iMinPos = pT->iPos-pT->iOff; + pTerm = pT; + } + } + + if( !pTerm ){ + /* All offsets for this column have been gathered. */ + rc = SQLITE_DONE; + }else{ + assert( iCurrent<=iMinPos ); + if( 0==(0xFE&*pTerm->pList) ){ + pTerm->pList = 0; + }else{ + fts3GetDeltaPosition(&pTerm->pList, &pTerm->iPos); + } + while( rc==SQLITE_OK && iCurrent<iMinPos ){ + rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent); + } + if( rc==SQLITE_OK ){ + char aBuffer[64]; + sqlite3_snprintf(sizeof(aBuffer), aBuffer, + "%d %d %d %d ", iCol, pTerm-sCtx.aTerm, iStart, iEnd-iStart + ); + rc = fts3StringAppend(&res, aBuffer, -1); + }else if( rc==SQLITE_DONE && pTab->zContentTbl==0 ){ + rc = FTS_CORRUPT_VTAB; + } + } + } + if( rc==SQLITE_DONE ){ + rc = SQLITE_OK; + } + + pMod->xClose(pC); + if( rc!=SQLITE_OK ) goto offsets_out; + } + + offsets_out: + sqlite3_free(sCtx.aTerm); + assert( rc!=SQLITE_DONE ); + sqlite3Fts3SegmentsClose(pTab); + if( rc!=SQLITE_OK ){ + sqlite3_result_error_code(pCtx, rc); + sqlite3_free(res.z); + }else{ + sqlite3_result_text(pCtx, res.z, res.n-1, sqlite3_free); + } + return; +} + +/* +** Implementation of matchinfo() function. +*/ +void sqlite3Fts3Matchinfo( + sqlite3_context *pContext, /* Function call context */ + Fts3Cursor *pCsr, /* FTS3 table cursor */ + const char *zArg /* Second arg to matchinfo() function */ +){ + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + int rc; + int i; + const char *zFormat; + + if( zArg ){ + for(i=0; zArg[i]; i++){ + char *zErr = 0; + if( fts3MatchinfoCheck(pTab, zArg[i], &zErr) ){ + sqlite3_result_error(pContext, zErr, -1); + sqlite3_free(zErr); + return; + } + } + zFormat = zArg; + }else{ + zFormat = FTS3_MATCHINFO_DEFAULT; + } + + if( !pCsr->pExpr ){ + sqlite3_result_blob(pContext, "", 0, SQLITE_STATIC); + return; + } + + /* Retrieve matchinfo() data. */ + rc = fts3GetMatchinfo(pCsr, zFormat); + sqlite3Fts3SegmentsClose(pTab); + + if( rc!=SQLITE_OK ){ + sqlite3_result_error_code(pContext, rc); + }else{ + int n = pCsr->nMatchinfo * sizeof(u32); + sqlite3_result_blob(pContext, pCsr->aMatchinfo, n, SQLITE_TRANSIENT); + } +} + +#endif |