diff options
Diffstat (limited to 'ext/fts3')
| -rw-r--r-- | ext/fts3/fts3.c | 759 | ||||
| -rw-r--r-- | ext/fts3/fts3Int.h | 46 | ||||
| -rw-r--r-- | ext/fts3/fts3_aux.c | 129 | ||||
| -rw-r--r-- | ext/fts3/fts3_expr.c | 221 | ||||
| -rw-r--r-- | ext/fts3/fts3_hash.c | 4 | ||||
| -rw-r--r-- | ext/fts3/fts3_porter.c | 74 | ||||
| -rw-r--r-- | ext/fts3/fts3_snippet.c | 3 | ||||
| -rw-r--r-- | ext/fts3/fts3_test.c | 49 | ||||
| -rw-r--r-- | ext/fts3/fts3_unicode.c | 16 | ||||
| -rw-r--r-- | ext/fts3/fts3_unicode2.c | 53 | ||||
| -rw-r--r-- | ext/fts3/fts3_write.c | 506 | ||||
| -rw-r--r-- | ext/fts3/tool/fts3view.c | 15 | ||||
| -rw-r--r-- | ext/fts3/unicode/mkunicode.tcl | 15 | 
13 files changed, 1378 insertions, 512 deletions
| diff --git a/ext/fts3/fts3.c b/ext/fts3/fts3.c index c00a13f..4f4b667 100644 --- a/ext/fts3/fts3.c +++ b/ext/fts3/fts3.c @@ -330,21 +330,37 @@ int sqlite3Fts3PutVarint(char *p, sqlite_int64 v){    return (int) (q - (unsigned char *)p);  } +#define GETVARINT_STEP(v, ptr, shift, mask1, mask2, var, ret) \ +  v = (v & mask1) | ( (*ptr++) << shift );                    \ +  if( (v & mask2)==0 ){ var = v; return ret; } +#define GETVARINT_INIT(v, ptr, shift, mask1, mask2, var, ret) \ +  v = (*ptr++);                                               \ +  if( (v & mask2)==0 ){ var = v; return ret; } +  /*   ** Read a 64-bit variable-length integer from memory starting at p[0].  ** Return the number of bytes read, or 0 on error.  ** The value is stored in *v.  */  int sqlite3Fts3GetVarint(const char *p, sqlite_int64 *v){ -  const unsigned char *q = (const unsigned char *) p; -  sqlite_uint64 x = 0, y = 1; -  while( (*q&0x80)==0x80 && q-(unsigned char *)p<FTS3_VARINT_MAX ){ -    x += y * (*q++ & 0x7f); -    y <<= 7; -  } -  x += y * (*q++); -  *v = (sqlite_int64) x; -  return (int) (q - (unsigned char *)p); +  const char *pStart = p; +  u32 a; +  u64 b; +  int shift; + +  GETVARINT_INIT(a, p, 0,  0x00,     0x80, *v, 1); +  GETVARINT_STEP(a, p, 7,  0x7F,     0x4000, *v, 2); +  GETVARINT_STEP(a, p, 14, 0x3FFF,   0x200000, *v, 3); +  GETVARINT_STEP(a, p, 21, 0x1FFFFF, 0x10000000, *v, 4); +  b = (a & 0x0FFFFFFF ); + +  for(shift=28; shift<=63; shift+=7){ +    u64 c = *p++; +    b += (c&0x7F) << shift; +    if( (c & 0x80)==0 ) break; +  } +  *v = b; +  return (int)(p - pStart);  }  /* @@ -352,10 +368,21 @@ int sqlite3Fts3GetVarint(const char *p, sqlite_int64 *v){  ** 32-bit integer before it is returned.  
*/  int sqlite3Fts3GetVarint32(const char *p, int *pi){ - sqlite_int64 i; - int ret = sqlite3Fts3GetVarint(p, &i); - *pi = (int) i; - return ret; +  u32 a; + +#ifndef fts3GetVarint32 +  GETVARINT_INIT(a, p, 0,  0x00,     0x80, *pi, 1); +#else +  a = (*p++); +  assert( a & 0x80 ); +#endif + +  GETVARINT_STEP(a, p, 7,  0x7F,     0x4000, *pi, 2); +  GETVARINT_STEP(a, p, 14, 0x3FFF,   0x200000, *pi, 3); +  GETVARINT_STEP(a, p, 21, 0x1FFFFF, 0x10000000, *pi, 4); +  a = (a & 0x0FFFFFFF ); +  *pi = (int)(a | ((u32)(*p & 0x0F) << 28)); +  return 5;  }  /* @@ -1081,6 +1108,8 @@ static int fts3InitVtab(    char *zUncompress = 0;          /* uncompress=? parameter (or NULL) */    char *zContent = 0;             /* content=? parameter (or NULL) */    char *zLanguageid = 0;          /* languageid=? parameter (or NULL) */ +  char **azNotindexed = 0;        /* The set of notindexed= columns */ +  int nNotindexed = 0;            /* Size of azNotindexed[] array */    assert( strlen(argv[0])==4 );    assert( (sqlite3_strnicmp(argv[0], "fts4", 4)==0 && isFts4) @@ -1090,9 +1119,19 @@ static int fts3InitVtab(    nDb = (int)strlen(argv[1]) + 1;    nName = (int)strlen(argv[2]) + 1; -  aCol = (const char **)sqlite3_malloc(sizeof(const char *) * (argc-2) ); -  if( !aCol ) return SQLITE_NOMEM; -  memset((void *)aCol, 0, sizeof(const char *) * (argc-2)); +  nByte = sizeof(const char *) * (argc-2); +  aCol = (const char **)sqlite3_malloc(nByte); +  if( aCol ){ +    memset((void*)aCol, 0, nByte); +    azNotindexed = (char **)sqlite3_malloc(nByte); +  } +  if( azNotindexed ){ +    memset(azNotindexed, 0, nByte); +  } +  if( !aCol || !azNotindexed ){ +    rc = SQLITE_NOMEM; +    goto fts3_init_out; +  }    /* Loop through all of the arguments passed by the user to the FTS3/4    ** module (i.e. all the column names and special arguments). 
This loop @@ -1131,7 +1170,8 @@ static int fts3InitVtab(          { "uncompress", 10 },     /* 3 -> UNCOMPRESS */          { "order",       5 },     /* 4 -> ORDER */          { "content",     7 },     /* 5 -> CONTENT */ -        { "languageid", 10 }      /* 6 -> LANGUAGEID */ +        { "languageid", 10 },     /* 6 -> LANGUAGEID */ +        { "notindexed", 10 }      /* 7 -> NOTINDEXED */        };        int iOpt; @@ -1197,6 +1237,11 @@ static int fts3InitVtab(                zLanguageid = zVal;                zVal = 0;                break; + +            case 7:              /* NOTINDEXED */ +              azNotindexed[nNotindexed++] = zVal; +              zVal = 0; +              break;            }          }          sqlite3_free(zVal); @@ -1268,6 +1313,7 @@ static int fts3InitVtab(    nByte = sizeof(Fts3Table) +                  /* Fts3Table */            nCol * sizeof(char *) +              /* azColumn */            nIndex * sizeof(struct Fts3Index) +  /* aIndex */ +          nCol * sizeof(u8) +                  /* abNotindexed */            nName +                              /* zName */            nDb +                                /* zDb */            nString;                             /* Space for azColumn strings */ @@ -1287,7 +1333,7 @@ static int fts3InitVtab(    p->bHasStat = isFts4;    p->bFts4 = isFts4;    p->bDescIdx = bDescIdx; -  p->bAutoincrmerge = 0xff;   /* 0xff means setting unknown */ +  p->nAutoincrmerge = 0xff;   /* 0xff means setting unknown */    p->zContentTbl = zContent;    p->zLanguageid = zLanguageid;    zContent = 0; @@ -1301,9 +1347,10 @@ static int fts3InitVtab(    for(i=0; i<nIndex; i++){      fts3HashInit(&p->aIndex[i].hPending, FTS3_HASH_STRING, 1);    } +  p->abNotindexed = (u8 *)&p->aIndex[nIndex];    /* Fill in the zName and zDb fields of the vtab structure. 
*/ -  zCsr = (char *)&p->aIndex[nIndex]; +  zCsr = (char *)&p->abNotindexed[nCol];    p->zName = zCsr;    memcpy(zCsr, argv[2], nName);    zCsr += nName; @@ -1324,7 +1371,28 @@ static int fts3InitVtab(      assert( zCsr <= &((char *)p)[nByte] );    } -  if( (zCompress==0)!=(zUncompress==0) ){ +  /* Fill in the abNotindexed array */ +  for(iCol=0; iCol<nCol; iCol++){ +    int n = (int)strlen(p->azColumn[iCol]); +    for(i=0; i<nNotindexed; i++){ +      char *zNot = azNotindexed[i]; +      if( zNot && n==(int)strlen(zNot) +       && 0==sqlite3_strnicmp(p->azColumn[iCol], zNot, n)  +      ){ +        p->abNotindexed[iCol] = 1; +        sqlite3_free(zNot); +        azNotindexed[i] = 0; +      } +    } +  } +  for(i=0; i<nNotindexed; i++){ +    if( azNotindexed[i] ){ +      *pzErr = sqlite3_mprintf("no such column: %s", azNotindexed[i]); +      rc = SQLITE_ERROR; +    } +  } + +  if( rc==SQLITE_OK && (zCompress==0)!=(zUncompress==0) ){      char const *zMiss = (zCompress==0 ? "compress" : "uncompress");      rc = SQLITE_ERROR;      *pzErr = sqlite3_mprintf("missing %s parameter in fts4 constructor", zMiss); @@ -1344,10 +1412,7 @@ static int fts3InitVtab(    ** addition of a %_stat table so that it can use incremental merge.    */    if( !isFts4 && !isCreate ){ -    int rc2 = SQLITE_OK; -    fts3DbExec(&rc2, db, "SELECT 1 FROM %Q.'%q_stat' WHERE id=2", -               p->zDb, p->zName); -    if( rc2==SQLITE_OK ) p->bHasStat = 1; +    p->bHasStat = 2;    }    /* Figure out the page-size for the database. 
This is required in order to @@ -1365,7 +1430,9 @@ fts3_init_out:    sqlite3_free(zUncompress);    sqlite3_free(zContent);    sqlite3_free(zLanguageid); +  for(i=0; i<nNotindexed; i++) sqlite3_free(azNotindexed[i]);    sqlite3_free((void *)aCol); +  sqlite3_free((void *)azNotindexed);    if( rc!=SQLITE_OK ){      if( p ){        fts3DisconnectMethod((sqlite3_vtab *)p); @@ -1404,6 +1471,19 @@ static int fts3CreateMethod(    return fts3InitVtab(1, db, pAux, argc, argv, ppVtab, pzErr);  } +/* +** Set the pIdxInfo->estimatedRows variable to nRow. Unless this +** extension is currently being used by a version of SQLite too old to +** support estimatedRows. In that case this function is a no-op. +*/ +static void fts3SetEstimatedRows(sqlite3_index_info *pIdxInfo, i64 nRow){ +#if SQLITE_VERSION_NUMBER>=3008002 +  if( sqlite3_libversion_number()>=3008002 ){ +    pIdxInfo->estimatedRows = nRow; +  } +#endif +} +  /*   ** Implementation of the xBestIndex method for FTS3 tables. There  ** are three possible strategies, in order of preference: @@ -1416,23 +1496,40 @@ static int fts3BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){    Fts3Table *p = (Fts3Table *)pVTab;    int i;                          /* Iterator variable */    int iCons = -1;                 /* Index of constraint to use */ +    int iLangidCons = -1;           /* Index of langid=x constraint, if present */ +  int iDocidGe = -1;              /* Index of docid>=x constraint, if present */ +  int iDocidLe = -1;              /* Index of docid<=x constraint, if present */ +  int iIdx;    /* By default use a full table scan. This is an expensive option,    ** so search through the constraints to see if a more efficient     ** strategy is possible.    
*/    pInfo->idxNum = FTS3_FULLSCAN_SEARCH; -  pInfo->estimatedCost = 500000; +  pInfo->estimatedCost = 5000000;    for(i=0; i<pInfo->nConstraint; i++){ +    int bDocid;                 /* True if this constraint is on docid */      struct sqlite3_index_constraint *pCons = &pInfo->aConstraint[i]; -    if( pCons->usable==0 ) continue; +    if( pCons->usable==0 ){ +      if( pCons->op==SQLITE_INDEX_CONSTRAINT_MATCH ){ +        /* There exists an unusable MATCH constraint. This means that if +        ** the planner does elect to use the results of this call as part +        ** of the overall query plan the user will see an "unable to use +        ** function MATCH in the requested context" error. To discourage +        ** this, return a very high cost here.  */ +        pInfo->idxNum = FTS3_FULLSCAN_SEARCH; +        pInfo->estimatedCost = 1e50; +        fts3SetEstimatedRows(pInfo, ((sqlite3_int64)1) << 50); +        return SQLITE_OK; +      } +      continue; +    } + +    bDocid = (pCons->iColumn<0 || pCons->iColumn==p->nColumn+1);      /* A direct lookup on the rowid or docid column. Assign a cost of 1.0. 
*/ -    if( iCons<0  -     && pCons->op==SQLITE_INDEX_CONSTRAINT_EQ  -     && (pCons->iColumn<0 || pCons->iColumn==p->nColumn+1 ) -    ){ +    if( iCons<0 && pCons->op==SQLITE_INDEX_CONSTRAINT_EQ && bDocid ){        pInfo->idxNum = FTS3_DOCID_SEARCH;        pInfo->estimatedCost = 1.0;        iCons = i; @@ -1461,14 +1558,38 @@ static int fts3BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){      ){        iLangidCons = i;      } + +    if( bDocid ){ +      switch( pCons->op ){ +        case SQLITE_INDEX_CONSTRAINT_GE: +        case SQLITE_INDEX_CONSTRAINT_GT: +          iDocidGe = i; +          break; + +        case SQLITE_INDEX_CONSTRAINT_LE: +        case SQLITE_INDEX_CONSTRAINT_LT: +          iDocidLe = i; +          break; +      } +    }    } +  iIdx = 1;    if( iCons>=0 ){ -    pInfo->aConstraintUsage[iCons].argvIndex = 1; +    pInfo->aConstraintUsage[iCons].argvIndex = iIdx++;      pInfo->aConstraintUsage[iCons].omit = 1;    }     if( iLangidCons>=0 ){ -    pInfo->aConstraintUsage[iLangidCons].argvIndex = 2; +    pInfo->idxNum |= FTS3_HAVE_LANGID; +    pInfo->aConstraintUsage[iLangidCons].argvIndex = iIdx++; +  }  +  if( iDocidGe>=0 ){ +    pInfo->idxNum |= FTS3_HAVE_DOCID_GE; +    pInfo->aConstraintUsage[iDocidGe].argvIndex = iIdx++; +  }  +  if( iDocidLe>=0 ){ +    pInfo->idxNum |= FTS3_HAVE_DOCID_LE; +    pInfo->aConstraintUsage[iDocidLe].argvIndex = iIdx++;    }     /* Regardless of the strategy selected, FTS can deliver rows in rowid (or @@ -1646,10 +1767,10 @@ static int fts3ScanInteriorNode(      /* Load the next term on the node into zBuffer. Use realloc() to expand      ** the size of zBuffer if required.  
*/      if( !isFirstTerm ){ -      zCsr += sqlite3Fts3GetVarint32(zCsr, &nPrefix); +      zCsr += fts3GetVarint32(zCsr, &nPrefix);      }      isFirstTerm = 0; -    zCsr += sqlite3Fts3GetVarint32(zCsr, &nSuffix); +    zCsr += fts3GetVarint32(zCsr, &nSuffix);      if( nPrefix<0 || nSuffix<0 || &zCsr[nSuffix]>zEnd ){        rc = FTS_CORRUPT_VTAB; @@ -1737,7 +1858,7 @@ static int fts3SelectLeaf(    assert( piLeaf || piLeaf2 ); -  sqlite3Fts3GetVarint32(zNode, &iHeight); +  fts3GetVarint32(zNode, &iHeight);    rc = fts3ScanInteriorNode(zTerm, nTerm, zNode, nNode, piLeaf, piLeaf2);    assert( !piLeaf2 || !piLeaf || rc!=SQLITE_OK || (*piLeaf<=*piLeaf2) ); @@ -1939,11 +2060,11 @@ static void fts3PoslistMerge(      int iCol1;         /* The current column index in pp1 */      int iCol2;         /* The current column index in pp2 */ -    if( *p1==POS_COLUMN ) sqlite3Fts3GetVarint32(&p1[1], &iCol1); +    if( *p1==POS_COLUMN ) fts3GetVarint32(&p1[1], &iCol1);      else if( *p1==POS_END ) iCol1 = POSITION_LIST_END;      else iCol1 = 0; -    if( *p2==POS_COLUMN ) sqlite3Fts3GetVarint32(&p2[1], &iCol2); +    if( *p2==POS_COLUMN ) fts3GetVarint32(&p2[1], &iCol2);      else if( *p2==POS_END ) iCol2 = POSITION_LIST_END;      else iCol2 = 0; @@ -2036,11 +2157,11 @@ static int fts3PoslistPhraseMerge(    assert( p!=0 && *p1!=0 && *p2!=0 );    if( *p1==POS_COLUMN ){       p1++; -    p1 += sqlite3Fts3GetVarint32(p1, &iCol1); +    p1 += fts3GetVarint32(p1, &iCol1);    }    if( *p2==POS_COLUMN ){       p2++; -    p2 += sqlite3Fts3GetVarint32(p2, &iCol2); +    p2 += fts3GetVarint32(p2, &iCol2);    }    while( 1 ){ @@ -2090,9 +2211,9 @@ static int fts3PoslistPhraseMerge(        if( 0==*p1 || 0==*p2 ) break;        p1++; -      p1 += sqlite3Fts3GetVarint32(p1, &iCol1); +      p1 += fts3GetVarint32(p1, &iCol1);        p2++; -      p2 += sqlite3Fts3GetVarint32(p2, &iCol2); +      p2 += fts3GetVarint32(p2, &iCol2);      }      /* Advance pointer p1 or p2 (whichever corresponds to the smaller of 
@@ -2104,12 +2225,12 @@ static int fts3PoslistPhraseMerge(        fts3ColumnlistCopy(0, &p1);        if( 0==*p1 ) break;        p1++; -      p1 += sqlite3Fts3GetVarint32(p1, &iCol1); +      p1 += fts3GetVarint32(p1, &iCol1);      }else{        fts3ColumnlistCopy(0, &p2);        if( 0==*p2 ) break;        p2++; -      p2 += sqlite3Fts3GetVarint32(p2, &iCol2); +      p2 += fts3GetVarint32(p2, &iCol2);      }    } @@ -2916,6 +3037,33 @@ static int fts3NextMethod(sqlite3_vtab_cursor *pCursor){  }  /* +** The following are copied from sqliteInt.h. +** +** Constants for the largest and smallest possible 64-bit signed integers. +** These macros are designed to work correctly on both 32-bit and 64-bit +** compilers. +*/ +#ifndef SQLITE_AMALGAMATION +# define LARGEST_INT64  (0xffffffff|(((sqlite3_int64)0x7fffffff)<<32)) +# define SMALLEST_INT64 (((sqlite3_int64)-1) - LARGEST_INT64) +#endif + +/* +** If the numeric type of argument pVal is "integer", then return it +** converted to a 64-bit signed integer. Otherwise, return a copy of +** the second parameter, iDefault. +*/ +static sqlite3_int64 fts3DocidRange(sqlite3_value *pVal, i64 iDefault){ +  if( pVal ){ +    int eType = sqlite3_value_numeric_type(pVal); +    if( eType==SQLITE_INTEGER ){ +      return sqlite3_value_int64(pVal); +    } +  } +  return iDefault; +} + +/*  ** This is the xFilter interface for the virtual table.  See  ** the virtual table xFilter method documentation for additional  ** information. @@ -2940,40 +3088,58 @@ static int fts3FilterMethod(  ){    int rc;    char *zSql;                     /* SQL statement used to access %_content */ +  int eSearch;    Fts3Table *p = (Fts3Table *)pCursor->pVtab;    Fts3Cursor *pCsr = (Fts3Cursor *)pCursor; +  sqlite3_value *pCons = 0;       /* The MATCH or rowid constraint, if any */ +  sqlite3_value *pLangid = 0;     /* The "langid = ?" constraint, if any */ +  sqlite3_value *pDocidGe = 0;    /* The "docid >= ?" 
constraint, if any */ +  sqlite3_value *pDocidLe = 0;    /* The "docid <= ?" constraint, if any */ +  int iIdx; +    UNUSED_PARAMETER(idxStr);    UNUSED_PARAMETER(nVal); -  assert( idxNum>=0 && idxNum<=(FTS3_FULLTEXT_SEARCH+p->nColumn) ); -  assert( nVal==0 || nVal==1 || nVal==2 ); -  assert( (nVal==0)==(idxNum==FTS3_FULLSCAN_SEARCH) ); +  eSearch = (idxNum & 0x0000FFFF); +  assert( eSearch>=0 && eSearch<=(FTS3_FULLTEXT_SEARCH+p->nColumn) );    assert( p->pSegments==0 ); +  /* Collect arguments into local variables */ +  iIdx = 0; +  if( eSearch!=FTS3_FULLSCAN_SEARCH ) pCons = apVal[iIdx++]; +  if( idxNum & FTS3_HAVE_LANGID ) pLangid = apVal[iIdx++]; +  if( idxNum & FTS3_HAVE_DOCID_GE ) pDocidGe = apVal[iIdx++]; +  if( idxNum & FTS3_HAVE_DOCID_LE ) pDocidLe = apVal[iIdx++]; +  assert( iIdx==nVal ); +    /* In case the cursor has been used before, clear it now. */    sqlite3_finalize(pCsr->pStmt);    sqlite3_free(pCsr->aDoclist);    sqlite3Fts3ExprFree(pCsr->pExpr);    memset(&pCursor[1], 0, sizeof(Fts3Cursor)-sizeof(sqlite3_vtab_cursor)); +  /* Set the lower and upper bounds on docids to return */ +  pCsr->iMinDocid = fts3DocidRange(pDocidGe, SMALLEST_INT64); +  pCsr->iMaxDocid = fts3DocidRange(pDocidLe, LARGEST_INT64); +    if( idxStr ){      pCsr->bDesc = (idxStr[0]=='D');    }else{      pCsr->bDesc = p->bDescIdx;    } -  pCsr->eSearch = (i16)idxNum; +  pCsr->eSearch = (i16)eSearch; -  if( idxNum!=FTS3_DOCID_SEARCH && idxNum!=FTS3_FULLSCAN_SEARCH ){ -    int iCol = idxNum-FTS3_FULLTEXT_SEARCH; -    const char *zQuery = (const char *)sqlite3_value_text(apVal[0]); +  if( eSearch!=FTS3_DOCID_SEARCH && eSearch!=FTS3_FULLSCAN_SEARCH ){ +    int iCol = eSearch-FTS3_FULLTEXT_SEARCH; +    const char *zQuery = (const char *)sqlite3_value_text(pCons); -    if( zQuery==0 && sqlite3_value_type(apVal[0])!=SQLITE_NULL ){ +    if( zQuery==0 && sqlite3_value_type(pCons)!=SQLITE_NULL ){        return SQLITE_NOMEM;      }      pCsr->iLangid = 0; -    if( nVal==2 ) pCsr->iLangid = 
sqlite3_value_int(apVal[1]); +    if( pLangid ) pCsr->iLangid = sqlite3_value_int(pLangid);      assert( p->base.zErrMsg==0 );      rc = sqlite3Fts3ExprParse(p->pTokenizer, pCsr->iLangid, @@ -2984,11 +3150,7 @@ static int fts3FilterMethod(        return rc;      } -    rc = sqlite3Fts3ReadLock(p); -    if( rc!=SQLITE_OK ) return rc; -      rc = fts3EvalStart(pCsr); -      sqlite3Fts3SegmentsClose(p);      if( rc!=SQLITE_OK ) return rc;      pCsr->pNextId = pCsr->aDoclist; @@ -3000,7 +3162,7 @@ static int fts3FilterMethod(    ** full-text query or docid lookup, the statement retrieves a single    ** row by docid.    */ -  if( idxNum==FTS3_FULLSCAN_SEARCH ){ +  if( eSearch==FTS3_FULLSCAN_SEARCH ){      zSql = sqlite3_mprintf(          "SELECT %s ORDER BY rowid %s",          p->zReadExprlist, (pCsr->bDesc ? "DESC" : "ASC") @@ -3011,10 +3173,10 @@ static int fts3FilterMethod(      }else{        rc = SQLITE_NOMEM;      } -  }else if( idxNum==FTS3_DOCID_SEARCH ){ +  }else if( eSearch==FTS3_DOCID_SEARCH ){      rc = fts3CursorSeekStmt(pCsr, &pCsr->pStmt);      if( rc==SQLITE_OK ){ -      rc = sqlite3_bind_value(pCsr->pStmt, 1, apVal[0]); +      rc = sqlite3_bind_value(pCsr->pStmt, 1, pCons);      }    }    if( rc!=SQLITE_OK ) return rc; @@ -3142,7 +3304,10 @@ static int fts3SyncMethod(sqlite3_vtab *pVtab){    Fts3Table *p = (Fts3Table*)pVtab;    int rc = sqlite3Fts3PendingTermsFlush(p); -  if( rc==SQLITE_OK && p->bAutoincrmerge==1 && p->nLeafAdd>(nMinMerge/16) ){ +  if( rc==SQLITE_OK  +   && p->nLeafAdd>(nMinMerge/16)  +   && p->nAutoincrmerge && p->nAutoincrmerge!=0xff +  ){      int mxLevel = 0;              /* Maximum relative level value in db */      int A;                        /* Incr-merge parameter A */ @@ -3150,14 +3315,41 @@ static int fts3SyncMethod(sqlite3_vtab *pVtab){      assert( rc==SQLITE_OK || mxLevel==0 );      A = p->nLeafAdd * mxLevel;      A += (A/2); -    if( A>(int)nMinMerge ) rc = sqlite3Fts3Incrmerge(p, A, 8); +    if( A>(int)nMinMerge ) rc = 
sqlite3Fts3Incrmerge(p, A, p->nAutoincrmerge);    }    sqlite3Fts3SegmentsClose(p);    return rc;  }  /* -** Implementation of xBegin() method. This is a no-op. +** If it is currently unknown whether or not the FTS table has an %_stat +** table (if p->bHasStat==2), attempt to determine this (set p->bHasStat +** to 0 or 1). Return SQLITE_OK if successful, or an SQLite error code +** if an error occurs. +*/ +static int fts3SetHasStat(Fts3Table *p){ +  int rc = SQLITE_OK; +  if( p->bHasStat==2 ){ +    const char *zFmt ="SELECT 1 FROM %Q.sqlite_master WHERE tbl_name='%q_stat'"; +    char *zSql = sqlite3_mprintf(zFmt, p->zDb, p->zName); +    if( zSql ){ +      sqlite3_stmt *pStmt = 0; +      rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0); +      if( rc==SQLITE_OK ){ +        int bHasStat = (sqlite3_step(pStmt)==SQLITE_ROW); +        rc = sqlite3_finalize(pStmt); +        if( rc==SQLITE_OK ) p->bHasStat = bHasStat; +      } +      sqlite3_free(zSql); +    }else{ +      rc = SQLITE_NOMEM; +    } +  } +  return rc; +} + +/* +** Implementation of xBegin() method.   */  static int fts3BeginMethod(sqlite3_vtab *pVtab){    Fts3Table *p = (Fts3Table*)pVtab; @@ -3168,7 +3360,7 @@ static int fts3BeginMethod(sqlite3_vtab *pVtab){    TESTONLY( p->inTransaction = 1 );    TESTONLY( p->mxSavepoint = -1; );    p->nLeafAdd = 0; -  return SQLITE_OK; +  return fts3SetHasStat(p);  }  /* @@ -3417,6 +3609,10 @@ static int fts3RenameMethod(    sqlite3 *db = p->db;            /* Database connection */    int rc;                         /* Return Code */ +  /* At this point it must be known if the %_stat table exists or not. +  ** So bHasStat may not be 2.  */ +  rc = fts3SetHasStat(p); +      /* As it happens, the pending terms table is always empty here. This is    ** because an "ALTER TABLE RENAME TABLE" statement inside a transaction     ** always opens a savepoint transaction. 
And the xSavepoint() method  @@ -3424,7 +3620,9 @@ static int fts3RenameMethod(    ** PendingTermsFlush() in in case that changes.    */    assert( p->nPendingData==0 ); -  rc = sqlite3Fts3PendingTermsFlush(p); +  if( rc==SQLITE_OK ){ +    rc = sqlite3Fts3PendingTermsFlush(p); +  }    if( p->zContentTbl==0 ){      fts3DbExec(&rc, db, @@ -3552,7 +3750,7 @@ static void hashDestroy(void *p){  */  void sqlite3Fts3SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule);  void sqlite3Fts3PorterTokenizerModule(sqlite3_tokenizer_module const**ppModule); -#ifdef SQLITE_ENABLE_FTS4_UNICODE61 +#ifndef SQLITE_DISABLE_FTS3_UNICODE  void sqlite3Fts3UnicodeTokenizer(sqlite3_tokenizer_module const**ppModule);  #endif  #ifdef SQLITE_ENABLE_ICU @@ -3570,7 +3768,7 @@ int sqlite3Fts3Init(sqlite3 *db){    Fts3Hash *pHash = 0;    const sqlite3_tokenizer_module *pSimple = 0;    const sqlite3_tokenizer_module *pPorter = 0; -#ifdef SQLITE_ENABLE_FTS4_UNICODE61 +#ifndef SQLITE_DISABLE_FTS3_UNICODE    const sqlite3_tokenizer_module *pUnicode = 0;  #endif @@ -3579,7 +3777,7 @@ int sqlite3Fts3Init(sqlite3 *db){    sqlite3Fts3IcuTokenizerModule(&pIcu);  #endif -#ifdef SQLITE_ENABLE_FTS4_UNICODE61 +#ifndef SQLITE_DISABLE_FTS3_UNICODE    sqlite3Fts3UnicodeTokenizer(&pUnicode);  #endif @@ -3607,7 +3805,7 @@ int sqlite3Fts3Init(sqlite3 *db){      if( sqlite3Fts3HashInsert(pHash, "simple", 7, (void *)pSimple)       || sqlite3Fts3HashInsert(pHash, "porter", 7, (void *)pPorter)  -#ifdef SQLITE_ENABLE_FTS4_UNICODE61 +#ifndef SQLITE_DISABLE_FTS3_UNICODE       || sqlite3Fts3HashInsert(pHash, "unicode61", 10, (void *)pUnicode)   #endif  #ifdef SQLITE_ENABLE_ICU @@ -3906,6 +4104,12 @@ static int fts3EvalDeferredPhrase(Fts3Cursor *pCsr, Fts3Phrase *pPhrase){  }  /* +** Maximum number of tokens a phrase may have to be considered for the +** incremental doclists strategy. 
+*/ +#define MAX_INCR_PHRASE_TOKENS 4 + +/*  ** This function is called for each Fts3Phrase in a full-text query   ** expression to initialize the mechanism for returning rows. Once this  ** function has been called successfully on an Fts3Phrase, it may be @@ -3918,23 +4122,43 @@ static int fts3EvalDeferredPhrase(Fts3Cursor *pCsr, Fts3Phrase *pPhrase){  ** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.  */  static int fts3EvalPhraseStart(Fts3Cursor *pCsr, int bOptOk, Fts3Phrase *p){ -  int rc;                         /* Error code */ -  Fts3PhraseToken *pFirst = &p->aToken[0];    Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; +  int rc = SQLITE_OK;             /* Error code */ +  int i; -  if( pCsr->bDesc==pTab->bDescIdx  -   && bOptOk==1  -   && p->nToken==1  -   && pFirst->pSegcsr  -   && pFirst->pSegcsr->bLookup  -   && pFirst->bFirst==0 -  ){ +  /* Determine if doclists may be loaded from disk incrementally. This is +  ** possible if the bOptOk argument is true, the FTS doclists will be +  ** scanned in forward order, and the phrase consists of  +  ** MAX_INCR_PHRASE_TOKENS or fewer tokens, none of which are are "^first" +  ** tokens or prefix tokens that cannot use a prefix-index.  */ +  int bHaveIncr = 0; +  int bIncrOk = (bOptOk  +   && pCsr->bDesc==pTab->bDescIdx  +   && p->nToken<=MAX_INCR_PHRASE_TOKENS && p->nToken>0 +   && p->nToken<=MAX_INCR_PHRASE_TOKENS && p->nToken>0 +#ifdef SQLITE_TEST +   && pTab->bNoIncrDoclist==0 +#endif +  ); +  for(i=0; bIncrOk==1 && i<p->nToken; i++){ +    Fts3PhraseToken *pToken = &p->aToken[i]; +    if( pToken->bFirst || (pToken->pSegcsr!=0 && !pToken->pSegcsr->bLookup) ){ +      bIncrOk = 0; +    } +    if( pToken->pSegcsr ) bHaveIncr = 1; +  } + +  if( bIncrOk && bHaveIncr ){      /* Use the incremental approach. */      int iCol = (p->iColumn >= pTab->nColumn ? 
-1 : p->iColumn); -    rc = sqlite3Fts3MsrIncrStart( -        pTab, pFirst->pSegcsr, iCol, pFirst->z, pFirst->n); +    for(i=0; rc==SQLITE_OK && i<p->nToken; i++){ +      Fts3PhraseToken *pToken = &p->aToken[i]; +      Fts3MultiSegReader *pSegcsr = pToken->pSegcsr; +      if( pSegcsr ){ +        rc = sqlite3Fts3MsrIncrStart(pTab, pSegcsr, iCol, pToken->z, pToken->n); +      } +    }      p->bIncr = 1; -    }else{      /* Load the full doclist for the phrase into memory. */      rc = fts3EvalPhraseLoad(pCsr, p); @@ -4044,15 +4268,125 @@ void sqlite3Fts3DoclistNext(  }  /* -** Attempt to move the phrase iterator to point to the next matching docid.  +** Advance the iterator pDL to the next entry in pDL->aAll/nAll. Set *pbEof +** to true if EOF is reached. +*/ +static void fts3EvalDlPhraseNext( +  Fts3Table *pTab, +  Fts3Doclist *pDL, +  u8 *pbEof +){ +  char *pIter;                            /* Used to iterate through aAll */ +  char *pEnd = &pDL->aAll[pDL->nAll];     /* 1 byte past end of aAll */ +  +  if( pDL->pNextDocid ){ +    pIter = pDL->pNextDocid; +  }else{ +    pIter = pDL->aAll; +  } + +  if( pIter>=pEnd ){ +    /* We have already reached the end of this doclist. EOF. */ +    *pbEof = 1; +  }else{ +    sqlite3_int64 iDelta; +    pIter += sqlite3Fts3GetVarint(pIter, &iDelta); +    if( pTab->bDescIdx==0 || pDL->pNextDocid==0 ){ +      pDL->iDocid += iDelta; +    }else{ +      pDL->iDocid -= iDelta; +    } +    pDL->pList = pIter; +    fts3PoslistCopy(0, &pIter); +    pDL->nList = (int)(pIter - pDL->pList); + +    /* pIter now points just past the 0x00 that terminates the position- +    ** list for document pDL->iDocid. However, if this position-list was +    ** edited in place by fts3EvalNearTrim(), then pIter may not actually +    ** point to the start of the next docid value. The following line deals +    ** with this case by advancing pIter past the zero-padding added by +    ** fts3EvalNearTrim().  
*/ +    while( pIter<pEnd && *pIter==0 ) pIter++; + +    pDL->pNextDocid = pIter; +    assert( pIter>=&pDL->aAll[pDL->nAll] || *pIter ); +    *pbEof = 0; +  } +} + +/* +** Helper type used by fts3EvalIncrPhraseNext() and incrPhraseTokenNext(). +*/ +typedef struct TokenDoclist TokenDoclist; +struct TokenDoclist { +  int bIgnore; +  sqlite3_int64 iDocid; +  char *pList; +  int nList; +}; + +/* +** Token pToken is an incrementally loaded token that is part of a  +** multi-token phrase. Advance it to the next matching document in the +** database and populate output variable *p with the details of the new +** entry. Or, if the iterator has reached EOF, set *pbEof to true. +**  ** If an error occurs, return an SQLite error code. Otherwise, return   ** SQLITE_OK. +*/ +static int incrPhraseTokenNext( +  Fts3Table *pTab,                /* Virtual table handle */ +  Fts3Phrase *pPhrase,            /* Phrase to advance token of */ +  int iToken,                     /* Specific token to advance */ +  TokenDoclist *p,                /* OUT: Docid and doclist for new entry */ +  u8 *pbEof                       /* OUT: True if iterator is at EOF */ +){ +  int rc = SQLITE_OK; + +  if( pPhrase->iDoclistToken==iToken ){ +    assert( p->bIgnore==0 ); +    assert( pPhrase->aToken[iToken].pSegcsr==0 ); +    fts3EvalDlPhraseNext(pTab, &pPhrase->doclist, pbEof); +    p->pList = pPhrase->doclist.pList; +    p->nList = pPhrase->doclist.nList; +    p->iDocid = pPhrase->doclist.iDocid; +  }else{ +    Fts3PhraseToken *pToken = &pPhrase->aToken[iToken]; +    assert( pToken->pDeferred==0 ); +    assert( pToken->pSegcsr || pPhrase->iDoclistToken>=0 ); +    if( pToken->pSegcsr ){ +      assert( p->bIgnore==0 ); +      rc = sqlite3Fts3MsrIncrNext( +          pTab, pToken->pSegcsr, &p->iDocid, &p->pList, &p->nList +      ); +      if( p->pList==0 ) *pbEof = 1; +    }else{ +      p->bIgnore = 1; +    } +  } + +  return rc; +} + + +/* +** The phrase iterator passed as the second argument: +** +**   
* features at least one token that uses an incremental doclist, and  +** +**   * does not contain any deferred tokens. +** +** Advance it to the next matching documnent in the database and populate +** the Fts3Doclist.pList and nList fields.   **  ** If there is no "next" entry and no error occurs, then *pbEof is set to  ** 1 before returning. Otherwise, if no error occurs and the iterator is  ** successfully advanced, *pbEof is set to 0. +** +** If an error occurs, return an SQLite error code. Otherwise, return  +** SQLITE_OK.  */ -static int fts3EvalPhraseNext( +static int fts3EvalIncrPhraseNext(    Fts3Cursor *pCsr,               /* FTS Cursor handle */    Fts3Phrase *p,                  /* Phrase object to advance to next docid */    u8 *pbEof                       /* OUT: Set to 1 if EOF */ @@ -4060,57 +4394,116 @@ static int fts3EvalPhraseNext(    int rc = SQLITE_OK;    Fts3Doclist *pDL = &p->doclist;    Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; +  u8 bEof = 0; -  if( p->bIncr ){ -    assert( p->nToken==1 ); -    assert( pDL->pNextDocid==0 ); +  /* This is only called if it is guaranteed that the phrase has at least +  ** one incremental token. In which case the bIncr flag is set. */ +  assert( p->bIncr==1 ); + +  if( p->nToken==1 && p->bIncr ){      rc = sqlite3Fts3MsrIncrNext(pTab, p->aToken[0].pSegcsr,           &pDL->iDocid, &pDL->pList, &pDL->nList      ); -    if( rc==SQLITE_OK && !pDL->pList ){ -      *pbEof = 1; +    if( pDL->pList==0 ) bEof = 1; +  }else{ +    int bDescDoclist = pCsr->bDesc; +    struct TokenDoclist a[MAX_INCR_PHRASE_TOKENS]; + +    memset(a, 0, sizeof(a)); +    assert( p->nToken<=MAX_INCR_PHRASE_TOKENS ); +    assert( p->iDoclistToken<MAX_INCR_PHRASE_TOKENS ); + +    while( bEof==0 ){ +      int bMaxSet = 0; +      sqlite3_int64 iMax = 0;     /* Largest docid for all iterators */ +      int i;                      /* Used to iterate through tokens */ + +      /* Advance the iterator for each token in the phrase once. 
*/ +      for(i=0; rc==SQLITE_OK && i<p->nToken && bEof==0; i++){ +        rc = incrPhraseTokenNext(pTab, p, i, &a[i], &bEof); +        if( a[i].bIgnore==0 && (bMaxSet==0 || DOCID_CMP(iMax, a[i].iDocid)<0) ){ +          iMax = a[i].iDocid; +          bMaxSet = 1; +        } +      } +      assert( rc!=SQLITE_OK || a[p->nToken-1].bIgnore==0 ); +      assert( rc!=SQLITE_OK || bMaxSet ); + +      /* Keep advancing iterators until they all point to the same document */ +      for(i=0; i<p->nToken; i++){ +        while( rc==SQLITE_OK && bEof==0  +            && a[i].bIgnore==0 && DOCID_CMP(a[i].iDocid, iMax)<0  +        ){ +          rc = incrPhraseTokenNext(pTab, p, i, &a[i], &bEof); +          if( DOCID_CMP(a[i].iDocid, iMax)>0 ){ +            iMax = a[i].iDocid; +            i = 0; +          } +        } +      } + +      /* Check if the current entries really are a phrase match */ +      if( bEof==0 ){ +        int nList = 0; +        int nByte = a[p->nToken-1].nList; +        char *aDoclist = sqlite3_malloc(nByte+1); +        if( !aDoclist ) return SQLITE_NOMEM; +        memcpy(aDoclist, a[p->nToken-1].pList, nByte+1); + +        for(i=0; i<(p->nToken-1); i++){ +          if( a[i].bIgnore==0 ){ +            char *pL = a[i].pList; +            char *pR = aDoclist; +            char *pOut = aDoclist; +            int nDist = p->nToken-1-i; +            int res = fts3PoslistPhraseMerge(&pOut, nDist, 0, 1, &pL, &pR); +            if( res==0 ) break; +            nList = (int)(pOut - aDoclist); +          } +        } +        if( i==(p->nToken-1) ){ +          pDL->iDocid = iMax; +          pDL->pList = aDoclist; +          pDL->nList = nList; +          pDL->bFreeList = 1; +          break; +        } +        sqlite3_free(aDoclist); +      }      } +  } + +  *pbEof = bEof; +  return rc; +} + +/* +** Attempt to move the phrase iterator to point to the next matching docid.  +** If an error occurs, return an SQLite error code. Otherwise, return  +** SQLITE_OK. 
+** +** If there is no "next" entry and no error occurs, then *pbEof is set to +** 1 before returning. Otherwise, if no error occurs and the iterator is +** successfully advanced, *pbEof is set to 0. +*/ +static int fts3EvalPhraseNext( +  Fts3Cursor *pCsr,               /* FTS Cursor handle */ +  Fts3Phrase *p,                  /* Phrase object to advance to next docid */ +  u8 *pbEof                       /* OUT: Set to 1 if EOF */ +){ +  int rc = SQLITE_OK; +  Fts3Doclist *pDL = &p->doclist; +  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + +  if( p->bIncr ){ +    rc = fts3EvalIncrPhraseNext(pCsr, p, pbEof);    }else if( pCsr->bDesc!=pTab->bDescIdx && pDL->nAll ){      sqlite3Fts3DoclistPrev(pTab->bDescIdx, pDL->aAll, pDL->nAll,           &pDL->pNextDocid, &pDL->iDocid, &pDL->nList, pbEof      );      pDL->pList = pDL->pNextDocid;    }else{ -    char *pIter;                            /* Used to iterate through aAll */ -    char *pEnd = &pDL->aAll[pDL->nAll];     /* 1 byte past end of aAll */ -    if( pDL->pNextDocid ){ -      pIter = pDL->pNextDocid; -    }else{ -      pIter = pDL->aAll; -    } - -    if( pIter>=pEnd ){ -      /* We have already reached the end of this doclist. EOF. */ -      *pbEof = 1; -    }else{ -      sqlite3_int64 iDelta; -      pIter += sqlite3Fts3GetVarint(pIter, &iDelta); -      if( pTab->bDescIdx==0 || pDL->pNextDocid==0 ){ -        pDL->iDocid += iDelta; -      }else{ -        pDL->iDocid -= iDelta; -      } -      pDL->pList = pIter; -      fts3PoslistCopy(0, &pIter); -      pDL->nList = (int)(pIter - pDL->pList); - -      /* pIter now points just past the 0x00 that terminates the position- -      ** list for document pDL->iDocid. However, if this position-list was -      ** edited in place by fts3EvalNearTrim(), then pIter may not actually -      ** point to the start of the next docid value. The following line deals -      ** with this case by advancing pIter past the zero-padding added by -      ** fts3EvalNearTrim().  
*/ -      while( pIter<pEnd && *pIter==0 ) pIter++; - -      pDL->pNextDocid = pIter; -      assert( pIter>=&pDL->aAll[pDL->nAll] || *pIter ); -      *pbEof = 0; -    } +    fts3EvalDlPhraseNext(pTab, pDL, pbEof);    }    return rc; @@ -4135,7 +4528,6 @@ static int fts3EvalPhraseNext(  static void fts3EvalStartReaders(    Fts3Cursor *pCsr,               /* FTS Cursor handle */    Fts3Expr *pExpr,                /* Expression to initialize phrases in */ -  int bOptOk,                     /* True to enable incremental loading */    int *pRc                        /* IN/OUT: Error code */  ){    if( pExpr && SQLITE_OK==*pRc ){ @@ -4146,10 +4538,10 @@ static void fts3EvalStartReaders(          if( pExpr->pPhrase->aToken[i].pDeferred==0 ) break;        }        pExpr->bDeferred = (i==nToken); -      *pRc = fts3EvalPhraseStart(pCsr, bOptOk, pExpr->pPhrase); +      *pRc = fts3EvalPhraseStart(pCsr, 1, pExpr->pPhrase);      }else{ -      fts3EvalStartReaders(pCsr, pExpr->pLeft, bOptOk, pRc); -      fts3EvalStartReaders(pCsr, pExpr->pRight, bOptOk, pRc); +      fts3EvalStartReaders(pCsr, pExpr->pLeft, pRc); +      fts3EvalStartReaders(pCsr, pExpr->pRight, pRc);        pExpr->bDeferred = (pExpr->pLeft->bDeferred && pExpr->pRight->bDeferred);      }    } @@ -4391,7 +4783,7 @@ static int fts3EvalSelectDeferred(        ** overflowing the 32-bit integer it is stored in. */        if( ii<12 ) nLoad4 = nLoad4*4; -      if( ii==0 || pTC->pPhrase->nToken>1 ){ +      if( ii==0 || (pTC->pPhrase->nToken>1 && ii!=nToken-1) ){          /* Either this is the cheapest token in the entire query, or it is          ** part of a multi-token phrase. Either way, the entire doclist will          ** (eventually) be loaded into memory. It may as well be now. 
*/ @@ -4471,7 +4863,7 @@ static int fts3EvalStart(Fts3Cursor *pCsr){    }  #endif -  fts3EvalStartReaders(pCsr, pCsr->pExpr, 1, &rc); +  fts3EvalStartReaders(pCsr, pCsr->pExpr, &rc);    return rc;  } @@ -4954,6 +5346,16 @@ static int fts3EvalNext(Fts3Cursor *pCsr){        pCsr->iPrevId = pExpr->iDocid;      }while( pCsr->isEof==0 && fts3EvalTestDeferredAndNear(pCsr, &rc) );    } + +  /* Check if the cursor is past the end of the docid range specified +  ** by Fts3Cursor.iMinDocid/iMaxDocid. If so, set the EOF flag.  */ +  if( rc==SQLITE_OK && ( +        (pCsr->bDesc==0 && pCsr->iPrevId>pCsr->iMaxDocid) +     || (pCsr->bDesc!=0 && pCsr->iPrevId<pCsr->iMinDocid) +  )){ +    pCsr->isEof = 1; +  } +    return rc;  } @@ -4977,12 +5379,16 @@ static void fts3EvalRestart(      if( pPhrase ){        fts3EvalInvalidatePoslist(pPhrase);        if( pPhrase->bIncr ){ -        assert( pPhrase->nToken==1 ); -        assert( pPhrase->aToken[0].pSegcsr ); -        sqlite3Fts3MsrIncrRestart(pPhrase->aToken[0].pSegcsr); +        int i; +        for(i=0; i<pPhrase->nToken; i++){ +          Fts3PhraseToken *pToken = &pPhrase->aToken[i]; +          assert( pToken->pDeferred==0 ); +          if( pToken->pSegcsr ){ +            sqlite3Fts3MsrIncrRestart(pToken->pSegcsr); +          } +        }          *pRc = fts3EvalPhraseStart(pCsr, 0, pPhrase);        } -        pPhrase->doclist.pNextDocid = 0;        pPhrase->doclist.iDocid = 0;      } @@ -5027,7 +5433,7 @@ static void fts3EvalUpdateCounts(Fts3Expr *pExpr){          pExpr->aMI[iCol*3 + 2] += (iCnt>0);          if( *p==0x00 ) break;          p++; -        p += sqlite3Fts3GetVarint32(p, &iCol); +        p += fts3GetVarint32(p, &iCol);        }      } @@ -5231,15 +5637,23 @@ int sqlite3Fts3EvalPhrasePoslist(    pIter = pPhrase->doclist.pList;    if( iDocid!=pCsr->iPrevId || pExpr->bEof ){      int bDescDoclist = pTab->bDescIdx;      /* For DOCID_CMP macro */ +    int iMul;                     /* +1 if csr dir matches index dir, else -1 
*/      int bOr = 0;      u8 bEof = 0; -    Fts3Expr *p; +    u8 bTreeEof = 0; +    Fts3Expr *p;                  /* Used to iterate from pExpr to root */ +    Fts3Expr *pNear;              /* Most senior NEAR ancestor (or pExpr) */      /* Check if this phrase descends from an OR expression node. If not,       ** return NULL. Otherwise, the entry that corresponds to docid  -    ** pCsr->iPrevId may lie earlier in the doclist buffer. */ +    ** pCsr->iPrevId may lie earlier in the doclist buffer. Or, if the +    ** tree that the node is part of has been marked as EOF, but the node +    ** itself is not EOF, then it may point to an earlier entry. */ +    pNear = pExpr;      for(p=pExpr->pParent; p; p=p->pParent){        if( p->eType==FTSQUERY_OR ) bOr = 1; +      if( p->eType==FTSQUERY_NEAR ) pNear = p; +      if( p->bEof ) bTreeEof = 1;      }      if( bOr==0 ) return SQLITE_OK; @@ -5258,29 +5672,59 @@ int sqlite3Fts3EvalPhrasePoslist(        assert( rc!=SQLITE_OK || pPhrase->bIncr==0 );        if( rc!=SQLITE_OK ) return rc;      } - -    if( pExpr->bEof ){ -      pIter = 0; -      iDocid = 0; +     +    iMul = ((pCsr->bDesc==bDescDoclist) ? 
1 : -1); +    while( bTreeEof==1  +        && pNear->bEof==0 +        && (DOCID_CMP(pNear->iDocid, pCsr->iPrevId) * iMul)<0 +    ){ +      int rc = SQLITE_OK; +      fts3EvalNextRow(pCsr, pExpr, &rc); +      if( rc!=SQLITE_OK ) return rc; +      iDocid = pExpr->iDocid; +      pIter = pPhrase->doclist.pList;      } +      bEof = (pPhrase->doclist.nAll==0);      assert( bDescDoclist==0 || bDescDoclist==1 );      assert( pCsr->bDesc==0 || pCsr->bDesc==1 ); -    if( pCsr->bDesc==bDescDoclist ){ -      int dummy; -      while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)>0 ) && bEof==0 ){ -        sqlite3Fts3DoclistPrev( -            bDescDoclist, pPhrase->doclist.aAll, pPhrase->doclist.nAll,  -            &pIter, &iDocid, &dummy, &bEof -        ); -      } -    }else{ -      while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)<0 ) && bEof==0 ){ -        sqlite3Fts3DoclistNext( -            bDescDoclist, pPhrase->doclist.aAll, pPhrase->doclist.nAll,  -            &pIter, &iDocid, &bEof -        ); +    if( bEof==0 ){ +      if( pCsr->bDesc==bDescDoclist ){ +        int dummy; +        if( pNear->bEof ){ +          /* This expression is already at EOF. So position it to point to the +          ** last entry in the doclist at pPhrase->doclist.aAll[]. Variable +          ** iDocid is already set for this entry, so all that is required is +          ** to set pIter to point to the first byte of the last position-list +          ** in the doclist.  +          ** +          ** It would also be correct to set pIter and iDocid to zero. In +          ** this case, the first call to sqltie3Fts4DoclistPrev() below +          ** would also move the iterator to point to the last entry in the  +          ** doclist. However, this is expensive, as to do so it has to  +          ** iterate through the entire doclist from start to finish (since +          ** it does not know the docid for the last entry).  
*/ +          pIter = &pPhrase->doclist.aAll[pPhrase->doclist.nAll-1]; +          fts3ReversePoslist(pPhrase->doclist.aAll, &pIter); +        } +        while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)>0 ) && bEof==0 ){ +          sqlite3Fts3DoclistPrev( +              bDescDoclist, pPhrase->doclist.aAll, pPhrase->doclist.nAll,  +              &pIter, &iDocid, &dummy, &bEof +          ); +        } +      }else{ +        if( pNear->bEof ){ +          pIter = 0; +          iDocid = 0; +        } +        while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)<0 ) && bEof==0 ){ +          sqlite3Fts3DoclistNext( +              bDescDoclist, pPhrase->doclist.aAll, pPhrase->doclist.nAll,  +              &pIter, &iDocid, &bEof +          ); +        }        }      } @@ -5290,7 +5734,7 @@ int sqlite3Fts3EvalPhrasePoslist(    if( *pIter==0x01 ){      pIter++; -    pIter += sqlite3Fts3GetVarint32(pIter, &iThis); +    pIter += fts3GetVarint32(pIter, &iThis);    }else{      iThis = 0;    } @@ -5298,7 +5742,7 @@ int sqlite3Fts3EvalPhrasePoslist(      fts3ColumnlistCopy(0, &pIter);      if( *pIter==0x00 ) return 0;      pIter++; -    pIter += sqlite3Fts3GetVarint32(pIter, &iThis); +    pIter += fts3GetVarint32(pIter, &iThis);    }    *ppOut = ((iCol==iThis)?pIter:0); @@ -5339,7 +5783,10 @@ int sqlite3Fts3Corrupt(){  /*  ** Initialize API pointer table, if required.  */ -int sqlite3_extension_init( +#ifdef _WIN32 +__declspec(dllexport) +#endif +int sqlite3_fts3_init(    sqlite3 *db,     char **pzErrMsg,    const sqlite3_api_routines *pApi diff --git a/ext/fts3/fts3Int.h b/ext/fts3/fts3Int.h index b19064c..b2827b7 100644 --- a/ext/fts3/fts3Int.h +++ b/ext/fts3/fts3Int.h @@ -32,7 +32,7 @@  /* If not building as part of the core, include sqlite3ext.h. 
*/  #ifndef SQLITE_CORE  # include "sqlite3ext.h"  -extern const sqlite3_api_routines *sqlite3_api; +SQLITE_EXTENSION_INIT3  #endif  #include "sqlite3.h" @@ -40,6 +40,18 @@ extern const sqlite3_api_routines *sqlite3_api;  #include "fts3_hash.h"  /* +** This constant determines the maximum depth of an FTS expression tree +** that the library will create and use. FTS uses recursion to perform  +** various operations on the query tree, so the disadvantage of a large +** limit is that it may allow very large queries to use large amounts +** of stack space (perhaps causing a stack overflow). +*/ +#ifndef SQLITE_FTS3_MAX_EXPR_DEPTH +# define SQLITE_FTS3_MAX_EXPR_DEPTH 12 +#endif + + +/*  ** This constant controls how often segments are merged. Once there are  ** FTS3_MERGE_COUNT segments of level N, they are merged into a single  ** segment of level N+1. @@ -194,23 +206,24 @@ struct Fts3Table {    const char *zName;              /* virtual table name */    int nColumn;                    /* number of named columns in virtual table */    char **azColumn;                /* column names.  malloced */ +  u8 *abNotindexed;               /* True for 'notindexed' columns */    sqlite3_tokenizer *pTokenizer;  /* tokenizer for inserts and queries */    char *zContentTbl;              /* content=xxx option, or NULL */    char *zLanguageid;              /* languageid=xxx option, or NULL */ -  u8 bAutoincrmerge;              /* True if automerge=1 */ +  int nAutoincrmerge;             /* Value configured by 'automerge' */    u32 nLeafAdd;                   /* Number of leaf blocks added this trans */    /* Precompiled statements used by the implementation. Each of these     ** statements is run and reset within a single virtual table API call.     
*/ -  sqlite3_stmt *aStmt[37]; +  sqlite3_stmt *aStmt[40];    char *zReadExprlist;    char *zWriteExprlist;    int nNodeSize;                  /* Soft limit for node size */    u8 bFts4;                       /* True for FTS4, false for FTS3 */ -  u8 bHasStat;                    /* True if %_stat table exists */ +  u8 bHasStat;                    /* True if %_stat table exists (2==unknown) */    u8 bHasDocsize;                 /* True if %_docsize table exists */    u8 bDescIdx;                    /* True if doclists are in reverse order */    u8 bIgnoreSavepoint;            /* True to ignore xSavepoint invocations */ @@ -254,6 +267,12 @@ struct Fts3Table {    int inTransaction;     /* True after xBegin but before xCommit/xRollback */    int mxSavepoint;       /* Largest valid xSavepoint integer */  #endif + +#ifdef SQLITE_TEST +  /* True to disable the incremental doclist optimization. This is controled +  ** by special insert command 'test-no-incr-doclist'.  */ +  int bNoIncrDoclist; +#endif  };  /* @@ -279,7 +298,8 @@ struct Fts3Cursor {    int eEvalmode;                  /* An FTS3_EVAL_XX constant */    int nRowAvg;                    /* Average size of database rows, in pages */    sqlite3_int64 nDoc;             /* Documents in table */ - +  i64 iMinDocid;                  /* Minimum docid to return */ +  i64 iMaxDocid;                  /* Maximum docid to return */    int isMatchinfoNeeded;          /* True when aMatchinfo[] needs filling in */    u32 *aMatchinfo;                /* Information about most recent match */    int nMatchinfo;                 /* Number of elements in aMatchinfo[] */ @@ -309,6 +329,15 @@ struct Fts3Cursor {  #define FTS3_DOCID_SEARCH    1    /* Lookup by rowid on %_content table */  #define FTS3_FULLTEXT_SEARCH 2    /* Full-text index search */ +/* +** The lower 16-bits of the sqlite3_index_info.idxNum value set by +** the xBestIndex() method contains the Fts3Cursor.eSearch value described +** above. 
The upper 16-bits contain a combination of the following +** bits, used to describe extra constraints on full-text searches. +*/ +#define FTS3_HAVE_LANGID    0x00010000      /* languageid=? */ +#define FTS3_HAVE_DOCID_GE  0x00020000      /* docid>=? */ +#define FTS3_HAVE_DOCID_LE  0x00040000      /* docid<=? */  struct Fts3Doclist {    char *aAll;                    /* Array containing doclist (or NULL) */ @@ -421,7 +450,6 @@ int sqlite3Fts3SegReaderPending(    Fts3Table*,int,const char*,int,int,Fts3SegReader**);  void sqlite3Fts3SegReaderFree(Fts3SegReader *);  int sqlite3Fts3AllSegdirs(Fts3Table*, int, int, int, sqlite3_stmt **); -int sqlite3Fts3ReadLock(Fts3Table *);  int sqlite3Fts3ReadBlock(Fts3Table*, sqlite3_int64, char **, int*, int*);  int sqlite3Fts3SelectDoctotal(Fts3Table *, sqlite3_stmt **); @@ -496,6 +524,10 @@ struct Fts3MultiSegReader {  int sqlite3Fts3Incrmerge(Fts3Table*,int,int); +#define fts3GetVarint32(p, piVal) (                                           \ +  (*(u8*)(p)&0x80) ? 
sqlite3Fts3GetVarint32(p, piVal) : (*piVal=*(u8*)(p), 1) \ +) +  /* fts3.c */  int sqlite3Fts3PutVarint(char *, sqlite3_int64);  int sqlite3Fts3GetVarint(const char *, sqlite_int64 *); @@ -553,7 +585,7 @@ int sqlite3Fts3MsrIncrRestart(Fts3MultiSegReader *pCsr);  int sqlite3Fts3InitTok(sqlite3*, Fts3Hash *);  /* fts3_unicode2.c (functions generated by parsing unicode text files) */ -#ifdef SQLITE_ENABLE_FTS4_UNICODE61 +#ifndef SQLITE_DISABLE_FTS3_UNICODE  int sqlite3FtsUnicodeFold(int, int);  int sqlite3FtsUnicodeIsalnum(int);  int sqlite3FtsUnicodeIsdiacritic(int); diff --git a/ext/fts3/fts3_aux.c b/ext/fts3/fts3_aux.c index 9b582fc..c68b1a9 100644 --- a/ext/fts3/fts3_aux.c +++ b/ext/fts3/fts3_aux.c @@ -31,6 +31,7 @@ struct Fts3auxCursor {    Fts3SegFilter filter;    char *zStop;    int nStop;                      /* Byte-length of string zStop */ +  int iLangid;                    /* Language id to query */    int isEof;                      /* True if cursor is at EOF */    sqlite3_int64 iRowid;           /* Current rowid */ @@ -45,7 +46,8 @@ struct Fts3auxCursor {  /*  ** Schema of the terms table.  */ -#define FTS3_TERMS_SCHEMA "CREATE TABLE x(term, col, documents, occurrences)" +#define FTS3_AUX_SCHEMA \ +  "CREATE TABLE x(term, col, documents, occurrences, languageid HIDDEN)"  /*  ** This function does all the work for both the xConnect and xCreate methods. 
@@ -92,7 +94,7 @@ static int fts3auxConnectMethod(    }    nFts3 = (int)strlen(zFts3); -  rc = sqlite3_declare_vtab(db, FTS3_TERMS_SCHEMA); +  rc = sqlite3_declare_vtab(db, FTS3_AUX_SCHEMA);    if( rc!=SQLITE_OK ) return rc;    nByte = sizeof(Fts3auxTable) + sizeof(Fts3Table) + nDb + nFts3 + 2; @@ -152,6 +154,8 @@ static int fts3auxBestIndexMethod(    int iEq = -1;    int iGe = -1;    int iLe = -1; +  int iLangid = -1; +  int iNext = 1;                  /* Next free argvIndex value */    UNUSED_PARAMETER(pVTab); @@ -163,36 +167,48 @@ static int fts3auxBestIndexMethod(      pInfo->orderByConsumed = 1;    } -  /* Search for equality and range constraints on the "term" column. */ +  /* Search for equality and range constraints on the "term" column.  +  ** And equality constraints on the hidden "languageid" column. */    for(i=0; i<pInfo->nConstraint; i++){ -    if( pInfo->aConstraint[i].usable && pInfo->aConstraint[i].iColumn==0 ){ +    if( pInfo->aConstraint[i].usable ){        int op = pInfo->aConstraint[i].op; -      if( op==SQLITE_INDEX_CONSTRAINT_EQ ) iEq = i; -      if( op==SQLITE_INDEX_CONSTRAINT_LT ) iLe = i; -      if( op==SQLITE_INDEX_CONSTRAINT_LE ) iLe = i; -      if( op==SQLITE_INDEX_CONSTRAINT_GT ) iGe = i; -      if( op==SQLITE_INDEX_CONSTRAINT_GE ) iGe = i; +      int iCol = pInfo->aConstraint[i].iColumn; + +      if( iCol==0 ){ +        if( op==SQLITE_INDEX_CONSTRAINT_EQ ) iEq = i; +        if( op==SQLITE_INDEX_CONSTRAINT_LT ) iLe = i; +        if( op==SQLITE_INDEX_CONSTRAINT_LE ) iLe = i; +        if( op==SQLITE_INDEX_CONSTRAINT_GT ) iGe = i; +        if( op==SQLITE_INDEX_CONSTRAINT_GE ) iGe = i; +      } +      if( iCol==4 ){ +        if( op==SQLITE_INDEX_CONSTRAINT_EQ ) iLangid = i; +      }      }    }    if( iEq>=0 ){      pInfo->idxNum = FTS4AUX_EQ_CONSTRAINT; -    pInfo->aConstraintUsage[iEq].argvIndex = 1; +    pInfo->aConstraintUsage[iEq].argvIndex = iNext++;      pInfo->estimatedCost = 5;    }else{      pInfo->idxNum = 0;      
pInfo->estimatedCost = 20000;      if( iGe>=0 ){        pInfo->idxNum += FTS4AUX_GE_CONSTRAINT; -      pInfo->aConstraintUsage[iGe].argvIndex = 1; +      pInfo->aConstraintUsage[iGe].argvIndex = iNext++;        pInfo->estimatedCost /= 2;      }      if( iLe>=0 ){        pInfo->idxNum += FTS4AUX_LE_CONSTRAINT; -      pInfo->aConstraintUsage[iLe].argvIndex = 1 + (iGe>=0); +      pInfo->aConstraintUsage[iLe].argvIndex = iNext++;        pInfo->estimatedCost /= 2;      }    } +  if( iLangid>=0 ){ +    pInfo->aConstraintUsage[iLangid].argvIndex = iNext++; +    pInfo->estimatedCost--; +  }    return SQLITE_OK;  } @@ -352,7 +368,14 @@ static int fts3auxFilterMethod(    Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor;    Fts3Table *pFts3 = ((Fts3auxTable *)pCursor->pVtab)->pFts3Tab;    int rc; -  int isScan; +  int isScan = 0; +  int iLangVal = 0;               /* Language id to query */ + +  int iEq = -1;                   /* Index of term=? value in apVal */ +  int iGe = -1;                   /* Index of term>=? value in apVal */ +  int iLe = -1;                   /* Index of term<=? value in apVal */ +  int iLangid = -1;               /* Index of languageid=? value in apVal */ +  int iNext = 0;    UNUSED_PARAMETER(nVal);    UNUSED_PARAMETER(idxStr); @@ -362,7 +385,21 @@ static int fts3auxFilterMethod(         || idxNum==FTS4AUX_LE_CONSTRAINT || idxNum==FTS4AUX_GE_CONSTRAINT         || idxNum==(FTS4AUX_LE_CONSTRAINT|FTS4AUX_GE_CONSTRAINT)    ); -  isScan = (idxNum!=FTS4AUX_EQ_CONSTRAINT); + +  if( idxNum==FTS4AUX_EQ_CONSTRAINT ){ +    iEq = iNext++; +  }else{ +    isScan = 1; +    if( idxNum & FTS4AUX_GE_CONSTRAINT ){ +      iGe = iNext++; +    } +    if( idxNum & FTS4AUX_LE_CONSTRAINT ){ +      iLe = iNext++; +    } +  } +  if( iNext<nVal ){ +    iLangid = iNext++; +  }    /* In case this cursor is being reused, close and zero it. 
*/    testcase(pCsr->filter.zTerm); @@ -374,22 +411,35 @@ static int fts3auxFilterMethod(    pCsr->filter.flags = FTS3_SEGMENT_REQUIRE_POS|FTS3_SEGMENT_IGNORE_EMPTY;    if( isScan ) pCsr->filter.flags |= FTS3_SEGMENT_SCAN; -  if( idxNum&(FTS4AUX_EQ_CONSTRAINT|FTS4AUX_GE_CONSTRAINT) ){ +  if( iEq>=0 || iGe>=0 ){      const unsigned char *zStr = sqlite3_value_text(apVal[0]); +    assert( (iEq==0 && iGe==-1) || (iEq==-1 && iGe==0) );      if( zStr ){        pCsr->filter.zTerm = sqlite3_mprintf("%s", zStr);        pCsr->filter.nTerm = sqlite3_value_bytes(apVal[0]);        if( pCsr->filter.zTerm==0 ) return SQLITE_NOMEM;      }    } -  if( idxNum&FTS4AUX_LE_CONSTRAINT ){ -    int iIdx = (idxNum&FTS4AUX_GE_CONSTRAINT) ? 1 : 0; -    pCsr->zStop = sqlite3_mprintf("%s", sqlite3_value_text(apVal[iIdx])); -    pCsr->nStop = sqlite3_value_bytes(apVal[iIdx]); + +  if( iLe>=0 ){ +    pCsr->zStop = sqlite3_mprintf("%s", sqlite3_value_text(apVal[iLe])); +    pCsr->nStop = sqlite3_value_bytes(apVal[iLe]);      if( pCsr->zStop==0 ) return SQLITE_NOMEM;    } +   +  if( iLangid>=0 ){ +    iLangVal = sqlite3_value_int(apVal[iLangid]); + +    /* If the user specified a negative value for the languageid, use zero +    ** instead. This works, as the "languageid=?" constraint will also +    ** be tested by the VDBE layer. The test will always be false (since +    ** this module will not return a row with a negative languageid), and +    ** so the overall query will return zero rows.  
*/ +    if( iLangVal<0 ) iLangVal = 0; +  } +  pCsr->iLangid = iLangVal; -  rc = sqlite3Fts3SegReaderCursor(pFts3, 0, 0, FTS3_SEGCURSOR_ALL, +  rc = sqlite3Fts3SegReaderCursor(pFts3, iLangVal, 0, FTS3_SEGCURSOR_ALL,        pCsr->filter.zTerm, pCsr->filter.nTerm, 0, isScan, &pCsr->csr    );    if( rc==SQLITE_OK ){ @@ -413,24 +463,37 @@ static int fts3auxEofMethod(sqlite3_vtab_cursor *pCursor){  */  static int fts3auxColumnMethod(    sqlite3_vtab_cursor *pCursor,   /* Cursor to retrieve value from */ -  sqlite3_context *pContext,      /* Context for sqlite3_result_xxx() calls */ +  sqlite3_context *pCtx,          /* Context for sqlite3_result_xxx() calls */    int iCol                        /* Index of column to read value from */  ){    Fts3auxCursor *p = (Fts3auxCursor *)pCursor;    assert( p->isEof==0 ); -  if( iCol==0 ){        /* Column "term" */ -    sqlite3_result_text(pContext, p->csr.zTerm, p->csr.nTerm, SQLITE_TRANSIENT); -  }else if( iCol==1 ){  /* Column "col" */ -    if( p->iCol ){ -      sqlite3_result_int(pContext, p->iCol-1); -    }else{ -      sqlite3_result_text(pContext, "*", -1, SQLITE_STATIC); -    } -  }else if( iCol==2 ){  /* Column "documents" */ -    sqlite3_result_int64(pContext, p->aStat[p->iCol].nDoc); -  }else{                /* Column "occurrences" */ -    sqlite3_result_int64(pContext, p->aStat[p->iCol].nOcc); +  switch( iCol ){ +    case 0: /* term */ +      sqlite3_result_text(pCtx, p->csr.zTerm, p->csr.nTerm, SQLITE_TRANSIENT); +      break; + +    case 1: /* col */ +      if( p->iCol ){ +        sqlite3_result_int(pCtx, p->iCol-1); +      }else{ +        sqlite3_result_text(pCtx, "*", -1, SQLITE_STATIC); +      } +      break; + +    case 2: /* documents */ +      sqlite3_result_int64(pCtx, p->aStat[p->iCol].nDoc); +      break; + +    case 3: /* occurrences */ +      sqlite3_result_int64(pCtx, p->aStat[p->iCol].nOcc); +      break; + +    default: /* languageid */ +      assert( iCol==4 ); +      sqlite3_result_int(pCtx, 
p->iLangid); +      break;    }    return SQLITE_OK; diff --git a/ext/fts3/fts3_expr.c b/ext/fts3/fts3_expr.c index c046d7d..f5d28cb 100644 --- a/ext/fts3/fts3_expr.c +++ b/ext/fts3/fts3_expr.c @@ -155,6 +155,11 @@ int sqlite3Fts3OpenTokenizer(    return rc;  } +/* +** Function getNextNode(), which is called by fts3ExprParse(), may itself +** call fts3ExprParse(). So this forward declaration is required. +*/ +static int fts3ExprParse(ParseContext *, const char *, int, Fts3Expr **, int *);  /*  ** Extract the next token from buffer z (length n) using the tokenizer @@ -180,9 +185,16 @@ static int getNextToken(    int rc;    sqlite3_tokenizer_cursor *pCursor;    Fts3Expr *pRet = 0; -  int nConsumed = 0; +  int i = 0; + +  /* Set variable i to the maximum number of bytes of input to tokenize. */ +  for(i=0; i<n; i++){ +    if( sqlite3_fts3_enable_parentheses && (z[i]=='(' || z[i]==')') ) break; +    if( z[i]=='*' || z[i]=='"' ) break; +  } -  rc = sqlite3Fts3OpenTokenizer(pTokenizer, pParse->iLangid, z, n, &pCursor); +  *pnConsumed = i; +  rc = sqlite3Fts3OpenTokenizer(pTokenizer, pParse->iLangid, z, i, &pCursor);    if( rc==SQLITE_OK ){      const char *zToken;      int nToken = 0, iStart = 0, iEnd = 0, iPosition = 0; @@ -223,13 +235,14 @@ static int getNextToken(          }        } -      nConsumed = iEnd; +      *pnConsumed = iEnd; +    }else if( i && rc==SQLITE_DONE ){ +      rc = SQLITE_OK;      }      pModule->xClose(pCursor);    } -  *pnConsumed = nConsumed;    *ppExpr = pRet;    return rc;  } @@ -370,12 +383,6 @@ no_mem:  }  /* -** Function getNextNode(), which is called by fts3ExprParse(), may itself -** call fts3ExprParse(). So this forward declaration is required. -*/ -static int fts3ExprParse(ParseContext *, const char *, int, Fts3Expr **, int *); - -/*  ** The output variable *ppExpr is populated with an allocated Fts3Expr   ** structure, or set to 0 if the end of the input buffer is reached.  
** @@ -471,27 +478,6 @@ static int getNextNode(      }    } -  /* Check for an open bracket. */ -  if( sqlite3_fts3_enable_parentheses ){ -    if( *zInput=='(' ){ -      int nConsumed; -      pParse->nNest++; -      rc = fts3ExprParse(pParse, &zInput[1], nInput-1, ppExpr, &nConsumed); -      if( rc==SQLITE_OK && !*ppExpr ){ -        rc = SQLITE_DONE; -      } -      *pnConsumed = (int)((zInput - z) + 1 + nConsumed); -      return rc; -    } -   -    /* Check for a close bracket. */ -    if( *zInput==')' ){ -      pParse->nNest--; -      *pnConsumed = (int)((zInput - z) + 1); -      return SQLITE_DONE; -    } -  } -    /* See if we are dealing with a quoted phrase. If this is the case, then    ** search for the closing quote and pass the whole string to getNextString()    ** for processing. This is easy to do, as fts3 has no syntax for escaping @@ -506,6 +492,21 @@ static int getNextNode(      return getNextString(pParse, &zInput[1], ii-1, ppExpr);    } +  if( sqlite3_fts3_enable_parentheses ){ +    if( *zInput=='(' ){ +      int nConsumed = 0; +      pParse->nNest++; +      rc = fts3ExprParse(pParse, zInput+1, nInput-1, ppExpr, &nConsumed); +      if( rc==SQLITE_OK && !*ppExpr ){ rc = SQLITE_DONE; } +      *pnConsumed = (int)(zInput - z) + 1 + nConsumed; +      return rc; +    }else if( *zInput==')' ){ +      pParse->nNest--; +      *pnConsumed = (int)((zInput - z) + 1); +      *ppExpr = 0; +      return SQLITE_DONE; +    } +  }    /* If control flows to this point, this must be a regular token, or     ** the end of the input. 
Read a regular token using the sqlite3_tokenizer @@ -624,96 +625,100 @@ static int fts3ExprParse(    while( rc==SQLITE_OK ){      Fts3Expr *p = 0;      int nByte = 0; +      rc = getNextNode(pParse, zIn, nIn, &p, &nByte); +    assert( nByte>0 || (rc!=SQLITE_OK && p==0) );      if( rc==SQLITE_OK ){ -      int isPhrase; - -      if( !sqlite3_fts3_enable_parentheses  -       && p->eType==FTSQUERY_PHRASE && pParse->isNot  -      ){ -        /* Create an implicit NOT operator. */ -        Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr)); -        if( !pNot ){ -          sqlite3Fts3ExprFree(p); -          rc = SQLITE_NOMEM; -          goto exprparse_out; -        } -        pNot->eType = FTSQUERY_NOT; -        pNot->pRight = p; -        p->pParent = pNot; -        if( pNotBranch ){ -          pNot->pLeft = pNotBranch; -          pNotBranch->pParent = pNot; -        } -        pNotBranch = pNot; -        p = pPrev; -      }else{ -        int eType = p->eType; -        isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft); - -        /* The isRequirePhrase variable is set to true if a phrase or -        ** an expression contained in parenthesis is required. If a -        ** binary operator (AND, OR, NOT or NEAR) is encounted when -        ** isRequirePhrase is set, this is a syntax error. -        */ -        if( !isPhrase && isRequirePhrase ){ -          sqlite3Fts3ExprFree(p); -          rc = SQLITE_ERROR; -          goto exprparse_out; -        } -   -        if( isPhrase && !isRequirePhrase ){ -          /* Insert an implicit AND operator. */ -          Fts3Expr *pAnd; -          assert( pRet && pPrev ); -          pAnd = fts3MallocZero(sizeof(Fts3Expr)); -          if( !pAnd ){ +      if( p ){ +        int isPhrase; + +        if( !sqlite3_fts3_enable_parentheses  +            && p->eType==FTSQUERY_PHRASE && pParse->isNot  +        ){ +          /* Create an implicit NOT operator. 
*/ +          Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr)); +          if( !pNot ){              sqlite3Fts3ExprFree(p);              rc = SQLITE_NOMEM;              goto exprparse_out;            } -          pAnd->eType = FTSQUERY_AND; -          insertBinaryOperator(&pRet, pPrev, pAnd); -          pPrev = pAnd; -        } +          pNot->eType = FTSQUERY_NOT; +          pNot->pRight = p; +          p->pParent = pNot; +          if( pNotBranch ){ +            pNot->pLeft = pNotBranch; +            pNotBranch->pParent = pNot; +          } +          pNotBranch = pNot; +          p = pPrev; +        }else{ +          int eType = p->eType; +          isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft); + +          /* The isRequirePhrase variable is set to true if a phrase or +          ** an expression contained in parenthesis is required. If a +          ** binary operator (AND, OR, NOT or NEAR) is encounted when +          ** isRequirePhrase is set, this is a syntax error. +          */ +          if( !isPhrase && isRequirePhrase ){ +            sqlite3Fts3ExprFree(p); +            rc = SQLITE_ERROR; +            goto exprparse_out; +          } + +          if( isPhrase && !isRequirePhrase ){ +            /* Insert an implicit AND operator. */ +            Fts3Expr *pAnd; +            assert( pRet && pPrev ); +            pAnd = fts3MallocZero(sizeof(Fts3Expr)); +            if( !pAnd ){ +              sqlite3Fts3ExprFree(p); +              rc = SQLITE_NOMEM; +              goto exprparse_out; +            } +            pAnd->eType = FTSQUERY_AND; +            insertBinaryOperator(&pRet, pPrev, pAnd); +            pPrev = pAnd; +          } -        /* This test catches attempts to make either operand of a NEAR -        ** operator something other than a phrase. 
For example, either of -        ** the following: -        ** -        **    (bracketed expression) NEAR phrase -        **    phrase NEAR (bracketed expression) -        ** -        ** Return an error in either case. -        */ -        if( pPrev && ( +          /* This test catches attempts to make either operand of a NEAR +           ** operator something other than a phrase. For example, either of +           ** the following: +           ** +           **    (bracketed expression) NEAR phrase +           **    phrase NEAR (bracketed expression) +           ** +           ** Return an error in either case. +           */ +          if( pPrev && (              (eType==FTSQUERY_NEAR && !isPhrase && pPrev->eType!=FTSQUERY_PHRASE)           || (eType!=FTSQUERY_PHRASE && isPhrase && pPrev->eType==FTSQUERY_NEAR) -        )){ -          sqlite3Fts3ExprFree(p); -          rc = SQLITE_ERROR; -          goto exprparse_out; -        } -   -        if( isPhrase ){ -          if( pRet ){ -            assert( pPrev && pPrev->pLeft && pPrev->pRight==0 ); -            pPrev->pRight = p; -            p->pParent = pPrev; +          )){ +            sqlite3Fts3ExprFree(p); +            rc = SQLITE_ERROR; +            goto exprparse_out; +          } + +          if( isPhrase ){ +            if( pRet ){ +              assert( pPrev && pPrev->pLeft && pPrev->pRight==0 ); +              pPrev->pRight = p; +              p->pParent = pPrev; +            }else{ +              pRet = p; +            }            }else{ -            pRet = p; +            insertBinaryOperator(&pRet, pPrev, p);            } -        }else{ -          insertBinaryOperator(&pRet, pPrev, p); +          isRequirePhrase = !isPhrase;          } -        isRequirePhrase = !isPhrase; +        pPrev = p;        }        assert( nByte>0 );      }      assert( rc!=SQLITE_OK || (nByte>0 && nByte<=nIn) );      nIn -= nByte;      zIn += nByte; -    pPrev = p;    }    if( rc==SQLITE_DONE && pRet && isRequirePhrase ){ 
@@ -1000,17 +1005,16 @@ int sqlite3Fts3ExprParse(    Fts3Expr **ppExpr,                  /* OUT: Parsed query structure */    char **pzErr                        /* OUT: Error message (sqlite3_malloc) */  ){ -  static const int MAX_EXPR_DEPTH = 12;    int rc = fts3ExprParseUnbalanced(        pTokenizer, iLangid, azCol, bFts4, nCol, iDefaultCol, z, n, ppExpr    );    /* Rebalance the expression. And check that its depth does not exceed -  ** MAX_EXPR_DEPTH.  */ +  ** SQLITE_FTS3_MAX_EXPR_DEPTH.  */    if( rc==SQLITE_OK && *ppExpr ){ -    rc = fts3ExprBalance(ppExpr, MAX_EXPR_DEPTH); +    rc = fts3ExprBalance(ppExpr, SQLITE_FTS3_MAX_EXPR_DEPTH);      if( rc==SQLITE_OK ){ -      rc = fts3ExprCheckDepth(*ppExpr, MAX_EXPR_DEPTH); +      rc = fts3ExprCheckDepth(*ppExpr, SQLITE_FTS3_MAX_EXPR_DEPTH);      }    } @@ -1019,7 +1023,8 @@ int sqlite3Fts3ExprParse(      *ppExpr = 0;      if( rc==SQLITE_TOOBIG ){        *pzErr = sqlite3_mprintf( -          "FTS expression tree is too large (maximum depth %d)", MAX_EXPR_DEPTH +          "FTS expression tree is too large (maximum depth %d)",  +          SQLITE_FTS3_MAX_EXPR_DEPTH        );        rc = SQLITE_ERROR;      }else if( rc==SQLITE_ERROR ){ diff --git a/ext/fts3/fts3_hash.c b/ext/fts3/fts3_hash.c index 57c59b5..1a32a53 100644 --- a/ext/fts3/fts3_hash.c +++ b/ext/fts3/fts3_hash.c @@ -96,13 +96,13 @@ void sqlite3Fts3HashClear(Fts3Hash *pH){  */  static int fts3StrHash(const void *pKey, int nKey){    const char *z = (const char *)pKey; -  int h = 0; +  unsigned h = 0;    if( nKey<=0 ) nKey = (int) strlen(z);    while( nKey > 0  ){      h = (h<<3) ^ h ^ *z++;      nKey--;    } -  return h & 0x7fffffff; +  return (int)(h & 0x7fffffff);  }  static int fts3StrCompare(const void *pKey1, int n1, const void *pKey2, int n2){    if( n1!=n2 ) return 1; diff --git a/ext/fts3/fts3_porter.c b/ext/fts3/fts3_porter.c index 579745b..db175ac 100644 --- a/ext/fts3/fts3_porter.c +++ b/ext/fts3/fts3_porter.c @@ -403,12 +403,14 @@ static void 
porter_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){    /* Step 2 */    switch( z[1] ){     case 'a': -     stem(&z, "lanoita", "ate", m_gt_0) || -     stem(&z, "lanoit", "tion", m_gt_0); +     if( !stem(&z, "lanoita", "ate", m_gt_0) ){ +       stem(&z, "lanoit", "tion", m_gt_0); +     }       break;     case 'c': -     stem(&z, "icne", "ence", m_gt_0) || -     stem(&z, "icna", "ance", m_gt_0); +     if( !stem(&z, "icne", "ence", m_gt_0) ){ +       stem(&z, "icna", "ance", m_gt_0); +     }       break;     case 'e':       stem(&z, "rezi", "ize", m_gt_0); @@ -417,43 +419,54 @@ static void porter_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){       stem(&z, "igol", "log", m_gt_0);       break;     case 'l': -     stem(&z, "ilb", "ble", m_gt_0) || -     stem(&z, "illa", "al", m_gt_0) || -     stem(&z, "iltne", "ent", m_gt_0) || -     stem(&z, "ile", "e", m_gt_0) || -     stem(&z, "ilsuo", "ous", m_gt_0); +     if( !stem(&z, "ilb", "ble", m_gt_0)  +      && !stem(&z, "illa", "al", m_gt_0) +      && !stem(&z, "iltne", "ent", m_gt_0) +      && !stem(&z, "ile", "e", m_gt_0) +     ){ +       stem(&z, "ilsuo", "ous", m_gt_0); +     }       break;     case 'o': -     stem(&z, "noitazi", "ize", m_gt_0) || -     stem(&z, "noita", "ate", m_gt_0) || -     stem(&z, "rota", "ate", m_gt_0); +     if( !stem(&z, "noitazi", "ize", m_gt_0) +      && !stem(&z, "noita", "ate", m_gt_0) +     ){ +       stem(&z, "rota", "ate", m_gt_0); +     }       break;     case 's': -     stem(&z, "msila", "al", m_gt_0) || -     stem(&z, "ssenevi", "ive", m_gt_0) || -     stem(&z, "ssenluf", "ful", m_gt_0) || -     stem(&z, "ssensuo", "ous", m_gt_0); +     if( !stem(&z, "msila", "al", m_gt_0) +      && !stem(&z, "ssenevi", "ive", m_gt_0) +      && !stem(&z, "ssenluf", "ful", m_gt_0) +     ){ +       stem(&z, "ssensuo", "ous", m_gt_0); +     }       break;     case 't': -     stem(&z, "itila", "al", m_gt_0) || -     stem(&z, "itivi", "ive", m_gt_0) || -     stem(&z, "itilib", 
"ble", m_gt_0); +     if( !stem(&z, "itila", "al", m_gt_0) +      && !stem(&z, "itivi", "ive", m_gt_0) +     ){ +       stem(&z, "itilib", "ble", m_gt_0); +     }       break;    }    /* Step 3 */    switch( z[0] ){     case 'e': -     stem(&z, "etaci", "ic", m_gt_0) || -     stem(&z, "evita", "", m_gt_0)   || -     stem(&z, "ezila", "al", m_gt_0); +     if( !stem(&z, "etaci", "ic", m_gt_0) +      && !stem(&z, "evita", "", m_gt_0) +     ){ +       stem(&z, "ezila", "al", m_gt_0); +     }       break;     case 'i':       stem(&z, "itici", "ic", m_gt_0);       break;     case 'l': -     stem(&z, "laci", "ic", m_gt_0) || -     stem(&z, "luf", "", m_gt_0); +     if( !stem(&z, "laci", "ic", m_gt_0) ){ +       stem(&z, "luf", "", m_gt_0); +     }       break;     case 's':       stem(&z, "ssen", "", m_gt_0); @@ -494,9 +507,11 @@ static void porter_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){             z += 3;           }         }else if( z[2]=='e' ){ -         stem(&z, "tneme", "", m_gt_1) || -         stem(&z, "tnem", "", m_gt_1) || -         stem(&z, "tne", "", m_gt_1); +         if( !stem(&z, "tneme", "", m_gt_1) +          && !stem(&z, "tnem", "", m_gt_1) +         ){ +           stem(&z, "tne", "", m_gt_1); +         }         }       }       break; @@ -515,8 +530,9 @@ static void porter_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){       }       break;     case 't': -     stem(&z, "eta", "", m_gt_1) || -     stem(&z, "iti", "", m_gt_1); +     if( !stem(&z, "eta", "", m_gt_1) ){ +       stem(&z, "iti", "", m_gt_1); +     }       break;     case 'u':       if( z[0]=='s' && z[2]=='o' && m_gt_1(z+3) ){ diff --git a/ext/fts3/fts3_snippet.c b/ext/fts3/fts3_snippet.c index d54a787..aa8779f 100644 --- a/ext/fts3/fts3_snippet.c +++ b/ext/fts3/fts3_snippet.c @@ -128,7 +128,7 @@ struct StrBuffer {  */  static void fts3GetDeltaPosition(char **pp, int *piPos){    int iVal; -  *pp += sqlite3Fts3GetVarint32(*pp, &iVal); +  *pp += fts3GetVarint32(*pp, 
&iVal);    *piPos += (iVal-2);  } @@ -504,6 +504,7 @@ static int fts3StringAppend(      pStr->z = zNew;      pStr->nAlloc = nAlloc;    } +  assert( pStr->z!=0 && (pStr->nAlloc >= pStr->n+nAppend+1) );    /* Append the data to the string buffer. */    memcpy(&pStr->z[pStr->n], zAppend, nAppend); diff --git a/ext/fts3/fts3_test.c b/ext/fts3/fts3_test.c index 75ec6bd..36dcc94 100644 --- a/ext/fts3/fts3_test.c +++ b/ext/fts3/fts3_test.c @@ -517,6 +517,51 @@ static int fts3_test_tokenizer_cmd(    return TCL_OK;  } +static int fts3_test_varint_cmd( +  ClientData clientData, +  Tcl_Interp *interp, +  int objc, +  Tcl_Obj *CONST objv[] +){ +#ifdef SQLITE_ENABLE_FTS3 +  char aBuf[24]; +  int rc; +  Tcl_WideInt w, w2; +  int nByte, nByte2; + +  if( objc!=2 ){ +    Tcl_WrongNumArgs(interp, 1, objv, "INTEGER"); +    return TCL_ERROR; +  } + +  rc = Tcl_GetWideIntFromObj(interp, objv[1], &w); +  if( rc!=TCL_OK ) return rc; + +  nByte = sqlite3Fts3PutVarint(aBuf, w); +  nByte2 = sqlite3Fts3GetVarint(aBuf, &w2); +  if( w!=w2 || nByte!=nByte2 ){ +    char *zErr = sqlite3_mprintf("error testing %lld", w); +    Tcl_ResetResult(interp); +    Tcl_AppendResult(interp, zErr, 0); +    return TCL_ERROR; +  } + +  if( w<=2147483647 && w>=0 ){ +    int i; +    nByte2 = fts3GetVarint32(aBuf, &i); +    if( (int)w!=i || nByte!=nByte2 ){ +      char *zErr = sqlite3_mprintf("error testing %lld (32-bit)", w); +      Tcl_ResetResult(interp); +      Tcl_AppendResult(interp, zErr, 0); +      return TCL_ERROR; +    } +  } + +#endif +  UNUSED_PARAMETER(clientData); +  return TCL_OK; +} +  /*   ** End of tokenizer code.  
**************************************************************************/  @@ -529,6 +574,10 @@ int Sqlitetestfts3_Init(Tcl_Interp *interp){    Tcl_CreateObjCommand(        interp, "fts3_test_tokenizer", fts3_test_tokenizer_cmd, 0, 0    ); + +  Tcl_CreateObjCommand( +      interp, "fts3_test_varint", fts3_test_varint_cmd, 0, 0 +  );    return TCL_OK;  }  #endif                  /* SQLITE_ENABLE_FTS3 || SQLITE_ENABLE_FTS4 */ diff --git a/ext/fts3/fts3_unicode.c b/ext/fts3/fts3_unicode.c index 188358e..94fc27b 100644 --- a/ext/fts3/fts3_unicode.c +++ b/ext/fts3/fts3_unicode.c @@ -13,7 +13,7 @@  ** Implementation of the "unicode" full-text-search tokenizer.  */ -#ifdef SQLITE_ENABLE_FTS4_UNICODE61 +#ifndef SQLITE_DISABLE_FTS3_UNICODE  #include "fts3Int.h"  #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) @@ -231,7 +231,7 @@ static int unicodeCreate(    for(i=0; rc==SQLITE_OK && i<nArg; i++){      const char *z = azArg[i]; -    int n = strlen(z); +    int n = (int)strlen(z);      if( n==19 && memcmp("remove_diacritics=1", z, 19)==0 ){        pNew->bRemoveDiacritic = 1; @@ -318,7 +318,7 @@ static int unicodeNext(  ){    unicode_cursor *pCsr = (unicode_cursor *)pC;    unicode_tokenizer *p = ((unicode_tokenizer *)pCsr->base.pTokenizer); -  int iCode; +  int iCode = 0;    char *zOut;    const unsigned char *z = &pCsr->aInput[pCsr->iOff];    const unsigned char *zStart = z; @@ -363,11 +363,11 @@ static int unicodeNext(    );    /* Set the output variables and return. 
*/ -  pCsr->iOff = (z - pCsr->aInput); +  pCsr->iOff = (int)(z - pCsr->aInput);    *paToken = pCsr->zToken; -  *pnToken = zOut - pCsr->zToken; -  *piStart = (zStart - pCsr->aInput); -  *piEnd = (zEnd - pCsr->aInput); +  *pnToken = (int)(zOut - pCsr->zToken); +  *piStart = (int)(zStart - pCsr->aInput); +  *piEnd = (int)(zEnd - pCsr->aInput);    *piPos = pCsr->iToken++;    return SQLITE_OK;  } @@ -390,4 +390,4 @@ void sqlite3Fts3UnicodeTokenizer(sqlite3_tokenizer_module const **ppModule){  }  #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ -#endif /* ifndef SQLITE_ENABLE_FTS4_UNICODE61 */ +#endif /* ifndef SQLITE_DISABLE_FTS3_UNICODE */ diff --git a/ext/fts3/fts3_unicode2.c b/ext/fts3/fts3_unicode2.c index 3c24569..20b7a25 100644 --- a/ext/fts3/fts3_unicode2.c +++ b/ext/fts3/fts3_unicode2.c @@ -15,7 +15,7 @@  ** DO NOT EDIT THIS MACHINE GENERATED FILE.  */ -#if defined(SQLITE_ENABLE_FTS4_UNICODE61) +#ifndef SQLITE_DISABLE_FTS3_UNICODE  #if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)  #include <assert.h> @@ -39,7 +39,7 @@ int sqlite3FtsUnicodeIsalnum(int c){    ** C. It is not possible to represent a range larger than 1023 codepoints     ** using this format.    
*/ -  const static unsigned int aEntry[] = { +  static const unsigned int aEntry[] = {      0x00000030, 0x0000E807, 0x00016C06, 0x0001EC2F, 0x0002AC07,      0x0002D001, 0x0002D803, 0x0002EC01, 0x0002FC01, 0x00035C01,      0x0003DC01, 0x000B0804, 0x000B480E, 0x000B9407, 0x000BB401, @@ -101,28 +101,27 @@ int sqlite3FtsUnicodeIsalnum(int c){      0x02A97004, 0x02A9DC03, 0x02A9EC01, 0x02AAC001, 0x02AAC803,      0x02AADC02, 0x02AAF802, 0x02AB0401, 0x02AB7802, 0x02ABAC07,      0x02ABD402, 0x02AF8C0B, 0x03600001, 0x036DFC02, 0x036FFC02, -    0x037FFC02, 0x03E3FC01, 0x03EC7801, 0x03ECA401, 0x03EEC810, -    0x03F4F802, 0x03F7F002, 0x03F8001A, 0x03F88007, 0x03F8C023, -    0x03F95013, 0x03F9A004, 0x03FBFC01, 0x03FC040F, 0x03FC6807, -    0x03FCEC06, 0x03FD6C0B, 0x03FF8007, 0x03FFA007, 0x03FFE405, -    0x04040003, 0x0404DC09, 0x0405E411, 0x0406400C, 0x0407402E, -    0x040E7C01, 0x040F4001, 0x04215C01, 0x04247C01, 0x0424FC01, -    0x04280403, 0x04281402, 0x04283004, 0x0428E003, 0x0428FC01, -    0x04294009, 0x0429FC01, 0x042CE407, 0x04400003, 0x0440E016, -    0x04420003, 0x0442C012, 0x04440003, 0x04449C0E, 0x04450004, -    0x04460003, 0x0446CC0E, 0x04471404, 0x045AAC0D, 0x0491C004, -    0x05BD442E, 0x05BE3C04, 0x074000F6, 0x07440027, 0x0744A4B5, -    0x07480046, 0x074C0057, 0x075B0401, 0x075B6C01, 0x075BEC01, -    0x075C5401, 0x075CD401, 0x075D3C01, 0x075DBC01, 0x075E2401, -    0x075EA401, 0x075F0C01, 0x07BBC002, 0x07C0002C, 0x07C0C064, -    0x07C2800F, 0x07C2C40E, 0x07C3040F, 0x07C3440F, 0x07C4401F, -    0x07C4C03C, 0x07C5C02B, 0x07C7981D, 0x07C8402B, 0x07C90009, -    0x07C94002, 0x07CC0021, 0x07CCC006, 0x07CCDC46, 0x07CE0014, -    0x07CE8025, 0x07CF1805, 0x07CF8011, 0x07D0003F, 0x07D10001, -    0x07D108B6, 0x07D3E404, 0x07D4003E, 0x07D50004, 0x07D54018, -    0x07D7EC46, 0x07D9140B, 0x07DA0046, 0x07DC0074, 0x38000401, -    0x38008060, 0x380400F0, 0x3C000001, 0x3FFFF401, 0x40000001, -    0x43FFF401, +    0x037FFC01, 0x03EC7801, 0x03ECA401, 0x03EEC810, 0x03F4F802, +    0x03F7F002, 
0x03F8001A, 0x03F88007, 0x03F8C023, 0x03F95013, +    0x03F9A004, 0x03FBFC01, 0x03FC040F, 0x03FC6807, 0x03FCEC06, +    0x03FD6C0B, 0x03FF8007, 0x03FFA007, 0x03FFE405, 0x04040003, +    0x0404DC09, 0x0405E411, 0x0406400C, 0x0407402E, 0x040E7C01, +    0x040F4001, 0x04215C01, 0x04247C01, 0x0424FC01, 0x04280403, +    0x04281402, 0x04283004, 0x0428E003, 0x0428FC01, 0x04294009, +    0x0429FC01, 0x042CE407, 0x04400003, 0x0440E016, 0x04420003, +    0x0442C012, 0x04440003, 0x04449C0E, 0x04450004, 0x04460003, +    0x0446CC0E, 0x04471404, 0x045AAC0D, 0x0491C004, 0x05BD442E, +    0x05BE3C04, 0x074000F6, 0x07440027, 0x0744A4B5, 0x07480046, +    0x074C0057, 0x075B0401, 0x075B6C01, 0x075BEC01, 0x075C5401, +    0x075CD401, 0x075D3C01, 0x075DBC01, 0x075E2401, 0x075EA401, +    0x075F0C01, 0x07BBC002, 0x07C0002C, 0x07C0C064, 0x07C2800F, +    0x07C2C40E, 0x07C3040F, 0x07C3440F, 0x07C4401F, 0x07C4C03C, +    0x07C5C02B, 0x07C7981D, 0x07C8402B, 0x07C90009, 0x07C94002, +    0x07CC0021, 0x07CCC006, 0x07CCDC46, 0x07CE0014, 0x07CE8025, +    0x07CF1805, 0x07CF8011, 0x07D0003F, 0x07D10001, 0x07D108B6, +    0x07D3E404, 0x07D4003E, 0x07D50004, 0x07D54018, 0x07D7EC46, +    0x07D9140B, 0x07DA0046, 0x07DC0074, 0x38000401, 0x38008060, +    0x380400F0,    };    static const unsigned int aAscii[4] = {      0xFFFFFFFF, 0xFC00FFFF, 0xF8000001, 0xF8000001, @@ -132,7 +131,7 @@ int sqlite3FtsUnicodeIsalnum(int c){      return ( (aAscii[c >> 5] & (1 << (c & 0x001F)))==0 );    }else if( c<(1<<22) ){      unsigned int key = (((unsigned int)c)<<10) | 0x000003FF; -    int iRes; +    int iRes = 0;      int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;      int iLo = 0;      while( iHi>=iLo ){ @@ -203,7 +202,7 @@ static int remove_diacritic(int c){    }    assert( key>=aDia[iRes] );    return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? 
c : (int)aChar[iRes]); -}; +}  /* @@ -363,4 +362,4 @@ int sqlite3FtsUnicodeFold(int c, int bRemoveDiacritic){    return ret;  }  #endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */ -#endif /* !defined(SQLITE_ENABLE_FTS4_UNICODE61) */ +#endif /* !defined(SQLITE_DISABLE_FTS3_UNICODE) */ diff --git a/ext/fts3/fts3_write.c b/ext/fts3/fts3_write.c index 269d1dd..0da08c6 100644 --- a/ext/fts3/fts3_write.c +++ b/ext/fts3/fts3_write.c @@ -193,6 +193,7 @@ struct SegmentWriter {    int nSize;                      /* Size of allocation at aData */    int nData;                      /* Bytes of data in aData */    char *aData;                    /* Pointer to block from malloc() */ +  i64 nLeafData;                  /* Number of bytes of leaf data written */  };  /* @@ -268,6 +269,10 @@ struct SegmentNode {  #define SQL_SELECT_INDEXES            35  #define SQL_SELECT_MXLEVEL            36 +#define SQL_SELECT_LEVEL_RANGE2       37 +#define SQL_UPDATE_LEVEL_IDX          38 +#define SQL_UPDATE_LEVEL              39 +  /*  ** This function is used to obtain an SQLite prepared statement handle  ** for the statement identified by the second argument. If successful, @@ -369,7 +374,18 @@ static int fts3SqlStmt(  /* SQL_SELECT_MXLEVEL  **   Return the largest relative level in the FTS index or indexes.  */ -/* 36 */  "SELECT max( level %% 1024 ) FROM %Q.'%q_segdir'" +/* 36 */  "SELECT max( level %% 1024 ) FROM %Q.'%q_segdir'", + +          /* Return segments in order from oldest to newest.*/  +/* 37 */  "SELECT level, idx, end_block " +            "FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ? " +            "ORDER BY level DESC, idx ASC", + +          /* Update statements used while promoting segments */ +/* 38 */  "UPDATE OR FAIL %Q.'%q_segdir' SET level=-1,idx=? " +            "WHERE level=? AND idx=?", +/* 39 */  "UPDATE OR FAIL %Q.'%q_segdir' SET level=? 
WHERE level=-1" +    };    int rc = SQLITE_OK;    sqlite3_stmt *pStmt; @@ -489,37 +505,30 @@ static void fts3SqlExec(  /* -** This function ensures that the caller has obtained a shared-cache -** table-lock on the %_content table. This is required before reading -** data from the fts3 table. If this lock is not acquired first, then -** the caller may end up holding read-locks on the %_segments and %_segdir -** tables, but no read-lock on the %_content table. If this happens  -** a second connection will be able to write to the fts3 table, but -** attempting to commit those writes might return SQLITE_LOCKED or -** SQLITE_LOCKED_SHAREDCACHE (because the commit attempts to obtain  -** write-locks on the %_segments and %_segdir ** tables).  -** -** We try to avoid this because if FTS3 returns any error when committing -** a transaction, the whole transaction will be rolled back. And this is -** not what users expect when they get SQLITE_LOCKED_SHAREDCACHE. It can -** still happen if the user reads data directly from the %_segments or -** %_segdir tables instead of going through FTS3 though. +** This function ensures that the caller has obtained an exclusive  +** shared-cache table-lock on the %_segdir table. This is required before  +** writing data to the fts3 table. If this lock is not acquired first, then +** the caller may end up attempting to take this lock as part of committing +** a transaction, causing SQLite to return SQLITE_LOCKED or  +** SQLITE_LOCKED_SHAREDCACHE to a COMMIT command.  ** -** This reasoning does not apply to a content=xxx table. +** It is best to avoid this because if FTS3 returns any error when  +** committing a transaction, the whole transaction will be rolled back.  +** And this is not what users expect when they get SQLITE_LOCKED_SHAREDCACHE.  +** It can still happen if the user locks the underlying tables directly  +** instead of accessing them via FTS.  
*/ -int sqlite3Fts3ReadLock(Fts3Table *p){ -  int rc;                         /* Return code */ -  sqlite3_stmt *pStmt;            /* Statement used to obtain lock */ - -  if( p->zContentTbl==0 ){ -    rc = fts3SqlStmt(p, SQL_SELECT_CONTENT_BY_ROWID, &pStmt, 0); +static int fts3Writelock(Fts3Table *p){ +  int rc = SQLITE_OK; +   +  if( p->nPendingData==0 ){ +    sqlite3_stmt *pStmt; +    rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_LEVEL, &pStmt, 0);      if( rc==SQLITE_OK ){        sqlite3_bind_null(pStmt, 1);        sqlite3_step(pStmt);        rc = sqlite3_reset(pStmt);      } -  }else{ -    rc = SQLITE_OK;    }    return rc; @@ -907,12 +916,15 @@ static int fts3InsertTerms(  ){    int i;                          /* Iterator variable */    for(i=2; i<p->nColumn+2; i++){ -    const char *zText = (const char *)sqlite3_value_text(apVal[i]); -    int rc = fts3PendingTermsAdd(p, iLangid, zText, i-2, &aSz[i-2]); -    if( rc!=SQLITE_OK ){ -      return rc; +    int iCol = i-2; +    if( p->abNotindexed[iCol]==0 ){ +      const char *zText = (const char *)sqlite3_value_text(apVal[i]); +      int rc = fts3PendingTermsAdd(p, iLangid, zText, iCol, &aSz[iCol]); +      if( rc!=SQLITE_OK ){ +        return rc; +      } +      aSz[p->nColumn] += sqlite3_value_bytes(apVal[i]);      } -    aSz[p->nColumn] += sqlite3_value_bytes(apVal[i]);    }    return SQLITE_OK;  } @@ -1059,9 +1071,12 @@ static void fts3DeleteTerms(        int iLangid = langidFromSelect(p, pSelect);        rc = fts3PendingTermsDocid(p, iLangid, sqlite3_column_int64(pSelect, 0));        for(i=1; rc==SQLITE_OK && i<=p->nColumn; i++){ -        const char *zText = (const char *)sqlite3_column_text(pSelect, i); -        rc = fts3PendingTermsAdd(p, iLangid, zText, -1, &aSz[i-1]); -        aSz[p->nColumn] += sqlite3_column_bytes(pSelect, i); +        int iCol = i-1; +        if( p->abNotindexed[iCol]==0 ){ +          const char *zText = (const char *)sqlite3_column_text(pSelect, i); +          rc = fts3PendingTermsAdd(p, 
iLangid, zText, -1, &aSz[iCol]); +          aSz[p->nColumn] += sqlite3_column_bytes(pSelect, i); +        }        }        if( rc!=SQLITE_OK ){          sqlite3_reset(pSelect); @@ -1345,8 +1360,8 @@ static int fts3SegReaderNext(    /* Because of the FTS3_NODE_PADDING bytes of padding, the following is     ** safe (no risk of overread) even if the node data is corrupted. */ -  pNext += sqlite3Fts3GetVarint32(pNext, &nPrefix); -  pNext += sqlite3Fts3GetVarint32(pNext, &nSuffix); +  pNext += fts3GetVarint32(pNext, &nPrefix); +  pNext += fts3GetVarint32(pNext, &nSuffix);    if( nPrefix<0 || nSuffix<=0      || &pNext[nSuffix]>&pReader->aNode[pReader->nNode]     ){ @@ -1369,7 +1384,7 @@ static int fts3SegReaderNext(    memcpy(&pReader->zTerm[nPrefix], pNext, nSuffix);    pReader->nTerm = nPrefix+nSuffix;    pNext += nSuffix; -  pNext += sqlite3Fts3GetVarint32(pNext, &pReader->nDoclist); +  pNext += fts3GetVarint32(pNext, &pReader->nDoclist);    pReader->aDoclist = pNext;    pReader->pOffsetList = 0; @@ -1462,7 +1477,7 @@ static int fts3SegReaderNextDocid(        /* The following line of code (and the "p++" below the while() loop) is        ** normally all that is required to move pointer p to the desired         ** position. The exception is if this node is being loaded from disk -      ** incrementally and pointer "p" now points to the first byte passed +      ** incrementally and pointer "p" now points to the first byte past        ** the populated part of pReader->aNode[].        
*/        while( *p | c ) c = *p++ & 0x80; @@ -1911,6 +1926,7 @@ static int fts3WriteSegdir(    sqlite3_int64 iStartBlock,      /* Value for "start_block" field */    sqlite3_int64 iLeafEndBlock,    /* Value for "leaves_end_block" field */    sqlite3_int64 iEndBlock,        /* Value for "end_block" field */ +  sqlite3_int64 nLeafData,        /* Bytes of leaf data in segment */    char *zRoot,                    /* Blob value for "root" field */    int nRoot                       /* Number of bytes in buffer zRoot */  ){ @@ -1921,7 +1937,13 @@ static int fts3WriteSegdir(      sqlite3_bind_int(pStmt, 2, iIdx);      sqlite3_bind_int64(pStmt, 3, iStartBlock);      sqlite3_bind_int64(pStmt, 4, iLeafEndBlock); -    sqlite3_bind_int64(pStmt, 5, iEndBlock); +    if( nLeafData==0 ){ +      sqlite3_bind_int64(pStmt, 5, iEndBlock); +    }else{ +      char *zEnd = sqlite3_mprintf("%lld %lld", iEndBlock, nLeafData); +      if( !zEnd ) return SQLITE_NOMEM; +      sqlite3_bind_text(pStmt, 5, zEnd, -1, sqlite3_free); +    }      sqlite3_bind_blob(pStmt, 6, zRoot, nRoot, SQLITE_STATIC);      sqlite3_step(pStmt);      rc = sqlite3_reset(pStmt); @@ -2247,6 +2269,9 @@ static int fts3SegWriterAdd(        nDoclist;                             /* Doclist data */    } +  /* Increase the total number of bytes written to account for the new entry. */ +  pWriter->nLeafData += nReq; +    /* If the buffer currently allocated is too small for this entry, realloc    ** the buffer to make it large enough.    */ @@ -2318,13 +2343,13 @@ static int fts3SegWriterFlush(            pWriter->iFirst, pWriter->iFree, &iLast, &zRoot, &nRoot);      }      if( rc==SQLITE_OK ){ -      rc = fts3WriteSegdir( -          p, iLevel, iIdx, pWriter->iFirst, iLastLeaf, iLast, zRoot, nRoot); +      rc = fts3WriteSegdir(p, iLevel, iIdx,  +          pWriter->iFirst, iLastLeaf, iLast, pWriter->nLeafData, zRoot, nRoot);      }    }else{      /* The entire tree fits on the root node. Write it to the segdir table. 
*/ -    rc = fts3WriteSegdir( -        p, iLevel, iIdx, 0, 0, 0, pWriter->aData, pWriter->nData); +    rc = fts3WriteSegdir(p, iLevel, iIdx,  +        0, 0, 0, pWriter->nLeafData, pWriter->aData, pWriter->nData);    }    p->nLeafAdd++;    return rc; @@ -2409,6 +2434,37 @@ static int fts3SegmentMaxLevel(  }  /* +** iAbsLevel is an absolute level that may be assumed to exist within +** the database. This function checks if it is the largest level number +** within its index. Assuming no error occurs, *pbMax is set to 1 if +** iAbsLevel is indeed the largest level, or 0 otherwise, and SQLITE_OK +** is returned. If an error occurs, an error code is returned and the +** final value of *pbMax is undefined. +*/ +static int fts3SegmentIsMaxLevel(Fts3Table *p, i64 iAbsLevel, int *pbMax){ + +  /* Set pStmt to the compiled version of: +  ** +  **   SELECT max(level) FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ? +  ** +  ** (1024 is actually the value of macro FTS3_SEGDIR_PREFIXLEVEL_STR). +  */ +  sqlite3_stmt *pStmt; +  int rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR_MAX_LEVEL, &pStmt, 0); +  if( rc!=SQLITE_OK ) return rc; +  sqlite3_bind_int64(pStmt, 1, iAbsLevel+1); +  sqlite3_bind_int64(pStmt, 2,  +      ((iAbsLevel/FTS3_SEGDIR_MAXLEVEL)+1) * FTS3_SEGDIR_MAXLEVEL +  ); + +  *pbMax = 0; +  if( SQLITE_ROW==sqlite3_step(pStmt) ){ +    *pbMax = sqlite3_column_type(pStmt, 0)==SQLITE_NULL; +  } +  return sqlite3_reset(pStmt); +} + +/*  ** Delete all entries in the %_segments table associated with the segment  ** opened with seg-reader pSeg. This function does not affect the contents  ** of the %_segdir table. 
@@ -2530,7 +2586,7 @@ static void fts3ColumnFilter(        break;      }      p = &pList[1]; -    p += sqlite3Fts3GetVarint32(p, &iCurrent); +    p += fts3GetVarint32(p, &iCurrent);    }    if( bZero && &pList[nList]!=pEnd ){ @@ -2849,8 +2905,8 @@ int sqlite3Fts3SegReaderStep(        fts3SegReaderSort(apSegment, nMerge, nMerge, xCmp);        while( apSegment[0]->pOffsetList ){          int j;                    /* Number of segments that share a docid */ -        char *pList; -        int nList; +        char *pList = 0; +        int nList = 0;          int nByte;          sqlite3_int64 iDocid = apSegment[0]->iDocid;          fts3SegReaderNextDocid(p, apSegment[0], &pList, &nList); @@ -2944,6 +3000,140 @@ void sqlite3Fts3SegReaderFinish(  }  /* +** Decode the "end_block" field, selected by column iCol of the SELECT  +** statement passed as the first argument.  +** +** The "end_block" field may contain either an integer, or a text field +** containing the text representation of two non-negative integers separated  +** by one or more space (0x20) characters. In the first case, set *piEndBlock  +** to the integer value and *pnByte to zero before returning. In the second,  +** set *piEndBlock to the first value and *pnByte to the second. +*/ +static void fts3ReadEndBlockField( +  sqlite3_stmt *pStmt,  +  int iCol,  +  i64 *piEndBlock, +  i64 *pnByte +){ +  const unsigned char *zText = sqlite3_column_text(pStmt, iCol); +  if( zText ){ +    int i; +    int iMul = 1; +    i64 iVal = 0; +    for(i=0; zText[i]>='0' && zText[i]<='9'; i++){ +      iVal = iVal*10 + (zText[i] - '0'); +    } +    *piEndBlock = iVal; +    while( zText[i]==' ' ) i++; +    iVal = 0; +    if( zText[i]=='-' ){ +      i++; +      iMul = -1; +    } +    for(/* no-op */; zText[i]>='0' && zText[i]<='9'; i++){ +      iVal = iVal*10 + (zText[i] - '0'); +    } +    *pnByte = (iVal * (i64)iMul); +  } +} + + +/* +** A segment of size nByte bytes has just been written to absolute level +** iAbsLevel. 
Promote any segments that should be promoted as a result. +*/ +static int fts3PromoteSegments( +  Fts3Table *p,                   /* FTS table handle */ +  sqlite3_int64 iAbsLevel,        /* Absolute level just updated */ +  sqlite3_int64 nByte             /* Size of new segment at iAbsLevel */ +){ +  int rc = SQLITE_OK; +  sqlite3_stmt *pRange; + +  rc = fts3SqlStmt(p, SQL_SELECT_LEVEL_RANGE2, &pRange, 0); + +  if( rc==SQLITE_OK ){ +    int bOk = 0; +    i64 iLast = (iAbsLevel/FTS3_SEGDIR_MAXLEVEL + 1) * FTS3_SEGDIR_MAXLEVEL - 1; +    i64 nLimit = (nByte*3)/2; + +    /* Loop through all entries in the %_segdir table corresponding to  +    ** segments in this index on levels greater than iAbsLevel. If there is +    ** at least one such segment, and it is possible to determine that all  +    ** such segments are smaller than nLimit bytes in size, they will be  +    ** promoted to level iAbsLevel.  */ +    sqlite3_bind_int64(pRange, 1, iAbsLevel+1); +    sqlite3_bind_int64(pRange, 2, iLast); +    while( SQLITE_ROW==sqlite3_step(pRange) ){ +      i64 nSize = 0, dummy; +      fts3ReadEndBlockField(pRange, 2, &dummy, &nSize); +      if( nSize<=0 || nSize>nLimit ){ +        /* If nSize==0, then the %_segdir.end_block field does not  +        ** contain a size value. This happens if it was written by an +        ** old version of FTS. In this case it is not possible to determine +        ** the size of the segment, and so segment promotion does not +        ** take place.  
*/ +        bOk = 0; +        break; +      } +      bOk = 1; +    } +    rc = sqlite3_reset(pRange); + +    if( bOk ){ +      int iIdx = 0; +      sqlite3_stmt *pUpdate1; +      sqlite3_stmt *pUpdate2; + +      if( rc==SQLITE_OK ){ +        rc = fts3SqlStmt(p, SQL_UPDATE_LEVEL_IDX, &pUpdate1, 0); +      } +      if( rc==SQLITE_OK ){ +        rc = fts3SqlStmt(p, SQL_UPDATE_LEVEL, &pUpdate2, 0); +      } + +      if( rc==SQLITE_OK ){ + +        /* Loop through all %_segdir entries for segments in this index with +        ** levels equal to or greater than iAbsLevel. As each entry is visited, +        ** update it to set (level = -1) and (idx = N), where N is 0 for the +        ** oldest segment in the range, 1 for the next oldest, and so on. +        ** +        ** In other words, move all segments being promoted to level -1, +        ** setting the "idx" fields as appropriate to keep them in the same +        ** order. The contents of level -1 (which is never used, except +        ** transiently here), will be moved back to level iAbsLevel below.  */ +        sqlite3_bind_int64(pRange, 1, iAbsLevel); +        while( SQLITE_ROW==sqlite3_step(pRange) ){ +          sqlite3_bind_int(pUpdate1, 1, iIdx++); +          sqlite3_bind_int(pUpdate1, 2, sqlite3_column_int(pRange, 0)); +          sqlite3_bind_int(pUpdate1, 3, sqlite3_column_int(pRange, 1)); +          sqlite3_step(pUpdate1); +          rc = sqlite3_reset(pUpdate1); +          if( rc!=SQLITE_OK ){ +            sqlite3_reset(pRange); +            break; +          } +        } +      } +      if( rc==SQLITE_OK ){ +        rc = sqlite3_reset(pRange); +      } + +      /* Move level -1 to level iAbsLevel */ +      if( rc==SQLITE_OK ){ +        sqlite3_bind_int64(pUpdate2, 1, iAbsLevel); +        sqlite3_step(pUpdate2); +        rc = sqlite3_reset(pUpdate2); +      } +    } +  } + + +  return rc; +} + +/*  ** Merge all level iLevel segments in the database into a single   ** iLevel+1 segment. 
Or, if iLevel<0, merge all segments into a  ** single segment with a level equal to the numerically largest level  @@ -2967,6 +3157,7 @@ static int fts3SegmentMerge(    Fts3SegFilter filter;           /* Segment term filter condition */    Fts3MultiSegReader csr;         /* Cursor to iterate through level(s) */    int bIgnoreEmpty = 0;           /* True to ignore empty segments */ +  i64 iMaxLevel = 0;              /* Max level number for this index/langid */    assert( iLevel==FTS3_SEGCURSOR_ALL         || iLevel==FTS3_SEGCURSOR_PENDING @@ -2978,6 +3169,11 @@ static int fts3SegmentMerge(    rc = sqlite3Fts3SegReaderCursor(p, iLangid, iIndex, iLevel, 0, 0, 1, 0, &csr);    if( rc!=SQLITE_OK || csr.nSegment==0 ) goto finished; +  if( iLevel!=FTS3_SEGCURSOR_PENDING ){ +    rc = fts3SegmentMaxLevel(p, iLangid, iIndex, &iMaxLevel); +    if( rc!=SQLITE_OK ) goto finished; +  } +    if( iLevel==FTS3_SEGCURSOR_ALL ){      /* This call is to merge all segments in the database to a single      ** segment. The level of the new segment is equal to the numerically @@ -2987,21 +3183,21 @@ static int fts3SegmentMerge(        rc = SQLITE_DONE;        goto finished;      } -    rc = fts3SegmentMaxLevel(p, iLangid, iIndex, &iNewLevel); +    iNewLevel = iMaxLevel;      bIgnoreEmpty = 1; -  }else if( iLevel==FTS3_SEGCURSOR_PENDING ){ -    iNewLevel = getAbsoluteLevel(p, iLangid, iIndex, 0); -    rc = fts3AllocateSegdirIdx(p, iLangid, iIndex, 0, &iIdx);    }else{      /* This call is to merge all segments at level iLevel. find the next      ** available segment index at level iLevel+1. The call to      ** fts3AllocateSegdirIdx() will merge the segments at level iLevel+1 to       ** a single iLevel+2 segment if necessary.  
*/ -    rc = fts3AllocateSegdirIdx(p, iLangid, iIndex, iLevel+1, &iIdx); +    assert( FTS3_SEGCURSOR_PENDING==-1 );      iNewLevel = getAbsoluteLevel(p, iLangid, iIndex, iLevel+1); +    rc = fts3AllocateSegdirIdx(p, iLangid, iIndex, iLevel+1, &iIdx); +    bIgnoreEmpty = (iLevel!=FTS3_SEGCURSOR_PENDING) && (iNewLevel>iMaxLevel);    }    if( rc!=SQLITE_OK ) goto finished; +    assert( csr.nSegment>0 );    assert( iNewLevel>=getAbsoluteLevel(p, iLangid, iIndex, 0) );    assert( iNewLevel<getAbsoluteLevel(p, iLangid, iIndex,FTS3_SEGDIR_MAXLEVEL) ); @@ -3018,7 +3214,7 @@ static int fts3SegmentMerge(          csr.zTerm, csr.nTerm, csr.aDoclist, csr.nDoclist);    }    if( rc!=SQLITE_OK ) goto finished; -  assert( pWriter ); +  assert( pWriter || bIgnoreEmpty );    if( iLevel!=FTS3_SEGCURSOR_PENDING ){      rc = fts3DeleteSegdir( @@ -3026,7 +3222,14 @@ static int fts3SegmentMerge(      );      if( rc!=SQLITE_OK ) goto finished;    } -  rc = fts3SegWriterFlush(p, pWriter, iNewLevel, iIdx); +  if( pWriter ){ +    rc = fts3SegWriterFlush(p, pWriter, iNewLevel, iIdx); +    if( rc==SQLITE_OK ){ +      if( iLevel==FTS3_SEGCURSOR_PENDING || iNewLevel<iMaxLevel ){ +        rc = fts3PromoteSegments(p, iNewLevel, pWriter->nLeafData); +      } +    } +  }   finished:    fts3SegWriterFree(pWriter); @@ -3036,7 +3239,7 @@ static int fts3SegmentMerge(  /*  -** Flush the contents of pendingTerms to level 0 segments. +** Flush the contents of pendingTerms to level 0 segments.   
*/  int sqlite3Fts3PendingTermsFlush(Fts3Table *p){    int rc = SQLITE_OK; @@ -3052,14 +3255,19 @@ int sqlite3Fts3PendingTermsFlush(Fts3Table *p){    ** estimate the number of leaf blocks of content to be written    */    if( rc==SQLITE_OK && p->bHasStat -   && p->bAutoincrmerge==0xff && p->nLeafAdd>0 +   && p->nAutoincrmerge==0xff && p->nLeafAdd>0    ){      sqlite3_stmt *pStmt = 0;      rc = fts3SqlStmt(p, SQL_SELECT_STAT, &pStmt, 0);      if( rc==SQLITE_OK ){        sqlite3_bind_int(pStmt, 1, FTS_STAT_AUTOINCRMERGE);        rc = sqlite3_step(pStmt); -      p->bAutoincrmerge = (rc==SQLITE_ROW && sqlite3_column_int(pStmt, 0)); +      if( rc==SQLITE_ROW ){ +        p->nAutoincrmerge = sqlite3_column_int(pStmt, 0); +        if( p->nAutoincrmerge==1 ) p->nAutoincrmerge = 8; +      }else if( rc==SQLITE_DONE ){ +        p->nAutoincrmerge = 0; +      }        rc = sqlite3_reset(pStmt);      }    } @@ -3303,9 +3511,11 @@ static int fts3DoRebuild(Fts3Table *p){        rc = fts3PendingTermsDocid(p, iLangid, sqlite3_column_int64(pStmt, 0));        memset(aSz, 0, sizeof(aSz[0]) * (p->nColumn+1));        for(iCol=0; rc==SQLITE_OK && iCol<p->nColumn; iCol++){ -        const char *z = (const char *) sqlite3_column_text(pStmt, iCol+1); -        rc = fts3PendingTermsAdd(p, iLangid, z, iCol, &aSz[iCol]); -        aSz[p->nColumn] += sqlite3_column_bytes(pStmt, iCol+1); +        if( p->abNotindexed[iCol]==0 ){ +          const char *z = (const char *) sqlite3_column_text(pStmt, iCol+1); +          rc = fts3PendingTermsAdd(p, iLangid, z, iCol, &aSz[iCol]); +          aSz[p->nColumn] += sqlite3_column_bytes(pStmt, iCol+1); +        }        }        if( p->bHasDocsize ){          fts3InsertDocsize(&rc, p, aSz); @@ -3425,6 +3635,8 @@ struct IncrmergeWriter {    int iIdx;                       /* Index of *output* segment in iAbsLevel+1 */    sqlite3_int64 iStart;           /* Block number of first allocated block */    sqlite3_int64 iEnd;             /* Block number of last allocated 
block */ +  sqlite3_int64 nLeafData;        /* Bytes of leaf page data so far */ +  u8 bNoLeafData;                 /* If true, store 0 for segment size */    NodeWriter aNodeWriter[FTS_MAX_APPENDABLE_HEIGHT];  }; @@ -3493,9 +3705,9 @@ static int nodeReaderNext(NodeReader *p){      p->aNode = 0;    }else{      if( bFirst==0 ){ -      p->iOff += sqlite3Fts3GetVarint32(&p->aNode[p->iOff], &nPrefix); +      p->iOff += fts3GetVarint32(&p->aNode[p->iOff], &nPrefix);      } -    p->iOff += sqlite3Fts3GetVarint32(&p->aNode[p->iOff], &nSuffix); +    p->iOff += fts3GetVarint32(&p->aNode[p->iOff], &nSuffix);      blobGrowBuffer(&p->term, nPrefix+nSuffix, &rc);      if( rc==SQLITE_OK ){ @@ -3503,7 +3715,7 @@ static int nodeReaderNext(NodeReader *p){        p->term.n = nPrefix+nSuffix;        p->iOff += nSuffix;        if( p->iChild==0 ){ -        p->iOff += sqlite3Fts3GetVarint32(&p->aNode[p->iOff], &p->nDoclist); +        p->iOff += fts3GetVarint32(&p->aNode[p->iOff], &p->nDoclist);          p->aDoclist = &p->aNode[p->iOff];          p->iOff += p->nDoclist;        } @@ -3763,8 +3975,8 @@ static int fts3IncrmergeAppend(      nSpace += sqlite3Fts3VarintLen(nDoclist) + nDoclist;    } +  pWriter->nLeafData += nSpace;    blobGrowBuffer(&pLeaf->block, pLeaf->block.n + nSpace, &rc); -    if( rc==SQLITE_OK ){      if( pLeaf->block.n==0 ){        pLeaf->block.n = 1; @@ -3863,6 +4075,7 @@ static void fts3IncrmergeRelease(          pWriter->iStart,                    /* start_block */          pWriter->aNodeWriter[0].iBlock,     /* leaves_end_block */          pWriter->iEnd,                      /* end_block */ +        (pWriter->bNoLeafData==0 ? 
pWriter->nLeafData : 0),   /* end_block */          pRoot->block.a, pRoot->block.n      /* root */      );    } @@ -3964,7 +4177,11 @@ static int fts3IncrmergeLoad(      if( sqlite3_step(pSelect)==SQLITE_ROW ){        iStart = sqlite3_column_int64(pSelect, 1);        iLeafEnd = sqlite3_column_int64(pSelect, 2); -      iEnd = sqlite3_column_int64(pSelect, 3); +      fts3ReadEndBlockField(pSelect, 3, &iEnd, &pWriter->nLeafData); +      if( pWriter->nLeafData<0 ){ +        pWriter->nLeafData = pWriter->nLeafData * -1; +      } +      pWriter->bNoLeafData = (pWriter->nLeafData==0);        nRoot = sqlite3_column_bytes(pSelect, 4);        aRoot = sqlite3_column_blob(pSelect, 4);      }else{ @@ -4555,7 +4772,7 @@ static int fts3IncrmergeHintPop(Blob *pHint, i64 *piAbsLevel, int *pnInput){    pHint->n = i;    i += sqlite3Fts3GetVarint(&pHint->a[i], piAbsLevel); -  i += sqlite3Fts3GetVarint32(&pHint->a[i], pnInput); +  i += fts3GetVarint32(&pHint->a[i], pnInput);    if( i!=nHint ) return SQLITE_CORRUPT_VTAB;    return SQLITE_OK; @@ -4565,11 +4782,11 @@ static int fts3IncrmergeHintPop(Blob *pHint, i64 *piAbsLevel, int *pnInput){  /*  ** Attempt an incremental merge that writes nMerge leaf blocks.  ** -** Incremental merges happen nMin segments at a time. The two -** segments to be merged are the nMin oldest segments (the ones with -** the smallest indexes) in the highest level that contains at least -** nMin segments. Multiple merges might occur in an attempt to write the  -** quota of nMerge leaf blocks. +** Incremental merges happen nMin segments at a time. The segments  +** to be merged are the nMin oldest segments (the ones with the smallest  +** values for the _segdir.idx field) in the highest level that contains  +** at least nMin segments. Multiple merges might occur in an attempt to  +** write the quota of nMerge leaf blocks.  
*/  int sqlite3Fts3Incrmerge(Fts3Table *p, int nMerge, int nMin){    int rc;                         /* Return code */ @@ -4594,6 +4811,7 @@ int sqlite3Fts3Incrmerge(Fts3Table *p, int nMerge, int nMin){      const i64 nMod = FTS3_SEGDIR_MAXLEVEL * p->nIndex;      sqlite3_stmt *pFindLevel = 0; /* SQL used to determine iAbsLevel */      int bUseHint = 0;             /* True if attempting to append */ +    int iIdx = 0;                 /* Largest idx in level (iAbsLevel+1) */      /* Search the %_segdir table for the absolute level with the smallest      ** relative level number that contains at least nMin segments, if any. @@ -4647,6 +4865,19 @@ int sqlite3Fts3Incrmerge(Fts3Table *p, int nMerge, int nMin){      ** to start work on some other level.  */      memset(pWriter, 0, nAlloc);      pFilter->flags = FTS3_SEGMENT_REQUIRE_POS; + +    if( rc==SQLITE_OK ){ +      rc = fts3IncrmergeOutputIdx(p, iAbsLevel, &iIdx); +      assert( bUseHint==1 || bUseHint==0 ); +      if( iIdx==0 || (bUseHint && iIdx==1) ){ +        int bIgnore = 0; +        rc = fts3SegmentIsMaxLevel(p, iAbsLevel+1, &bIgnore); +        if( bIgnore ){ +          pFilter->flags |= FTS3_SEGMENT_IGNORE_EMPTY; +        } +      } +    } +      if( rc==SQLITE_OK ){        rc = fts3IncrmergeCsr(p, iAbsLevel, nSeg, pCsr);      } @@ -4654,16 +4885,12 @@ int sqlite3Fts3Incrmerge(Fts3Table *p, int nMerge, int nMin){       && SQLITE_OK==(rc = sqlite3Fts3SegReaderStart(p, pCsr, pFilter))       && SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, pCsr))      ){ -      int iIdx = 0;               /* Largest idx in level (iAbsLevel+1) */ -      rc = fts3IncrmergeOutputIdx(p, iAbsLevel, &iIdx); -      if( rc==SQLITE_OK ){ -        if( bUseHint && iIdx>0 ){ -          const char *zKey = pCsr->zTerm; -          int nKey = pCsr->nTerm; -          rc = fts3IncrmergeLoad(p, iAbsLevel, iIdx-1, zKey, nKey, pWriter); -        }else{ -          rc = fts3IncrmergeWriter(p, iAbsLevel, iIdx, pCsr, pWriter); -        } +      if( 
bUseHint && iIdx>0 ){ +        const char *zKey = pCsr->zTerm; +        int nKey = pCsr->nTerm; +        rc = fts3IncrmergeLoad(p, iAbsLevel, iIdx-1, zKey, nKey, pWriter); +      }else{ +        rc = fts3IncrmergeWriter(p, iAbsLevel, iIdx, pCsr, pWriter);        }        if( rc==SQLITE_OK && pWriter->nLeafEst ){ @@ -4685,7 +4912,13 @@ int sqlite3Fts3Incrmerge(Fts3Table *p, int nMerge, int nMin){          }        } +      if( nSeg!=0 ){ +        pWriter->nLeafData = pWriter->nLeafData * -1; +      }        fts3IncrmergeRelease(p, pWriter, &rc); +      if( nSeg==0 && pWriter->bNoLeafData==0 ){ +        fts3PromoteSegments(p, iAbsLevel+1, pWriter->nLeafData); +      }      }      sqlite3Fts3SegReaderFinish(pCsr); @@ -4772,16 +5005,19 @@ static int fts3DoAutoincrmerge(  ){    int rc = SQLITE_OK;    sqlite3_stmt *pStmt = 0; -  p->bAutoincrmerge = fts3Getint(&zParam)!=0; +  p->nAutoincrmerge = fts3Getint(&zParam); +  if( p->nAutoincrmerge==1 || p->nAutoincrmerge>FTS3_MERGE_COUNT ){ +    p->nAutoincrmerge = 8; +  }    if( !p->bHasStat ){      assert( p->bFts4==0 );      sqlite3Fts3CreateStatTable(&rc, p);      if( rc ) return rc;    }    rc = fts3SqlStmt(p, SQL_REPLACE_STAT, &pStmt, 0); -  if( rc ) return rc;; +  if( rc ) return rc;    sqlite3_bind_int(pStmt, 1, FTS_STAT_AUTOINCRMERGE); -  sqlite3_bind_int(pStmt, 2, p->bAutoincrmerge); +  sqlite3_bind_int(pStmt, 2, p->nAutoincrmerge);    sqlite3_step(pStmt);    rc = sqlite3_reset(pStmt);    return rc; @@ -4938,34 +5174,36 @@ static int fts3IntegrityCheck(Fts3Table *p, int *pbOk){        int iCol;        for(iCol=0; rc==SQLITE_OK && iCol<p->nColumn; iCol++){ -        const char *zText = (const char *)sqlite3_column_text(pStmt, iCol+1); -        int nText = sqlite3_column_bytes(pStmt, iCol+1); -        sqlite3_tokenizer_cursor *pT = 0; - -        rc = sqlite3Fts3OpenTokenizer(p->pTokenizer, iLang, zText, nText, &pT); -        while( rc==SQLITE_OK ){ -          char const *zToken;       /* Buffer containing token */ -       
   int nToken = 0;           /* Number of bytes in token */ -          int iDum1 = 0, iDum2 = 0; /* Dummy variables */ -          int iPos = 0;             /* Position of token in zText */ - -          rc = pModule->xNext(pT, &zToken, &nToken, &iDum1, &iDum2, &iPos); -          if( rc==SQLITE_OK ){ -            int i; -            cksum2 = cksum2 ^ fts3ChecksumEntry( -                zToken, nToken, iLang, 0, iDocid, iCol, iPos -            ); -            for(i=1; i<p->nIndex; i++){ -              if( p->aIndex[i].nPrefix<=nToken ){ -                cksum2 = cksum2 ^ fts3ChecksumEntry( -                  zToken, p->aIndex[i].nPrefix, iLang, i, iDocid, iCol, iPos -                ); +        if( p->abNotindexed[iCol]==0 ){ +          const char *zText = (const char *)sqlite3_column_text(pStmt, iCol+1); +          int nText = sqlite3_column_bytes(pStmt, iCol+1); +          sqlite3_tokenizer_cursor *pT = 0; + +          rc = sqlite3Fts3OpenTokenizer(p->pTokenizer, iLang, zText, nText,&pT); +          while( rc==SQLITE_OK ){ +            char const *zToken;       /* Buffer containing token */ +            int nToken = 0;           /* Number of bytes in token */ +            int iDum1 = 0, iDum2 = 0; /* Dummy variables */ +            int iPos = 0;             /* Position of token in zText */ + +            rc = pModule->xNext(pT, &zToken, &nToken, &iDum1, &iDum2, &iPos); +            if( rc==SQLITE_OK ){ +              int i; +              cksum2 = cksum2 ^ fts3ChecksumEntry( +                  zToken, nToken, iLang, 0, iDocid, iCol, iPos +              ); +              for(i=1; i<p->nIndex; i++){ +                if( p->aIndex[i].nPrefix<=nToken ){ +                  cksum2 = cksum2 ^ fts3ChecksumEntry( +                      zToken, p->aIndex[i].nPrefix, iLang, i, iDocid, iCol, iPos +                  ); +                }                }              }            } +          if( pT ) pModule->xClose(pT); +          if( rc==SQLITE_DONE ) rc = SQLITE_OK;          
} -        if( pT ) pModule->xClose(pT); -        if( rc==SQLITE_DONE ) rc = SQLITE_OK;        }      } @@ -5049,6 +5287,9 @@ static int fts3SpecialInsert(Fts3Table *p, sqlite3_value *pVal){    }else if( nVal>11 && 0==sqlite3_strnicmp(zVal, "maxpending=", 9) ){      p->nMaxPendingData = atoi(&zVal[11]);      rc = SQLITE_OK; +  }else if( nVal>21 && 0==sqlite3_strnicmp(zVal, "test-no-incr-doclist=", 21) ){ +    p->bNoIncrDoclist = atoi(&zVal[21]); +    rc = SQLITE_OK;  #endif    }else{      rc = SQLITE_ERROR; @@ -5108,32 +5349,34 @@ int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *pCsr){      iDocid = sqlite3_column_int64(pCsr->pStmt, 0);      for(i=0; i<p->nColumn && rc==SQLITE_OK; i++){ -      const char *zText = (const char *)sqlite3_column_text(pCsr->pStmt, i+1); -      sqlite3_tokenizer_cursor *pTC = 0; -   -      rc = sqlite3Fts3OpenTokenizer(pT, pCsr->iLangid, zText, -1, &pTC); -      while( rc==SQLITE_OK ){ -        char const *zToken;       /* Buffer containing token */ -        int nToken = 0;           /* Number of bytes in token */ -        int iDum1 = 0, iDum2 = 0; /* Dummy variables */ -        int iPos = 0;             /* Position of token in zText */ -   -        rc = pModule->xNext(pTC, &zToken, &nToken, &iDum1, &iDum2, &iPos); -        for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){ -          Fts3PhraseToken *pPT = pDef->pToken; -          if( (pDef->iCol>=p->nColumn || pDef->iCol==i) -           && (pPT->bFirst==0 || iPos==0) -           && (pPT->n==nToken || (pPT->isPrefix && pPT->n<nToken)) -           && (0==memcmp(zToken, pPT->z, pPT->n)) -          ){ -            fts3PendingListAppend(&pDef->pList, iDocid, i, iPos, &rc); +      if( p->abNotindexed[i]==0 ){ +        const char *zText = (const char *)sqlite3_column_text(pCsr->pStmt, i+1); +        sqlite3_tokenizer_cursor *pTC = 0; + +        rc = sqlite3Fts3OpenTokenizer(pT, pCsr->iLangid, zText, -1, &pTC); +        while( rc==SQLITE_OK ){ +          char const *zToken;    
   /* Buffer containing token */ +          int nToken = 0;           /* Number of bytes in token */ +          int iDum1 = 0, iDum2 = 0; /* Dummy variables */ +          int iPos = 0;             /* Position of token in zText */ + +          rc = pModule->xNext(pTC, &zToken, &nToken, &iDum1, &iDum2, &iPos); +          for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){ +            Fts3PhraseToken *pPT = pDef->pToken; +            if( (pDef->iCol>=p->nColumn || pDef->iCol==i) +                && (pPT->bFirst==0 || iPos==0) +                && (pPT->n==nToken || (pPT->isPrefix && pPT->n<nToken)) +                && (0==memcmp(zToken, pPT->z, pPT->n)) +              ){ +              fts3PendingListAppend(&pDef->pList, iDocid, i, iPos, &rc); +            }            }          } +        if( pTC ) pModule->xClose(pTC); +        if( rc==SQLITE_DONE ) rc = SQLITE_OK;        } -      if( pTC ) pModule->xClose(pTC); -      if( rc==SQLITE_DONE ) rc = SQLITE_OK;      } -   +      for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){        if( pDef->pList ){          rc = fts3PendingListAppendVarint(&pDef->pList, 0); @@ -5265,6 +5508,10 @@ int sqlite3Fts3UpdateMethod(    int nChng = 0;                  /* Net change in number of documents */    int bInsertDone = 0; +  /* At this point it must be known if the %_stat table exists or not. +  ** So bHasStat may not be 2.  */ +  assert( p->bHasStat==0 || p->bHasStat==1 ); +    assert( p->pSegments==0 );    assert(         nArg==1                     /* DELETE operations */ @@ -5297,6 +5544,9 @@ int sqlite3Fts3UpdateMethod(    aSzIns = &aSzDel[p->nColumn+1];    memset(aSzDel, 0, sizeof(aSzDel[0])*(p->nColumn+1)*2); +  rc = fts3Writelock(p); +  if( rc!=SQLITE_OK ) goto update_out; +    /* If this is an INSERT operation, or an UPDATE that modifies the rowid    ** value, then this operation requires constraint handling.    
** diff --git a/ext/fts3/tool/fts3view.c b/ext/fts3/tool/fts3view.c index 479ae98..3dc1ba8 100644 --- a/ext/fts3/tool/fts3view.c +++ b/ext/fts3/tool/fts3view.c @@ -376,7 +376,7 @@ static void showSegmentStats(sqlite3 *db, const char *zTab){    sqlite3_finalize(pStmt);    nLeaf = nSeg - nIdx;    printf("Leaf segments larger than %5d bytes.... %9d   %5.2f%%\n", -         pgsz-45, n, n*100.0/nLeaf); +         pgsz-45, n, nLeaf>0 ? n*100.0/nLeaf : 0.0);    pStmt = prepare(db, "SELECT max(level%%1024) FROM '%q_segdir'", zTab);    mxLevel = 0; @@ -554,7 +554,7 @@ static void decodeSegment(    sqlite3_int64 n;    sqlite3_int64 iDocsz;    int iHeight; -  int i = 0; +  sqlite3_int64 i = 0;    int cnt = 0;    char zTerm[1000]; @@ -576,12 +576,12 @@ static void decodeSegment(        fprintf(stderr, "term to long\n");        exit(1);      } -    memcpy(zTerm+iPrefix, aData+i, nTerm); +    memcpy(zTerm+iPrefix, aData+i, (size_t)nTerm);      zTerm[iPrefix+nTerm] = 0;      i += nTerm;      if( iHeight==0 ){        i += getVarint(aData+i, &iDocsz); -      printf("term: %-25s doclist %7lld bytes offset %d\n", zTerm, iDocsz, i); +      printf("term: %-25s doclist %7lld bytes offset %lld\n", zTerm, iDocsz, i);        i += iDocsz;      }else{        printf("term: %-25s child %lld\n", zTerm, ++iChild); @@ -749,18 +749,19 @@ static void decodeDoclist(  */  static void showDoclist(sqlite3 *db, const char *zTab){    const unsigned char *aData; -  sqlite3_int64 offset, nData; +  sqlite3_int64 offset; +  int nData;    sqlite3_stmt *pStmt;    offset = atoi64(azExtra[1]); -  nData = atoi64(azExtra[2]); +  nData = atoi(azExtra[2]);    pStmt = prepareToGetSegment(db, zTab, azExtra[0]);    if( sqlite3_step(pStmt)!=SQLITE_ROW ){      sqlite3_finalize(pStmt);      return;    }    aData = sqlite3_column_blob(pStmt, 0); -  printf("Doclist at %s offset %lld of size %lld bytes:\n", +  printf("Doclist at %s offset %lld of size %d bytes:\n",           azExtra[0], offset, nData);    if( findOption("raw", 
0, 0)!=0 ){      printBlob(aData+offset, nData); diff --git a/ext/fts3/unicode/mkunicode.tcl b/ext/fts3/unicode/mkunicode.tcl index 0d58e8a..c3083ee 100644 --- a/ext/fts3/unicode/mkunicode.tcl +++ b/ext/fts3/unicode/mkunicode.tcl @@ -160,7 +160,7 @@ proc print_rd {map} {    }    assert( key>=aDia[iRes] );    return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : (int)aChar[iRes]);} -  puts "\};" +  puts "\}"  }  proc print_isdiacritic {zFunc map} { @@ -239,7 +239,10 @@ proc an_load_unicodedata_text {zName} {      foreach $lField $fields {}      set iCode [expr "0x$code"] -    set bAlnum [expr {[lsearch {L N} [string range $general_category 0 0]]>=0}] +    set bAlnum [expr { +         [lsearch {L N} [string range $general_category 0 0]] >= 0 +      || $general_category=="Co" +    }]      if { !$bAlnum } { lappend lRet $iCode }    } @@ -295,7 +298,7 @@ proc an_print_range_array {lRange} {    ** using this format.    */    }] -  puts -nonewline "  const static unsigned int aEntry\[\] = \{" +  puts -nonewline "  static const unsigned int aEntry\[\] = \{"    set i 0    foreach range $lRange {      foreach {iFirst nRange} $range {} @@ -346,7 +349,7 @@ proc print_isalnum {zFunc lRange} {      return ( (aAscii[c >> 5] & (1 << (c & 0x001F)))==0 );    }else if( c<(1<<22) ){      unsigned int key = (((unsigned int)c)<<10) | 0x000003FF; -    int iRes; +    int iRes = 0;      int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;      int iLo = 0;      while( iHi>=iLo ){ @@ -360,7 +363,7 @@ proc print_isalnum {zFunc lRange} {      }      assert( aEntry[0]<key );      assert( key>=aEntry[iRes] ); -    return (c >= ((aEntry[iRes]>>10) + (aEntry[iRes]&0x3FF))); +    return (((unsigned int)c) >= ((aEntry[iRes]>>10) + (aEntry[iRes]&0x3FF)));    }    return 1;}    puts "\}" @@ -729,7 +732,7 @@ proc print_fileheader {} {  */    }]    puts "" -  puts "#if !defined(SQLITE_DISABLE_FTS3_UNICODE)" +  puts "#ifndef SQLITE_DISABLE_FTS3_UNICODE"    puts "#if defined(SQLITE_ENABLE_FTS3) || 
defined(SQLITE_ENABLE_FTS4)"    puts ""    puts "#include <assert.h>" | 
