From 569c6676a6ddb0ff73821d7693b5e18ddef809b9 Mon Sep 17 00:00:00 2001 From: Hans-Christoph Steiner Date: Thu, 16 Oct 2014 22:51:35 -0400 Subject: Imported Upstream version 3.2.0 --- src/btree.c | 759 ++++++++++++++++++++++++++++++++---------------------------- 1 file changed, 402 insertions(+), 357 deletions(-) (limited to 'src/btree.c') diff --git a/src/btree.c b/src/btree.c index 3ca6058..60bc7de 100644 --- a/src/btree.c +++ b/src/btree.c @@ -162,7 +162,7 @@ static int hasSharedCacheTableLock( ** the correct locks are held. So do not bother - just return true. ** This case does not come up very often anyhow. */ - if( isIndex && (!pSchema || (pSchema->flags&DB_SchemaLoaded)==0) ){ + if( isIndex && (!pSchema || (pSchema->schemaFlags&DB_SchemaLoaded)==0) ){ return 1; } @@ -446,16 +446,11 @@ static int cursorHoldsMutex(BtCursor *p){ } #endif - -#ifndef SQLITE_OMIT_INCRBLOB /* -** Invalidate the overflow page-list cache for cursor pCur, if any. +** Invalidate the overflow cache of the cursor passed as the first argument. +** on the shared btree structure pBt. */ -static void invalidateOverflowCache(BtCursor *pCur){ - assert( cursorHoldsMutex(pCur) ); - sqlite3_free(pCur->aOverflow); - pCur->aOverflow = 0; -} +#define invalidateOverflowCache(pCur) (pCur->curFlags &= ~BTCF_ValidOvfl) /* ** Invalidate the overflow page-list cache for all cursors opened @@ -469,6 +464,7 @@ static void invalidateAllOverflowCache(BtShared *pBt){ } } +#ifndef SQLITE_OMIT_INCRBLOB /* ** This function is called before modifying the contents of a table ** to invalidate any incrblob cursors that are open on the @@ -491,16 +487,14 @@ static void invalidateIncrblobCursors( BtShared *pBt = pBtree->pBt; assert( sqlite3BtreeHoldsMutex(pBtree) ); for(p=pBt->pCursor; p; p=p->pNext){ - if( p->isIncrblobHandle && (isClearTable || p->info.nKey==iRow) ){ + if( (p->curFlags & BTCF_Incrblob)!=0 && (isClearTable || p->info.nKey==iRow) ){ p->eState = CURSOR_INVALID; } } } #else - /* Stub functions when INCRBLOB is omitted */ - #define invalidateOverflowCache(x) - #define invalidateAllOverflowCache(x) + /* Stub function when INCRBLOB is omitted */ #define invalidateIncrblobCursors(x,y,z) #endif /* SQLITE_OMIT_INCRBLOB */ @@ -684,7 +678,7 @@ static int btreeMoveto( ){ int rc; /* Status code */ UnpackedRecord *pIdxKey; /* Unpacked index key */ - char aSpace[150]; /* Temp space for pIdxKey - to avoid a malloc */ + char aSpace[200]; /* Temp space for pIdxKey - to avoid a malloc */ char *pFree = 0; if( pKey ){ @@ -694,6 +688,10 @@ static int btreeMoveto( ); if( pIdxKey==0 ) return SQLITE_NOMEM; sqlite3VdbeRecordUnpack(pCur->pKeyInfo, (int)nKey, pKey, pIdxKey); + if( pIdxKey->nField==0 ){ + sqlite3DbFree(pCur->pKeyInfo->db, pFree); + return SQLITE_CORRUPT_BKPT; + } }else{ pIdxKey = 0; } @@ -724,6 +722,9 @@ static int btreeRestoreCursorPosition(BtCursor *pCur){ sqlite3_free(pCur->pKey); pCur->pKey = 0; assert( pCur->eState==CURSOR_VALID || pCur->eState==CURSOR_INVALID ); + if( pCur->skipNext && pCur->eState==CURSOR_VALID ){ + pCur->eState = CURSOR_SKIPNEXT; + } } return rc; } @@ -739,20 +740,32 @@ static int btreeRestoreCursorPosition(BtCursor *pCur){ ** at is deleted out from under them. ** ** This routine returns an error code if something goes wrong. The -** integer *pHasMoved is set to one if the cursor has moved and 0 if not. +** integer *pHasMoved is set as follows: +** +** 0: The cursor is unchanged +** 1: The cursor is still pointing at the same row, but the pointers +** returned by sqlite3BtreeKeyFetch() or sqlite3BtreeDataFetch() +** might now be invalid because of a balance() or other change to the +** b-tree. +** 2: The cursor is no longer pointing to the row. The row might have +** been deleted out from under the cursor. */ int sqlite3BtreeCursorHasMoved(BtCursor *pCur, int *pHasMoved){ int rc; + if( pCur->eState==CURSOR_VALID ){ + *pHasMoved = 0; + return SQLITE_OK; + } rc = restoreCursorPosition(pCur); if( rc ){ - *pHasMoved = 1; + *pHasMoved = 2; return rc; } - if( pCur->eState!=CURSOR_VALID || pCur->skipNext!=0 ){ - *pHasMoved = 1; + if( pCur->eState!=CURSOR_VALID || NEVER(pCur->skipNext!=0) ){ + *pHasMoved = 2; }else{ - *pHasMoved = 0; + *pHasMoved = 1; } return SQLITE_OK; } @@ -937,7 +950,8 @@ static void btreeParseCellPtr( assert( n==4-4*pPage->leaf ); if( pPage->intKey ){ if( pPage->hasData ){ - n += getVarint32(&pCell[n], nPayload); + assert( n==0 ); + n = getVarint32(pCell, nPayload); }else{ nPayload = 0; } @@ -1215,7 +1229,7 @@ static int allocateSpace(MemPage *pPage, int nByte, int *pIdx){ }else if( gap+2<=top ){ /* Search the freelist looking for a free slot big enough to satisfy ** the request. The allocation is made from the first free slot in - ** the list that is large enough to accomadate it. + ** the list that is large enough to accommodate it. */ int pc, addr; for(addr=hdr+1; (pc = get2byte(&data[addr]))>0; addr=pc){ @@ -1534,13 +1548,12 @@ static void zeroPage(MemPage *pPage, int flags){ memset(&data[hdr], 0, pBt->usableSize - hdr); } data[hdr] = (char)flags; - first = hdr + 8 + 4*((flags&PTF_LEAF)==0 ?1:0); + first = hdr + ((flags&PTF_LEAF)==0 ? 12 : 8); memset(&data[hdr+1], 0, 4); data[hdr+7] = 0; put2byte(&data[hdr+5], pBt->usableSize); pPage->nFree = (u16)(pBt->usableSize - first); decodeFlags(pPage, flags); - pPage->hdrOffset = hdr; pPage->cellOffset = first; pPage->aDataEnd = &data[pBt->usableSize]; pPage->aCellIdx = &data[first]; @@ -1581,15 +1594,12 @@ static int btreeGetPage( BtShared *pBt, /* The btree */ Pgno pgno, /* Number of the page to fetch */ MemPage **ppPage, /* Return the page in this parameter */ - int noContent, /* Do not load page content if true */ - int bReadonly /* True if a read-only (mmap) page is ok */ + int flags /* PAGER_GET_NOCONTENT or PAGER_GET_READONLY */ ){ int rc; DbPage *pDbPage; - int flags = (noContent ? PAGER_ACQUIRE_NOCONTENT : 0) - | (bReadonly ? PAGER_ACQUIRE_READONLY : 0); - assert( noContent==0 || bReadonly==0 ); + assert( flags==0 || flags==PAGER_GET_NOCONTENT || flags==PAGER_GET_READONLY ); assert( sqlite3_mutex_held(pBt->mutex) ); rc = sqlite3PagerAcquire(pBt->pPager, pgno, (DbPage**)&pDbPage, flags); if( rc ) return rc; @@ -1622,7 +1632,7 @@ static Pgno btreePagecount(BtShared *pBt){ u32 sqlite3BtreeLastPage(Btree *p){ assert( sqlite3BtreeHoldsMutex(p) ); assert( ((p->pBt->nPage)&0x8000000)==0 ); - return (int)btreePagecount(p->pBt); + return btreePagecount(p->pBt); } /* @@ -1637,16 +1647,17 @@ static int getAndInitPage( BtShared *pBt, /* The database file */ Pgno pgno, /* Number of the page to get */ MemPage **ppPage, /* Write the page pointer here */ - int bReadonly /* True if a read-only (mmap) page is ok */ + int bReadonly /* PAGER_GET_READONLY or 0 */ ){ int rc; assert( sqlite3_mutex_held(pBt->mutex) ); + assert( bReadonly==PAGER_GET_READONLY || bReadonly==0 ); if( pgno>btreePagecount(pBt) ){ rc = SQLITE_CORRUPT_BKPT; }else{ - rc = btreeGetPage(pBt, pgno, ppPage, 0, bReadonly); - if( rc==SQLITE_OK ){ + rc = btreeGetPage(pBt, pgno, ppPage, bReadonly); + if( rc==SQLITE_OK && (*ppPage)->isInit==0 ){ rc = btreeInitPage(*ppPage); if( rc!=SQLITE_OK ){ releasePage(*ppPage); @@ -1667,10 +1678,11 @@ static void releasePage(MemPage *pPage){ if( pPage ){ assert( pPage->aData ); assert( pPage->pBt ); + assert( pPage->pDbPage!=0 ); assert( sqlite3PagerGetExtra(pPage->pDbPage) == (void*)pPage ); assert( sqlite3PagerGetData(pPage->pDbPage)==pPage->aData ); assert( sqlite3_mutex_held(pPage->pBt->mutex) ); - sqlite3PagerUnref(pPage->pDbPage); + sqlite3PagerUnrefNotNull(pPage->pDbPage); } } @@ -2050,6 +2062,18 @@ static int removeFromSharingList(BtShared *pBt){ static void allocateTempSpace(BtShared *pBt){ if( !pBt->pTmpSpace ){ pBt->pTmpSpace = sqlite3PageMalloc( pBt->pageSize ); + + /* One of the uses of pBt->pTmpSpace is to format cells before + ** inserting them into a leaf page (function fillInCell()). If + ** a cell is less than 4 bytes in size, it is rounded up to 4 bytes + ** by the various routines that manipulate binary cells. Which + ** can mean that fillInCell() only initializes the first 2 or 3 + ** bytes of pTmpSpace, but that the first 4 bytes are copied from + ** it into a database page. This is not actually a problem, but it + ** does cause a valgrind error when the 1 or 2 bytes of unitialized + ** data is passed to system call write(). So to avoid this error, + ** zero the first 4 bytes of temp space here. */ + if( pBt->pTmpSpace ) memset(pBt->pTmpSpace, 0, 4); } } @@ -2143,6 +2167,7 @@ int sqlite3BtreeSetCacheSize(Btree *p, int mxPage){ return SQLITE_OK; } +#if SQLITE_MAX_MMAP_SIZE>0 /* ** Change the limit on the amount of the database file that may be ** memory mapped. @@ -2155,6 +2180,7 @@ int sqlite3BtreeSetMmapLimit(Btree *p, sqlite3_int64 szMmap){ sqlite3BtreeLeave(p); return SQLITE_OK; } +#endif /* SQLITE_MAX_MMAP_SIZE>0 */ /* ** Change the way data is synced to disk in order to increase or decrease @@ -2165,17 +2191,14 @@ int sqlite3BtreeSetMmapLimit(Btree *p, sqlite3_int64 szMmap){ ** probability of damage to near zero but with a write performance reduction. */ #ifndef SQLITE_OMIT_PAGER_PRAGMAS -int sqlite3BtreeSetSafetyLevel( +int sqlite3BtreeSetPagerFlags( Btree *p, /* The btree to set the safety level on */ - int level, /* PRAGMA synchronous. 1=OFF, 2=NORMAL, 3=FULL */ - int fullSync, /* PRAGMA fullfsync. */ - int ckptFullSync /* PRAGMA checkpoint_fullfync */ + unsigned pgFlags /* Various PAGER_* flags */ ){ BtShared *pBt = p->pBt; assert( sqlite3_mutex_held(p->db->mutex) ); - assert( level>=1 && level<=3 ); sqlite3BtreeEnter(p); - sqlite3PagerSetSafetyLevel(pBt->pPager, level, fullSync, ckptFullSync); + sqlite3PagerSetFlags(pBt->pPager, pgFlags); sqlite3BtreeLeave(p); return SQLITE_OK; } @@ -2381,7 +2404,7 @@ static int lockBtree(BtShared *pBt){ assert( pBt->pPage1==0 ); rc = sqlite3PagerSharedLock(pBt->pPager); if( rc!=SQLITE_OK ) return rc; - rc = btreeGetPage(pBt, 1, &pPage1, 0, 0); + rc = btreeGetPage(pBt, 1, &pPage1, 0); if( rc!=SQLITE_OK ) return rc; /* Do some checking to help insure the file we opened really is @@ -2534,7 +2557,8 @@ static int countValidCursors(BtShared *pBt, int wrOnly){ BtCursor *pCur; int r = 0; for(pCur=pBt->pCursor; pCur; pCur=pCur->pNext){ - if( (wrOnly==0 || pCur->wrFlag) && pCur->eState!=CURSOR_FAULT ) r++; + if( (wrOnly==0 || (pCur->curFlags & BTCF_WriteFlag)!=0) + && pCur->eState!=CURSOR_FAULT ) r++; } return r; } @@ -2668,7 +2692,7 @@ int sqlite3BtreeBeginTrans(Btree *p, int wrflag){ if( p->inTrans==TRANS_WRITE || (p->inTrans==TRANS_READ && !wrflag) ){ goto trans_begun; } - assert( IfNotOmitAV(pBt->bDoTruncate)==0 ); + assert( pBt->inTransaction==TRANS_WRITE || IfNotOmitAV(pBt->bDoTruncate)==0 ); /* Write transactions are not possible on a read-only database */ if( (pBt->btsFlags & BTS_READ_ONLY)!=0 && wrflag ){ @@ -2963,7 +2987,7 @@ static int relocatePage( ** iPtrPage. */ if( eType!=PTRMAP_ROOTPAGE ){ - rc = btreeGetPage(pBt, iPtrPage, &pPtrPage, 0, 0); + rc = btreeGetPage(pBt, iPtrPage, &pPtrPage, 0); if( rc!=SQLITE_OK ){ return rc; } @@ -3047,7 +3071,7 @@ static int incrVacuumStep(BtShared *pBt, Pgno nFin, Pgno iLastPg, int bCommit){ u8 eMode = BTALLOC_ANY; /* Mode parameter for allocateBtreePage() */ Pgno iNear = 0; /* nearby parameter for allocateBtreePage() */ - rc = btreeGetPage(pBt, iLastPg, &pLastPg, 0, 0); + rc = btreeGetPage(pBt, iLastPg, &pLastPg, 0); if( rc!=SQLITE_OK ){ return rc; } @@ -3158,7 +3182,7 @@ int sqlite3BtreeIncrVacuum(Btree *p){ /* ** This routine is called prior to sqlite3PagerCommit when a transaction -** is commited for an auto-vacuum database. +** is committed for an auto-vacuum database. ** ** If SQLITE_OK is returned, then *pnTrunc is set to the number of pages ** the database file should be truncated to during the commit process. @@ -3273,12 +3297,13 @@ int sqlite3BtreeCommitPhaseOne(Btree *p, const char *zMaster){ */ static void btreeEndTransaction(Btree *p){ BtShared *pBt = p->pBt; + sqlite3 *db = p->db; assert( sqlite3BtreeHoldsMutex(p) ); #ifndef SQLITE_OMIT_AUTOVACUUM pBt->bDoTruncate = 0; #endif - if( p->inTrans>TRANS_NONE && p->db->activeVdbeCnt>1 ){ + if( p->inTrans>TRANS_NONE && db->nVdbeRead>1 ){ /* If there are other active statements that belong to this database ** handle, downgrade to a read-only transaction. The other statements ** may still be reading from the database. */ @@ -3445,7 +3470,7 @@ int sqlite3BtreeRollback(Btree *p, int tripCode){ /* The rollback may have destroyed the pPage1->aData value. So ** call btreeGetPage() on page 1 again to make ** sure pPage1->aData is set correctly. */ - if( btreeGetPage(pBt, 1, &pPage1, 0, 0)==SQLITE_OK ){ + if( btreeGetPage(pBt, 1, &pPage1, 0)==SQLITE_OK ){ int nPage = get4byte(28+(u8*)pPage1->aData); testcase( nPage==0 ); if( nPage==0 ) sqlite3PagerPagecount(pBt->pPager, &nPage); @@ -3608,14 +3633,14 @@ static int btreeCursor( pCur->pKeyInfo = pKeyInfo; pCur->pBtree = p; pCur->pBt = pBt; - pCur->wrFlag = (u8)wrFlag; + assert( wrFlag==0 || wrFlag==BTCF_WriteFlag ); + pCur->curFlags = wrFlag; pCur->pNext = pBt->pCursor; if( pCur->pNext ){ pCur->pNext->pPrev = pCur; } pBt->pCursor = pCur; pCur->eState = CURSOR_INVALID; - pCur->cachedRowid = 0; return SQLITE_OK; } int sqlite3BtreeCursor( @@ -3656,36 +3681,6 @@ void sqlite3BtreeCursorZero(BtCursor *p){ memset(p, 0, offsetof(BtCursor, iPage)); } -/* -** Set the cached rowid value of every cursor in the same database file -** as pCur and having the same root page number as pCur. The value is -** set to iRowid. -** -** Only positive rowid values are considered valid for this cache. -** The cache is initialized to zero, indicating an invalid cache. -** A btree will work fine with zero or negative rowids. We just cannot -** cache zero or negative rowids, which means tables that use zero or -** negative rowids might run a little slower. But in practice, zero -** or negative rowids are very uncommon so this should not be a problem. -*/ -void sqlite3BtreeSetCachedRowid(BtCursor *pCur, sqlite3_int64 iRowid){ - BtCursor *p; - for(p=pCur->pBt->pCursor; p; p=p->pNext){ - if( p->pgnoRoot==pCur->pgnoRoot ) p->cachedRowid = iRowid; - } - assert( pCur->cachedRowid==iRowid ); -} - -/* -** Return the cached rowid for the given cursor. A negative or zero -** return value indicates that the rowid cache is invalid and should be -** ignored. If the rowid cache has never before been set, then a -** zero is returned. -*/ -sqlite3_int64 sqlite3BtreeGetCachedRowid(BtCursor *pCur){ - return pCur->cachedRowid; -} - /* ** Close a cursor. The read lock on the database file is released ** when the last cursor is closed. @@ -3709,7 +3704,7 @@ int sqlite3BtreeCloseCursor(BtCursor *pCur){ releasePage(pCur->apPage[i]); } unlockBtreeIfUnused(pBt); - invalidateOverflowCache(pCur); + sqlite3DbFree(pBtree->db, pCur->aOverflow); /* sqlite3_free(pCur); */ sqlite3BtreeLeave(pBtree); } @@ -3737,7 +3732,7 @@ int sqlite3BtreeCloseCursor(BtCursor *pCur){ int iPage = pCur->iPage; memset(&info, 0, sizeof(info)); btreeParseCell(pCur->apPage[iPage], pCur->aiIdx[iPage], &info); - assert( memcmp(&info, &pCur->info, sizeof(info))==0 ); + assert( CORRUPT_DB || memcmp(&info, &pCur->info, sizeof(info))==0 ); } #else #define assertCellInfo(x) @@ -3748,7 +3743,7 @@ int sqlite3BtreeCloseCursor(BtCursor *pCur){ if( pCur->info.nSize==0 ){ int iPage = pCur->iPage; btreeParseCell(pCur->apPage[iPage],pCur->aiIdx[iPage],&pCur->info); - pCur->validNKey = 1; + pCur->curFlags |= BTCF_ValidNKey; }else{ assertCellInfo(pCur); } @@ -3758,8 +3753,8 @@ int sqlite3BtreeCloseCursor(BtCursor *pCur){ #define getCellInfo(pCur) \ if( pCur->info.nSize==0 ){ \ int iPage = pCur->iPage; \ - btreeParseCell(pCur->apPage[iPage],pCur->aiIdx[iPage],&pCur->info); \ - pCur->validNKey = 1; \ + btreeParseCell(pCur->apPage[iPage],pCur->aiIdx[iPage],&pCur->info); \ + pCur->curFlags |= BTCF_ValidNKey; \ }else{ \ assertCellInfo(pCur); \ } @@ -3880,7 +3875,7 @@ static int getOverflowPage( assert( next==0 || rc==SQLITE_DONE ); if( rc==SQLITE_OK ){ - rc = btreeGetPage(pBt, ovfl, &pPage, 0, (ppPage==0)); + rc = btreeGetPage(pBt, ovfl, &pPage, (ppPage==0) ? PAGER_GET_READONLY : 0); assert( rc==SQLITE_OK || pPage==0 ); if( rc==SQLITE_OK ){ next = get4byte(pPage->aData); @@ -3930,10 +3925,12 @@ static int copyPayload( /* ** This function is used to read or overwrite payload information -** for the entry that the pCur cursor is pointing to. If the eOp -** parameter is 0, this is a read operation (data copied into -** buffer pBuf). If it is non-zero, a write (data copied from -** buffer pBuf). +** for the entry that the pCur cursor is pointing to. The eOp +** argument is interpreted as follows: +** +** 0: The operation is a read. Populate the overflow cache. +** 1: The operation is a write. Populate the overflow cache. +** 2: The operation is a read. Do not populate the overflow cache. ** ** A total of "amt" bytes are read or written beginning at "offset". ** Data is read to or from the buffer pBuf. @@ -3941,11 +3938,11 @@ static int copyPayload( ** The content being read or written might appear on the main page ** or be scattered out on multiple overflow pages. ** -** If the BtCursor.isIncrblobHandle flag is set, and the current -** cursor entry uses one or more overflow pages, this function -** allocates space for and lazily popluates the overflow page-list -** cache array (BtCursor.aOverflow). Subsequent calls use this -** cache to make seeking to the supplied offset more efficient. +** If the current cursor entry uses one or more overflow pages and the +** eOp argument is not 2, this function may allocate space for and lazily +** popluates the overflow page-list cache array (BtCursor.aOverflow). +** Subsequent calls use this cache to make seeking to the supplied offset +** more efficient. ** ** Once an overflow page-list cache has been allocated, it may be ** invalidated if some other cursor writes to the same table, or if @@ -3969,15 +3966,22 @@ static int accessPayload( int iIdx = 0; MemPage *pPage = pCur->apPage[pCur->iPage]; /* Btree page of current entry */ BtShared *pBt = pCur->pBt; /* Btree this cursor belongs to */ +#ifdef SQLITE_DIRECT_OVERFLOW_READ + int bEnd; /* True if reading to end of data */ +#endif assert( pPage ); assert( pCur->eState==CURSOR_VALID ); assert( pCur->aiIdx[pCur->iPage]nCell ); assert( cursorHoldsMutex(pCur) ); + assert( eOp!=2 || offset==0 ); /* Always start from beginning for eOp==2 */ getCellInfo(pCur); aPayload = pCur->info.pCell + pCur->info.nHeader; nKey = (pPage->intKey ? 0 : (int)pCur->info.nKey); +#ifdef SQLITE_DIRECT_OVERFLOW_READ + bEnd = (offset+amt==nKey+pCur->info.nData); +#endif if( NEVER(offset+amt > nKey+pCur->info.nData) || &aPayload[pCur->info.nLocal] > &pPage->aData[pBt->usableSize] @@ -3992,7 +3996,7 @@ static int accessPayload( if( a+offset>pCur->info.nLocal ){ a = pCur->info.nLocal - offset; } - rc = copyPayload(&aPayload[offset], pBuf, a, eOp, pPage->pDbPage); + rc = copyPayload(&aPayload[offset], pBuf, a, (eOp & 0x01), pPage->pDbPage); offset = 0; pBuf += a; amt -= a; @@ -4006,21 +4010,30 @@ static int accessPayload( nextPage = get4byte(&aPayload[pCur->info.nLocal]); -#ifndef SQLITE_OMIT_INCRBLOB - /* If the isIncrblobHandle flag is set and the BtCursor.aOverflow[] - ** has not been allocated, allocate it now. The array is sized at - ** one entry for each overflow page in the overflow chain. The - ** page number of the first overflow page is stored in aOverflow[0], - ** etc. A value of 0 in the aOverflow[] array means "not yet known" - ** (the cache is lazily populated). + /* If the BtCursor.aOverflow[] has not been allocated, allocate it now. + ** Except, do not allocate aOverflow[] for eOp==2. + ** + ** The aOverflow[] array is sized at one entry for each overflow page + ** in the overflow chain. The page number of the first overflow page is + ** stored in aOverflow[0], etc. A value of 0 in the aOverflow[] array + ** means "not yet known" (the cache is lazily populated). */ - if( pCur->isIncrblobHandle && !pCur->aOverflow ){ + if( eOp!=2 && (pCur->curFlags & BTCF_ValidOvfl)==0 ){ int nOvfl = (pCur->info.nPayload-pCur->info.nLocal+ovflSize-1)/ovflSize; - pCur->aOverflow = (Pgno *)sqlite3MallocZero(sizeof(Pgno)*nOvfl); - /* nOvfl is always positive. If it were zero, fetchPayload would have - ** been used instead of this routine. */ - if( ALWAYS(nOvfl) && !pCur->aOverflow ){ - rc = SQLITE_NOMEM; + if( nOvfl>pCur->nOvflAlloc ){ + Pgno *aNew = (Pgno*)sqlite3DbRealloc( + pCur->pBtree->db, pCur->aOverflow, nOvfl*2*sizeof(Pgno) + ); + if( aNew==0 ){ + rc = SQLITE_NOMEM; + }else{ + pCur->nOvflAlloc = nOvfl*2; + pCur->aOverflow = aNew; + } + } + if( rc==SQLITE_OK ){ + memset(pCur->aOverflow, 0, nOvfl*sizeof(Pgno)); + pCur->curFlags |= BTCF_ValidOvfl; } } @@ -4028,22 +4041,19 @@ static int accessPayload( ** entry for the first required overflow page is valid, skip ** directly to it. */ - if( pCur->aOverflow && pCur->aOverflow[offset/ovflSize] ){ + if( (pCur->curFlags & BTCF_ValidOvfl)!=0 && pCur->aOverflow[offset/ovflSize] ){ iIdx = (offset/ovflSize); nextPage = pCur->aOverflow[iIdx]; offset = (offset%ovflSize); } -#endif for( ; rc==SQLITE_OK && amt>0 && nextPage; iIdx++){ -#ifndef SQLITE_OMIT_INCRBLOB /* If required, populate the overflow page-list cache. */ - if( pCur->aOverflow ){ + if( (pCur->curFlags & BTCF_ValidOvfl)!=0 ){ assert(!pCur->aOverflow[iIdx] || pCur->aOverflow[iIdx]==nextPage); pCur->aOverflow[iIdx] = nextPage; } -#endif if( offset>=ovflSize ){ /* The only reason to read this page is to obtain the page @@ -4051,13 +4061,17 @@ static int accessPayload( ** data is not required. So first try to lookup the overflow ** page-list cache, if any, then fall back to the getOverflowPage() ** function. + ** + ** Note that the aOverflow[] array must be allocated because eOp!=2 + ** here. If eOp==2, then offset==0 and this branch is never taken. */ -#ifndef SQLITE_OMIT_INCRBLOB - if( pCur->aOverflow && pCur->aOverflow[iIdx+1] ){ + assert( eOp!=2 ); + assert( pCur->curFlags & BTCF_ValidOvfl ); + if( pCur->aOverflow[iIdx+1] ){ nextPage = pCur->aOverflow[iIdx+1]; - } else -#endif + }else{ rc = getOverflowPage(pBt, nextPage, 0, &nextPage); + } offset -= ovflSize; }else{ /* Need to read this page properly. It contains some of the @@ -4079,13 +4093,15 @@ static int accessPayload( ** 3) the database is file-backed, and ** 4) there is no open write-transaction, and ** 5) the database is not a WAL database, + ** 6) all data from the page is being read. ** ** then data can be read directly from the database file into the ** output buffer, bypassing the page-cache altogether. This speeds ** up loading large records that span many overflow pages. */ - if( eOp==0 /* (1) */ + if( (eOp&0x01)==0 /* (1) */ && offset==0 /* (2) */ + && (bEnd || a==ovflSize) /* (6) */ && pBt->inTransaction==TRANS_READ /* (4) */ && (fd = sqlite3PagerFile(pBt->pPager))->pMethods /* (3) */ && pBt->pPage1->aData[19]==0x01 /* (5) */ @@ -4102,12 +4118,12 @@ static int accessPayload( { DbPage *pDbPage; rc = sqlite3PagerAcquire(pBt->pPager, nextPage, &pDbPage, - (eOp==0 ? PAGER_ACQUIRE_READONLY : 0) + ((eOp&0x01)==0 ? PAGER_GET_READONLY : 0) ); if( rc==SQLITE_OK ){ aPayload = sqlite3PagerGetData(pDbPage); nextPage = get4byte(aPayload); - rc = copyPayload(&aPayload[offset+4], pBuf, a, eOp, pDbPage); + rc = copyPayload(&aPayload[offset+4], pBuf, a, (eOp&0x01), pDbPage); sqlite3PagerUnref(pDbPage); offset = 0; } @@ -4176,10 +4192,10 @@ int sqlite3BtreeData(BtCursor *pCur, u32 offset, u32 amt, void *pBuf){ /* ** Return a pointer to payload information from the entry that the ** pCur cursor is pointing to. The pointer is to the beginning of -** the key if skipKey==0 and it points to the beginning of data if -** skipKey==1. The number of bytes of available key/data is written -** into *pAmt. If *pAmt==0, then the value returned will not be -** a valid pointer. +** the key if index btrees (pPage->intKey==0) and is the data for +** table btrees (pPage->intKey==1). The number of bytes of available +** key/data is written into *pAmt. If *pAmt==0, then the value +** returned will not be a valid pointer. ** ** This routine is an optimization. It is common for the entire key ** and data to fit on the local page and for there to be no overflow @@ -4192,41 +4208,18 @@ int sqlite3BtreeData(BtCursor *pCur, u32 offset, u32 amt, void *pBuf){ ** page of the database. The data might change or move the next time ** any btree routine is called. */ -static const unsigned char *fetchPayload( +static const void *fetchPayload( BtCursor *pCur, /* Cursor pointing to entry to read from */ - int *pAmt, /* Write the number of available bytes here */ - int skipKey /* read beginning at data if this is true */ + u32 *pAmt /* Write the number of available bytes here */ ){ - unsigned char *aPayload; - MemPage *pPage; - u32 nKey; - u32 nLocal; - assert( pCur!=0 && pCur->iPage>=0 && pCur->apPage[pCur->iPage]); assert( pCur->eState==CURSOR_VALID ); + assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) ); assert( cursorHoldsMutex(pCur) ); - pPage = pCur->apPage[pCur->iPage]; - assert( pCur->aiIdx[pCur->iPage]nCell ); - if( NEVER(pCur->info.nSize==0) ){ - btreeParseCell(pCur->apPage[pCur->iPage], pCur->aiIdx[pCur->iPage], - &pCur->info); - } - aPayload = pCur->info.pCell; - aPayload += pCur->info.nHeader; - if( pPage->intKey ){ - nKey = 0; - }else{ - nKey = (int)pCur->info.nKey; - } - if( skipKey ){ - aPayload += nKey; - nLocal = pCur->info.nLocal - nKey; - }else{ - nLocal = pCur->info.nLocal; - assert( nLocal<=nKey ); - } - *pAmt = nLocal; - return aPayload; + assert( pCur->aiIdx[pCur->iPage]apPage[pCur->iPage]->nCell ); + assert( pCur->info.nSize>0 ); + *pAmt = pCur->info.nLocal; + return (void*)(pCur->info.pCell + pCur->info.nHeader); } @@ -4244,23 +4237,11 @@ static const unsigned char *fetchPayload( ** These routines is used to get quick access to key and data ** in the common case where no overflow pages are used. */ -const void *sqlite3BtreeKeyFetch(BtCursor *pCur, int *pAmt){ - const void *p = 0; - assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) ); - assert( cursorHoldsMutex(pCur) ); - if( ALWAYS(pCur->eState==CURSOR_VALID) ){ - p = (const void*)fetchPayload(pCur, pAmt, 0); - } - return p; +const void *sqlite3BtreeKeyFetch(BtCursor *pCur, u32 *pAmt){ + return fetchPayload(pCur, pAmt); } -const void *sqlite3BtreeDataFetch(BtCursor *pCur, int *pAmt){ - const void *p = 0; - assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) ); - assert( cursorHoldsMutex(pCur) ); - if( ALWAYS(pCur->eState==CURSOR_VALID) ){ - p = (const void*)fetchPayload(pCur, pAmt, 1); - } - return p; +const void *sqlite3BtreeDataFetch(BtCursor *pCur, u32 *pAmt){ + return fetchPayload(pCur, pAmt); } @@ -4286,14 +4267,15 @@ static int moveToChild(BtCursor *pCur, u32 newPgno){ if( pCur->iPage>=(BTCURSOR_MAX_DEPTH-1) ){ return SQLITE_CORRUPT_BKPT; } - rc = getAndInitPage(pBt, newPgno, &pNewPage, (pCur->wrFlag==0)); + rc = getAndInitPage(pBt, newPgno, &pNewPage, + (pCur->curFlags & BTCF_WriteFlag)==0 ? PAGER_GET_READONLY : 0); if( rc ) return rc; pCur->apPage[i+1] = pNewPage; pCur->aiIdx[i+1] = 0; pCur->iPage++; pCur->info.nSize = 0; - pCur->validNKey = 0; + pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl); if( pNewPage->nCell<1 || pNewPage->intKey!=pCur->apPage[i]->intKey ){ return SQLITE_CORRUPT_BKPT; } @@ -4351,7 +4333,7 @@ static void moveToParent(BtCursor *pCur){ releasePage(pCur->apPage[pCur->iPage]); pCur->iPage--; pCur->info.nSize = 0; - pCur->validNKey = 0; + pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl); } /* @@ -4378,8 +4360,6 @@ static void moveToParent(BtCursor *pCur){ static int moveToRoot(BtCursor *pCur){ MemPage *pRoot; int rc = SQLITE_OK; - Btree *p = pCur->pBtree; - BtShared *pBt = p->pBt; assert( cursorHoldsMutex(pCur) ); assert( CURSOR_INVALID < CURSOR_REQUIRESEEK ); @@ -4394,55 +4374,51 @@ static int moveToRoot(BtCursor *pCur){ } if( pCur->iPage>=0 ){ - int i; - for(i=1; i<=pCur->iPage; i++){ - releasePage(pCur->apPage[i]); - } - pCur->iPage = 0; + while( pCur->iPage ) releasePage(pCur->apPage[pCur->iPage--]); }else if( pCur->pgnoRoot==0 ){ pCur->eState = CURSOR_INVALID; return SQLITE_OK; }else{ - rc = getAndInitPage(pBt, pCur->pgnoRoot, &pCur->apPage[0], pCur->wrFlag==0); + rc = getAndInitPage(pCur->pBtree->pBt, pCur->pgnoRoot, &pCur->apPage[0], + (pCur->curFlags & BTCF_WriteFlag)==0 ? PAGER_GET_READONLY : 0); if( rc!=SQLITE_OK ){ pCur->eState = CURSOR_INVALID; return rc; } pCur->iPage = 0; - - /* If pCur->pKeyInfo is not NULL, then the caller that opened this cursor - ** expected to open it on an index b-tree. Otherwise, if pKeyInfo is - ** NULL, the caller expects a table b-tree. If this is not the case, - ** return an SQLITE_CORRUPT error. */ - assert( pCur->apPage[0]->intKey==1 || pCur->apPage[0]->intKey==0 ); - if( (pCur->pKeyInfo==0)!=pCur->apPage[0]->intKey ){ - return SQLITE_CORRUPT_BKPT; - } } - - /* Assert that the root page is of the correct type. This must be the - ** case as the call to this function that loaded the root-page (either - ** this call or a previous invocation) would have detected corruption - ** if the assumption were not true, and it is not possible for the flags - ** byte to have been modified while this cursor is holding a reference - ** to the page. */ pRoot = pCur->apPage[0]; assert( pRoot->pgno==pCur->pgnoRoot ); - assert( pRoot->isInit && (pCur->pKeyInfo==0)==pRoot->intKey ); + + /* If pCur->pKeyInfo is not NULL, then the caller that opened this cursor + ** expected to open it on an index b-tree. Otherwise, if pKeyInfo is + ** NULL, the caller expects a table b-tree. If this is not the case, + ** return an SQLITE_CORRUPT error. + ** + ** Earlier versions of SQLite assumed that this test could not fail + ** if the root page was already loaded when this function was called (i.e. + ** if pCur->iPage>=0). But this is not so if the database is corrupted + ** in such a way that page pRoot is linked into a second b-tree table + ** (or the freelist). */ + assert( pRoot->intKey==1 || pRoot->intKey==0 ); + if( pRoot->isInit==0 || (pCur->pKeyInfo==0)!=pRoot->intKey ){ + return SQLITE_CORRUPT_BKPT; + } pCur->aiIdx[0] = 0; pCur->info.nSize = 0; - pCur->atLast = 0; - pCur->validNKey = 0; + pCur->curFlags &= ~(BTCF_AtLast|BTCF_ValidNKey|BTCF_ValidOvfl); - if( pRoot->nCell==0 && !pRoot->leaf ){ + if( pRoot->nCell>0 ){ + pCur->eState = CURSOR_VALID; + }else if( !pRoot->leaf ){ Pgno subpage; if( pRoot->pgno!=1 ) return SQLITE_CORRUPT_BKPT; subpage = get4byte(&pRoot->aData[pRoot->hdrOffset+8]); pCur->eState = CURSOR_VALID; rc = moveToChild(pCur, subpage); }else{ - pCur->eState = ((pRoot->nCell>0)?CURSOR_VALID:CURSOR_INVALID); + pCur->eState = CURSOR_INVALID; } return rc; } @@ -4494,7 +4470,7 @@ static int moveToRightmost(BtCursor *pCur){ if( rc==SQLITE_OK ){ pCur->aiIdx[pCur->iPage] = pPage->nCell-1; pCur->info.nSize = 0; - pCur->validNKey = 0; + pCur->curFlags &= ~BTCF_ValidNKey; } return rc; } @@ -4533,7 +4509,7 @@ int sqlite3BtreeLast(BtCursor *pCur, int *pRes){ assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) ); /* If the cursor already points to the last entry, this is a no-op. */ - if( CURSOR_VALID==pCur->eState && pCur->atLast ){ + if( CURSOR_VALID==pCur->eState && (pCur->curFlags & BTCF_AtLast)!=0 ){ #ifdef SQLITE_DEBUG /* This block serves to assert() that the cursor really does point ** to the last entry in the b-tree. */ @@ -4556,7 +4532,12 @@ int sqlite3BtreeLast(BtCursor *pCur, int *pRes){ assert( pCur->eState==CURSOR_VALID ); *pRes = 0; rc = moveToRightmost(pCur); - pCur->atLast = rc==SQLITE_OK ?1:0; + if( rc==SQLITE_OK ){ + pCur->curFlags |= BTCF_AtLast; + }else{ + pCur->curFlags &= ~BTCF_AtLast; + } + } } return rc; @@ -4598,6 +4579,7 @@ int sqlite3BtreeMovetoUnpacked( int *pRes /* Write search results here */ ){ int rc; + RecordCompare xRecordCompare; assert( cursorHoldsMutex(pCur) ); assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) ); @@ -4606,19 +4588,30 @@ int sqlite3BtreeMovetoUnpacked( /* If the cursor is already positioned at the point we are trying ** to move to, then just return without doing any work */ - if( pCur->eState==CURSOR_VALID && pCur->validNKey + if( pCur->eState==CURSOR_VALID && (pCur->curFlags & BTCF_ValidNKey)!=0 && pCur->apPage[0]->intKey ){ if( pCur->info.nKey==intKey ){ *pRes = 0; return SQLITE_OK; } - if( pCur->atLast && pCur->info.nKeycurFlags & BTCF_AtLast)!=0 && pCur->info.nKeyisCorrupt = 0; + assert( pIdxKey->default_rc==1 + || pIdxKey->default_rc==0 + || pIdxKey->default_rc==-1 + ); + }else{ + xRecordCompare = 0; /* All keys are integers */ + } + rc = moveToRoot(pCur); if( rc ){ return rc; @@ -4633,10 +4626,10 @@ int sqlite3BtreeMovetoUnpacked( } assert( pCur->apPage[0]->intKey || pIdxKey ); for(;;){ - int lwr, upr, idx; + int lwr, upr, idx, c; Pgno chldPg; MemPage *pPage = pCur->apPage[pCur->iPage]; - int c; + u8 *pCell; /* Pointer to current cell in pPage */ /* pPage->nCell must be greater than zero. If this is the root-page ** the cursor would have been INVALID above and this for(;;) loop @@ -4648,35 +4641,47 @@ int sqlite3BtreeMovetoUnpacked( assert( pPage->intKey==(pIdxKey==0) ); lwr = 0; upr = pPage->nCell-1; - if( biasRight ){ - pCur->aiIdx[pCur->iPage] = (u16)(idx = upr); - }else{ - pCur->aiIdx[pCur->iPage] = (u16)(idx = (upr+lwr)/2); - } - for(;;){ - u8 *pCell; /* Pointer to current cell in pPage */ - - assert( idx==pCur->aiIdx[pCur->iPage] ); - pCur->info.nSize = 0; - pCell = findCell(pPage, idx) + pPage->childPtrSize; - if( pPage->intKey ){ + assert( biasRight==0 || biasRight==1 ); + idx = upr>>(1-biasRight); /* idx = biasRight ? upr : (lwr+upr)/2; */ + pCur->aiIdx[pCur->iPage] = (u16)idx; + if( xRecordCompare==0 ){ + for(;;){ i64 nCellKey; + pCell = findCell(pPage, idx) + pPage->childPtrSize; if( pPage->hasData ){ - u32 dummy; - pCell += getVarint32(pCell, dummy); + while( 0x80 <= *(pCell++) ){ + if( pCell>=pPage->aDataEnd ) return SQLITE_CORRUPT_BKPT; + } } getVarint(pCell, (u64*)&nCellKey); - if( nCellKey==intKey ){ - c = 0; - }else if( nCellKeyupr ){ c = -1; break; } + }else if( nCellKey>intKey ){ + upr = idx-1; + if( lwr>upr ){ c = +1; break; } }else{ - assert( nCellKey>intKey ); - c = +1; + assert( nCellKey==intKey ); + pCur->curFlags |= BTCF_ValidNKey; + pCur->info.nKey = nCellKey; + pCur->aiIdx[pCur->iPage] = (u16)idx; + if( !pPage->leaf ){ + lwr = idx; + goto moveto_next_layer; + }else{ + *pRes = 0; + rc = SQLITE_OK; + goto moveto_finish; + } } - pCur->validNKey = 1; - pCur->info.nKey = nCellKey; - }else{ + assert( lwr+upr>=0 ); + idx = (lwr+upr)>>1; /* idx = (lwr+upr)/2; */ + } + }else{ + for(;;){ + int nCell; + pCell = findCell(pPage, idx) + pPage->childPtrSize; + /* The maximum supported page-size is 65536 bytes. This means that ** the maximum number of record bytes stored on an index B-Tree ** page is less than 16384 bytes and may be stored as a 2-byte @@ -4685,23 +4690,20 @@ int sqlite3BtreeMovetoUnpacked( ** stored entirely within the b-tree page by inspecting the first ** 2 bytes of the cell. */ - int nCell = pCell[0]; - if( nCell<=pPage->max1bytePayload - /* && (pCell+nCell)aDataEnd */ - ){ + nCell = pCell[0]; + if( nCell<=pPage->max1bytePayload ){ /* This branch runs if the record-size field of the cell is a ** single byte varint and the record fits entirely on the main ** b-tree page. */ testcase( pCell+nCell+1==pPage->aDataEnd ); - c = sqlite3VdbeRecordCompare(nCell, (void*)&pCell[1], pIdxKey); + c = xRecordCompare(nCell, (void*)&pCell[1], pIdxKey, 0); }else if( !(pCell[1] & 0x80) && (nCell = ((nCell&0x7f)<<7) + pCell[1])<=pPage->maxLocal - /* && (pCell+nCell+2)<=pPage->aDataEnd */ ){ /* The record-size field is a 2 byte varint and the record ** fits entirely on the main b-tree page. */ testcase( pCell+nCell+2==pPage->aDataEnd ); - c = sqlite3VdbeRecordCompare(nCell, (void*)&pCell[2], pIdxKey); + c = xRecordCompare(nCell, (void*)&pCell[2], pIdxKey, 0); }else{ /* The record flows over onto one or more overflow pages. In ** this case the whole cell needs to be parsed, a buffer allocated @@ -4716,57 +4718,55 @@ int sqlite3BtreeMovetoUnpacked( rc = SQLITE_NOMEM; goto moveto_finish; } - rc = accessPayload(pCur, 0, nCell, (unsigned char*)pCellKey, 0); + pCur->aiIdx[pCur->iPage] = (u16)idx; + rc = accessPayload(pCur, 0, nCell, (unsigned char*)pCellKey, 2); if( rc ){ sqlite3_free(pCellKey); goto moveto_finish; } - c = sqlite3VdbeRecordCompare(nCell, pCellKey, pIdxKey); + c = xRecordCompare(nCell, pCellKey, pIdxKey, 0); sqlite3_free(pCellKey); } - } - if( c==0 ){ - if( pPage->intKey && !pPage->leaf ){ - lwr = idx; - break; + assert( pIdxKey->isCorrupt==0 || c==0 ); + if( c<0 ){ + lwr = idx+1; + }else if( c>0 ){ + upr = idx-1; }else{ + assert( c==0 ); *pRes = 0; rc = SQLITE_OK; + pCur->aiIdx[pCur->iPage] = (u16)idx; + if( pIdxKey->isCorrupt ) rc = SQLITE_CORRUPT; goto moveto_finish; } + if( lwr>upr ) break; + assert( lwr+upr>=0 ); + idx = (lwr+upr)>>1; /* idx = (lwr+upr)/2 */ } - if( c<0 ){ - lwr = idx+1; - }else{ - upr = idx-1; - } - if( lwr>upr ){ - break; - } - pCur->aiIdx[pCur->iPage] = (u16)(idx = (lwr+upr)/2); } assert( lwr==upr+1 || (pPage->intKey && !pPage->leaf) ); assert( pPage->isInit ); if( pPage->leaf ){ - chldPg = 0; - }else if( lwr>=pPage->nCell ){ - chldPg = get4byte(&pPage->aData[pPage->hdrOffset+8]); - }else{ - chldPg = get4byte(findCell(pPage, lwr)); - } - if( chldPg==0 ){ assert( pCur->aiIdx[pCur->iPage]apPage[pCur->iPage]->nCell ); + pCur->aiIdx[pCur->iPage] = (u16)idx; *pRes = c; rc = SQLITE_OK; goto moveto_finish; } +moveto_next_layer: + if( lwr>=pPage->nCell ){ + chldPg = get4byte(&pPage->aData[pPage->hdrOffset+8]); + }else{ + chldPg = get4byte(findCell(pPage, lwr)); + } pCur->aiIdx[pCur->iPage] = (u16)lwr; - pCur->info.nSize = 0; - pCur->validNKey = 0; rc = moveToChild(pCur, chldPg); - if( rc ) goto moveto_finish; + if( rc ) break; } moveto_finish: + pCur->info.nSize = 0; + pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl); return rc; } @@ -4791,6 +4791,15 @@ int sqlite3BtreeEof(BtCursor *pCur){ ** successful then set *pRes=0. If the cursor ** was already pointing to the last entry in the database before ** this routine was called, then set *pRes=1. +** +** The calling function will set *pRes to 0 or 1. The initial *pRes value +** will be 1 if the cursor being stepped corresponds to an SQL index and +** if this routine could have been skipped if that SQL index had been +** a unique index. Otherwise the caller will have set *pRes to zero. +** Zero is the common case. The btree implementation is free to use the +** initial *pRes value as a hint to improve performance, but the current +** SQLite btree implementation does not. (Note that the comdb2 btree +** implementation does use this hint, however.) */ int sqlite3BtreeNext(BtCursor *pCur, int *pRes){ int rc; @@ -4798,21 +4807,31 @@ int sqlite3BtreeNext(BtCursor *pCur, int *pRes){ MemPage *pPage; assert( cursorHoldsMutex(pCur) ); - rc = restoreCursorPosition(pCur); - if( rc!=SQLITE_OK ){ - return rc; - } assert( pRes!=0 ); - if( CURSOR_INVALID==pCur->eState ){ - *pRes = 1; - return SQLITE_OK; - } - if( pCur->skipNext>0 ){ - pCur->skipNext = 0; - *pRes = 0; - return SQLITE_OK; + assert( *pRes==0 || *pRes==1 ); + assert( pCur->skipNext==0 || pCur->eState!=CURSOR_VALID ); + if( pCur->eState!=CURSOR_VALID ){ + invalidateOverflowCache(pCur); + rc = restoreCursorPosition(pCur); + if( rc!=SQLITE_OK ){ + *pRes = 0; + return rc; + } + if( CURSOR_INVALID==pCur->eState ){ + *pRes = 1; + return SQLITE_OK; + } + if( pCur->skipNext ){ + assert( pCur->eState==CURSOR_VALID || pCur->eState==CURSOR_SKIPNEXT ); + pCur->eState = CURSOR_VALID; + if( pCur->skipNext>0 ){ + pCur->skipNext = 0; + *pRes = 0; + return SQLITE_OK; + } + pCur->skipNext = 0; + } } - pCur->skipNext = 0; pPage = pCur->apPage[pCur->iPage]; idx = ++pCur->aiIdx[pCur->iPage]; @@ -4826,11 +4845,14 @@ int sqlite3BtreeNext(BtCursor *pCur, int *pRes){ testcase( idx>pPage->nCell ); pCur->info.nSize = 0; - pCur->validNKey = 0; + pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl); if( idx>=pPage->nCell ){ if( !pPage->leaf ){ rc = moveToChild(pCur, get4byte(&pPage->aData[pPage->hdrOffset+8])); - if( rc ) return rc; + if( rc ){ + *pRes = 0; + return rc; + } rc = moveToLeftmost(pCur); *pRes = 0; return rc; @@ -4866,27 +4888,48 @@ int sqlite3BtreeNext(BtCursor *pCur, int *pRes){ ** successful then set *pRes=0. If the cursor ** was already pointing to the first entry in the database before ** this routine was called, then set *pRes=1. +** +** The calling function will set *pRes to 0 or 1. The initial *pRes value +** will be 1 if the cursor being stepped corresponds to an SQL index and +** if this routine could have been skipped if that SQL index had been +** a unique index. Otherwise the caller will have set *pRes to zero. +** Zero is the common case. The btree implementation is free to use the +** initial *pRes value as a hint to improve performance, but the current +** SQLite btree implementation does not. (Note that the comdb2 btree +** implementation does use this hint, however.) */ int sqlite3BtreePrevious(BtCursor *pCur, int *pRes){ int rc; MemPage *pPage; assert( cursorHoldsMutex(pCur) ); - rc = restoreCursorPosition(pCur); - if( rc!=SQLITE_OK ){ - return rc; - } - pCur->atLast = 0; - if( CURSOR_INVALID==pCur->eState ){ - *pRes = 1; - return SQLITE_OK; - } - if( pCur->skipNext<0 ){ - pCur->skipNext = 0; - *pRes = 0; - return SQLITE_OK; + assert( pRes!=0 ); + assert( *pRes==0 || *pRes==1 ); + assert( pCur->skipNext==0 || pCur->eState!=CURSOR_VALID ); + pCur->curFlags &= ~(BTCF_AtLast|BTCF_ValidOvfl); + if( pCur->eState!=CURSOR_VALID ){ + if( ALWAYS(pCur->eState>=CURSOR_REQUIRESEEK) ){ + rc = btreeRestoreCursorPosition(pCur); + if( rc!=SQLITE_OK ){ + *pRes = 0; + return rc; + } + } + if( CURSOR_INVALID==pCur->eState ){ + *pRes = 1; + return SQLITE_OK; + } + if( pCur->skipNext ){ + assert( pCur->eState==CURSOR_VALID || pCur->eState==CURSOR_SKIPNEXT ); + pCur->eState = CURSOR_VALID; + if( pCur->skipNext<0 ){ + pCur->skipNext = 0; + *pRes = 0; + return SQLITE_OK; + } + pCur->skipNext = 0; + } } - pCur->skipNext = 0; pPage = pCur->apPage[pCur->iPage]; assert( pPage->isInit ); @@ -4894,6 +4937,7 @@ int sqlite3BtreePrevious(BtCursor *pCur, int *pRes){ int idx = pCur->aiIdx[pCur->iPage]; rc = moveToChild(pCur, get4byte(findCell(pPage, idx))); if( rc ){ + *pRes = 0; return rc; } rc = moveToRightmost(pCur); @@ -4907,7 +4951,7 @@ int sqlite3BtreePrevious(BtCursor *pCur, int *pRes){ moveToParent(pCur); } pCur->info.nSize = 0; - pCur->validNKey = 0; + pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl); pCur->aiIdx[pCur->iPage]--; pPage = pCur->apPage[pCur->iPage]; @@ -5017,7 +5061,7 @@ static int allocateBtreePage( if( iTrunk>mxPage ){ rc = SQLITE_CORRUPT_BKPT; }else{ - rc = btreeGetPage(pBt, iTrunk, &pTrunk, 0, 0); + rc = btreeGetPage(pBt, iTrunk, &pTrunk, 0); } if( rc ){ pTrunk = 0; @@ -5081,7 +5125,7 @@ static int allocateBtreePage( goto end_allocate_page; } testcase( iNewTrunk==mxPage ); - rc = btreeGetPage(pBt, iNewTrunk, &pNewTrunk, 0, 0); + rc = btreeGetPage(pBt, iNewTrunk, &pNewTrunk, 0); if( rc!=SQLITE_OK ){ goto end_allocate_page; } @@ -5160,8 +5204,8 @@ static int allocateBtreePage( memcpy(&aData[8+closest*4], &aData[4+k*4], 4); } put4byte(&aData[4], k-1); - noContent = !btreeGetHasContent(pBt, *pPgno); - rc = btreeGetPage(pBt, *pPgno, ppPage, noContent, 0); + noContent = !btreeGetHasContent(pBt, *pPgno) ? PAGER_GET_NOCONTENT : 0; + rc = btreeGetPage(pBt, *pPgno, ppPage, noContent); if( rc==SQLITE_OK ){ rc = sqlite3PagerWrite((*ppPage)->pDbPage); if( rc!=SQLITE_OK ){ @@ -5193,7 +5237,7 @@ static int allocateBtreePage( ** here are confined to those pages that lie between the end of the ** database image and the end of the database file. */ - int bNoContent = (0==IfNotOmitAV(pBt->bDoTruncate)); + int bNoContent = (0==IfNotOmitAV(pBt->bDoTruncate)) ? PAGER_GET_NOCONTENT : 0; rc = sqlite3PagerWrite(pBt->pPage1->pDbPage); if( rc ) return rc; @@ -5209,7 +5253,7 @@ static int allocateBtreePage( MemPage *pPg = 0; TRACE(("ALLOCATE: %d from end of file (pointer-map page)\n", pBt->nPage)); assert( pBt->nPage!=PENDING_BYTE_PAGE(pBt) ); - rc = btreeGetPage(pBt, pBt->nPage, &pPg, bNoContent, 0); + rc = btreeGetPage(pBt, pBt->nPage, &pPg, bNoContent); if( rc==SQLITE_OK ){ rc = sqlite3PagerWrite(pPg->pDbPage); releasePage(pPg); @@ -5223,7 +5267,7 @@ static int allocateBtreePage( *pPgno = pBt->nPage; assert( *pPgno!=PENDING_BYTE_PAGE(pBt) ); - rc = btreeGetPage(pBt, *pPgno, ppPage, bNoContent, 0); + rc = btreeGetPage(pBt, *pPgno, ppPage, bNoContent); if( rc ) return rc; rc = sqlite3PagerWrite((*ppPage)->pDbPage); if( rc!=SQLITE_OK ){ @@ -5240,6 +5284,7 @@ end_allocate_page: if( rc==SQLITE_OK ){ if( sqlite3PagerPageRefcount((*ppPage)->pDbPage)>1 ){ releasePage(*ppPage); + *ppPage = 0; return SQLITE_CORRUPT_BKPT; } (*ppPage)->isInit = 0; @@ -5291,7 +5336,7 @@ static int freePage2(BtShared *pBt, MemPage *pMemPage, Pgno iPage){ /* If the secure_delete option is enabled, then ** always fully overwrite deleted information with zeros. */ - if( (!pPage && ((rc = btreeGetPage(pBt, iPage, &pPage, 0, 0))!=0) ) + if( (!pPage && ((rc = btreeGetPage(pBt, iPage, &pPage, 0))!=0) ) || ((rc = sqlite3PagerWrite(pPage->pDbPage))!=0) ){ goto freepage_out; @@ -5318,7 +5363,7 @@ static int freePage2(BtShared *pBt, MemPage *pMemPage, Pgno iPage){ u32 nLeaf; /* Initial number of leaf cells on trunk page */ iTrunk = get4byte(&pPage1->aData[32]); - rc = btreeGetPage(pBt, iTrunk, &pTrunk, 0, 0); + rc = btreeGetPage(pBt, iTrunk, &pTrunk, 0); if( rc!=SQLITE_OK ){ goto freepage_out; } @@ -5364,7 +5409,7 @@ static int freePage2(BtShared *pBt, MemPage *pMemPage, Pgno iPage){ ** first trunk in the free-list is full. Either way, the page being freed ** will become the new first trunk page in the free-list. */ - if( pPage==0 && SQLITE_OK!=(rc = btreeGetPage(pBt, iPage, &pPage, 0, 0)) ){ + if( pPage==0 && SQLITE_OK!=(rc = btreeGetPage(pBt, iPage, &pPage, 0)) ){ goto freepage_out; } rc = sqlite3PagerWrite(pPage->pDbPage); @@ -5501,7 +5546,7 @@ static int fillInCell( nHeader += 4; } if( pPage->hasData ){ - nHeader += putVarint(&pCell[nHeader], nData+nZero); + nHeader += putVarint32(&pCell[nHeader], nData+nZero); }else{ nData = nZero = 0; } @@ -5629,7 +5674,6 @@ static void dropCell(MemPage *pPage, int idx, int sz, int *pRC){ u32 pc; /* Offset to cell content of cell being deleted */ u8 *data; /* pPage->aData */ u8 *ptr; /* Used to move bytes around within data[] */ - u8 *endPtr; /* End of loop */ int rc; /* The return code */ int hdr; /* Beginning of the header. 0 most pages. 100 page 1 */ @@ -5654,13 +5698,8 @@ static void dropCell(MemPage *pPage, int idx, int sz, int *pRC){ *pRC = rc; return; } - endPtr = &pPage->aCellIdx[2*pPage->nCell - 2]; - assert( (SQLITE_PTR_TO_INT(ptr)&1)==0 ); /* ptr is always 2-byte aligned */ - while( ptrnCell--; + memmove(ptr, ptr+2, 2*(pPage->nCell - idx)); put2byte(&data[hdr+3], pPage->nCell); pPage->nFree += 2; } @@ -5697,15 +5736,13 @@ static void insertCell( int ins; /* Index in data[] where new cell pointer is inserted */ int cellOffset; /* Address of first cell pointer in data[] */ u8 *data; /* The content of the whole page */ - u8 *ptr; /* Used for moving information around in data[] */ - u8 *endPtr; /* End of the loop */ - int nSkip = (iChild ? 4 : 0); if( *pRC ) return; assert( i>=0 && i<=pPage->nCell+pPage->nOverflow ); - assert( pPage->nCell<=MX_CELL(pPage->pBt) && MX_CELL(pPage->pBt)<=10921 ); + assert( MX_CELL(pPage->pBt)<=10921 ); + assert( pPage->nCell<=MX_CELL(pPage->pBt) || CORRUPT_DB ); assert( pPage->nOverflow<=ArraySize(pPage->apOvfl) ); assert( ArraySize(pPage->apOvfl)==ArraySize(pPage->aiOvfl) ); assert( sqlite3_mutex_held(pPage->pBt->mutex) ); @@ -5750,13 +5787,7 @@ static void insertCell( if( iChild ){ put4byte(&data[idx], iChild); } - ptr = &data[end]; - endPtr = &data[ins]; - assert( (SQLITE_PTR_TO_INT(ptr)&1)==0 ); /* ptr is always 2-byte aligned */ - while( ptr>endPtr ){ - *(u16*)ptr = *(u16*)&ptr[-2]; - ptr -= 2; - } + memmove(&data[ins+2], &data[ins], end-ins); put2byte(&data[ins], idx); put2byte(&data[pPage->hdrOffset+3], pPage->nCell); #ifndef SQLITE_OMIT_AUTOVACUUM @@ -6946,7 +6977,7 @@ int sqlite3BtreeInsert( } assert( cursorHoldsMutex(pCur) ); - assert( pCur->wrFlag && pBt->inTransaction==TRANS_WRITE + assert( (pCur->curFlags & BTCF_WriteFlag)!=0 && pBt->inTransaction==TRANS_WRITE && (pBt->btsFlags & BTS_READ_ONLY)==0 ); assert( hasSharedCacheTableLock(p, pCur->pgnoRoot, pCur->pKeyInfo!=0, 2) ); @@ -6971,11 +7002,17 @@ int sqlite3BtreeInsert( rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur); if( rc ) return rc; - /* If this is an insert into a table b-tree, invalidate any incrblob - ** cursors open on the row being replaced (assuming this is a replace - ** operation - if it is not, the following is a no-op). */ if( pCur->pKeyInfo==0 ){ + /* If this is an insert into a table b-tree, invalidate any incrblob + ** cursors open on the row being replaced */ invalidateIncrblobCursors(p, nKey, 0); + + /* If the cursor is currently on the last row and we are appending a + ** new row onto the end, set the "loc" to avoid an unnecessary btreeMoveto() + ** call */ + if( (pCur->curFlags&BTCF_ValidNKey)!=0 && nKey>0 && pCur->info.nKey==nKey-1 ){ + loc = -1; + } } if( !loc ){ @@ -7026,7 +7063,7 @@ int sqlite3BtreeInsert( /* If no error has occurred and pPage has an overflow cell, call balance() ** to redistribute the cells within the tree. Since balance() may move - ** the cursor, zero the BtCursor.info.nSize and BtCursor.validNKey + ** the cursor, zero the BtCursor.info.nSize and BTCF_ValidNKey ** variables. ** ** Previous versions of SQLite called moveToRoot() to move the cursor @@ -7045,8 +7082,8 @@ int sqlite3BtreeInsert( ** row without seeking the cursor. This can be a big performance boost. */ pCur->info.nSize = 0; - pCur->validNKey = 0; if( rc==SQLITE_OK && pPage->nOverflow ){ + pCur->curFlags &= ~(BTCF_ValidNKey); rc = balance(pCur); /* Must make sure nOverflow is reset to zero even if the balance() @@ -7078,7 +7115,7 @@ int sqlite3BtreeDelete(BtCursor *pCur){ assert( cursorHoldsMutex(pCur) ); assert( pBt->inTransaction==TRANS_WRITE ); assert( (pBt->btsFlags & BTS_READ_ONLY)==0 ); - assert( pCur->wrFlag ); + assert( pCur->curFlags & BTCF_WriteFlag ); assert( hasSharedCacheTableLock(p, pCur->pgnoRoot, pCur->pKeyInfo!=0, 2) ); assert( !hasReadConflicts(p, pCur->pgnoRoot) ); @@ -7101,7 +7138,7 @@ int sqlite3BtreeDelete(BtCursor *pCur){ ** sub-tree headed by the child page of the cell being deleted. This makes ** balancing the tree following the delete operation easier. */ if( !pPage->leaf ){ - int notUsed; + int notUsed = 0; rc = sqlite3BtreePrevious(pCur, ¬Used); if( rc ) return rc; } @@ -7263,7 +7300,7 @@ static int btreeCreateTable(Btree *p, int *piTable, int createTabFlags){ } /* Move the page currently at pgnoRoot to pgnoMove. */ - rc = btreeGetPage(pBt, pgnoRoot, &pRoot, 0, 0); + rc = btreeGetPage(pBt, pgnoRoot, &pRoot, 0); if( rc!=SQLITE_OK ){ return rc; } @@ -7284,7 +7321,7 @@ static int btreeCreateTable(Btree *p, int *piTable, int createTabFlags){ if( rc!=SQLITE_OK ){ return rc; } - rc = btreeGetPage(pBt, pgnoRoot, &pRoot, 0, 0); + rc = btreeGetPage(pBt, pgnoRoot, &pRoot, 0); if( rc!=SQLITE_OK ){ return rc; } @@ -7354,6 +7391,7 @@ static int clearDatabasePage( int rc; unsigned char *pCell; int i; + int hdr; assert( sqlite3_mutex_held(pBt->mutex) ); if( pgno>btreePagecount(pBt) ){ @@ -7362,6 +7400,7 @@ static int clearDatabasePage( rc = getAndInitPage(pBt, pgno, &pPage, 0); if( rc ) return rc; + hdr = pPage->hdrOffset; for(i=0; inCell; i++){ pCell = findCell(pPage, i); if( !pPage->leaf ){ @@ -7372,7 +7411,7 @@ static int clearDatabasePage( if( rc ) goto cleardatabasepage_out; } if( !pPage->leaf ){ - rc = clearDatabasePage(pBt, get4byte(&pPage->aData[8]), 1, pnChange); + rc = clearDatabasePage(pBt, get4byte(&pPage->aData[hdr+8]), 1, pnChange); if( rc ) goto cleardatabasepage_out; }else if( pnChange ){ assert( pPage->intKey ); @@ -7381,7 +7420,7 @@ static int clearDatabasePage( if( freePageFlag ){ freePage(pPage, &rc); }else if( (rc = sqlite3PagerWrite(pPage->pDbPage))==0 ){ - zeroPage(pPage, pPage->aData[0] | PTF_LEAF); + zeroPage(pPage, pPage->aData[hdr] | PTF_LEAF); } cleardatabasepage_out: @@ -7421,6 +7460,15 @@ int sqlite3BtreeClearTable(Btree *p, int iTable, int *pnChange){ return rc; } +/* +** Delete all information from the single table that pCur is open on. +** +** This routine only work for pCur on an ephemeral table. +*/ +int sqlite3BtreeClearTableOfCursor(BtCursor *pCur){ + return sqlite3BtreeClearTable(pCur->pBtree, pCur->pgnoRoot, 0); +} + /* ** Erase all information in a table and add the root of the table to ** the freelist. Except, the root of the principle table (the one on @@ -7462,7 +7510,7 @@ static int btreeDropTable(Btree *p, Pgno iTable, int *piMoved){ return SQLITE_LOCKED_SHAREDCACHE; } - rc = btreeGetPage(pBt, (Pgno)iTable, &pPage, 0, 0); + rc = btreeGetPage(pBt, (Pgno)iTable, &pPage, 0); if( rc ) return rc; rc = sqlite3BtreeClearTable(p, iTable, 0); if( rc ){ @@ -7497,7 +7545,7 @@ static int btreeDropTable(Btree *p, Pgno iTable, int *piMoved){ */ MemPage *pMove; releasePage(pPage); - rc = btreeGetPage(pBt, maxRootPgno, &pMove, 0, 0); + rc = btreeGetPage(pBt, maxRootPgno, &pMove, 0); if( rc!=SQLITE_OK ){ return rc; } @@ -7507,7 +7555,7 @@ static int btreeDropTable(Btree *p, Pgno iTable, int *piMoved){ return rc; } pMove = 0; - rc = btreeGetPage(pBt, maxRootPgno, &pMove, 0, 0); + rc = btreeGetPage(pBt, maxRootPgno, &pMove, 0); freePage(pMove, &rc); releasePage(pMove); if( rc!=SQLITE_OK ){ @@ -7718,11 +7766,11 @@ static void checkAppendMsg( sqlite3StrAccumAppend(&pCheck->errMsg, "\n", 1); } if( zMsg1 ){ - sqlite3StrAccumAppend(&pCheck->errMsg, zMsg1, -1); + sqlite3StrAccumAppendAll(&pCheck->errMsg, zMsg1); } sqlite3VXPrintf(&pCheck->errMsg, 1, zFormat, ap); va_end(ap); - if( pCheck->errMsg.mallocFailed ){ + if( pCheck->errMsg.accError==STRACCUM_NOMEM ){ pCheck->mallocFailed = 1; } } @@ -7919,7 +7967,7 @@ static int checkTreePage( usableSize = pBt->usableSize; if( iPage==0 ) return 0; if( checkRef(pCheck, iPage, zParentContext) ) return 0; - if( (rc = btreeGetPage(pBt, (Pgno)iPage, &pPage, 0, 0))!=0 ){ + if( (rc = btreeGetPage(pBt, (Pgno)iPage, &pPage, 0))!=0 ){ checkAppendMsg(pCheck, zContext, "unable to get the page. error code=%d", rc); return 0; @@ -8380,7 +8428,7 @@ int sqlite3BtreePutData(BtCursor *pCsr, u32 offset, u32 amt, void *z){ int rc; assert( cursorHoldsMutex(pCsr) ); assert( sqlite3_mutex_held(pCsr->pBtree->db->mutex) ); - assert( pCsr->isIncrblobHandle ); + assert( pCsr->curFlags & BTCF_Incrblob ); rc = restoreCursorPosition(pCsr); if( rc!=SQLITE_OK ){ @@ -8409,7 +8457,7 @@ int sqlite3BtreePutData(BtCursor *pCsr, u32 offset, u32 amt, void *z){ ** (d) there are no conflicting read-locks, and ** (e) the cursor points at a valid row of an intKey table. */ - if( !pCsr->wrFlag ){ + if( (pCsr->curFlags & BTCF_WriteFlag)==0 ){ return SQLITE_READONLY; } assert( (pCsr->pBt->btsFlags & BTS_READ_ONLY)==0 @@ -8422,20 +8470,10 @@ int sqlite3BtreePutData(BtCursor *pCsr, u32 offset, u32 amt, void *z){ } /* -** Set a flag on this cursor to cache the locations of pages from the -** overflow list for the current row. This is used by cursors opened -** for incremental blob IO only. -** -** This function sets a flag only. The actual page location cache -** (stored in BtCursor.aOverflow[]) is allocated and used by function -** accessPayload() (the worker function for sqlite3BtreeData() and -** sqlite3BtreePutData()). +** Mark this cursor as an incremental blob cursor. */ -void sqlite3BtreeCacheOverflow(BtCursor *pCur){ - assert( cursorHoldsMutex(pCur) ); - assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) ); - invalidateOverflowCache(pCur); - pCur->isIncrblobHandle = 1; +void sqlite3BtreeIncrblobCursor(BtCursor *pCur){ + pCur->curFlags |= BTCF_Incrblob; } #endif @@ -8483,3 +8521,10 @@ void sqlite3BtreeCursorHints(BtCursor *pCsr, unsigned int mask){ assert( mask==BTREE_BULKLOAD || mask==0 ); pCsr->hints = mask; } + +/* +** Return true if the given Btree is read-only. +*/ +int sqlite3BtreeIsReadonly(Btree *p){ + return (p->pBt->btsFlags & BTS_READ_ONLY)!=0; +} -- cgit v1.2.3