From 569c6676a6ddb0ff73821d7693b5e18ddef809b9 Mon Sep 17 00:00:00 2001 From: Hans-Christoph Steiner Date: Thu, 16 Oct 2014 22:51:35 -0400 Subject: Imported Upstream version 3.2.0 --- src/os_unix.c | 287 ++++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 188 insertions(+), 99 deletions(-) (limited to 'src/os_unix.c') diff --git a/src/os_unix.c b/src/os_unix.c index abc23a4..b1a0bed 100644 --- a/src/os_unix.c +++ b/src/os_unix.c @@ -46,13 +46,6 @@ #include "sqliteInt.h" #if SQLITE_OS_UNIX /* This file is used on unix only */ -/* Use posix_fallocate() if it is available -*/ -#if !defined(HAVE_POSIX_FALLOCATE) \ - && (_XOPEN_SOURCE >= 600 || _POSIX_C_SOURCE >= 200112L) -# define HAVE_POSIX_FALLOCATE 1 -#endif - /* ** There are various methods for file locking used for concurrency ** control: @@ -90,32 +83,6 @@ # endif #endif -/* -** These #defines should enable >2GB file support on Posix if the -** underlying operating system supports it. If the OS lacks -** large file support, these should be no-ops. -** -** Large file support can be disabled using the -DSQLITE_DISABLE_LFS switch -** on the compiler command line. This is necessary if you are compiling -** on a recent machine (ex: RedHat 7.2) but you want your code to work -** on an older machine (ex: RedHat 6.0). If you compile on RedHat 7.2 -** without this option, LFS is enable. But LFS does not exist in the kernel -** in RedHat 6.0, so the code won't work. Hence, for maximum binary -** portability you should omit LFS. -** -** The previous paragraph was written in 2005. (This paragraph is written -** on 2008-11-28.) These days, all Linux kernels support large files, so -** you should probably leave LFS enabled. But some embedded platforms might -** lack LFS in which case the SQLITE_DISABLE_LFS macro might still be useful. -*/ -#ifndef SQLITE_DISABLE_LFS -# define _LARGE_FILE 1 -# ifndef _FILE_OFFSET_BITS -# define _FILE_OFFSET_BITS 64 -# endif -# define _LARGEFILE_SOURCE 1 -#endif - /* ** standard include files. */ @@ -127,11 +94,10 @@ #include #include #if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 -#include +# include #endif - -#if SQLITE_ENABLE_LOCKING_STYLE +#if SQLITE_ENABLE_LOCKING_STYLE || OS_VXWORKS # include # if OS_VXWORKS # include @@ -225,11 +191,13 @@ struct unixFile { const char *zPath; /* Name of the file */ unixShm *pShm; /* Shared memory segment information */ int szChunk; /* Configured by FCNTL_CHUNK_SIZE */ +#if SQLITE_MAX_MMAP_SIZE>0 int nFetchOut; /* Number of outstanding xFetch refs */ sqlite3_int64 mmapSize; /* Usable size of mapping at pMapRegion */ sqlite3_int64 mmapSizeActual; /* Actual size of mapping at pMapRegion */ sqlite3_int64 mmapSizeMax; /* Configured FCNTL_MMAP_SIZE value */ void *pMapRegion; /* Memory mapped region */ +#endif #ifdef __QNXNTO__ int sectorSize; /* Device sector size */ int deviceCharacteristics; /* Precomputed device characteristics */ @@ -265,6 +233,12 @@ struct unixFile { #endif }; +/* This variable holds the process id (pid) from when the xRandomness() +** method was called. If xOpen() is called from a different process id, +** indicating that a fork() has occurred, the PRNG will be reset. +*/ +static int randomnessPid = 0; + /* ** Allowed values for the unixFile.ctrlFlags bitmask: */ @@ -343,11 +317,16 @@ static int posixOpen(const char *zFile, int flags, int mode){ ** we are not running as root. */ static int posixFchown(int fd, uid_t uid, gid_t gid){ +#if OS_VXWORKS + return 0; +#else return geteuid() ? 0 : fchown(fd,uid,gid); +#endif } /* Forward reference */ static int openDirectory(const char*, int*); +static int unixGetpagesize(void); /* ** Many system calls are accessed through pointer-to-functions so that @@ -398,7 +377,7 @@ static struct unix_syscall { { "read", (sqlite3_syscall_ptr)read, 0 }, #define osRead ((ssize_t(*)(int,void*,size_t))aSyscall[8].pCurrent) -#if defined(USE_PREAD) || SQLITE_ENABLE_LOCKING_STYLE +#if defined(USE_PREAD) || (SQLITE_ENABLE_LOCKING_STYLE && !OS_VXWORKS) { "pread", (sqlite3_syscall_ptr)pread, 0 }, #else { "pread", (sqlite3_syscall_ptr)0, 0 }, @@ -415,7 +394,7 @@ static struct unix_syscall { { "write", (sqlite3_syscall_ptr)write, 0 }, #define osWrite ((ssize_t(*)(int,const void*,size_t))aSyscall[11].pCurrent) -#if defined(USE_PREAD) || SQLITE_ENABLE_LOCKING_STYLE +#if defined(USE_PREAD) || (SQLITE_ENABLE_LOCKING_STYLE && !OS_VXWORKS) { "pwrite", (sqlite3_syscall_ptr)pwrite, 0 }, #else { "pwrite", (sqlite3_syscall_ptr)0, 0 }, @@ -456,6 +435,7 @@ static struct unix_syscall { { "fchown", (sqlite3_syscall_ptr)posixFchown, 0 }, #define osFchown ((int(*)(int,uid_t,gid_t))aSyscall[20].pCurrent) +#if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 { "mmap", (sqlite3_syscall_ptr)mmap, 0 }, #define osMmap ((void*(*)(void*,size_t,int,int,int,off_t))aSyscall[21].pCurrent) @@ -468,6 +448,10 @@ static struct unix_syscall { { "mremap", (sqlite3_syscall_ptr)0, 0 }, #endif #define osMremap ((void*(*)(void*,size_t,size_t,int,...))aSyscall[23].pCurrent) + { "getpagesize", (sqlite3_syscall_ptr)unixGetpagesize, 0 }, +#define osGetpagesize ((int(*)(void))aSyscall[24].pCurrent) + +#endif }; /* End of the overrideable system calls */ @@ -554,6 +538,15 @@ static const char *unixNextSystemCall(sqlite3_vfs *p, const char *zName){ return 0; } +/* +** Do not accept any file descriptor less than this value, in order to avoid +** opening database file using file descriptors that are commonly used for +** standard input, output, and error. +*/ +#ifndef SQLITE_MINIMUM_FILE_DESCRIPTOR +# define SQLITE_MINIMUM_FILE_DESCRIPTOR 3 +#endif + /* ** Invoke open(). Do so multiple times, until it either succeeds or ** fails for some reason other than EINTR. @@ -574,13 +567,23 @@ static const char *unixNextSystemCall(sqlite3_vfs *p, const char *zName){ static int robust_open(const char *z, int f, mode_t m){ int fd; mode_t m2 = m ? m : SQLITE_DEFAULT_FILE_PERMISSIONS; - do{ + while(1){ #if defined(O_CLOEXEC) fd = osOpen(z,f|O_CLOEXEC,m2); #else fd = osOpen(z,f,m2); #endif - }while( fd<0 && errno==EINTR ); + if( fd<0 ){ + if( errno==EINTR ) continue; + break; + } + if( fd>=SQLITE_MINIMUM_FILE_DESCRIPTOR ) break; + osClose(fd); + sqlite3_log(SQLITE_WARNING, + "attempt to open \"%s\" as file descriptor %d", z, fd); + fd = -1; + if( osOpen("/dev/null", f, m)<0 ) break; + } if( fd>=0 ){ if( m!=0 ){ struct stat statbuf; @@ -761,16 +764,6 @@ static int sqliteErrorFromPosixError(int posixError, int sqliteIOErr) { case EPERM: return SQLITE_PERM; - /* EDEADLK is only possible if a call to fcntl(F_SETLKW) is made. And - ** this module never makes such a call. And the code in SQLite itself - ** asserts that SQLITE_IOERR_BLOCKED is never returned. For these reasons - ** this case is also commented out. If the system does set errno to EDEADLK, - ** the default SQLITE_IOERR_XXX code will be returned. */ -#if 0 - case EDEADLK: - return SQLITE_IOERR_BLOCKED; -#endif - #if EOPNOTSUPP!=ENOTSUP case EOPNOTSUPP: /* something went terribly awry, unless during file system support @@ -1299,6 +1292,19 @@ static int findInodeInfo( return SQLITE_OK; } +/* +** Return TRUE if pFile has been renamed or unlinked since it was first opened. +*/ +static int fileHasMoved(unixFile *pFile){ +#if OS_VXWORKS + return pFile->pInode!=0 && pFile->pId!=pFile->pInode->fileId.pId; +#else + struct stat buf; + return pFile->pInode!=0 && + (osStat(pFile->zPath, &buf)!=0 || buf.st_ino!=pFile->pInode->fileId.ino); +#endif +} + /* ** Check a unixFile that is a database. Verify the following: @@ -1333,10 +1339,7 @@ static void verifyDbFile(unixFile *pFile){ pFile->ctrlFlags |= UNIXFILE_WARNED; return; } - if( pFile->pInode!=0 - && ((rc = osStat(pFile->zPath, &buf))!=0 - || buf.st_ino!=pFile->pInode->fileId.ino) - ){ + if( fileHasMoved(pFile) ){ sqlite3_log(SQLITE_WARNING, "file renamed while open: %s", pFile->zPath); pFile->ctrlFlags |= UNIXFILE_WARNED; return; @@ -1874,12 +1877,16 @@ end_unlock: ** the requested locking level, this routine is a no-op. */ static int unixUnlock(sqlite3_file *id, int eFileLock){ +#if SQLITE_MAX_MMAP_SIZE>0 assert( eFileLock==SHARED_LOCK || ((unixFile *)id)->nFetchOut==0 ); +#endif return posixUnlock(id, eFileLock, 0); } +#if SQLITE_MAX_MMAP_SIZE>0 static int unixMapfile(unixFile *pFd, i64 nByte); static void unixUnmapfile(unixFile *pFd); +#endif /* ** This function performs the parts of the "close file" operation @@ -1893,7 +1900,9 @@ static void unixUnmapfile(unixFile *pFd); */ static int closeUnixFile(sqlite3_file *id){ unixFile *pFile = (unixFile*)id; +#if SQLITE_MAX_MMAP_SIZE>0 unixUnmapfile(pFile); +#endif if( pFile->h>=0 ){ robust_close(pFile, pFile->h, __LINE__); pFile->h = -1; @@ -1906,6 +1915,13 @@ static int closeUnixFile(sqlite3_file *id){ vxworksReleaseFileId(pFile->pId); pFile->pId = 0; } +#endif +#ifdef SQLITE_UNLINK_AFTER_CLOSE + if( pFile->ctrlFlags & UNIXFILE_DELETE ){ + osUnlink(pFile->zPath); + sqlite3_free(*(char**)&pFile->zPath); + pFile->zPath = 0; + } #endif OSTRACE(("CLOSE %-3d\n", pFile->h)); OpenCounter(-1); @@ -2429,7 +2445,6 @@ static int semCheckReservedLock(sqlite3_file *id, int *pResOut) { /* Otherwise see if some other process holds it. */ if( !reserved ){ sem_t *pSem = pFile->pInode->pSem; - struct stat statBuf; if( sem_trywait(pSem)==-1 ){ int tErrno = errno; @@ -2482,7 +2497,6 @@ static int semCheckReservedLock(sqlite3_file *id, int *pResOut) { */ static int semLock(sqlite3_file *id, int eFileLock) { unixFile *pFile = (unixFile*)id; - int fd; sem_t *pSem = pFile->pInode->pSem; int rc = SQLITE_OK; @@ -3098,6 +3112,7 @@ static int seekAndRead(unixFile *id, sqlite3_int64 offset, void *pBuf, int cnt){ #endif TIMER_START; assert( cnt==(cnt&0x1ffff) ); + assert( id->h>2 ); cnt &= 0x1ffff; do{ #if defined(USE_PREAD) @@ -3212,6 +3227,7 @@ static int seekAndWriteFd( int rc = 0; /* Value returned by system call */ assert( nBuf==(nBuf&0x1ffff) ); + assert( fd>2 ); nBuf &= 0x1ffff; TIMER_START; @@ -3597,6 +3613,7 @@ static int unixTruncate(sqlite3_file *id, i64 nByte){ } #endif +#if SQLITE_MAX_MMAP_SIZE>0 /* If the file was just truncated to a size smaller than the currently ** mapped region, reduce the effective mapping size as well. SQLite will ** use read() and write() to access data beyond this point from now on. @@ -3604,6 +3621,7 @@ static int unixTruncate(sqlite3_file *id, i64 nByte){ if( nBytemmapSize ){ pFile->mmapSize = nByte; } +#endif return SQLITE_OK; } @@ -3693,6 +3711,7 @@ static int fcntlSizeHint(unixFile *pFile, i64 nByte){ } } +#if SQLITE_MAX_MMAP_SIZE>0 if( pFile->mmapSizeMax>0 && nByte>pFile->mmapSize ){ int rc; if( pFile->szChunk<=0 ){ @@ -3705,6 +3724,7 @@ static int fcntlSizeHint(unixFile *pFile, i64 nByte){ rc = unixMapfile(pFile, nByte); return rc; } +#endif return SQLITE_OK; } @@ -3773,18 +3793,28 @@ static int unixFileControl(sqlite3_file *id, int op, void *pArg){ } return SQLITE_OK; } + case SQLITE_FCNTL_HAS_MOVED: { + *(int*)pArg = fileHasMoved(pFile); + return SQLITE_OK; + } +#if SQLITE_MAX_MMAP_SIZE>0 case SQLITE_FCNTL_MMAP_SIZE: { i64 newLimit = *(i64*)pArg; + int rc = SQLITE_OK; if( newLimit>sqlite3GlobalConfig.mxMmap ){ newLimit = sqlite3GlobalConfig.mxMmap; } *(i64*)pArg = pFile->mmapSizeMax; - if( newLimit>=0 ){ + if( newLimit>=0 && newLimit!=pFile->mmapSizeMax && pFile->nFetchOut==0 ){ pFile->mmapSizeMax = newLimit; - if( newLimitmmapSize ) pFile->mmapSize = newLimit; + if( pFile->mmapSize>0 ){ + unixUnmapfile(pFile); + rc = unixMapfile(pFile, -1); + } } - return SQLITE_OK; + return rc; } +#endif #ifdef SQLITE_DEBUG /* The pager calls this method to signal that it has done ** a rollback and that the database is therefore unchanged and @@ -3929,8 +3959,25 @@ static int unixDeviceCharacteristics(sqlite3_file *id){ return rc; } -#ifndef SQLITE_OMIT_WAL +#if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 + +/* +** Return the system page size. +** +** This function should not be called directly by other code in this file. +** Instead, it should be called via macro osGetpagesize(). +*/ +static int unixGetpagesize(void){ +#if defined(_BSD_SOURCE) + return getpagesize(); +#else + return (int)sysconf(_SC_PAGESIZE); +#endif +} + +#endif /* !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 */ +#ifndef SQLITE_OMIT_WAL /* ** Object used to represent an shared memory buffer. @@ -4047,7 +4094,7 @@ static int unixShmSystemLock( #ifdef SQLITE_DEBUG { u16 mask; OSTRACE(("SHM-LOCK ")); - mask = (1<<(ofst+n)) - (1<31 ? 0xffff : (1<<(ofst+n)) - (1<pInode->pShmNode; assert( unixMutexHeld() ); if( p && p->nRef==0 ){ + int nShmPerMap = unixShmRegionPerMap(); int i; assert( p->pInode==pFd->pInode ); sqlite3_mutex_free(p->mutex); - for(i=0; inRegion; i++){ + for(i=0; inRegion; i+=nShmPerMap){ if( p->h>=0 ){ osMunmap(p->apRegion[i], p->szRegion); }else{ @@ -4302,6 +4366,8 @@ static int unixShmMap( unixShm *p; unixShmNode *pShmNode; int rc = SQLITE_OK; + int nShmPerMap = unixShmRegionPerMap(); + int nReqRegion; /* If the shared-memory file has not yet been opened, open it now. */ if( pDbFd->pShm==0 ){ @@ -4317,9 +4383,12 @@ static int unixShmMap( assert( pShmNode->h>=0 || pDbFd->pInode->bProcessLock==1 ); assert( pShmNode->h<0 || pDbFd->pInode->bProcessLock==0 ); - if( pShmNode->nRegion<=iRegion ){ + /* Minimum number of regions required to be mapped. */ + nReqRegion = ((iRegion+nShmPerMap) / nShmPerMap) * nShmPerMap; + + if( pShmNode->nRegionszRegion = szRegion; @@ -4368,17 +4437,19 @@ static int unixShmMap( /* Map the requested memory region into this processes address space. */ apNew = (char **)sqlite3_realloc( - pShmNode->apRegion, (iRegion+1)*sizeof(char *) + pShmNode->apRegion, nReqRegion*sizeof(char *) ); if( !apNew ){ rc = SQLITE_IOERR_NOMEM; goto shmpage_out; } pShmNode->apRegion = apNew; - while(pShmNode->nRegion<=iRegion){ + while( pShmNode->nRegionh>=0 ){ - pMem = osMmap(0, szRegion, + pMem = osMmap(0, nMap, pShmNode->isReadonly ? PROT_READ : PROT_READ|PROT_WRITE, MAP_SHARED, pShmNode->h, szRegion*(i64)pShmNode->nRegion ); @@ -4394,8 +4465,11 @@ static int unixShmMap( } memset(pMem, 0, szRegion); } - pShmNode->apRegion[pShmNode->nRegion] = pMem; - pShmNode->nRegion++; + + for(i=0; iapRegion[pShmNode->nRegion+i] = &((char*)pMem)[szRegion*i]; + } + pShmNode->nRegion += nShmPerMap; } } @@ -4595,37 +4669,20 @@ static int unixShmUnmap( # define unixShmUnmap 0 #endif /* #ifndef SQLITE_OMIT_WAL */ +#if SQLITE_MAX_MMAP_SIZE>0 /* ** If it is currently memory mapped, unmap file pFd. */ static void unixUnmapfile(unixFile *pFd){ assert( pFd->nFetchOut==0 ); -#if SQLITE_MAX_MMAP_SIZE>0 if( pFd->pMapRegion ){ osMunmap(pFd->pMapRegion, pFd->mmapSizeActual); pFd->pMapRegion = 0; pFd->mmapSize = 0; pFd->mmapSizeActual = 0; } -#endif } -#if SQLITE_MAX_MMAP_SIZE>0 -/* -** Return the system page size. -*/ -static int unixGetPagesize(void){ -#if HAVE_MREMAP - return 512; -#elif defined(_BSD_SOURCE) - return getpagesize(); -#else - return (int)sysconf(_SC_PAGESIZE); -#endif -} -#endif /* SQLITE_MAX_MMAP_SIZE>0 */ - -#if SQLITE_MAX_MMAP_SIZE>0 /* ** Attempt to set the size of the memory mapping maintained by file ** descriptor pFd to nNew bytes. Any existing mapping is discarded. @@ -4662,8 +4719,12 @@ static void unixRemapfile( if( (pFd->ctrlFlags & UNIXFILE_RDONLY)==0 ) flags |= PROT_WRITE; if( pOrig ){ - const int szSyspage = unixGetPagesize(); +#if HAVE_MREMAP + i64 nReuse = pFd->mmapSize; +#else + const int szSyspage = osGetpagesize(); i64 nReuse = (pFd->mmapSize & ~(szSyspage-1)); +#endif u8 *pReq = &pOrig[nReuse]; /* Unmap any pages of the existing mapping that cannot be reused. */ @@ -4710,7 +4771,6 @@ static void unixRemapfile( pFd->pMapRegion = (void *)pNew; pFd->mmapSize = pFd->mmapSizeActual = nNew; } -#endif /* ** Memory map or remap the file opened by file-descriptor pFd (if the file @@ -4729,7 +4789,6 @@ static void unixRemapfile( ** code otherwise. */ static int unixMapfile(unixFile *pFd, i64 nByte){ -#if SQLITE_MAX_MMAP_SIZE>0 i64 nMap = nByte; int rc; @@ -4755,10 +4814,10 @@ static int unixMapfile(unixFile *pFd, i64 nByte){ unixUnmapfile(pFd); } } -#endif return SQLITE_OK; } +#endif /* SQLITE_MAX_MMAP_SIZE>0 */ /* ** If possible, return a pointer to a mapping of file fd starting at offset @@ -4804,6 +4863,7 @@ static int unixFetch(sqlite3_file *fd, i64 iOff, int nAmt, void **pp){ ** may now be invalid and should be unmapped. */ static int unixUnfetch(sqlite3_file *fd, i64 iOff, void *p){ +#if SQLITE_MAX_MMAP_SIZE>0 unixFile *pFd = (unixFile *)fd; /* The underlying database file */ UNUSED_PARAMETER(iOff); @@ -4822,6 +4882,11 @@ static int unixUnfetch(sqlite3_file *fd, i64 iOff, void *p){ } assert( pFd->nFetchOut>=0 ); +#else + UNUSED_PARAMETER(fd); + UNUSED_PARAMETER(p); + UNUSED_PARAMETER(iOff); +#endif return SQLITE_OK; } @@ -5153,7 +5218,9 @@ static int fillInUnixFile( pNew->pVfs = pVfs; pNew->zPath = zFilename; pNew->ctrlFlags = (u8)ctrlFlags; +#if SQLITE_MAX_MMAP_SIZE>0 pNew->mmapSizeMax = sqlite3GlobalConfig.szMmap; +#endif if( sqlite3_uri_boolean(((ctrlFlags & UNIXFILE_URI) ? zFilename : 0), "psow", SQLITE_POWERSAFE_OVERWRITE) ){ pNew->ctrlFlags |= UNIXFILE_PSOW; @@ -5308,6 +5375,7 @@ static int fillInUnixFile( */ static const char *unixTempFileDir(void){ static const char *azDirs[] = { + 0, 0, 0, "/var/tmp", @@ -5320,7 +5388,8 @@ static const char *unixTempFileDir(void){ const char *zDir = 0; azDirs[0] = sqlite3_temp_directory; - if( !azDirs[1] ) azDirs[1] = getenv("TMPDIR"); + if( !azDirs[1] ) azDirs[1] = getenv("SQLITE_TMPDIR"); + if( !azDirs[2] ) azDirs[2] = getenv("TMPDIR"); for(i=0; i