diff options
Diffstat (limited to 'src/os_unix.c')
-rw-r--r-- | src/os_unix.c | 384 |
1 files changed, 250 insertions, 134 deletions
diff --git a/src/os_unix.c b/src/os_unix.c index 0ea6daf..c85e9b5 100644 --- a/src/os_unix.c +++ b/src/os_unix.c @@ -123,6 +123,7 @@ #include <sys/mman.h> #endif + #if SQLITE_ENABLE_LOCKING_STYLE # include <sys/ioctl.h> # if OS_VXWORKS @@ -164,8 +165,8 @@ #endif /* - ** Default permissions when creating auto proxy dir - */ +** Default permissions when creating auto proxy dir +*/ #ifndef SQLITE_DEFAULT_PROXYDIR_PERMISSIONS # define SQLITE_DEFAULT_PROXYDIR_PERMISSIONS 0755 #endif @@ -206,10 +207,11 @@ struct UnixUnusedFd { typedef struct unixFile unixFile; struct unixFile { sqlite3_io_methods const *pMethod; /* Always the first entry */ + sqlite3_vfs *pVfs; /* The VFS that created this unixFile */ unixInodeInfo *pInode; /* Info about locks on this inode */ int h; /* The file descriptor */ unsigned char eFileLock; /* The type of lock held on this fd */ - unsigned char ctrlFlags; /* Behavioral bits. UNIXFILE_* flags */ + unsigned short int ctrlFlags; /* Behavioral bits. UNIXFILE_* flags */ int lastErrno; /* The unix errno from last I/O error */ void *lockingContext; /* Locking style specific state */ UnixUnusedFd *pUnused; /* Pre-allocated UnixUnusedFd */ @@ -223,7 +225,6 @@ struct unixFile { unsigned fsFlags; /* cached details from statfs() */ #endif #if OS_VXWORKS - int isDelete; /* Delete on close if true */ struct vxworksFileId *pId; /* Unique file ID */ #endif #ifndef NDEBUG @@ -257,6 +258,11 @@ struct unixFile { #else # define UNIXFILE_DIRSYNC 0x00 #endif +#define UNIXFILE_PSOW 0x10 /* SQLITE_IOCAP_POWERSAFE_OVERWRITE */ +#define UNIXFILE_DELETE 0x20 /* Delete on close */ +#define UNIXFILE_URI 0x40 /* Filename might have query parameters */ +#define UNIXFILE_NOLOCK 0x80 /* Do no file locking */ +#define UNIXFILE_CHOWN 0x100 /* File ownership was changed */ /* ** Include code that is common to all os_*.c files @@ -407,6 +413,18 @@ static struct unix_syscall { { "openDirectory", (sqlite3_syscall_ptr)openDirectory, 0 }, #define osOpenDirectory ((int(*)(const char*,int*))aSyscall[17].pCurrent) + { "mkdir", (sqlite3_syscall_ptr)mkdir, 0 }, +#define osMkdir ((int(*)(const char*,mode_t))aSyscall[18].pCurrent) + + { "rmdir", (sqlite3_syscall_ptr)rmdir, 0 }, +#define osRmdir ((int(*)(const char*))aSyscall[19].pCurrent) + + { "fchown", (sqlite3_syscall_ptr)fchown, 0 }, +#define osFchown ((int(*)(int,uid_t,gid_t))aSyscall[20].pCurrent) + + { "umask", (sqlite3_syscall_ptr)umask, 0 }, +#define osUmask ((mode_t(*)(mode_t))aSyscall[21].pCurrent) + }; /* End of the overrideable system calls */ /* @@ -493,12 +511,46 @@ static const char *unixNextSystemCall(sqlite3_vfs *p, const char *zName){ } /* -** Retry open() calls that fail due to EINTR +** Invoke open(). Do so multiple times, until it either succeeds or +** fails for some reason other than EINTR. +** +** If the file creation mode "m" is 0 then set it to the default for +** SQLite. The default is SQLITE_DEFAULT_FILE_PERMISSIONS (normally +** 0644) as modified by the system umask. If m is not 0, then +** make the file creation mode be exactly m ignoring the umask. +** +** The m parameter will be non-zero only when creating -wal, -journal, +** and -shm files. We want those files to have *exactly* the same +** permissions as their original database, unadulterated by the umask. +** In that way, if a database file is -rw-rw-rw or -rw-rw-r-, and a +** transaction crashes and leaves behind hot journals, then any +** process that is able to write to the database will also be able to +** recover the hot journals. */ -static int robust_open(const char *z, int f, int m){ - int rc; - do{ rc = osOpen(z,f,m); }while( rc<0 && errno==EINTR ); - return rc; +static int robust_open(const char *z, int f, mode_t m){ + int fd; + mode_t m2; + mode_t origM = 0; + if( m==0 ){ + m2 = SQLITE_DEFAULT_FILE_PERMISSIONS; + }else{ + m2 = m; + origM = osUmask(0); + } + do{ +#if defined(O_CLOEXEC) + fd = osOpen(z,f|O_CLOEXEC,m2); +#else + fd = osOpen(z,f,m2); +#endif + }while( fd<0 && errno==EINTR ); + if( m ){ + osUmask(origM); + } +#if defined(FD_CLOEXEC) && (!defined(O_CLOEXEC) || O_CLOEXEC==0) + if( fd>=0 ) osFcntl(fd, F_SETFD, osFcntl(fd, F_GETFD, 0) | FD_CLOEXEC); +#endif + return fd; } /* @@ -1756,7 +1808,7 @@ static int closeUnixFile(sqlite3_file *id){ } #if OS_VXWORKS if( pFile->pId ){ - if( pFile->isDelete ){ + if( pFile->ctrlFlags & UNIXFILE_DELETE ){ osUnlink(pFile->pId->zCanonicalName); } vxworksReleaseFileId(pFile->pId); @@ -1845,8 +1897,8 @@ static int nolockClose(sqlite3_file *id) { ************************* Begin dot-file Locking ****************************** ** ** The dotfile locking implementation uses the existance of separate lock -** files in order to control access to the database. This works on just -** about every filesystem imaginable. But there are serious downsides: +** files (really a directory) to control access to the database. This works +** on just about every filesystem imaginable. But there are serious downsides: ** ** (1) There is zero concurrency. A single reader blocks all other ** connections from reading or writing the database. @@ -1857,15 +1909,15 @@ static int nolockClose(sqlite3_file *id) { ** Nevertheless, a dotlock is an appropriate locking mode for use if no ** other locking strategy is available. ** -** Dotfile locking works by creating a file in the same directory as the -** database and with the same name but with a ".lock" extension added. -** The existance of a lock file implies an EXCLUSIVE lock. All other lock -** types (SHARED, RESERVED, PENDING) are mapped into EXCLUSIVE. +** Dotfile locking works by creating a subdirectory in the same directory as +** the database and with the same name but with a ".lock" extension added. +** The existance of a lock directory implies an EXCLUSIVE lock. All other +** lock types (SHARED, RESERVED, PENDING) are mapped into EXCLUSIVE. */ /* ** The file suffix added to the data base filename in order to create the -** lock file. +** lock directory. */ #define DOTLOCK_SUFFIX ".lock" @@ -1932,7 +1984,6 @@ static int dotlockCheckReservedLock(sqlite3_file *id, int *pResOut) { */ static int dotlockLock(sqlite3_file *id, int eFileLock) { unixFile *pFile = (unixFile*)id; - int fd; char *zLockFile = (char *)pFile->lockingContext; int rc = SQLITE_OK; @@ -1952,9 +2003,9 @@ static int dotlockLock(sqlite3_file *id, int eFileLock) { } /* grab an exclusive lock */ - fd = robust_open(zLockFile,O_RDONLY|O_CREAT|O_EXCL,0600); - if( fd<0 ){ - /* failed to open/create the file, someone else may have stolen the lock */ + rc = osMkdir(zLockFile, 0777); + if( rc<0 ){ + /* failed to open/create the lock directory */ int tErrno = errno; if( EEXIST == tErrno ){ rc = SQLITE_BUSY; @@ -1966,7 +2017,6 @@ static int dotlockLock(sqlite3_file *id, int eFileLock) { } return rc; } - robust_close(pFile, fd, __LINE__); /* got it, set the type and return ok */ pFile->eFileLock = eFileLock; @@ -1985,6 +2035,7 @@ static int dotlockLock(sqlite3_file *id, int eFileLock) { static int dotlockUnlock(sqlite3_file *id, int eFileLock) { unixFile *pFile = (unixFile*)id; char *zLockFile = (char *)pFile->lockingContext; + int rc; assert( pFile ); OSTRACE(("UNLOCK %d %d was %d pid=%d (dotlock)\n", pFile->h, eFileLock, @@ -2006,9 +2057,11 @@ static int dotlockUnlock(sqlite3_file *id, int eFileLock) { /* To fully unlock the database, delete the lock file */ assert( eFileLock==NO_LOCK ); - if( osUnlink(zLockFile) ){ - int rc = 0; + rc = osRmdir(zLockFile); + if( rc<0 && errno==ENOTDIR ) rc = osUnlink(zLockFile); + if( rc<0 ){ int tErrno = errno; + rc = 0; if( ENOENT != tErrno ){ rc = SQLITE_IOERR_UNLOCK; } @@ -2944,35 +2997,48 @@ static int nfsUnlock(sqlite3_file *id, int eFileLock){ */ static int seekAndRead(unixFile *id, sqlite3_int64 offset, void *pBuf, int cnt){ int got; + int prior = 0; #if (!defined(USE_PREAD) && !defined(USE_PREAD64)) i64 newOffset; #endif TIMER_START; + do{ #if defined(USE_PREAD) - do{ got = osPread(id->h, pBuf, cnt, offset); }while( got<0 && errno==EINTR ); - SimulateIOError( got = -1 ); + got = osPread(id->h, pBuf, cnt, offset); + SimulateIOError( got = -1 ); #elif defined(USE_PREAD64) - do{ got = osPread64(id->h, pBuf, cnt, offset); }while( got<0 && errno==EINTR); - SimulateIOError( got = -1 ); + got = osPread64(id->h, pBuf, cnt, offset); + SimulateIOError( got = -1 ); #else - newOffset = lseek(id->h, offset, SEEK_SET); - SimulateIOError( newOffset-- ); - if( newOffset!=offset ){ - if( newOffset == -1 ){ - ((unixFile*)id)->lastErrno = errno; - }else{ - ((unixFile*)id)->lastErrno = 0; + newOffset = lseek(id->h, offset, SEEK_SET); + SimulateIOError( newOffset-- ); + if( newOffset!=offset ){ + if( newOffset == -1 ){ + ((unixFile*)id)->lastErrno = errno; + }else{ + ((unixFile*)id)->lastErrno = 0; + } + return -1; } - return -1; - } - do{ got = osRead(id->h, pBuf, cnt); }while( got<0 && errno==EINTR ); + got = osRead(id->h, pBuf, cnt); #endif + if( got==cnt ) break; + if( got<0 ){ + if( errno==EINTR ){ got = 1; continue; } + prior = 0; + ((unixFile*)id)->lastErrno = errno; + break; + }else if( got>0 ){ + cnt -= got; + offset += got; + prior += got; + pBuf = (void*)(got + (char*)pBuf); + } + }while( got>0 ); TIMER_END; - if( got<0 ){ - ((unixFile*)id)->lastErrno = errno; - } - OSTRACE(("READ %-3d %5d %7lld %llu\n", id->h, got, offset, TIMER_ELAPSED)); - return got; + OSTRACE(("READ %-3d %5d %7lld %llu\n", + id->h, got+prior, offset-prior, TIMER_ELAPSED)); + return got+prior; } /* @@ -3279,9 +3345,6 @@ static int openDirectory(const char *zFilename, int *pFd){ zDirname[ii] = '\0'; fd = robust_open(zDirname, O_RDONLY|O_BINARY, 0); if( fd>=0 ){ -#ifdef FD_CLOEXEC - osFcntl(fd, F_SETFD, osFcntl(fd, F_GETFD, 0) | FD_CLOEXEC); -#endif OSTRACE(("OPENDIR %-3d %s\n", fd, zDirname)); } } @@ -3364,7 +3427,7 @@ static int unixTruncate(sqlite3_file *id, i64 nByte){ ** actual file size after the operation may be larger than the requested ** size). */ - if( pFile->szChunk ){ + if( pFile->szChunk>0 ){ nByte = ((nByte + pFile->szChunk - 1)/pFile->szChunk) * pFile->szChunk; } @@ -3478,6 +3541,22 @@ static int fcntlSizeHint(unixFile *pFile, i64 nByte){ } /* +** If *pArg is inititially negative then this is a query. Set *pArg to +** 1 or 0 depending on whether or not bit mask of pFile->ctrlFlags is set. +** +** If *pArg is 0 or 1, then clear or set the mask bit of pFile->ctrlFlags. +*/ +static void unixModeBit(unixFile *pFile, unsigned char mask, int *pArg){ + if( *pArg<0 ){ + *pArg = (pFile->ctrlFlags & mask)!=0; + }else if( (*pArg)==0 ){ + pFile->ctrlFlags &= ~mask; + }else{ + pFile->ctrlFlags |= mask; + } +} + +/* ** Information and control of an open file handle. */ static int unixFileControl(sqlite3_file *id, int op, void *pArg){ @@ -3503,14 +3582,15 @@ static int unixFileControl(sqlite3_file *id, int op, void *pArg){ return rc; } case SQLITE_FCNTL_PERSIST_WAL: { - int bPersist = *(int*)pArg; - if( bPersist<0 ){ - *(int*)pArg = (pFile->ctrlFlags & UNIXFILE_PERSIST_WAL)!=0; - }else if( bPersist==0 ){ - pFile->ctrlFlags &= ~UNIXFILE_PERSIST_WAL; - }else{ - pFile->ctrlFlags |= UNIXFILE_PERSIST_WAL; - } + unixModeBit(pFile, UNIXFILE_PERSIST_WAL, (int*)pArg); + return SQLITE_OK; + } + case SQLITE_FCNTL_POWERSAFE_OVERWRITE: { + unixModeBit(pFile, UNIXFILE_PSOW, (int*)pArg); + return SQLITE_OK; + } + case SQLITE_FCNTL_VFSNAME: { + *(char**)pArg = sqlite3_mprintf("%s", pFile->pVfs->zName); return SQLITE_OK; } #ifndef NDEBUG @@ -3530,9 +3610,6 @@ static int unixFileControl(sqlite3_file *id, int op, void *pArg){ return proxyFileControl(id,op,pArg); } #endif /* SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) */ - case SQLITE_FCNTL_SYNC_OMITTED: { - return SQLITE_OK; /* A no-op */ - } } return SQLITE_NOTFOUND; } @@ -3547,17 +3624,31 @@ static int unixFileControl(sqlite3_file *id, int op, void *pArg){ ** a database and its journal file) that the sector size will be the ** same for both. */ -static int unixSectorSize(sqlite3_file *NotUsed){ - UNUSED_PARAMETER(NotUsed); +static int unixSectorSize(sqlite3_file *pFile){ + (void)pFile; return SQLITE_DEFAULT_SECTOR_SIZE; } /* -** Return the device characteristics for the file. This is always 0 for unix. +** Return the device characteristics for the file. +** +** This VFS is set up to return SQLITE_IOCAP_POWERSAFE_OVERWRITE by default. +** However, that choice is contraversial since technically the underlying +** file system does not always provide powersafe overwrites. (In other +** words, after a power-loss event, parts of the file that were never +** written might end up being altered.) However, non-PSOW behavior is very, +** very rare. And asserting PSOW makes a large reduction in the amount +** of required I/O for journaling, since a lot of padding is eliminated. +** Hence, while POWERSAFE_OVERWRITE is on by default, there is a file-control +** available to turn it off and URI query parameter available to turn it off. */ -static int unixDeviceCharacteristics(sqlite3_file *NotUsed){ - UNUSED_PARAMETER(NotUsed); - return 0; +static int unixDeviceCharacteristics(sqlite3_file *id){ + unixFile *p = (unixFile*)id; + if( p->ctrlFlags & UNIXFILE_PSOW ){ + return SQLITE_IOCAP_POWERSAFE_OVERWRITE; + }else{ + return 0; + } } #ifndef SQLITE_OMIT_WAL @@ -3803,8 +3894,7 @@ static int unixOpenSharedMemory(unixFile *pDbFd){ /* Call fstat() to figure out the permissions on the database file. If ** a new *-shm file is created, an attempt will be made to create it - ** with the same permissions. The actual permissions the file is created - ** with are subject to the current umask setting. + ** with the same permissions. */ if( osFstat(pDbFd->h, &sStat) && pInode->bProcessLock==0 ){ rc = SQLITE_IOERR_FSTAT; @@ -3812,16 +3902,16 @@ static int unixOpenSharedMemory(unixFile *pDbFd){ } #ifdef SQLITE_SHM_DIRECTORY - nShmFilename = sizeof(SQLITE_SHM_DIRECTORY) + 30; + nShmFilename = sizeof(SQLITE_SHM_DIRECTORY) + 31; #else - nShmFilename = 5 + (int)strlen(pDbFd->zPath); + nShmFilename = 6 + (int)strlen(pDbFd->zPath); #endif pShmNode = sqlite3_malloc( sizeof(*pShmNode) + nShmFilename ); if( pShmNode==0 ){ rc = SQLITE_NOMEM; goto shm_open_err; } - memset(pShmNode, 0, sizeof(*pShmNode)); + memset(pShmNode, 0, sizeof(*pShmNode)+nShmFilename); zShmFilename = pShmNode->zFilename = (char*)&pShmNode[1]; #ifdef SQLITE_SHM_DIRECTORY sqlite3_snprintf(nShmFilename, zShmFilename, @@ -3841,19 +3931,26 @@ static int unixOpenSharedMemory(unixFile *pDbFd){ } if( pInode->bProcessLock==0 ){ - const char *zRO; int openFlags = O_RDWR | O_CREAT; - zRO = sqlite3_uri_parameter(pDbFd->zPath, "readonly_shm"); - if( zRO && sqlite3GetBoolean(zRO) ){ + if( sqlite3_uri_boolean(pDbFd->zPath, "readonly_shm", 0) ){ openFlags = O_RDONLY; pShmNode->isReadonly = 1; } pShmNode->h = robust_open(zShmFilename, openFlags, (sStat.st_mode&0777)); if( pShmNode->h<0 ){ - if( pShmNode->h<0 ){ - rc = unixLogError(SQLITE_CANTOPEN_BKPT, "open", zShmFilename); - goto shm_open_err; - } + rc = unixLogError(SQLITE_CANTOPEN_BKPT, "open", zShmFilename); + goto shm_open_err; + } + + /* If this process is running as root, make sure that the SHM file + ** is owned by the same user that owns the original database. Otherwise, + ** the original owner will not be able to connect. If this process is + ** not root, the following fchown() will fail, but we don't care. The + ** if(){..} and the UNIXFILE_CHOWN flag are purely to silence compiler + ** warnings. + */ + if( osFchown(pShmNode->h, sStat.st_uid, sStat.st_gid)==0 ){ + pDbFd->ctrlFlags |= UNIXFILE_CHOWN; } /* Check to see if another process is holding the dead-man switch. @@ -4506,12 +4603,9 @@ typedef const sqlite3_io_methods *(*finder_type)(const char*,unixFile*); static int fillInUnixFile( sqlite3_vfs *pVfs, /* Pointer to vfs object */ int h, /* Open file descriptor of file being opened */ - int syncDir, /* True to sync directory on first sync */ sqlite3_file *pId, /* Write to the unixFile structure here */ const char *zFilename, /* Name of the file being opened */ - int noLock, /* Omit locking if true */ - int isDelete, /* Delete on close if true */ - int isReadOnly /* True if the file is opened read-only */ + int ctrlFlags /* Zero or more UNIXFILE_* values */ ){ const sqlite3_io_methods *pLockingStyle; unixFile *pNew = (unixFile *)pId; @@ -4519,11 +4613,6 @@ static int fillInUnixFile( assert( pNew->pInode==NULL ); - /* Parameter isDelete is only used on vxworks. Express this explicitly - ** here to prevent compiler warnings about unused parameters. - */ - UNUSED_PARAMETER(isDelete); - /* Usually the path zFilename should not be a relative pathname. The ** exception is when opening the proxy "conch" file in builds that ** include the special Apple locking styles. @@ -4536,32 +4625,30 @@ static int fillInUnixFile( #endif /* No locking occurs in temporary files */ - assert( zFilename!=0 || noLock ); + assert( zFilename!=0 || (ctrlFlags & UNIXFILE_NOLOCK)!=0 ); OSTRACE(("OPEN %-3d %s\n", h, zFilename)); pNew->h = h; + pNew->pVfs = pVfs; pNew->zPath = zFilename; - if( memcmp(pVfs->zName,"unix-excl",10)==0 ){ - pNew->ctrlFlags = UNIXFILE_EXCL; - }else{ - pNew->ctrlFlags = 0; - } - if( isReadOnly ){ - pNew->ctrlFlags |= UNIXFILE_RDONLY; + pNew->ctrlFlags = (u8)ctrlFlags; + if( sqlite3_uri_boolean(((ctrlFlags & UNIXFILE_URI) ? zFilename : 0), + "psow", SQLITE_POWERSAFE_OVERWRITE) ){ + pNew->ctrlFlags |= UNIXFILE_PSOW; } - if( syncDir ){ - pNew->ctrlFlags |= UNIXFILE_DIRSYNC; + if( memcmp(pVfs->zName,"unix-excl",10)==0 ){ + pNew->ctrlFlags |= UNIXFILE_EXCL; } #if OS_VXWORKS pNew->pId = vxworksFindFileId(zFilename); if( pNew->pId==0 ){ - noLock = 1; + ctrlFlags |= UNIXFILE_NOLOCK; rc = SQLITE_NOMEM; } #endif - if( noLock ){ + if( ctrlFlags & UNIXFILE_NOLOCK ){ pLockingStyle = &nolockIoMethods; }else{ pLockingStyle = (**(finder_type*)pVfs->pAppData)(zFilename, pNew); @@ -4682,7 +4769,7 @@ static int fillInUnixFile( osUnlink(zFilename); isDelete = 0; } - pNew->isDelete = isDelete; + if( isDelete ) pNew->ctrlFlags |= UNIXFILE_DELETE; #endif if( rc!=SQLITE_OK ){ if( h>=0 ) robust_close(pNew, h, __LINE__); @@ -4747,18 +4834,19 @@ static int unixGetTempname(int nBuf, char *zBuf){ /* Check that the output buffer is large enough for the temporary file ** name. If it is not, return SQLITE_ERROR. */ - if( (strlen(zDir) + strlen(SQLITE_TEMP_FILE_PREFIX) + 17) >= (size_t)nBuf ){ + if( (strlen(zDir) + strlen(SQLITE_TEMP_FILE_PREFIX) + 18) >= (size_t)nBuf ){ return SQLITE_ERROR; } do{ - sqlite3_snprintf(nBuf-17, zBuf, "%s/"SQLITE_TEMP_FILE_PREFIX, zDir); + sqlite3_snprintf(nBuf-18, zBuf, "%s/"SQLITE_TEMP_FILE_PREFIX, zDir); j = (int)strlen(zBuf); sqlite3_randomness(15, &zBuf[j]); for(i=0; i<15; i++, j++){ zBuf[j] = (char)zChars[ ((unsigned char)zBuf[j])%(sizeof(zChars)-1) ]; } zBuf[j] = 0; + zBuf[j+1] = 0; }while( osAccess(zBuf,0)==0 ); return SQLITE_OK; } @@ -4837,12 +4925,10 @@ static UnixUnusedFd *findReusableFd(const char *zPath, int flags){ ** written to *pMode. If an IO error occurs, an SQLite error code is ** returned and the value of *pMode is not modified. ** -** If the file being opened is a temporary file, it is always created with -** the octal permissions 0600 (read/writable by owner only). If the file -** is a database or master journal file, it is created with the permissions -** mask SQLITE_DEFAULT_FILE_PERMISSIONS. -** -** Finally, if the file being opened is a WAL or regular journal file, then +** In most cases cases, this routine sets *pMode to 0, which will become +** an indication to robust_open() to create the file using +** SQLITE_DEFAULT_FILE_PERMISSIONS adjusted by the umask. +** But if the file being opened is a WAL or regular journal file, then ** this function queries the file-system for the permissions on the ** corresponding database file and sets *pMode to this value. Whenever ** possible, WAL and journal files are created using the same permissions @@ -4856,10 +4942,14 @@ static UnixUnusedFd *findReusableFd(const char *zPath, int flags){ static int findCreateFileMode( const char *zPath, /* Path of file (possibly) being created */ int flags, /* Flags passed as 4th argument to xOpen() */ - mode_t *pMode /* OUT: Permissions to open file with */ + mode_t *pMode, /* OUT: Permissions to open file with */ + uid_t *pUid, /* OUT: uid to set on the file */ + gid_t *pGid /* OUT: gid to set on the file */ ){ int rc = SQLITE_OK; /* Return Code */ - *pMode = SQLITE_DEFAULT_FILE_PERMISSIONS; + *pMode = 0; + *pUid = 0; + *pGid = 0; if( flags & (SQLITE_OPEN_WAL|SQLITE_OPEN_MAIN_JOURNAL) ){ char zDb[MAX_PATHNAME+1]; /* Database file path */ int nDb; /* Number of valid bytes in zDb */ @@ -4879,7 +4969,7 @@ static int findCreateFileMode( */ nDb = sqlite3Strlen30(zPath) - 1; #ifdef SQLITE_ENABLE_8_3_NAMES - while( nDb>0 && !sqlite3Isalnum(zPath[nDb]) ) nDb--; + while( nDb>0 && sqlite3Isalnum(zPath[nDb]) ) nDb--; if( nDb==0 || zPath[nDb]!='-' ) return SQLITE_OK; #else while( zPath[nDb]!='-' ){ @@ -4893,6 +4983,8 @@ static int findCreateFileMode( if( 0==osStat(zDb, &sStat) ){ *pMode = sStat.st_mode & 0777; + *pUid = sStat.st_uid; + *pGid = sStat.st_gid; }else{ rc = SQLITE_IOERR_FSTAT; } @@ -4937,6 +5029,7 @@ static int unixOpen( int eType = flags&0xFFFFFF00; /* Type of file to open */ int noLock; /* True to omit locking primitives */ int rc = SQLITE_OK; /* Function Return Code */ + int ctrlFlags = 0; /* UNIXFILE_* flags */ int isExclusive = (flags & SQLITE_OPEN_EXCLUSIVE); int isDelete = (flags & SQLITE_OPEN_DELETEONCLOSE); @@ -4963,7 +5056,7 @@ static int unixOpen( /* If argument zPath is a NULL pointer, this function is required to open ** a temporary file. Use this buffer to store the file name in. */ - char zTmpname[MAX_PATHNAME+1]; + char zTmpname[MAX_PATHNAME+2]; const char *zName = zPath; /* Check the following statements are true: @@ -5006,14 +5099,24 @@ static int unixOpen( } } p->pUnused = pUnused; + + /* Database filenames are double-zero terminated if they are not + ** URIs with parameters. Hence, they can always be passed into + ** sqlite3_uri_parameter(). */ + assert( (flags & SQLITE_OPEN_URI) || zName[strlen(zName)+1]==0 ); + }else if( !zName ){ /* If zName is NULL, the upper layer is requesting a temp file. */ assert(isDelete && !syncDir); - rc = unixGetTempname(MAX_PATHNAME+1, zTmpname); + rc = unixGetTempname(MAX_PATHNAME+2, zTmpname); if( rc!=SQLITE_OK ){ return rc; } zName = zTmpname; + + /* Generated temporary filenames are always double-zero terminated + ** for use by sqlite3_uri_parameter(). */ + assert( zName[strlen(zName)+1]==0 ); } /* Determine the value of the flags parameter passed to POSIX function @@ -5028,7 +5131,9 @@ static int unixOpen( if( fd<0 ){ mode_t openMode; /* Permissions to create file with */ - rc = findCreateFileMode(zName, flags, &openMode); + uid_t uid; /* Userid for the file */ + gid_t gid; /* Groupid for the file */ + rc = findCreateFileMode(zName, flags, &openMode, &uid, &gid); if( rc!=SQLITE_OK ){ assert( !p->pUnused ); assert( eType==SQLITE_OPEN_WAL || eType==SQLITE_OPEN_MAIN_JOURNAL ); @@ -5049,6 +5154,17 @@ static int unixOpen( rc = unixLogError(SQLITE_CANTOPEN_BKPT, "open", zName); goto open_finished; } + + /* If this process is running as root and if creating a new rollback + ** journal or WAL file, set the ownership of the journal or WAL to be + ** the same as the original database. If we are not running as root, + ** then the fchown() call will fail, but that's ok. The "if(){}" and + ** the setting of the UNIXFILE_CHOWN flag are purely to silence compiler + ** warnings from gcc. + */ + if( flags & (SQLITE_OPEN_WAL|SQLITE_OPEN_MAIN_JOURNAL) ){ + if( osFchown(fd, uid, gid)==0 ){ p->ctrlFlags |= UNIXFILE_CHOWN; } + } } assert( fd>=0 ); if( pOutFlags ){ @@ -5073,10 +5189,6 @@ static int unixOpen( } #endif -#ifdef FD_CLOEXEC - osFcntl(fd, F_SETFD, osFcntl(fd, F_GETFD, 0) | FD_CLOEXEC); -#endif - noLock = eType!=SQLITE_OPEN_MAIN_DB; @@ -5090,7 +5202,14 @@ static int unixOpen( ((unixFile*)pFile)->fsFlags |= SQLITE_FSFLAGS_IS_MSDOS; } #endif - + + /* Set up appropriate ctrlFlags */ + if( isDelete ) ctrlFlags |= UNIXFILE_DELETE; + if( isReadonly ) ctrlFlags |= UNIXFILE_RDONLY; + if( noLock ) ctrlFlags |= UNIXFILE_NOLOCK; + if( syncDir ) ctrlFlags |= UNIXFILE_DIRSYNC; + if( flags & SQLITE_OPEN_URI ) ctrlFlags |= UNIXFILE_URI; + #if SQLITE_ENABLE_LOCKING_STYLE #if SQLITE_PREFER_PROXY_LOCKING isAutoProxy = 1; @@ -5120,8 +5239,7 @@ static int unixOpen( useProxy = !(fsInfo.f_flags&MNT_LOCAL); } if( useProxy ){ - rc = fillInUnixFile(pVfs, fd, syncDir, pFile, zPath, noLock, - isDelete, isReadonly); + rc = fillInUnixFile(pVfs, fd, pFile, zPath, ctrlFlags); if( rc==SQLITE_OK ){ rc = proxyTransformUnixFile((unixFile*)pFile, ":auto:"); if( rc!=SQLITE_OK ){ @@ -5138,8 +5256,8 @@ static int unixOpen( } #endif - rc = fillInUnixFile(pVfs, fd, syncDir, pFile, zPath, noLock, - isDelete, isReadonly); + rc = fillInUnixFile(pVfs, fd, pFile, zPath, ctrlFlags); + open_finished: if( rc!=SQLITE_OK ){ sqlite3_free(p->pUnused); @@ -5164,7 +5282,7 @@ static int unixDelete( return unixLogError(SQLITE_IOERR_DELETE, "unlink", zPath); } #ifndef SQLITE_DISABLE_DIRSYNC - if( dirSync ){ + if( (dirSync & 1)!=0 ){ int fd; rc = osOpenDirectory(zPath, &fd); if( rc==SQLITE_OK ){ @@ -5354,7 +5472,7 @@ static int unixRandomness(sqlite3_vfs *NotUsed, int nBuf, char *zBuf){ memset(zBuf, 0, nBuf); #if !defined(SQLITE_TEST) { - int pid, fd; + int pid, fd, got; fd = robust_open("/dev/urandom", O_RDONLY, 0); if( fd<0 ){ time_t t; @@ -5365,7 +5483,7 @@ static int unixRandomness(sqlite3_vfs *NotUsed, int nBuf, char *zBuf){ assert( sizeof(t)+sizeof(pid)<=(size_t)nBuf ); nBuf = sizeof(t) + sizeof(pid); }else{ - do{ nBuf = osRead(fd, zBuf, nBuf); }while( nBuf<0 && errno==EINTR ); + do{ got = osRead(fd, zBuf, nBuf); }while( got<0 && errno==EINTR ); robust_close(0, fd, __LINE__); } } @@ -5715,7 +5833,7 @@ static int proxyCreateLockPath(const char *lockPath){ if( i-start>2 || (i-start==1 && buf[start] != '.' && buf[start] != '/') || (i-start==2 && buf[start] != '.' && buf[start+1] != '.') ){ buf[i]='\0'; - if( mkdir(buf, SQLITE_DEFAULT_PROXYDIR_PERMISSIONS) ){ + if( osMkdir(buf, SQLITE_DEFAULT_PROXYDIR_PERMISSIONS) ){ int err=errno; if( err!=EEXIST ) { OSTRACE(("CREATELOCKPATH FAILED creating %s, " @@ -5769,17 +5887,17 @@ static int proxyCreateUnixFile( } } if( fd<0 ){ - fd = robust_open(path, openFlags, SQLITE_DEFAULT_FILE_PERMISSIONS); + fd = robust_open(path, openFlags, 0); terrno = errno; if( fd<0 && errno==ENOENT && islockfile ){ if( proxyCreateLockPath(path) == SQLITE_OK ){ - fd = robust_open(path, openFlags, SQLITE_DEFAULT_FILE_PERMISSIONS); + fd = robust_open(path, openFlags, 0); } } } if( fd<0 ){ openFlags = O_RDONLY; - fd = robust_open(path, openFlags, SQLITE_DEFAULT_FILE_PERMISSIONS); + fd = robust_open(path, openFlags, 0); terrno = errno; } if( fd<0 ){ @@ -5810,7 +5928,7 @@ static int proxyCreateUnixFile( pUnused->flags = openFlags; pNew->pUnused = pUnused; - rc = fillInUnixFile(&dummyVfs, fd, 0, (sqlite3_file*)pNew, path, 0, 0, 0); + rc = fillInUnixFile(&dummyVfs, fd, (sqlite3_file*)pNew, path, 0); if( rc==SQLITE_OK ){ *ppFile = pNew; return SQLITE_OK; @@ -5903,8 +6021,7 @@ static int proxyBreakConchLock(unixFile *pFile, uuid_t myHostID){ goto end_breaklock; } /* write it out to the temporary break file */ - fd = robust_open(tPath, (O_RDWR|O_CREAT|O_EXCL), - SQLITE_DEFAULT_FILE_PERMISSIONS); + fd = robust_open(tPath, (O_RDWR|O_CREAT|O_EXCL), 0); if( fd<0 ){ sqlite3_snprintf(sizeof(errmsg), errmsg, "create failed (%d)", errno); goto end_breaklock; @@ -6181,8 +6298,7 @@ static int proxyTakeConch(unixFile *pFile){ robust_close(pFile, pFile->h, __LINE__); } pFile->h = -1; - fd = robust_open(pCtx->dbPath, pFile->openFlags, - SQLITE_DEFAULT_FILE_PERMISSIONS); + fd = robust_open(pCtx->dbPath, pFile->openFlags, 0); OSTRACE(("TRANSPROXY: OPEN %d\n", fd)); if( fd>=0 ){ pFile->h = fd; @@ -6751,7 +6867,7 @@ int sqlite3_os_init(void){ /* Double-check that the aSyscall[] array has been constructed ** correctly. See ticket [bb3a86e890c8e96ab] */ - assert( ArraySize(aSyscall)==18 ); + assert( ArraySize(aSyscall)==22 ); /* Register all VFSes defined in the aVfs[] array */ for(i=0; i<(sizeof(aVfs)/sizeof(sqlite3_vfs)); i++){ |