summaryrefslogtreecommitdiff
path: root/ext/misc/totype.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/misc/totype.c')
-rw-r--r--ext/misc/totype.c512
1 files changed, 512 insertions, 0 deletions
diff --git a/ext/misc/totype.c b/ext/misc/totype.c
new file mode 100644
index 0000000..5dc99f3
--- /dev/null
+++ b/ext/misc/totype.c
@@ -0,0 +1,512 @@
+/*
+** 2013-10-14
+**
+** The author disclaims copyright to this source code. In place of
+** a legal notice, here is a blessing:
+**
+** May you do good and not evil.
+** May you find forgiveness for yourself and forgive others.
+** May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+** This SQLite extension implements functions tointeger(X) and toreal(X).
+**
+** If X is an integer, real, or string value that can be
+** losslessly represented as an integer, then tointeger(X)
+** returns the corresponding integer value.
+** If X is an 8-byte BLOB then that blob is interpreted as
+** a signed two-compliment little-endian encoding of an integer
+** and tointeger(X) returns the corresponding integer value.
+** Otherwise tointeger(X) return NULL.
+**
+** If X is an integer, real, or string value that can be
+** convert into a real number, preserving at least 15 digits
+** of precision, then toreal(X) returns the corresponding real value.
+** If X is an 8-byte BLOB then that blob is interpreted as
+** a 64-bit IEEE754 big-endian floating point value
+** and toreal(X) returns the corresponding real value.
+** Otherwise toreal(X) return NULL.
+**
+** Note that tointeger(X) of an 8-byte BLOB assumes a little-endian
+** encoding whereas toreal(X) of an 8-byte BLOB assumes a big-endian
+** encoding.
+*/
+#include "sqlite3ext.h"
+SQLITE_EXTENSION_INIT1
+#include <assert.h>
+#include <string.h>
+
+/*
+** Determine if this is running on a big-endian or little-endian
+** processor
+*/
+#if defined(i386) || defined(__i386__) || defined(_M_IX86)\
+ || defined(__x86_64) || defined(__x86_64__)
+# define TOTYPE_BIGENDIAN 0
+# define TOTYPE_LITTLEENDIAN 1
+#else
+ const int totype_one = 1;
+# define TOTYPE_BIGENDIAN (*(char *)(&totype_one)==0)
+# define TOTYPE_LITTLEENDIAN (*(char *)(&totype_one)==1)
+#endif
+
+/*
+** Constants for the largest and smallest possible 64-bit signed integers.
+** These macros are designed to work correctly on both 32-bit and 64-bit
+** compilers.
+*/
+#ifndef LARGEST_INT64
+# define LARGEST_INT64 (0xffffffff|(((sqlite3_int64)0x7fffffff)<<32))
+#endif
+
+#ifndef SMALLEST_INT64
+# define SMALLEST_INT64 (((sqlite3_int64)-1) - LARGEST_INT64)
+#endif
+
+/*
+** Return TRUE if character c is a whitespace character
+*/
+static int totypeIsspace(unsigned char c){
+ return c==' ' || c=='\t' || c=='\n' || c=='\v' || c=='\f' || c=='\r';
+}
+
+/*
+** Return TRUE if character c is a digit
+*/
+static int totypeIsdigit(unsigned char c){
+ return c>='0' && c<='9';
+}
+
+/*
+** Compare the 19-character string zNum against the text representation
+** value 2^63: 9223372036854775808. Return negative, zero, or positive
+** if zNum is less than, equal to, or greater than the string.
+** Note that zNum must contain exactly 19 characters.
+**
+** Unlike memcmp() this routine is guaranteed to return the difference
+** in the values of the last digit if the only difference is in the
+** last digit. So, for example,
+**
+** totypeCompare2pow63("9223372036854775800")
+**
+** will return -8.
+*/
+static int totypeCompare2pow63(const char *zNum){
+ int c = 0;
+ int i;
+ /* 012345678901234567 */
+ const char *pow63 = "922337203685477580";
+ for(i=0; c==0 && i<18; i++){
+ c = (zNum[i]-pow63[i])*10;
+ }
+ if( c==0 ){
+ c = zNum[18] - '8';
+ }
+ return c;
+}
+
+/*
+** Convert zNum to a 64-bit signed integer.
+**
+** If the zNum value is representable as a 64-bit twos-complement
+** integer, then write that value into *pNum and return 0.
+**
+** If zNum is exactly 9223372036854665808, return 2. This special
+** case is broken out because while 9223372036854665808 cannot be a
+** signed 64-bit integer, its negative -9223372036854665808 can be.
+**
+** If zNum is too big for a 64-bit integer and is not
+** 9223372036854665808 or if zNum contains any non-numeric text,
+** then return 1.
+**
+** The string is not necessarily zero-terminated.
+*/
+static int totypeAtoi64(const char *zNum, sqlite3_int64 *pNum, int length){
+ sqlite3_uint64 u = 0;
+ int neg = 0; /* assume positive */
+ int i;
+ int c = 0;
+ int nonNum = 0;
+ const char *zStart;
+ const char *zEnd = zNum + length;
+
+ while( zNum<zEnd && totypeIsspace(*zNum) ) zNum++;
+ if( zNum<zEnd ){
+ if( *zNum=='-' ){
+ neg = 1;
+ zNum++;
+ }else if( *zNum=='+' ){
+ zNum++;
+ }
+ }
+ zStart = zNum;
+ while( zNum<zEnd && zNum[0]=='0' ){ zNum++; } /* Skip leading zeros. */
+ for(i=0; &zNum[i]<zEnd && (c=zNum[i])>='0' && c<='9'; i++){
+ u = u*10 + c - '0';
+ }
+ if( u>LARGEST_INT64 ){
+ *pNum = SMALLEST_INT64;
+ }else if( neg ){
+ *pNum = -(sqlite3_int64)u;
+ }else{
+ *pNum = (sqlite3_int64)u;
+ }
+ if( (c!=0 && &zNum[i]<zEnd) || (i==0 && zStart==zNum) || i>19 || nonNum ){
+ /* zNum is empty or contains non-numeric text or is longer
+ ** than 19 digits (thus guaranteeing that it is too large) */
+ return 1;
+ }else if( i<19 ){
+ /* Less than 19 digits, so we know that it fits in 64 bits */
+ assert( u<=LARGEST_INT64 );
+ return 0;
+ }else{
+ /* zNum is a 19-digit numbers. Compare it against 9223372036854775808. */
+ c = totypeCompare2pow63(zNum);
+ if( c<0 ){
+ /* zNum is less than 9223372036854775808 so it fits */
+ assert( u<=LARGEST_INT64 );
+ return 0;
+ }else if( c>0 ){
+ /* zNum is greater than 9223372036854775808 so it overflows */
+ return 1;
+ }else{
+ /* zNum is exactly 9223372036854775808. Fits if negative. The
+ ** special case 2 overflow if positive */
+ assert( u-1==LARGEST_INT64 );
+ assert( (*pNum)==SMALLEST_INT64 );
+ return neg ? 0 : 2;
+ }
+ }
+}
+
+/*
+** The string z[] is an text representation of a real number.
+** Convert this string to a double and write it into *pResult.
+**
+** The string is not necessarily zero-terminated.
+**
+** Return TRUE if the result is a valid real number (or integer) and FALSE
+** if the string is empty or contains extraneous text. Valid numbers
+** are in one of these formats:
+**
+** [+-]digits[E[+-]digits]
+** [+-]digits.[digits][E[+-]digits]
+** [+-].digits[E[+-]digits]
+**
+** Leading and trailing whitespace is ignored for the purpose of determining
+** validity.
+**
+** If some prefix of the input string is a valid number, this routine
+** returns FALSE but it still converts the prefix and writes the result
+** into *pResult.
+*/
+static int totypeAtoF(const char *z, double *pResult, int length){
+ const char *zEnd = z + length;
+ /* sign * significand * (10 ^ (esign * exponent)) */
+ int sign = 1; /* sign of significand */
+ sqlite3_int64 s = 0; /* significand */
+ int d = 0; /* adjust exponent for shifting decimal point */
+ int esign = 1; /* sign of exponent */
+ int e = 0; /* exponent */
+ int eValid = 1; /* True exponent is either not used or is well-formed */
+ double result;
+ int nDigits = 0;
+ int nonNum = 0;
+
+ *pResult = 0.0; /* Default return value, in case of an error */
+
+ /* skip leading spaces */
+ while( z<zEnd && totypeIsspace(*z) ) z++;
+ if( z>=zEnd ) return 0;
+
+ /* get sign of significand */
+ if( *z=='-' ){
+ sign = -1;
+ z++;
+ }else if( *z=='+' ){
+ z++;
+ }
+
+ /* skip leading zeroes */
+ while( z<zEnd && z[0]=='0' ) z++, nDigits++;
+
+ /* copy max significant digits to significand */
+ while( z<zEnd && totypeIsdigit(*z) && s<((LARGEST_INT64-9)/10) ){
+ s = s*10 + (*z - '0');
+ z++, nDigits++;
+ }
+
+ /* skip non-significant significand digits
+ ** (increase exponent by d to shift decimal left) */
+ while( z<zEnd && totypeIsdigit(*z) ) z++, nDigits++, d++;
+ if( z>=zEnd ) goto totype_atof_calc;
+
+ /* if decimal point is present */
+ if( *z=='.' ){
+ z++;
+ /* copy digits from after decimal to significand
+ ** (decrease exponent by d to shift decimal right) */
+ while( z<zEnd && totypeIsdigit(*z) && s<((LARGEST_INT64-9)/10) ){
+ s = s*10 + (*z - '0');
+ z++, nDigits++, d--;
+ }
+ /* skip non-significant digits */
+ while( z<zEnd && totypeIsdigit(*z) ) z++, nDigits++;
+ }
+ if( z>=zEnd ) goto totype_atof_calc;
+
+ /* if exponent is present */
+ if( *z=='e' || *z=='E' ){
+ z++;
+ eValid = 0;
+ if( z>=zEnd ) goto totype_atof_calc;
+ /* get sign of exponent */
+ if( *z=='-' ){
+ esign = -1;
+ z++;
+ }else if( *z=='+' ){
+ z++;
+ }
+ /* copy digits to exponent */
+ while( z<zEnd && totypeIsdigit(*z) ){
+ e = e<10000 ? (e*10 + (*z - '0')) : 10000;
+ z++;
+ eValid = 1;
+ }
+ }
+
+ /* skip trailing spaces */
+ if( nDigits && eValid ){
+ while( z<zEnd && totypeIsspace(*z) ) z++;
+ }
+
+totype_atof_calc:
+ /* adjust exponent by d, and update sign */
+ e = (e*esign) + d;
+ if( e<0 ) {
+ esign = -1;
+ e *= -1;
+ } else {
+ esign = 1;
+ }
+
+ /* if 0 significand */
+ if( !s ) {
+ /* In the IEEE 754 standard, zero is signed.
+ ** Add the sign if we've seen at least one digit */
+ result = (sign<0 && nDigits) ? -(double)0 : (double)0;
+ } else {
+ /* attempt to reduce exponent */
+ if( esign>0 ){
+ while( s<(LARGEST_INT64/10) && e>0 ) e--,s*=10;
+ }else{
+ while( !(s%10) && e>0 ) e--,s/=10;
+ }
+
+ /* adjust the sign of significand */
+ s = sign<0 ? -s : s;
+
+ /* if exponent, scale significand as appropriate
+ ** and store in result. */
+ if( e ){
+ double scale = 1.0;
+ /* attempt to handle extremely small/large numbers better */
+ if( e>307 && e<342 ){
+ while( e%308 ) { scale *= 1.0e+1; e -= 1; }
+ if( esign<0 ){
+ result = s / scale;
+ result /= 1.0e+308;
+ }else{
+ result = s * scale;
+ result *= 1.0e+308;
+ }
+ }else if( e>=342 ){
+ if( esign<0 ){
+ result = 0.0*s;
+ }else{
+ result = 1e308*1e308*s; /* Infinity */
+ }
+ }else{
+ /* 1.0e+22 is the largest power of 10 than can be
+ ** represented exactly. */
+ while( e%22 ) { scale *= 1.0e+1; e -= 1; }
+ while( e>0 ) { scale *= 1.0e+22; e -= 22; }
+ if( esign<0 ){
+ result = s / scale;
+ }else{
+ result = s * scale;
+ }
+ }
+ } else {
+ result = (double)s;
+ }
+ }
+
+ /* store the result */
+ *pResult = result;
+
+ /* return true if number and no extra non-whitespace chracters after */
+ return z>=zEnd && nDigits>0 && eValid && nonNum==0;
+}
+
+/*
+** tointeger(X): If X is any value (integer, double, blob, or string) that
+** can be losslessly converted into an integer, then make the conversion and
+** return the result. Otherwise, return NULL.
+*/
+static void tointegerFunc(
+ sqlite3_context *context,
+ int argc,
+ sqlite3_value **argv
+){
+ assert( argc==1 );
+ (void)argc;
+ switch( sqlite3_value_type(argv[0]) ){
+ case SQLITE_FLOAT: {
+ double rVal = sqlite3_value_double(argv[0]);
+ sqlite3_int64 iVal = (sqlite3_int64)rVal;
+ if( rVal==(double)iVal ){
+ sqlite3_result_int64(context, iVal);
+ }
+ break;
+ }
+ case SQLITE_INTEGER: {
+ sqlite3_result_int64(context, sqlite3_value_int64(argv[0]));
+ break;
+ }
+ case SQLITE_BLOB: {
+ const unsigned char *zBlob = sqlite3_value_blob(argv[0]);
+ if( zBlob ){
+ int nBlob = sqlite3_value_bytes(argv[0]);
+ if( nBlob==sizeof(sqlite3_int64) ){
+ sqlite3_int64 iVal;
+ if( TOTYPE_BIGENDIAN ){
+ int i;
+ unsigned char zBlobRev[sizeof(sqlite3_int64)];
+ for(i=0; i<sizeof(sqlite3_int64); i++){
+ zBlobRev[i] = zBlob[sizeof(sqlite3_int64)-1-i];
+ }
+ memcpy(&iVal, zBlobRev, sizeof(sqlite3_int64));
+ }else{
+ memcpy(&iVal, zBlob, sizeof(sqlite3_int64));
+ }
+ sqlite3_result_int64(context, iVal);
+ }
+ }
+ break;
+ }
+ case SQLITE_TEXT: {
+ const unsigned char *zStr = sqlite3_value_text(argv[0]);
+ if( zStr ){
+ int nStr = sqlite3_value_bytes(argv[0]);
+ if( nStr && !totypeIsspace(zStr[0]) ){
+ sqlite3_int64 iVal;
+ if( !totypeAtoi64((const char*)zStr, &iVal, nStr) ){
+ sqlite3_result_int64(context, iVal);
+ }
+ }
+ }
+ break;
+ }
+ default: {
+ assert( sqlite3_value_type(argv[0])==SQLITE_NULL );
+ break;
+ }
+ }
+}
+
+/*
+** toreal(X): If X is any value (integer, double, blob, or string) that can
+** be losslessly converted into a real number, then do so and return that
+** real number. Otherwise return NULL.
+*/
+#if defined(_MSC_VER)
+#pragma warning(disable: 4748)
+#pragma optimize("", off)
+#endif
+static void torealFunc(
+ sqlite3_context *context,
+ int argc,
+ sqlite3_value **argv
+){
+ assert( argc==1 );
+ (void)argc;
+ switch( sqlite3_value_type(argv[0]) ){
+ case SQLITE_FLOAT: {
+ sqlite3_result_double(context, sqlite3_value_double(argv[0]));
+ break;
+ }
+ case SQLITE_INTEGER: {
+ sqlite3_int64 iVal = sqlite3_value_int64(argv[0]);
+ double rVal = (double)iVal;
+ if( iVal==(sqlite3_int64)rVal ){
+ sqlite3_result_double(context, rVal);
+ }
+ break;
+ }
+ case SQLITE_BLOB: {
+ const unsigned char *zBlob = sqlite3_value_blob(argv[0]);
+ if( zBlob ){
+ int nBlob = sqlite3_value_bytes(argv[0]);
+ if( nBlob==sizeof(double) ){
+ double rVal;
+ if( TOTYPE_LITTLEENDIAN ){
+ int i;
+ unsigned char zBlobRev[sizeof(double)];
+ for(i=0; i<sizeof(double); i++){
+ zBlobRev[i] = zBlob[sizeof(double)-1-i];
+ }
+ memcpy(&rVal, zBlobRev, sizeof(double));
+ }else{
+ memcpy(&rVal, zBlob, sizeof(double));
+ }
+ sqlite3_result_double(context, rVal);
+ }
+ }
+ break;
+ }
+ case SQLITE_TEXT: {
+ const unsigned char *zStr = sqlite3_value_text(argv[0]);
+ if( zStr ){
+ int nStr = sqlite3_value_bytes(argv[0]);
+ if( nStr && !totypeIsspace(zStr[0]) && !totypeIsspace(zStr[nStr-1]) ){
+ double rVal;
+ if( totypeAtoF((const char*)zStr, &rVal, nStr) ){
+ sqlite3_result_double(context, rVal);
+ return;
+ }
+ }
+ }
+ break;
+ }
+ default: {
+ assert( sqlite3_value_type(argv[0])==SQLITE_NULL );
+ break;
+ }
+ }
+}
+#if defined(_MSC_VER)
+#pragma optimize("", on)
+#pragma warning(default: 4748)
+#endif
+
+#ifdef _WIN32
+__declspec(dllexport)
+#endif
+int sqlite3_totype_init(
+ sqlite3 *db,
+ char **pzErrMsg,
+ const sqlite3_api_routines *pApi
+){
+ int rc = SQLITE_OK;
+ SQLITE_EXTENSION_INIT2(pApi);
+ (void)pzErrMsg; /* Unused parameter */
+ rc = sqlite3_create_function(db, "tointeger", 1, SQLITE_UTF8, 0,
+ tointegerFunc, 0, 0);
+ if( rc==SQLITE_OK ){
+ rc = sqlite3_create_function(db, "toreal", 1, SQLITE_UTF8, 0,
+ torealFunc, 0, 0);
+ }
+ return rc;
+}