summaryrefslogtreecommitdiff
path: root/couchjs/c_src/utf8.c
diff options
context:
space:
mode:
Diffstat (limited to 'couchjs/c_src/utf8.c')
-rw-r--r--couchjs/c_src/utf8.c291
1 files changed, 291 insertions, 0 deletions
diff --git a/couchjs/c_src/utf8.c b/couchjs/c_src/utf8.c
new file mode 100644
index 00000000..ace6badb
--- /dev/null
+++ b/couchjs/c_src/utf8.c
@@ -0,0 +1,291 @@
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not
+// use this file except in compliance with the License. You may obtain a copy of
+// the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+// License for the specific language governing permissions and limitations under
+// the License.
+
+#include "config.h"
+#include "sm.h"
+
+static int
+enc_char(uint8 *utf8Buffer, uint32 ucs4Char)
+{
+ int utf8Length = 1;
+
+ if (ucs4Char < 0x80)
+ {
+ *utf8Buffer = (uint8)ucs4Char;
+ }
+ else
+ {
+ int i;
+ uint32 a = ucs4Char >> 11;
+ utf8Length = 2;
+ while(a)
+ {
+ a >>= 5;
+ utf8Length++;
+ }
+ i = utf8Length;
+ while(--i)
+ {
+ utf8Buffer[i] = (uint8)((ucs4Char & 0x3F) | 0x80);
+ ucs4Char >>= 6;
+ }
+ *utf8Buffer = (uint8)(0x100 - (1 << (8-utf8Length)) + ucs4Char);
+ }
+
+ return utf8Length;
+}
+
+static JSBool
+enc_charbuf(const jschar* src, size_t srclen, char* dst, size_t* dstlenp)
+{
+ size_t i;
+ size_t utf8Len;
+ size_t dstlen = *dstlenp;
+ size_t origDstlen = dstlen;
+ jschar c;
+ jschar c2;
+ uint32 v;
+ uint8 utf8buf[6];
+
+ if(!dst)
+ {
+ dstlen = origDstlen = (size_t) -1;
+ }
+
+ while(srclen)
+ {
+ c = *src++;
+ srclen--;
+
+ if((c >= 0xDC00) && (c <= 0xDFFF)) goto bad_surrogate;
+
+ if(c < 0xD800 || c > 0xDBFF)
+ {
+ v = c;
+ }
+ else
+ {
+ if(srclen < 1) goto buffer_too_small;
+ c2 = *src++;
+ srclen--;
+ if ((c2 < 0xDC00) || (c2 > 0xDFFF))
+ {
+ c = c2;
+ goto bad_surrogate;
+ }
+ v = ((c - 0xD800) << 10) + (c2 - 0xDC00) + 0x10000;
+ }
+ if(v < 0x0080)
+ {
+ /* no encoding necessary - performance hack */
+ if(!dstlen) goto buffer_too_small;
+ if(dst) *dst++ = (char) v;
+ utf8Len = 1;
+ }
+ else
+ {
+ utf8Len = enc_char(utf8buf, v);
+ if(utf8Len > dstlen) goto buffer_too_small;
+ if(dst)
+ {
+ for (i = 0; i < utf8Len; i++)
+ {
+ *dst++ = (char) utf8buf[i];
+ }
+ }
+ }
+ dstlen -= utf8Len;
+ }
+
+ *dstlenp = (origDstlen - dstlen);
+ return JS_TRUE;
+
+bad_surrogate:
+ *dstlenp = (origDstlen - dstlen);
+ return JS_FALSE;
+
+buffer_too_small:
+ *dstlenp = (origDstlen - dstlen);
+ return JS_FALSE;
+}
+
+char*
+enc_string(JSContext* cx, jsval arg, size_t* buflen)
+{
+ JSString* str = NULL;
+ const jschar* src = NULL;
+ char* bytes = NULL;
+ size_t srclen = 0;
+ size_t byteslen = 0;
+
+ str = JS_ValueToString(cx, arg);
+ if(!str) goto error;
+
+#ifdef SM185
+ src = JS_GetStringCharsAndLength(cx, str, &srclen);
+#else
+ src = JS_GetStringChars(str);
+ srclen = JS_GetStringLength(str);
+#endif
+
+ if(!enc_charbuf(src, srclen, NULL, &byteslen)) goto error;
+
+ bytes = (char*) JS_malloc(cx, (byteslen) + 1);
+ bytes[byteslen] = 0;
+
+ if(!enc_charbuf(src, srclen, bytes, &byteslen)) goto error;
+
+ if(buflen) *buflen = byteslen;
+ goto success;
+
+error:
+ if(bytes != NULL) JS_free(cx, bytes);
+ bytes = NULL;
+
+success:
+ return bytes;
+}
+
+static uint32
+dec_char(const uint8 *utf8Buffer, int utf8Length)
+{
+ uint32 ucs4Char;
+ uint32 minucs4Char;
+
+ /* from Unicode 3.1, non-shortest form is illegal */
+ static const uint32 minucs4Table[] = {
+ 0x00000080, 0x00000800, 0x0001000, 0x0020000, 0x0400000
+ };
+
+ if (utf8Length == 1)
+ {
+ ucs4Char = *utf8Buffer;
+ }
+ else
+ {
+ ucs4Char = *utf8Buffer++ & ((1<<(7-utf8Length))-1);
+ minucs4Char = minucs4Table[utf8Length-2];
+ while(--utf8Length)
+ {
+ ucs4Char = ucs4Char<<6 | (*utf8Buffer++ & 0x3F);
+ }
+ if(ucs4Char < minucs4Char || ucs4Char == 0xFFFE || ucs4Char == 0xFFFF)
+ {
+ ucs4Char = 0xFFFD;
+ }
+ }
+
+ return ucs4Char;
+}
+
+static JSBool
+dec_charbuf(const char *src, size_t srclen, jschar *dst, size_t *dstlenp)
+{
+ uint32 v;
+ size_t offset = 0;
+ size_t j;
+ size_t n;
+ size_t dstlen = *dstlenp;
+ size_t origDstlen = dstlen;
+
+ if(!dst) dstlen = origDstlen = (size_t) -1;
+
+ while(srclen)
+ {
+ v = (uint8) *src;
+ n = 1;
+
+ if(v & 0x80)
+ {
+ while(v & (0x80 >> n))
+ {
+ n++;
+ }
+
+ if(n > srclen) goto buffer_too_small;
+ if(n == 1 || n > 6) goto bad_character;
+
+ for(j = 1; j < n; j++)
+ {
+ if((src[j] & 0xC0) != 0x80) goto bad_character;
+ }
+
+ v = dec_char((const uint8 *) src, n);
+ if(v >= 0x10000)
+ {
+ v -= 0x10000;
+
+ if(v > 0xFFFFF || dstlen < 2)
+ {
+ *dstlenp = (origDstlen - dstlen);
+ return JS_FALSE;
+ }
+
+ if(dstlen < 2) goto buffer_too_small;
+
+ if(dst)
+ {
+ *dst++ = (jschar)((v >> 10) + 0xD800);
+ v = (jschar)((v & 0x3FF) + 0xDC00);
+ }
+ dstlen--;
+ }
+ }
+
+ if(!dstlen) goto buffer_too_small;
+ if(dst) *dst++ = (jschar) v;
+
+ dstlen--;
+ offset += n;
+ src += n;
+ srclen -= n;
+ }
+
+ *dstlenp = (origDstlen - dstlen);
+ return JS_TRUE;
+
+bad_character:
+ *dstlenp = (origDstlen - dstlen);
+ return JS_FALSE;
+
+buffer_too_small:
+ *dstlenp = (origDstlen - dstlen);
+ return JS_FALSE;
+}
+
+JSString*
+dec_string(JSContext* cx, const char* bytes, size_t byteslen)
+{
+ JSString* str = NULL;
+ jschar* chars = NULL;
+ size_t charslen;
+
+ if(!dec_charbuf(bytes, byteslen, NULL, &charslen)) goto error;
+
+ chars = (jschar*) JS_malloc(cx, (charslen + 1) * sizeof(jschar));
+ if(!chars) return NULL;
+ chars[charslen] = 0;
+
+ if(!dec_charbuf(bytes, byteslen, chars, &charslen)) goto error;
+
+ str = JS_NewUCString(cx, chars, charslen - 1);
+ if(!str) goto error;
+
+ goto success;
+
+error:
+ if(chars != NULL) JS_free(cx, chars);
+ str = NULL;
+
+success:
+ return str;
+}