Diffstat (limited to 'ext/fts3/fts3_test.c')
-rw-r--r--   ext/fts3/fts3_test.c   213
1 file changed, 212 insertions(+), 1 deletion(-)
diff --git a/ext/fts3/fts3_test.c b/ext/fts3/fts3_test.c
index 72735f3..4da0b8f 100644
--- a/ext/fts3/fts3_test.c
+++ b/ext/fts3/fts3_test.c
@@ -13,13 +13,17 @@
** This file is not part of the production FTS code. It is only used for
** testing. It contains a Tcl command that can be used to test if a document
** matches an FTS NEAR expression.
+**
+** As of March 2012, it also contains a version 1 tokenizer used for testing
+** that the sqlite3_tokenizer_module.xLanguage() method is invoked correctly.
*/
#include <tcl.h>
#include <string.h>
#include <assert.h>
-#ifdef SQLITE_TEST
+#if defined(SQLITE_TEST)
+#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)
/* Required so that the "ifdef SQLITE_ENABLE_FTS3" below works */
#include "fts3Int.h"
@@ -158,6 +162,8 @@ static int fts3_near_match_cmd(
Tcl_Obj **apExprToken;
int nExprToken;
+ UNUSED_PARAMETER(clientData);
+
/* Must have 3 or more arguments. */
if( objc<3 || (objc%2)==0 ){
Tcl_WrongNumArgs(interp, 1, objv, "DOCUMENT EXPR ?OPTION VALUE?...");
@@ -311,14 +317,219 @@ static int fts3_configure_incr_load_cmd(
Tcl_SetObjResult(interp, pRet);
Tcl_DecrRefCount(pRet);
#endif
+ UNUSED_PARAMETER(clientData);
+ return TCL_OK;
+}
+
+#ifdef SQLITE_ENABLE_FTS3
+/**************************************************************************
+** Beginning of test tokenizer code.
+**
+** For language 0, this tokenizer is similar to the default 'simple'
+** tokenizer. For other languages L, the following rules apply:
+**
+** * Odd numbered languages are case-sensitive. Even numbered
+** languages are not.
+**
+** * Language ids 100 or greater are considered an error.
+**
+** The implementation assumes that the input contains only ASCII characters
+** (i.e. those that may be encoded in UTF-8 using a single byte).
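+**
+** For example (an illustrative note only, not exercised by this file):
+** given the input "Right now", language 0 produces the tokens "right"
+** and "now", whereas language 1 (odd, and therefore case-sensitive)
+** produces "Right" and "now".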
+*/
+typedef struct test_tokenizer {
+ sqlite3_tokenizer base;
+} test_tokenizer;
+
+typedef struct test_tokenizer_cursor {
+ sqlite3_tokenizer_cursor base;
+ const char *aInput; /* Input being tokenized */
+ int nInput; /* Size of the input in bytes */
+ int iInput; /* Current offset in aInput */
+ int iToken; /* Index of next token to be returned */
+ char *aBuffer; /* Buffer containing current token */
+  int nBuffer;                    /* Number of bytes allocated at aBuffer */
+ int iLangid; /* Configured language id */
+} test_tokenizer_cursor;
+
+static int testTokenizerCreate(
+ int argc, const char * const *argv,
+ sqlite3_tokenizer **ppTokenizer
+){
+ test_tokenizer *pNew;
+ UNUSED_PARAMETER(argc);
+ UNUSED_PARAMETER(argv);
+
+ pNew = sqlite3_malloc(sizeof(test_tokenizer));
+ if( !pNew ) return SQLITE_NOMEM;
+ memset(pNew, 0, sizeof(test_tokenizer));
+
+ *ppTokenizer = (sqlite3_tokenizer *)pNew;
+ return SQLITE_OK;
+}
+
+static int testTokenizerDestroy(sqlite3_tokenizer *pTokenizer){
+ test_tokenizer *p = (test_tokenizer *)pTokenizer;
+ sqlite3_free(p);
+ return SQLITE_OK;
+}
+
+static int testTokenizerOpen(
+ sqlite3_tokenizer *pTokenizer, /* The tokenizer */
+ const char *pInput, int nBytes, /* String to be tokenized */
+ sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
+){
+ int rc = SQLITE_OK; /* Return code */
+ test_tokenizer_cursor *pCsr; /* New cursor object */
+
+ UNUSED_PARAMETER(pTokenizer);
+
+ pCsr = (test_tokenizer_cursor *)sqlite3_malloc(sizeof(test_tokenizer_cursor));
+ if( pCsr==0 ){
+ rc = SQLITE_NOMEM;
+ }else{
+ memset(pCsr, 0, sizeof(test_tokenizer_cursor));
+ pCsr->aInput = pInput;
+ if( nBytes<0 ){
+ pCsr->nInput = (int)strlen(pInput);
+ }else{
+ pCsr->nInput = nBytes;
+ }
+ }
+
+ *ppCursor = (sqlite3_tokenizer_cursor *)pCsr;
+ return rc;
+}
+
+static int testTokenizerClose(sqlite3_tokenizer_cursor *pCursor){
+ test_tokenizer_cursor *pCsr = (test_tokenizer_cursor *)pCursor;
+ sqlite3_free(pCsr->aBuffer);
+ sqlite3_free(pCsr);
+ return SQLITE_OK;
+}
+
+static int testIsTokenChar(char c){
+ return (c>='a' && c<='z') || (c>='A' && c<='Z');
+}
+static int testTolower(char c){
+ char ret = c;
+ if( ret>='A' && ret<='Z') ret = ret - ('A'-'a');
+ return ret;
+}
+
+static int testTokenizerNext(
+ sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by testTokenizerOpen */
+ const char **ppToken, /* OUT: *ppToken is the token text */
+ int *pnBytes, /* OUT: Number of bytes in token */
+ int *piStartOffset, /* OUT: Starting offset of token */
+ int *piEndOffset, /* OUT: Ending offset of token */
+ int *piPosition /* OUT: Position integer of token */
+){
+ test_tokenizer_cursor *pCsr = (test_tokenizer_cursor *)pCursor;
+ int rc = SQLITE_OK;
+ const char *p;
+ const char *pEnd;
+
+ p = &pCsr->aInput[pCsr->iInput];
+ pEnd = &pCsr->aInput[pCsr->nInput];
+
+  /* Skip past any delimiter characters (anything other than a-z or A-Z) */
+ assert( p<=pEnd );
+ while( p<pEnd && testIsTokenChar(*p)==0 ) p++;
+
+ if( p==pEnd ){
+ rc = SQLITE_DONE;
+ }else{
+ /* Advance to the end of the token */
+ const char *pToken = p;
+ int nToken;
+ while( p<pEnd && testIsTokenChar(*p) ) p++;
+ nToken = (int)(p-pToken);
+
+    /* Copy the token into the buffer, growing the buffer if required */
+    if( nToken>pCsr->nBuffer ){
+      sqlite3_free(pCsr->aBuffer);
+      pCsr->aBuffer = sqlite3_malloc(nToken);
+      pCsr->nBuffer = nToken;       /* remember the new allocation size */
+    }
+ if( pCsr->aBuffer==0 ){
+ rc = SQLITE_NOMEM;
+ }else{
+ int i;
+
+ if( pCsr->iLangid & 0x00000001 ){
+ for(i=0; i<nToken; i++) pCsr->aBuffer[i] = pToken[i];
+ }else{
+ for(i=0; i<nToken; i++) pCsr->aBuffer[i] = testTolower(pToken[i]);
+ }
+ pCsr->iToken++;
+ pCsr->iInput = (int)(p - pCsr->aInput);
+
+ *ppToken = pCsr->aBuffer;
+ *pnBytes = nToken;
+ *piStartOffset = (int)(pToken - pCsr->aInput);
+ *piEndOffset = (int)(p - pCsr->aInput);
+ *piPosition = pCsr->iToken;
+ }
+ }
+
+ return rc;
+}
+
+static int testTokenizerLanguage(
+ sqlite3_tokenizer_cursor *pCursor,
+ int iLangid
+){
+ int rc = SQLITE_OK;
+ test_tokenizer_cursor *pCsr = (test_tokenizer_cursor *)pCursor;
+ pCsr->iLangid = iLangid;
+ if( pCsr->iLangid>=100 ){
+ rc = SQLITE_ERROR;
+ }
+ return rc;
+}
+#endif
+
+static int fts3_test_tokenizer_cmd(
+ ClientData clientData,
+ Tcl_Interp *interp,
+ int objc,
+ Tcl_Obj *CONST objv[]
+){
+#ifdef SQLITE_ENABLE_FTS3
+ static const sqlite3_tokenizer_module testTokenizerModule = {
+ 1,
+ testTokenizerCreate,
+ testTokenizerDestroy,
+ testTokenizerOpen,
+ testTokenizerClose,
+ testTokenizerNext,
+ testTokenizerLanguage
+ };
+ const sqlite3_tokenizer_module *pPtr = &testTokenizerModule;
+ if( objc!=1 ){
+ Tcl_WrongNumArgs(interp, 1, objv, "");
+ return TCL_ERROR;
+ }
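+  /* Return the address of the static module structure, encoded as a
+  ** blob of sizeof(sqlite3_tokenizer_module*) bytes. A test script can
+  ** pass this blob to the fts3_tokenizer() SQL function to register the
+  ** tokenizer under a name of its choosing. */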
+ Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(
+ (const unsigned char *)&pPtr, sizeof(sqlite3_tokenizer_module *)
+ ));
+#endif
+ UNUSED_PARAMETER(clientData);
return TCL_OK;
}
+/*
+** End of tokenizer code.
+**************************************************************************/
+
int Sqlitetestfts3_Init(Tcl_Interp *interp){
Tcl_CreateObjCommand(interp, "fts3_near_match", fts3_near_match_cmd, 0, 0);
Tcl_CreateObjCommand(interp,
"fts3_configure_incr_load", fts3_configure_incr_load_cmd, 0, 0
);
+ Tcl_CreateObjCommand(
+ interp, "fts3_test_tokenizer", fts3_test_tokenizer_cmd, 0, 0
+ );
return TCL_OK;
}
+#endif /* SQLITE_ENABLE_FTS3 || SQLITE_ENABLE_FTS4 */
#endif /* ifdef SQLITE_TEST */
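
Below is a minimal sketch of how the tokenizer methods above might be
driven directly. It is not part of the patch: it assumes the code is
appended to fts3_test.c (so that the static functions and the types from
fts3Int.h are in scope), the function name example_tokenize and the sample
input are hypothetical, and error checking is omitted for brevity.

    #include <stdio.h>    /* for printf(); not otherwise used by this file */

    static void example_tokenize(void){
      sqlite3_tokenizer *pT = 0;
      sqlite3_tokenizer_cursor *pC = 0;
      const char *zTok;
      int nTok, iStart, iEnd, iPos;

      testTokenizerCreate(0, 0, &pT);
      testTokenizerOpen(pT, "Right now", -1, &pC);
      pC->pTokenizer = pT;            /* normally done by the FTS3 core */
      testTokenizerLanguage(pC, 0);   /* language 0 (even): fold to lower case */

      /* Prints "right" then "now", with their positions and offsets */
      while( testTokenizerNext(pC, &zTok, &nTok, &iStart, &iEnd, &iPos)==SQLITE_OK ){
        printf("%d [%d..%d]: %.*s\n", iPos, iStart, iEnd, nTok, zTok);
      }

      testTokenizerClose(pC);
      testTokenizerDestroy(pT);
    }

In the test suite proper, the module would instead be registered at the SQL
level: the fts3_test_tokenizer Tcl command added above returns the module
pointer as a blob, which can be passed to the fts3_tokenizer() SQL function
and then named in the tokenize= option of a CREATE VIRTUAL TABLE ... USING
fts4 statement.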