summaryrefslogtreecommitdiff
path: root/src/fulltext/lucene
diff options
context:
space:
mode:
Diffstat (limited to 'src/fulltext/lucene')
-rw-r--r--src/fulltext/lucene/CouchConfig.java62
-rw-r--r--src/fulltext/lucene/CouchDbDirFilter.java30
-rw-r--r--src/fulltext/lucene/LuceneIndexer.java355
-rw-r--r--src/fulltext/lucene/LuceneSearcher.java90
-rw-r--r--src/fulltext/lucene/readme.txt41
5 files changed, 0 insertions, 578 deletions
diff --git a/src/fulltext/lucene/CouchConfig.java b/src/fulltext/lucene/CouchConfig.java
deleted file mode 100644
index 5f4d84ce..00000000
--- a/src/fulltext/lucene/CouchConfig.java
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use
-this file except in compliance with the License. You may obtain a copy of the
-License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed
-under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-*/
-
-import java.util.*;
-
-
-class CouchConfig
-{
-/* private CouchDocument[] documents;
-*/
- private Hashtable documents;
- private long updateSequence;
-
- public CouchConfig()
- {
- documents = new Hashtable();
- updateSequence = 0;
- }
-
- public void setUpdateSequence(long newUpdateSequence)
- {
- updateSequence = newUpdateSequence;
- }
-
- public long getUpdateSequence()
- {
- return updateSequence;
- }
-
- public void addDocument(com.fourspaces.couchdb.Document document)
- {
- String field;
-// System.out.println(document);
- field = document.getString("__couchdb_database");
-// System.out.println(field);
- if(field != null) {
- documents.put(field, document);
- }
- }
-
- public Hashtable getDocuments()
- {
- return documents;
- }
-
- public boolean hasDb(String db)
- {
- return documents.containsKey(db);
- }
-}
diff --git a/src/fulltext/lucene/CouchDbDirFilter.java b/src/fulltext/lucene/CouchDbDirFilter.java
deleted file mode 100644
index 6b002ce5..00000000
--- a/src/fulltext/lucene/CouchDbDirFilter.java
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use
-this file except in compliance with the License. You may obtain a copy of the
-License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed
-under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-*/
-
-/*
-
-LuceneIndexer creates a lucene index by incrementally fetching changes from an
-Apache CouchDB server. It is managed by the Apache CouchDB daemon.
-
-*/
-import java.io.*;
-
-class CouchDbDirFilter implements FilenameFilter
-{
- public boolean accept(File dir, String name)
- {
- return new File(dir, name).isFile();
- }
-}
diff --git a/src/fulltext/lucene/LuceneIndexer.java b/src/fulltext/lucene/LuceneIndexer.java
deleted file mode 100644
index 07040610..00000000
--- a/src/fulltext/lucene/LuceneIndexer.java
+++ /dev/null
@@ -1,355 +0,0 @@
-/*
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use
-this file except in compliance with the License. You may obtain a copy of the
-License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed
-under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-*/
-
-/*
-
-LuceneIndexer creates a lucene index by incrementally fetching changes from an
-Apache CouchDB server. It is managed by the Apache CouchDB daemon.
-
-I know this is Java and there should be a lot of OO going on, but it
-isn't. Sorry about that.
-
-*/
-
-//basics
-import java.io.*;
-import java.net.*;
-import java.util.*;
-import java.nio.channels.FileChannel;
-import java.nio.ByteBuffer;
-import java.lang.reflect.*;
-
-
-//couchdb4j
-//import com.fourspaces.couchdb.*;
-
-//xml
-import org.xml.sax.*;
-import org.xml.sax.helpers.*;
-import javax.xml.parsers.*;
-
-//lucene
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.IndexReader;
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.SimpleAnalyzer;
-
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.Hits;
-import org.apache.lucene.search.TermQuery;
-
-public class LuceneIndexer
-{
- private static CouchConfig configuration;
- private static com.fourspaces.couchdb.Session s;
-
- public static void main(String[] args) throws Exception
- {
-/* BufferedWriter out = new BufferedWriter(new FileWriter("LuceneIndexer.log"));
- out.write("indexer started");out.flush();
-*/
- String db;
-/* out.write("indexer about to read config");out.flush();*/
- connect();
- readConfig();
-
-/* out.write("indexer read config: " + configuration.getDocuments());out.flush();*/
-
- BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
- try {
- while((db = in.readLine()) != null) {
-/* out.write("indexer got a poke");out.flush();*/
-
- if(db.equals("couchdbfulltext")) {
-/* System.out.println("refresh config");
-
-*/ readConfig();
-/* out.write("indexer refreshed config");out.flush();*/
-
- }
-
-/* out.write("indexer has table: " + db + "?");*/
-
- if(!configuration.hasDb(db)) {
-/* out.write("... no wait for input");out.flush();*/
-
- continue;
- }
-
-/* out.write("yeppa");out.flush();*/
-
-
- createIndexDir(db);
- indexChanges(db);
-/* System.out.println(db + " to revision: " + revision);*/
- }
- } catch (IOException e) {
-/* out.write("indexer caught IO exception: " + e.getMessage());out.flush();*/
-
- }
-/* System.out.println("Lucene Indexer stopped");*/
-/* out.write("indexer stopped");out.flush();*/
-
-/* out.close();*/
-
- }
-
- public static void connect() throws Exception
- {
- // Assign the static field s; redeclaring it as a local here would leave the field null.
- s = new com.fourspaces.couchdb.Session("localhost", 5984);
- }
-
- public static void readConfig() throws Exception
- {
- //get all docs in /$ftconfig
- //return array of config docs
- configuration = null;
- configuration = new CouchConfig();
- com.fourspaces.couchdb.Database db = s.getDatabase("couchdbfulltext");
- com.fourspaces.couchdb.ViewResults changedDocuments = db.getAllDocuments(0);
-
- for (com.fourspaces.couchdb.Document d: changedDocuments.getResults()) {
- configuration.addDocument(d);
- }
-
-/* for(int i = 0; i < changedDocuments.length; i++) {
- CouchDocument document = changedDocuments[i];
- document = loadDocumentData(document, "couchdbfulltext");
- configuration.addDocument(document);
- }
-*/ }
-
- public static void indexChanges(String db) throws Exception
- {
-// System.out.println("Updating index for '" + db + "' from revision: " + revision);
- int sequence = -1;
- try {
- com.fourspaces.couchdb.Database _db = s.getDatabase(db);
- sequence = _db.getUpdateSeq();
- com.fourspaces.couchdb.ViewResults changedDocuments = _db.getAllDocuments(sequence);
-
- if(changedDocuments.size() == 0) {
-// System.out.println("Index is up-to date at sequence_id: " + revision);
- return;
- }
-
- boolean delete = false;
-
- for (com.fourspaces.couchdb.Document d: changedDocuments.getResults()) {
- delete = d.getBoolean("delete");
- documentAddToIndex(db, d, delete);
- }
-/* for(int idx = 0; idx < changedDocuments.length; idx++) {
- com.fourspaces.couchdb.Document document = changedDocuments[idx];
- sequence = document.getUpdateSequence();
- delete = document.getDelete();
-// System.out.println("Doing: " + document + " with squence: " + sequence + " delete: "+document.getDelete() + " hash code:" + document.hashCode());
-
- document = loadDocumentData(document, db);
- // System.out.println(changedDocuments[idx]);
- // remove from lucene if exists, add to lucene.
-
- documentAddToIndex(db, document, delete);
- }
-*/ // CouchDocument document = getDocumentByRevision(db, revision);
- setRevisionForDb(db, sequence);
- } catch(Exception e) {
-// System.out.println("Warning: " + db + " says: " + e.getMessage());
- }
- }
-
- public static void documentAddToIndex(String db, com.fourspaces.couchdb.Document document, boolean delete) throws IOException
- {
- String index = "Lucene/Index/" + db;
- boolean create = true;
-
-/* System.out.println("DEBUG: delete: " + delete);*/
-/* System.out.println("DEBUG: create index? " + create);*/
-
- if(IndexReader.indexExists(index)) {
- create = false;
- Term term = new Term("__couchdb_document_id", document.getId());
-/* System.out.println("DEBUG: Deleting: " + document + " with term:" + term);*/
- IndexReader reader = IndexReader.open(index);
- reader.deleteDocuments(term);
-/* System.out.println("DEBUG: reader has deletions: " + reader.hasDeletions());*/
-
- reader.close();
- }
-
- if(!delete) {
- Analyzer analyzer = new SimpleAnalyzer();
-
- IndexWriter writer = new IndexWriter(index, analyzer, create);
- writer.setUseCompoundFile(true);
-
-/* Collection fields = document.keys();*/
- Document luceneDocument = new Document();
-
-/* Set tmpKeys = fields.keySet();
- Object keys[] = tmpKeys.toArray();
-*/ String keywords = "";
-
- for (Iterator it = document.keys(); it.hasNext(); ) {
- Object key = it.next();
- String value = document.getString((String)key);
-
- if(key.equals("__couchdb_document_id") || key.equals("__couchdb_document_revision")) {
- luceneDocument.add(new Field((String)key, value, Field.Store.YES, Field.Index.UN_TOKENIZED));
- } else {
- luceneDocument.add(new Field((String)key, value, Field.Store.YES, Field.Index.TOKENIZED));
- keywords = keywords + " " + value;
- }
- }
- if(keywords.length() > 0) {
- luceneDocument.add(new Field("__couchdb_keywords", keywords, Field.Store.YES, Field.Index.TOKENIZED));
- }
-
-
-/* for(int idx = 0; idx < keys.length; idx++) {
- // System.out.println("DEBUG: Add Field: "+ keys[idx] + " with value: " + fields.get(keys[idx]));
- Hashtable field = (Hashtable)fields.get(keys[idx]);
- if(field == null) {return;}
- for(int fieldIdx = 0; fieldIdx < field.size(); fieldIdx++) {
- String value = (String)field.get(fieldIdx);
- if(value == null) {
- value = "";
- }
- // System.out.println("DEBUG: fieldIdx:" + fieldIdx + " and value: "+ value);
- String key = (String)keys[idx];
- if(key.equals("__couchdb_document_id") || key.equals("__couchdb_document_revision")) {
- luceneDocument.add(new Field(key, value, Field.Store.YES, Field.Index.UN_TOKENIZED));
- } else {
- luceneDocument.add(new Field(key, value, Field.Store.YES, Field.Index.TOKENIZED));
- keywords = keywords + " " + value;
- }
- }
-*/// }
- writer.addDocument(luceneDocument);
- writer.optimize();
- writer.close();
- }
- }
-
-
- private static void setRevisionForDb(String db, long revision) throws Exception
- {
- // Stores revision as decimal text in Lucene/State/<db>, replacing any previous value.
- RandomAccessFile stateFile = new RandomAccessFile("Lucene/State/" + db, "rwd");
- try {
- stateFile.setLength(0); // drop stale digits left by a longer previous value
- stateFile.writeBytes(String.valueOf(revision));
- } finally { stateFile.close(); }
- }
-
- private static String[] getDBs()
- {
- File dbRoot = new File("db_root");
- if(!dbRoot.isDirectory()) {
- return new String[0];
- }
-
- String[] dbs = dbRoot.list(new CouchDbDirFilter());
-
- return dbs;
- }
-
- private static long getRevisionForDb(String db) throws Exception
- {
- // Returns the sequence stored in Lucene/State/<db>, or 0 when nothing is recorded.
- File dbFile = new File("Lucene/State/" + db);
- if(!dbFile.exists()) {
- return 0;
- }
- RandomAccessFile stateFile = new RandomAccessFile(dbFile, "r");
- try {
- String revision = stateFile.readLine();
- // readLine() yields null for an empty file; parse as long to match the return type.
- return revision == null ? 0 : Long.parseLong(revision);
- } finally { stateFile.close(); }
- }
-
- private static void createIndexDir(String db)
- {
- File indexDir = new File("Lucene/Index/" + db);
- if(!indexDir.exists()) {
- indexDir.mkdirs();
- System.out.println("Created Index Directory");
- }
-
- File stateDir = new File("Lucene/State");
- if(!stateDir.exists()) {
- stateDir.mkdirs();
- System.out.println("Created State Directory");
- }
- }
-
- private static XMLReader getParser(SAXCouchDocumentBuilder documentBuilder) throws Exception
- {
- SAXParserFactory factory = SAXParserFactory.newInstance();
- SAXParser saxParser = factory.newSAXParser();
- XMLReader parser = saxParser.getXMLReader();
- parser.setContentHandler(documentBuilder);
- return parser;
- }
-
- private static BufferedInputStream getUrlStream(String address) throws Exception
- {
- URL url = new URL(address);
- InputStream inStream = url.openStream();
- return new BufferedInputStream(inStream);
- }
-
- public static com.fourspaces.couchdb.ViewResults getChangedDocumentsSinceRevision(String db, int revision) throws Exception
- {
- //BufferedInputStream inBuffer = getUrlStream("http://localhost:5984/" + db + "/_all_docs_by_update_seq?startkey=" + revision);
-
- com.fourspaces.couchdb.ViewResults newDocs = s.getDatabase(db).getAllDocuments(revision);
-
- return newDocs;
- //return CouchDocument[]
-
-/* CouchDocument[] returnValue = {};
-*/ //setup xml parser
-/* SAXCouchDocumentBuilder documentBuilder = new SAXCouchDocumentBuilder();
- XMLReader parser = getParser(documentBuilder);
- // Repeat until end of file
- parser.parse(new InputSource(inBuffer));
-
-
- return documentBuilder.getDocuments();
-*/ }
-
-
- public static CouchDocument loadDocumentData(CouchDocument document, String db) throws Exception
- {
- BufferedInputStream inBuffer = getUrlStream("http://localhost:5984/" + db + "/" + document.getDocId() + "?rev=" + document.getRevision());
-
- //setup xml parser
- SAXCouchDocumentBuilder documentBuilder = new SAXCouchDocumentBuilder();
- XMLReader parser = getParser(documentBuilder);
-
- // Repeat until end of file
- parser.parse(new InputSource(inBuffer));
-
- return documentBuilder.getDocument();
- }
-}
diff --git a/src/fulltext/lucene/LuceneSearcher.java b/src/fulltext/lucene/LuceneSearcher.java
deleted file mode 100644
index a5ccbe89..00000000
--- a/src/fulltext/lucene/LuceneSearcher.java
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use
-this file except in compliance with the License. You may obtain a copy of the
-License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed
-under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-*/
-
-/*
-
-LuceneSearcher searches a lucene index.
-
-It is managed by the Apache CouchDB daemon.
-
-*/
-
-//basics
-import java.io.*;
-
-//lucene
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.IndexReader;
-
-import org.apache.lucene.document.Document;
-
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.Hits;
-import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.Query;
-
-/*
-protocol:
-Queries will look like this:
-
-databasename\n
-the full text query\n
-
-Then the java reader will read the lines and respond
-by outputting each document result:
-ok\n
-docid1\n
-score1\n
-docid2\n
-score2\n
-docid3\n
-score3\n
-\n
-
-or:
-
-error\n
-error_id\n
-error message\n
-
-*/
-public class LuceneSearcher
-{
- public static void main(String[] args) throws Exception
- {
- // Reads (database name, query) line pairs from stdin until EOF and prints the hits.
- BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
-
- String db = "";
- String queryString = "";
-
- while(((db = in.readLine()) != null) && ((queryString = in.readLine()) != null)) {
- IndexSearcher searcher = new IndexSearcher("Lucene/Index/" + db);
- try {
- Query query = new TermQuery(new Term("__couchdb_keywords", queryString));
- Hits hits = searcher.search(query);
- System.out.println("ok");
- for(int i = 0; i < hits.length(); i++) {
- Document d = hits.doc(i);
- System.out.println(d.get("__couchdb_document_id"));
- System.out.println(hits.score(i));
- }
- System.out.println();
- } finally {
- searcher.close(); // release the index files before handling the next request
- }
- }
- }
-}
diff --git a/src/fulltext/lucene/readme.txt b/src/fulltext/lucene/readme.txt
deleted file mode 100644
index c115534c..00000000
--- a/src/fulltext/lucene/readme.txt
+++ /dev/null
@@ -1,41 +0,0 @@
-This is still work in progress and has not been integrated into the build
-process. Good luck though :)
-
-This document describes how to use the LuceneIndexer with Apache CouchDB.
-
-Requirements:
-Apache CouchDB 0.6.4 or newer.
-Java Development Kit (JDK) 1.5
-Lucene 2.0.0 or newer
-couchdb4j (http://code.google.com/p/couchdb4j/)
-
-
-If you don't already have it,
-download lucene-core-2.0.0.jar from a mirror.
-A list of mirrors can be found at
-http://www.apache.org/dyn/closer.cgi/lucene/java/
-
-Add the following line to your couch.ini:
-LuceneServer=/usr/bin/java -cp "./bin/:./lib/lucene-core.jar" LuceneIndexer=...
-
-Adjust the version number and the path to java, if needed.
-If you have lucene installed already, remove the
-'-cp "./bin/:./Lucene/lucene-core-2.0.0.jar"' part.
-
-Put lucene-core.jar and couchdb4j.jar into $CouchDbDir/lib
-
-Launch Apache CouchDB.
-
-The indexer will populate $CouchDbDir/Lucene/Index with an index for
-all documents in all databases.
-(indexes per database will be added soon).
-
-To see that the data is actually stored in there,
-use luke from http://www.getopt.org/luke/
-
-To use the actual index, you could use the PHP 5 Lucene Demo in the Zend Framework
-(http://framework.zend.com) or any other Lucene implementation in your favourite
-language.
-
-If you have any questions, please visit:
-http://couchdb.com/CouchDB/CouchDBWeb.nsf/vDissByDate