/*
* IndexKeyWords.java
*
* Created on April 16, 2007, 4:00 PM
*
* To change this template, choose Tools | Template Manager
* and open the template in the editor.
*/
package cometvis.index;
/**
*
* @author louiebagz
*/
import org.apache.commons.digester.Digester;
import org.xml.sax.SAXException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.analysis.Analyzer;
//import org.apache.lucene.analysis.WhitespaceAnalyzer;
//import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
/**
* Parses the contents of a keywords XML file and indexes all
* medical keywords found in it. The name of the file to parse may be
* specified as the first command line argument; when no argument is
* given, the bundled default file is used.
*/
public class IndexKeyWords
{
    /** XML file parsed when no path is supplied on the command line. */
    private static final String DEFAULT_SOURCE_FILE = "src/cometvis/datasource/keywords.xml";

    /** Directory in which the keyword index is (re)created. */
    private static final String INDEX_DIR = "/Volumes/Bagz/My Thesis/Index/KeyWordsIndex";

    /**
     * Writer shared by every instance; it is opened and closed by
     * {@link #main(String[])}. It is static because Digester creates the
     * IndexKeyWords instance itself, so the writer cannot be handed to it
     * through a constructor.
     */
    private static IndexWriter writer;

    /**
     * Adds the keywords to the index. One document is written per KeyWords
     * entry: a "lineNum" field plus one "kw" field for each keyword.
     *
     * @param kw the KeyWords to add to the index
     * @throws IOException if the document cannot be written to the index
     */
    public void addKeyWords(KeyWords kw) throws IOException
    {
        System.out.println("Adding " + kw.getLineNum() + " " + kw.getKeyWord());
        Document keyWordDocument = new Document();
        keyWordDocument.add(Field.Text("lineNum", kw.getLineNum()));
        List keywords = kw.getKeyWord();
        for (Iterator i = keywords.iterator(); i.hasNext();)
        {
            // The list is raw, so next() yields Object; the explicit cast is
            // required for this to compile. Only setKeyWord(String) adds to it.
            String keyword = (String) i.next();
            keyWordDocument.add(Field.Text("kw", keyword));
        }
        writer.addDocument(keyWordDocument);
    }

    /**
     * Creates an index to add KeyWords to, configures Digester rules and
     * actions, and parses the keywords XML file.
     *
     * @param args command line arguments; args[0], when present, is the
     *             path of the XML file to parse, otherwise the default
     *             source file is used
     * @throws IOException if the index or the XML file cannot be accessed
     * @throws SAXException if the XML file cannot be parsed
     */
    public static void main(String[] args) throws IOException, SAXException
    {
        String xmlPath = DEFAULT_SOURCE_FILE;
        if (args != null && args.length > 0)
        {
            xmlPath = args[0];
        }

        File indexDir = new File(INDEX_DIR);
        Analyzer analyzer = new StandardAnalyzer();
        boolean createFlag = true;

        // IndexWriter used for adding keyword documents to the index
        writer = new IndexWriter(indexDir, analyzer, createFlag);
        try
        {
            // instantiate Digester and disable XML validation
            Digester digester = new Digester();
            digester.setValidating(false);

            // instantiate IndexKeyWords when the root 'medWords' element is seen
            digester.addObjectCreate("medWords", IndexKeyWords.class);
            // instantiate a KeyWords bean for every 'keyWords' element
            digester.addObjectCreate("medWords/keyWords", KeyWords.class);
            // set the lineNum property of the KeyWords instance from the 'lineNum' attribute
            digester.addSetProperties("medWords/keyWords", "lineNum", "lineNum");
            // pass the body text of each 'kw' element to setKeyWord
            digester.addCallMethod("medWords/keyWords/kw", "setKeyWord", 0);
            // call 'addKeyWords' on the IndexKeyWords instance once a 'keyWords' element is complete
            digester.addSetNext("medWords/keyWords", "addKeyWords");

            // now that rules and actions are configured, start the parsing process
            digester.parse(new File(xmlPath));

            // optimize the index once every document has been added
            writer.optimize();
        }
        finally
        {
            // always close the writer, even when parsing fails, so the index
            // is not left open
            writer.close();
        }
    }

    /**
     * JavaBean class that holds properties of each keyword entry.
     * It is important that this class be public and static, in order for
     * Digester to be able to instantiate it.
     */
    public static class KeyWords
    {
        /** Value of the 'lineNum' attribute of the entry. */
        private String lineNum;

        /** Keywords of the entry (String elements), in the order they were added. */
        private List kw = new LinkedList();

        /**
         * Sets the line number of this entry.
         *
         * @param newLineNum the line number
         */
        public void setLineNum(String newLineNum)
        {
            lineNum = newLineNum;
        }

        /**
         * @return the line number of this entry
         */
        public String getLineNum()
        {
            return lineNum;
        }

        /**
         * Appends a keyword to this entry. Despite the setter-style name,
         * each call adds to the list rather than replacing it.
         *
         * @param newKW the keyword to add
         */
        public void setKeyWord(String newKW)
        {
            kw.add(newKW);
        }

        /**
         * @return the live list of keywords (String elements) added so far
         */
        public List getKeyWord()
        {
            return kw;
        }
    }
}