/*
 * IndexKeyWords.java
 *
 * Created on April 16, 2007, 4:00 PM
 *
 * To change this template, choose Tools | Template Manager
 * and open the template in the editor.
 */

package cometvis.index;

/**
 *
 * @author louiebagz
 */

import org.apache.commons.digester.Digester;

import org.xml.sax.SAXException;

import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.analysis.Analyzer;
//import org.apache.lucene.analysis.WhitespaceAnalyzer;
//import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;

import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;

/**
 * Parses the contents of a keywords XML file and indexes all
 * medical keywords found in it. The name of the file to parse may be
 * specified as the first command line argument; when it is omitted the
 * built-in default location is used.
 */
public class IndexKeyWords {

    /** Index writer shared by {@link #main} and {@link #addKeyWords}; opened and closed by {@code main}. */
    private static IndexWriter writer;

    /** Default XML file to parse when no command line argument is supplied. */
    private static String sourceFile = "src/cometvis/datasource/keywords.xml";

    /**
     * Adds one keyword entry to the index as a single document.
     * The document gets one "lineNum" field and one "kw" field per keyword
     * held by the entry. This method is registered with Digester in
     * {@link #main} as the set-next method for each keyWords element.
     *
     * @param kw the KeyWords entry to add to the index
     * @throws IOException if the index writer fails to add the document
     */
    public void addKeyWords(KeyWords kw) throws IOException {
        System.out.println("Adding " + kw.getLineNum() + " " + kw.getKeyWord());

        Document keyWordDocument = new Document();
        keyWordDocument.add(Field.Text("lineNum", kw.getLineNum()));
        //keyWordDocument.add(Field.Text("kw", kw.getKeyWord()));

        List list = kw.getKeyWord();
        for (Iterator i = list.iterator(); i.hasNext();) {
            // The list is raw, so next() returns Object; setKeyWord only ever
            // stores Strings, which makes this cast safe.
            String listElement = (String) i.next();
            keyWordDocument.add(Field.Text("kw", listElement));
        }

        writer.addDocument(keyWordDocument);
    }

    /**
     * Creates an index to add KeyWords to, configures Digester rules and
     * actions, and parses the keywords XML file.
     *
     * @param args command line arguments; {@code args[0]}, when present, is
     *             the path of the XML file to parse, otherwise the default
     *             source file is used
     * @throws IOException if the index or the XML file cannot be accessed
     * @throws SAXException if the XML file cannot be parsed
     */
    public static void main(String[] args) throws IOException, SAXException {
        // Honour the documented "first argument" contract, falling back to
        // the original hard-coded location so existing invocations still work.
        String source = (args != null && args.length > 0) ? args[0] : sourceFile;

        //String indexDir =
        //    System.getProperty("java.io.tmpdir", "tmp") +
        //    System.getProperty("file.separator") + "chatFile";
        //Analyzer analyzer = new WhitespaceAnalyzer();
        File indexDir = new File("/Volumes/Bagz/My Thesis/Index/KeyWordsIndex");
        Analyzer analyzer = new StandardAnalyzer();
        boolean createFlag = true;

        // IndexWriter to use for adding keyword entries to the index
        writer = new IndexWriter(indexDir, analyzer, createFlag);
        try {
            // instantiate Digester and disable XML validation
            Digester digester = new Digester();
            digester.setValidating(false);

            // instantiate IndexKeyWords class for the root element
            digester.addObjectCreate("medWords", IndexKeyWords.class);

            // instantiate KeyWords class for each entry
            digester.addObjectCreate("medWords/keyWords", KeyWords.class);

            // set lineNum property of KeyWords instance when 'lineNum' attribute is found
            digester.addSetProperties("medWords/keyWords", "lineNum", "lineNum");

            // pass the body of each 'kw' element to setKeyWord
            digester.addCallMethod("medWords/keyWords/kw", "setKeyWord", 0);

            // call 'addKeyWords' method when the next 'medWords/keyWords' pattern is seen
            digester.addSetNext("medWords/keyWords", "addKeyWords");

            // now that rules and actions are configured, start the parsing process
            digester.parse(new File(source));

            // optimize the index only after a successful parse
            writer.optimize();
        } finally {
            // always close the writer, even when parsing or optimizing fails
            writer.close();
        }
    }

    /**
     * JavaBean class that holds properties of each keyword entry.
     * It is important that this class be public and static, in order for
     * Digester to be able to instantiate it.
     */
    public static class KeyWords {

        /** Value of the entry's lineNum attribute. */
        private String lineNum;

        /** Keywords collected for this entry, in the order they were added. */
        private List kw = new LinkedList();

        /**
         * Sets the line number of this entry.
         *
         * @param newLineNum the line number value
         */
        public void setLineNum(String newLineNum) {
            lineNum = newLineNum;
        }

        /**
         * Returns the line number of this entry.
         *
         * @return the line number value, or null if it was never set
         */
        public String getLineNum() {
            return lineNum;
        }

        /**
         * Appends a keyword to this entry; despite the setter-style name,
         * each call adds to the list rather than replacing it.
         *
         * @param newKW the keyword to add
         */
        public void setKeyWord(String newKW) {
            kw.add(newKW);
        }

        /**
         * Returns the live list of keywords added so far.
         *
         * @return the list of keyword Strings
         */
        public List getKeyWord() {
            return kw;
        }
    }
}