Lucene搜索的簡單實例
項目環境:
1.導入jar包
IKAnalyzer3.2.0Stable.jar
lucene-analyzers-3.0.1.jar
lucene-core-3.0.1.jar
lucene-highlighter-3.0.1.jar
lucene-memory-3.0.1.jar
2.寫一個demo
a.創建一個實體
-- Backing table for the Article entity (id, title, content) used by the demo.
-- Dropped first so the script can be re-run from scratch.
DROP TABLE IF EXISTS `article`;

CREATE TABLE `article` (
    `id`      INT(11)       NOT NULL AUTO_INCREMENT,  -- primary key, generated by MySQL
    `title`   VARCHAR(20)   DEFAULT NULL,             -- nullable
    `content` VARCHAR(5000) DEFAULT NULL,             -- nullable
    PRIMARY KEY (`id`)
) ENGINE = InnoDB DEFAULT CHARSET = utf8;
public class Article {
private int id;
private String title;
private String content;
................//get/set/toString方法省略
}
b.創建一個提供公共方法的類:
/**
 * Holder for one page of query results together with the total number of hits.
 *
 * <p>The list is deliberately left raw so callers can cast it to the element
 * type they put in (the tests cast it to {@code List<Article>}).
 */
@SuppressWarnings("rawtypes")
public class QueryResult {
    /** Records of the requested page. */
    private List list;
    /** Total number of matching records, not just the size of this page. */
    private int count;

    public QueryResult() {
    }

    /**
     * @param list  records of the requested page
     * @param count total number of matching records
     */
    public QueryResult(List list, int count) {
        this.list = list;
        this.count = count;
    }

    public List getList() {
        return list;
    }

    public void setList(List list) {
        this.list = list;
    }

    public int getCount() {
        return count;
    }

    public void setCount(int count) {
        this.count = count;
    }
}

/**
 * Owns the shared Lucene resources (index directory, analyzer, index writer)
 * and converts between {@link Article} and Lucene {@code Document}.
 */
public class LuceneUtils {
    /** Directory in which the index is stored. */
    private static Directory directory;
    /** Analyzer shared by indexing, query parsing and highlighting. */
    private static Analyzer analyzer;
    /** Single index writer, opened once when this class is initialised. */
    private static IndexWriter indexWriter;

    static {
        try {
            // lucene.properties holds the index location, e.g. path=D:\\IindexSearch
            Properties prop = new Properties();
            InputStream inStream = LuceneUtils.class.getClassLoader().getResourceAsStream("lucene.properties");
            if (inStream == null) {
                throw new IllegalStateException("lucene.properties not found on the classpath");
            }
            try {
                prop.load(inStream);
            } finally {
                inStream.close();
            }
            directory = FSDirectory.open(new File(prop.getProperty("path")));
            analyzer = new StandardAnalyzer(Version.LUCENE_30);
            indexWriter = new IndexWriter(directory, analyzer, MaxFieldLength.LIMITED);
            // Close the writer when the JVM exits.
            Runtime.getRuntime().addShutdownHook(new Thread() {
                public void run() {
                    try {
                        indexWriter.close();
                    } catch (Exception e) {
                        e.printStackTrace();
                    }
                }
            });
        } catch (Exception e) {
            // Fail fast: without directory/analyzer/writer every later call would
            // only fail with an unexplained NullPointerException.
            throw new ExceptionInInitializerError(e);
        }
    }

    /**
     * Converts an {@link Article} into a Lucene document.
     *
     * <p>The id is stored prefix-coded and not analyzed so it can be used as an
     * exact-match term; title and content are stored and analyzed. A
     * {@code null} title or content is skipped, because {@code Field} rejects
     * {@code null} values.
     *
     * @param obj the entity to convert; must be an {@link Article}
     * @return the document to index
     */
    public static Document objectToDocument(Object obj) {
        Article article = (Article) obj;
        Document doc = new Document();
        String idstr = NumericUtils.intToPrefixCoded(article.getId());
        doc.add(new Field("id", idstr, Store.YES, Index.NOT_ANALYZED));
        if (article.getTitle() != null) {
            doc.add(new Field("title", article.getTitle(), Store.YES, Index.ANALYZED));
        }
        if (article.getContent() != null) {
            doc.add(new Field("content", article.getContent(), Store.YES, Index.ANALYZED));
        }
        return doc;
    }

    /**
     * Converts a stored Lucene document back into an {@link Article}.
     *
     * @param doc a document produced by {@link #objectToDocument(Object)}
     * @return the rebuilt article (returned as {@code Object}; callers cast)
     */
    public static Object documentToObject(Document doc) {
        Article article = new Article();
        // The id was stored prefix-coded; decode it back to an int.
        int id = NumericUtils.prefixCodedToInt(doc.get("id"));
        article.setId(id);
        article.setTitle(doc.get("title"));
        article.setContent(doc.get("content"));
        return article;
    }

    /** @return the shared index writer */
    public static IndexWriter getIndexWriter() {
        return indexWriter;
    }

    /**
     * Opens a new searcher over the index directory.
     *
     * <p>A fresh searcher is created on every call and is no longer kept in a
     * shared static field; the caller owns it and must close it when done.
     *
     * @return a newly opened searcher
     * @throws RuntimeException wrapping any failure to open the index
     */
    public static IndexSearcher getIndexSearch() {
        try {
            return new IndexSearcher(directory);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /** @return the index directory */
    public static Directory getDirectory() {
        return directory;
    }

    /** @return the shared analyzer */
    public static Analyzer getAnalyzer() {
        return analyzer;
    }
}

c.創建增刪改查方法:

/**
 * Create / delete / update / query operations for {@link Article} against the
 * Lucene index.
 */
public class IndexDao {

    /**
     * Adds the article to the index and commits.
     *
     * @param article the article to index
     * @throws RuntimeException wrapping any indexing failure
     */
    public void save(Article article) {
        try {
            Document doc = LuceneUtils.objectToDocument(article);
            IndexWriter indexWriter = LuceneUtils.getIndexWriter();
            indexWriter.addDocument(doc);
            indexWriter.commit();
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Deletes every indexed document whose {@code id} term matches the
     * article's id, then commits.
     *
     * @param article only its id is used
     * @throws RuntimeException wrapping any indexing failure
     */
    public void delete(Article article) {
        Term term = idTerm(article.getId());
        try {
            IndexWriter indexWriter = LuceneUtils.getIndexWriter();
            indexWriter.deleteDocuments(term);
            indexWriter.commit();
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Replaces the indexed document(s) carrying the article's id with a
     * document built from the given article, then commits.
     *
     * @param article the new state of the article
     * @throws RuntimeException wrapping any indexing failure
     */
    public void update(Article article) {
        Term term = idTerm(article.getId());
        Document doc = LuceneUtils.objectToDocument(article);
        try {
            IndexWriter indexWriter = LuceneUtils.getIndexWriter();
            // Same effect as deleteDocuments(term) followed by addDocument(doc).
            indexWriter.updateDocument(term, doc);
            indexWriter.commit();
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Runs a full-text query over the id, title and content fields and returns
     * one page of results ordered by id ascending, with the matched terms in
     * the title wrapped in a red {@code <font>} tag.
     *
     * @param queryString query in Lucene query-parser syntax
     * @param first       zero-based index of the first record of the page
     * @param max         maximum number of records in the page
     * @return the page of articles plus the total number of hits
     * @throws RuntimeException wrapping any parse or search failure
     */
    public QueryResult query(String queryString, int first, int max) {
        IndexSearcher indexSearcher = null;
        try {
            // The search has to collect everything up to the end of the page.
            int totail = first + max;
            // 1. Parse the query string against several fields at once.
            QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_30,
                    new String[] { "id", "title", "content" }, LuceneUtils.getAnalyzer());
            Query query = parser.parse(queryString);
            // 2. Search, sorted by id ascending.
            indexSearcher = LuceneUtils.getIndexSearch();
            Sort sort = new Sort(new SortField("id", SortField.INT));
            TopDocs topDocs = indexSearcher.search(query, null, totail, sort);
            int count = topDocs.totalHits; // total number of matches
            ScoreDoc[] scoreDoc = topDocs.scoreDocs; // at most the first totail matches
            // Highlighter: prefix/suffix around matches, fragment size 100.
            Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
            Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
            highlighter.setTextFragmenter(new SimpleFragmenter(100));
            // 3. Convert only the requested page: skip the first `first` hits.
            int endIndex = Math.min(totail, scoreDoc.length);
            List<Article> list = new ArrayList<Article>();
            for (int i = Math.max(first, 0); i < endIndex; i++) {
                Document doc = indexSearcher.doc(scoreDoc[i].doc);
                String title = doc.get("title");
                if (title != null) {
                    // getBestFragment returns null when the field has no match.
                    String text = highlighter.getBestFragment(LuceneUtils.getAnalyzer(), "title", title);
                    if (text != null) {
                        doc.getField("title").setValue(text);
                    }
                }
                list.add((Article) LuceneUtils.documentToObject(doc));
            }
            return new QueryResult(list, count);
        } catch (Exception e) {
            throw new RuntimeException(e);
        } finally {
            if (indexSearcher != null) {
                try {
                    indexSearcher.close();
                } catch (Exception ignored) {
                    // Best-effort close: must not mask the result or the original exception.
                }
            }
        }
    }

    /** Builds the exact-match term for the prefix-coded id field. */
    private static Term idTerm(int id) {
        return new Term("id", NumericUtils.intToPrefixCoded(id));
    }
}

d.測試dao方法:

// NOTE(review): the article showed only the test methods; the enclosing class
// and the indexDao field are reconstructed here - adjust names to your project.
public class IndexDaoTest {
    private IndexDao indexDao = new IndexDao();

    @Test
    public void testSave() {
        Article article = new Article();
        for (int i = 0; i < 20; i++) {
            article.setId(i);
            article.setTitle("Lucene搜索的方式");
            article.setContent("全文檢索是計算機程序通過掃描文章中的每一個詞,對每一個詞建立一個索引,指明該詞在文章中出現的次數和位置。");
            indexDao.save(article);
        }
    }

    @Test
    public void testDelete() {
        Article article = new Article();
        article.setId(1);
        indexDao.delete(article);
    }

    @Test
    public void testUpdate() {
        Article article = new Article();
        article.setId(1);
        article.setTitle("Lucene搜索的方式");
        article.setContent("跟新索引庫測試是否正確");
        indexDao.update(article);
    }

    @Test
    @SuppressWarnings("unchecked")
    public void testQuery() {
        String queryString = "Lucene";
        QueryResult queryResult = indexDao.query(queryString, 0, 10);
        System.out.println("count---------->" + queryResult.getCount());
        List<Article> list = (List<Article>) queryResult.getList();
        for (Article article : list) {
            System.err.println("list--------->" + article.toString());
        }
    }
}
至此,Lucene的測試例子完成。以下是本人在使用過程中改造、封裝后制作的模板:由于增刪改查中都要用到一些相同的代碼,并且重復比較多,為此本人把重復的部分提取為公共方法,把不能事先確定的部分(實體與Document之間的轉換)定義為接口中的抽象方法;調用者需要封裝哪些實體,只需實現該接口并重寫其中的方法即可。此封裝主要用到的是模板設計模式。
如下:
public class ComsLuceneUtils { public interface CallBackQuery { //不分頁 //public abstract List documentToObject(Document doc, List list) throws Exception; //分頁方法 public abstract QueryResult documentToObject(Document doc, List list, int count) throws Exception; public abstract Document objectToDocument(Object obj) throws Exception; } /** * //保存到索引庫 * CallBackQuery :保存函數 * @return * @throws Exception */ public static void save(Article article, CallBackQuery callBack) { try { // 將Aritcle轉為Documnet Document doc = callBack.objectToDocument(article); // 建立索引 IndexWriter indexWriter = LuceneUtils.getIndexWriter(); indexWriter.addDocument(doc); indexWriter.commit(); } catch (Exception e) { throw new RuntimeException(e); } } /** * 刪除索引庫 Term 表示制定列中包含的關鍵字 * * @return * @throws Exception */ public static void delete(Integer id) { String idStr = NumericUtils.intToPrefixCoded(id); Term term = new Term("id", idStr); try { // 建立索引 IndexWriter indexWriter = LuceneUtils.getIndexWriter(); indexWriter.deleteDocuments(term);// 刪除指定Term總重的documnet數據 indexWriter.commit(); } catch (Exception e) { throw new RuntimeException(e); } } /** * 修改索引庫 * CallBackQuery :更新函數 * @return * @throws Exception */ public static void update(Article article, CallBackQuery callBack) { // 創建Term String idStr = NumericUtils.intToPrefixCoded(article.getId()); Term term = new Term("id", idStr); try { // 將Object轉換為Document對象 Document doc = callBack.objectToDocument(article); // 建立索引 IndexWriter indexWriter = LuceneUtils.getIndexWriter(); indexWriter.updateDocument(term, doc);// 刪除指定Term數的documnet數據 indexWriter.commit(); // 先刪除,在創建 // indexWriter.deleteDocuments(term); // indexWriter.addDocument(doc); } catch (Exception e) { throw new RuntimeException(e); } } /**查詢分頁方法 * * 查詢索引庫 querString: 查詢字符串 * first : 開始位置(分頁功能) * max : 最大數(分頁功能) * parameter :查詢指定的字段 * CallBackQuery : 查詢函數 * QueryResult : 返回結果集合 * @throws Exception */ public static QueryResult query(String queryString, int first, int max, String[] parameter, 
CallBackQuery callback) { IndexSearcher indexSearcher = null; try { // MultiFieldQueryParser:表示可以根據多個字段查詢 int totail = first + max; // 1.把字符串轉為Query對象 QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_30, parameter, LuceneUtils.getAnalyzer()); Query query = parser.parse(queryString); // 2.執行查詢 indexSearcher = LuceneUtils.getIndexSearch(); // 指定排序條件 Sort sort = new Sort(new SortField("id", SortField.INT));// 按照id升序 TopDocs topDocs = indexSearcher.search(query, null, totail, sort);// 查詢并返回最多的前n條數據 int count = topDocs.totalHits;// 總記錄數 ScoreDoc[] scoreDoc = topDocs.scoreDocs;// 最多前n條結果數據 // 生成高亮顯示器;設置前綴,后綴,摘要的大小 Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>"); Scorer scorer = new QueryScorer(query);// 查詢條件 Highlighter highlighter = new Highlighter(formatter, scorer); highlighter.setTextFragmenter(new SimpleFragmenter(100));// 設置摘要的大小 // 3.取出數據 int endIndex = Math.min(totail, scoreDoc.length); List<Article> list = new ArrayList<Article>(); for (int i = 0; i < endIndex; i++) {// start // float score = scoreDoc[i].score;//平均得分 int docId = scoreDoc[i].doc; Document doc = indexSearcher.doc(docId); // 進行高亮操作 if (parameter.length > 0) { for (int j = 0; j < parameter.length; j++) { // 進行高亮操作,當沒有找到關鍵詞時,返回為null String text = highlighter.getBestFragment(LuceneUtils.getAnalyzer(), parameter[j], doc.get(parameter[j])); if (text != null) { doc.getField(parameter[j]).setValue(text); } } } // 將Document轉為Article callback.documentToObject(doc, list, count); }// end // 返回結果集 QueryResult queryResult = new QueryResult(list, count); return queryResult; } catch (Exception e) { throw new RuntimeException(e); } } } 測試方法: @Test @SuppressWarnings("unchecked") public void testQuery() { String queryString = "Lucene"; String[] param = new String[]{"id","title","content"}; try { QueryResult queryResult=ComsLuceneUtils.query(queryString, 0, 20, param, new cn.net.yixun.util.ComsLuceneUtils.CallBackQuery(){ public QueryResult documentToObject(Document 
doc,List list,int count)throws Exception{ Article article = new Article(); //將Document轉為Article //將字符串轉化為數字 int id = NumericUtils.prefixCodedToInt(doc.get("id")); article.setId(id); article.setTitle(doc.get("title")); article.setContent(doc.get("content")); list.add(article); QueryResult queryResult=new QueryResult(list, count); return queryResult; } public Document objectToDocument(Object obj)throws Exception{ return null;}; }); List<Article> list = (List<Article>)queryResult.getList(); for(Article article:list){ System.err.println("list--------->" + article.toString()); } } catch (Exception e) { e.printStackTrace(); } }
本文由用戶 jopen 自行上傳分享,僅供網友學習交流。所有權歸原作者,若您的權利被侵害,請聯系管理員。
轉載本站原創文章,請注明出處,并保留原始鏈接、圖片水印。
本站是一個以用戶分享為主的開源技術平臺,歡迎各類分享!