Lucene搜索簡單的實例

jopen 10年前發布 | 43K 次閱讀 Lucene 搜索引擎

項目環境:

1.導入jar包

IKAnalyzer3.2.0Stable.jar
lucene-analyzers-3.0.1.jar
lucene-core-3.0.1.jar
lucene-highlighter-3.0.1.jar
lucene-memory-3.0.1.jar

 

2.寫一個demo

  a.創建一個實體

-- Recreate the `article` table from scratch: an existing table of that name
-- (and all of its rows) is dropped before the new one is created.
DROP TABLE IF EXISTS `article`;
CREATE TABLE `article` (
  -- Auto-incrementing integer key; declared as the primary key below.
  `id` int(11) NOT NULL AUTO_INCREMENT,
  -- Article title, at most 20 characters; NULL when not supplied.
  `title` varchar(20) DEFAULT NULL,
  -- Article body, at most 5000 characters; NULL when not supplied.
  `content` varchar(5000) DEFAULT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

 

/**
 * Simple bean describing one article. Its three fields correspond by name to the
 * columns of the {@code article} table (id, title, content).
 */
public class Article {
// Numeric identifier of the article.
private int id;
// Title text of the article.
private String title;
// Body text of the article.
private String content;

................//getters, setters and toString are omitted here

}

b.創建一個提供公共方法的類:

//提供封裝分頁數據的類:

/**
 * Holder for paged query output: a list of result objects plus a count.
 * NOTE(review): other code in this file builds it with {@code new QueryResult(list, count)},
 * so a matching two-argument constructor is presumably among the omitted members - confirm.
 */
@SuppressWarnings("rawtypes")
public class QueryResult {
// Result objects; a raw List, so the element type is left to the caller.
private List list;
// Count reported alongside the list (callers in this file pass the total number of hits).
private int count;

..............get/set省略

}

//提供創建索引,目錄的類

/**
 * Shared Lucene plumbing for the demo: the index directory, the analyzer and one
 * long-lived {@link IndexWriter}, plus converters between {@code Article} and
 * {@link Document}.
 *
 * <p>Everything is initialised once, when the class is loaded, from the classpath
 * resource {@code lucene.properties}; its {@code path} entry names the index directory.
 * If initialisation fails the class fails to load, instead of leaving null fields
 * behind that would only surface later as NullPointerExceptions.
 */
public class LuceneUtils {

    /** Directory holding the index files. */
    private static Directory directory;

    /** Analyzer shared by indexing and query parsing. */
    private static Analyzer analyzer;

    /** Single writer used for every index modification; created when this class is loaded. */
    private static IndexWriter indexWriter;

    static {
        try {
            Properties prop = new Properties();
            InputStream inStream = LuceneUtils.class.getClassLoader().getResourceAsStream("lucene.properties");
            if (inStream == null) {
                throw new IllegalStateException("lucene.properties was not found on the classpath");
            }
            try {
                prop.load(inStream);
            } finally {
                // The stream is only needed while loading; release it either way.
                inStream.close();
            }

            String path = prop.getProperty("path");
            if (path == null) {
                throw new IllegalStateException("lucene.properties does not define the \"path\" property");
            }

            directory = FSDirectory.open(new File(path));
            analyzer = new StandardAnalyzer(Version.LUCENE_30);
            indexWriter = new IndexWriter(directory, analyzer, MaxFieldLength.LIMITED);

            // Close the writer when the JVM exits.
            Runtime.getRuntime().addShutdownHook(new Thread() {
                @Override
                public void run() {
                    try {
                        indexWriter.close();
                    } catch (Exception e) {
                        e.printStackTrace();
                    }
                }
            });
        } catch (Exception e) {
            // Fail fast: without a directory and writer no other method of this class can work.
            throw new ExceptionInInitializerError(e);
        }
    }

    /**
     * Converts an {@code Article} into a Lucene document.
     *
     * <p>The id is stored in prefix-coded form and is not analyzed; title and content are
     * stored and analyzed. A null title or content is left out of the document, because
     * {@link Field} does not accept a null value.
     *
     * @param obj the article to convert; must be an {@code Article}
     * @return a new document describing the article
     * @throws ClassCastException if {@code obj} is not an {@code Article}
     */
    public static Document objectToDocument(Object obj) {
        Article article = (Article) obj;
        Document doc = new Document();

        String idstr = NumericUtils.intToPrefixCoded(article.getId());
        doc.add(new Field("id", idstr, Store.YES, Index.NOT_ANALYZED));

        if (article.getTitle() != null) {
            doc.add(new Field("title", article.getTitle(), Store.YES, Index.ANALYZED));
        }
        if (article.getContent() != null) {
            doc.add(new Field("content", article.getContent(), Store.YES, Index.ANALYZED));
        }
        return doc;
    }

    /**
     * Converts a stored document back into an {@code Article}.
     *
     * @param doc a document produced by {@link #objectToDocument(Object)}
     * @return a new {@code Article}; title or content is null when the document has no such field
     */
    public static Object documentToObject(Document doc) {
        Article article = new Article();
        // The id was written prefix-coded, so decode it back into an int.
        article.setId(NumericUtils.prefixCodedToInt(doc.get("id")));
        article.setTitle(doc.get("title"));
        article.setContent(doc.get("content"));
        return article;
    }

    /**
     * @return the shared index writer
     */
    public static IndexWriter getIndexWriter() {
        return indexWriter;
    }

    /**
     * Opens a new searcher over the index directory.
     *
     * <p>Every call returns a fresh instance. It is deliberately not routed through a
     * shared static field, so concurrent callers can never be handed each other's
     * searcher. The caller owns the returned searcher and should close it when done.
     *
     * @return a newly opened searcher
     * @throws RuntimeException wrapping any failure to open the index
     */
    public static IndexSearcher getIndexSearch() {
        try {
            return new IndexSearcher(directory);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * @return the directory that stores the index
     */
    public static Directory getDirectory() {
        return directory;
    }

    /**
     * @return the analyzer used for indexing and query parsing
     */
    public static Analyzer getAnalyzer() {
        return analyzer;
    }
}

c: 創建增刪改查方法

public class IndexDao {


/**
* //保存到索引庫
* 
* @return
* @throws Exception
*/
public void save(Article article) {
try {
// 將Aritcle轉為Documnet
Document doc = LuceneUtils.objectToDocument(article);


// 建立索引
IndexWriter indexWriter = LuceneUtils.getIndexWriter();
indexWriter.addDocument(doc);
indexWriter.commit();
} catch (Exception e) {
throw new RuntimeException(e);
}
}


/**
* 刪除索引庫 Term 表示制定列中包含的關鍵字
* 
* @return
* @throws Exception
*/
public void delete(Article article) {

String idStr = NumericUtils.intToPrefixCoded(article.getId());
Term term = new Term("id", idStr);
try {
// 建立索引
IndexWriter indexWriter = LuceneUtils.getIndexWriter();
indexWriter.deleteDocuments(term);// 刪除指定Term總重的documnet數據
indexWriter.commit();
} catch (Exception e) {
throw new RuntimeException(e);
}
}


/**
* 修改索引庫
* 
* @return
* @throws Exception
*/
public void update(Article article) {

// 創建Term
String idStr = NumericUtils.intToPrefixCoded(article.getId());
Term term = new Term("id", idStr);


// 準備document
Document doc = LuceneUtils.objectToDocument(article);
try {
// 建立索引
IndexWriter indexWriter = LuceneUtils.getIndexWriter();
indexWriter.updateDocument(term, doc);// 刪除指定Term總重的documnet數據
indexWriter.commit();


// 先刪除,在創建
// indexWriter.deleteDocuments(term);
// indexWriter.addDocument(doc);
} catch (Exception e) {
throw new RuntimeException(e);
}
}


/**
* 查詢索引庫
* 
* @return
* @throws Exception
*/
public QueryResult query(String queryString, int first, int max) {
IndexSearcher indexSearcher = null;
try {
// MultiFieldQueryParser:表示可以根據多個字段查詢
int totail = first + max;
// 1.把字符串轉為Query對象
QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_30, new String[] { "id", "title", "content" },
LuceneUtils.getAnalyzer());
Query query = parser.parse(queryString);

// 2.執行查詢
indexSearcher = LuceneUtils.getIndexSearch();
// 指定排序條件
Sort sort = new Sort(new SortField("id", SortField.INT));// 按照id升序
TopDocs topDocs = indexSearcher.search(query, null, totail, sort);// 查詢并返回最多的前n條數據
int count = topDocs.totalHits;// 總記錄數
ScoreDoc[] scoreDoc = topDocs.scoreDocs;// 最多前n條結果數據


// 生成高亮顯示器;設置前綴,后綴,摘要的大小
Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
Scorer scorer = new QueryScorer(query);// 查詢條件
Highlighter highlighter = new Highlighter(formatter, scorer);
highlighter.setTextFragmenter(new SimpleFragmenter(100));// 設置摘要的大小


// 3.取出數據
int endIndex = Math.min(totail, scoreDoc.length);
List<Article> list = new ArrayList<Article>();
for (int i = 0; i < endIndex; i++) {
// float score = scoreDoc[i].score;//平均得分
int docId = scoreDoc[i].doc;
Document doc = indexSearcher.doc(docId);


// 進行高亮操作,當沒有找到關鍵詞時,返回為null
String text = highlighter.getBestFragment(LuceneUtils.getAnalyzer(), "title", doc.get("title"));
if (text != null) {
doc.getField("title").setValue(text);
}
// 將Document轉為Article
Article article = (Article) LuceneUtils.documentToObject(doc);
list.add(article);
}
QueryResult queryResult = new QueryResult(list, count);
return queryResult;
} catch (Exception e) {
throw new RuntimeException(e);
}
}

d: 測試dao方法:

@Test
public void testSave() {
    // Index twenty documents that share title and content and differ only in their id (0..19).
    Article sample = new Article();
    sample.setTitle("Lucene搜索的方式");
    sample.setContent("全文檢索是計算機程序通過掃描文章中的每一個詞,對每一個詞建立一個索引,指明該詞在文章中出現的次數和位置。");

    int nextId = 0;
    while (nextId < 20) {
        sample.setId(nextId);
        indexDao.save(sample);
        nextId++;
    }
}


@Test
public void testDelete() {
    // Only the id matters for deletion, so the other properties are left unset.
    Article toRemove = new Article();
    toRemove.setId(1);

    indexDao.delete(toRemove);
}


@Test
public void testUpdate() {
    // New state for the article with id 1; the update replaces the indexed document for that id.
    Article replacement = new Article();
    replacement.setContent("跟新索引庫測試是否正確");
    replacement.setTitle("Lucene搜索的方式");
    replacement.setId(1);

    indexDao.update(replacement);
}

@Test
@SuppressWarnings("unchecked")
public void testQuery() {
String queryString = "Lucene";
QueryResult queryResult = indexDao.searchAndOrderBy(queryString, 0, 10);
System.out.println("count---------->" + queryResult.getCount());
List<Article> list = (List<Article>)queryResult.getList();
for(Article article:list){
System.err.println("list--------->" + article.toString());
}

 

到此 Lucene 測試例子完成。以下是本人在使用過程中進行改造封裝后制作的模板:由于增刪改查中都要用到一些相同的代碼,并且重復比較多,為此本人將重復的部分提取為公共方法,其他不能確定的部分則通過一個接口提供抽象方法;調用者需要封裝哪些實體,只需實現該接口并重寫其中的方法即可。此封裝主要用到的是模板方法設計模式。

如下:

/**
 * Template-style helpers for index maintenance and paged search. The parts that depend
 * on the entity type (entity to document and back) are supplied by the caller through
 * {@link CallBackQuery}; the writer, searcher and analyzer come from {@code LuceneUtils}.
 */
public class ComsLuceneUtils {

    /** Entity-specific conversions supplied by the caller. */
    public interface CallBackQuery {

        /**
         * Converts one hit and appends the resulting object to {@code list}.
         * This is the paged variant: {@code count} carries the total number of hits.
         *
         * @param doc   the stored document of the hit (searched fields may already be highlighted)
         * @param list  the result list to append to
         * @param count total number of hits of the query
         * @return a result object; {@link ComsLuceneUtils#query} ignores it and uses {@code list}
         * @throws Exception if the conversion fails
         */
        public abstract QueryResult documentToObject(Document doc, List list, int count) throws Exception;

        /**
         * Converts an entity into the document to be indexed.
         *
         * @param obj the entity
         * @return the document to index
         * @throws Exception if the conversion fails
         */
        public abstract Document objectToDocument(Object obj) throws Exception;
    }

    /**
     * Adds the article to the index and commits.
     *
     * @param article  the article to index
     * @param callBack supplies the article-to-document conversion
     * @throws RuntimeException wrapping any conversion or write failure
     */
    public static void save(Article article, CallBackQuery callBack) {
        try {
            Document doc = callBack.objectToDocument(article);

            IndexWriter indexWriter = LuceneUtils.getIndexWriter();
            indexWriter.addDocument(doc);
            indexWriter.commit();
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Deletes every document whose {@code id} term equals the given id, then commits.
     *
     * @param id id of the documents to delete
     * @throws RuntimeException wrapping any write failure
     */
    public static void delete(Integer id) {
        // The id is expected to be indexed prefix-coded, so the term is encoded the same way.
        String idStr = NumericUtils.intToPrefixCoded(id);
        Term term = new Term("id", idStr);
        try {
            IndexWriter indexWriter = LuceneUtils.getIndexWriter();
            indexWriter.deleteDocuments(term);
            indexWriter.commit();
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Replaces the indexed document(s) carrying the article's id with the document
     * produced by the callback, then commits.
     *
     * @param article  the new state of the article
     * @param callBack supplies the article-to-document conversion
     * @throws RuntimeException wrapping any conversion or write failure
     */
    public static void update(Article article, CallBackQuery callBack) {
        String idStr = NumericUtils.intToPrefixCoded(article.getId());
        Term term = new Term("id", idStr);
        try {
            Document doc = callBack.objectToDocument(article);

            IndexWriter indexWriter = LuceneUtils.getIndexWriter();
            // updateDocument deletes the documents matching the term, then adds the new one.
            indexWriter.updateDocument(term, doc);
            indexWriter.commit();
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Paged search over the given fields, sorted by ascending {@code id}. For every hit
     * on the page, each searched field that contains a query term is replaced by its
     * best fragment with the match wrapped in a red {@code <font>} tag, and the document
     * is then handed to the callback, which appends the converted object to the result list.
     *
     * @param queryString query in Lucene query-parser syntax
     * @param first       zero-based index of the first hit to return; negative values are treated as 0
     * @param max         maximum number of hits to return
     * @param parameter   names of the fields to search and highlight
     * @param callback    supplies the document-to-object conversion
     * @return the requested page together with the total number of hits
     * @throws RuntimeException wrapping any parse, search or conversion failure
     */
    public static QueryResult query(String queryString, int first, int max, String[] parameter, CallBackQuery callback) {
        IndexSearcher indexSearcher = null;
        try {
            // The search has to collect the hits before the page as well as the page itself.
            int totail = first + max;

            // 1. Parse the query string against the requested fields.
            QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_30, parameter, LuceneUtils.getAnalyzer());
            Query query = parser.parse(queryString);

            // 2. Run the search, sorted by ascending id.
            indexSearcher = LuceneUtils.getIndexSearch();
            Sort sort = new Sort(new SortField("id", SortField.INT));
            TopDocs topDocs = indexSearcher.search(query, null, totail, sort);
            int count = topDocs.totalHits;
            ScoreDoc[] scoreDoc = topDocs.scoreDocs;

            // Highlighter: red <font> tag around matches, fragments of about 100 characters.
            Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
            Scorer scorer = new QueryScorer(query);
            Highlighter highlighter = new Highlighter(formatter, scorer);
            highlighter.setTextFragmenter(new SimpleFragmenter(100));

            // 3. Materialise only the requested page: start at 'first', not at hit 0.
            int startIndex = Math.max(first, 0);
            int endIndex = Math.min(totail, scoreDoc.length);
            List<Article> list = new ArrayList<Article>();
            for (int i = startIndex; i < endIndex; i++) {
                Document doc = indexSearcher.doc(scoreDoc[i].doc);

                for (int j = 0; j < parameter.length; j++) {
                    String stored = doc.get(parameter[j]);
                    if (stored == null) {
                        // The document has no stored value for this field; nothing to highlight.
                        continue;
                    }
                    // getBestFragment returns null when the field does not contain a query term.
                    String text = highlighter.getBestFragment(LuceneUtils.getAnalyzer(), parameter[j], stored);
                    if (text != null) {
                        doc.getField(parameter[j]).setValue(text);
                    }
                }

                // The callback converts the document and appends the result to 'list'.
                callback.documentToObject(doc, list, count);
            }

            return new QueryResult(list, count);
        } catch (Exception e) {
            throw new RuntimeException(e);
        } finally {
            closeQuietly(indexSearcher);
        }
    }

    /** Closes a searcher opened for a single query; tolerates null. */
    private static void closeQuietly(IndexSearcher searcher) {
        if (searcher == null) {
            return;
        }
        try {
            searcher.close();
        } catch (Exception ignored) {
            // The page has already been built (or a more relevant exception is in flight);
            // a failed close must not replace either outcome.
        }
    }

}

測試方法:

@Test
@SuppressWarnings("unchecked")
public void testQuery() {
    String queryString = "Lucene";
    String[] param = new String[] { "id", "title", "content" };

    // No try/catch around the call: ComsLuceneUtils.query reports failures as unchecked
    // exceptions, and catching them here would let a broken query pass silently.
    QueryResult queryResult = ComsLuceneUtils.query(queryString, 0, 20, param,
            new cn.net.yixun.util.ComsLuceneUtils.CallBackQuery() {
                // Turns one hit into an Article and appends it to the shared result list.
                public QueryResult documentToObject(Document doc, List list, int count) throws Exception {
                    Article article = new Article();
                    // The id is stored prefix-coded; decode it back into an int.
                    int id = NumericUtils.prefixCodedToInt(doc.get("id"));
                    article.setId(id);
                    article.setTitle(doc.get("title"));
                    article.setContent(doc.get("content"));
                    list.add(article);
                    return new QueryResult(list, count);
                }

                // Not needed for querying.
                public Document objectToDocument(Object obj) throws Exception {
                    return null;
                }
            });

    List<Article> list = (List<Article>) queryResult.getList();
    for (Article article : list) {
        System.err.println("list--------->" + article.toString());
    }
}

 

 

 本文由用戶 jopen 自行上傳分享,僅供網友學習交流。所有權歸原作者,若您的權利被侵害,請聯系管理員。
 轉載本站原創文章,請注明出處,并保留原始鏈接、圖片水印。
 本站是一個以用戶分享為主的開源技術平臺,歡迎各類分享!