操作 Lucene 索引的工具類

jopen 10年前發布 | 20K 次閱讀 Lucene 搜索引擎

public class OperatorIndex {
public static final String INDEX_PATH = "D:/indexDir";

// 創建分詞器 private Analyzer analyzer = null; // 索引保存目錄 private File indexFile = null; // 目錄對象 private Directory directory = null; // 創建indexWriterConfig 只能實例化一次用一次 private IndexWriterConfig indexWriterConfig = null; SimpleDateFormat simpleDateFormat;

private IndexSearcher indexSearcher;

public void init() throws IOException { analyzer = new IKAnalyzer(); indexFile = new File(INDEX_PATH); directory = FSDirectory.open(indexFile); simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); System.out.println("##初始化完成##");

}

// 數據庫中需要建立索引的比表封裝成Document對象

public Document createDocument(Article article) { Document document = new Document(); document.add(new TextField("id", article.getId() + "", Field.Store.YES)); document.add(new TextField("title", article.getTitle(), Field.Store.YES)); document.add(new TextField("content", article.getContent(), Field.Store.YES)); return document; }

// 獲得日期 public String getDate() { return simpleDateFormat.format(new Date());

}

// 查詢所有索引 public void openIndexFile() throws IOException { System.out.println("讀取索引開始..."); IndexReader indexReader = IndexReader.open(directory); // 獲取索引個數 int maxDoc = indexReader.maxDoc(); System.out.println("maxDoc:" + maxDoc); Article article = null; for (int i = 0; i < maxDoc; i++) { Document document = indexReader.document(i); article = new Article(); if (document.get("id") == null) { System.out.println("id 為空");

}

article.setId(Integer.parseInt(document.get("id"))); article.setTitle(document.get("title")); article.setContent(document.get("content")); System.out.println(article);

} indexReader.close(); System.out.println("讀取索引結束");

}

// 創建索引 public void createIndex(Article article) throws IOException { indexWriterConfig = new IndexWriterConfig(Version.LATEST, analyzer); IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig); // 創建索引前刪除以前的索引 // indexWriter.deleteAll(); Document createDocument = createDocument(article); indexWriter.addDocument(createDocument); // indexWriter.commit(); indexWriter.close();

System.out.println("[" + getDate() + "]" + "lucene寫入索引到" + "["

  • indexFile.getAbsolutePath() + "]" + "成功"); }

// 批量創建索引 public void createIndexes(List<Article> articles) throws IOException { // indexWriterConfig = new IndexWriterConfig(Version.LATEST, analyzer); // IndexWriter indexWriter = new IndexWriter(directory, // indexWriterConfig); // indexWriter.deleteAll();

for (Article article : articles) { createIndex(article);

}

}

// 刪除索引 public void deleteIndex(int id) throws IOException { if (indexFile.exists()) { IndexWriterConfig indexWriterConfig = new IndexWriterConfig( Version.LATEST, analyzer); IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig); indexWriter.deleteDocuments(new Term("id", String.valueOf(id))); System.out.println("[" + getDate() + "]" + "lucene刪除索引成功"); indexWriter.close();

} else { System.out.println("刪除索引失敗"); }

}

// 批量刪除索引 public void deleteIndexes(List<Article> articles) { if (articles == null || articles.size() == 0) { return;

} for (Article article : articles) { try { deleteIndex(article.getId()); } catch (IOException e) { // TODO Auto-generated catch block System.out.println("刪除索引失敗"); e.printStackTrace(); }

}

}

// 更新索引 先刪除索引在添加索引 public void updateIndex(Article article) throws IOException { deleteIndex(article.getId()); createIndex(article);

}

//查詢索引 public void searchIndex(String keyword) { IndexReader indexReader = null; try { indexReader = IndexReader.open(directory); IndexSearcher indexSearcher = new IndexSearcher(indexReader);

QueryParser queryParser = new QueryParser(Version.LATEST, "title", new IKAnalyzer(false)); Query query = queryParser.parse(keyword.trim());

TopDocs topDocs = indexSearcher.search(query, 100); ScoreDoc[] scoreDocs = topDocs.scoreDocs; if (scoreDocs == null || scoreDocs.length == 0) { System.out.println("很遺憾!沒有找到!");

} for (ScoreDoc scoreDoc : scoreDocs) { Document document = indexSearcher.doc(scoreDoc.doc); System.out.println(document.toString()); System.out.println("[title:" + document.get("title")

  • ",content:" + document.get("content") + "]");

} } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (ParseException e) { // TODO Auto-generated catch block e.printStackTrace(); } finally { if (indexReader != null) { try { indexReader.close(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); }

} }

}

//分頁查詢 public void paginationQuery(String keyword, int pageSize, int currentPage) { String[] fields = { "title", "content" }; IndexReader indexReader = null; try { MultiFieldQueryParser multiFieldQueryParser = new MultiFieldQueryParser( Version.LATEST, fields, new IKAnalyzer(false)); Query query = multiFieldQueryParser.parse(keyword.trim());

indexReader = IndexReader.open(directory); IndexSearcher indexSearcher = new IndexSearcher(indexReader);

// 搜索返回的結果并取前100的結果 TopDocs topDocs = indexSearcher.search(query, 100); TopDocs allDocs = indexSearcher .search(new MatchAllDocsQuery(), 100); int totalHits = topDocs.totalHits; System.out.println("總數:" + totalHits); // 搜索返回的結果集合 ScoreDoc[] scoreDocs = topDocs.scoreDocs;

int begin = (currentPage - 1) * pageSize; int end = Math.min(begin + pageSize, scoreDocs.length);

for (int i = begin; i < end; i++) { Document document = indexSearcher.doc(scoreDocs[i].doc); System.out.println("[title:" + document.get("title") + "]");

} } catch (ParseException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } finally { if (indexReader != null) { try { indexReader.close(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); }

} }

}

//查詢高亮顯示 public void highlighterSearch() { IndexReader indexReader = null; try { indexReader = IndexReader.open(directory);

IndexSearcher indexSearcher = new IndexSearcher(indexReader);

Term term = new Term("content", "中國");

TermQuery termQuery = new TermQuery(term);

TopDocs topDocs = indexSearcher .search(termQuery, Integer.MAX_VALUE); System.out.println("查詢結果數:" + topDocs.totalHits); System.out.println("最大的評分" + topDocs.getMaxScore());

ScoreDoc[] scoreDocs = topDocs.scoreDocs; for (ScoreDoc scoreDoc : scoreDocs) { Document document = indexSearcher.doc(scoreDoc.doc); System.out.println("檢索條件:" + term.toString()); String content = document.get("content");

System.out.println("content:" + document.get("content"));

// 高亮展示 SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter( "【", "】");

Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(termQuery)); highlighter.setTextFragmenter(new SimpleFragmenter(content .length()));

if (!"".equals(content)) { TokenStream tokenStream = new IKAnalyzer().tokenStream( content, new StringReader(content)); String bestFragment = highlighter.getBestFragment( tokenStream, content); System.out.println("高亮顯示:" + "檢索結果如下所示:"); System.out.println(bestFragment); // 結束關鍵字高亮 System.out.println("文件內容:" + content); // 匹配相關度 System.out.println(scoreDoc.score);

}

} } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (InvalidTokenOffsetsException e) { // TODO Auto-generated catch block e.printStackTrace(); } finally { if (indexReader != null) { try { indexReader.close(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); }

} }

}

// 釋放資源 public void destory() throws IOException { analyzer.close(); directory.close(); System.out.println("銷毀成功"); }

public static void main(String[] args) throws IOException { OperatorIndex index = new OperatorIndex(); index.init(); // Article article = new Article(); // article.setId(1); // article.setTitle("hello"); // article.setContent("hello world!"); // // index.createIndex(article); // index.openIndexFile();

// index.deleteIndex(1); // index.openIndexFile(); // article.setContent("hello"); // index.updateIndex(article); // index.openIndexFile();

List<Article> articles = new ArrayList<Article>(); Article article = new Article(1, "中國", "11111gdfjs中國"); Article article1 = new Article(2, "我愛你中國", "11111gdfjs我愛你中國"); Article article2 = new Article(3, "國中之國", "fdsab;1gdfjs國中之國"); Article article3 = new Article(4, "44", "111gdsa11gdfjs中國將成為世界上最強大的國家"); Article article4 = new Article(5, "55", "111gdas11gdfjs");

articles.add(article); articles.add(article1); articles.add(article2); articles.add(article3); articles.add(article4);

index.deleteIndexes(articles); index.createIndexes(articles); index.openIndexFile(); index.searchIndex("中國"); index.paginationQuery("中國", 1, 1); index.highlighterSearch();

// index.destory();

} }

Article.java實體

public class Article { private int id; private String title; private String content;

public int getId() { return id; }

public void setId(int id) { this.id = id; }

public String getTitle() { return title; }

public void setTitle(String title) { this.title = title; }

public String getContent() { return content; }

public void setContent(String content) { this.content = content; }

public Article(int id, String title, String content) { super(); this.id = id; this.title = title; this.content = content; }

public Article() { // TODO Auto-generated constructor stub }

@Override public String toString() { // TODO Auto-generated method stub return "article[id:" + id + ",title:" + title + ",content:" + content

  • "]"; }

}</pre>

 本文由用戶 jopen 自行上傳分享,僅供網友學習交流。所有權歸原作者,若您的權利被侵害,請聯繫管理員。
 轉載本站原創文章,請註明出處,並保留原始鏈接、圖片水印。
 本站是一個以用戶分享為主的開源技術平臺,歡迎各類分享!