使用Lucene4.8進行索引及搜索的基本操作

jopen 10年前發布 | 36K 次閱讀 Lucene 搜索引擎
在Lucene對文本進行處理的過程中，可以大致分為兩大部分：
1、索引文件：提取文檔內容並分析，生成索引
2、搜索內容：搜索索引內容，根據搜索關鍵字得出搜索結果
一、索引文件
基本步驟如下：
1、創建索引庫IndexWriter
2、根據文件創建文檔Document
3、向索引庫中寫入文檔內容
    package com.ljh.search.index;

import java.io.File;  
import java.io.FileReader;  
import java.io.IOException;  

import org.apache.lucene.analysis.standard.StandardAnalyzer;  
import org.apache.lucene.document.Document;  
import org.apache.lucene.document.Field;  
import org.apache.lucene.document.LongField;  
import org.apache.lucene.document.StringField;  
import org.apache.lucene.document.TextField;  
import org.apache.lucene.index.IndexWriter;  
import org.apache.lucene.index.IndexWriterConfig;  
import org.apache.lucene.store.Directory;  
import org.apache.lucene.store.FSDirectory;  
import org.apache.lucene.util.Version;  

// 1、創建索引庫IndexWriter  
// 2、根據文件創建文檔Document  
// 3、向索引庫中寫入文檔內容  

public class IndexFiles {  

    /**
     * Command-line entry point: indexes every file found under DOCS_PATH into
     * the Lucene index located at INDEX_PATH.
     *
     * <p>Both {@code -index INDEX_PATH} and {@code -docs DOCS_PATH} are
     * required. When either is missing (or a flag is given without a value),
     * or when the document directory does not exist or is not readable, a
     * message is printed to stderr and the JVM exits with status 1.
     *
     * @param args command-line arguments
     * @throws IOException kept for interface compatibility; I/O failures
     *         raised while indexing are reported on stderr and turned into
     *         exit status 1
     */
    public static void main(String[] args) throws IOException {

        String usage = "java IndexFiles"
                + " -index INDEX_PATH -docs DOCS_PATH \n\n"
                + "This indexes the documents in DOCS_PATH, creating a Lucene index "
                + "in INDEX_PATH that can be searched with Searcher";

        String indexPath = null;
        String docsPath = null;
        for (int i = 0; i < args.length; i++) {
            // Only consume a value when one actually follows the flag, so a
            // trailing "-index" or "-docs" cannot read past the end of args.
            if ("-index".equals(args[i]) && i + 1 < args.length) {
                indexPath = args[++i];
            } else if ("-docs".equals(args[i]) && i + 1 < args.length) {
                docsPath = args[++i];
            }
        }

        // A null index path would otherwise surface later as a
        // NullPointerException from new File(null).
        if (indexPath == null || docsPath == null) {
            System.err.println("Usage: " + usage);
            System.exit(1);
        }

        final File docDir = new File(docsPath);
        if (!docDir.exists() || !docDir.canRead()) {
            System.err
                    .println("Document directory '"
                            + docDir.getAbsolutePath()
                            + "' does not exist or is not readable, please check the path");
            System.exit(1);
        }

        // Step 1: open the index. try-with-resources closes the writer only
        // if it was actually created, so a failure inside getIndexWriter is
        // no longer masked by a NullPointerException from a finally block.
        try (IndexWriter writer = getIndexWriter(indexPath)) {
            index(writer, docDir);
        } catch (IOException e) {
            e.printStackTrace();
            System.exit(1);
        }

    }

    /**
     * Creates an {@link IndexWriter} for the file-system index stored at the
     * given path, configured for Lucene 4.8 with a {@link StandardAnalyzer}.
     *
     * <p>The caller owns the returned writer and is responsible for closing
     * it.
     *
     * @param indexPath path of the directory that holds the index
     * @return a writer opened on that directory
     * @throws IOException if the directory or the writer cannot be opened
     */
    private static IndexWriter getIndexWriter(String indexPath)
            throws IOException {
        // Open the directory first, then build the writer configuration.
        final Directory target = FSDirectory.open(new File(indexPath));
        final IndexWriterConfig config = new IndexWriterConfig(
                Version.LUCENE_48, new StandardAnalyzer(Version.LUCENE_48));
        return new IndexWriter(target, config);
    }

    /**
     * Recursively adds {@code file} to the index.
     *
     * <p>A directory is walked depth-first and each entry is indexed in turn.
     * A regular file becomes one document with three fields: {@code path}
     * (stored), {@code modified} (the last-modified timestamp, not stored)
     * and {@code contents} (the file's text, supplied through a reader).
     * Files that cannot be read are skipped with a message on stderr instead
     * of aborting the whole run.
     *
     * @param writer the index writer that receives the documents
     * @param file a file to index, or a directory to descend into
     * @throws IOException if reading a file or writing to the index fails
     */
    private static void index(IndexWriter writer, File file) throws IOException {

        if (file.isDirectory()) {
            String[] children = file.list();
            // list() yields null when the directory cannot be listed.
            if (children != null) {
                for (String child : children) {
                    index(writer, new File(file, child));
                }
            }
            return;
        }

        if (!file.canRead()) {
            System.err.println("Skipping unreadable file " + file.getPath());
            return;
        }

        // The reader has to stay open until addDocument() has consumed it;
        // try-with-resources then releases the file handle even when adding
        // the document fails.
        try (FileReader contents = new FileReader(file)) {
            // Step 2: build a document from the file.
            Document doc = new Document();
            doc.add(new StringField("path", file.getPath(), Field.Store.YES));
            doc.add(new LongField("modified", file.lastModified(),
                    Field.Store.NO));
            doc.add(new TextField("contents", contents));
            System.out.println("Indexing " + file.getName());

            // Step 3: write the document into the index.
            writer.addDocument(doc);
        }

    }

}
（1）使用“java com.ljh.search.index.IndexFiles -index d:/index -docs d:/tmp”運行程序（類名區分大小寫，且需帶上包名），索引d:/tmp中的文件，並將索引文件放置到d:/index。

（2）上述生成的索引文件可以使用Luke進行查看。目前Luke已遷移至github進行托管。


二、搜索文件
基本步驟如下：
1、打開索引庫IndexSearcher

2、根據關鍵詞進行搜索

3、遍歷結果並處理



package com.ljh.search.search;
//1、打開索引庫IndexSearcher

//2、根據關鍵詞進行搜索

//3、遍歷結果并處理

import java.io.File;

import java.io.IOException;
import org.apache.lucene.index.DirectoryReader;

import org.apache.lucene.index.IndexReader;

import org.apache.lucene.index.Term;

import org.apache.lucene.search.IndexSearcher;

import org.apache.lucene.search.ScoreDoc;

import org.apache.lucene.search.TermQuery;

import org.apache.lucene.search.TopDocs;

import org.apache.lucene.store.Directory;

import org.apache.lucene.store.FSDirectory;
public class Searcher {

    /**
     * Command-line entry point: looks up a single term in the
     * {@code contents} field of an existing index and prints the hit count
     * followed by the document id and score of up to 20 hits.
     *
     * <p>Both {@code -index INDEX_PATH} and {@code -term TERM} are required.
     * When either is missing (or a flag is given without a value), usage is
     * printed to stderr and the JVM exits with status 1.
     *
     * @param args command-line arguments
     * @throws IOException if the index cannot be opened or searched
     */
    public static void main(String[] args) throws IOException {

        String indexPath = null;
        String term = null;
        for (int i = 0; i < args.length; i++) {
            // Only consume a value when one actually follows the flag, so a
            // trailing "-index" or "-term" cannot read past the end of args.
            if ("-index".equals(args[i]) && i + 1 < args.length) {
                indexPath = args[++i];
            } else if ("-term".equals(args[i]) && i + 1 < args.length) {
                term = args[++i];
            }
        }

        // Without this check a missing argument surfaces as a
        // NullPointerException from new File(null) or from the query.
        if (indexPath == null || term == null) {
            System.err.println("Usage: java Searcher -index INDEX_PATH -term TERM");
            System.exit(1);
        }

        System.out.println("Searching " + term + " in " + indexPath);

        // Step 1: open the index. try-with-resources closes the reader and
        // the directory even when the search throws.
        try (Directory indexDir = FSDirectory.open(new File(indexPath));
                IndexReader ir = DirectoryReader.open(indexDir)) {
            IndexSearcher searcher = new IndexSearcher(ir);

            // Step 2: search for the keyword.
            // NOTE(review): the term is passed to TermQuery as typed, with no
            // analyzer applied here; if the index was built with
            // StandardAnalyzer the term presumably has to be lowercase to
            // match - confirm.
            TopDocs docs = searcher.search(
                    new TermQuery(new Term("contents", term)), 20);

            // Step 3: walk the hits and print them.
            ScoreDoc[] hits = docs.scoreDocs;
            System.out.println(hits.length);
            for (ScoreDoc hit : hits) {
                System.out.println("doc: " + hit.doc + " score: " + hit.score);
            }
        }

    }


}
來自：http://blog.csdn.net/jediael_lu/article/details/30035025
                    
                    
                         本文由用戶 jopen 自行上傳分享，僅供網友學習交流。所有權歸原作者，若您的權利被侵害，請聯系管理員。
                         轉載本站原創文章，請注明出處，并保留原始鏈接、圖片水印。
                         本站是一個以用戶分享為主的開源技術平臺，歡迎各類分享！
                         本文地址：http://www.baiduhome.net/lib/view/open1402477008697.html
                         Lucene 搜索引擎
                    

                
                
                    
                        相關經驗
                        
  使用Lucene4.8進行索引及搜索的基本操作
   lucene索引_加權操作、對日期和數字進行索引、IndexReader的設計
   使用wukong全文搜索引擎
   Apache Solr 之 使用SolrJ操作索引庫
   MongoDB 基本操作
                         
                    
                    
                        相關資訊
                        
  全文搜索引擎 XunSearch
   免費開源的代碼搜索引擎
   迅搜全文搜索引擎 XunSearch
   開源的PHP搜索引擎 - Yioop!
   13 款開源的全文搜索引擎
                         
                    
                    
                        相關文檔
                        
   搜索引擎solr環境配置、分詞及索引操作
    Svn基本操作
    qtp操作基本教程
    （新）Android基本操作
    圖論與搜索引擎
    MongoDB 簡介及基本操作-劉惠庭
    freepbx-使用指南(中文)
    使用 C# 調用 windows API 入門
    StarUML 使用手冊
                         
                    

                    目錄