lucene搜索實例解析

jopen 10年前發布 | 17K 次閱讀 Lucene 搜索引擎

1、搜索的簡單實現(TermQuery)

         1.1 創建 IndexReader

         1.2 創建 IndexSearcher

         1.3 創建Term和TermQuery

         1.4 根據TermQuery獲取TopDocs

         1.5 根據TopDocs獲取ScoreDoc

         1.6 根據ScoreDoc獲取相應的文檔

    2、其他搜索Query

         2.1TermRangeQuery

         2.2NumericRangeQuery

         2.3PrefixQuery

         2.4WildcardQuery

         2.5BooleanQuery

         2.6PhraseQuery

         2.7FuzzyQuery

   3、使用QueryParser

這是search類:

    package com.dhb.search;

import java.io.IOException;  
import java.text.ParseException;  
import java.text.SimpleDateFormat;  
import java.util.Date;  
import java.util.HashMap;  
import java.util.Map;  

import org.apache.lucene.analysis.standard.StandardAnalyzer;  
import org.apache.lucene.document.Document;  
import org.apache.lucene.document.Field;  
import org.apache.lucene.document.NumericField;  
import org.apache.lucene.index.CorruptIndexException;  
import org.apache.lucene.index.IndexReader;  
import org.apache.lucene.index.IndexWriter;  
import org.apache.lucene.index.IndexWriterConfig;  
import org.apache.lucene.index.Term;  
import org.apache.lucene.search.BooleanClause.Occur;  
import org.apache.lucene.search.BooleanQuery;  
import org.apache.lucene.search.FuzzyQuery;  
import org.apache.lucene.search.IndexSearcher;  
import org.apache.lucene.search.NumericRangeQuery;  
import org.apache.lucene.search.PhraseQuery;  
import org.apache.lucene.search.PrefixQuery;  
import org.apache.lucene.search.Query;  
import org.apache.lucene.search.ScoreDoc;  
import org.apache.lucene.search.TermQuery;  
import org.apache.lucene.search.TermRangeQuery;  
import org.apache.lucene.search.TopDocs;  
import org.apache.lucene.search.WildcardQuery;  
import org.apache.lucene.store.Directory;  
import org.apache.lucene.store.LockObtainFailedException;  
import org.apache.lucene.store.RAMDirectory;  
import org.apache.lucene.util.Version;  


public class SearcherUtil {  
    private String[] ids = {"1","2","3","4","5","6"};  
    private String[] emails = {"aa@csdn.org","bb@csdn.org","cc@sina.org","dd@sina.org",  
            "ee@qq.com","ff@qq.com"};  
    private String[] contents = {"Welcome to my office ,I like surfing internet.",  
                                  "hello boys like haha",  
                                  "hello girls we like each other.",  
                                  "I like football,you like too.",  
                                  "I like basketball very much, how about you?",  
                                  "bye-bye see you I don't like."};  
    private int[] attachment ={2,3,1,4,5,5};  
    private String[] names = {"Victor","Nancy","Kitty","Cindy","Tom","Tony"};   
    private Map<String, Float> scores = new HashMap<String, Float>();  
    private Date[] dates = null;  

    private Directory directory;  
    private IndexReader reader;  

    public SearcherUtil() {  
        setDates();  
        scores.put("qq.com", 2.0f);  
        scores.put("sina.org", 1.5f);  
        directory = new RAMDirectory();  
        index();  
    }  
    private void setDates() {  
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");  
        dates = new Date[ids.length];  
        try {  
            dates[0] = sdf.parse("2010-02-19");  
            dates[1] = sdf.parse("2012-01-11");  
            dates[2] = sdf.parse("2011-09-19");  
            dates[3] = sdf.parse("2010-12-22");  
            dates[4] = sdf.parse("2012-01-01");  
            dates[5] = sdf.parse("2011-05-19");  
        } catch (ParseException e) {  
            e.printStackTrace();  
        }  
    }  

    public void index() {  
        IndexWriter writer = null;  
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_35,   
                new StandardAnalyzer(Version.LUCENE_35));  
        try {  
            writer = new IndexWriter(directory, iwc);  
            //清空所有索引  
            writer.deleteAll();  
            Document doc = null;  
            for (int i = 0;i < ids.length; i++) {  

                doc = new Document();  
                doc.add(new Field("id",ids[i],Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));  
                doc.add(new Field("email",emails[i],Field.Store.YES,Field.Index.NOT_ANALYZED));  
                doc.add(new Field("content",contents[i],Field.Store.NO,Field.Index.ANALYZED));  
                doc.add(new Field("name",names[i],Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));  
                /** 
                 * 加權操作 
                 */  
                String et = emails[i].substring(emails[i].indexOf("@")+1);  
                if(scores.containsKey(et)) {  
                    doc.setBoost(scores.get(et));  
                } else {  
                    doc.setBoost(0.5f);  
                }  
                //對數字的操作,存儲數字  
                doc.add(new NumericField("attachment", Field.Store.YES, true).setIntValue(attachment[i]));  
                //對日期的操作,存儲日期  
                doc.add(new NumericField("dates", Field.Store.YES, true).setLongValue(dates[i].getTime()));  
                writer.addDocument(doc);  

            }  
        } catch (CorruptIndexException e) {  
            e.printStackTrace();  
        } catch (LockObtainFailedException e) {  
            e.printStackTrace();  
        } catch (IOException e) {  
            e.printStackTrace();  
        } finally {  
            if(writer!=null)  
                try {  
                    writer.close();  
                } catch (CorruptIndexException e) {  
                    e.printStackTrace();  
                } catch (IOException e) {  
                    e.printStackTrace();  
                }  
        }  
    }  
    public IndexSearcher getSearcher() {  
        try {  
            if(reader==null) {  
                reader = IndexReader.open(directory);  
            } else {  
                IndexReader tr = IndexReader.openIfChanged(reader);  
                if(tr!=null) {  
                    reader.close();           //關閉原來的reader  
                    reader = tr;      
                }  
            }  
            return new IndexSearcher(reader);  
        } catch (CorruptIndexException e) {  
            e.printStackTrace();  
        } catch (IOException e) {  
            e.printStackTrace();  
        }  
        return null;  
    }  
    /** 
     * 精確查詢 
     * @param fld 
     * @param txt 
     */  
    public void searchByTerm(String fld, String txt,int num) {  
        try {  
            IndexSearcher searcher = getSearcher();  
            Query query = new TermQuery(new Term(fld, txt));  
            TopDocs tds = searcher.search(query, num);  
            //總記錄數,和num沒有任何關系  
            System.out.println("一共查詢了:"+tds.totalHits);  

            for(ScoreDoc sd : tds.scoreDocs) {  
                Document doc = searcher.doc(sd.doc);  
                System.out.println("("+sd.doc+") "+"--權值:"+doc.getBoost()+"--分數:"+sd.score+  
                        doc.get("name")+"["+doc.get("email")+"] "+doc.get("id")+",附件:"  
                        +doc.get("attachment")+",日期:"+doc.get("dates"));  
            }  
            searcher.close();  
        } catch (IOException e) {  
            e.printStackTrace();  
        }  
    }  
    /** 
     * 范圍查詢 
     * @param fld 查詢的field  
     * @param start 開始的字符 
     * @param end 結束的字符 
     * @param num 
     *  
     */  
    public void searchByTermRange(String fld, String start, String end, int num) {  
        try {  
            IndexSearcher searcher = getSearcher();  
            //默認為true,就表示包含開始字符和結束字符  
            Query query = new TermRangeQuery(fld, start, end, true, true);  
            TopDocs tds = searcher.search(query, num);  
            System.out.println("一共查詢了:"+tds.totalHits);  

            for(ScoreDoc sd : tds.scoreDocs) {  
                Document doc = searcher.doc(sd.doc);  
                System.out.println("("+sd.doc+") "+"--權值:"+doc.getBoost()+"--分數:"+sd.score+  
                        doc.get("name")+"["+doc.get("email")+"] "+doc.get("id")+",附件:"  
                        +doc.get("attachment")+",日期:"+doc.get("dates"));  
            }  
            searcher.close();  
        } catch (IOException e) {  
            e.printStackTrace();  
        }  
    }  
    /** 
     * 范圍查詢(整型) 
     * @param fld 
     * @param start 
     * @param end 
     * @param num 
     */  
    public void searchByNumericRange(String fld, int start, int end, int num) {  
        try {  
            IndexSearcher searcher = getSearcher();  
            Query query = NumericRangeQuery.newIntRange(fld, start, end, true, true);  
            TopDocs tds = searcher.search(query, num);  
            System.out.println("一共查詢了:"+tds.totalHits);  

            for(ScoreDoc sd : tds.scoreDocs) {  
                Document doc = searcher.doc(sd.doc);  
                System.out.println("("+sd.doc+") "+"--權值:"+doc.getBoost()+"--分數:"+sd.score+  
                        doc.get("name")+"["+doc.get("email")+"] "+doc.get("id")+",附件:"  
                        +doc.get("attachment")+",日期:"+doc.get("dates"));  
            }  
            searcher.close();  
        } catch (IOException e) {  
            e.printStackTrace();  
        }  
    }  
    /** 
     * 前綴查詢 
     * @param fld 
     * @param value 
     * @param num 
     */  
    public void searchByPrefix(String fld, String value, int num) {  
        try {  
            IndexSearcher searcher = getSearcher();  
            Query query = new PrefixQuery(new Term(fld, value));  
            TopDocs tds = searcher.search(query, num);  
            System.out.println("一共查詢了:"+tds.totalHits);  

            for(ScoreDoc sd : tds.scoreDocs) {  
                Document doc = searcher.doc(sd.doc);  
                System.out.println("("+sd.doc+") "+"--權值:"+doc.getBoost()+"--分數:"+sd.score+  
                        doc.get("name")+"["+doc.get("email")+"] "+doc.get("id")+",附件:"  
                        +doc.get("attachment")+",日期:"+doc.get("dates"));  
            }  
            searcher.close();  
        } catch (IOException e) {  
            e.printStackTrace();  
        }  
    }  
    /** 
     * 通配符查詢 
     * @param fld 
     * @param value 
     * @param num 
     */  
    public void searchByWildcard(String fld, String value, int num) {  
        try {  
            IndexSearcher searcher = getSearcher();  
            Query query = new WildcardQuery(new Term(fld, value));  
            TopDocs tds = searcher.search(query, num);  
            System.out.println("一共查詢了:"+tds.totalHits);  

            for(ScoreDoc sd : tds.scoreDocs) {  
                Document doc = searcher.doc(sd.doc);  
                System.out.println("("+sd.doc+") "+"--權值:"+doc.getBoost()+"--分數:"+sd.score+  
                        doc.get("name")+"["+doc.get("email")+"] "+doc.get("id")+",附件:"  
                        +doc.get("attachment")+",日期:"+doc.get("dates"));  
            }  
            searcher.close();  
        } catch (IOException e) {  
            e.printStackTrace();  
        }  
    }  
    /** 
     * 連接多個子查詢 
     * @param num 
     */  
    public void searchByBoolean(int num) {  
        try {  
            IndexSearcher searcher = getSearcher();  
            BooleanQuery query = new BooleanQuery();  
            /** 
             * BooleanQuery可以連接多個子查詢 
             * Occur.MUST表示必須出現 
             * Occur.SHOULD表示可以出現 
             * Occur.MUST——NOT表示不能出現 
             */  
            query.add(new TermQuery(new Term("name", "Victor")), Occur.MUST);  
            query.add(new TermQuery(new Term("content", "like")), Occur.MUST);  

            TopDocs tds = searcher.search(query, num);  
            System.out.println("一共查詢了:"+tds.totalHits);  

            for(ScoreDoc sd : tds.scoreDocs) {  
                Document doc = searcher.doc(sd.doc);  
                System.out.println("("+sd.doc+") "+"--權值:"+doc.getBoost()+"--分數:"+sd.score+  
                        doc.get("name")+"["+doc.get("email")+"] "+doc.get("id")+",附件:"  
                        +doc.get("attachment")+",日期:"+doc.get("dates"));  
            }  
            searcher.close();  
        } catch (IOException e) {  
            e.printStackTrace();  
        }  
    }  
    /** 
     * 短語查詢 
     * @param num 
     */  
    public void searchByPhrase(int num) {  
        try {  
            IndexSearcher searcher = getSearcher();  
            PhraseQuery query = new PhraseQuery();  
            query.setSlop(1);  
            //這里的短語必須均為小寫字母  
            //第一個term  
            query.add(new Term("content", "hello"));  
            //產生距離之后的第二個term  
            query.add(new Term("content", "like"));  

            TopDocs tds = searcher.search(query, num);  
            System.out.println("一共查詢了:"+tds.totalHits);  

            for(ScoreDoc sd : tds.scoreDocs) {  
                Document doc = searcher.doc(sd.doc);  
                System.out.println("("+sd.doc+") "+"--權值:"+doc.getBoost()+"--分數:"+sd.score+  
                        doc.get("name")+"["+doc.get("email")+"] "+doc.get("id")+",附件:"  
                        +doc.get("attachment")+",日期:"+doc.get("dates"));  
            }  
            searcher.close();  
        } catch (IOException e) {  
            e.printStackTrace();  
        }  
    }  
    /** 
     * 模糊查詢 
     * @param num 
     */  
    public void searchByFuzzy(int num) {  
        try {  
            IndexSearcher searcher = getSearcher();  
            Query query = new FuzzyQuery(new Term("name", "Tiny"));  

            TopDocs tds = searcher.search(query, num);  
            System.out.println("一共查詢了:"+tds.totalHits);  

            for(ScoreDoc sd : tds.scoreDocs) {  
                Document doc = searcher.doc(sd.doc);  
                System.out.println("("+sd.doc+") "+"--權值:"+doc.getBoost()+"--分數:"+sd.score+  
                        doc.get("name")+"["+doc.get("email")+"] "+doc.get("id")+",附件:"  
                        +doc.get("attachment")+",日期:"+doc.get("dates"));  
            }  
            searcher.close();  
        } catch (IOException e) {  
            e.printStackTrace();  
        }  
    }  

    public void searchByQueryParse(Query query, int num) {  
        try {  
            IndexSearcher searcher = getSearcher();  
            TopDocs tds = searcher.search(query, num);  
            System.out.println("一共查詢了:"+tds.totalHits);  

            for(ScoreDoc sd : tds.scoreDocs) {  
                Document doc = searcher.doc(sd.doc);  
                System.out.println("("+sd.doc+") "+"--權值:"+doc.getBoost()+"--分數:"+sd.score+  
                        doc.get("name")+"["+doc.get("email")+"] "+doc.get("id")+",附件:"  
                        +doc.get("attachment")+",日期:"+doc.get("dates"));  
            }  
            searcher.close();  
        } catch (IOException e) {  
            e.printStackTrace();  
        }  
    }  
}  </pre><br />

這是測試類:

    package com.dhb.test;

import org.apache.lucene.analysis.standard.StandardAnalyzer;  
import org.apache.lucene.queryParser.ParseException;  
import org.apache.lucene.queryParser.QueryParser;  
import org.apache.lucene.queryParser.QueryParser.Operator;  
import org.apache.lucene.search.Query;  
import org.apache.lucene.util.Version;  
import org.junit.Before;  
import org.junit.Test;  

import com.dhb.search.SearcherUtil;  

public class TestSearch {  
    private SearcherUtil su;  

    @Before  
    public void init() {  
        su = new SearcherUtil();  
    }  

    @Test  
    public void searchByTerm() {  
        su.searchByTerm("content", "like", 3);  
        /** 
         * 一共查詢了:6 (5) --權值:1.0--分數:0.634387Tony[ff@qq.com] 
         * 6,附件:5,日期:1305734400000 (3) --權值:1.0--分數:0.5981058Cindy[dd@sina.org] 
         * 4,附件:4,日期:1292947200000 (4) --權值:1.0--分數:0.5286558Tom[ee@qq.com] 
         * 5,附件:5,日期:1325347200000 
         */  
    }  

    @Test  
    public void searchByTermRange() {  
        // su.searchByTermRange("id", "1", "3", 10);  
        // 查詢name以a開頭和s結尾的  
        // su.searchByTermRange("name", "A", "S", 10);  
        // 由于attachment是數字類型,使用TermRange無法查詢  
        su.searchByTermRange("attachment", "2", "10", 10);  
    }  

    @Test  
    public void searchByNumericRange() {  
        su.searchByNumericRange("attachment", 2, 10, 10);  
    }  

    @Test  
    public void searchByPrefix() {  
        // su.searchByPrefix("name", "T", 10);  
        // 把content里面的單詞,一個一個拆分  
        su.searchByPrefix("content", "b", 10);  
    }  

    @Test  
    public void searchByWildcard() {  
        // *表示可以匹配任意多個字符,?表示可以匹配單個字符  
        // su.searchByWildcard("email", "*@sina.org", 10);  
        su.searchByWildcard("name", "T??", 10);  
    }  

    @Test  
    public void searchByBoolean() {  
        su.searchByBoolean(10);  
    }  

    @Test  
    public void searchByPhrase() {  
        su.searchByPhrase(10);  
    }  

    @Test  
    public void searchByFuzzy() {  
        su.searchByFuzzy(10);  
    }  

    @Test  
    public void searchByQueryParse() throws ParseException {  
        // 創建QueryParser對象,默認搜索域為content  
        QueryParser parser = new QueryParser(Version.LUCENE_35, "content",  
                new StandardAnalyzer(Version.LUCENE_35));  

        // 改變空格的默認操作符,以下可以改為AND  
        //parser.setDefaultOperator(Operator.AND);  

        // 開啟第一個字符的通配查詢,默認是關閉的,太消耗性能  
        parser.setAllowLeadingWildcard(true);  

        // 搜索包含content中包含like的  
        Query query = parser.parse("like");  

        // 查詢content中,basketball 和 basketball 的文檔  
        query = parser.parse("basketball football");  

        // 改變搜索域  
        query = parser.parse("name:Tom");  

        // 用*或者? 通配符匹配  
        query = parser.parse("name:V*");  

        // 通配符默認不能放在首位  
        query = parser.parse("email:*@qq.com");  

        // 查詢 名字中沒mike,content中like的。 +和- 必須要放在前面  
        query = parser.parse("- name:Tom + like");  

        // 匹配一個區間,TO必須大寫  
        query = parser.parse("id:[1 TO 6]");  

        // 開區間  
        query = parser.parse("id:{1 TO 3}");  

        /** 
         * 查詢字符串中空格的,加上“” 完全匹配 
         */  
        query = parser.parse("\"I like football\"");  

        // 查詢I和football中一個字符距離的  
        query = parser.parse("\"I football\"~1");  

        // 沒辦法匹配數字范圍,必須要自定義  
        query = parser.parse("attach:[2 TO 10]");  
        // 模糊匹配  
        query = parser.parse("name:Tim~");  

        su.searchByQueryParse(query, 10);  
    }  
}  </pre><br />
 本文由用戶 jopen 自行上傳分享,僅供網友學習交流。所有權歸原作者,若您的權利被侵害,請聯系管理員。
 轉載本站原創文章,請注明出處,并保留原始鏈接、圖片水印。
 本站是一個以用戶分享為主的開源技術平臺,歡迎各類分享!