Lucene3.6 之 排序篇

jopen 11年前發布 | 16K 次閱讀 Lucene 搜索引擎

Lucene的默認排序是按照Document的得分進行排序的。當檢索結果集中的兩個Document具有相同的得分時,默認按照Document的ID對結果進行排序。 

 

一、使用Sort、SortField類實現排序 

Lucene在查詢的時候,可以在調用檢索器IndexSearcher的search方法時傳入一個Sort對象作為參數,在構造Sort的時候,指定排序規則。調用IndexSearcher.search,例如: 
IndexSearcher.search(query, filter, n, sort);

關于Sort類,在其內部定義了3種構造方法: 

f1.png

 

關于SortField類,其構造方法如下:

f2.png

 

其中type對應的取值如下:

f3.png

 

SortField.SCORE 按得分排序 
SortField.DOC 按文檔ID(索引順序)排序 
SortField.AUTO 域的值為int、long、float都有效(注:AUTO 自 Lucene 3.0 起已被移除,3.6 中不可用) 
SortField.STRING 域按STRING排序 
SortField.FLOAT 
SortField.LONG 
SortField.DOUBLE 
SortField.SHORT 
SortField.CUSTOM 通過比較器排序 
SortField.BYTE 


示例代碼

1、對單個字段進行排序

/**
 * Searches the "title" and "category" fields for a keyword and prints up to
 * 50 hits ordered by the "reputation" field, read as a float sort key.
 *
 * The searcher and the reader are released in {@code finally} blocks so they
 * are closed even when parsing, searching or printing throws.
 */
@Test
    public void sortSingleField(){
        try {
            String path = "D://LuceneEx/day01";
            String keyword = "android";
            Directory mdDirectory = FSDirectory.open(new File(path));
            // IK tokenizer; new StandardAnalyzer(Version.LUCENE_36) could be used instead.
            Analyzer mAnalyzer = new IKAnalyzer();

            IndexReader reader = IndexReader.open(mdDirectory);
            try {
                IndexSearcher searcher = new IndexSearcher(reader);
                try {
                    // Search across several fields at once. A single-field search would use
                    // new QueryParser(Version.LUCENE_36, "source", mAnalyzer) instead.
                    String[] fields = {"title","category"};
                    QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_36, fields, mAnalyzer);
                    Query query = parser.parse(keyword);

                    // Single sort criterion: the "reputation" field as a float.
                    SortField field = new SortField("reputation", SortField.FLOAT);
                    Sort sort = new Sort(field);
                    TopDocs tops = searcher.search(query, 50, sort);

                    int count = tops.totalHits;

                    System.out.println("totalHits="+count);

                    ScoreDoc[] docs = tops.scoreDocs;

                    for(int i=0;i<docs.length;i++){
                        Document doc = searcher.doc(docs[i].doc);

                        int id = Integer.parseInt(doc.get("id"));
                        String title = doc.get("title");
                        String author = doc.get("author");
                        String publishTime = doc.get("publishTime");
                        String source = doc.get("source");
                        String category = doc.get("category");
                        float reputation = Float.parseFloat(doc.get("reputation"));

                        System.out.println(id+"\t"+title+"\t"+author+"\t"+publishTime+"\t"+source+"\t"+category+"\t"+reputation);
                    }
                } finally {
                    // Close the searcher before the reader it wraps.
                    searcher.close();
                }
            } finally {
                reader.close();
            }

        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ParseException e) {
            e.printStackTrace();
        }
    }


2、對多個字段進行排序

/**
 * Searches the "title" and "category" fields for a keyword and prints up to
 * 100 hits ordered first by "reputation" (float) and then by "source" (string).
 *
 * The searcher and the reader are released in {@code finally} blocks so they
 * are closed even when parsing, searching or printing throws.
 */
@Test
    public void sortMultiField(){
        try {
            String path = "D://LuceneEx/day01";
            String keyword = "Android";
            Directory mdDirectory = FSDirectory.open(new File(path));
            // IK tokenizer; new StandardAnalyzer(Version.LUCENE_36) could be used instead.
            Analyzer mAnalyzer = new IKAnalyzer();

            IndexReader reader = IndexReader.open(mdDirectory);
            try {
                IndexSearcher searcher = new IndexSearcher(reader);
                try {
                    // Search across several fields at once. A single-field search would use
                    // new QueryParser(Version.LUCENE_36, "source", mAnalyzer) instead.
                    String[] fields = {"title","category"};
                    QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_36, fields, mAnalyzer);
                    Query query = parser.parse(keyword);

                    // Sort criteria are applied in array order: reputation first, then source.
                    SortField sortF1 = new SortField("reputation", SortField.FLOAT);
                    SortField sortF2 = new SortField("source", SortField.STRING);
                    Sort sort = new Sort(new SortField[]{sortF1 , sortF2});

                    TopDocs tops = searcher.search(query, null, 100, sort);
                    int count = tops.totalHits;

                    System.out.println("totalHits="+count);

                    ScoreDoc[] docs = tops.scoreDocs;

                    for(int i=0;i<docs.length;i++){
                        Document doc = searcher.doc(docs[i].doc);

                        int id = Integer.parseInt(doc.get("id"));
                        String title = doc.get("title");
                        String author = doc.get("author");
                        String publishTime = doc.get("publishTime");
                        String source = doc.get("source");
                        String category = doc.get("category");
                        float reputation = Float.parseFloat(doc.get("reputation"));

                        System.out.println(id+"\t"+title+"\t"+author+"\t"+publishTime+"\t"+source+"\t"+category+"\t"+reputation);
                    }
                } finally {
                    // Close the searcher before the reader it wraps.
                    searcher.close();
                }
            } finally {
                reader.close();
            }

        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ParseException e) {
            e.printStackTrace();
        }
    }

用到的兩個工具方法代碼

/**
     * Builds a Lucene {@code Document} from a {@code Book}.
     *
     * Every field is stored. The free-text fields (title, author, publishTime,
     * category) are analyzed. The fields "id", "reputation" and "source" are
     * indexed as single untokenized terms, because the sorting examples sort
     * on "reputation" and "source" and a sort field must not be tokenized.
     *
     * @param book the book whose properties are copied into the document
     * @return a new document holding one field per book property
     */
    public Document createDocument(Book book){
        Document doc = new Document();

        // Numeric values are converted to text; kept as one term each.
        Field id = new Field("id", book.getId() + "", Store.YES,
                Index.NOT_ANALYZED);
        Field title = new Field("title", book.getTitle(), Store.YES,
                Index.ANALYZED);
        Field author = new Field("author", book.getAuthor(), Store.YES,
                Index.ANALYZED);
        Field publishTime = new Field("publishTime", book.getPublishTime(),
                Store.YES, Index.ANALYZED);
        // Used as a STRING sort key, so it must stay a single term.
        Field source = new Field("source", book.getSource(), Store.YES,
                Index.NOT_ANALYZED);
        Field category = new Field("category", book.getCategory(),
                Store.YES, Index.ANALYZED);
        // Used as a FLOAT sort key, so it must stay a single term.
        Field reputation = new Field("reputation", book.getReputation()
                + "", Store.YES, Index.NOT_ANALYZED);

        doc.add(id);
        doc.add(title);
        doc.add(author);
        doc.add(publishTime);
        doc.add(source);
        doc.add(category);
        doc.add(reputation);

        return doc;
    }

    /**
     * Assembles a {@code Book} from the supplied attributes.
     *
     * The id is a random integer in the range [0, 10000) and the source is
     * always set to the same fixed publisher name.
     *
     * @param title       book title
     * @param author      book author
     * @param publishTime publication time, as text
     * @param category    book category
     * @param reputation  rating value
     * @return the populated book
     */
    public Book createBook(String title,String author,String publishTime,String category,float reputation){
        // Draw the id first; collisions between books are possible.
        final int randomId = new Random().nextInt(10000);

        final Book result = new Book();
        result.setId(randomId);
        result.setTitle(title);
        result.setAuthor(author);
        result.setPublishTime(publishTime);
        result.setCategory(category);
        result.setReputation(reputation);
        result.setSource("清華大學出版社");
        return result;
    }


二、改變boost(激勵因子) 

1、改變Document的boost(激勵因子) 
改變boost的大小,會導致Document的得分的改變,從而按照Lucene默認的對檢索結果集的排序方式,使Document在檢索結果中的排序提前或者靠後。在計算得分的時候,使用到了boost的值,默認boost的值為1.0,也就是說默認情況下Document的得分與boost是無關的。一旦改變了默認的boost的值,Document的得分就從與boost無關變為與boost相關了:boost值越大,Document的得分越高。 

2、改變Field的boost(激勵因子) 
改變Field的boost值,和改變Document的boost值是一樣的。因為Document的boost是通過添加到Document中的Field體現的,所以改變Field的boost值,可以改變Document的boost值。 


示例代碼

/**
 * Searches the "title" and "category" fields of the day02 index for a keyword
 * and prints up to 50 hits in the default (score) order, with each hit's score
 * appended, so the effect of the boosts set at indexing time can be seen.
 *
 * The searcher and the reader are released in {@code finally} blocks so they
 * are closed even when parsing, searching or printing throws.
 */
@Test
    public void testBoost(){
        try {
            String path = "D://LuceneEx/day02";
            String keyword = "android";
            Directory mdDirectory = FSDirectory.open(new File(path));
            // IK tokenizer
            Analyzer mAnalyzer = new IKAnalyzer();

            IndexReader reader = IndexReader.open(mdDirectory);
            try {
                IndexSearcher searcher = new IndexSearcher(reader);
                try {
                    String[] fields = { "title", "category" }; // search across several fields
                    QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_36,
                            fields, mAnalyzer);
                    Query query = parser.parse(keyword);

                    // No filter and no Sort: hits come back in the default order.
                    TopDocs tops = searcher.search(query, null, 50);

                    int count = tops.totalHits;

                    System.out.println("totalHits=" + count);

                    ScoreDoc[] docs = tops.scoreDocs;

                    for (int i = 0; i < docs.length; i++) {

                        Document doc = searcher.doc(docs[i].doc);

                        float score = docs[i].score;

                        int id = Integer.parseInt(doc.get("id"));
                        String title = doc.get("title");
                        String author = doc.get("author");
                        String publishTime = doc.get("publishTime");
                        String source = doc.get("source");
                        String category = doc.get("category");
                        float reputation = Float.parseFloat(doc.get("reputation"));

                        System.out.println(id + "\t" + title + "\t" + author + "\t"
                                + publishTime + "\t" + source + "\t" + category + "\t"
                                + reputation+"\t"+score);
                    }
                } finally {
                    // Close the searcher before the reader it wraps.
                    searcher.close();
                }
            } finally {
                reader.close();
            }

        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ParseException e) {
            e.printStackTrace();
        }
    }

    /**
     * Adds three books to the day02 index, each with a different document-level
     * boost (2.0, 2.5 and 1.5), so that a later search can show how the boost
     * changes the ranking.
     *
     * The writer is closed in a {@code finally} block so it is released even
     * when adding a document fails.
     */
    @Test
    public void testAdd() {

        try {
            String path = "D://LuceneEx/day02";
            Directory mdDirectory = FSDirectory.open(new File(path));

            // IK tokenizer; Lucene's own StandardAnalyzer(Version.LUCENE_36) could be used instead.
            Analyzer mAnalyzer = new IKAnalyzer();
            IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36,
                    mAnalyzer);

            IndexWriter writer = new IndexWriter(mdDirectory, config);
            try {
                Book book1 = createBook("Android內核揭秘", "ABC", "2010-07", "android 移動開發", 8.9f);
                Document doc1 = createDocument(book1);
                doc1.setBoost(2.0F); // boost factor 2.0 (default is 1.0): raises this document's score

                Book book2 = createBook("Android多媒體開發", "BCD", "2011-07", "android 多媒體", 8.5f);
                Document doc2 = createDocument(book2);
                doc2.setBoost(2.5F); // boost factor 2.5: the highest of the three

                Book book3 = createBook("Android企業應用開發", "QAB", "2012-05", "android 企業應用", 8.2f);
                Document doc3 = createDocument(book3);
                doc3.setBoost(1.5F); // boost factor 1.5: the lowest of the three

                writer.addDocument(doc1);
                writer.addDocument(doc2);
                writer.addDocument(doc3);
            } finally {
                // Always close the writer, including when addDocument throws.
                writer.close();
            }

        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (LockObtainFailedException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

運行結果

totalHits=3
3383	Android多媒體開發	BCD	2011-07	清華大學出版社	android 多媒體	8.5	1.259212
891	Android內核揭秘	ABC	2010-07	清華大學出版社	android 移動開發	8.9	1.0073696
2919	Android企業應用開發	QAB	2012-05	清華大學出版社	android 企業應用	8.2	0.75552726


三、自定義排序 
待完成。。。

 本文由用戶 jopen 自行上傳分享,僅供網友學習交流。所有權歸原作者,若您的權利被侵害,請聯系管理員。
 轉載本站原創文章,請注明出處,并保留原始鏈接、圖片水印。
 本站是一個以用戶分享為主的開源技術平臺,歡迎各類分享!