Lucene.Net 全文檢索:創建索引、查詢、分頁

jopen 10年前發布 | 23K 次閱讀 Lucene 搜索引擎 lucenc.net

    #region 創建、更新索引庫
/// <summary>
/// Creates the Lucene index stored under <c>path</c> when it does not exist yet,
/// otherwise refreshes it, using every record returned by <c>LuceneBll.Instance.Get()</c>.
/// </summary>
/// <remarks>
/// Each record is written with <c>UpdateDocument</c> keyed on its "Id" term, so running
/// this method against an existing index replaces the stored copy of a record instead of
/// appending a duplicate. Records that have disappeared from the data source are not
/// removed from the index by this method.
/// </remarks>
private void CreateIndexData()
{
    // Folder that holds the index files.
    FSDirectory dir = FSDirectory.Open(new DirectoryInfo(path), new NativeFSLockFactory());
    try
    {
        // Does an index already exist in that folder?
        bool has = IndexReader.IndexExists(dir);
        if (has && IndexWriter.IsLocked(dir))
        {
            // Release a write lock left on the existing index.
            // NOTE(review): this removes the lock unconditionally; it presumes no other
            // writer is active on the same folder - confirm against the deployment.
            IndexWriter.Unlock(dir);
        }

        // Writer; the third argument creates a fresh index only when none exists.
        IndexWriter iw = new IndexWriter(dir, new PanGuAnalyzer(), !has, IndexWriter.MaxFieldLength.UNLIMITED);
        try
        {
            List<LuceneModel> list = LuceneBll.Instance.Get();
            if (list != null)
            {
                foreach (var o in list)
                {
                    // Field.Store.YES      : store the original (un-analyzed) field value
                    // Field.Store.NO       : do not store; storing is independent of indexing
                    // Field.Store.COMPRESS : store compressed, for long text or binary, at a performance cost
                    //
                    // Field.Index.ANALYZED              : analyze, then index
                    // Field.Index.ANALYZED_NO_NORMS     : analyze, then index without norms
                    // Field.Index.NO                    : do not index
                    // Field.Index.NOT_ANALYZED          : index without analyzing
                    // Field.Index.NOT_ANALYZED_NO_NORMS : index without analyzing and without norms
                    //
                    // Field.TermVector.NO                     : no term vector
                    // Field.TermVector.WITH_OFFSETS           : store offsets
                    // Field.TermVector.WITH_POSITIONS         : store positions
                    // Field.TermVector.WITH_POSITIONS_OFFSETS : store positions and offsets
                    // Field.TermVector.YES                    : store the field's term vector for every document
                    string id = o.Id.ToString();

                    Document d = new Document();
                    d.Add(new Field("Id", id, Field.Store.YES, Field.Index.NOT_ANALYZED));
                    // Field rejects a null value, so fall back to an empty string.
                    d.Add(new Field("Title", o.Title ?? string.Empty, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
                    d.Add(new Field("Message", o.Message ?? string.Empty, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));

                    // Delete any document already indexed under this Id, then add the new one;
                    // a plain AddDocument would duplicate every record on each refresh.
                    iw.UpdateDocument(new Term("Id", id), d);
                }
            }
            iw.Optimize();
        }
        finally
        {
            // Always close the writer so the index write lock is released, even on failure.
            iw.Close();
        }
    }
    finally
    {
        dir.Close();
    }
}
       #endregion

        #region 查詢
        /// <summary>
        /// Runs a paged phrase search for the given keywords against the "Message" field
        /// of the index stored under <c>path</c>, building the index first when it is missing.
        /// </summary>
        /// <param name="str">Keywords to search for; they are split into terms with the PanGu segmenter.</param>
        /// <param name="index">Page number, starting at 1.</param>
        /// <param name="count">Number of results per page.</param>
        /// <returns>
        /// The matches on the requested page, or <c>null</c> when there is nothing to return:
        /// no hits, a page past the last result, a blank <paramref name="str"/>, or a
        /// non-positive <paramref name="index"/> / <paramref name="count"/>.
        /// </returns>
        private List<LuceneModel> Search(string str, int index = 1, int count = 3)
        {
            // Nothing can match these inputs; keep the "null means no results" contract
            // instead of handing Lucene an empty query or a non-positive collector size.
            if (string.IsNullOrWhiteSpace(str) || index < 1 || count < 1)
            {
                return null;
            }

            // Folder that holds the index files.
            FSDirectory dir = FSDirectory.Open(new DirectoryInfo(path), new NativeFSLockFactory());
            IndexReader ir = null;
            IndexSearcher searcher = null;
            try
            {
                // Build the index on first use.
                if (!IndexReader.IndexExists(dir))
                {
                    CreateIndexData();
                }

                // Reader (opened with the read-only flag set) and the searcher on top of it.
                ir = IndexReader.Open(dir, true);
                searcher = new IndexSearcher(ir);

                // Build the phrase query from the segmented keywords.
                PhraseQuery query = new PhraseQuery();
                Segment segment = new Segment();
                ICollection<WordInfo> words = segment.DoSegment(str);
                foreach (var word in words)
                {
                    // NOTE(review): relies on WordInfo.ToString() yielding the token text - confirm.
                    query.Add(new Term("Message", word.ToString()));
                }
                // Maximum distance allowed between the keywords.
                query.SetSlop(100);

                // Collect just enough top hits to reach the end of the requested page.
                int start = count * (index - 1);
                TopScoreDocCollector collector = TopScoreDocCollector.create(start + count, false);
                searcher.Search(query, null, collector);

                // TopDocs takes (start, howMany): skip the earlier pages, take one page.
                ScoreDoc[] docs = collector.TopDocs(start, count).scoreDocs;
                if (docs.Length == 0)
                {
                    return null;
                }

                List<LuceneModel> list = new List<LuceneModel>(docs.Length);
                foreach (var hit in docs)
                {
                    // Load the stored fields of the hit and map them onto the model.
                    Document document = searcher.Doc(hit.doc);
                    LuceneModel m = new LuceneModel();
                    m.Id = Convert.ToInt32(document.Get("Id"));
                    m.Title = document.Get("Title");
                    m.Message = document.Get("Message");
                    list.Add(m);
                }
                return list;
            }
            finally
            {
                // Release the index files even when the search throws.
                if (searcher != null)
                {
                    searcher.Close();
                }
                if (ir != null)
                {
                    ir.Close();
                }
                dir.Close();
            }
        }
        #endregion

來自:http://blog.csdn.net/pigkeli/article/details/34848141
 本文由用戶 jopen 自行上傳分享,僅供網友學習交流。所有權歸原作者,若您的權利被侵害,請聯繫管理員。
 轉載本站原創文章,請註明出處,並保留原始鏈接、圖片水印。
 本站是一個以用戶分享為主的開源技術平臺,歡迎各類分享!