Lucene3.6 之 查詢篇
1、BooleanQuery
lucene3.6中BooleanQuery 實現與或的復合搜索
BooleanClause用于表示布爾查詢子句關系的類,包括:BooleanClause.Occur.MUST,BooleanClause.Occur.MUST_NOT,BooleanClause.Occur.SHOULD。必須包含,不能包含,可以包含三種.有以下6種組合:
1.MUST和MUST:取得兩個查詢子句的交集。
2.MUST和MUST_NOT:表示查詢結果中不能包含MUST_NOT所對應的查詢子句的檢索結果。
3.SHOULD與MUST_NOT:連用時,功能同MUST和MUST_NOT。
4.SHOULD與MUST連用時,結果為MUST子句的檢索結果,但是SHOULD可影響排序。
5.SHOULD與SHOULD:表示“或”關系,最終檢索結果為所有檢索子句的并集。
6.MUST_NOT和MUST_NOT:無意義,檢索無結果。
示例代碼
/**
 * Runs a compound boolean search against the Lucene index stored at {@code path} and prints
 * every hit as a {@code Book}.
 *
 * <p>Two sub-queries are combined with {@code BooleanClause.Occur.MUST}: {@code keyword} parsed
 * against the "title" and "category" fields, and a fixed author name parsed against the
 * "author" field. Index and parse failures are printed via {@code printStackTrace()} and are
 * not rethrown.
 *
 * @param path    file-system directory that contains the index
 * @param keyword query text parsed against the "title" and "category" fields
 * @param size    maximum number of hits requested from the searcher
 */
public static void query(String path, String keyword, int size) {
    try {
        Directory mdDirectory = FSDirectory.open(new File(path));
        try {
            IndexReader reader = IndexReader.open(mdDirectory);
            try {
                IndexSearcher searcher = new IndexSearcher(reader);
                try {
                    Analyzer analyzer = new IKAnalyzer();
                    // Alternative analyzer: new StandardAnalyzer(Version.LUCENE_36)

                    // Parse the keyword against several fields at once.
                    String[] fieldName = { "title", "category" };
                    QueryParser queryParser = new MultiFieldQueryParser(
                            Version.LUCENE_36, fieldName, analyzer);
                    Query q1 = queryParser.parse(keyword);

                    QueryParser parser = new QueryParser(Version.LUCENE_36, "author", analyzer);
                    Query q2 = parser.parse("周偉明");

                    // MUST + MUST: a document has to satisfy both sub-queries.
                    BooleanQuery boolQuery = new BooleanQuery();
                    boolQuery.add(q1, BooleanClause.Occur.MUST);
                    boolQuery.add(q2, BooleanClause.Occur.MUST);

                    ScoreDoc[] docs = searcher.search(boolQuery, null, size).scoreDocs;
                    if (docs != null) {
                        for (ScoreDoc hit : docs) {
                            Document doc = searcher.doc(hit.doc);
                            Book book = new Book();
                            book.setId(Integer.parseInt(doc.get("id")));
                            book.setTitle(doc.get("title"));
                            book.setAuthor(doc.get("author"));
                            book.setPublishTime(doc.get("publishTime"));
                            book.setSource(doc.get("source"));
                            book.setCategory(doc.get("category"));
                            book.setReputation(Float.parseFloat(doc.get("reputation")));
                            System.out.println(book);
                        }
                    }
                } finally {
                    // Release the searcher even when parsing or a stored-field conversion fails.
                    searcher.close();
                }
            } finally {
                // The searcher was built from an existing reader, so the reader is closed here.
                reader.close();
            }
        } finally {
            mdDirectory.close();
        }
    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ParseException e) {
        e.printStackTrace();
    }
}</pre><br />
2、TermQuery
詞條查詢,通過對某個詞條的指定,實現檢索索引中存在該詞條的所有文檔。
/**
 * Demonstrates {@code TermQuery}: looks up the single term "android" in the "title" field of
 * the index at {@code D://LuceneEx/day02} and prints the total hit count followed by the
 * stored fields and score of up to 50 hits. I/O failures are printed, not rethrown.
 */
@Test
public void testTermQuery() {
    try {
        String path = "D://LuceneEx/day02";
        String keyword = "android";
        Directory mdDirectory = FSDirectory.open(new File(path));
        try {
            IndexReader reader = IndexReader.open(mdDirectory);
            try {
                IndexSearcher searcher = new IndexSearcher(reader);
                try {
                    TermQuery query = new TermQuery(new Term("title", keyword));
                    TopDocs tops = searcher.search(query, null, 50);
                    int count = tops.totalHits;
                    System.out.println("totalHits=" + count);

                    for (ScoreDoc hit : tops.scoreDocs) {
                        Document doc = searcher.doc(hit.doc);
                        float score = hit.score;
                        int id = Integer.parseInt(doc.get("id"));
                        String title = doc.get("title");
                        String author = doc.get("author");
                        String publishTime = doc.get("publishTime");
                        String source = doc.get("source");
                        String category = doc.get("category");
                        float reputation = Float.parseFloat(doc.get("reputation"));
                        System.out.println(id + "\t" + title + "\t" + author + "\t"
                                + publishTime + "\t" + source + "\t" + category + "\t"
                                + reputation + "\t" + score);
                    }
                } finally {
                    // Close on every path, including a failed stored-field conversion.
                    searcher.close();
                }
            } finally {
                reader.close();
            }
        } finally {
            mdDirectory.close();
        }
    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
}</pre><br />
3、TermRangeQuery
范圍查詢,這種范圍可以是日期,時間,數字,大小等等。可以使用"content:[a TO b]"(包含邊界)或者"content:{a TO b}"(不包含邊界) 查詢表達式
示例代碼
/**
 * Demonstrates {@code TermRangeQuery}: searches the "publishTime" field of the index at
 * {@code D://LuceneEx/day01} for terms between "2011-04" and "2011-07" and prints the total
 * hit count followed by up to 10 hits. I/O failures are printed, not rethrown.
 */
@Test
public void testTermRangeQuery() {
    try {
        String path = "D://LuceneEx/day01";
        Directory mdDirectory = FSDirectory.open(new File(path));
        try {
            IndexReader reader = IndexReader.open(mdDirectory);
            try {
                IndexSearcher searcher = new IndexSearcher(reader);
                try {
                    String fieldName = "publishTime";
                    // Books published between "2011-04" and "2011-07";
                    // the flags exclude the lower bound and include the upper bound.
                    TermRangeQuery tq =
                            new TermRangeQuery(fieldName, "2011-04", "2011-07", false, true);
                    TopDocs tops = searcher.search(tq, null, 10);
                    int count = tops.totalHits;
                    System.out.println("totalHits="+count);

                    for (ScoreDoc hit : tops.scoreDocs) {
                        Document doc = searcher.doc(hit.doc);
                        int id = Integer.parseInt(doc.get("id"));
                        String title = doc.get("title");
                        String author = doc.get("author");
                        String publishTime = doc.get("publishTime");
                        String source = doc.get("source");
                        System.out.println(id+" "+title+" "+author+" "+publishTime+" "+source);
                    }
                } finally {
                    // Close on every path, including a failed stored-field conversion.
                    searcher.close();
                }
            } finally {
                reader.close();
            }
        } finally {
            mdDirectory.close();
        }
    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
}</pre> <p></p>
4、PrefixQuery
搜索以指定字符串開頭的項的文檔。當查詢表達式中的短語以"*"結尾時,QueryParser的parse函數會為查詢表達式項創建一個PrefixQuery對象。
示例代碼
/**
 * Demonstrates {@code PrefixQuery}: searches the "source" field of the index at
 * {@code D://LuceneEx/day01} using the prefix term "清華大學" and prints the total hit count
 * followed by up to 10 hits. I/O failures are printed, not rethrown.
 */
@Test
public void testPrefixQuery() {
    try {
        String path = "D://LuceneEx/day01";
        Directory mdDirectory = FSDirectory.open(new File(path));
        try {
            IndexReader reader = IndexReader.open(mdDirectory);
            try {
                IndexSearcher searcher = new IndexSearcher(reader);
                try {
                    String fieldName = "source";
                    Term prefix = new Term(fieldName, "清華大學");
                    PrefixQuery preq = new PrefixQuery(prefix);
                    TopDocs tops = searcher.search(preq, null, 10);
                    int count = tops.totalHits;
                    System.out.println("totalHits="+count);

                    for (ScoreDoc hit : tops.scoreDocs) {
                        Document doc = searcher.doc(hit.doc);
                        int id = Integer.parseInt(doc.get("id"));
                        String title = doc.get("title");
                        String author = doc.get("author");
                        String publishTime = doc.get("publishTime");
                        String source = doc.get("source");
                        System.out.println(id+" "+title+" "+author+" "+publishTime+" "+source);
                    }
                } finally {
                    // Close on every path, including a failed stored-field conversion.
                    searcher.close();
                }
            } finally {
                reader.close();
            }
        } finally {
            mdDirectory.close();
        }
    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
}</pre><br />
5、PhraseQuery
短語查詢,默認為完全匹配,但可以指定坡度(Slop,默認為0)改變范圍。比如Slop=1,檢索短語為“電臺”,那么在“電臺”中間有一個字的也可以被查找出來,比如“電視臺”。 查詢表達式可以為 "電 臺"~1(短語放在雙引號內,其后的 ~1 表示坡度為1)
示例代碼
/**
 * Demonstrates {@code PhraseQuery}: searches the "title" field of the index at
 * {@code D://LuceneEx/day01} for the terms "Lucene" and "入門" as a phrase, leaving the slop
 * at its default, and prints the total hit count followed by up to 50 hits. I/O failures are
 * printed, not rethrown.
 */
@Test
public void testPhraseQuery() {
    try {
        String path = "D://LuceneEx/day01";
        Directory mdDirectory = FSDirectory.open(new File(path));
        try {
            IndexReader reader = IndexReader.open(mdDirectory);
            try {
                IndexSearcher searcher = new IndexSearcher(reader);
                try {
                    String fieldName = "title";
                    PhraseQuery query = new PhraseQuery();
                    query.add(new Term(fieldName,"Lucene"));
                    query.add(new Term(fieldName,"入門"));
                    // Optional: query.setSlop(1) to set a slop of 1 instead of the default.
                    TopDocs tops = searcher.search(query, null, 50);
                    int count = tops.totalHits;
                    System.out.println("totalHits="+count);

                    for (ScoreDoc hit : tops.scoreDocs) {
                        Document doc = searcher.doc(hit.doc);
                        int id = Integer.parseInt(doc.get("id"));
                        String title = doc.get("title");
                        String author = doc.get("author");
                        String publishTime = doc.get("publishTime");
                        String source = doc.get("source");
                        System.out.println(id+" "+title+" "+author+" "+publishTime+" "+source);
                    }
                } finally {
                    // Close on every path, including a failed stored-field conversion.
                    searcher.close();
                }
            } finally {
                reader.close();
            }
        } finally {
            mdDirectory.close();
        }
    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
}</pre><br />
6、FuzzyQuery
模糊查詢使用的匹配算法是Levenshtein(編輯距離)算法。此算法在比較兩個字符串時,將動作分為3種:加一個字母(Insert),刪一個字母(Delete),改變一個字母(Substitute)。 編輯距離能夠影響結果的得分,編輯距離越小得分越高.查詢表達式為"fuzzy~",使用~來表示模糊查詢。
示例代碼
/**
 * Demonstrates {@code FuzzyQuery}: searches the "category" field of the index at
 * {@code D://LuceneEx/day01} for terms similar to "云計算" and prints the total hit count
 * followed by up to 50 hits. I/O failures are printed, not rethrown.
 */
@Test
public void testFuzzyQuery() {
    try {
        String path = "D://LuceneEx/day01";
        Directory mdDirectory = FSDirectory.open(new File(path));
        try {
            IndexReader reader = IndexReader.open(mdDirectory);
            try {
                IndexSearcher searcher = new IndexSearcher(reader);
                try {
                    String fieldName = "category";
                    Term term = new Term(fieldName, "云計算");
                    // Second argument is the minimum similarity threshold.
                    FuzzyQuery query = new FuzzyQuery(term, 0.1f);
                    // Variant with an extra third argument: new FuzzyQuery(term, 0.1f, 1)
                    TopDocs tops = searcher.search(query, null, 50);
                    int count = tops.totalHits;
                    System.out.println("totalHits="+count);

                    for (ScoreDoc hit : tops.scoreDocs) {
                        Document doc = searcher.doc(hit.doc);
                        int id = Integer.parseInt(doc.get("id"));
                        String title = doc.get("title");
                        String author = doc.get("author");
                        String publishTime = doc.get("publishTime");
                        String source = doc.get("source");
                        String category = doc.get("category");
                        System.out.println(id+" "+title+" "+author+" "+publishTime+" "+source+" "+category);
                    }
                } finally {
                    // Close on every path, including a failed stored-field conversion.
                    searcher.close();
                }
            } finally {
                reader.close();
            }
        } finally {
            mdDirectory.close();
        }
    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
}</pre><br />
7、WildcardQuery
通配符查詢,“*”號表示0到多個字符,“?”表示單個字符。 最好不要用通配符為首,否則會遍歷所有索引項
/**
 * Demonstrates {@code WildcardQuery}: searches the "title" field of the index at
 * {@code D://LuceneEx/day01} with the pattern "lucene*" and prints the total hit count
 * followed by up to 100 hits. I/O failures are printed, not rethrown.
 */
@Test
public void testWildcardQuery() {
    try {
        String path = "D://LuceneEx/day01";
        Directory mdDirectory = FSDirectory.open(new File(path));
        try {
            IndexReader reader = IndexReader.open(mdDirectory);
            try {
                IndexSearcher searcher = new IndexSearcher(reader);
                try {
                    String fieldName = "title";
                    // The wildcard is trailing; the pattern begins with the literal "lucene".
                    Term term = new Term(fieldName, "lucene*");
                    WildcardQuery query = new WildcardQuery(term);
                    TopDocs tops = searcher.search(query, null, 100);
                    int count = tops.totalHits;
                    System.out.println("totalHits="+count);

                    for (ScoreDoc hit : tops.scoreDocs) {
                        Document doc = searcher.doc(hit.doc);
                        int id = Integer.parseInt(doc.get("id"));
                        String title = doc.get("title");
                        String author = doc.get("author");
                        String publishTime = doc.get("publishTime");
                        String source = doc.get("source");
                        String category = doc.get("category");
                        System.out.println(id+" "+title+" "+author+" "+publishTime+" "+source+" "+category);
                    }
                } finally {
                    // Close on every path, including a failed stored-field conversion.
                    searcher.close();
                }
            } finally {
                reader.close();
            }
        } finally {
            mdDirectory.close();
        }
    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
}</pre><br />
8、SpanQuery
SpanQuery:跨度查詢。此類為抽象類。
SpanTermQuery:檢索效果完全同TermQuery,但內部會記錄一些位置信息,供SpanQuery的其它API使用,是其它屬于SpanQuery的Query的基礎。
SpanFirstQuery:查找方式為從Field的內容起始位置開始,在一個固定的寬度內查找所指定的詞條。
SpanNearQuery:功能類似PhraseQuery,SpanNearQuery查找所匹配的不一定是短語,還有可能是另一個SpanQuery的查詢結果作為整體考慮,進行嵌套查詢。
SpanOrQuery:把所有SpanQuery查詢結果綜合起來,作為檢索結果。
SpanNotQuery:從第一個SpanQuery查詢結果中,去掉第二個SpanQuery查詢結果,作為檢索結果。
示例代碼
@Test
public void testSpanQuery(){
try {
String path = "D://LuceneEx/day01";
File file = new File(path);
Directory mdDirectory = FSDirectory.open(file);
IndexReader reader = IndexReader.open(mdDirectory);
IndexSearcher searcher = new IndexSearcher(reader);
String fieldName = "title";
Term t1=new Term(fieldName,"權威");
Term t2=new Term(fieldName,"lucene");
Term t3=new Term(fieldName,"搜索");
Term t4=new Term(fieldName,"出版社");
SpanTermQuery q1=new SpanTermQuery(t1);
SpanTermQuery q2=new SpanTermQuery(t2);
SpanTermQuery q3=new SpanTermQuery(t3);
SpanTermQuery q4=new SpanTermQuery(t4);
SpanNearQuery query1=new SpanNearQuery(new SpanQuery[]{q1,q2},1,false);
SpanNearQuery query2=new SpanNearQuery(new SpanQuery[]{q3,q4},3,false);
SpanNotQuery query = new SpanNotQuery(query1, query2);
// Term t =new Term("content","mary");
// SpanTermQuery people = new SpanTermQuery(t);
// SpanFirstQuery query = new SpanFirstQuery(people,3);//3是寬度
TopDocs tops = searcher.search(query, null, 100);
int count = tops.totalHits;
System.out.println("totalHits="+count);
ScoreDoc[] docs = tops.scoreDocs;
for(int i=0;i<docs.length;i++){
Document doc = searcher.doc(docs[i].doc);
int id = Integer.parseInt(doc.get("id"));
String title = doc.get("title");
String author = doc.get("author");
String publishTime = doc.get("publishTime");
String source = doc.get("source");
</pre> 來自:http://blog.csdn.net/fx_sky/article/details/8543146