elasticsearch整合分詞、創建索引、搜索例子

hwl0420 8年前發布 | 9K 次閱讀 Java ElasticSearch
Elasticsearch 整合分詞、創建索引與搜索的例子。Elasticsearch 版本為 1.0;索引數據從數據表中動態讀取生成,搜索結果支持關鍵字高亮與查詢分頁。

源碼地址參考:http://git.oschina.net/kangjie1209/elasticsearch1.0

 

InitES.java  

package com.elasticsearch.config;

import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;

import java.util.ArrayList;
import java.util.List;

import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.elasticsearch.common.xcontent.XContentBuilder;

/**
 * Creates, caches and closes the shared Elasticsearch {@link TransportClient}
 * (plays the role of the DAO layer towards the search cluster).
 *
 * <p>The client is created lazily on the first call to {@link #initESClient()}
 * and reused afterwards. Both public methods are synchronized so that
 * concurrent callers cannot create (and leak) more than one client.
 *
 * @author jackkang
 */
public class InitES {

    static final Log log = LogFactory.getLog(InitES.class);

    /**
     * Shared client instance; {@code null} until the first successful
     * initialisation and again after {@link #closeESClient()}.
     */
    private static TransportClient client;

    /**
     * Returns the shared transport client, creating it on first use.
     *
     * <p>The node addresses are read from the {@code transportAddresses}
     * configuration property as a comma-separated list of {@code host:port}
     * entries.
     *
     * @return the shared client, or {@code null} if it could not be created
     *         (the failure is logged together with its stack trace)
     */
    public static synchronized TransportClient initESClient() {
        if (client != null) {
            return client;
        }
        try {
            // Client settings; the cluster name must match the server side
            // (the default is "elasticsearch").
            // NOTE(review): several of the keys below look like node-side
            // settings - confirm they have any effect on a transport client.
            Settings settings = ImmutableSettings
                    .settingsBuilder()
                    .put("cluster.name", "elasticsearch")
                    .put("discovery.type", "zen") // cluster discovery mechanism
                    .put("discovery.zen.minimum_master_nodes", 2) // require at least 2 masters
                    .put("discovery.zen.ping_timeout", "200ms") // too small a value may prevent discovery on slow networks
                    .put("discovery.initial_state_timeout", "500ms")
                    .put("gateway.type", "local") // one of: fs, none, local
                    .put("index.number_of_shards", 1)
                    .put("action.auto_create_index", false) // whether indices are created automatically
                    .put("cluster.routing.schedule", "50ms") // interval for noticing new nodes
                    .build();

            // Comma-separated "host:port" list of the cluster nodes to connect to.
            String transportAddresses = Config.getProperty(
                    "transportAddresses", "");
            InetSocketTransportAddress[] addressList = parseTransportAddresses(transportAddresses);

            TransportClient created = new TransportClient(settings);
            try {
                created.addTransportAddresses(addressList);
            } catch (RuntimeException e) {
                // Do not leak the half-initialised client and its resources.
                created.close();
                throw e;
            }
            client = created;
        } catch (Exception e) {
            // Pass the exception itself so the stack trace is not lost.
            log.error("獲取客戶端對象異常:" + e.getMessage(), e);
        }
        return client;
    }

    /**
     * Closes the shared client, if any, and forgets it so that the next call
     * to {@link #initESClient()} builds a fresh one instead of returning a
     * closed instance.
     */
    public static synchronized void closeESClient() {
        if (client != null) {
            try {
                client.close();
            } finally {
                client = null;
            }
        }
    }

    /**
     * Parses a comma-separated list of {@code host:port} entries.
     * Surrounding whitespace is ignored and blank entries are skipped.
     *
     * @param transportAddresses the raw property value; may be {@code null} or empty
     * @return the parsed addresses; empty when no entries are configured
     * @throws IllegalArgumentException if an entry has no port or the port is
     *         not a number
     */
    private static InetSocketTransportAddress[] parseTransportAddresses(String transportAddresses) {
        List<InetSocketTransportAddress> list = new ArrayList<InetSocketTransportAddress>();
        if (StringUtils.isNotEmpty(transportAddresses)) {
            for (String entry : transportAddresses.split(",")) {
                String hostAndPort = entry.trim();
                if (hostAndPort.isEmpty()) {
                    continue;
                }
                String[] parts = hostAndPort.split(":");
                if (parts.length < 2) {
                    throw new IllegalArgumentException("Invalid transport address '"
                            + hostAndPort + "', expected host:port");
                }
                String address = parts[0].trim();
                int port = Integer.parseInt(parts[1].trim());
                list.add(new InetSocketTransportAddress(address, port));
            }
        }
        return list.toArray(new InetSocketTransportAddress[list.size()]);
    }
}

SearchAction.java       

package com.elasticsearch.action;

import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;

import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;

import com.elasticsearch.config.ElasticsearchUtil;
import com.elasticsearch.pojo.Pager;
import com.opensymphony.xwork2.ActionSupport;

/**
 * Struts2 action that runs a keyword search through {@link ElasticsearchUtil}
 * and exposes the highlighted hits, the paging state, the hit count and the
 * elapsed time to the view.
 */
public class SearchAction extends ActionSupport {
    private static final long serialVersionUID = 1L;

    /** Matches a complete script element including its body. */
    private static final Pattern SCRIPT_PATTERN = Pattern.compile(
            "<[\\s]*?script[^>]*?>[\\s\\S]*?<[\\s]*?\\/[\\s]*?script[\\s]*?>",
            Pattern.CASE_INSENSITIVE);
    /** Matches a complete style element including its body. */
    private static final Pattern STYLE_PATTERN = Pattern.compile(
            "<[\\s]*?style[^>]*?>[\\s\\S]*?<[\\s]*?\\/[\\s]*?style[\\s]*?>",
            Pattern.CASE_INSENSITIVE);
    /** Matches any single HTML tag. */
    private static final Pattern HTML_TAG_PATTERN = Pattern.compile(
            "<[^>]+>", Pattern.CASE_INSENSITIVE);

    /** Search keyword. */
    private String wd;
    /** Time spent on the search, in milliseconds. */
    private double spendTime;
    /** Result rows handed to the view. */
    private List<Map<String, Object>> pageList = new ArrayList<Map<String, Object>>();
    /** Paging state. */
    private Pager pager;
    /** Total number of hits reported by the last search. */
    private Long total;

    private SearchResponse response;

    /**
     * Runs the keyword search and fills {@code pageList}, {@code total} and
     * {@code spendTime}.
     *
     * <p>When no keyword was supplied ({@code wd} is {@code null}) no search
     * is performed: the result list is left untouched, the total is set to 0
     * and {@code SUCCESS} is returned.
     *
     * @return {@code SUCCESS}
     * @throws MalformedURLException declared for compatibility with existing callers
     * @throws UnsupportedEncodingException if the keyword cannot be re-decoded
     **/
    public String search() throws MalformedURLException,
            UnsupportedEncodingException {
        long startTime = System.currentTimeMillis();

        // Use the pager bound from the request, or fall back to 10 items per page.
        if (pager == null) {
            pager = new Pager();
            pager.setMaxPageItems(10);
        }
        // NOTE(review): meaning of this default is defined by Pager - confirm.
        pager.setDefaultMaxPageItems(1);

        if (wd == null) {
            // Request without a keyword: nothing to search for.
            total = 0L;
            spendTime = 0;
            return SUCCESS;
        }

        // Re-decode the keyword to undo mojibake: assumes the container decoded
        // the UTF-8 request parameter as ISO-8859-1 - TODO confirm per deployment.
        wd = new String(wd.getBytes("ISO-8859-1"), "UTF-8");

        // Fields for which highlighted fragments are requested.
        String[] highFields = new String[] { "content", "title" };

        // "medcl" / "news" are presumably the index and type names; verify
        // against ElasticsearchUtil.searcher.
        response = ElasticsearchUtil.searcher("medcl", "news",
                pager.getOffset(), pager.getMaxPageItems(), wd, highFields);

        SearchHits searchHits = response.getHits();
        total = searchHits.totalHits();
        System.out.println("命中總數:" + total);

        for (SearchHit hit : searchHits.getHits()) {
            Map<String, Object> source = hit.getSource();

            Map<String, Object> row = new HashMap<String, Object>();
            row.put("id", source.get("id"));
            row.put("content", ElasticsearchUtil.getHighlightFields(hit, "content"));
            row.put("title", ElasticsearchUtil.getHighlightFields(hit, "title"));
            row.put("create_time", source.get("create_time"));
            row.put("links", source.get("link"));
            pageList.add(row);
        }

        // Full elapsed time in milliseconds. Reading Calendar.MILLISECOND would
        // only yield the sub-second part and under-report searches over 1s.
        spendTime = System.currentTimeMillis() - startTime;

        return SUCCESS;
    }

    /**
     * Strips script elements, style elements and all remaining HTML tags from
     * the given string.
     *
     * @param inputString text that may contain HTML markup; may be {@code null}
     * @return the text with markup removed, or an empty string for {@code null} input
     */
    public static String Html2Text(String inputString) {
        if (inputString == null) {
            return "";
        }
        // Remove script and style elements with their bodies first, so their
        // content does not survive the generic tag removal.
        String text = SCRIPT_PATTERN.matcher(inputString).replaceAll("");
        text = STYLE_PATTERN.matcher(text).replaceAll("");
        return HTML_TAG_PATTERN.matcher(text).replaceAll("");
    }

    public String getWd() {
        return wd;
    }

    public void setWd(String wd) {
        this.wd = wd;
    }

    public double getSpendTime() {
        return spendTime;
    }

    public void setSpendTime(double spendTime) {
        this.spendTime = spendTime;
    }

    public List<Map<String, Object>> getPageList() {
        return pageList;
    }

    public void setPageList(List<Map<String, Object>> pageList) {
        this.pageList = pageList;
    }

    public Pager getPager() {
        return pager;
    }

    public void setPager(Pager pager) {
        this.pager = pager;
    }

    public Long getTotal() {
        return total;
    }

    public void setTotal(Long total) {
        this.total = total;
    }
}

114219_1e0794b8_22473.png    

144252_69cfbde5_22473.png    

[圖片] 144317_68b7e7cc_22473.png    

 本文由用戶 hwl0420 自行上傳分享,僅供網友學習交流。所有權歸原作者,若您的權利被侵害,請聯系管理員。
 轉載本站原創文章,請注明出處,并保留原始鏈接、圖片水印。
 本站是一個以用戶分享為主的開源技術平臺,歡迎各類分享!