Java實現敏感詞檢測的代碼
[Java]代碼
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Sensitive-word ("bad word") detector.
 *
 * <p>Words are kept in per-length hash tables, plus an index that maps a word's first
 * character to a bitmask of the word lengths that start with that character. Searching
 * therefore only probes the lengths that can actually match at each position.
 *
 * <p>Matching is case-insensitive: both library words and searched content are lower-cased
 * character by character before comparison. The class keeps its state in public static
 * fields and performs no synchronization.
 */
public class BadWordsUtil {

    /**
     * Longest word, in UTF-16 chars, that the library accepts. Must stay below 31 because
     * the per-character length masks in {@link #wordIndex} are {@code int} bit sets.
     */
    public static final int WORDS_MAX_LENGTH = 10;

    /** Classpath resource holding the word library: UTF-8 text, one word per line. */
    public static final String BAD_WORDS_LIB_FILE_NAME = "badWords.txt";

    /**
     * Word tables bucketed by length: {@code badWordsList[n]} holds the normalized words of
     * length {@code n} as keys (values are unused). {@code null} until the tables are created.
     */
    public static Map[] badWordsList = null;

    /**
     * Index from a word's first (normalized) character to a bitmask in which bit {@code n}
     * is set when at least one library word of length {@code n} starts with that character.
     */
    public static Map<String, Integer> wordIndex = new HashMap<String, Integer>();

    /**
     * Loads the word library from the classpath resource {@link #BAD_WORDS_LIB_FILE_NAME}
     * and adds every usable line to the tables. Blank lines are ignored; words longer than
     * the tables allow are skipped with a warning on {@code System.err}.
     *
     * <p>The resource is read completely before any table is created, so a failed load
     * leaves {@link #badWordsList} untouched and a later call can retry.
     *
     * @throws IOException if the resource is missing or cannot be read
     */
    public static void initbadWordsList() throws IOException {
        List<String> lines = readLibraryLines();
        ensureTables();
        for (String line : lines) {
            if (!addBadWord(line) && strip(line).length() > 0) {
                System.err.println("Skipped bad word longer than "
                        + (badWordsList.length - 1) + " chars: " + strip(line));
            }
        }
    }

    /**
     * Adds a single word to the library, creating the tables first if necessary.
     *
     * <p>Note that creating the tables here means {@link #searchBanWords(String)} will no
     * longer lazily load the classpath library; call {@link #initbadWordsList()} explicitly
     * if both sources are wanted.
     *
     * @param word the word to add; surrounding whitespace and a leading BOM are removed
     * @return {@code true} if the word was stored, {@code false} if it was {@code null},
     *         blank, or longer than the tables allow
     */
    public static boolean addBadWord(String word) {
        if (word == null) {
            return false;
        }
        String normalized = normalize(strip(word));
        int length = normalized.length();
        if (length == 0) {
            return false;
        }
        ensureTables();
        if (length >= badWordsList.length) {
            return false;
        }
        bucket(length).put(normalized, "");

        String firstChar = normalized.substring(0, 1);
        Integer mask = wordIndex.get(firstChar);
        int current = (mask == null) ? 0 : mask.intValue();
        wordIndex.put(firstChar, Integer.valueOf(current | (1 << length)));
        return true;
    }

    /**
     * Finds every occurrence of a library word in {@code content}.
     *
     * <p>If the tables have not been created yet, the classpath library is loaded first.
     * Matches are reported in order of start position, shorter words first, and overlapping
     * matches are all reported. Each entry is the matched slice of {@code content} in its
     * original casing.
     *
     * @param content the text to scan; {@code null} or empty yields an empty list
     * @return the matched substrings, possibly empty, never {@code null}
     * @throws RuntimeException wrapping the {@link IOException} if lazy loading fails
     */
    public static List<String> searchBanWords(String content) {
        if (badWordsList == null) {
            try {
                initbadWordsList();
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }

        List<String> result = new ArrayList<String>();
        if (content == null || content.length() == 0) {
            return result;
        }

        // Same length as content, so indices are interchangeable between the two strings.
        String normalized = normalize(content);
        int total = content.length();
        for (int start = 0; start < total; start++) {
            Integer mask = wordIndex.get(normalized.substring(start, start + 1));
            if (mask == null) {
                continue;
            }
            // Never probe past the end of the content or past the last table.
            int longest = Math.min(total - start, badWordsList.length - 1);
            for (int length = 1; length <= longest; length++) {
                if ((mask.intValue() & (1 << length)) == 0) {
                    continue;
                }
                String candidate = normalized.substring(start, start + length);
                if (bucket(length).containsKey(candidate)) {
                    result.add(content.substring(start, start + length));
                }
            }
        }
        return result;
    }

    /** Reads all lines of the library resource as UTF-8 via the class loader. */
    private static List<String> readLibraryLines() throws IOException {
        // A stream (rather than a File built from the resource URL's path) also works
        // when the resource sits inside a JAR or its path contains escaped characters.
        InputStream in = BadWordsUtil.class.getClassLoader()
                .getResourceAsStream(BAD_WORDS_LIB_FILE_NAME);
        if (in == null) {
            throw new FileNotFoundException(
                    "Bad-word library not found on classpath: " + BAD_WORDS_LIB_FILE_NAME);
        }
        BufferedReader reader = new BufferedReader(new InputStreamReader(in, "UTF-8"));
        try {
            List<String> lines = new ArrayList<String>();
            String line;
            while ((line = reader.readLine()) != null) {
                lines.add(line);
            }
            return lines;
        } finally {
            reader.close();
        }
    }

    /** Creates the per-length tables (indices 0..WORDS_MAX_LENGTH) if they do not exist yet. */
    private static void ensureTables() {
        if (badWordsList == null) {
            Map[] tables = new Map[WORDS_MAX_LENGTH + 1];
            for (int i = 0; i < tables.length; i++) {
                tables[i] = new HashMap<String, String>();
            }
            badWordsList = tables;
        }
    }

    /** Typed view of the table for words of the given length. */
    @SuppressWarnings("unchecked")
    private static Map<String, String> bucket(int length) {
        return (Map<String, String>) badWordsList[length];
    }

    /**
     * Lower-cases char by char. Unlike {@code String.toLowerCase()}, this never changes the
     * string's length and does not depend on the default locale.
     */
    private static String normalize(String text) {
        char[] chars = text.toCharArray();
        for (int i = 0; i < chars.length; i++) {
            chars[i] = Character.toLowerCase(chars[i]);
        }
        return new String(chars);
    }

    /** Removes leading/trailing whitespace and byte-order marks (U+FEFF). */
    private static String strip(String text) {
        int start = 0;
        int end = text.length();
        while (start < end && isPadding(text.charAt(start))) {
            start++;
        }
        while (end > start && isPadding(text.charAt(end - 1))) {
            end--;
        }
        return text.substring(start, end);
    }

    /** True for whitespace and for the byte-order mark some editors put at the start of a file. */
    private static boolean isPadding(char c) {
        return Character.isWhitespace(c) || c == '\uFEFF';
    }

    /** Demo: loads the classpath library and prints the words found in a sample sentence. */
    public static void main(String[] args) throws IOException {
        String content = "含有敏感詞的測試";
        BadWordsUtil.initbadWordsList();
        List<String> badWordList = BadWordsUtil.searchBanWords(content);
        if (badWordList.isEmpty()) {
            System.out.println("沒有找到敏感詞!");
        } else {
            for (String s : badWordList) {
                System.out.println("找到敏感詞:" + s);
            }
        }
    }
}
本文由用戶 cenmin 自行上傳分享,僅供網友學習交流。所有權歸原作者,若您的權利被侵害,請聯繫管理員。
轉載本站原創文章,請註明出處,並保留原始鏈接、圖片水印。
本站是一個以用戶分享為主的開源技術平臺,歡迎各類分享!