package com.diagnose.util;

import java.util.*;

/**
 * Substring search over a fixed list of strings, accelerated by an n-gram inverted index.
 *
 * <p>At construction time every n-gram (of length {@code GRAM_SIZE}) of every string is
 * recorded together with the positions of the strings that contain it. A query is answered by
 * intersecting the position sets of the query's own n-grams and then verifying each surviving
 * candidate with {@link String#contains(CharSequence)}.
 *
 * <p>The list passed to the constructor is copied, so later changes to the caller's list do not
 * affect search results. {@code null} elements are ignored.
 */
public class StringMatcher {

    /** Length of the n-grams used as index keys. */
    private static final int GRAM_SIZE = 1;

    /** Maps each n-gram to the positions (in {@link #dataList}) of the strings containing it. */
    private final Map<String, Set<Integer>> indexMap = new HashMap<>();

    /** Snapshot of the searchable strings, taken when the matcher was created. */
    private final List<String> dataList;

    /**
     * Creates a matcher over the given strings and builds the n-gram index.
     *
     * @param dataList strings to search; copied, {@code null} elements are skipped
     * @throws NullPointerException if {@code dataList} is {@code null}
     */
    public StringMatcher(List<String> dataList) {
        Objects.requireNonNull(dataList, "dataList must not be null");
        // Copy so the index can never get out of sync with a list the caller mutates later.
        this.dataList = new ArrayList<>(dataList);
        buildIndex();
    }

    /**
     * Populates {@link #indexMap}.
     *
     * <p>Only fixed-length n-grams are indexed (rather than every substring) to balance lookup
     * speed against memory use.
     */
    private void buildIndex() {
        for (int i = 0; i < dataList.size(); i++) {
            String s = dataList.get(i);
            if (s == null) {
                continue;
            }
            // Record every n-gram of this string.
            for (int j = 0; j + GRAM_SIZE <= s.length(); j++) {
                String gram = s.substring(j, j + GRAM_SIZE);
                indexMap.computeIfAbsent(gram, k -> new HashSet<>()).add(i);
            }
        }
    }

    /**
     * Returns up to {@code limit} strings that contain {@code query} as a substring.
     *
     * <p>Matches are returned in the order in which they appear in the original data list, so a
     * limited result always consists of the earliest matches.
     *
     * @param query substring to look for; the empty string matches every element
     * @param limit maximum number of results; values {@code <= 0} yield an empty list
     * @return a new, mutable list of matching strings (never {@code null})
     * @throws NullPointerException if {@code query} is {@code null}
     */
    public List<String> search(String query, int limit) {
        Objects.requireNonNull(query, "query must not be null");
        if (limit <= 0) {
            return new ArrayList<>();
        }
        // A query shorter than one n-gram cannot use the index; fall back to a linear scan.
        if (query.length() < GRAM_SIZE) {
            return simpleSearch(query, limit);
        }

        // Intersect the position sets of all n-grams of the query to get the candidates.
        Set<Integer> candidates = null;
        for (int i = 0; i + GRAM_SIZE <= query.length(); i++) {
            Set<Integer> positions = indexMap.get(query.substring(i, i + GRAM_SIZE));
            if (positions == null) {
                // An n-gram that occurs nowhere means no string can contain the query.
                return new ArrayList<>();
            }
            if (candidates == null) {
                candidates = new HashSet<>(positions);
            } else {
                candidates.retainAll(positions);
            }
            if (candidates.isEmpty()) {
                return new ArrayList<>();
            }
        }

        // HashSet iteration order is unspecified; sort so results follow data-list order and
        // the limit keeps the earliest matches, consistent with simpleSearch.
        List<Integer> ordered = new ArrayList<>(candidates);
        Collections.sort(ordered);

        // Sharing n-grams is necessary but not sufficient, so verify each candidate.
        List<String> result = new ArrayList<>();
        for (int idx : ordered) {
            String s = dataList.get(idx);
            if (s.contains(query)) {
                result.add(s);
                if (result.size() >= limit) {
                    break;
                }
            }
        }
        return result;
    }

    /**
     * Linear scan over all strings, used when the index cannot be applied.
     *
     * @param query substring to look for
     * @param limit maximum number of results; values {@code <= 0} yield an empty list
     * @return a new, mutable list of matching strings in data-list order
     */
    private List<String> simpleSearch(String query, int limit) {
        List<String> result = new ArrayList<>();
        if (limit <= 0) {
            return result;
        }
        for (String s : dataList) {
            if (s != null && s.contains(query)) {
                result.add(s);
                if (result.size() >= limit) {
                    break;
                }
            }
        }
        return result;
    }
}