187. Repeated DNA Sequences 发表于 2022-01-29 Trie1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283class Solution { private static class Trie { private final TrieNode root; public Trie() { this.root = new TrieNode(); } } private static class TrieNode { private boolean end; private final TrieNode[] children; public TrieNode() { this.end = false; this.children = new TrieNode[4]; } public TrieNode getChild(char c) { return children[charToIndex(c)]; } private int charToIndex(char c) { switch (c) { case 'A': return 0; case 'C': return 1; case 'G': return 2; case 'T': return 3; default: throw new IllegalArgumentException(); } } public boolean isEnd() { return end; } public void setEnd(boolean end) { this.end = end; } public TrieNode addChild(char c) { TrieNode child = new TrieNode(); children[charToIndex(c)] = child; return child; } } public List<String> findRepeatedDnaSequences(String s) { Set<String> set = new HashSet<>(); // 注意使用 Trie 树时可能多次到达树中节点,故用 set 去重 Trie trie = new Trie(); for (int i = 0; i < s.length() - 9; i++) { addCharToTrie(s, i, trie, set); } return new ArrayList<>(set); } private void addCharToTrie(String s, int i, Trie trie, Set<String> set) { TrieNode node = trie.root; for (int j = i; j < i + 10; j++) { char c = s.charAt(j); TrieNode childNode = node.getChild(c); if (childNode != null) { node = childNode; } else { node = node.addChild(c); } } if (node.isEnd()) { set.add(s.substring(i, i + 10)); } else { node.setEnd(true); } }} Hash12345678910111213141516171819202122232425262728293031323334353637383940414243class Solution { private int hash(char c) { switch (c) { case 'A': return 0b00; case 'C': return 0b01; case 'G': return 0b10; case 'T': return 0b11; default: throw new IllegalArgumentException(); } } public List<String> findRepeatedDnaSequences(String s) { List<String> list = new ArrayList<>(); if (s.length() <= 10) { return list; } int hash = 0; for (int i = 0; i < 10; i++) { hash = (hash << 2) | hash(s.charAt(i)); } Map<Integer, Integer> hashToCountMap = new HashMap<>(); hashToCountMap.put(hash, 1); for (int i = 10; i < s.length(); i++) { hash = ((hash << 2) | hash(s.charAt(i))) & 0b11111111111111111111; int count = hashToCountMap.getOrDefault(hash, 0); if (count == 1) { list.add(s.substring(i - 9, i + 1)); } hashToCountMap.put(hash, count + 1); } return list; }} Rolling Hash123456789101112131415161718192021222324252627282930class Solution { private static final int PRIME = Boolean.hashCode(true); public List<String> findRepeatedDnaSequences(String s) { List<String> answerList = new ArrayList<>(); int[] prefixHashArray = new int[s.length() + 1]; // 前 i 个字符组成的字符串对应的 hash 值数组 int[] factorArray = new int[s.length() + 1]; factorArray[0] = 1; // 计算出截止至下标为 i 的字符(含)的前缀字符串哈希 for (int i = 0; i < s.length(); i++) { prefixHashArray[i + 1] = prefixHashArray[i] * PRIME + s.charAt(i); factorArray[i + 1] = factorArray[i] * PRIME; } Map<Integer, Integer> hashToCountMap = new HashMap<>(); for (int j = 9; j < s.length(); j++) { int i = j - 9; int hash = prefixHashArray[j + 1] - prefixHashArray[i] * factorArray[10]; int count = hashToCountMap.getOrDefault(hash, 0); if (count == 1) { answerList.add(s.substring(i, j + 1)); } hashToCountMap.put(hash, count + 1); } return answerList; }} Rolling Hash123456789101112131415161718192021222324252627282930313233class Solution { private static final long PRIME = Boolean.hashCode(true); public List<String> findRepeatedDnaSequences(String s) { if (s.length() <= 10) { return Collections.emptyList(); } List<String> seqList = new ArrayList<>(); Map<Long, Integer> hashToFeqMap = new HashMap<>(); long hash = 0; long product = 1; // target: PRIME ^ 9 for (int i = 0; i < 10; i++) { if (i != 0) { product *= PRIME; } hash = hash * PRIME + s.charAt(i); } hashToFeqMap.put(hash, 1); for (int i = 10; i < s.length(); i++) { hash -= s.charAt(i - 10) * product; hash = hash * PRIME + s.charAt(i); int feq = hashToFeqMap.getOrDefault(hash, 0); if (feq == 1) { seqList.add(s.substring(i - 9, i + 1)); } hashToFeqMap.put(hash, feq + 1); } return seqList; }} Reference187. Repeated DNA Sequences