Solution1: hash set + substring
"""
step1: create 2 sets for seen and repeated 10digit substring.
step2: loop through whole string, until last 9 digit
step3: check each substing start with i, end with i+10, if it has been seen before
step4: if not seen, update seen, if yes, add to repeat set
step5: return repeat set in list format
"""
class Solution:
def findRepeatedDnaSequences(self, s: str) -> List[str]:
# time O(L(N-L)) ~ O(N) space O(L(N-L)) ~ O(N)
seen = set()
repeat = set()
for i in range(len(s)-9):
ten = s[i:i+10]
if ten not in seen:
seen.add(ten)
else:
repeat.add(ten)
return list(repeat)
Soution 2: Bit Manipulation
class Solution:
def findRepeatedDnaSequences(self, s: str) -> List[str]:
# time O(4N) ~ O(N) space O(4N)
seen = dict()
d = {'A':0,'C':1,'G':2,'T':3}
res = []
v = 0
for i in range(len(s)):
v <<= 2
v |= d[s[i]]
v &= 0XFFFFF
if i < 9:
continue
if v not in seen:
seen[v] = 1
elif seen[v] == 1:
res.append(s[i-9:i+1])
seen[v] = 2
return res