472. Concatenated Words

Top Down

class Solution:
    def findAllConcatenatedWordsInADict(self, words: List[str]) -> List[str]:
        """tc O(N*W^2*2^W)  sc O(N*W^2*2^W)?? O(N*W^2)   W:  avg len of word
        main idea: dfs + memo 
        1. create a global memo, put all words into set for O(1) look up
        2. loop through all the words and check if it can be concatenated by at least two words
        2.1. assume any point of current word as split point, break into prefix and suffix
        2.2. check (1)if prefix and suffix are composed of two words in the word set  (2) check if prefix is a single word and remaining suffix composed of more than one word in word set  =>  if any of the two condition is meet, we consider current word can be concatenated,  otherwise not.  Cache the result into global memo for future pruning  
        
        """
        memo = {}
        words_set = set(words)
        def dfs(s, memo):
            if s in memo:
                return memo[s]
            memo[s] = False 
            for i in range(1,len(s)): # start from 1 because we want to skip cases where s[:0] == ""  
                prefix = s[:i]
                suffix = s[i:]
                if prefix in words_set and suffix in words_set:
                    memo[s] = True 
                    break 
                if prefix in words_set and dfs(s[i:],memo):
                    memo[s] = True
                    break
                # there is no need to check suffix since prefix already covered 
            return memo[s]
        res = []
        for w in words_set:
            if dfs(w,memo):
                res.append(w)
        return res 

Optimization: Bottom up —-

class Solution:
    def findAllConcatenatedWordsInADict(self, words: List[str]) -> List[str]:
        """tc O(N*W^3)  sc O(N+ max(W))   W:  avg len of word
        1. sort by length,  
        2. dp to find if current s can be split into more than one possible, put it into result
            2.1 find split point start from  max(1, min_len of word) to n +1 
            2.2 if we can find substring w[j:i] in the seen set and word w[:j] is valid, means we found the word can be concatenated  
        3. add current into seen 
        """
        # dp[i]: word[:i] can be concatenated with words
        words.sort(key=len)
        min_len = min(1,len(words[0]))
        memo = {}
        words_set = set()
        def bottom_up(s):
            if not s : return False # edge case:  s == "" 
            n = len(s)
            dp = [False]*(n+1)
            dp[0] = True 
            for i in range(min_len,n+1): # skip cases suffix length smaller than min word length
                for j in range(i):
                    if dp[j] and s[j:i] in words_set: # tc O(W)  => to optimize, use Trie, see solution below
                        dp[i] = True
                        break
            words_set.add(s)
            return dp[-1]
            
        res = []
        for w in words:
            if bottom_up(w):
                res.append(w)
        return res 

Trie + DP solution

class TrieNode :
    def __init__(self):
        self.children = {}
        self.isEnd = False


class Trie:
    def __init__(self):
        self.root = TrieNode()
    
    def add(self, word):
        cur = self.root
        for c in word:
            if c not in cur.children:
                cur.children[c] = TrieNode()
            cur = cur.children[c]
        cur.isEnd = True
        cur.word = word
    
    def is_concatenated(self, word):
        n = len(word)
        dp = [-1] * (n+1) 
        dp[n] = 0
        cur = self.root 
        for i in range(n-1,-1,-1):
            cur = self.root 
            for j in range(i,n):
                if not word[j] :  continue # for "" string
                if word[j] in cur.children:
                    cur = cur.children[word[j]]
                else:
                    break
                if cur.isEnd and dp[j+1] != -1: # current index has reached to a word's end and its neighbour substring starting with j + 1 is also a word  
                    dp[i] = max(dp[i],dp[j+1] +1) # avoid overwirting 
        return dp[0] >= 2
         
    
class Solution:
    def findAllConcatenatedWordsInADict(self, words: List[str]) -> List[str]:
        """ tc O(N*W^2) sc O(N*W)
        1. create trie from words 
        2. from back to front, check each word if it is concatenated 
        """
        root = Trie()
        for w in words:
            root.add(w)
            
        res = []
        for w in words:
            if root.is_concatenated(w):
                res.append(w)
                
        return res