forked from keon/algorithms
-
Notifications
You must be signed in to change notification settings - Fork 1
/
breaking_bad.py
113 lines (92 loc) · 3.42 KB
/
breaking_bad.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
"""
Given an api which returns an array of words and an array of symbols, display
the word with their matched symbol surrounded by square brackets.
If the word string matches more than one symbol, then choose the one with
longest length. (ex. 'Microsoft' matches 'i' and 'cro'):
Example:
Words array: ['Amazon', 'Microsoft', 'Google']
Symbols: ['i', 'Am', 'cro', 'Na', 'le', 'abc']
Output:
[Am]azon, Mi[cro]soft, Goog[le]
My solution(Wrong):
(I sorted the symbols array in descending order of length and ran loop over
words array to find a symbol match(using indexOf in javascript) which
worked. But I didn't make it through the interview, I am guessing my solution
was O(n^2) and they expected an efficient algorithm.
output:
['[Am]azon', 'Mi[cro]soft', 'Goog[le]', 'Amaz[o]n', 'Micr[o]s[o]ft', 'G[o][o]gle']
"""
words = ['Amazon', 'Microsoft', 'Google']
symbols = ['i', 'Am', 'cro', 'le', 'abc']
def match_symbol(words, symbols):
import re
combined = []
for s in symbols:
for c in words:
r = re.search(s, c)
if r:
combined.append(re.sub(s, "[{}]".format(s), c))
return combined
print(match_symbol(words, symbols))
"""
O(n * max(log(n), l)) time complexity
n = len(words), l = len of a word
"""
def match_symbol_1(words, symbols):
res = []
# reversely sort the symbols according to their lengths.
symbols = sorted(symbols, key = lambda _: len(_), reverse = True)
for word in words:
for symbol in symbols:
word_replaced = ''
# once match, append the `word_replaced` to res, process next word
if word.find(symbol) != -1:
word_replaced = word.replace(symbol, '[' + symbol + ']')
res.append(word_replaced)
break
# if this word matches no symbol, append it.
if word_replaced == '':
res.append(word)
return res
words = ['Amazon', 'Microsoft', 'Google', 'Facebook']
symbols = ['i', 'Am', 'cro', 'Na', 'le', 'abc']
print(match_symbol_1(words, symbols))
# ['[Am]azon', 'Mi[cro]soft', 'Goog[le]', 'Facebook']
"""
Another approach is to use a Trie for the dictionary (the symbols), and then match
brute force. The complexity will depend on the dictionary;
if all are suffixes of the other, it will be n*m
(where m is the size of the dictionary). For example, in Python:
"""
from functools import reduce
class TrieNode:
def __init__(self):
self.c = dict()
self.sym = None
def bracket(words, symbols):
root = TrieNode()
for s in symbols:
t = root
for char in s:
if char not in t.c:
t.c[char] = TrieNode()
t = t.c[char]
t.sym = s
result = dict()
for word in words:
i = 0
symlist = list()
while i < len(word):
j, t = i, root
while j < len(word) and word[j] in t.c:
t = t.c[word[j]]
if t.sym is not None:
symlist.append((j+1-len(t.sym), j+1, t.sym))
j += 1
i += 1
if len(symlist) > 0:
sym = reduce(lambda x, y: x if x[1]-x[0] >= y[1]-y[0] else y, symlist)
result[word] = "{}[{}]{}".format(word[:sym[0]], sym[2], word[sym[1]:])
return tuple(word if word not in result else result[word] for word in words)
bracket(['amazon', 'microsoft', 'google'], ['i', 'am', 'cro', 'na', 'le', 'abc'])
>>> ('[am]azon', 'mi[cro]soft', 'goog[le]')