-
Notifications
You must be signed in to change notification settings - Fork 0
/
multiprocessing_test.py
84 lines (59 loc) · 2.04 KB
/
multiprocessing_test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import pymysql
import multiprocessing
import os
# Project root: the directory two levels above this file.
PROJECT_HOME = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# <PROJECT_HOME>/data/keyword.
# NOTE(review): the name looks like a typo for DATA_DIR, and nothing in the
# visible code reads it -- confirm against other modules before renaming.
DATE_DIR = os.path.join(PROJECT_HOME, 'data', 'keyword')
# Query that selects two string literals, aliased ID and CONTENT, from DUAL.
SELECT_QUERY = """
SELECT 'ID' AS ID, 'CONTENT' AS CONTENT
FROM DUAL
"""
def fetch_and_write(select_query, file_name):
    """Run ``select_query`` on the local MySQL server, then write a test file.

    A new connection is opened for every call, the query is executed and its
    first row is fetched. Afterwards ``file_name`` is created (or truncated)
    as UTF-8 text and filled with 1000 lines of the form
    ``<file_name>,<index>`` for ``index`` in ``0..999``. The fetched row is
    not part of the output.

    Args:
        select_query: SQL text passed unchanged to ``cursor.execute``.
        file_name: Path of the output file; its directory must already exist.

    Raises:
        pymysql.MySQLError: If connecting or executing the query fails.
        OSError: If the output file cannot be opened or written.
    """
    print(f'fetching and writing starting={select_query},{file_name}')
    # NOTE(review): connection settings, including the password, are
    # hard-coded -- consider moving them to configuration.
    conn = pymysql.connect(
        host='localhost',
        user='root',
        password='system',
        db='mobi_chat',
        charset='utf8',
    )
    try:
        # The cursor is closed when the ``with`` block exits; the connection
        # is closed in ``finally`` so a failing query cannot leak it.
        with conn.cursor(pymysql.cursors.DictCursor) as curs:
            curs.execute(select_query)
            # Fetch the first row to complete the query round trip; its
            # values are not written anywhere.
            curs.fetchone()
    finally:
        conn.close()

    with open(file_name, 'w', encoding='utf-8') as file:
        file.writelines(f'{file_name},{index}\n' for index in range(1000))
def make_file_ranges(start, end, gap):
    """Return the boundaries that split ``start..end`` into ``gap``-sized steps.

    The result holds every value of ``range(start, end, gap)`` followed by one
    closing boundary (last value + ``gap``), so consecutive pairs describe
    half-open chunks. When ``end - start`` is not a multiple of ``gap`` the
    closing boundary lies beyond ``end``.

    Args:
        start: First boundary.
        end: Exclusive upper limit for the generated start boundaries.
        gap: Step between boundaries; must be non-zero.

    Returns:
        A list of boundaries, or an empty list when the range itself is empty
        (for example ``start >= end`` with a positive ``gap``).

    Raises:
        ValueError: If ``gap`` is zero (raised by ``range``).
    """
    file_ranges = list(range(start, end, gap))
    if not file_ranges:
        # Nothing to split: avoid indexing [-1] on an empty list.
        return []
    file_ranges.append(file_ranges[-1] + gap)
    return file_ranges
def make_file_names(file_dir, prefix, file_ranges):
    """Build one output path per pair of adjacent boundaries.

    Args:
        file_dir: Directory the paths are placed in.
        prefix: Leading part of every file name.
        file_ranges: Sequence of boundaries; each adjacent pair
            ``(start, end)`` yields one file.

    Returns:
        A list of ``(start, end, path)`` tuples where ``path`` is
        ``<file_dir>/<prefix>_<start>_<end>.keyword``. The list is empty when
        ``file_ranges`` has fewer than two boundaries.
    """
    # Pair every boundary with its successor instead of indexing by position.
    return [
        (start, end, os.path.join(file_dir, f'{prefix}_{start}_{end}.keyword'))
        for start, end in zip(file_ranges, file_ranges[1:])
    ]
def extract_keyword(start, end, file_path):
    """Fetch with the module's query and write the result file at ``file_path``.

    Args:
        start: Lower boundary of the chunk; currently not used.
        end: Upper boundary of the chunk; currently not used.
        file_path: Path of the output file handed to ``fetch_and_write``.
    """
    # fetch_and_write requires both a query and a target path; calling it
    # without arguments always raised TypeError.
    # NOTE(review): SELECT_QUERY is the only query defined in this module --
    # confirm it is the intended one once start/end are put to use.
    fetch_and_write(SELECT_QUERY, file_path)
if __name__ == '__main__':
    # Script entry point: write one file per range chunk, each in its own
    # worker process.
    print('fetching and writing start')
    file_ranges = make_file_ranges(0, 10000, 1000)
    output_dir = os.path.join(PROJECT_HOME, 'data', 'keyword', 'edims')
    # open(..., 'w') in the workers fails if the target directory is missing.
    os.makedirs(output_dir, exist_ok=True)
    file_names = make_file_names(output_dir, 'edims', file_ranges)

    # Start every worker first and join afterwards; joining right after
    # start() would run the processes one at a time.
    workers = []
    for _start, _end, file_name in file_names:
        worker = multiprocessing.Process(
            target=fetch_and_write,
            args=(SELECT_QUERY, file_name),
        )
        worker.start()
        workers.append(worker)

    for worker in workers:
        worker.join()