-
Notifications
You must be signed in to change notification settings - Fork 1
/
dataloader.py
42 lines (35 loc) · 1.44 KB
/
dataloader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import codecs
import pandas as pd
import json
class DataLoader:
    """Load plain-text, JSON and tabular (CSV / pickle) datasets from disk.

    The class keeps no state; every method receives the path of the file
    it should read.
    """

    def __init__(self):
        pass

    def read_texts(self, path, split=False):
        """Read a plain-text document.

        Args:
            path: Path of the text file.
            split: When true, return the content split on newline
                characters as a list; otherwise return one string
                (default).

        Returns:
            The file content as a ``str``, or a ``list`` of ``str`` when
            *split* is true.
        """
        # The context manager closes the handle even if reading fails.
        with open(path, "r") as handle:
            content = handle.read()
        return content.split('\n') if split else content

    def load_json(self, path):
        """Parse the JSON file at *path* and return the decoded object."""
        with open(path, 'r') as handle:
            return json.load(handle)

    def read_df(self, path, df_type="csv", sep=None, encoding=None, names=None):
        """Load a pandas DataFrame from a CSV or pickle file.

        Args:
            path: Path of the file to load.
            df_type: ``"pkl"`` to unpickle the file; any other value reads
                it as CSV.
            sep: Field separator forwarded to ``pandas.read_csv`` when given.
            encoding: Text encoding forwarded to ``pandas.read_csv`` when
                given.
            names: Column names forwarded to ``pandas.read_csv`` when given.

        Returns:
            Whatever the selected pandas reader returns.
        """
        config = {
            "sep": sep,
            "encoding": encoding,
            "df_type": df_type,
            "path": path,
            "names": names,
        }
        return self._df_handler(config)

    def _df_handler(self, config):
        """Dispatch *config* (as built by ``read_df``) to the pandas reader.

        Every CSV option that is not ``None`` is forwarded, independently of
        the others, so no combination of ``sep`` / ``encoding`` / ``names``
        is silently dropped. The options are CSV-specific and are not passed
        to ``pandas.read_pickle``, which does not accept them.
        """
        if config['df_type'] == 'pkl':
            # NOTE(security): unpickling can execute arbitrary code; only
            # load pickle files that come from a trusted source.
            return pd.read_pickle(config['path'])

        # `is not None` (rather than `!= None`) keeps array-like `names`
        # from triggering an element-wise comparison.
        options = {
            key: config[key]
            for key in ('sep', 'encoding', 'names')
            if config.get(key) is not None
        }
        return pd.read_csv(config['path'], **options)