The test set has no rel or json folders, but the code below checks whether the rel and json directories exist and raises an error when they don't. How can I work around this?
def load_dataset(self, dataset_dir, with_cells, trim=None, debug=False, exts=None):
    dataset, cells = [], []
    if exts is None:
        exts = ['chunk', 'rel']
    if with_cells:
        exts.append('json')
    sub_paths = self.get_sub_paths(dataset_dir, exts, trim=trim)
    for i, paths in enumerate(sub_paths):
        if debug and i > 50:
            break
        chunk_path = paths[0]
        relation_path = paths[1]
        chunks = self.load_chunks(chunk_path)
        # TODO handle big tables
        #if len(chunks) > 100 or len(chunks) == 0: continue
        relations = self.load_relations(relation_path)
        #new_chunks, new_rels = self.clean_chunk_rel(chunks, relations)
        #chunks, relations = new_chunks, new_rels
        if with_cells:
            cell_path = paths[2]
            with open(cell_path) as f:
                cell_json = json.load(f)
        else:
            cell_json = None
        dataset.append(Data(
            chunks=chunks,
            relations=relations,
            cells=cell_json,
            path=chunk_path,
        ))
    return dataset

def get_sub_paths(self, root_dir: str, sub_names: List[str], trim=None):
    # Check the existence of directories
    assert os.path.isdir(root_dir)
    # TODO: sub_dirs redundancy
    sub_dirs = []
    for sub_name in sub_names:
        sub_dir = os.path.join(root_dir, sub_name)
        assert os.path.isdir(sub_dir), '"%s" is not dir.' % sub_dir
        sub_dirs.append(sub_dir)
    paths = []
    d = os.listdir(sub_dirs[0])
    d = d[:trim] if trim else d
    for file_name in d:
        sub_paths = [os.path.join(sub_dirs[0], file_name)]
        name = os.path.splitext(file_name)[0]
        for ext in sub_names[1:]:
            sub_path = os.path.join(root_dir, ext, name + '.' + ext)
            assert os.path.exists(sub_path)
            sub_paths.append(sub_path)
        paths.append(sub_paths)
    return paths
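If the test split only contains a chunk/ directory (no ground-truth rel or json), one possible workaround is to make the missing sub-directories optional instead of asserting on them. The sketch below is only an illustration of that idea, not the repository's official fix: the function name get_sub_paths_optional, the skip_missing flag, and the None-padding convention are all made up here for the example.

import os
from typing import List, Optional

def get_sub_paths_optional(root_dir: str, sub_names: List[str], trim=None,
                           skip_missing: bool = True):
    """Variant of get_sub_paths that tolerates missing sub-directories.

    skip_missing is a hypothetical flag: when True, sub-directories that do
    not exist (e.g. 'rel'/'json' in a test split) are dropped instead of
    raising, and their slots in each paths entry are filled with None.
    """
    assert os.path.isdir(root_dir)
    kept = []
    for sub_name in sub_names:
        sub_dir = os.path.join(root_dir, sub_name)
        if os.path.isdir(sub_dir):
            kept.append(sub_name)
        elif not skip_missing:
            raise AssertionError('"%s" is not dir.' % sub_dir)
    assert kept, 'no requested sub-directory found under %s' % root_dir
    paths = []
    first_dir = os.path.join(root_dir, kept[0])
    file_names = os.listdir(first_dir)
    file_names = file_names[:trim] if trim else file_names
    for file_name in file_names:
        name = os.path.splitext(file_name)[0]
        # One slot per requested extension; missing ones stay None so that
        # paths[1] / paths[2] can still be indexed by load_dataset.
        sub_paths: List[Optional[str]] = [None] * len(sub_names)
        sub_paths[0] = os.path.join(first_dir, file_name)
        for ext in kept[1:]:
            sub_paths[sub_names.index(ext)] = os.path.join(root_dir, ext, name + '.' + ext)
        paths.append(sub_paths)
    return paths

On the load_dataset side the corresponding guards would then be needed, e.g. relations = self.load_relations(relation_path) if relation_path else [], and cell_json = None when paths[2] is None. Alternatively, calling load_dataset with with_cells=False at least removes the json requirement, though the rel directory would still be expected by the original code.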