-
Notifications
You must be signed in to change notification settings - Fork 4
/
TemporalDataSet.py
38 lines (37 loc) · 1.38 KB
/
TemporalDataSet.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import xml.etree.ElementTree as ET
class temprel_ee:
def __init__(self, xml_element):
self.xml_element = xml_element
self.label = xml_element.attrib['LABEL']
self.sentdiff = int(xml_element.attrib['SENTDIFF'])
self.docid = xml_element.attrib['DOCID']
self.source = xml_element.attrib['SOURCE']
self.target = xml_element.attrib['TARGET']
self.data = xml_element.text.strip().split()
self.token = []
self.lemma = []
self.part_of_speech = []
self.position = []
self.length = len(self.data)
self.event_ix = []
for i,d in enumerate(self.data):
tmp = d.split('///')
self.part_of_speech.append(tmp[-2])
self.position.append(tmp[-1])
if tmp[-1] == 'E1':
self.event_ix.append(i)
elif tmp[-1] == 'E2':
self.event_ix.append(i)
# self.token.append(d[:-(len(tmp[-1])+len(tmp[-2])+2)])
self.token.append(tmp[0])
self.lemma.append(tmp[1])
class temprel_set:
def __init__(self, xmlfname, datasetname="matres"):
self.xmlfname = xmlfname
self.datasetname = datasetname
tree = ET.parse(xmlfname)
root = tree.getroot()
self.size = len(root)
self.temprel_ee = []
for e in root:
self.temprel_ee.append(temprel_ee(e))