-
Notifications
You must be signed in to change notification settings - Fork 0
/
makePrules.m
121 lines (94 loc) · 3.45 KB
/
makePrules.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
function model = makePrules(model, gECNumbers)
%makePrules Build protein rules (model.Prules) and the list of unique EC
%numbers (model.ECNumbers) for a model.
%
% USAGE:
%    model = makePrules(model)
%    model = makePrules(model, gECNumbers)
%
% INPUT:
%    model:       cobra model structure. model.rxnECNumbers is read when
%                 gECNumbers is omitted; model.rules is read when it is given.
%
% OPTIONAL INPUT:
%    gECNumbers:  cell array of EC number annotations, indexed by the number
%                 N that appears as x(N) in model.rules
%                 (presumably one entry per gene - confirm against caller).
%
% OUTPUT:
%    model:       cobra model with the additional fields Prules and ECNumbers
%
% AUTHORS: Nicolas Mendoza-Mejia, Apr 2020

% Expression used to locate EC-number-like tokens in the annotation text.
ecExpr = '\<(?!EC:|^\>)([0-9A-Z]+.(([0-9A-Z]|-)+.)*([0-9A-Z]|-)+)';

if nargin < 2
    % No per-gene annotations supplied: derive the rules directly from the
    % reaction-level EC numbers stored in the model.
    ecPerRxn = regexp(model.rxnECNumbers, ecExpr, 'match');
    [rulesOut, ecOut] = makePrulesWithRxnECNumbers(model, ecPerRxn);
else
    % Wrap every EC number as x(<EC>) so it can later be swapped for an index.
    wrappedEC = regexprep(gECNumbers, ecExpr, 'x($1)');
    [rulesOut, ecOut] = makePrulesWithGrules(model.rules, wrappedEC);
end

model.Prules = rulesOut;
model.ECNumbers = ecOut;
end
function [index, ECList, nEC] = makeECList(inECNumber, ECList, nEC)
%makeECList Look up an EC number in the list of unique EC numbers, appending
%it when it is not there yet.
%
% INPUTS:
%    inECNumber:  EC number (char vector) to look up
%    ECList:      cell array with the EC numbers collected so far
%    nEC:         number of entries currently stored in ECList
%
% OUTPUTS:
%    index:       position of inECNumber in ECList
%    ECList:      list, extended by inECNumber if it was not present
%    nEC:         updated number of stored entries
%
% The count is named nEC rather than "size" so the builtin size() is not
% shadowed inside this function.

match = strcmp(inECNumber, ECList);
if any(match(:))
    % Return a scalar even if the list ever contained a duplicate; callers
    % feed the index to num2str and expect a single number.
    index = find(match, 1);
else
    nEC = nEC + 1;
    index = nEC;
    % Save in the list of unique EC numbers
    ECList{nEC} = inECNumber;
end
end
function [Prules, ECNumbers] = makePrulesWithRxnECNumbers(model, rxnECNumbers)
%makePrulesWithRxnECNumbers Build protein rules from reaction-level EC numbers.
%
% INPUTS:
%    model:         structure with the fields rxns and rxnECNumbers
%    rxnECNumbers:  cell array with, for every reaction, the cell array of
%                   EC numbers that were matched in model.rxnECNumbers{i}
%
% OUTPUTS:
%    Prules:     nRxns x 1 cell array of char vectors; each matched EC number
%                in model.rxnECNumbers{i} is replaced by x(N), where N is its
%                position in ECNumbers. Reactions without a matched EC number
%                get '' so the result is always a cell array of char vectors.
%    ECNumbers:  cell array of the unique EC numbers, in order of first use

nRxns = numel(model.rxns);
% Initialise with empty char vectors (not []), so that the output can be
% passed as a whole to regexprep/strrep.
Prules = repmat({''}, nRxns, 1);
ECNumbers = {};
sizeEcNumbers = 0;

for i = 1:nRxns
    ecInRxn = rxnECNumbers{i};
    % Skip reactions without matched EC numbers to avoid copying
    % non-indexed text from model.rxnECNumbers.
    if isempty(ecInRxn)
        continue
    end

    rule = model.rxnECNumbers{i};
    for j = 1:numel(ecInRxn)
        [ECNumberIndex, ECNumbers, sizeEcNumbers] = makeECList(ecInRxn{j}, ECNumbers, sizeEcNumbers);
        % Replace only the first remaining occurrence: the matches are
        % processed left to right, one replacement per match.
        pattern = regexptranslate('escape', ecInRxn{j});
        replacement = ['x(' num2str(ECNumberIndex) ')'];
        rule = regexprep(rule, pattern, replacement, 'once');
    end
    Prules{i} = rule;
end
end
function [Prules, ECNumbers] = makePrulesWithGrules(gRules, gECNumbers)
%makePrulesWithGrules Build protein rules by substituting, in every gene
%rule, each gene token x(G) with the EC numbers annotated for gene G.
%
% INPUTS:
%    gRules:      cell array of rules whose operands are written as x(G)
%    gECNumbers:  cell array indexed by G; each entry is either empty or a
%                 char vector in which every EC number is wrapped as x(<EC>)
%
% OUTPUTS:
%    Prules:      rules with every x(G) replaced by the annotation of gene G,
%                 where each x(<EC>) is rewritten as x(N), N being the
%                 position of that EC number in ECNumbers. Genes without
%                 annotation are removed together with one adjacent operator.
%    ECNumbers:   cell array of the unique EC numbers, in order of first use
%
% NOTE(review): the annotation of a gene is inserted without surrounding
% parentheses, so an annotation containing an operator may change the
% precedence of the rule - confirm whether this is intended.

Prules = gRules;
ECNumbers = {};
sizeEcNumbers = 0;

% Substituted EC tokens are written with a temporary opening marker instead
% of 'x('. Gene tokens and EC tokens otherwise share the x(N) syntax, and a
% later gene index equal to an already inserted EC index would be replaced
% a second time, corrupting the rule.
ecTokenOpen = [char(1) '('];

geneIndexes = regexp(gRules, '(?<=x\()([0-9]+)(?=\))', 'match');
for i = 1:numel(geneIndexes)
    indexes = unique(str2double(geneIndexes{i}));
    for j = 1:numel(indexes)
        index = indexes(j);
        pattern = ['x(' num2str(index) ')'];
        if isempty(gECNumbers{index})
            % We need to delete the entry and one of the logic characters
            % around it
            regExp = regexptranslate('escape', pattern);
            Prules(i) = regexprep(Prules(i), [regExp ' *[&|] *'], '');
            Prules(i) = regexprep(Prules(i), ['( *[&|] *)?' regExp], '');
        else
            replacement = gECNumbers{index};
            match = regexp(gECNumbers{index}, '(?<=x\().*?(?=\))', 'match');
            for k = 1:numel(match)
                [ECNumberIndex, ECNumbers, sizeEcNumbers] = makeECList(match{k}, ECNumbers, sizeEcNumbers);
                pat = ['x(' ECNumbers{ECNumberIndex} ')'];
                rep = [ecTokenOpen num2str(ECNumberIndex) ')'];
                replacement = strrep(replacement, pat, rep);
            end
            Prules(i) = strrep(Prules(i), pattern, replacement);
        end
    end
end

% All gene tokens are processed; turn the marked EC tokens into x(N).
Prules = strrep(Prules, ecTokenOpen, 'x(');

Prules = regexprep(Prules,'((\||&) *\( *\) *)*$',''); % Remove empty parentheses at the end
Prules = regexprep(Prules,' *\( *\) *(\||&)?',''); % Remove empty parentheses at the start and in the middle
end