-
Notifications
You must be signed in to change notification settings - Fork 0
/
almostnolabel.py
76 lines (63 loc) · 2.53 KB
/
almostnolabel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import os
import numpy as np
import pandas as pd
import rpy2.robjects as robjects
from rpy2.robjects import pandas2ri
from urllib.request import urlopen
from rpy2.robjects.packages import SignatureTranslatedAnonymousPackage
import rpy2
from llp_learn.base import baseLLPClassifier
from abc import ABC, abstractmethod
import numpy as np
from scipy.special import expit
class MMBaseClassifier(baseLLPClassifier, ABC):
    """
    Base class for all MM classifiers - (Patrini et al., 2014) paper.

    Subclasses implement ``fit`` and are expected to store the learned
    weight vector in ``self.w``; ``predict_proba`` and ``predict`` read it.

    Parameters
    ----------
    lmd : float, default=1
        Stored as ``self.lmd`` for use by subclasses.
    """

    def __init__(self, lmd=1):
        self.lmd = lmd

    @abstractmethod
    def fit(self, X, bags, proportions):
        """
        Fit the model from samples, their bag assignment and the bag
        proportions. Must be implemented by subclasses.
        """
        pass

    def predict(self, X):
        """
        Predict a label in {-1, 1} for each row of X.

        A sample is labelled 1 when ``predict_proba`` is at least 0.5 and
        -1 otherwise.
        """
        return np.where(self.predict_proba(X) >= 0.5, 1, -1)

    def predict_proba(self, X):
        """
        Return the logistic sigmoid of ``2 * X @ self.w`` for each row of X.

        Requires ``self.w`` to have been set (by ``fit``).
        """
        return expit(2 * X @ self.w)

    def set_params(self, **params):
        """
        Set the given attributes on the estimator.

        Returns
        -------
        self : the estimator itself, so calls can be chained and so that
            ``est = est.set_params(...)`` keeps working.
        """
        # Written straight into __dict__, exactly as before: any keyword is
        # accepted and no attribute hooks are triggered.
        self.__dict__.update(params)
        return self

    def get_params(self, deep=True):
        """
        Return the estimator's current attributes as a dict.

        Parameters
        ----------
        deep : bool, default=True
            Accepted for compatibility with the scikit-learn estimator
            signature; it has no effect here.

        Returns
        -------
        params : dict
            A shallow copy of the instance attributes. This contains every
            attribute currently set on the instance, including anything a
            subclass stored during ``fit`` (for example ``w``).
        """
        # Copy so that callers editing the result cannot mutate the estimator.
        return dict(self.__dict__)
class LMM(MMBaseClassifier):
    """
    Laplacian Mean Map classifier.

    The computation is delegated to the R script
    ``almostnolabel/laplacian.mean.map.R`` located next to this module,
    which is loaded through rpy2 on every call to ``fit``.

    Parameters
    ----------
    lmd : float
        Forwarded to the R function ``laplacian_mean_map``.
    gamma : float
        Forwarded to the R function ``laplacian_mean_map``.
    sigma : float
        Forwarded to the R function ``laplacian``.
    similarity : str, default="G,s"
        Similarity identifier forwarded to the R function ``laplacian``.
    """

    def __init__(self, lmd, gamma, sigma, similarity="G,s"):
        super().__init__(lmd)
        self.gamma = gamma
        self.sigma = sigma
        self.similarity = similarity

    def fit(self, X, bags, proportions):
        """
        Fit the model according to the given training data.

        Parameters
        ----------
        X : array-like, shape = (n_samples, n_features)
            The training input samples.
        bags : array-like, shape = (n_samples,)
            The training bags. Each entry is used as an index into
            ``proportions``.
        proportions : array-like, shape = (n_bags,)
            The bags proportions.

        Notes
        -----
        Stores the result of the R call in ``self.w`` and keeps the two R
        function handles in ``self.laplacian`` and
        ``self.laplacian_mean_map``.
        """
        # Honour the "array-like" contract: lists and pandas Series have no
        # usable .reshape / .shape, so coerce once up front.
        X = np.asarray(X)
        bags = np.asarray(bags)

        with open("{}/almostnolabel/laplacian.mean.map.R".format(os.path.dirname(os.path.abspath(__file__))), "r") as f:
            string = f.read()

        lmm = SignatureTranslatedAnonymousPackage(string, "laplacian.mean.map")
        self.laplacian = lmm.laplacian
        self.laplacian_mean_map = lmm.laplacian_mean_map

        # Turn on rpy2's pandas conversion so the DataFrame built below can
        # be handed to the R functions.
        pandas2ri.activate()

        # Creating the R object expected by the R function:
        # one row per sample, with the proportion of the sample's bag in
        # "label", the bag id in "bag" and the features in x0, x1, ...
        y_proportions = np.array([proportions[bag] for bag in bags])
        columns = ["label", "bag"] + ["x" + str(i) for i in range(X.shape[1])]
        trainset = pd.DataFrame(
            np.concatenate((y_proportions.reshape(-1, 1), bags.reshape(-1, 1), X), axis=1),
            columns=columns,
        )
        # The concatenation yields a single dtype for all columns; cast the
        # bag ids back to integers.
        trainset = trainset.astype({"bag": int})

        N_bags = len(np.unique(bags))

        # Computing the laplacian
        laplacian = self.laplacian(self.similarity, trainset, N_bags, self.sigma)

        # Calling the R function
        self.w = self.laplacian_mean_map(trainset, laplacian, self.lmd, self.gamma)