
Commit

feat: version 0.1.1
PrivacyGo-PETPlatform committed Sep 26, 2024
1 parent 93e8943 commit a51a2f0
Showing 7 changed files with 180 additions and 70 deletions.
11 changes: 11 additions & 0 deletions CHANGELOG.MD
@@ -0,0 +1,11 @@
# List of Changes

## Version 0.1.1
### Changed
- The model storage format has changed from pickle to JSON.

## Version 0.1.0
### Added
- Federated Leiden algorithm
- Two-party Secure Xgboost
- Two-party PSI
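
One practical upshot of the pickle-to-JSON switch recorded in 0.1.1 above: a saved model is now plain data that can be opened and inspected without unpickling untrusted bytes. A minimal sketch, assuming a model file has already been written by the `save_model()` method changed later in this commit; the path is the illustrative one used in the user guide below, not a file that ships with the repository.

```python
import json

# Illustrative path from the user guide below; substitute whatever path
# save_model() actually wrote in your run.
with open("model_name0.json", "r") as f:
    model_dict = json.load(f)

# Unlike a pickle, the JSON model is ordinary data and safe to inspect.
print(sorted(model_dict.keys()))
```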
16 changes: 8 additions & 8 deletions docs/user_guide/Horizontal_SecureBoost.md
@@ -116,7 +116,7 @@ petml.operators.boosting.XGBoostClassifierFit

| Name | File Type | Description |
| --- |-----------| --- |
| model_path | pkl | The trained model |
| model_path | json | The trained model |

#### Examples
```
@@ -148,15 +148,15 @@ config = {
"train_data": "data0.csv",
},
"outputs": {
"model_path": "model_name0.pkl"
"model_path": "model_name0.json"
}
},
"party_b": {
"inputs": {
"train_data": "data1.csv",
},
"outputs": {
"model_path": "model_name1.pkl"
"model_path": "model_name1.json"
}
}
}
@@ -176,7 +176,7 @@ petml.operators.boosting.XGBoostClassifierPredict
| Name | File Type | Description |
|--------------|-----------|-----------------------|
| predict_data | csv | The inference dataset |
| model_path | pkl | The trained model |
| model_path | json | The trained model |


#### Output
@@ -204,7 +204,7 @@ config = {
"party_a": {
"inputs": {
"predict_data": "data0.csv",
"model_path": "model_name0.pkl"
"model_path": "model_name0.json"
},
"outputs": {
"inference_res_path": "predict0.csv"
@@ -213,7 +213,7 @@ config = {
"party_b": {
"inputs": {
"predict_data": "data1.csv",
"model_path": "model_name1.pkl"
"model_path": "model_name1.json"
},
"outputs": {
"inference_res_path": "predict1.csv"
@@ -258,7 +258,7 @@ petml.operators.boosting.XGBoostRegressorFit

| Name | File Type | Description |
|------------|-----------|-------------------|
| model_path | pkl | The trained model |
| model_path | json | The trained model |

#### Examples
Refer to the examples in classifier training config
@@ -273,7 +273,7 @@ petml.operators.boosting.XGBoostRegressorPredict
| Name | File Type | Description |
|--------------|-----------|-----------------------|
| predict_data | csv | The inference dataset |
| model_path | pkl | The trained model |
| model_path | json | The trained model |


#### Output
30 changes: 30 additions & 0 deletions petml/fl/boosting/decision_tree.py
@@ -56,6 +56,21 @@ def __init__(self,
self.left_child = left_child
self.right_child = right_child

def to_dict(self):
"""transform object to dict"""
return {k: v.to_dict() if isinstance(v, MPCTreeNode) else v for k, v in vars(self).items()}

@classmethod
def from_dict(cls, data):
"""transform from dict to object"""
obj = cls()
for k, v in data.items():
if isinstance(v, dict):
setattr(obj, k, cls.from_dict(v))
else:
setattr(obj, k, v)
return obj
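
The to_dict/from_dict pair added here round-trips a node by recursing into nested nodes and rebuilding fields one by one. A standalone sketch of the same pattern; the Node class below is a stand-in for illustration only, since MPCTreeNode's full constructor signature is not shown in this hunk.

```python
import json


class Node:
    """Stand-in node with the same to_dict/from_dict shape as above."""

    def __init__(self, value=None, child=None):
        self.value = value
        self.child = child

    def to_dict(self):
        # Recurse into child nodes, keep plain values as-is
        return {k: v.to_dict() if isinstance(v, Node) else v for k, v in vars(self).items()}

    @classmethod
    def from_dict(cls, data):
        obj = cls()
        for k, v in data.items():
            setattr(obj, k, cls.from_dict(v) if isinstance(v, dict) else v)
        return obj


root = Node(1, Node(2))
restored = Node.from_dict(json.loads(json.dumps(root.to_dict())))
assert restored.child.value == 2
```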


class MPCTree:
"""
@@ -121,6 +136,21 @@ def __init__(self,
self.min_split_loss = min_split_loss
self.max_depth = max_depth

def to_dict(self):
"""transform object to dict"""
return {k: v.to_dict() if isinstance(v, MPCTreeNode) else v for k, v in vars(self).items()}

@classmethod
def from_dict(cls, data):
"""transform from dict to object"""
obj = cls()
for k, v in data.items():
if isinstance(v, dict):
setattr(obj, k, cls.from_dict(v))
else:
setattr(obj, k, v)
return obj

def _calc_threshold(self, gsum):
"""clip the value of gain"""
res = snp.where(gsum > self.reg_alpha, gsum - self.reg_alpha,
21 changes: 19 additions & 2 deletions petml/fl/boosting/loss.py
@@ -25,6 +25,16 @@ class LogisticLoss:
Class for calculating the logistic loss function
"""

def to_dict(self):
return {'class': 'LogisticLoss'}

@classmethod
def from_dict(cls, data):
if data['class'] == 'LogisticLoss':
return cls()
else:
raise ValueError('Invalid class: ' + data['class'])

def _sigmoid(self, y_pred: np.ndarray):
"""
Implemented sigmoid equation
@@ -90,8 +100,15 @@ class SquareLoss:
Class for calculating the square loss function
"""

def __init__(self):
pass
def to_dict(self):
return {'class': 'SquareLoss'}

@classmethod
def from_dict(cls, data):
if data['class'] == 'SquareLoss':
return cls()
else:
raise ValueError('Invalid class: ' + data['class'])

def grad(self, y_pred: Union[SecureArray, np.ndarray], label: Union[SecureArray, np.ndarray]):
"""
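
The `{'class': ...}` tag written by to_dict is what lets a loader pick the right loss implementation back out. A minimal sketch of that dispatch, mirroring the loss_map added to xgb_model.py further down; it assumes the classes are importable from petml.fl.boosting.loss, the module this hunk edits.

```python
# Assumed import path (matches the file location petml/fl/boosting/loss.py).
from petml.fl.boosting.loss import LogisticLoss, SquareLoss

loss_map = {"LogisticLoss": LogisticLoss, "SquareLoss": SquareLoss}

state = SquareLoss().to_dict()                        # {'class': 'SquareLoss'}
restored = loss_map[state["class"]].from_dict(state)  # dispatch on the tag
assert isinstance(restored, SquareLoss)
```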
124 changes: 88 additions & 36 deletions petml/fl/boosting/xgb_model.py
@@ -13,7 +13,7 @@
# limitations under the License.

import hashlib
import pickle
import json
import time

import numpy as np
@@ -366,35 +366,43 @@ def transform_one_tree(self, train_x, train_y, train_y_cipher, eval_x_cipher, y_

return tree, y_hat, eval_y_hat

@staticmethod
def export_share(share_value) -> list:
return share_value.to_share().astype(np.int64).tolist()

@staticmethod
def load_share(load_data):
return snp.fromshare(np.array(load_data).astype(np.int64), np.float64)

def save_tree_from_ss_to_numpy(self, trees):
for tree in trees:
tree.columns = tree.columns.to_share().astype(np.int64)
tree.columns = self.export_share(tree.columns)
self._save_tree_from_ss_to_numpy(tree.root)

def _save_tree_from_ss_to_numpy(self, tree_node):
"""Convert secure object to numerical value"""
if tree_node.is_leaf:
tree_node.leaf_weight = tree_node.leaf_weight.to_share().astype(np.int64)
tree_node.leaf_weight = self.export_share(tree_node.leaf_weight)
return

tree_node.split_feat = tree_node.split_feat.to_share().astype(np.int64)
tree_node.split_val = tree_node.split_val.to_share().astype(np.int64)
tree_node.split_feat = self.export_share(tree_node.split_feat)
tree_node.split_val = self.export_share(tree_node.split_val)
self._save_tree_from_ss_to_numpy(tree_node.left_child)
self._save_tree_from_ss_to_numpy(tree_node.right_child)

def load_tree_from_numpy_to_ss(self, trees):
for tree in trees:
tree.columns = snp.fromshare(tree.columns, np.float64)
tree.columns = self.load_share(tree.columns)
self._load_tree_from_numpy_to_ss(tree.root)

def _load_tree_from_numpy_to_ss(self, tree_node):
"""Convert to secure object from numerical value"""
if tree_node.is_leaf:
tree_node.leaf_weight = snp.fromshare(tree_node.leaf_weight, np.float64)
tree_node.leaf_weight = self.load_share(tree_node.leaf_weight)
return

tree_node.split_feat = snp.fromshare(tree_node.split_feat, np.float64)
tree_node.split_val = snp.fromshare(tree_node.split_val, np.float64)
tree_node.split_feat = self.load_share(tree_node.split_feat)
tree_node.split_val = self.load_share(tree_node.split_val)
self._load_tree_from_numpy_to_ss(tree_node.left_child)
self._load_tree_from_numpy_to_ss(tree_node.right_child)
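
export_share and load_share are the bridge between secret-shared values and JSON: the int64 share array becomes a nested list on the way out and is fed back through snp.fromshare on the way in. A plain-NumPy sketch of that round trip; the share array below is a made-up stand-in for to_share() output, and no MPC engine is involved.

```python
import json

import numpy as np

# Stand-in for the int64 array returned by to_share()
share = np.array([[123456789, -42], [7, 0]], dtype=np.int64)

payload = json.dumps(share.tolist())                       # what lands in the model JSON
restored = np.array(json.loads(payload)).astype(np.int64)  # what fromshare() would consume

assert np.array_equal(share, restored)
```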

@@ -502,6 +510,30 @@ def __init__(
self.trees = []
self.loss_func = LogisticLoss()

def to_dict(self):
result = vars(self).copy()
del result['logger']
result['trees'] = [tree.to_dict() for tree in self.trees]
result['loss_func'] = self.loss_func.to_dict()
return result

@classmethod
def from_dict(cls, data):
obj = cls()
for k, v in data.items():
if k == 'trees':
setattr(obj, k, [MPCTree.from_dict(tree_dict) for tree_dict in v])
elif k == 'loss_func':
loss_map = {
'LogisticLoss': LogisticLoss,
'SquareLoss': SquareLoss,
}
loss_class = loss_map[v['class']]
setattr(obj, k, loss_class.from_dict(v))
else:
setattr(obj, k, v)
return obj
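
For orientation, the serialized classifier is a flat dict of hyperparameters plus a list of tree dicts and a loss tag. A sketch of the rough shape with illustrative values; only key names that appear in this diff are used.

```python
import json

serialized = {
    "learning_rate": 0.3,                  # illustrative value
    "base_score": 0.5,                     # illustrative value
    "trees": [
        {
            "columns": [[0, 1, 2]],        # int64 share list from export_share()
            "root": {
                "is_leaf": True,
                "leaf_weight": [[42]],     # int64 share of the leaf value
                "left_child": None,
                "right_child": None,
            },
        }
    ],
    "loss_func": {"class": "LogisticLoss"},
}

print(json.dumps(serialized, indent=2))
```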

def fit(self, data: pd.DataFrame) -> None:
"""
Fit the model
@@ -619,10 +651,11 @@ def save_model(self, model_path: str) -> None:
try:
self._federation = None
self._mpc_engine = None
with open(model_path, 'wb') as f:
pickle.dump(self, f)
json_str = json.dumps(self.to_dict())
with open(model_path, 'w') as f:
f.write(json_str)
self.logger.info("Save model success")
except pickle.PickleError as e:
except (TypeError, ValueError) as e:  # json.dumps raises TypeError/ValueError, not JSONDecodeError
self.logger.error(f"Save model failed. err={e}")

def load_model(self, model_path: str) -> None:
@@ -634,21 +667,20 @@ def load_model(self, model_path: str) -> None:
model_path: string
File path of the saved model
Returns
-------
loadobj: model
Saved XGboost model
"""
try:
with open(model_path, 'rb') as f:
load_obj = pickle.load(f)
with open(model_path, 'r') as f:
load_obj = json.load(f)
load_attributes = self.from_dict(load_obj)

for attr in vars(self):
if attr not in ['logger', '_federation', '_mpc_engine']:
setattr(self, attr, getattr(load_attributes, attr))

self.learning_rate = load_obj.learning_rate
self.base_score = load_obj.base_score
self.trees = load_obj.trees
self.load_tree_from_numpy_to_ss(self.trees)
self.logger.info("Load model success")
except pickle.PickleError as e:

except json.JSONDecodeError as e:
self.logger.error(f"Load model fail. err={e}")


@@ -747,6 +779,27 @@ def __init__(
self.trees = []
self.loss_func = LogisticLoss()

def to_dict(self):
result = vars(self).copy()
del result['logger']
result['trees'] = [tree.to_dict() for tree in self.trees]
result['loss_func'] = self.loss_func.to_dict()
return result

@classmethod
def from_dict(cls, data):
obj = cls()
for k, v in data.items():
if k == 'trees':
setattr(obj, k, [MPCTree.from_dict(tree_dict) for tree_dict in v])
elif k == 'loss_func':
loss_map = {'LogisticLoss': LogisticLoss, 'SquareLoss': SquareLoss}
loss_class = loss_map[v['class']]
setattr(obj, k, loss_class.from_dict(v))
else:
setattr(obj, k, v)
return obj

def fit(self, data: pd.DataFrame) -> None:
"""
Fit the model
@@ -846,10 +899,11 @@ def save_model(self, model_path: str) -> None:
try:
self._federation = None
self._mpc_engine = None
with open(model_path, 'wb') as f:
pickle.dump(self, f)
json_str = json.dumps(self.to_dict())
with open(model_path, 'w') as f:
f.write(json_str)
self.logger.info("Save model success")
except pickle.PickleError as e:
except (TypeError, ValueError) as e:  # json.dumps raises TypeError/ValueError, not JSONDecodeError
self.logger.error(f"Save model failed. err={e}")

def load_model(self, model_path: str) -> None:
@@ -860,20 +914,18 @@ def load_model(self, model_path: str) -> None:
----------
model_path: string
File path of the saved model
Returns
-------
loadobj: model
Saved XGboost model
"""
try:
with open(model_path, 'rb') as f:
load_obj = pickle.load(f)
with open(model_path, 'r') as f:
load_obj = json.load(f)
load_attributes = self.from_dict(load_obj)

for attr in vars(self):
if attr not in ['logger', '_federation', '_mpc_engine']:
setattr(self, attr, getattr(load_attributes, attr))

self.learning_rate = load_obj.learning_rate
self.base_score = load_obj.base_score
self.trees = load_obj.trees
self.load_tree_from_numpy_to_ss(self.trees)
self.logger.info("Load model success")
except pickle.PickleError as e:

except json.JSONDecodeError as e:
self.logger.error(f"Load model fail. err={e}")
