Skip to content

Commit

Permalink
target encoding feature: 默认使用统计信息进行特征筛选 (use statistical information for feature selection by default)
Browse files (browse the repository at this point in the history)
  • Loading branch information
enjoysport2022 committed Jul 26, 2021
1 parent cb928b6 commit 0714e37
Showing 1 changed file with 7 additions and 11 deletions.
18 changes: 7 additions & 11 deletions autox/feature_engineer/fe_target_encoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,17 +44,13 @@ def fit(self, df, target, df_feature_type = None, silence_cols = [], select_all
self.ops.append([feature])

if not self.select_all:
if self.target is not None:
# 训练模型,对group_col进行筛选
pass
else:
# 通过统计信息进行筛选
del_targetencoding_cols = []
for targetencoding_col in self.ops:
if df.drop_duplicates(targetencoding_col).shape[0] > df.shape[0] * 0.05:
del_targetencoding_cols.append(targetencoding_col)
for targetencoding_col in del_targetencoding_cols:
self.ops.remove(targetencoding_col)
# 通过统计信息进行筛选
del_targetencoding_cols = []
for targetencoding_col in self.ops:
if df.drop_duplicates(targetencoding_col).shape[0] > df.shape[0] * 0.05:
del_targetencoding_cols.append(targetencoding_col)
for targetencoding_col in del_targetencoding_cols:
self.ops.remove(targetencoding_col)

def get_ops(self):
return self.ops
Expand Down

0 comments on commit 0714e37

Please sign in to comment.