随机森林的重要参数、接口及其使用

news/2024/7/9 8:37:59 标签: sklearn

随机森林的重要参数、接口及其使用

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split,cross_val_score
import matplotlib.pyplot as plt


# Load the wine dataset that ships with scikit-learn and take a first look.
wine = load_wine()
print(list(wine.keys()))  # names of the fields in the dictionary-like result
print(wine.data, wine.target)  # feature matrix, then the class labels

# Random forest vs. a single decision tree.
# Hold out 30% of the samples for testing; the seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    wine.data,
    wine.target,
    test_size=0.3,
    random_state=42,
)

# Build and train both models on the same training split. fit() returns the
# estimator itself, so construction and training can be chained.
rf = RandomForestClassifier(random_state=0).fit(x_train, y_train)
df = DecisionTreeClassifier(random_state=0).fit(x_train, y_train)

# Test-set accuracy: the forest first, then the single tree.
for model in (rf, df):
    print(model.score(x_test, y_test))


# Parameter 1: n_estimators — the number of weak learners (decision trees).
# Learning curve: mean 10-fold cross-validated accuracy on the training split
# for every odd forest size from 1 to 99.

# One shared range drives both the fits and the x-axis, so the two can never
# fall out of step with each other.
n_estimators_grid = range(1, 101, 2)

# A fixed random_state makes the curve repeatable and ensures that differences
# between points come from n_estimators rather than from a different seed.
res = [
    cross_val_score(
        RandomForestClassifier(n_estimators=n_trees, random_state=0),
        x_train,
        y_train,
        cv=10,
    ).mean()
    for n_trees in n_estimators_grid
]

plt.figure(figsize=(20, 5))
plt.plot(n_estimators_grid, res)
plt.xlabel("n_estimators")
plt.ylabel("mean 10-fold CV accuracy")
plt.show()

# Parameter 2: random_state — pins down one specific forest so that repeated
# runs build the same ensemble; it does not make the individual trees identical.
# (Source of randomness: features are picked at random when splitting.)

# Attribute 1: estimators_ — the fitted decision trees that make up the forest.
for tree in rf.estimators_:
    print(tree)

# Look at the first tree on its own: its test accuracy and its random_state.
first_tree = rf.estimators_[0]
print(first_tree.score(x_test, y_test))
print(first_tree.random_state)

# Parameter 3: bootstrap — whether each tree is trained on a sample drawn with
# replacement from the training data (scikit-learn's default is True).
# (Source of randomness: every tree sees a different random resample of the
# training set.)
# oob_score — when True, the samples a tree never drew ("out-of-bag" data) are
# used to evaluate the forest; the result is read from the oob_score_ attribute.

# Fitted on the full dataset here: the out-of-bag samples serve as the test data.
RF = RandomForestClassifier(n_estimators=25, oob_score=True).fit(
    wine.data, wine.target
)
print(RF.oob_score_)


## Summary of the other important attributes and methods
# Parameters: n_estimators, random_state, bootstrap, oob_score
# Attributes: .estimators_, .oob_score_, .feature_importances_
# Methods:    apply, fit, predict, score, predict_proba

print('------------------------------------------------------------')

RF = RandomForestClassifier(n_estimators=25, oob_score=True).fit(
    x_train, y_train
)

# feature_importances_: the fitted forest's importance value for each feature.
importances = RF.feature_importances_
print('特征综合贡献度', importances)

# apply(): for each test sample, the index of the leaf it ends up in, per tree
# (original author's note: practical use not yet clear to them).
leaf_indices = RF.apply(x_test)
print('每个样本的特征的叶子索引', leaf_indices)

# predict_proba(): per-class probabilities for each test sample.
class_probabilities = RF.predict_proba(x_test)
print('每个测试样本被分到一类标签的平均概率', class_probabilities)