import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.tree import plot_tree
import matplotlib.pyplot as plt

# 1. Load the Titanic training data.
data = pd.read_csv('/root/data/titanic/train.csv')
print(data.head())
# DataFrame.info() writes its summary to stdout itself and returns None,
# so wrapping it in print() would emit a stray "None" line.
data.info()

# 2. Preprocess: pick the feature columns and the target column.
x = data[['Pclass', 'Sex', 'Age']].copy()
y = data['Survived'].copy()
print(x.head(10))

# Replace missing ages with the mean age. The result is assigned back to the
# column instead of calling fillna(inplace=True) on the selected Series: that
# chained in-place form is deprecated and does not update `x` when pandas
# copy-on-write is active.
# NOTE(review): the mean is taken over all rows, i.e. before the train/test
# split, so held-out ages influence the imputed value. Compute it on the
# training rows only if strict separation is required.
x['Age'] = x['Age'].fillna(x['Age'].mean())
print(x.head(10))

# One-hot encode the non-numeric columns (presumably just 'Sex'; numeric
# columns are passed through unchanged by get_dummies).
x = pd.get_dummies(x)
print(x.head(10))

# 3. Hold out 20% of the rows for evaluation. A fixed seed keeps the split,
# the fitted model and therefore the printed score reproducible across runs.
RANDOM_STATE = 42
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=RANDOM_STATE
)

# 4. Train a gradient-boosting classifier and report its mean accuracy on the
# held-out rows.
model = GradientBoostingClassifier(random_state=RANDOM_STATE)
model.fit(x_train, y_train)
print(model.score(x_test, y_test))

作者 admin

张宴银,大数据开发工程师

发表回复

您的邮箱地址不会被公开。 必填项已用 * 标注