from sklearn.tree import DecisionTreeRegressor, plot_tree
from sklearn.ensemble import RandomForestRegressor
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Use seaborn's "whitegrid" theme for every figure drawn below.
sns.set_style(style="whitegrid")
# Seed NumPy's global generator so the synthetic sample is reproducible.
np.random.seed(0)

# Draw, in this order, the two predictors and the noise term: three
# independent standard-normal vectors of 100 observations each.
x1, x2, e = (np.random.normal(size=100) for _ in range(3))

# Linear data-generating process: y = 2*x1 + 3*x2 + noise.
y = 2 * x1 + 3 * x2 + e

# Collect predictors and response into a single frame for modelling.
df = pd.DataFrame({"x1": x1, "x2": x2, "y": y})
df.head()
|   | x1 | x2 | y |
|---|---|---|---|
| 0 | 1.764052 | 1.883151 | 8.808375 |
| 1 | 0.400157 | -1.347759 | -3.482342 |
| 2 | 0.978738 | -1.270485 | -0.754319 |
| 3 | 2.240893 | 0.969397 | 8.045240 |
| 4 | 1.867558 | -1.173123 | 0.855877 |
# Fit a shallow regression tree (at most three levels of splits) on the two
# predictors so the whole structure stays small enough to read in one figure.
tree = DecisionTreeRegressor(max_depth=3)
tree.fit(X=df[["x1", "x2"]], y=df.y)

# Draw the fitted tree. Passing feature_names makes each split read
# "x1 <= ..." / "x2 <= ..." instead of the generic "x[0]" / "x[1]" labels
# plot_tree uses when no names are supplied. The list must follow the column
# order used in fit().
plt.figure(figsize=(20, 8))
plot_tree(tree, feature_names=["x1", "x2"], fontsize=12)
plt.show()
# Train a random forest of depth-3 trees on the same two predictors;
# fit() returns the estimator itself, so construction and fitting are chained.
forest = RandomForestRegressor(max_depth=3).fit(df[["x1", "x2"]], df["y"])

# Predictions for the rows the forest was trained on (in-sample fit).
predict = forest.predict(df[["x1", "x2"]])

# Scatter of actual vs. predicted values with a fitted regression line and no
# confidence band; axis labels are set on the Axes that regplot returns.
sns.regplot(x=df["y"], y=predict, ci=None).set(
    xlabel="Actual y",
    ylabel="Predicted y",
)
plt.show()