
Decision Trees and Random Forests

MGMT 638: Data-Driven Investments: Equity

Kerry Back, Rice University

Imports

In [6]:
from sklearn.tree import DecisionTreeRegressor, plot_tree
from sklearn.ensemble import RandomForestRegressor
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns
sns.set_style("whitegrid")

Generate data

In [4]:
# Simulate 100 observations from a linear model: y = 2*x1 + 3*x2 + noise
np.random.seed(0)
x1 = np.random.normal(size=100)
x2 = np.random.normal(size=100)
e = np.random.normal(size=100)
y = 2*x1 + 3*x2 + e
df = pd.DataFrame(
    dict(x1=x1, x2=x2, y=y)
)
df.head()
Out[4]:
         x1        x2         y
0  1.764052  1.883151  8.808375
1  0.400157 -1.347759 -3.482342
2  0.978738 -1.270485 -0.754319
3  2.240893  0.969397  8.045240
4  1.867558 -1.173123  0.855877
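
Since y = 2*x1 + 3*x2 + e with standard normal noise, the noise-free part of y has variance 4 + 9 = 13 against a total variance of about 14, so no model should achieve an in-sample R-squared much above 13/14 ≈ 0.93. A minimal sketch to estimate this ceiling on the simulated sample:

# Variance of the noise-free signal relative to the variance of y
# approximates the best achievable R-squared (about 13/14 here)
signal = 2*df.x1 + 3*df.x2
print("approximate R-squared ceiling:", signal.var() / df.y.var())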

Fit and view a decision tree

In [14]:
# Fit a regression tree limited to depth 3 so the plot stays readable
tree = DecisionTreeRegressor(max_depth=3)
tree.fit(X=df[["x1", "x2"]], y=df.y)

# Display the fitted tree's splits and leaf predictions
plt.figure(figsize=(20, 8))
plot_tree(tree, fontsize=12)
plt.show()
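
The plot shows the splits, but a numeric summary can be read off the fitted estimator as well; the sketch below uses scikit-learn's score method (which returns in-sample R-squared for regressors) and the tree's feature_importances_ as one way to do that.

# In-sample R-squared of the single depth-3 tree
print("tree R-squared:", tree.score(df[["x1", "x2"]], df.y))

# Share of total impurity reduction attributed to each feature
print("importances:", dict(zip(["x1", "x2"], tree.feature_importances_)))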

Fit a random forest and view goodness of fit

In [16]:
# Fit a random forest (default of 100 trees), each limited to depth 3
forest = RandomForestRegressor(max_depth=3)
forest.fit(X=df[["x1", "x2"]], y=df.y)

# Compare in-sample predictions to actual values
predict = forest.predict(X=df[["x1", "x2"]])
sns.regplot(x=df.y, y=predict, ci=None)
plt.xlabel("Actual y")
plt.ylabel("Predicted y")
plt.show()
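
To put a number on the scatter plot, the same score method works for the forest; a short sketch comparing the forest's in-sample R-squared with the single tree's:

# In-sample R-squared of the forest versus the single depth-3 tree
print("forest R-squared:", forest.score(df[["x1", "x2"]], df.y))
print("tree R-squared:  ", tree.score(df[["x1", "x2"]], df.y))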