import pandas as pd
# Directory prefix of the input CSV (must end with "/").
# Set it to "./" when the file sits in the current working directory.
path_to_file = "../../"

# Read the dataset into a DataFrame and preview its first rows.
df = pd.read_csv(f"{path_to_file}data-2023-11-08.csv")
df.head()
| | ticker | date | marketcap | pb | ret | mom | volume | volatility | roe | accruals | agr | sector |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | AACC | 2011-01-14 | 188.3 | 1.4 | -0.014634 | -0.184615 | 2.078000e+04 | 0.071498 | -0.118239 | -0.182275 | 0.004383 | Financial Services |
1 | AAI | 2011-01-14 | 1012.1 | 2.0 | 0.002677 | 0.438224 | 2.775580e+06 | 0.128450 | 0.108073 | -0.220045 | 0.002237 | Industrials |
2 | AAIC | 2011-01-14 | 189.3 | 1.0 | -0.010119 | 0.684547 | 3.466000e+04 | 0.048505 | 0.136967 | 0.108055 | 0.135697 | Real Estate |
3 | AAON | 2011-01-14 | 479.4 | 4.2 | 0.007778 | 0.528685 | 2.817291e+05 | 0.044912 | 0.191801 | -0.088557 | 0.011656 | Basic Materials |
4 | AATC | 2011-01-14 | 63.3 | 1.4 | -0.013960 | 0.008216 | 6.800000e+03 | 0.049756 | 0.072269 | -0.008792 | 0.089436 | Technology |
from sklearn.ensemble import RandomForestRegressor

# Random forest whose trees are capped at depth 3.
# NOTE: no random_state is passed, so every refit may give a slightly
# different model.
forest = RandomForestRegressor(max_depth=3)

# Target: each row's return minus the median return of all rows that share
# its date. transform("median") broadcasts the per-date median back onto the
# original index, so no per-group Python lambda is needed.
df["target"] = df.ret - df.groupby("date").ret.transform("median")

# Columns used as model inputs.
features = [
    "marketcap",
    "pb",
    "mom",
    "volume",
    "volatility",
    "roe",
    "accruals",
]

# Train only on the most recent `n_train_dates` distinct dates.
n_train_dates = 156

# Sorted array of the distinct dates. Built with pandas methods rather than
# an in-place ndarray.sort(), because Series.unique() is not guaranteed to
# return a NumPy array (extension-backed columns yield an ExtensionArray).
# Missing dates are dropped so they can never be selected into the window.
dates = df.date.dropna().drop_duplicates().sort_values().to_numpy()
df = df[df.date.isin(dates[-n_train_dates:])]

# Fit on the retained rows; as the last expression of the cell, the fitted
# estimator is also what the notebook displays.
forest.fit(X=df[features], y=df.target)
RandomForestRegressor(max_depth=3)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
RandomForestRegressor(max_depth=3)
from joblib import dump

# Persist the fitted forest alongside the input data; dump() returns the list
# of file names it wrote, which the notebook shows as the cell output.
dump(forest, f"{path_to_file}forest.joblib")
['../../forest.joblib']