import pandas as pd

# Location of the Excel workbook that is read into `df`.
data_url = "https://www.dropbox.com/scl/fi/5mof24qi5is6x9xcbkmam/data-current-2023-11-13.xlsx?rlkey=flfttx48bglim1a1cwaddl03p&dl=1"

# Read the workbook straight from the URL into a DataFrame.
df = pd.read_excel(io=data_url)


from joblib import load 
from urllib.request import urlopen

# Location of the serialized model that is loaded into `forest`.
url = "https://www.dropbox.com/scl/fi/kssvcsgze16p36dwjyiaw/forest_ver2.joblib?rlkey=76hbmsqnecyv96qsmi39fwrr8&dl=1"

# NOTE(review): joblib.load unpickles the downloaded bytes, and unpickling can
# execute arbitrary code. Only point `url` at a file whose contents you trust.
# The `with` block closes the HTTP response as soon as the model has been read.
with urlopen(url) as file:
    forest = load(file)


# Names of the input columns, kept in sorted order. The interactive
# `predict` helper iterates over this same list.
features = sorted(
    [
        "marketcap",
        "pb",
        "mom",
        "volume",
        "volatility",
        "roe",
        "accruals",
        "agr",
    ]
)

# For every input column, add a "<name>_vol" column holding that column
# multiplied by the `mktvol` column.
interaction_names = [f"{name}_vol" for name in features]
for name, interaction in zip(features, interaction_names):
    df[interaction] = df[name] * df["mktvol"]

# The model is fed the base columns first, then the "<name>_vol" columns.
features_final = features + interaction_names
df["predict"] = forest.predict(X=df[features_final])


# Summary statistics of the predictions stored in the "predict" column.
df["predict"].describe()

count    1753.000000
mean       50.043001
std         1.615579
min        38.642801
25%        50.023600
50%        50.649518
75%        50.925365
max        53.033142
Name: predict, dtype: float64


import numpy as np

def predict(mktvol):
    """Prompt for each feature value and return the forest's prediction.

    One value is read from standard input for every entry of the
    module-level ``features`` list, in that list's order. Each entered
    value is also multiplied by ``mktvol`` to fill the matching
    ``<name>_vol`` column, and the resulting single-row frame is passed
    to ``forest.predict``.

    Parameters
    ----------
    mktvol
        Multiplier applied to every entered value to build the
        ``<name>_vol`` inputs.

    Returns
    -------
    The single element of the prediction, extracted with ``.item()``.

    Raises
    ------
    ValueError
        If an entered value cannot be converted by ``float``.
    """
    # Ask for the base inputs one at a time, converting each as it is read.
    entered = [float(input(f"Input {name}: ")) for name in features]

    # Base values come first, followed by their mktvol-scaled counterparts,
    # mirroring the column order built below.
    values = entered + [mktvol * value for value in entered]
    column_names = features + [name + "_vol" for name in features]

    # Shape the flat list into one row so it can be labelled with columns.
    row = np.array(values).reshape(1, len(values))
    frame = pd.DataFrame(row, columns=column_names)
    return forest.predict(frame).item()


# Run the interactive predictor with mktvol set to 0.15.
predict(mktvol=0.15)

Applying a Random Forest II

MGMT 638: Data-Driven Investments: Equity

Kerry Back, Rice University

Outline

Read data

Read model

Make predictions

Distribution of predictions

Create an interactive predictor

Use the interactive predictor