import pandas as pd
# Location of the Excel workbook that supplies the rows to score.
DATA_URL = "https://www.dropbox.com/scl/fi/5mof24qi5is6x9xcbkmam/data-current-2023-11-13.xlsx?rlkey=flfttx48bglim1a1cwaddl03p&dl=1"

# Read the workbook straight from the URL into a DataFrame.
df = pd.read_excel(DATA_URL)
from joblib import load
from urllib.request import urlopen
# Download the serialized model and deserialize it with joblib.
#
# SECURITY NOTE(review): joblib.load unpickles the payload, which can execute
# arbitrary code. Only keep this if the remote file is fully trusted.
url = "https://www.dropbox.com/scl/fi/kssvcsgze16p36dwjyiaw/forest_ver2.joblib?rlkey=76hbmsqnecyv96qsmi39fwrr8&dl=1"

# Use the response as a context manager so the HTTP connection is closed as
# soon as the model has been read, instead of being left open indefinitely.
with urlopen(url) as file:
    forest = load(file)
# Base predictor columns, held in sorted (alphabetical) order.
# NOTE(review): presumably the order must match the one used when the model
# was fitted -- confirm against the training code.
features = sorted(
    [
        "marketcap",
        "pb",
        "mom",
        "volume",
        "volatility",
        "roe",
        "accruals",
        "agr",
    ]
)

# For every base feature add a "<feature>_vol" column: the feature multiplied
# by the `mktvol` column.
interaction_names = [f"{name}_vol" for name in features]
for name, vol_name in zip(features, interaction_names):
    df[vol_name] = df[name] * df["mktvol"]

# Model inputs: all base features followed by all interaction columns.
features_final = features + interaction_names

# Score every row and summarize the distribution of the predictions.
df["predict"] = forest.predict(X=df[features_final])
df["predict"].describe()
# Recorded output of `df.predict.describe()`:
#
# count    1753.000000
# mean       50.043001
# std         1.615579
# min        38.642801
# 25%        50.023600
# 50%        50.649518
# 75%        50.925365
# max        53.033142
# Name: predict, dtype: float64
import numpy as np
def predict(mktvol):
    """Interactively collect feature values and return the model prediction.

    Prompts on standard input once per entry of the module-level ``features``
    list (in that list's order), converts each answer to ``float``, and
    appends one interaction value per feature (``mktvol`` times the feature
    value). The resulting single-row frame is passed to ``forest.predict``.

    Args:
        mktvol: Multiplier applied to every entered feature value to form
            the corresponding ``"<feature>_vol"`` input.

    Returns:
        The single predicted value, extracted with ``.item()``.

    Raises:
        ValueError: If an entered value cannot be converted to ``float``.
    """
    base_values = [float(input(f"Input {name}: ")) for name in features]
    scaled_values = [mktvol * value for value in base_values]

    # Column layout: base features first, then their "_vol" counterparts.
    column_names = features + [name + "_vol" for name in features]

    # Wrapping the flat list in another list yields a single-row 2-D array.
    row = np.array([base_values + scaled_values])
    frame = pd.DataFrame(row, columns=column_names)
    return forest.predict(frame).item()
# Run one interactive prediction with mktvol fixed at 0.15.
predict(mktvol=0.15)