import pandas as pd
from sqlalchemy import create_engine
import pymssql
server = 'fs.rice.edu'
database = 'stocks'
username = 'stocks'
password = '6LAZH1'
string = "mssql+pymssql://" + username + ":" + password + "@" + server + "/" + database
conn = create_engine(string).connect()
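If the password ever contains characters that need URL escaping, an alternative is to let SQLAlchemy assemble the URL. A minimal sketch, using URL.create (available since SQLAlchemy 1.4):
# Alternative: build the URL with sqlalchemy.engine.URL.create, which
# handles any characters in the credentials that would need escaping.
from sqlalchemy.engine import URL
url = URL.create(
    "mssql+pymssql",
    username=username,
    password=password,
    host=server,
    database=database,
)
# conn = create_engine(url).connect()  # equivalent to the line above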
Data from SF1
sf1 = pd.read_sql(
    """
    select ticker, datekey, lastupdated, netinc, ncfo, equity, assets
    from sf1
    where dimension='ARQ' and datekey>='2021-01-01' and equity>0 and assets>0
    order by ticker, datekey
    """,
    conn,
    parse_dates=["datekey"]
)
# collapse any duplicate rows, then drop the lastupdated level
sf1 = sf1.groupby(["ticker", "datekey", "lastupdated"]).last()
sf1 = sf1.droplevel("lastupdated")
sf1 = sf1.reset_index()
for col in ["netinc", "ncfo"]:
sf1[col] = sf1.groupby("ticker", group_keys=False)[col].apply(
lambda x: x.rolling(4).sum()
)
for col in ["equity", "assets"]:
sf1[col] = sf1.groupby("ticker", group_keys=False)[col].apply(
lambda x: x.rolling(4).mean()
)
sf1["roe"] = sf1.netinc / sf1.equity
sf1["accruals"] = (sf1.netinc - sf1.ncfo) / sf1.equity
sf1["agr"] = sf1.groupby("ticker", group_keys=False)["assets"].pct_change()
sf1 = sf1[["ticker", "datekey", "roe", "accruals", "agr"]].dropna()
Data from sep_weekly
sep_weekly = pd.read_sql(
    """
    select ticker, date, volume, closeadj, closeunadj, lastupdated
    from sep_weekly
    where date>='2022-01-01'
    order by ticker, date, lastupdated
    """,
    conn,
    parse_dates=["date"]
)
sep_weekly = sep_weekly.groupby(["ticker", "date", "lastupdated"]).last()
sep_weekly = sep_weekly.droplevel("lastupdated")
sep_weekly["ret"] = sep_weekly.groupby("ticker", group_keys=False).closeadj.pct_change()
sep_weekly["annual"] = sep_weekly.groupby("ticker", group_keys=False).closeadj.pct_change(52)
sep_weekly["monthly"] = sep_weekly.groupby("ticker", group_keys=False).closeadj.pct_change(4)
sep_weekly["mom"] = sep_weekly.groupby("ticker", group_keys=False).apply(
lambda d: (1+d.annual)/(1+d.monthly) - 1
)
sep_weekly["volatility"] = sep_weekly.groupby("ticker", group_keys=False).ret.apply(
lambda x: x.rolling(26).std()
)
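To see why mom skips the most recent month: 1+annual is today's price over the price 52 weeks ago, and 1+monthly is today's price over the price 4 weeks ago, so their ratio is the price 4 weeks ago over the price 52 weeks ago. An equivalent computation, as a verification sketch only (it must run before closeadj is dropped below):
# same signal computed directly from shifted prices
mom_check = sep_weekly.groupby("ticker", group_keys=False).closeadj.apply(
    lambda p: p.shift(4) / p.shift(52) - 1
)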
sep_weekly = sep_weekly[["mom", "volume", "volatility", "closeunadj"]]
sep_weekly = sep_weekly.reset_index()
Data from weekly
weekly = pd.read_sql(
    """
    select ticker, date, marketcap, pb, lastupdated
    from weekly
    where date>='2022-01-01' and marketcap>0 and pb>0
    order by ticker, date, lastupdated
    """,
    conn,
    parse_dates=["date"]
)
weekly = weekly.groupby(["ticker", "date", "lastupdated"]).last()
weekly = weekly.droplevel("lastupdated")
weekly = weekly.reset_index()
df = weekly.merge(sep_weekly, on=["ticker", "date"], how="inner")
df["year"] = df.date.apply(lambda x: x.isocalendar()[0])
df["week"] = df.date.apply(lambda x: x.isocalendar()[1])
sf1["year"] = sf1.datekey.apply(lambda x: x.isocalendar()[0])
sf1["week"] = sf1.datekey.apply(lambda x: x.isocalendar()[1])
df = df.merge(sf1, on=["ticker", "year", "week"], how="left")
df = df.drop(columns=["year", "week", "datekey"])
for col in ["roe", "accruals", "agr"]:
df[col] = df.groupby("ticker", group_keys=False)[col].apply(
lambda x: x.ffill()
)
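A note on the forward fill: doing it within the ticker groupby keeps one firm's fundamentals from bleeding into the next ticker's rows. A toy illustration, not part of the pipeline:
import pandas as pd
toy = pd.DataFrame({
    "ticker": ["A", "A", "B", "B"],
    "roe":    [0.12, None, None, 0.08],
})
# A's 0.12 fills forward within A only; B's leading NaN stays NaN
print(toy.groupby("ticker", group_keys=False)["roe"].apply(lambda x: x.ffill()))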
tickers = pd.read_sql(
    """
    select ticker, sector from tickers
    """,
    conn
)
df = df.merge(tickers, on="ticker")
df = df[df.date==df.date.max()].copy()  # keep only the most recent week
df = df[df.closeunadj>5]                # drop penny stocks (unadjusted price <= $5)
df = df.dropna()
# rank by market cap and keep numbers 1001-3000, roughly a small-cap universe
df["rnk"] = df.marketcap.rank(
    ascending=False,
    method="first"
)
df = df[(df.rnk>1000) & (df.rnk<=3000)]
df = df.drop(columns=["closeunadj", "rnk"])
features = [
    "marketcap",
    "pb",
    "mom",
    "volume",
    "volatility",
    "roe",
    "accruals"
]
# change this to "./" if forest.joblib is in your working directory
path_to_file = "../../"
from joblib import load
forest = load(path_to_file + "forest.joblib")
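The feature columns must match what the forest was trained on. A sanity check, as a sketch: it assumes the model was fit on a DataFrame with scikit-learn 1.0+, which records the training columns in feature_names_in_.
# verify the feature list matches the model's training columns, if recorded
if hasattr(forest, "feature_names_in_"):
    assert list(forest.feature_names_in_) == features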
df["predict"] = forest.predict(X=df[features])
df["rnk_long"] = df.predict.rank(
ascending=False,
method="first"
)
df["rnk_short"] = df.predict.rank(
ascending=True,
method="first"
)
longs = df[df.rnk_long<=44]
shorts = df[df.rnk_short<=44]
df["rnk_long"] = df.groupby("sector", group_keys=False).predict.rank(
ascending=False,
method="first"
)
df["rnk_short"] = df.groupby("sector", group_keys=False).predict.rank(
ascending=True,
method="first"
)
longs_neutral = df[df.rnk_long<=4]
shorts_neutral = df[df.rnk_short<=4]
with pd.ExcelWriter("portfolios 2023-11-08.xlsx") as writer:
    longs.to_excel(writer, sheet_name="long", index=False)
    shorts.to_excel(writer, sheet_name="short", index=False)
    longs_neutral.to_excel(writer, sheet_name="long neutral", index=False)
    shorts_neutral.to_excel(writer, sheet_name="short neutral", index=False)
    df.to_excel(writer, sheet_name="today", index=False)