# Code
import pyreadr
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.metrics import mean_squared_error
# Read the R data file and keep the object stored under the "marketing" key.
market = pyreadr.read_r("./data/marketing.rda")['marketing']

# Tag every row as "train" or "test" by drawing one label per row with
# probabilities 0.75 / 0.25, so the split sizes are only approximately 75/25.
# NOTE(review): no seed is set in this section, so the labels change on
# every run unless the RNG is seeded elsewhere -- confirm this is intended.
shuffle_id = np.random.choice(
    ['train', 'test'],
    size=len(market),
    p=[0.75, 0.25],
    replace=True,
)
market['type'] = shuffle_id
# Model
from sklearn.linear_model import LinearRegression
# Fit a single-feature linear regression of "sales" on "youtube" using only
# the rows labelled "train". The double brackets around 'youtube' keep the
# feature selection two-dimensional (one column).
lr1 = LinearRegression()
lr1.fit(
    market[market['type'] == "train"][['youtube']],
    market[market['type'] == "train"]["sales"],
)

# Predict "sales" for the rows labelled "test".
y_hat = lr1.predict(market[market['type'] == "test"][['youtube']])
import plotly.express as px
import plotly.graph_objects as go
# Scatter plot of "sales" against "youtube", with points coloured by their
# train/test label.
fig1 = px.scatter(
    market,
    x="youtube",
    y="sales",
    color="type",
    color_discrete_map={"train": "#e89927", "test": "#3bbc35"},
)

# Overlay the predictions for the test rows as an extra trace; its line uses
# the same colour as the "train" points.
fig1.add_trace(
    go.Scatter(
        x=market[market['type'] == "test"]['youtube'],
        y=y_hat,
        name="Model built on train data",
        line={"color": "#e89927"},
    )
)

fig1.update_layout(title="SLR Model: Sales vs Youtube", width=600, height=250)
fig1.show()