import pyreadr
import numpy as np
import seaborn as sns
from sklearn.metrics import mean_squared_error
# market = pyreadr.read_r(path1)
# market = market['marketing']
# NOTE(review): `data` is not defined in the visible part of this script; it is
# presumably a pandas DataFrame containing these columns -- confirm it is
# loaded before this point.
# Take an explicit copy so that the 'type' column added below is written to an
# independent frame rather than to a selection of `data`; this avoids pandas'
# SettingWithCopyWarning on versions without Copy-on-Write.
X = data[['weight', 'horsepower', 'acceleration', 'mpg']].copy()

# Label every row independently as 'train' (p=0.75) or 'test' (p=0.25).
# Labels are drawn per row and no seed is set, so the realised split is only
# approximately 75/25 and changes from run to run.
shuffle_id = np.random.choice(['train', 'test'],
                              replace=True,
                              p=[0.75, 0.25],
                              size=X.shape[0])
X['type'] = shuffle_id
# Model
from sklearn.linear_model import LinearRegression
# Fit mpg ~ weight using only the rows labelled "train".
train_rows = X["type"] == "train"
train_features = X.loc[train_rows, ["weight"]]
train_target = X.loc[train_rows, "mpg"]
lr1 = LinearRegression()
lr1.fit(train_features, train_target)

# In-sample fitted values, in the same row order as the training rows of X.
y_hat = lr1.predict(train_features)
import plotly.express as px
import plotly.graph_objects as go
# Scatter of mpg against weight, with points coloured by their train/test label.
split_colors = {
    "train": "#e89927",
    "test": "#3bbc35",
}
fig1 = px.scatter(
    data_frame=X,
    x="weight",
    y="mpg",
    color="type",
    color_discrete_map=split_colors,
)

# Overlay the fitted values at the training weights; y_hat is paired with the
# "train" rows of X in the same order.
fitted_trace = go.Scatter(
    x=X.loc[X["type"] == "train", "weight"],
    y=y_hat,
    name="Model built on train data",
    line={"color": "blue"},
)
fig1.add_trace(fitted_trace)

fig1.update_layout(title="SLR Model: mpg vs weight", width=600, height=250)
fig1.show()