Uploaded by Sohom Chakraborty

vertopal.com linear model on Advertising (2)

advertisement
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm
from sklearn.metrics import mean_squared_error,r2_score
# Absolute location of the Advertising CSV on the author's machine;
# adjust this path when running the notebook elsewhere.
csv_path = r"C:\Users\goura\Downloads\CWML\Advertising.csv"
data = pd.read_csv(csv_path)
data
     Unnamed: 0     TV  radio  newspaper  sales
0             1  230.1   37.8       69.2   22.1
1             2   44.5   39.3       45.1   10.4
2             3   17.2   45.9       69.3    9.3
3             4  151.5   41.3       58.5   18.5
4             5  180.8   10.8       58.4   12.9
..          ...    ...    ...        ...    ...
195         196   38.2    3.7       13.8    7.6
196         197   94.2    4.9        8.1    9.7
197         198  177.0    9.3        6.4   12.8
198         199  283.6   42.0       66.2   25.5
199         200  232.1    8.6        8.7   13.4

[200 rows x 5 columns]
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype
---  ------      --------------  -----
 0   Unnamed: 0  200 non-null    int64
 1   TV          200 non-null    float64
 2   radio       200 non-null    float64
 3   newspaper   200 non-null    float64
 4   sales       200 non-null    float64
dtypes: float64(4), int64(1)
memory usage: 7.9 KB
data.corr()
Unnamed: 0
TV
radio
newspaper
sales
data.skew()
Unnamed: 0
1.000000
0.017715
-0.110680
-0.154944
-0.051616
TV
radio
0.017715 -0.110680
1.000000 0.054809
0.054809 1.000000
0.056648 0.354104
0.782224 0.576223
newspaper
sales
-0.154944 -0.051616
0.056648 0.782224
0.354104 0.576223
1.000000 0.228299
0.228299 1.000000
Unnamed: 0
0.000000
TV
-0.069853
radio
0.094175
newspaper
0.894720
sales
0.407571
dtype: float64
data.hist()
array([[<Axes:
<Axes:
[<Axes:
<Axes:
[<Axes:
title={'center':
title={'center':
title={'center':
title={'center':
title={'center':
data.dtypes
Unnamed: 0      int64
TV            float64
radio         float64
newspaper     float64
sales         float64
dtype: object
# Independent variables (predictors), selected by column name rather than by
# position so the selection does not depend on the extra "Unnamed: 0" column
# that read_csv produced from the file's index.
X_IV = data[["TV", "radio", "newspaper"]]
# Dependent variable (target).
y_DV = data["sales"]
'Unnamed: 0'}>,
'TV'}>],
'radio'}>,
'newspaper'}>],
'sales'}>, <Axes: >]], dtype=object)
X_IV
        TV  radio  newspaper
0    230.1   37.8       69.2
1     44.5   39.3       45.1
2     17.2   45.9       69.3
3    151.5   41.3       58.5
4    180.8   10.8       58.4
..     ...    ...        ...
195   38.2    3.7       13.8
196   94.2    4.9        8.1
197  177.0    9.3        6.4
198  283.6   42.0       66.2
199  232.1    8.6        8.7

[200 rows x 3 columns]
y_DV
0      22.1
1      10.4
2       9.3
3      18.5
4      12.9
       ...
195     7.6
196     9.7
197    12.8
198    25.5
199    13.4
Name: sales, Length: 200, dtype: float64
from statsmodels.stats.api import linear_rainbow,het_goldfeldquandt
# statsmodels' OLS does not add an intercept by itself, so the constant column
# is added explicitly; without it the model is forced through the origin,
# unlike the sklearn LinearRegression fitted later in this notebook.
myLrModel = sm.OLS(y_DV, sm.add_constant(X_IV)).fit()
# Rainbow test for linearity of the fitted model: (F statistic, p-value).
sats = linear_rainbow(myLrModel)
sats
(0.9011628312360566, 0.6971094281960714)
# linear_rainbow returns (F statistic, p-value). The decision has to be made
# on the p-value (index 1), not on the statistic (index 0). A p-value above
# the 0.05 level means the null hypothesis is NOT rejected.
rainbow_p_value = sats[1]
if rainbow_p_value > 0.05:
    print("the p value is", round(rainbow_p_value, 4), ">0.05 so fail to reject the null")
else:
    print("the p value is", round(rainbow_p_value, 4), "<=0.05 so reject the null")
the p value is 0.6971 >0.05 so reject the null
# Fit an ordinary least-squares regression of sales on the three predictors.
trainer = LinearRegression()
trainer.fit(X_IV, y_DV)
# fit() returns the estimator itself, so `lr` and `trainer` are the same object.
lr = trainer
lr.coef_
array([ 0.04576465,
0.18853002, -0.00103749])
lr.intercept_
2.938889369459412
y_pred=lr.predict(X_IV)
y_pred
array([20.52397441,
12.47834763,
7.0322992 ,
20.81929952,
18.10076728,
15.6100386 ,
21.6339338 ,
17.00682618,
16.37766467,
15.16152314,
12.63121132,
21.2926106 ,
5.74215558,
7.84904532,
17.77949782,
11.85832174,
11.44592364,
15.18140789,
9.99922965,
16.31492112,
13.87595844,
17.93641467,
14.7598741 ,
12.91968895,
14.41010605,
9.12734958,
10.4276871 ,
14.00141385,
9.48964097,
9.70853872,
18.36720534,
5.3075589 ,
14.21383298,
14.22372138,
7.39497997,
24.78687031,
10.5871997 ,
20.80301059,
12.33785482,
11.72975995,
17.28512918,
12.82365674,
14.7405382 ,
14.98951429,
11.3460929 ,
23.40590052,
17.2959832 ,
8.87338673,
9.33981296,
8.52771254,
22.89067208,
9.01603163,
10.62693815,
4.47758904,
14.64794093,
11.59870739,
4.49631598,
12.63571716,
23.24248685,
6.12602215,
21.14027498,
11.97874744,
7.83807642,
10.57717411,
15.57779819,
11.45348627,
18.39902932,
15.29422368,
10.0095377 ,
15.38485192,
13.55914568,
10.82439531,
14.35827373,
19.9793727 ,
13.92809918,
9.69137313,
12.30767078,
12.12295317,
10.57712073,
23.22495716,
6.4891503 ,
17.05167344,
7.63888314,
15.62347779,
21.59580326,
21.7226299 ,
20.66297563,
12.77458802,
16.78426073,
12.0370073 ,
10.36684881,
13.81190223,
10.17840799,
15.59378475,
19.15639616,
15.33707782,
17.64409385,
7.10850249,
13.88060985,
6.5707774 ,
13.6264571 ,
6.599669 ,
8.44915012,
20.85125198,
19.24986927,
23.26086103,
16.3600003 ,
10.0143112 ,
14.94678206,
13.36324677,
7.60769238,
12.1620464 ,
6.55467   ,
17.07644223,
17.59782951,
3.72734086,
8.82630048,
9.95168206,
16.5459329 ,
19.41053803,
18.86426829,
9.90868103,
13.96385684,
16.26362018,
19.94469957,
21.89805198,
13.21069202,
18.97657924,
9.90298206,
8.81331353,
14.42184212,
11.71127101,
21.22757378,
24.11860723,
14.76221142,
3.58725841,
16.40377623,
15.56609348,
15.0827909 ,
22.25549161,
19.26692307,
9.76842795,
8.76480262,
12.26335941,
18.22390132,
10.38419866,
17.35163608,
17.1861428 ,
11.97093887,
16.01099722,
24.13310013,
18.64430648,
13.18867186,
12.55084872,
18.43436638,
14.16607293,
8.14651887,
9.14402389,
7.57483051,
20.44761039,
8.88787996,
8.1681656 ,
20.37443008,
18.13348698,
16.97773556,
21.10891244,
17.32931197,
9.67530328,
20.78136464,
16.92225511,
10.48212385,
16.94035021,
20.30110878,
19.69293106,
15.30509593,
6.82006767,
19.45441306,
7.88410649,
11.8368039 ,
19.67547632,
10.09133403,
9.8272711 ,
15.50161696,
12.39914823,
11.0682946 ,
17.9415563 ,
13.74435742,
12.38455495,
18.53852096,
6.05162411,
12.4891591 ,
5.37034248,
8.42401933, 4.46622956, 18.48695797, 16.49530044,
8.16531236, 12.78592082, 23.76732149, 15.17319554])
# Arguments in sklearn's documented (y_true, y_pred) order, matching the
# r2_score call below; the MSE value itself is unaffected by the order.
mean_squared_error(y_DV, y_pred)
2.784126314510936
r2_score(y_DV,y_pred)
0.8972106381789522
# Feature values for a single observation, ordered as (TV, radio, newspaper).
# Named `sample_input` instead of `input` so the builtin input() stays usable.
sample_input = (38.2, 3.7, 17.2)
test = np.asarray(sample_input)
test_reshape = test.reshape(1, -1)  # one row, one column per feature
# The model was fitted on a DataFrame, so the training column names are
# attached here; predicting on a bare ndarray raises sklearn's
# "X does not have valid feature names" UserWarning.
t_pred = lr.predict(pd.DataFrame(test_reshape, columns=X_IV.columns))
C:\Users\goura\AppData\Roaming\Python\Python310\site-packages\sklearn\base.py:450: UserWarning: X does not have valid feature names, but LinearRegression was fitted with feature names
  warnings.warn(
t_pred
array([5.36681501])
print(f"The sales amount for above input={t_pred[0]}")
The sales amount for above input=5.366815008122364
def predict(Tv: float, Radio: float, Newspaper: float) -> float:
    """Return the sales value the fitted model predicts for one observation.

    Args:
        Tv: Value of the ``TV`` feature.
        Radio: Value of the ``radio`` feature.
        Newspaper: Value of the ``newspaper`` feature.

    Returns:
        The model's prediction as a plain Python float.
    """
    # Build a one-row frame carrying the column names the model was fitted
    # with; passing a bare ndarray makes sklearn emit the
    # "X does not have valid feature names" UserWarning on every call.
    row = pd.DataFrame([[Tv, Radio, Newspaper]], columns=lr.feature_names_in_)
    return float(lr.predict(row)[0])
predict(56.3,45,23)
Amount of Insurance:
C:\Users\goura\AppData\Roaming\Python\Python310\site-packages\sklearn\base.py:450: UserWarning: X does not have valid feature names, but LinearRegression was fitted with feature names
  warnings.warn(
13.97542732994054
import gradio as gr
C:\Users\goura\AppData\Roaming\Python\Python310\site-packages\sklearn\base.py:450: UserWarning: X does not have valid feature names, but LinearRegression was fitted with feature names
  warnings.warn(
# Wrap predict() in a Gradio form: three numeric inputs (passed positionally
# as Tv, Radio, Newspaper) and one numeric output, then start the local server.
demo = gr.Interface(fn=predict, inputs=["number"] * 3, outputs=["number"])
demo.launch()
Running on local URL:
http://127.0.0.1:7872
To create a public link, set `share=True` in `launch()`.
<IPython.core.display.HTML object>
Download