Linear Regression in Python for Sales Prediction

# Sales prediction from advertising spend, reconstructed from a notebook export.
#
# Workflow: load the Advertising data set, inspect it, check the linearity
# assumption with the Rainbow test, fit a multiple linear regression of
# `sales` on TV / radio / newspaper spend, evaluate the in-sample fit, and
# expose the fitted model through a small Gradio form.
#
# "Recorded output" comments quote values printed by the original notebook
# run; long table and array dumps from that run are omitted.

import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from statsmodels.stats.api import het_goldfeldquandt, linear_rainbow

# ---------------------------------------------------------------------------
# Load and inspect the data
# ---------------------------------------------------------------------------
# NOTE(review): machine-specific absolute path kept from the original
# notebook; point it at your own copy of Advertising.csv.
data = pd.read_csv(r"C:\Users\goura\Downloads\CWML\Advertising.csv")

# Recorded output: 200 rows x 5 columns ("Unnamed: 0", TV, radio, newspaper,
# sales), no missing values; "Unnamed: 0" is int64, the rest float64.
print(data)
data.info()
print(data.corr())
print(data.skew())
data.hist()
plt.show()  # a plain script needs an explicit show() to render the histograms
print(data.dtypes)

# Columns 1..3 are the predictors (TV, radio, newspaper); column 0
# ("Unnamed: 0") is left out of the model.
X_IV = data.iloc[:, 1:4]
y_DV = data["sales"]
print(X_IV)
print(y_DV)

# ---------------------------------------------------------------------------
# Linearity check (Rainbow test)
# ---------------------------------------------------------------------------
# statsmodels' OLS does not add an intercept on its own, so add the constant
# explicitly; otherwise the diagnostic is run on a different model from the
# intercept-bearing LinearRegression fitted below.
myLrModel = sm.OLS(y_DV, sm.add_constant(X_IV)).fit()

# linear_rainbow returns (F statistic, p-value).
# Recorded output of the original notebook, whose OLS fit had no intercept:
# (0.9011628312360566, 0.6971094281960714).
sats = linear_rainbow(myLrModel)
print(sats)

# The decision must be based on the p-value (index 1), not on the F statistic
# (index 0). A p-value above 0.05 means the null hypothesis (the linear
# specification is adequate) is NOT rejected.
p_value = sats[1]
if p_value > 0.05:
    print("the p value is", round(p_value, 4), ">0.05 so fail to reject the null")
else:
    print("the p value is", round(p_value, 4), "<=0.05 so reject the null")

# ---------------------------------------------------------------------------
# Fit and evaluate the regression model
# ---------------------------------------------------------------------------
trainer = LinearRegression()
lr = trainer.fit(X_IV, y_DV)

# Recorded output: coef_ = [0.04576465, 0.18853002, -0.00103749]
#                  intercept_ = 2.938889369459412
print(lr.coef_)
print(lr.intercept_)

# In-sample predictions: the model is scored on the data it was fitted on.
y_pred = lr.predict(X_IV)
print(y_pred)

# scikit-learn metrics take (y_true, y_pred). MSE is symmetric, so the value
# is unchanged, but the conventional order avoids mistakes with other metrics.
# Recorded output: MSE = 2.784126314510936, R^2 = 0.8972106381789522
print(mean_squared_error(y_DV, y_pred))
print(r2_score(y_DV, y_pred))

# ---------------------------------------------------------------------------
# Single prediction
# ---------------------------------------------------------------------------
# Build the sample as a DataFrame with the training column names. Passing a
# bare NumPy array to a model fitted on a DataFrame triggers scikit-learn's
# "X does not have valid feature names" warning. The old variable name
# `input` also shadowed the builtin.
sample = pd.DataFrame([[38.2, 3.7, 17.2]], columns=X_IV.columns)
t_pred = lr.predict(sample)
# Recorded output: array([5.36681501])
print(t_pred)
print(f"The sales amount for above input={t_pred[0]}")


def predict(Tv, Radio, Newspaper):
    """Return the predicted sales for one set of advertising spends.

    Args:
        Tv: TV advertising spend.
        Radio: Radio advertising spend.
        Newspaper: Newspaper advertising spend.

    Returns:
        The prediction of the module-level fitted model ``lr`` for the
        given spends (first and only element of the prediction array).
    """
    # Name the columns exactly as at fit time so scikit-learn's feature-name
    # validation passes without a warning.
    features = pd.DataFrame([[Tv, Radio, Newspaper]], columns=X_IV.columns)
    return lr.predict(features)[0]


# Recorded output: 13.97542732994054
print(predict(56.3, 45, 23))

# ---------------------------------------------------------------------------
# Interactive demo
# ---------------------------------------------------------------------------
# Three numeric inputs map positionally onto predict(Tv, Radio, Newspaper).
demo = gr.Interface(
    fn=predict,
    inputs=["number", "number", "number"],
    outputs=["number"],
)
# Recorded output: "Running on local URL: http://127.0.0.1:7872"
demo.launch()

Linear Regression in Python for Sales Prediction

Products

Support

Linear Regression in Python for Sales Prediction

Add this document to collection(s)

Add this document to saved

Suggest how we can improve StudyLib