Uploaded by praveenraj2001.007

19BCE0685 VL2020210504787 AST01

advertisement
2/21/2021
Untitled13.ipynb - Colaboratory
CSE 3020: Data Visualization
DA- 1
16 PLOTS FROM A DATASET
PRAVEEN RAJ M
19BCE0685
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter
# Load the same CSV into two independent frames: `data` is rebound to a
# derived summary table further down, while `da` keeps the raw rows for the
# plots that need the full data set.
CSV_PATH = '/content/final.csv'
data = pd.read_csv(CSV_PATH)
da = pd.read_csv(CSV_PATH)
1. BARPLOT
# Count how often each age occurs and keep the five most frequent ones.
age_count = Counter(data.age)
most_common_ages = age_count.most_common(5)
ages, frequencies = zip(*most_common_ages)
x = list(ages)
y = list(frequencies)

# visualization (bar plot): one bar per age, one palette colour per bar
plt.figure(figsize=(10, 10))
bar_palette = sns.cubehelix_palette(len(x))
ax = sns.barplot(x=x, y=y, palette=bar_palette)
plt.xlabel('Ages')
plt.ylabel('Frequency')
plt.title('Most Common 5 age of Students')
plt.show()
https://colab.research.google.com/drive/1WXteMZC2qGWuVMPcC8lnm7_YTo2S_54-#scrollTo=Rzny3nzWlMGN&printMode=true
1/15
2/21/2021
Untitled13.ipynb - Colaboratory
2. POINT PLOT
# Daily alcohol consumption vs first grade of each age
# Average first grade (G1) of each age.
# NOTE: the identifier/column name 'first_grades_avarage' keeps its original
# spelling because later cells look the column up by that exact string.
age_list = list(data['age'].unique())
first_grades_avarage = []
for age in age_list:
    # Rows belonging to this age only.
    rows_for_age = data[data['age'] == age]
    first_grade_mean = sum(rows_for_age.G1) / len(rows_for_age)
    first_grades_avarage.append(first_grade_mean)

df = pd.DataFrame({'age_list': age_list, 'first_grades_avarage': first_grades_avarage})
# Re-order the rows from the highest to the lowest average grade.
new_index = df['first_grades_avarage'].sort_values(ascending=False).index.values
sorted_data2 = df.reindex(new_index)
# alcohol consumption average of each age
age_list=list(data['age'].unique())
https://colab.research.google.com/drive/1WXteMZC2qGWuVMPcC8lnm7_YTo2S_54-#scrollTo=Rzny3nzWlMGN&printMode=true
2/15
2/21/2021
Untitled13.ipynb - Colaboratory
# Average daily alcohol consumption (Dalc) of each age.
age_list = list(data['age'].unique())
alcohol_consumptions = []
for age in age_list:
    # Rows belonging to this age only.
    rows_for_age = data[data['age'] == age]
    avarage_alcohol_consumptions = sum(rows_for_age.Dalc) / len(rows_for_age)
    alcohol_consumptions.append(avarage_alcohol_consumptions)

df = pd.DataFrame({'age_list': age_list, 'alcohol_consumptions': alcohol_consumptions})
# Re-order the rows from the highest to the lowest average consumption.
new_index = df['alcohol_consumptions'].sort_values(ascending=False).index.values
sorted_data3 = df.reindex(new_index)
# Daily alcohol consumption vs first grade of each age.
# Divide each series by its own maximum so both can share one value axis.
# NOTE(review): in the print-out both right-hand sides are cut off at the page
# margin right after "max(sorted_data2" / "max(sorted_data3". They are
# completed here with the column being scaled -- confirm against the notebook.
sorted_data2['first_grades_avarage'] = (
    sorted_data2['first_grades_avarage'] / max(sorted_data2['first_grades_avarage'])
)
sorted_data3['alcohol_consumptions'] = (
    sorted_data3['alcohol_consumptions'] / max(sorted_data3['alcohol_consumptions'])
)
# Put both scaled columns side by side. This rebinds `data`, which held the
# raw CSV frame up to this point.
data = pd.concat([sorted_data2, sorted_data3['alcohol_consumptions']], axis=1)
# visualization (point plot): both scaled series drawn on the same axes
f, ax = plt.subplots(figsize=(20, 10))
sns.pointplot(x='age_list', y='alcohol_consumptions', data=data, color='lime', alpha=0.8)
sns.pointplot(x='age_list', y='first_grades_avarage', data=data, color='red', alpha=0.8)
# The text labels act as the legend, so each one is drawn in the colour of its
# own series (the print-out had the two colours swapped).
plt.text(6, 0.6, 'alcohol consumptions', color='lime', fontsize=17, style='italic')
plt.text(6, 0.55, 'first grades avarage', color='red', fontsize=18, style='italic')
plt.xlabel('Ages', fontsize=15, color='blue')
plt.ylabel('Values', fontsize=15, color='blue')
# NOTE(review): this call was cut off at the page margin after "color='blue";
# it is closed here with no further arguments -- confirm nothing else followed.
plt.title('Daily alcohol consumptions vs first Grade of each age ', fontsize=20, color='blue')
plt.grid()
https://colab.research.google.com/drive/1WXteMZC2qGWuVMPcC8lnm7_YTo2S_54-#scrollTo=Rzny3nzWlMGN&printMode=true
3/15
2/21/2021
Untitled13.ipynb - Colaboratory
3.JOINT PLOT(KDE)
# Daily alcohol consumption vs first grade of each age, as a joint KDE plot.
# visualization (joint plot)
# x and y are passed by keyword: the positional form is what triggers the
# seaborn FutureWarning recorded in this cell's output.
g = sns.jointplot(
    x=data.first_grades_avarage,
    y=data.alcohol_consumptions,
    kind='kde',
    height=7,
)
plt.savefig('graph.png')
plt.show()
/usr/local/lib/python3.6/dist-packages/seaborn/_decorators.py:43: FutureWarning: Pas
FutureWarning
4.JOINT PLOT
# Daily alcohol consumption vs first grade of each age, as a default joint plot.
# x and y are passed by keyword to avoid the seaborn FutureWarning about
# positional arguments.
# NOTE(review): the original line is cut off at the page margin after
# "ratio=3,c"; the trailing argument (presumably a colour) is unknown and has
# been left out -- restore it from the notebook if needed.
g = sns.jointplot(
    x='first_grades_avarage',
    y='alcohol_consumptions',
    data=data,
    height=5,
    ratio=3,
)
https://colab.research.google.com/drive/1WXteMZC2qGWuVMPcC8lnm7_YTo2S_54-#scrollTo=Rzny3nzWlMGN&printMode=true
4/15
2/21/2021
Untitled13.ipynb - Colaboratory
/usr/local/lib/python3.6/dist-packages/seaborn/_decorators.py:43: FutureWarning: Pas
FutureWarning
5.PIE CHART
# Share of each health rating in the data.
# Count once and derive both the labels and the wedge sizes from that result.
health_counts = da.health.value_counts()
labels = health_counts.index
sizes = health_counts.values
colors = ['grey', 'blue', 'red', 'yellow', 'green']
# One (zero) offset per wedge. A fixed five-element list would make plt.pie
# fail whenever the number of distinct ratings is not exactly five.
explode = [0] * len(sizes)

# visualization (pie plot)
plt.figure(figsize=(7, 7))
plt.pie(sizes, explode=explode, labels=labels, colors=colors, autopct='%1.1f%%')
plt.title('Health Rates According in data', color='blue', fontsize=15)
plt.show()
https://colab.research.google.com/drive/1WXteMZC2qGWuVMPcC8lnm7_YTo2S_54-#scrollTo=Rzny3nzWlMGN&printMode=true
5/15
2/21/2021
Untitled13.ipynb - Colaboratory
6.KDE PLOT
# Daily alcohol consumption vs first grade of each age, as a bivariate KDE.
# visualization (kdeplot)
# x/y are passed by keyword (the positional form raises the FutureWarning seen
# in this cell's output) and `fill` replaces the older `shade` alias.
sns.kdeplot(
    x=data.first_grades_avarage,
    y=data.alcohol_consumptions,
    fill=True,
    cut=5,
)
plt.show()
/usr/local/lib/python3.6/dist-packages/seaborn/_decorators.py:43: FutureWarning: Pas
FutureWarning
7.LM PLOT
https://colab.research.google.com/drive/1WXteMZC2qGWuVMPcC8lnm7_YTo2S_54-#scrollTo=Rzny3nzWlMGN&printMode=true
6/15
2/21/2021
Untitled13.ipynb - Colaboratory
# Daily alcohol consumption vs first grade of each age, with a fitted
# regression line.
# visualization (lm-plot)
sns.lmplot(
    data=data,
    x='first_grades_avarage',
    y='alcohol_consumptions',
)
plt.show()
8.HEAT MAP
# Pairwise correlations between the columns of `data`, drawn as an annotated
# heatmap.
# visualization (seaborn heatmap)
correlations = data.corr()
f, ax = plt.subplots(figsize=(5, 5))
sns.heatmap(
    correlations,
    ax=ax,
    annot=True,       # print the coefficient inside every cell
    fmt='.1f',
    linewidths=.5,
    linecolor='r',
)
plt.show()
https://colab.research.google.com/drive/1WXteMZC2qGWuVMPcC8lnm7_YTo2S_54-#scrollTo=Rzny3nzWlMGN&printMode=true
7/15
2/21/2021
Untitled13.ipynb - Colaboratory
9.BOX PLOT
# Box plot of the first grade (G1):
#   x axis -> sex
#   y axis -> G1
#   hue    -> reason
sns.boxplot(
    data=da,
    x='sex',
    y='G1',
    hue='reason',
    palette='PRGn',
)
plt.show()
https://colab.research.google.com/drive/1WXteMZC2qGWuVMPcC8lnm7_YTo2S_54-#scrollTo=Rzny3nzWlMGN&printMode=true
8/15
2/21/2021
Untitled13.ipynb - Colaboratory
10. SWARM PLOT
# Swarm plot of the first grade (G1) per sex, coloured by Pstatus.
# visualization (seaborn swarm plot)
sns.swarmplot(
    data=da,
    x='sex',
    y='G1',
    hue='Pstatus',
)
plt.show()
/usr/local/lib/python3.6/dist-packages/seaborn/categorical.py:1296: UserWarning: 49.
warnings.warn(msg, UserWarning)
/usr/local/lib/python3.6/dist-packages/seaborn/categorical.py:1296: UserWarning: 36.
warnings.warn(msg, UserWarning)
11. PAIR PLOT
# Pairwise relationships between the remaining columns of `data`.
# visualization (seaborn pair plot)
# The age column is removed in place before plotting. errors='ignore' keeps
# the cell re-runnable: without it a second run raises KeyError because
# 'age_list' has already been dropped.
data.drop(['age_list'], axis=1, inplace=True, errors='ignore')
sns.pairplot(data)
plt.show()
https://colab.research.google.com/drive/1WXteMZC2qGWuVMPcC8lnm7_YTo2S_54-#scrollTo=Rzny3nzWlMGN&printMode=true
9/15
2/21/2021
Untitled13.ipynb - Colaboratory
12. COUNT PLOT
# Count plots: number of rows per category.
# The series is passed as x= because the positional form triggers the seaborn
# FutureWarning recorded in this cell's output.
# age
sns.countplot(x=da.age)
plt.title('age', color='blue', fontsize=15)
plt.show()
# school
sns.countplot(x=da.school)
plt.title('school', color='blue', fontsize=15)
plt.show()
https://colab.research.google.com/drive/1WXteMZC2qGWuVMPcC8lnm7_YTo2S_54-#scrollTo=Rzny3nzWlMGN&printMode=true
10/15
2/21/2021
Untitled13.ipynb - Colaboratory
/usr/local/lib/python3.6/dist-packages/seaborn/_decorators.py:43: FutureWarning: Pas
FutureWarning
/usr/local/lib/python3.6/dist-packages/seaborn/_decorators.py:43: FutureWarning: Pas
FutureWarning
13. SCATTER PLOT
# Scatter plot between G2 and G3 with mother's job (Mjob) as the category.
categories = np.unique(da['Mjob'])
# Spread the categories evenly over the tab10 colormap. The divisor is clamped
# to 1 so a single category does not cause a division by zero.
color_span = max(len(categories) - 1, 1)
colors = [plt.cm.tab10(i / float(color_span)) for i in range(len(categories))]
https://colab.research.google.com/drive/1WXteMZC2qGWuVMPcC8lnm7_YTo2S_54-#scrollTo=Rzny3nzWlMGN&printMode=true
11/15
2/21/2021
Untitled13.ipynb - Colaboratory
# Draw one scatter series per category, each in its own colour.
plt.figure(figsize=(6, 6), dpi=80, facecolor='w', edgecolor='k')
for i, category in enumerate(categories):
    # `color=` (not `c=`) is used for a single RGBA tuple: with `c=` matplotlib
    # warns that the tuple looks like a numeric sequence to be value-mapped.
    plt.scatter('G2', 'G3',
                data=da.loc[da.Mjob == category, :],
                s=20, color=colors[i], label=str(category))

# Decorations. The labels and title name the columns actually plotted (G2 on
# x, G3 on y); the previous texts described unrelated variables.
plt.gca().set(xlim=(0, 22), ylim=(0, 22),
              xlabel='G2', ylabel='G3')
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.title("Scatterplot of G2 vs G3 by Mother's Job", fontsize=13)
plt.legend(fontsize=12)
plt.show()
*c* argument looks like a single numeric RGB or RGBA sequence, which should be avoid
*c* argument looks like a single numeric RGB or RGBA sequence, which should be avoid
*c* argument looks like a single numeric RGB or RGBA sequence, which should be avoid
*c* argument looks like a single numeric RGB or RGBA sequence, which should be avoid
*c* argument looks like a single numeric RGB or RGBA sequence, which should be avoid
14. VIOLIN PLOT
# Violin plot of G2 scores by daily alcohol (Dalc)
# Draw Plot
plt.figure(figsize=(5, 5), dpi=80)
# NOTE(review): the print-out shows this call twice around a page break, here
# with its dot and commas lost. Punctuation restored; keep only one copy of
# the call when rebuilding the notebook.
sns.violinplot(x='Dalc', y='G2', data=da, scale='width',
               inner='quartile')
https://colab.research.google.com/drive/1WXteMZC2qGWuVMPcC8lnm7_YTo2S_54-#scrollTo=Rzny3nzWlMGN&printMode=true
12/15
2/21/2021
Untitled13.ipynb - Colaboratory
# NOTE(review): second rendering of the violin-plot call after the page break;
# the extraction dropped the string quotes, which would make Dalc, G2, width
# and quartile undefined names. Quotes restored.
sns.violinplot(x='Dalc', y='G2', data=da, scale='width', inner='quartile')
# Decoration
plt.title('Violin Plot of G2 scores by Daily Alcohol', fontsize=13)
plt.show()
15. TREEMAP
pip install squarify
Collecting squarify
Downloading https://files.pythonhosted.org/packages/0b/2b/2e77c35326efec19819cd1d7
Installing collected packages: squarify
Successfully installed squarify-0.4.3
# Treemap of health values: one rectangle per health rating, sized by count.
import squarify

# Count once and derive both the labels and the rectangle sizes from it.
health_counts = da.health.value_counts()
labels = health_counts.index
sizes = health_counts.values
# One Spectral colormap colour per rating.
colors = [plt.cm.Spectral(i / float(len(labels))) for i in range(len(labels))]

# Draw Plot
plt.figure(figsize=(6, 6), dpi=80)
squarify.plot(sizes=sizes, label=labels, color=colors, alpha=.8)

# Decorate
plt.title('Treemap of Health values')
plt.axis('off')
plt.show()
https://colab.research.google.com/drive/1WXteMZC2qGWuVMPcC8lnm7_YTo2S_54-#scrollTo=Rzny3nzWlMGN&printMode=true
13/15
2/21/2021
Untitled13.ipynb - Colaboratory
16. STACKED HISTOGRAM
# Stacked histogram of daily alcohol (Dalc), one stacked colour per age.
# Prepare data
x_var = 'Dalc'
groupby_var = 'age'
df_agg = da.loc[:, [x_var, groupby_var]].groupby(groupby_var)
# One list of x_var values per group. The loop variable is called `group` so
# it does not shadow the module-level frame `da`.
vals = [group[x_var].values.tolist() for _, group in df_agg]

# Draw
plt.figure(figsize=(8, 8), dpi=80)
# One Spectral colour per group; the divisor is clamped so a single group does
# not divide by zero.
color_span = max(len(vals) - 1, 1)
colors = [plt.cm.Spectral(i / float(color_span)) for i in range(len(vals))]
# One bin per distinct x_var value.
# NOTE(review): the original call is cut off after "density=Fals". The
# `color=` argument is an assumption (`colors` is otherwise only used for the
# legend) -- confirm against the notebook.
n, bins, patches = plt.hist(vals, len(da[x_var].unique()), stacked=True, density=False,
                            color=colors[:len(vals)])

# Decoration
# NOTE(review): the legend call is cut off after "colors[:l"; completed as
# colors[:len(vals)]. A dict is passed, so presumably only its keys (the group
# values) end up as legend labels -- confirm.
plt.legend({group: col for group, col in zip(np.unique(da[groupby_var]).tolist(), colors[:len(vals)])})
plt.title(f"Stacked Histogram of ${x_var}$ colored by ${groupby_var}$", fontsize=13)
plt.xlabel(x_var)
plt.ylabel("Frequency")
plt.ylim(0, 750)
# `bins` holds one more edge than there are bins, while the labels list has
# one entry per distinct value; the last edge is dropped so tick and label
# counts match.
# NOTE(review): the alignment value is cut off after "horizontalalignm";
# 'left' is an assumption -- confirm.
plt.xticks(ticks=bins[:-1], labels=np.unique(da[x_var]).tolist(), rotation=90,
           horizontalalignment='left')
plt.show()
https://colab.research.google.com/drive/1WXteMZC2qGWuVMPcC8lnm7_YTo2S_54-#scrollTo=Rzny3nzWlMGN&printMode=true
14/15
2/21/2021
Untitled13.ipynb - Colaboratory
/usr/local/lib/python3.6/dist-packages/numpy/core/_asarray.py:83: VisibleDeprecation
return array(a, dtype, copy=False, order=order)
https://colab.research.google.com/drive/1WXteMZC2qGWuVMPcC8lnm7_YTo2S_54-#scrollTo=Rzny3nzWlMGN&printMode=true
15/15
Download