# CSE 3020: Data Visualization -- DA-1: 16 plots from a dataset
# Author: PRAVEEN RAJ M (19BCE0685)
#
# Recovered from a printed export of the Colab notebook "Untitled13.ipynb"
# (2/21/2021).  The page headers, page numbers and notebook URL repeated on
# every printed page were print residue, not program text.

from collections import Counter

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

CSV_PATH = '/content/final.csv'

# `da` holds the raw records for the whole script.  `data` starts out as an
# independent copy of the same records; the point-plot section rebinds it to
# a per-age summary table.
da = pd.read_csv(CSV_PATH)
data = da.copy()


def _mean_by_age(frame, column, out_name):
    """Return the per-age mean of *column*, sorted from highest to lowest.

    Args:
        frame: DataFrame with an ``age`` column and a numeric *column*.
        column: Name of the column to average within each age.
        out_name: Name given to the column of means in the result.

    Returns:
        DataFrame with columns ``age_list`` and *out_name*.  Row labels are
        the position of each age in order of first appearance in *frame*, so
        two tables built from the same frame can be joined on their index.
        Missing values in *column* are skipped by ``mean``.
    """
    means = frame.groupby('age', sort=False)[column].mean()
    table = pd.DataFrame({
        'age_list': means.index.tolist(),
        out_name: means.tolist(),
    })
    return table.sort_values(out_name, ascending=False)


# 1. BARPLOT
# The five most frequent ages and how often each occurs.
age_count = Counter(data.age)
most_common_ages = age_count.most_common(5)
ages = [age for age, _ in most_common_ages]
counts = [count for _, count in most_common_ages]

# visualization (barplot)
plt.figure(figsize=(10, 10))
ax = sns.barplot(x=ages, y=counts, palette=sns.cubehelix_palette(len(ages)))
plt.xlabel('Ages')
plt.ylabel('Frequency')
plt.title('Most Common 5 age of Students')
plt.show()

# 2. POINT PLOT
# Daily alcohol consumption vs first grade of each age

# first grade (G1) average of each age
sorted_data2 = _mean_by_age(data, 'G1', 'first_grades_avarage')

# daily alcohol consumption (Dalc) average of each age
sorted_data3 = _mean_by_age(data, 'Dalc', 'alcohol_consumptions')

# Daily alcohol consumptions vs
# First Grade of each age

# Divide each series by its own maximum so both peak at 1 and can share one
# y-axis.  (The closing "['...'])" of both statements was cut off in the
# printed export and has been restored.)
sorted_data2['first_grades_avarage'] = (
    sorted_data2['first_grades_avarage']
    / sorted_data2['first_grades_avarage'].max()
)
sorted_data3['alcohol_consumptions'] = (
    sorted_data3['alcohol_consumptions']
    / sorted_data3['alcohol_consumptions'].max()
)
# Column-wise concat aligns the two tables on their shared row labels.
data = pd.concat([sorted_data2, sorted_data3['alcohol_consumptions']], axis=1)

# visualization (point plot)
# One colour per series, reused for the caption text so that each caption
# matches its line (the captions previously had the two colours swapped).
alcohol_color = 'lime'
grade_color = 'red'
f, ax = plt.subplots(figsize=(20, 10))
sns.pointplot(x='age_list', y='alcohol_consumptions', data=data,
              color=alcohol_color, alpha=0.8)
sns.pointplot(x='age_list', y='first_grades_avarage', data=data,
              color=grade_color, alpha=0.8)
plt.text(6, 0.6, 'alcohol consumptions', color=alcohol_color, fontsize=17,
         style='italic')
plt.text(6, 0.55, 'first grades avarage', color=grade_color, fontsize=18,
         style='italic')
plt.xlabel('Ages', fontsize=15, color='blue')
plt.ylabel('Values', fontsize=15, color='blue')
plt.title('Daily alcohol consumptions vs first Grade of each age ',
          fontsize=20, color='blue')
plt.grid()
plt.show()

# 3. JOINT PLOT (KDE)
# Daily alcohol consumption vs first grade of each age as a joint KDE plot.
# x and y are passed by keyword: the recorded run emitted a seaborn
# FutureWarning for the positional form.
g = sns.jointplot(x=data.first_grades_avarage, y=data.alcohol_consumptions,
                  kind='kde', height=7)
plt.savefig('graph.png')
plt.show()

# 4. JOINT PLOT
# Same pair of variables as a scatter joint plot.
# NOTE(review): the original call was truncated after "ratio=3,c"; the last
# argument is presumed to be color='r' -- confirm against the notebook.
g = sns.jointplot(x='first_grades_avarage', y='alcohol_consumptions',
                  data=data, height=5, ratio=3, color='r')
plt.show()

# 5. PIE CHART
# Health rates in the data: one wedge per distinct `health` value.
labels = da.health.value_counts().index
colors = ['grey', 'blue', 'red', 'yellow', 'green']
explode = [0, 0, 0, 0, 0]
# Wedge sizes for the health pie chart: occurrences of each `health` value.
sizes = da.health.value_counts().values

# visualization (pie plot)
plt.figure(figsize=(7, 7))
plt.pie(sizes, explode=explode, labels=labels, colors=colors,
        autopct='%1.1f%%')
plt.title('Health Rates According in data', color='blue', fontsize=15)
plt.show()

# 6. KDE PLOT
# Daily alcohol consumption vs first grade of each age as a filled 2-D KDE.
# x and y are passed by keyword (the recorded run emitted a seaborn
# FutureWarning for the positional form) and `fill` replaces the deprecated
# `shade` argument.
sns.kdeplot(x=data.first_grades_avarage, y=data.alcohol_consumptions,
            fill=True, cut=5)
plt.show()

# 7. LM PLOT
# Same pair of variables with a fitted regression line.
sns.lmplot(x='first_grades_avarage', y='alcohol_consumptions', data=data)
plt.show()

# 8. HEAT MAP
# Correlation matrix of the columns of `data`, annotated to one decimal.
f, ax = plt.subplots(figsize=(5, 5))
sns.heatmap(data.corr(), annot=True, linewidths=.5, linecolor='r', fmt='.1f',
            ax=ax)
plt.show()

# 9. BOX PLOT
# First grade (G1) by sex, split by `reason`.
sns.boxplot(x='sex', y='G1', hue='reason', data=da, palette='PRGn')
plt.show()

# 10.
# SWARM PLOT
# First grade (G1) by sex, coloured by `Pstatus`.
# NOTE(review): the recorded run emitted two seaborn UserWarnings here whose
# text was truncated to "49." and "36." -- presumably the share of points
# that could not be placed; confirm, and consider a smaller marker size.
sns.swarmplot(x='sex', y='G1', hue='Pstatus', data=da)
plt.show()

# 11. PAIR PLOT
# Pairwise relationships between the columns of `data` other than
# `age_list`.  Rebinding with errors='ignore' keeps this step re-runnable:
# an in-place drop raises KeyError once the column is already gone.
data = data.drop(columns=['age_list'], errors='ignore')
sns.pairplot(data)
plt.show()

# 12. COUNT PLOT
# The vector is passed as x= by keyword: the recorded run emitted a seaborn
# FutureWarning for the positional form.

# age
sns.countplot(x=da.age)
plt.title('age', color='blue', fontsize=15)
plt.show()

# school
sns.countplot(x=da.school)
plt.title('school', color='blue', fontsize=15)
plt.show()

# 13.
# SCATTER PLOT
# Scatter plot of G2 against G3, one colour per mother's job (Mjob).
categories = np.unique(da['Mjob'])
# Spread the categories evenly over the tab10 colormap; max(..., 1) avoids a
# division by zero when there is only one category.
color_steps = max(len(categories) - 1, 1)
colors = [plt.cm.tab10(i / float(color_steps)) for i in range(len(categories))]

# Draw one scatter series per category
plt.figure(figsize=(6, 6), dpi=80, facecolor='w', edgecolor='k')
for category, category_color in zip(categories, colors):
    # color= (not c=) for a single RGBA tuple: with c= matplotlib warned that
    # the value "looks like a single numeric RGB or RGBA sequence".
    plt.scatter('G2', 'G3', data=da.loc[da.Mjob == category, :], s=20,
                color=category_color, label=str(category))

# Decorations -- the labels name the columns actually plotted (they
# previously read "Free Time", "Age" and "Going Out").
plt.gca().set(xlim=(0, 22), ylim=(0, 22), xlabel='G2', ylabel='G3')
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.title("Scatterplot of G2 vs G3 by Mother's Job", fontsize=13)
plt.legend(fontsize=12)
plt.show()

# 14. VIOLIN PLOT
# Violin plot of G2 scores by daily alcohol consumption (Dalc).
plt.figure(figsize=(5, 5), dpi=80)
# The printed export showed this call twice, once without the dot and commas
# and once without the quotes; this is the single intended statement.
sns.violinplot(x='Dalc', y='G2', data=da, scale='width', inner='quartile')

# Decoration
plt.title('Violin Plot of G2 scores by Daily Alcohol', fontsize=13)
plt.show()

# 15.
# TREEMAP
# Notebook-only setup step (run in a shell or a notebook cell, not here):
#     pip install squarify
# The recorded run installed squarify-0.4.3.

# Treemap of health values: one rectangle per distinct `health` value, sized
# by how often it occurs.
import squarify

health_counts = da.health.value_counts()
labels = health_counts.index
sizes = health_counts.values
colors = [plt.cm.Spectral(i / float(len(labels))) for i in range(len(labels))]

# Draw Plot
plt.figure(figsize=(6, 6), dpi=80)
squarify.plot(sizes=sizes, label=labels, color=colors, alpha=.8)

# Decorate
plt.title('Treemap of Health values')
plt.axis('off')
plt.show()

# 16. STACKED HISTOGRAM
# Stacked histogram of Dalc (daily alcohol consumption) coloured by age.

# Prepare data: one list of Dalc values per age group.
x_var = 'Dalc'
groupby_var = 'age'
df_agg = da.loc[:, [x_var, groupby_var]].groupby(groupby_var)
group_names = []
vals = []
for group_name, group_frame in df_agg:
    group_names.append(group_name)
    vals.append(group_frame[x_var].values.tolist())

# Draw
plt.figure(figsize=(8, 8), dpi=80)
# max(..., 1) avoids a division by zero when there is only one group.
color_steps = max(len(vals) - 1, 1)
colors = [plt.cm.Spectral(i / float(color_steps)) for i in range(len(vals))]
n_bins = len(da[x_var].unique())
# NOTE(review): this call and the legend/xticks calls below were truncated in
# the printed export; the tails (density=False, color=colors[:len(vals)],
# horizontalalignment='left') are reconstructed -- confirm against the
# notebook.
n, bins, patches = plt.hist(vals, n_bins, stacked=True, density=False,
                            color=colors[:len(vals)])

# Decoration
plt.legend([str(name) for name in group_names])
plt.title(f"Stacked Histogram of ${x_var}$ colored by ${groupby_var}$", fontsize=13)
plt.xlabel(x_var)
plt.ylabel("Frequency")
plt.ylim(0, 750)
# `bins` holds n_bins + 1 edges but there is one label per bin, so tick only
# the left edge of each bin to keep ticks and labels the same length.
plt.xticks(ticks=bins[:-1], labels=np.unique(da[x_var]).tolist(), rotation=90,
           horizontalalignment='left')
plt.show()