Uploaded by Rahul Bhangarwala

3213 Dilip Parmar Exp no 1

advertisement
In [1]: import pandas as pd
import numpy as np
In [2]: import csv
In [3]: ! pip install opendatasets
Requirement
Requirement
Requirement
Requirement
Requirement
Requirement
Requirement
Requirement
Requirement
Requirement
Requirement
Requirement
Requirement
Requirement
already
already
already
already
already
already
already
already
already
already
already
already
already
already
satisfied:
satisfied:
satisfied:
satisfied:
satisfied:
satisfied:
satisfied:
satisfied:
satisfied:
satisfied:
satisfied:
satisfied:
satisfied:
satisfied:
opendatasets in c:\users\divya patil\appdata\local\programs\python\python311\lib\site-packages (0.1.22)
tqdm in c:\users\divya patil\appdata\local\programs\python\python311\lib\site-packages (from opendatasets) (4.65.0)
kaggle in c:\users\divya patil\appdata\local\programs\python\python311\lib\site-packages (from opendatasets) (1.5.13)
click in c:\users\divya patil\appdata\local\programs\python\python311\lib\site-packages (from opendatasets) (8.1.3)
colorama in c:\users\divya patil\appdata\local\programs\python\python311\lib\site-packages (from click->opendatasets) (0.4.6)
six>=1.10 in c:\users\divya patil\appdata\local\programs\python\python311\lib\site-packages (from kaggle->opendatasets) (1.16.0)
certifi in c:\users\divya patil\appdata\local\programs\python\python311\lib\site-packages (from kaggle->opendatasets) (2022.12.7)
python-dateutil in c:\users\divya patil\appdata\local\programs\python\python311\lib\site-packages (from kaggle->opendatasets) (2.8.2)
requests in c:\users\divya patil\appdata\local\programs\python\python311\lib\site-packages (from kaggle->opendatasets) (2.28.2)
python-slugify in c:\users\divya patil\appdata\local\programs\python\python311\lib\site-packages (from kaggle->opendatasets) (8.0.1)
urllib3 in c:\users\divya patil\appdata\local\programs\python\python311\lib\site-packages (from kaggle->opendatasets) (1.26.15)
text-unidecode>=1.3 in c:\users\divya patil\appdata\local\programs\python\python311\lib\site-packages (from python-slugify->kaggle->opendatasets) (1.3)
charset-normalizer<4,>=2 in c:\users\divya patil\appdata\local\programs\python\python311\lib\site-packages (from requests->kaggle->opendatasets) (3.1.0)
idna<4,>=2.5 in c:\users\divya patil\appdata\local\programs\python\python311\lib\site-packages (from requests->kaggle->opendatasets) (3.4)
[notice] A new release of pip available: 22.3.1 -> 23.1
[notice] To update, run: python.exe -m pip install --upgrade pip
In [4]: import opendatasets as od
# Kaggle dataset page that the next cell downloads.
dataset = "https://www.kaggle.com/datasets/kurtnakasato/imdb-100000-moviestvshows"
In [5]: # Interactive step: the recorded run prompts for a Kaggle username and key,
# then saves imdb-100000-moviestvshows.zip under .\imdb-100000-moviestvshows.
# NOTE(review): no Out[5] was recorded for the trailing `data` expression, so
# od.download presumably returns None — confirm before relying on `data`.
data = od.download(dataset)
data
Please provide your Kaggle credentials to download this dataset. Learn more: http://bit.ly/kaggle-creds
Your Kaggle username: divyadeepakpatil
Your Kaggle Key: ········
Downloading imdb-100000-moviestvshows.zip to .\imdb-100000-moviestvshows
100%|██████████████████████████████████████████████████████████████████████████████| 9.81M/9.81M [00:22<00:00, 466kB/s]
In [6]: df= pd.read_csv("contentDataGenre.csv")
In [7]: df
dataId
genre
0
102795
Drama
1
102796
Documentary
2
102796
Comedy
3
102797
Documentary
4
102797
Horror
...
...
...
219208
499
Fantasy
219209
500
Action
219210
500
Mystery
219211
500
Sci-Fi
219212
0
Drama
Out[7]:
219213 rows × 2 columns
In [8]: df.head()
dataId
genre
0
102795
Drama
1
102796
Documentary
2
102796
Comedy
3
102797
Documentary
4
102797
Horror
Out[8]:
In [9]: df.isnull()
dataId
genre
0
False
False
1
False
False
2
False
False
3
False
False
4
False
False
...
...
...
219208
False
False
219209
False
False
219210
False
False
219211
False
False
219212
False
False
Out[9]:
219213 rows × 2 columns
In [10]: df.isnull().sum()
Out[10]:
dataId
0
genre
0
dtype: int64
In [11]: df.describe()
dataId
Out[11]:
count
219213.000000
mean
50280.744933
std
30137.677555
min
0.000000
25%
23606.000000
50%
50181.000000
75%
76251.000000
max
103293.000000
In [12]: df.dtypes
Out[12]:
dataId
int64
genre
object
dtype: object
In [14]: ! pip install matplotlib
Requirement
Requirement
Requirement
Requirement
Requirement
Requirement
Requirement
Requirement
Requirement
Requirement
Requirement
already
already
already
already
already
already
already
already
already
already
already
satisfied:
satisfied:
satisfied:
satisfied:
satisfied:
satisfied:
satisfied:
satisfied:
satisfied:
satisfied:
satisfied:
matplotlib in c:\users\divya patil\appdata\local\programs\python\python311\lib\site-packages (3.7.1)
contourpy>=1.0.1 in c:\users\divya patil\appdata\local\programs\python\python311\lib\site-packages (from matplotlib) (1.0.7)
cycler>=0.10 in c:\users\divya patil\appdata\local\programs\python\python311\lib\site-packages (from matplotlib) (0.11.0)
fonttools>=4.22.0 in c:\users\divya patil\appdata\local\programs\python\python311\lib\site-packages (from matplotlib) (4.39.3)
kiwisolver>=1.0.1 in c:\users\divya patil\appdata\local\programs\python\python311\lib\site-packages (from matplotlib) (1.4.4)
numpy>=1.20 in c:\users\divya patil\appdata\local\programs\python\python311\lib\site-packages (from matplotlib) (1.24.2)
packaging>=20.0 in c:\users\divya patil\appdata\local\programs\python\python311\lib\site-packages (from matplotlib) (23.1)
pillow>=6.2.0 in c:\users\divya patil\appdata\local\programs\python\python311\lib\site-packages (from matplotlib) (9.5.0)
pyparsing>=2.3.1 in c:\users\divya patil\appdata\local\programs\python\python311\lib\site-packages (from matplotlib) (3.0.9)
python-dateutil>=2.7 in c:\users\divya patil\appdata\local\programs\python\python311\lib\site-packages (from matplotlib) (2.8.2)
six>=1.5 in c:\users\divya patil\appdata\local\programs\python\python311\lib\site-packages (from python-dateutil>=2.7->matplotlib) (1.16.0)
[notice] A new release of pip available: 22.3.1 -> 23.1
[notice] To update, run: python.exe -m pip install --upgrade pip
In [15]: import matplotlib.pyplot as plt
In [16]: df.plot()
Out[16]:
<Axes: >
In [17]: size=df.size
In [18]: shape=df.shape
In [19]: df_ndim=df.ndim
In [22]: dummies=pd.get_dummies(df.genre)
merged=pd.concat([df,dummies], axis ='columns')
In [23]: merged=pd.concat([df,dummies], axis ='columns')
In [24]: merged.drop(['genre'],axis='columns')
dataId
Action
Adventure
Animation
Biography
Comedy
Crime
Documentary
Drama
Family
...
Sci-Fi
Short
Sport
Talk-Show
Thriller
War
Western
l
n
u
0
102795
False
False
False
False
False
False
False
True
False
...
False
False
False
False
False
False
False
False
False
False
1
102796
False
False
False
False
False
False
True
False
False
...
False
False
False
False
False
False
False
False
False
False
2
102796
False
False
False
False
True
False
False
False
False
...
False
False
False
False
False
False
False
False
False
False
3
102797
False
False
False
False
False
False
True
False
False
...
False
False
False
False
False
False
False
False
False
False
4
102797
False
False
False
False
False
False
False
False
False
...
False
False
False
False
False
False
False
False
False
False
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
219208
499
False
False
False
False
False
False
False
False
False
...
False
False
False
False
False
False
False
False
False
False
219209
500
True
False
False
False
False
False
False
False
False
...
False
False
False
False
False
False
False
False
False
False
219210
500
False
False
False
False
False
False
False
False
False
...
False
False
False
False
False
False
False
False
False
False
219211
500
False
False
False
False
False
False
False
False
False
...
True
False
False
False
False
False
False
False
False
False
219212
0
False
False
False
False
False
False
False
True
False
...
False
False
False
False
False
False
False
False
False
False
Out[24]:
219213 rows × 31 columns
In [25]: print(merged)
0
1
2
3
4
...
219208
219209
219210
219211
219212
dataId
102795
102796
102796
102797
102797
...
499
500
500
500
0
genre
Drama
Documentary
Comedy
Documentary
Horror
...
Fantasy
Action
Mystery
Sci-Fi
Drama
0
1
2
3
4
...
219208
219209
219210
219211
219212
Crime
False
False
False
False
False
...
False
False
False
False
False
0
1
2
3
4
...
219208
219209
219210
219211
219212
Thriller
False
False
False
False
False
...
False
False
False
False
False
Documentary
False
True
False
True
False
...
False
False
False
False
False
War
False
False
False
False
False
...
False
False
False
False
False
Drama
True
False
False
False
False
...
False
False
False
False
True
Western
False
False
False
False
False
...
False
False
False
False
False
[219213 rows x 32 columns]
In [ ]:
Action
False
False
False
False
False
...
False
True
False
False
False
Adventure
False
False
False
False
False
...
False
False
False
False
False
...
...
...
...
...
...
...
...
...
...
...
...
l
False
False
False
False
False
...
False
False
False
False
False
Animation
False
False
False
False
False
...
False
False
False
False
False
Sci-Fi
False
False
False
False
False
...
False
False
False
True
False
Short
False
False
False
False
False
...
False
False
False
False
False
n
False
False
False
False
False
...
False
False
False
False
False
u
False
False
False
False
False
...
False
False
False
False
False
Biography
False
False
False
False
False
...
False
False
False
False
False
Sport
False
False
False
False
False
...
False
False
False
False
False
Comedy
False
False
True
False
False
...
False
False
False
False
False
Talk-Show
False
False
False
False
False
...
False
False
False
False
False
\
\
Download