728x90
In [68]:
import pandas as pd
import datetime as dt
In [69]:
avo_df = pd.read_csv('Avocado.csv')
avo_df
Out[69]:
Date | AveragePrice | Total Volume | type | region | |
---|---|---|---|---|---|
0 | 2015-12-27 | 1.33 | 64236.62 | conventional | Albany |
1 | 2015-12-20 | 1.35 | 54876.98 | conventional | Albany |
2 | 2015-12-13 | 0.93 | 118220.22 | conventional | Albany |
3 | 2015-12-06 | 1.08 | 78992.15 | conventional | Albany |
4 | 2015-11-29 | 1.28 | 51039.60 | conventional | Albany |
... | ... | ... | ... | ... | ... |
18244 | 2018-02-04 | 1.63 | 17074.83 | organic | WestTexNewMexico |
18245 | 2018-01-28 | 1.71 | 13888.04 | organic | WestTexNewMexico |
18246 | 2018-01-21 | 1.87 | 13766.76 | organic | WestTexNewMexico |
18247 | 2018-01-14 | 1.93 | 16205.22 | organic | WestTexNewMexico |
18248 | 2018-01-07 | 1.62 | 17489.58 | organic | WestTexNewMexico |
18249 rows × 5 columns
In [70]:
avo_df['Date'] = pd.to_datetime(avo_df['Date'])
avo_df.set_index(keys=['Date'], inplace = True)
avo_df
Out[70]:
AveragePrice | Total Volume | type | region | |
---|---|---|---|---|
Date | ||||
2015-12-27 | 1.33 | 64236.62 | conventional | Albany |
2015-12-20 | 1.35 | 54876.98 | conventional | Albany |
2015-12-13 | 0.93 | 118220.22 | conventional | Albany |
2015-12-06 | 1.08 | 78992.15 | conventional | Albany |
2015-11-29 | 1.28 | 51039.60 | conventional | Albany |
... | ... | ... | ... | ... |
2018-02-04 | 1.63 | 17074.83 | organic | WestTexNewMexico |
2018-01-28 | 1.71 | 13888.04 | organic | WestTexNewMexico |
2018-01-21 | 1.87 | 13766.76 | organic | WestTexNewMexico |
2018-01-14 | 1.93 | 16205.22 | organic | WestTexNewMexico |
2018-01-07 | 1.62 | 17489.58 | organic | WestTexNewMexico |
18249 rows × 4 columns
In [92]:
# Aggregating the data by month
avo_df.resample(rule='M').mean()
Out[92]:
AveragePrice | Total Volume | Day | Month | Year | |
---|---|---|---|---|---|
Date | |||||
2015-01-31 | 1.341444 | 8.102564e+05 | 17.800000 | 1.0 | 2015.0 |
2015-02-28 | 1.353580 | 7.505690e+05 | 14.000000 | 2.0 | 2015.0 |
2015-03-31 | 1.361981 | 7.751538e+05 | 15.000000 | 3.0 | 2015.0 |
2015-04-30 | 1.383449 | 7.966305e+05 | 15.500000 | 4.0 | 2015.0 |
2015-05-31 | 1.344685 | 8.958964e+05 | 17.000000 | 5.0 | 2015.0 |
2015-06-30 | 1.399074 | 8.838582e+05 | 17.500000 | 6.0 | 2015.0 |
2015-07-31 | 1.409444 | 8.256988e+05 | 15.500000 | 7.0 | 2015.0 |
2015-08-31 | 1.444481 | 7.763793e+05 | 16.000000 | 8.0 | 2015.0 |
2015-09-30 | 1.449144 | 7.620806e+05 | 16.500000 | 9.0 | 2015.0 |
2015-10-31 | 1.391968 | 7.078151e+05 | 14.500000 | 10.0 | 2015.0 |
2015-11-30 | 1.330611 | 6.841482e+05 | 15.000000 | 11.0 | 2015.0 |
2015-12-31 | 1.305244 | 6.868351e+05 | 16.524362 | 12.0 | 2015.0 |
2016-01-31 | 1.236991 | 8.828596e+05 | 13.500000 | 1.0 | 2016.0 |
2016-02-29 | 1.240278 | 9.678819e+05 | 15.000000 | 2.0 | 2016.0 |
2016-03-31 | 1.224213 | 9.055843e+05 | 16.500000 | 3.0 | 2016.0 |
2016-04-30 | 1.205023 | 9.301684e+05 | 13.500000 | 4.0 | 2016.0 |
2016-05-31 | 1.199722 | 1.039316e+06 | 15.000000 | 5.0 | 2016.0 |
2016-06-30 | 1.283056 | 9.498337e+05 | 15.500000 | 6.0 | 2016.0 |
2016-07-31 | 1.401333 | 8.653247e+05 | 17.000000 | 7.0 | 2016.0 |
2016-08-31 | 1.411111 | 8.583263e+05 | 17.500000 | 8.0 | 2016.0 |
2016-09-30 | 1.427870 | 8.386650e+05 | 14.500000 | 9.0 | 2016.0 |
2016-10-31 | 1.557093 | 6.670404e+05 | 16.000000 | 10.0 | 2016.0 |
2016-11-30 | 1.550023 | 6.023364e+05 | 16.500000 | 11.0 | 2016.0 |
2016-12-31 | 1.315995 | 7.672402e+05 | 14.500000 | 12.0 | 2016.0 |
2017-01-31 | 1.277500 | 9.876538e+05 | 15.000000 | 1.0 | 2017.0 |
2017-02-28 | 1.214792 | 1.091359e+06 | 15.500000 | 2.0 | 2017.0 |
2017-03-31 | 1.398935 | 8.307949e+05 | 15.500000 | 3.0 | 2017.0 |
2017-04-30 | 1.502611 | 9.070639e+05 | 16.000000 | 4.0 | 2017.0 |
2017-05-31 | 1.532593 | 9.854874e+05 | 17.500000 | 5.0 | 2017.0 |
2017-06-30 | 1.545395 | 9.544434e+05 | 14.467442 | 6.0 | 2017.0 |
2017-07-31 | 1.564926 | 8.794513e+05 | 16.000000 | 7.0 | 2017.0 |
2017-08-31 | 1.698958 | 7.831782e+05 | 16.500000 | 8.0 | 2017.0 |
2017-09-30 | 1.840949 | 6.592119e+05 | 13.500000 | 9.0 | 2017.0 |
2017-10-31 | 1.752444 | 6.718110e+05 | 15.000000 | 10.0 | 2017.0 |
2017-11-30 | 1.517292 | 7.512021e+05 | 15.500000 | 11.0 | 2017.0 |
2017-12-31 | 1.363852 | 8.506164e+05 | 17.000000 | 12.0 | 2017.0 |
2018-01-31 | 1.387431 | 9.946923e+05 | 17.500000 | 1.0 | 2018.0 |
2018-02-28 | 1.318704 | 1.152264e+06 | 14.500000 | 2.0 | 2018.0 |
2018-03-31 | 1.336458 | 1.053827e+06 | 14.500000 | 3.0 | 2018.0 |
In [93]:
import matplotlib.pyplot as plt
In [96]:
# average price per month
avo_df.resample(rule='M').mean()['AveragePrice'].plot(figsize=(10,5), marker='o', color = 'b')
Out[96]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fe21ab8b340>
In [95]:
# average price per quarter
avo_df.resample(rule='Q').mean()['AveragePrice'].plot(figsize=(10,5), marker='o', color = 'g')
Out[95]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fe21b475220>
In [97]:
# average price per annual
avo_df.resample(rule='A').mean()['AveragePrice'].plot(figsize=(10,5), marker='o', color = 'k')
Out[97]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fe21ab6ca00>
In [98]:
import seaborn as sns
In [99]:
plt.figure(figsize=(20,10))
sns.violinplot(y='AveragePrice', x='type', data = avo_df)
Out[99]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fe212e686a0>
In [101]:
plt.figure(figsize=(15,8))
sns.distplot(avo_df['AveragePrice'], color='b')
/usr/local/lib/python3.8/dist-packages/seaborn/distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
warnings.warn(msg, FutureWarning)
Out[101]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fe20e4eaee0>
In [102]:
# categorical plots onto a FacetGrid
# show the relationship between a numerical and categorical variables
# obtain a lot of information from catplot
conventional = sns.catplot('AveragePrice', 'region', data = avo_df[avo_df['type']=='conventional'],hue = 'Year', height =10 )
/usr/local/lib/python3.8/dist-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
warnings.warn(
728x90
'Data Analytics with python > [Data Analysis]' 카테고리의 다른 글
[Text]S8_02_Text_in_pandas_1 (0) | 2023.01.21 |
---|---|
[Text]S8_01_upper_lower (0) | 2023.01.21 |
[datetime]S7_04_Practical_example2 (0) | 2023.01.21 |
[datetime]S7_03_Practical_example1 (0) | 2023.01.21 |
[datetime]S7_02_Timestamp (0) | 2023.01.21 |
댓글