728x90
In [1]:
import pandas as pd
In [2]:
sales_df = pd.read_csv('Online Retail.csv', encoding= 'unicode_escape')
In [12]:
sales_df.head()
Out[12]:
InvoiceNo | StockCode | Description | Quantity | InvoiceDate | UnitPrice | CustomerID | Country | |
---|---|---|---|---|---|---|---|---|
0 | 536365 | 85123A | WHITE HANGING HEART T-LIGHT HOLDER | 6 | 2010-12-01 08:26:00 | 2.55 | 17850.0 | United Kingdom |
1 | 536365 | 71053 | WHITE METAL LANTERN | 6 | 2010-12-01 08:26:00 | 3.39 | 17850.0 | United Kingdom |
2 | 536365 | 84406B | CREAM CUPID HEARTS COAT HANGER | 8 | 2010-12-01 08:26:00 | 2.75 | 17850.0 | United Kingdom |
3 | 536365 | 84029G | KNITTED UNION FLAG HOT WATER BOTTLE | 6 | 2010-12-01 08:26:00 | 3.39 | 17850.0 | United Kingdom |
4 | 536365 | 84029E | RED WOOLLY HOTTIE WHITE HEART. | 6 | 2010-12-01 08:26:00 | 3.39 | 17850.0 | United Kingdom |
In [29]:
sales_df.set_index(keys = ['InvoiceDate'], inplace=True)
sales_df
Out[29]:
InvoiceNo | StockCode | Description | Quantity | UnitPrice | CustomerID | Country | |
---|---|---|---|---|---|---|---|
InvoiceDate | |||||||
2010-12-01 08:26:00 | 536365 | 85123A | WHITE HANGING HEART T-LIGHT HOLDER | 6 | 2.55 | 17850.0 | United Kingdom |
2010-12-01 08:26:00 | 536365 | 71053 | WHITE METAL LANTERN | 6 | 3.39 | 17850.0 | United Kingdom |
2010-12-01 08:26:00 | 536365 | 84406B | CREAM CUPID HEARTS COAT HANGER | 8 | 2.75 | 17850.0 | United Kingdom |
2010-12-01 08:26:00 | 536365 | 84029G | KNITTED UNION FLAG HOT WATER BOTTLE | 6 | 3.39 | 17850.0 | United Kingdom |
2010-12-01 08:26:00 | 536365 | 84029E | RED WOOLLY HOTTIE WHITE HEART. | 6 | 3.39 | 17850.0 | United Kingdom |
... | ... | ... | ... | ... | ... | ... | ... |
2011-12-09 12:50:00 | 581587 | 22613 | PACK OF 20 SPACEBOY NAPKINS | 12 | 0.85 | 12680.0 | France |
2011-12-09 12:50:00 | 581587 | 22899 | CHILDREN'S APRON DOLLY GIRL | 6 | 2.10 | 12680.0 | France |
2011-12-09 12:50:00 | 581587 | 23254 | CHILDRENS CUTLERY DOLLY GIRL | 4 | 4.15 | 12680.0 | France |
2011-12-09 12:50:00 | 581587 | 23255 | CHILDRENS CUTLERY CIRCUS PARADE | 4 | 4.15 | 12680.0 | France |
2011-12-09 12:50:00 | 581587 | 22138 | BAKING SET 9 PIECE RETROSPOT | 3 | 4.95 | 12680.0 | France |
541909 rows × 7 columns
In [30]:
sales_df['Country'].unique()
Out[30]:
array(['United Kingdom', 'France', 'Australia', 'Netherlands', 'Germany',
'Norway', 'EIRE', 'Switzerland', 'Spain', 'Poland', 'Portugal',
'Italy', 'Belgium', 'Lithuania', 'Japan', 'Iceland',
'Channel Islands', 'Denmark', 'Cyprus', 'Sweden', 'Austria',
'Israel', 'Finland', 'Bahrain', 'Greece', 'Hong Kong', 'Singapore',
'Lebanon', 'United Arab Emirates', 'Saudi Arabia',
'Czech Republic', 'Canada', 'Unspecified', 'Brazil', 'USA',
'European Community', 'Malta', 'RSA'], dtype=object)
In [33]:
# Multi-index DataFrame
sales_df = pd.read_csv('Online Retail.csv', encoding = 'unicode_escape')
sales_df.set_index(keys=['Country','InvoiceDate'], inplace = True)
sales_df
Out[33]:
InvoiceNo | StockCode | Description | Quantity | UnitPrice | CustomerID | ||
---|---|---|---|---|---|---|---|
Country | InvoiceDate | ||||||
United Kingdom | 12/1/2010 8:26 | 536365 | 85123A | WHITE HANGING HEART T-LIGHT HOLDER | 6 | 2.55 | 17850.0 |
12/1/2010 8:26 | 536365 | 71053 | WHITE METAL LANTERN | 6 | 3.39 | 17850.0 | |
12/1/2010 8:26 | 536365 | 84406B | CREAM CUPID HEARTS COAT HANGER | 8 | 2.75 | 17850.0 | |
12/1/2010 8:26 | 536365 | 84029G | KNITTED UNION FLAG HOT WATER BOTTLE | 6 | 3.39 | 17850.0 | |
12/1/2010 8:26 | 536365 | 84029E | RED WOOLLY HOTTIE WHITE HEART. | 6 | 3.39 | 17850.0 | |
... | ... | ... | ... | ... | ... | ... | ... |
France | 12/9/2011 12:50 | 581587 | 22613 | PACK OF 20 SPACEBOY NAPKINS | 12 | 0.85 | 12680.0 |
12/9/2011 12:50 | 581587 | 22899 | CHILDREN'S APRON DOLLY GIRL | 6 | 2.10 | 12680.0 | |
12/9/2011 12:50 | 581587 | 23254 | CHILDRENS CUTLERY DOLLY GIRL | 4 | 4.15 | 12680.0 | |
12/9/2011 12:50 | 581587 | 23255 | CHILDRENS CUTLERY CIRCUS PARADE | 4 | 4.15 | 12680.0 | |
12/9/2011 12:50 | 581587 | 22138 | BAKING SET 9 PIECE RETROSPOT | 3 | 4.95 | 12680.0 |
541909 rows × 6 columns
In [34]:
# sort countries
sales_df.sort_index(inplace = True)
sales_df
Out[34]:
InvoiceNo | StockCode | Description | Quantity | UnitPrice | CustomerID | ||
---|---|---|---|---|---|---|---|
Country | InvoiceDate | ||||||
Australia | 1/10/2011 9:58 | 540557 | 22523 | CHILDS GARDEN FORK PINK | 96 | 0.85 | 12415.0 |
1/11/2011 9:47 | 540700 | 21581 | SKULLS DESIGN COTTON TOTE BAG | 6 | 2.25 | 12393.0 | |
1/11/2011 9:47 | 540700 | 22619 | SET OF 6 SOLDIER SKITTLES | 8 | 3.75 | 12393.0 | |
1/11/2011 9:47 | 540700 | 84997B | RED 3 PIECE RETROSPOT CUTLERY SET | 6 | 3.75 | 12393.0 | |
1/11/2011 9:47 | 540700 | 20727 | LUNCH BAG BLACK SKULL. | 20 | 1.65 | 12393.0 | |
... | ... | ... | ... | ... | ... | ... | ... |
Unspecified | 9/2/2011 12:17 | 565303 | 85227 | SET OF 6 3D KIT CARDS FOR KIDS | 4 | 0.85 | NaN |
9/2/2011 12:17 | 565303 | 22138 | BAKING SET 9 PIECE RETROSPOT | 2 | 4.95 | NaN | |
9/2/2011 12:17 | 565303 | 21889 | WOODEN BOX OF DOMINOES | 5 | 1.25 | NaN | |
9/2/2011 12:17 | 565303 | 22550 | HOLIDAY FUN LUDO | 2 | 3.75 | NaN | |
9/2/2011 12:17 | 565303 | 23127 | FELTCRAFT GIRL NICOLE KIT | 1 | 4.95 | NaN |
541909 rows × 6 columns
In [35]:
# access any element
sales_df.index
Out[35]:
MultiIndex([( 'Australia', '1/10/2011 9:58'),
( 'Australia', '1/11/2011 9:47'),
( 'Australia', '1/11/2011 9:47'),
( 'Australia', '1/11/2011 9:47'),
( 'Australia', '1/11/2011 9:47'),
( 'Australia', '1/11/2011 9:47'),
( 'Australia', '1/11/2011 9:47'),
( 'Australia', '1/11/2011 9:47'),
( 'Australia', '1/11/2011 9:47'),
( 'Australia', '1/11/2011 9:47'),
...
('Unspecified', '9/2/2011 12:17'),
('Unspecified', '9/2/2011 12:17'),
('Unspecified', '9/2/2011 12:17'),
('Unspecified', '9/2/2011 12:17'),
('Unspecified', '9/2/2011 12:17'),
('Unspecified', '9/2/2011 12:17'),
('Unspecified', '9/2/2011 12:17'),
('Unspecified', '9/2/2011 12:17'),
('Unspecified', '9/2/2011 12:17'),
('Unspecified', '9/2/2011 12:17')],
names=['Country', 'InvoiceDate'], length=541909)
In [36]:
sales_df.index.names
Out[36]:
FrozenList(['Country', 'InvoiceDate'])
In [37]:
# multiindex objects
type(sales_df.index)
Out[37]:
pandas.core.indexes.multi.MultiIndex
In [38]:
sales_df.index[0]
Out[38]:
('Australia', '1/10/2011 9:58')
728x90
'Data Analytics with python > [Data Analysis]' 카테고리의 다른 글
[Pandas][DataFrame][MultiIndex]S4_05_Multi_indexing_operations2 (0) | 2023.01.21 |
---|---|
[Pandas][DataFrame][MultiIndex]S4_04_Multi_indexing_operations1 (0) | 2023.01.21 |
[Pandas][DataFrame][MultiIndex]S4_02_grouping (0) | 2023.01.21 |
[Pandas][DataFrame][MultiIndex]S4_01_Explore_dataset (0) | 2023.01.21 |
[Pandas][DataFrame][concat]S3_02_concatenation_with_multi_indexing (0) | 2023.01.21 |
댓글