728x90
In [ ]:
import pandas as pd
In [57]:
# Multi-index DataFrame
sales_df = pd.read_csv('Online Retail.csv', encoding = 'unicode_escape')
sales_df.set_index(keys=['Country','InvoiceDate'], inplace = True)
sales_df.sort_index(inplace=True)
sales_df
Out[57]:
InvoiceNo | StockCode | Description | Quantity | UnitPrice | CustomerID | ||
---|---|---|---|---|---|---|---|
Country | InvoiceDate | ||||||
Australia | 1/10/2011 9:58 | 540557 | 22523 | CHILDS GARDEN FORK PINK | 96 | 0.85 | 12415.0 |
1/11/2011 9:47 | 540700 | 21581 | SKULLS DESIGN COTTON TOTE BAG | 6 | 2.25 | 12393.0 | |
1/11/2011 9:47 | 540700 | 22619 | SET OF 6 SOLDIER SKITTLES | 8 | 3.75 | 12393.0 | |
1/11/2011 9:47 | 540700 | 84997B | RED 3 PIECE RETROSPOT CUTLERY SET | 6 | 3.75 | 12393.0 | |
1/11/2011 9:47 | 540700 | 20727 | LUNCH BAG BLACK SKULL. | 20 | 1.65 | 12393.0 | |
... | ... | ... | ... | ... | ... | ... | ... |
Unspecified | 9/2/2011 12:17 | 565303 | 85227 | SET OF 6 3D KIT CARDS FOR KIDS | 4 | 0.85 | NaN |
9/2/2011 12:17 | 565303 | 22138 | BAKING SET 9 PIECE RETROSPOT | 2 | 4.95 | NaN | |
9/2/2011 12:17 | 565303 | 21889 | WOODEN BOX OF DOMINOES | 5 | 1.25 | NaN | |
9/2/2011 12:17 | 565303 | 22550 | HOLIDAY FUN LUDO | 2 | 3.75 | NaN | |
9/2/2011 12:17 | 565303 | 23127 | FELTCRAFT GIRL NICOLE KIT | 1 | 4.95 | NaN |
541909 rows × 6 columns
In [58]:
# access specific elements
sales_df.loc['Australia','1/11/2011 9:47']
Out[58]:
InvoiceNo | StockCode | Description | Quantity | UnitPrice | CustomerID | ||
---|---|---|---|---|---|---|---|
Country | InvoiceDate | ||||||
Australia | 1/11/2011 9:47 | 540700 | 21581 | SKULLS DESIGN COTTON TOTE BAG | 6 | 2.25 | 12393.0 |
1/11/2011 9:47 | 540700 | 22619 | SET OF 6 SOLDIER SKITTLES | 8 | 3.75 | 12393.0 | |
1/11/2011 9:47 | 540700 | 84997B | RED 3 PIECE RETROSPOT CUTLERY SET | 6 | 3.75 | 12393.0 | |
1/11/2011 9:47 | 540700 | 20727 | LUNCH BAG BLACK SKULL. | 20 | 1.65 | 12393.0 | |
1/11/2011 9:47 | 540700 | 20726 | LUNCH BAG WOODLAND | 20 | 1.65 | 12393.0 | |
1/11/2011 9:47 | 540700 | 22383 | LUNCH BAG SUKI DESIGN | 10 | 1.65 | 12393.0 | |
1/11/2011 9:47 | 540700 | 21249 | WOODLAND HEIGHT CHART STICKERS | 6 | 2.95 | 12393.0 | |
1/11/2011 9:47 | 540700 | 22378 | WALL TIDY RETROSPOT | 20 | 0.85 | 12393.0 | |
1/11/2011 9:47 | 540700 | 22175 | PINK OWL SOFT TOY | 12 | 2.95 | 12393.0 | |
1/11/2011 9:47 | 540700 | 22176 | BLUE OWL SOFT TOY | 12 | 2.95 | 12393.0 | |
1/11/2011 9:47 | 540700 | 84997C | BLUE 3 PIECE POLKADOT CUTLERY SET | 6 | 3.75 | 12393.0 | |
1/11/2011 9:47 | 540700 | 20728 | LUNCH BAG CARS BLUE | 20 | 1.65 | 12393.0 | |
1/11/2011 9:47 | 540700 | 22382 | LUNCH BAG SPACEBOY DESIGN | 20 | 1.65 | 12393.0 | |
1/11/2011 9:47 | 540700 | 21915 | RED HARMONICA IN BOX | 12 | 1.25 | 12393.0 | |
1/11/2011 9:47 | 540700 | 22549 | PICTURE DOMINOES | 12 | 1.45 | 12393.0 | |
1/11/2011 9:47 | 540700 | 21578 | WOODLAND DESIGN COTTON TOTE BAG | 12 | 2.25 | 12393.0 | |
1/11/2011 9:47 | 540700 | 21577 | SAVE THE PLANET COTTON TOTE BAG | 12 | 2.25 | 12393.0 | |
1/11/2011 9:47 | 540700 | 22245 | HOOK, 1 HANGER ,MAGIC GARDEN | 12 | 0.85 | 12393.0 | |
1/11/2011 9:47 | 540700 | 22244 | 3 HOOK HANGER MAGIC GARDEN | 12 | 1.95 | 12393.0 |
In [61]:
# first argument references rows and the second argument references a column
# feed index as a tuple (to avoid confusion about arguments)
sales_df.loc[('Australia','1/11/2011 9:47'),'UnitPrice']
Out[61]:
Country InvoiceDate
Australia 1/11/2011 9:47 2.25
1/11/2011 9:47 3.75
1/11/2011 9:47 3.75
1/11/2011 9:47 1.65
1/11/2011 9:47 1.65
1/11/2011 9:47 1.65
1/11/2011 9:47 2.95
1/11/2011 9:47 0.85
1/11/2011 9:47 2.95
1/11/2011 9:47 2.95
1/11/2011 9:47 3.75
1/11/2011 9:47 1.65
1/11/2011 9:47 1.65
1/11/2011 9:47 1.25
1/11/2011 9:47 1.45
1/11/2011 9:47 2.25
1/11/2011 9:47 2.25
1/11/2011 9:47 0.85
1/11/2011 9:47 1.95
Name: UnitPrice, dtype: float64
In [62]:
# the numeric indexing
# use one index so no tuples are required
# 첫 행
sales_df.iloc[0]
Out[62]:
InvoiceNo 540557
StockCode 22523
Description CHILDS GARDEN FORK PINK
Quantity 96
UnitPrice 0.85
CustomerID 12415.0
Name: (Australia, 1/10/2011 9:58), dtype: object
In [63]:
# rows, columns with numeric indexing
sales_df.iloc[0,0]
Out[63]:
'540557'
In [64]:
# transpose indexes and columns
sales_df = sales_df.transpose()
sales_df.head(10)
Out[64]:
Country | Australia | ... | Unspecified | ||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
InvoiceDate | 1/10/2011 9:58 | 1/11/2011 9:47 | 1/11/2011 9:47 | 1/11/2011 9:47 | 1/11/2011 9:47 | 1/11/2011 9:47 | 1/11/2011 9:47 | 1/11/2011 9:47 | 1/11/2011 9:47 | 1/11/2011 9:47 | ... | 9/2/2011 12:17 | 9/2/2011 12:17 | 9/2/2011 12:17 | 9/2/2011 12:17 | 9/2/2011 12:17 | 9/2/2011 12:17 | 9/2/2011 12:17 | 9/2/2011 12:17 | 9/2/2011 12:17 | 9/2/2011 12:17 |
InvoiceNo | 540557 | 540700 | 540700 | 540700 | 540700 | 540700 | 540700 | 540700 | 540700 | 540700 | ... | 565303 | 565303 | 565303 | 565303 | 565303 | 565303 | 565303 | 565303 | 565303 | 565303 |
StockCode | 22523 | 21581 | 22619 | 84997B | 20727 | 20726 | 22383 | 21249 | 22378 | 22175 | ... | 20983 | 20977 | 20982 | 23084 | 22549 | 85227 | 22138 | 21889 | 22550 | 23127 |
Description | CHILDS GARDEN FORK PINK | SKULLS DESIGN COTTON TOTE BAG | SET OF 6 SOLDIER SKITTLES | RED 3 PIECE RETROSPOT CUTLERY SET | LUNCH BAG BLACK SKULL. | LUNCH BAG WOODLAND | LUNCH BAG SUKI DESIGN | WOODLAND HEIGHT CHART STICKERS | WALL TIDY RETROSPOT | PINK OWL SOFT TOY | ... | 12 PENCILS TALL TUBE RED RETROSPOT | 36 PENCILS TUBE WOODLAND | 12 PENCILS TALL TUBE SKULLS | RABBIT NIGHT LIGHT | PICTURE DOMINOES | SET OF 6 3D KIT CARDS FOR KIDS | BAKING SET 9 PIECE RETROSPOT | WOODEN BOX OF DOMINOES | HOLIDAY FUN LUDO | FELTCRAFT GIRL NICOLE KIT |
Quantity | 96 | 6 | 8 | 6 | 20 | 20 | 10 | 6 | 20 | 12 | ... | 4 | 4 | 5 | 1 | 1 | 4 | 2 | 5 | 2 | 1 |
UnitPrice | 0.85 | 2.25 | 3.75 | 3.75 | 1.65 | 1.65 | 1.65 | 2.95 | 0.85 | 2.95 | ... | 0.85 | 1.25 | 0.85 | 2.08 | 1.45 | 0.85 | 4.95 | 1.25 | 3.75 | 4.95 |
CustomerID | 12415.0 | 12393.0 | 12393.0 | 12393.0 | 12393.0 | 12393.0 | 12393.0 | 12393.0 | 12393.0 | 12393.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
6 rows × 541909 columns
In [67]:
sales_df.loc['UnitPrice',('Australia','1/10/2011 9:58')]
Out[67]:
Country InvoiceDate
Australia 1/10/2011 9:58 0.85
Name: UnitPrice, dtype: object
In [68]:
sales_df.loc['UnitPrice',('Australia',"1/10/2011 9:58"):('Belgium',"1/10/2011 9:58")]
Out[68]:
Country InvoiceDate
Australia 1/10/2011 9:58 0.85
1/11/2011 9:47 2.25
1/11/2011 9:47 3.75
1/11/2011 9:47 3.75
1/11/2011 9:47 1.65
...
Bahrain 5/9/2011 13:49 2.95
5/9/2011 13:49 2.95
5/9/2011 13:49 4.25
5/9/2011 13:49 4.25
5/9/2011 13:49 4.25
Name: UnitPrice, Length: 1679, dtype: object
In [69]:
sales_df = pd.read_csv('Online Retail.csv', encoding = 'unicode_escape')
sales_df.set_index(keys=['Country','InvoiceDate'], inplace = True)
In [71]:
# indexes are swopped
sales_df = sales_df.swaplevel()
sales_df
Out[71]:
InvoiceNo | StockCode | Description | Quantity | UnitPrice | CustomerID | ||
---|---|---|---|---|---|---|---|
Country | InvoiceDate | ||||||
United Kingdom | 12/1/2010 8:26 | 536365 | 85123A | WHITE HANGING HEART T-LIGHT HOLDER | 6 | 2.55 | 17850.0 |
12/1/2010 8:26 | 536365 | 71053 | WHITE METAL LANTERN | 6 | 3.39 | 17850.0 | |
12/1/2010 8:26 | 536365 | 84406B | CREAM CUPID HEARTS COAT HANGER | 8 | 2.75 | 17850.0 | |
12/1/2010 8:26 | 536365 | 84029G | KNITTED UNION FLAG HOT WATER BOTTLE | 6 | 3.39 | 17850.0 | |
12/1/2010 8:26 | 536365 | 84029E | RED WOOLLY HOTTIE WHITE HEART. | 6 | 3.39 | 17850.0 | |
... | ... | ... | ... | ... | ... | ... | ... |
France | 12/9/2011 12:50 | 581587 | 22613 | PACK OF 20 SPACEBOY NAPKINS | 12 | 0.85 | 12680.0 |
12/9/2011 12:50 | 581587 | 22899 | CHILDREN'S APRON DOLLY GIRL | 6 | 2.10 | 12680.0 | |
12/9/2011 12:50 | 581587 | 23254 | CHILDRENS CUTLERY DOLLY GIRL | 4 | 4.15 | 12680.0 | |
12/9/2011 12:50 | 581587 | 23255 | CHILDRENS CUTLERY CIRCUS PARADE | 4 | 4.15 | 12680.0 | |
12/9/2011 12:50 | 581587 | 22138 | BAKING SET 9 PIECE RETROSPOT | 3 | 4.95 | 12680.0 |
541909 rows × 6 columns
728x90
'Data Analytics with python > [Data Analysis]' 카테고리의 다른 글
[matplotlib]S5_02_yahoo_finance (0) | 2023.01.21 |
---|---|
[matplotlib]S5_01_single_line_plot (0) | 2023.01.21 |
[Pandas][DataFrame][MultiIndex]S4_04_Multi_indexing_operations1 (0) | 2023.01.21 |
[Pandas][DataFrame][MultiIndex]S4_03_Multi_index_dataframe (0) | 2023.01.21 |
[Pandas][DataFrame][MultiIndex]S4_02_grouping (0) | 2023.01.21 |
댓글