728x90
In [1]:
import pandas as pd
In [2]:
# Load the bank customer records from the CSV file into a DataFrame.
bank_df = pd.read_csv('bank customers.csv')
# Leaving the frame as the last expression makes the notebook render it.
bank_df
Out[2]:
RowNumber | CustomerId | Surname | CreditScore | Geography | Gender | Age | Tenure | Balance | NumOfProducts | HasCrCard | IsActiveMember | EstimatedSalary | Exited | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 15634602 | Hargrave | 619 | France | Female | 42 | 2 | 0.00 | 1 | 1 | 1 | 101348.88 | 1 |
1 | 2 | 15647311 | Hill | 608 | Spain | Female | 41 | 1 | 83807.86 | 1 | 0 | 1 | 112542.58 | 0 |
2 | 3 | 15619304 | Onio | 502 | France | Female | 42 | 8 | 159660.80 | 3 | 1 | 0 | 113931.57 | 1 |
3 | 4 | 15701354 | Boni | 699 | France | Female | 39 | 1 | 0.00 | 2 | 0 | 0 | 93826.63 | 0 |
4 | 5 | 15737888 | Mitchell | 850 | Spain | Female | 43 | 2 | 125510.82 | 1 | 1 | 1 | 79084.10 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
9995 | 9996 | 15606229 | Obijiaku | 771 | France | Male | 39 | 5 | 0.00 | 2 | 1 | 0 | 96270.64 | 0 |
9996 | 9997 | 15569892 | Johnstone | 516 | France | Male | 35 | 10 | 57369.61 | 1 | 1 | 1 | 101699.77 | 0 |
9997 | 9998 | 15584532 | Liu | 709 | France | Female | 36 | 7 | 0.00 | 1 | 0 | 1 | 42085.58 | 1 |
9998 | 9999 | 15682355 | Sabbatini | 772 | Germany | Male | 42 | 3 | 75075.31 | 2 | 1 | 0 | 92888.52 | 1 |
9999 | 10000 | 15628319 | Walker | 792 | France | Female | 28 | 4 | 130142.79 | 1 | 1 | 0 | 38190.78 | 0 |
10000 rows × 14 columns
In [3]:
# example: keep only the first ten rows, as an independent copy, so the
# column edits in the following cells work on a small frame
bank_df = bank_df.iloc[:10].copy()
bank_df
Out[3]:
RowNumber | CustomerId | Surname | CreditScore | Geography | Gender | Age | Tenure | Balance | NumOfProducts | HasCrCard | IsActiveMember | EstimatedSalary | Exited | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 15634602 | Hargrave | 619 | France | Female | 42 | 2 | 0.00 | 1 | 1 | 1 | 101348.88 | 1 |
1 | 2 | 15647311 | Hill | 608 | Spain | Female | 41 | 1 | 83807.86 | 1 | 0 | 1 | 112542.58 | 0 |
2 | 3 | 15619304 | Onio | 502 | France | Female | 42 | 8 | 159660.80 | 3 | 1 | 0 | 113931.57 | 1 |
3 | 4 | 15701354 | Boni | 699 | France | Female | 39 | 1 | 0.00 | 2 | 0 | 0 | 93826.63 | 0 |
4 | 5 | 15737888 | Mitchell | 850 | Spain | Female | 43 | 2 | 125510.82 | 1 | 1 | 1 | 79084.10 | 0 |
5 | 6 | 15574012 | Chu | 645 | Spain | Male | 44 | 8 | 113755.78 | 2 | 1 | 0 | 149756.71 | 1 |
6 | 7 | 15592531 | Bartlett | 822 | France | Male | 50 | 7 | 0.00 | 2 | 1 | 1 | 10062.80 | 0 |
7 | 8 | 15656148 | Obinna | 376 | Germany | Female | 29 | 4 | 115046.74 | 4 | 1 | 0 | 119346.88 | 1 |
8 | 9 | 15792365 | He | 501 | France | Male | 44 | 4 | 142051.07 | 2 | 0 | 1 | 74940.50 | 0 |
9 | 10 | 15592389 | H? | 684 | France | Male | 27 | 2 | 134603.88 | 1 | 1 | 1 | 71725.73 | 0 |
In [4]:
# add a new column to the dataframe by assigning a list to a new label
# (one value per row, in row order)
years_per_customer = [9, 12, 15, 5, 7, 2, 20, 25, 12, 10]
bank_df['Years with Bank'] = years_per_customer
# general syntax: DataFrame.insert(loc, column, value) -- loc is the integer position of the new column
In [5]:
# insert a new column in a given position (loc=0 makes it the first column)
postal_codes = [
    'N94 3M0', 'N8S 14K', 'S1T 4E6', 'N7T 3E6', 'K8N 5H6',
    'J7H 3HY', 'K8Y 3M8', 'J8Y 3M0', 'M6U 5U7', 'K2D 4M9',
]
bank_df.insert(loc=0, column='Postal Code', value=postal_codes)
bank_df
Out[5]:
Postal Code | RowNumber | CustomerId | Surname | CreditScore | Geography | Gender | Age | Tenure | Balance | NumOfProducts | HasCrCard | IsActiveMember | EstimatedSalary | Exited | Years with Bank | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | N94 3M0 | 1 | 15634602 | Hargrave | 619 | France | Female | 42 | 2 | 0.00 | 1 | 1 | 1 | 101348.88 | 1 | 9 |
1 | N8S 14K | 2 | 15647311 | Hill | 608 | Spain | Female | 41 | 1 | 83807.86 | 1 | 0 | 1 | 112542.58 | 0 | 12 |
2 | S1T 4E6 | 3 | 15619304 | Onio | 502 | France | Female | 42 | 8 | 159660.80 | 3 | 1 | 0 | 113931.57 | 1 | 15 |
3 | N7T 3E6 | 4 | 15701354 | Boni | 699 | France | Female | 39 | 1 | 0.00 | 2 | 0 | 0 | 93826.63 | 0 | 5 |
4 | K8N 5H6 | 5 | 15737888 | Mitchell | 850 | Spain | Female | 43 | 2 | 125510.82 | 1 | 1 | 1 | 79084.10 | 0 | 7 |
5 | J7H 3HY | 6 | 15574012 | Chu | 645 | Spain | Male | 44 | 8 | 113755.78 | 2 | 1 | 0 | 149756.71 | 1 | 2 |
6 | K8Y 3M8 | 7 | 15592531 | Bartlett | 822 | France | Male | 50 | 7 | 0.00 | 2 | 1 | 1 | 10062.80 | 0 | 20 |
7 | J8Y 3M0 | 8 | 15656148 | Obinna | 376 | Germany | Female | 29 | 4 | 115046.74 | 4 | 1 | 0 | 119346.88 | 1 | 25 |
8 | M6U 5U7 | 9 | 15792365 | He | 501 | France | Male | 44 | 4 | 142051.07 | 2 | 0 | 1 | 74940.50 | 0 | 12 |
9 | K2D 4M9 | 10 | 15592389 | H? | 684 | France | Male | 27 | 2 | 134603.88 | 1 | 1 | 1 | 71725.73 | 0 | 10 |
In [6]:
# Delete a column from a DataFrame
# `del` removes the column from bank_df itself; the statement produces no value.
del bank_df['Postal Code']
bank_df
Out[6]:
RowNumber | CustomerId | Surname | CreditScore | Geography | Gender | Age | Tenure | Balance | NumOfProducts | HasCrCard | IsActiveMember | EstimatedSalary | Exited | Years with Bank | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 15634602 | Hargrave | 619 | France | Female | 42 | 2 | 0.00 | 1 | 1 | 1 | 101348.88 | 1 | 9 |
1 | 2 | 15647311 | Hill | 608 | Spain | Female | 41 | 1 | 83807.86 | 1 | 0 | 1 | 112542.58 | 0 | 12 |
2 | 3 | 15619304 | Onio | 502 | France | Female | 42 | 8 | 159660.80 | 3 | 1 | 0 | 113931.57 | 1 | 15 |
3 | 4 | 15701354 | Boni | 699 | France | Female | 39 | 1 | 0.00 | 2 | 0 | 0 | 93826.63 | 0 | 5 |
4 | 5 | 15737888 | Mitchell | 850 | Spain | Female | 43 | 2 | 125510.82 | 1 | 1 | 1 | 79084.10 | 0 | 7 |
5 | 6 | 15574012 | Chu | 645 | Spain | Male | 44 | 8 | 113755.78 | 2 | 1 | 0 | 149756.71 | 1 | 2 |
6 | 7 | 15592531 | Bartlett | 822 | France | Male | 50 | 7 | 0.00 | 2 | 1 | 1 | 10062.80 | 0 | 20 |
7 | 8 | 15656148 | Obinna | 376 | Germany | Female | 29 | 4 | 115046.74 | 4 | 1 | 0 | 119346.88 | 1 | 25 |
8 | 9 | 15792365 | He | 501 | France | Male | 44 | 4 | 142051.07 | 2 | 0 | 1 | 74940.50 | 0 | 12 |
9 | 10 | 15592389 | H? | 684 | France | Male | 27 | 2 | 134603.88 | 1 | 1 | 1 | 71725.73 | 0 | 10 |
In [7]:
# Remove several columns at once
# column is axis = 1 (axis = 0 would target rows)
# drop() returns a new frame; rebinding the name replaces inplace=True,
# which returns None and cannot be chained
bank_df = bank_df.drop(labels=['RowNumber', 'Balance'], axis=1)
bank_df
Out[7]:
CustomerId | Surname | CreditScore | Geography | Gender | Age | Tenure | NumOfProducts | HasCrCard | IsActiveMember | EstimatedSalary | Exited | Years with Bank | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 15634602 | Hargrave | 619 | France | Female | 42 | 2 | 1 | 1 | 1 | 101348.88 | 1 | 9 |
1 | 15647311 | Hill | 608 | Spain | Female | 41 | 1 | 1 | 0 | 1 | 112542.58 | 0 | 12 |
2 | 15619304 | Onio | 502 | France | Female | 42 | 8 | 3 | 1 | 0 | 113931.57 | 1 | 15 |
3 | 15701354 | Boni | 699 | France | Female | 39 | 1 | 2 | 0 | 0 | 93826.63 | 0 | 5 |
4 | 15737888 | Mitchell | 850 | Spain | Female | 43 | 2 | 1 | 1 | 1 | 79084.10 | 0 | 7 |
5 | 15574012 | Chu | 645 | Spain | Male | 44 | 8 | 2 | 1 | 0 | 149756.71 | 1 | 2 |
6 | 15592531 | Bartlett | 822 | France | Male | 50 | 7 | 2 | 1 | 1 | 10062.80 | 0 | 20 |
7 | 15656148 | Obinna | 376 | Germany | Female | 29 | 4 | 4 | 1 | 0 | 119346.88 | 1 | 25 |
8 | 15792365 | He | 501 | France | Male | 44 | 4 | 2 | 0 | 1 | 74940.50 | 0 | 12 |
9 | 15592389 | H? | 684 | France | Male | 27 | 2 | 1 | 1 | 1 | 71725.73 | 0 | 10 |
In [8]:
# Remove a column from a DataFrame and store it somewhere else
# pop() takes the column out of bank_df and returns it as a Series
Years_with_bank = bank_df.pop('Years with Bank')
Years_with_bank
Out[8]:
0 9
1 12
2 15
3 5
4 7
5 2
6 20
7 25
8 12
9 10
Name: Years with Bank, dtype: int64
728x90
'Data Analytics with python > [Data Analysis]' 카테고리의 다른 글
[Pandas][DataFrame]S2_08_integer_index_Based_elements_selection (0) | 2023.01.21 |
---|---|
[Pandas][DataFrame]S2_07_Label_Based_elements_selection (0) | 2023.01.21 |
[Pandas][DataFrame]S2_05_selecting_columns (0) | 2023.01.21 |
[Pandas][DataFrame]S2_04_index_setting (0) | 2023.01.21 |
[Pandas][DataFrame]S2_03_Outputs (0) | 2023.01.21 |
댓글