728x90
In [1]:
import pandas as pd
In [2]:
# Load the customer table from a CSV file located in the working directory.
bank_df = pd.read_csv('bank customers.csv')
# A bare expression on the last line makes the notebook render the DataFrame.
bank_df
Out[2]:
RowNumber | CustomerId | Surname | CreditScore | Geography | Gender | Age | Tenure | Balance | NumOfProducts | HasCrCard | IsActiveMember | EstimatedSalary | Exited | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 15634602 | Hargrave | 619 | France | Female | 42 | 2 | 0.00 | 1 | 1 | 1 | 101348.88 | 1 |
1 | 2 | 15647311 | Hill | 608 | Spain | Female | 41 | 1 | 83807.86 | 1 | 0 | 1 | 112542.58 | 0 |
2 | 3 | 15619304 | Onio | 502 | France | Female | 42 | 8 | 159660.80 | 3 | 1 | 0 | 113931.57 | 1 |
3 | 4 | 15701354 | Boni | 699 | France | Female | 39 | 1 | 0.00 | 2 | 0 | 0 | 93826.63 | 0 |
4 | 5 | 15737888 | Mitchell | 850 | Spain | Female | 43 | 2 | 125510.82 | 1 | 1 | 1 | 79084.10 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
9995 | 9996 | 15606229 | Obijiaku | 771 | France | Male | 39 | 5 | 0.00 | 2 | 1 | 0 | 96270.64 | 0 |
9996 | 9997 | 15569892 | Johnstone | 516 | France | Male | 35 | 10 | 57369.61 | 1 | 1 | 1 | 101699.77 | 0 |
9997 | 9998 | 15584532 | Liu | 709 | France | Female | 36 | 7 | 0.00 | 1 | 0 | 1 | 42085.58 | 1 |
9998 | 9999 | 15682355 | Sabbatini | 772 | Germany | Male | 42 | 3 | 75075.31 | 2 | 1 | 0 | 92888.52 | 1 |
9999 | 10000 | 15628319 | Walker | 792 | France | Female | 28 | 4 | 130142.79 | 1 | 1 | 0 | 38190.78 | 0 |
10000 rows × 14 columns
In [12]:
# Boolean indexing with a single condition: keep only the rows whose
# EstimatedSalary is strictly greater than 100000.
high_salary = bank_df['EstimatedSalary'].gt(100000)
df_loyal = bank_df.loc[high_salary]
df_loyal
Out[12]:
RowNumber | CustomerId | Surname | CreditScore | Geography | Gender | Age | Tenure | Balance | NumOfProducts | HasCrCard | IsActiveMember | EstimatedSalary | Exited | Rank | Name Length | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 15634602 | Hargrave | 619 | France | Female | 42 | 2 | 0.000 | 1 | 1 | 1 | 101348.88 | 1 | 1 | 8 |
1 | 2 | 15647311 | Hill | 608 | Spain | Female | 41 | 1 | 92188.646 | 1 | 0 | 1 | 112542.58 | 0 | 744 | 4 |
2 | 3 | 15619304 | Onio | 502 | France | Female | 42 | 8 | 175626.880 | 3 | 1 | 0 | 113931.57 | 1 | 5794 | 4 |
5 | 6 | 15574012 | Chu | 645 | Spain | Male | 44 | 8 | 125131.358 | 2 | 1 | 0 | 149756.71 | 1 | 2675 | 3 |
7 | 8 | 15656148 | Obinna | 376 | Germany | Female | 29 | 4 | 126551.414 | 4 | 1 | 0 | 119346.88 | 1 | 2782 | 6 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
9983 | 9984 | 15656710 | Cocci | 613 | France | Male | 40 | 4 | 0.000 | 1 | 0 | 0 | 151325.24 | 0 | 1 | 5 |
9989 | 9990 | 15605622 | McMillan | 841 | Spain | Male | 28 | 4 | 0.000 | 2 | 1 | 1 | 179436.60 | 0 | 1 | 8 |
9992 | 9993 | 15657105 | Chukwualuka | 726 | Spain | Male | 36 | 2 | 0.000 | 1 | 1 | 0 | 195192.40 | 0 | 1 | 11 |
9994 | 9995 | 15719294 | Wood | 800 | France | Female | 29 | 2 | 0.000 | 2 | 0 | 0 | 167773.55 | 0 | 1 | 4 |
9996 | 9997 | 15569892 | Johnstone | 516 | France | Male | 35 | 10 | 63106.571 | 1 | 1 | 1 | 101699.77 | 0 | 125 | 9 |
5010 rows × 16 columns
In [13]:
# Pick the rows that satisfy two or more criteria: build one boolean mask
# per condition and combine them with & (element-wise AND), so a row is
# kept only when every mask is True for it.
mask_1 = bank_df['EstimatedSalary'].gt(100000)
mask_2 = bank_df['Exited'].eq(0)
df_loyal = bank_df.loc[mask_1 & mask_2]
df_loyal
Out[13]:
RowNumber | CustomerId | Surname | CreditScore | Geography | Gender | Age | Tenure | Balance | NumOfProducts | HasCrCard | IsActiveMember | EstimatedSalary | Exited | Rank | Name Length | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 2 | 15647311 | Hill | 608 | Spain | Female | 41 | 1 | 92188.646 | 1 | 0 | 1 | 112542.58 | 0 | 744 | 4 |
13 | 14 | 15691483 | Chin | 549 | France | Female | 25 | 5 | 0.000 | 2 | 0 | 0 | 190857.79 | 0 | 1 | 4 |
18 | 19 | 15661507 | Muldrow | 587 | Spain | Male | 45 | 6 | 0.000 | 1 | 0 | 0 | 158684.81 | 0 | 1 | 7 |
20 | 21 | 15577657 | McDonald | 732 | France | Male | 41 | 8 | 0.000 | 2 | 1 | 1 | 170886.17 | 0 | 1 | 8 |
21 | 22 | 15597945 | Dellucci | 636 | Spain | Female | 32 | 8 | 0.000 | 2 | 1 | 0 | 138555.46 | 0 | 1 | 8 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
9983 | 9984 | 15656710 | Cocci | 613 | France | Male | 40 | 4 | 0.000 | 1 | 0 | 0 | 151325.24 | 0 | 1 | 5 |
9989 | 9990 | 15605622 | McMillan | 841 | Spain | Male | 28 | 4 | 0.000 | 2 | 1 | 1 | 179436.60 | 0 | 1 | 8 |
9992 | 9993 | 15657105 | Chukwualuka | 726 | Spain | Male | 36 | 2 | 0.000 | 1 | 1 | 0 | 195192.40 | 0 | 1 | 11 |
9994 | 9995 | 15719294 | Wood | 800 | France | Female | 29 | 2 | 0.000 | 2 | 0 | 0 | 167773.55 | 0 | 1 | 4 |
9996 | 9997 | 15569892 | Johnstone | 516 | France | Male | 35 | 10 | 63106.571 | 1 | 1 | 1 | 101699.77 | 0 | 125 | 9 |
3966 rows × 16 columns
In [14]:
# Exact-match filter on a text column: keep the rows whose Surname is 'Hill'.
is_hill = bank_df['Surname'].eq('Hill')
df_filtered = bank_df.loc[is_hill]
df_filtered
Out[14]:
RowNumber | CustomerId | Surname | CreditScore | Geography | Gender | Age | Tenure | Balance | NumOfProducts | HasCrCard | IsActiveMember | EstimatedSalary | Exited | Rank | Name Length | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 2 | 15647311 | Hill | 608 | Spain | Female | 41 | 1 | 92188.646 | 1 | 0 | 1 | 112542.58 | 0 | 744 | 4 |
899 | 900 | 15632210 | Hill | 657 | Germany | Male | 25 | 2 | 188947.605 | 1 | 1 | 0 | 22745.50 | 0 | 6099 | 4 |
1123 | 1124 | 15596800 | Hill | 779 | Germany | Male | 33 | 1 | 174302.436 | 1 | 1 | 1 | 197000.92 | 1 | 5766 | 4 |
2112 | 2113 | 15631201 | Hill | 472 | Spain | Female | 28 | 4 | 0.000 | 2 | 1 | 0 | 1801.77 | 0 | 1 | 4 |
2366 | 2367 | 15623430 | Hill | 672 | France | Male | 34 | 9 | 0.000 | 2 | 1 | 0 | 161800.77 | 0 | 1 | 4 |
3903 | 3904 | 15678129 | Hill | 643 | Spain | Female | 45 | 9 | 165924.033 | 2 | 1 | 0 | 155516.35 | 0 | 5450 | 4 |
4070 | 4071 | 15611551 | Hill | 676 | Spain | Male | 48 | 1 | 144825.549 | 2 | 0 | 1 | 14152.15 | 0 | 4220 | 4 |
4115 | 4116 | 15584505 | Hill | 580 | France | Female | 23 | 5 | 125316.191 | 2 | 0 | 0 | 196241.43 | 0 | 2694 | 4 |
4388 | 4389 | 15801152 | Hill | 553 | Spain | Female | 39 | 1 | 157164.678 | 2 | 1 | 0 | 44363.42 | 0 | 5014 | 4 |
6556 | 6557 | 15612207 | Hill | 840 | Germany | Female | 51 | 1 | 96557.813 | 1 | 0 | 1 | 36687.11 | 1 | 889 | 4 |
7412 | 7413 | 15631693 | Hill | 697 | France | Male | 36 | 7 | 0.000 | 2 | 1 | 1 | 74760.32 | 0 | 1 | 4 |
7429 | 7430 | 15644878 | Hill | 685 | Spain | Female | 43 | 6 | 129032.882 | 1 | 0 | 0 | 68701.73 | 0 | 2983 | 4 |
8599 | 8600 | 15708713 | Hill | 633 | France | Male | 35 | 3 | 0.000 | 2 | 1 | 1 | 36249.76 | 0 | 1 | 4 |
8741 | 8742 | 15762855 | Hill | 622 | Spain | Female | 23 | 8 | 0.000 | 2 | 1 | 1 | 131389.39 | 0 | 1 | 4 |
8804 | 8805 | 15773973 | Hill | 765 | France | Male | 41 | 2 | 0.000 | 2 | 0 | 1 | 191215.61 | 0 | 1 | 4 |
9072 | 9073 | 15777315 | Hill | 529 | France | Male | 43 | 6 | 102977.985 | 2 | 0 | 0 | 98348.66 | 0 | 1168 | 4 |
9430 | 9431 | 15591980 | Hill | 753 | France | Male | 33 | 5 | 134824.855 | 2 | 1 | 1 | 82820.85 | 0 | 3441 | 4 |
In [17]:
# isin() tests membership: the mask is True where Surname equals any of the
# listed values, which is shorter than chaining several == comparisons with |.
mask = bank_df['Surname'].isin(['McMillan', 'Wood'])
bank_df.loc[mask]
Out[17]:
RowNumber | CustomerId | Surname | CreditScore | Geography | Gender | Age | Tenure | Balance | NumOfProducts | HasCrCard | IsActiveMember | EstimatedSalary | Exited | Rank | Name Length | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
169 | 170 | 15611325 | Wood | 682 | Germany | Male | 24 | 9 | 63722.791 | 2 | 0 | 0 | 53134.30 | 0 | 133 | 4 |
630 | 631 | 15712403 | McMillan | 589 | France | Female | 61 | 1 | 0.000 | 1 | 1 | 0 | 61108.56 | 1 | 1 | 8 |
952 | 953 | 15606149 | Wood | 571 | Germany | Female | 66 | 9 | 122734.711 | 1 | 0 | 1 | 189271.90 | 0 | 2499 | 4 |
1061 | 1062 | 15692991 | Wood | 710 | Spain | Female | 38 | 4 | 0.000 | 2 | 1 | 1 | 136390.88 | 0 | 1 | 4 |
2168 | 2169 | 15805260 | Wood | 705 | Germany | Female | 56 | 2 | 157574.637 | 1 | 1 | 0 | 88428.41 | 1 | 5036 | 4 |
3597 | 3598 | 15671630 | McMillan | 796 | Germany | Female | 40 | 1 | 109720.545 | 1 | 1 | 0 | 177524.19 | 0 | 1560 | 8 |
3932 | 3933 | 15763798 | McMillan | 680 | France | Male | 23 | 5 | 154007.909 | 1 | 0 | 1 | 31714.08 | 0 | 4836 | 8 |
3969 | 3970 | 15743673 | Wood | 551 | Spain | Male | 27 | 2 | 125260.542 | 1 | 1 | 1 | 85129.77 | 1 | 2685 | 4 |
4540 | 4541 | 15613017 | McMillan | 586 | Germany | Male | 32 | 1 | 164795.994 | 1 | 1 | 0 | 31830.06 | 0 | 5405 | 8 |
4880 | 4881 | 15611580 | Wood | 751 | Spain | Male | 33 | 4 | 87209.771 | 1 | 1 | 0 | 117547.76 | 0 | 553 | 4 |
5166 | 5167 | 15694644 | Wood | 455 | Spain | Female | 43 | 6 | 0.000 | 1 | 1 | 1 | 81250.79 | 0 | 1 | 4 |
5546 | 5547 | 15633818 | McMillan | 786 | France | Male | 32 | 9 | 0.000 | 2 | 1 | 0 | 133112.41 | 0 | 1 | 8 |
5724 | 5725 | 15784286 | Wood | 641 | Spain | Male | 40 | 5 | 112359.643 | 1 | 1 | 1 | 100637.07 | 0 | 1733 | 4 |
5897 | 5898 | 15709604 | McMillan | 781 | France | Male | 23 | 2 | 118176.828 | 1 | 1 | 0 | 173843.21 | 0 | 2147 | 8 |
5919 | 5920 | 15815043 | McMillan | 645 | Spain | Male | 49 | 8 | 0.000 | 2 | 1 | 0 | 162012.60 | 0 | 1 | 8 |
7016 | 7017 | 15790254 | Wood | 741 | Spain | Male | 50 | 1 | 86611.371 | 1 | 1 | 1 | 13018.96 | 0 | 535 | 4 |
7141 | 7142 | 15643056 | McMillan | 755 | Germany | Female | 38 | 1 | 90291.872 | 1 | 0 | 1 | 10333.78 | 0 | 665 | 8 |
7313 | 7314 | 15583067 | McMillan | 687 | France | Female | 36 | 4 | 106873.756 | 1 | 0 | 1 | 63185.05 | 0 | 1383 | 8 |
7692 | 7693 | 15807889 | Wood | 634 | Germany | Male | 74 | 5 | 119780.870 | 1 | 1 | 0 | 10078.02 | 0 | 2279 | 4 |
8337 | 8338 | 15568519 | Wood | 534 | France | Male | 41 | 9 | 0.000 | 2 | 1 | 0 | 13871.34 | 0 | 1 | 4 |
8832 | 8833 | 15765345 | Wood | 753 | France | Male | 35 | 4 | 0.000 | 2 | 1 | 1 | 106303.40 | 0 | 1 | 4 |
8856 | 8857 | 15759733 | McMillan | 774 | France | Female | 26 | 5 | 0.000 | 2 | 1 | 1 | 64716.08 | 0 | 1 | 8 |
8874 | 8875 | 15684318 | McMillan | 582 | Germany | Female | 50 | 6 | 106135.227 | 2 | 1 | 1 | 20344.02 | 0 | 1335 | 8 |
9369 | 9370 | 15795458 | McMillan | 718 | Spain | Female | 39 | 2 | 0.000 | 1 | 1 | 1 | 52138.49 | 0 | 1 | 8 |
9483 | 9484 | 15675983 | Wood | 692 | France | Female | 36 | 3 | 87506.232 | 1 | 0 | 1 | 178267.07 | 0 | 566 | 4 |
9604 | 9605 | 15687363 | McMillan | 770 | France | Male | 31 | 3 | 170552.316 | 2 | 1 | 1 | 186064.34 | 0 | 5643 | 8 |
9742 | 9743 | 15620577 | Wood | 715 | France | Male | 45 | 4 | 0.000 | 2 | 1 | 1 | 55043.93 | 0 | 1 | 4 |
9989 | 9990 | 15605622 | McMillan | 841 | Spain | Male | 28 | 4 | 0.000 | 2 | 1 | 1 | 179436.60 | 0 | 1 | 8 |
9994 | 9995 | 15719294 | Wood | 800 | France | Female | 29 | 2 | 0.000 | 2 | 0 | 0 | 167773.55 | 0 | 1 | 4 |
In [18]:
# between() selects a numeric range; with its default arguments both bounds
# are inclusive, i.e. this keeps rows where 5000 <= Balance <= 100000.
balance_in_range = bank_df['Balance'].between(5000, 100000)
bank_df.loc[balance_in_range]
Out[18]:
RowNumber | CustomerId | Surname | CreditScore | Geography | Gender | Age | Tenure | Balance | NumOfProducts | HasCrCard | IsActiveMember | EstimatedSalary | Exited | Rank | Name Length | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 2 | 15647311 | Hill | 608 | Spain | Female | 41 | 1 | 92188.646 | 1 | 0 | 1 | 112542.58 | 0 | 744 | 4 |
29 | 30 | 15656300 | Lucciano | 411 | France | Male | 29 | 0 | 65666.887 | 2 | 1 | 1 | 53483.21 | 0 | 153 | 8 |
31 | 32 | 15706552 | Odinakachukwu | 533 | France | Male | 36 | 7 | 93842.870 | 1 | 0 | 1 | 156731.91 | 0 | 800 | 13 |
37 | 38 | 15729599 | Lorenzo | 804 | Spain | Male | 33 | 7 | 84203.460 | 1 | 0 | 1 | 98453.45 | 0 | 468 | 7 |
39 | 40 | 15585768 | Cameron | 582 | Germany | Male | 41 | 6 | 77384.428 | 2 | 0 | 1 | 178074.04 | 0 | 320 | 7 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
9984 | 9985 | 15696175 | Echezonachukwu | 602 | Germany | Male | 35 | 7 | 99662.662 | 2 | 1 | 1 | 51695.41 | 0 | 1019 | 14 |
9990 | 9991 | 15798964 | Nkemakonam | 714 | Germany | Male | 33 | 3 | 38518.260 | 1 | 1 | 0 | 53667.08 | 0 | 18 | 10 |
9991 | 9992 | 15769959 | Ajuluchukwu | 597 | France | Female | 53 | 4 | 97219.331 | 1 | 1 | 0 | 69384.71 | 1 | 917 | 11 |
9996 | 9997 | 15569892 | Johnstone | 516 | France | Male | 35 | 10 | 63106.571 | 1 | 1 | 1 | 101699.77 | 0 | 125 | 9 |
9998 | 9999 | 15682355 | Sabbatini | 772 | Germany | Male | 42 | 3 | 82582.841 | 2 | 1 | 0 | 92888.52 | 1 | 428 | 9 |
1028 rows × 16 columns
In [20]:
# Flag repeated surnames (only the Surname column is compared, not whole rows).
# keep=False marks every occurrence of a repeated surname as True,
# so False means the surname appears exactly once in the table.
bank_df['Surname'].duplicated(keep=False)
Out[20]:
0 False
1 True
2 True
3 True
4 True
...
9995 True
9996 True
9997 True
9998 True
9999 True
Name: Surname, Length: 10000, dtype: bool
In [21]:
# The ~ (tilde) operator is the element-wise NOT for a boolean Series:
# after negation, True marks the surnames that appear exactly once.
~bank_df['Surname'].duplicated(keep=False)
Out[21]:
0 True
1 False
2 False
3 False
4 False
...
9995 False
9996 False
9997 False
9998 False
9999 False
Name: Surname, Length: 10000, dtype: bool
In [22]:
# Keep only the customers whose surname is unique in the table: every row
# whose surname occurs more than once is excluded (all of its occurrences,
# not just the later ones). The result is a new frame for display;
# bank_df itself is not modified by this cell.
surname_repeated = bank_df['Surname'].duplicated(keep=False)
mask = ~surname_repeated
bank_df.loc[mask]
Out[22]:
RowNumber | CustomerId | Surname | CreditScore | Geography | Gender | Age | Tenure | Balance | NumOfProducts | HasCrCard | IsActiveMember | EstimatedSalary | Exited | Rank | Name Length | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 15634602 | Hargrave | 619 | France | Female | 42 | 2 | 0.000 | 1 | 1 | 1 | 101348.88 | 1 | 1 | 8 |
10 | 11 | 15767821 | Bearce | 528 | France | Male | 31 | 6 | 112218.392 | 2 | 0 | 0 | 80181.12 | 0 | 1720 | 6 |
15 | 16 | 15643966 | Goforth | 616 | Germany | Male | 45 | 3 | 157442.351 | 2 | 0 | 1 | 64327.26 | 0 | 5029 | 7 |
16 | 17 | 15737452 | Romeo | 653 | Germany | Male | 58 | 1 | 145863.168 | 1 | 1 | 0 | 5097.67 | 1 | 4294 | 5 |
18 | 19 | 15661507 | Muldrow | 587 | Spain | Male | 45 | 6 | 0.000 | 1 | 0 | 0 | 158684.81 | 0 | 1 | 7 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
9950 | 9951 | 15638494 | Salinas | 625 | Germany | Female | 39 | 10 | 142829.786 | 1 | 1 | 1 | 96444.88 | 0 | 4085 | 7 |
9952 | 9953 | 15696355 | Cleveland | 724 | Germany | Male | 37 | 6 | 138038.340 | 1 | 1 | 0 | 118570.53 | 0 | 3695 | 9 |
9955 | 9956 | 15611338 | Kashiwagi | 714 | Spain | Male | 29 | 4 | 0.000 | 2 | 1 | 1 | 37605.90 | 0 | 1 | 9 |
9961 | 9962 | 15566543 | Aldridge | 573 | Spain | Male | 44 | 9 | 0.000 | 2 | 1 | 0 | 107124.17 | 0 | 1 | 8 |
9981 | 9982 | 15672754 | Burbidge | 498 | Germany | Male | 42 | 3 | 167243.670 | 1 | 1 | 1 | 53445.17 | 1 | 5500 | 8 |
1558 rows × 16 columns
In [24]:
# Drop duplicates, comparing rows by Surname only.
# keep='first' (the default): one row per surname is kept - the first one
# encountered - and all later rows with the same surname are removed.
# inplace=True: use this when the object held in memory should itself be
# changed; bank_df is modified directly instead of a new frame being returned.
bank_df.drop_duplicates(subset=['Surname'], keep='first', inplace=True)
bank_df
Out[24]:
RowNumber | CustomerId | Surname | CreditScore | Geography | Gender | Age | Tenure | Balance | NumOfProducts | HasCrCard | IsActiveMember | EstimatedSalary | Exited | Rank | Name Length | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 15634602 | Hargrave | 619 | France | Female | 42 | 2 | 0.000 | 1 | 1 | 1 | 101348.88 | 1 | 1 | 8 |
1 | 2 | 15647311 | Hill | 608 | Spain | Female | 41 | 1 | 92188.646 | 1 | 0 | 1 | 112542.58 | 0 | 744 | 4 |
2 | 3 | 15619304 | Onio | 502 | France | Female | 42 | 8 | 175626.880 | 3 | 1 | 0 | 113931.57 | 1 | 5794 | 4 |
3 | 4 | 15701354 | Boni | 699 | France | Female | 39 | 1 | 0.000 | 2 | 0 | 0 | 93826.63 | 0 | 1 | 4 |
4 | 5 | 15737888 | Mitchell | 850 | Spain | Female | 43 | 2 | 138061.902 | 1 | 1 | 1 | 79084.10 | 0 | 3697 | 8 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
9950 | 9951 | 15638494 | Salinas | 625 | Germany | Female | 39 | 10 | 142829.786 | 1 | 1 | 1 | 96444.88 | 0 | 4085 | 7 |
9952 | 9953 | 15696355 | Cleveland | 724 | Germany | Male | 37 | 6 | 138038.340 | 1 | 1 | 0 | 118570.53 | 0 | 3695 | 9 |
9955 | 9956 | 15611338 | Kashiwagi | 714 | Spain | Male | 29 | 4 | 0.000 | 2 | 1 | 1 | 37605.90 | 0 | 1 | 9 |
9961 | 9962 | 15566543 | Aldridge | 573 | Spain | Male | 44 | 9 | 0.000 | 2 | 1 | 0 | 107124.17 | 0 | 1 | 8 |
9981 | 9982 | 15672754 | Burbidge | 498 | Germany | Male | 42 | 3 | 167243.670 | 1 | 1 | 1 | 53445.17 | 1 | 5500 | 8 |
2932 rows × 16 columns
In [26]:
# Reload the CSV before the next filtering example: bank_df is rebound to a
# fresh frame read from the file, replacing the one that the earlier
# drop_duplicates(..., inplace=True) cell modified.
bank_df = pd.read_csv('bank customers.csv')
bank_df
Out[26]:
RowNumber | CustomerId | Surname | CreditScore | Geography | Gender | Age | Tenure | Balance | NumOfProducts | HasCrCard | IsActiveMember | EstimatedSalary | Exited | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 15634602 | Hargrave | 619 | France | Female | 42 | 2 | 0.00 | 1 | 1 | 1 | 101348.88 | 1 |
1 | 2 | 15647311 | Hill | 608 | Spain | Female | 41 | 1 | 83807.86 | 1 | 0 | 1 | 112542.58 | 0 |
2 | 3 | 15619304 | Onio | 502 | France | Female | 42 | 8 | 159660.80 | 3 | 1 | 0 | 113931.57 | 1 |
3 | 4 | 15701354 | Boni | 699 | France | Female | 39 | 1 | 0.00 | 2 | 0 | 0 | 93826.63 | 0 |
4 | 5 | 15737888 | Mitchell | 850 | Spain | Female | 43 | 2 | 125510.82 | 1 | 1 | 1 | 79084.10 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
9995 | 9996 | 15606229 | Obijiaku | 771 | France | Male | 39 | 5 | 0.00 | 2 | 1 | 0 | 96270.64 | 0 |
9996 | 9997 | 15569892 | Johnstone | 516 | France | Male | 35 | 10 | 57369.61 | 1 | 1 | 1 | 101699.77 | 0 |
9997 | 9998 | 15584532 | Liu | 709 | France | Female | 36 | 7 | 0.00 | 1 | 0 | 1 | 42085.58 | 1 |
9998 | 9999 | 15682355 | Sabbatini | 772 | Germany | Male | 42 | 3 | 75075.31 | 2 | 1 | 0 | 92888.52 | 1 |
9999 | 10000 | 15628319 | Walker | 792 | France | Female | 28 | 4 | 130142.79 | 1 | 1 | 0 | 38190.78 | 0 |
10000 rows × 14 columns
In [27]:
# Filter the DataFrame with the where() method. Unlike boolean indexing, the
# result keeps the original shape: every row that does not satisfy the
# criterion is kept but filled with NaN in all columns (which is why the
# integer columns are displayed as floats in the output).
mask = bank_df['Balance'].le(20000)
bank_df.where(cond=mask)
Out[27]:
RowNumber | CustomerId | Surname | CreditScore | Geography | Gender | Age | Tenure | Balance | NumOfProducts | HasCrCard | IsActiveMember | EstimatedSalary | Exited | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1.0 | 15634602.0 | Hargrave | 619.0 | France | Female | 42.0 | 2.0 | 0.0 | 1.0 | 1.0 | 1.0 | 101348.88 | 1.0 |
1 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
3 | 4.0 | 15701354.0 | Boni | 699.0 | France | Female | 39.0 | 1.0 | 0.0 | 2.0 | 0.0 | 0.0 | 93826.63 | 0.0 |
4 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
9995 | 9996.0 | 15606229.0 | Obijiaku | 771.0 | France | Male | 39.0 | 5.0 | 0.0 | 2.0 | 1.0 | 0.0 | 96270.64 | 0.0 |
9996 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
9997 | 9998.0 | 15584532.0 | Liu | 709.0 | France | Female | 36.0 | 7.0 | 0.0 | 1.0 | 0.0 | 1.0 | 42085.58 | 1.0 |
9998 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
9999 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
10000 rows × 14 columns
728x90
'Data Analytics with python > [Data Analysis]' 카테고리의 다른 글
[Pandas][DataFrame]S2_14_change_datatypes (0) | 2023.01.21 |
---|---|
[Pandas][DataFrame]S2_13_FeatureEngineering (0) | 2023.01.21 |
[Pandas][DataFrame]S2_11_define_functions (0) | 2023.01.21 |
[Pandas][DataFrame]S2_10_sorting_and_ordering (0) | 2023.01.21 |
[Pandas][DataFrame]S2_09_Broadcasting (0) | 2023.01.21 |
댓글