본문 바로가기
728x90

분류 전체보기279

[matplotlib]S5_03_Multiple_plots In [41]: import matplotlib.pyplot as plt import pandas as pd import datetime In [49]: # data investment_df = pd.read_csv('coin_daily_prices.csv'); investment_df Out[49]: Date BTC-USD Price ETH-USD Price LTC-USD Price 0 2013-04-29 23:59 134.444000 NaN 4.366760 1 2013-04-30 23:59 144.000000 NaN 4.403520 2 2013-05-01 23:59 139.000000 NaN 4.289540 3 2013-05-02 23:59 116.379997 NaN 3.780020 4 2013-05.. 2023. 1. 21.
[matplotlib]S5_02_yahoo_finance In [41]: import matplotlib.pyplot as plt import pandas as pd import datetime In [43]: # download market data from Yahoo finance # !pip install yfinance import yfinance as yf yfinance https://www.learndatasci.com/tutorials/python-finance-part-yahoo-finance-api-pandas-matplotlib/ In [44]: # Ticker Symbols investments_list = ['BTC-USD'] # Specify the start and end dates start = datetime.datetime(20.. 2023. 1. 21.
[matplotlib]S5_01_single_line_plot data https://www.kaggle.com/datasets/sudalairajkumar/cryptocurrencypricehistory?resource=download&select=coin_Litecoin.csv In [29]: import matplotlib.pyplot as plt import pandas as pd import datetime In [32]: investment_df = pd.read_csv('coin_daily_prices.csv') investment_df Out[32]: Date BTC-USD Price ETH-USD Price LTC-USD Price 0 2013-04-29 23:59 134.444000 NaN 4.366760 1 2013-04-30 23:59 144... 2023. 1. 21.
[Pandas][DataFrame][MultiIndex]S4_05_Multi_indexing_operations2 In [ ]: import pandas as pd In [57]: # Multi-index DataFrame sales_df = pd.read_csv('Online Retail.csv', encoding = 'unicode_escape') sales_df.set_index(keys=['Country','InvoiceDate'], inplace = True) sales_df.sort_index(inplace=True) sales_df Out[57]: InvoiceNo StockCode Description Quantity UnitPrice CustomerID Country InvoiceDate Australia 1/10/2011 9:58 540557 22523 CHILDS GARDEN FORK PINK 9.. 2023. 1. 21.
[Pandas][DataFrame][MultiIndex]S4_04_Multi_indexing_operations1 In [ ]: import pandas as pd In [ ]: # Multi-index DataFrame sales_df = pd.read_csv('Online Retail.csv', encoding = 'unicode_escape') sales_df.set_index(keys=['Country','InvoiceDate'], inplace = True) sales_df Out[ ]: InvoiceNo StockCode Description Quantity UnitPrice CustomerID Country InvoiceDate United Kingdom 12/1/2010 8:26 536365 85123A WHITE HANGING HEART T-LIGHT HOLDER 6 2.55 17850.0 12/1/.. 2023. 1. 21.
[Pandas][DataFrame][MultiIndex]S4_03_Multi_index_dataframe groupby https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.groupby.html In [1]: import pandas as pd In [2]: sales_df = pd.read_csv('Online Retail.csv', encoding= 'unicode_escape') In [12]: sales_df.head() Out[12]: InvoiceNo StockCode Description Quantity InvoiceDate UnitPrice CustomerID Country 0 536365 85123A WHITE HANGING HEART T-LIGHT HOLDER 6 2010-12-01 08:26:00 2.55.. 2023. 1. 21.
[Pandas][DataFrame][MultiIndex]S4_02_grouping groupby https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.groupby.html In [1]: import pandas as pd In [2]: sales_df = pd.read_csv('Online Retail.csv', encoding= 'unicode_escape') In [12]: sales_df.head() Out[12]: InvoiceNo StockCode Description Quantity InvoiceDate UnitPrice CustomerID Country 0 536365 85123A WHITE HANGING HEART T-LIGHT HOLDER 6 2010-12-01 08:26:00 2.55.. 2023. 1. 21.
[Pandas][DataFrame][MultiIndex]S4_01_Explore_dataset Link to Dataset: https://www.kaggle.com/datasets/mysarahmadbhat/customersegmentation In [1]: import pandas as pd In [2]: sales_df = pd.read_csv('Online Retail.csv', encoding= 'unicode_escape') In [3]: sales_df.head() Out[3]: InvoiceNo StockCode Description Quantity InvoiceDate UnitPrice CustomerID Country 0 536365 85123A WHITE HANGING HEART T-LIGHT HOLDER 6 12/1/2010 8:26 2.55 17850.0 United Ki.. 2023. 1. 21.
[Pandas][DataFrame][concat]S3_02_concatenation_with_multi_indexing In [1]: import pandas as pd In [2]: raw_data = {'Bank Client ID': ['1', '2', '3', '4', '5'], 'First Name': ['Robert', 'Benedict', 'Mark', 'Tom', 'Ryan'], 'Last Name': ['Downey', 'Cumberbatch', 'Ruffalo', 'Holland', 'Reynolds']} bank1_df = pd.DataFrame(raw_data, columns = ['Bank Client ID', 'First Name', 'Last Name']) In [3]: raw_data = {'Bank Client ID': ['6', '7', '8', '9', '10'], 'First Name':.. 2023. 1. 21.
[Pandas][DataFrame][concat]S3_01_concatenation In [1]: import pandas as pd https://pandas.pydata.org/pandas-docs/stable/user_guide/merging.html In [2]: raw_data = {'Bank Client ID': ['1', '2', '3', '4', '5'], 'First Name': ['Robert', 'Benedict', 'Mark', 'Tom', 'Ryan'], 'Last Name': ['Downey', 'Cumberbatch', 'Ruffalo', 'Holland', 'Reynolds']} bank1_df = pd.DataFrame(raw_data, columns = ['Bank Client ID', 'First Name', 'Last Name']) bank1_df O.. 2023. 1. 21.
[Pandas][DataFrame]S2_14_change_datatypes In [59]: import pandas as pd In [60]: employee_df = pd.read_csv('Human_Resources_Employee.csv') employee_df.head() Out[60]: Age Attrition BusinessTravel DailyRate Department DistanceFromHome Education EducationField EmployeeCount EmployeeNumber ... RelationshipSatisfaction StandardHours StockOptionLevel TotalWorkingYears TrainingTimesLastYear WorkLifeBalance YearsAtCompany YearsInCurrentRole Yea.. 2023. 1. 21.
[Pandas][DataFrame]S2_13_FeatureEngineering Feature Engineering and Dealing with Missing Dataset https://www.kaggle.com/datasets/rishikeshkonapure/hr-analytics-prediction?resource=download 위 데이터의 일부를 Null 처리함 In [46]: import pandas as pd In [48]: employee_df = pd.read_csv('Human_Resources_Employee.csv') employee_df.head() Out[48]: Age Attrition BusinessTravel DailyRate Department DistanceFromHome Education EducationField EmployeeCount Em.. 2023. 1. 21.
[Pandas][DataFrame]S2_12_Operations_Filtering In [1]: import pandas as pd In [2]: bank_df = pd.read_csv('bank customers.csv') bank_df Out[2]: RowNumber CustomerId Surname CreditScore Geography Gender Age Tenure Balance NumOfProducts HasCrCard IsActiveMember EstimatedSalary Exited 0 1 15634602 Hargrave 619 France Female 42 2 0.00 1 1 1 101348.88 1 1 2 15647311 Hill 608 Spain Female 41 1 83807.86 1 0 1 112542.58 0 2 3 15619304 Onio 502 France.. 2023. 1. 21.
[Pandas][DataFrame]S2_11_define_functions In [1]: import pandas as pd In [2]: bank_df = pd.read_csv('bank customers.csv') bank_df Out[2]: RowNumber CustomerId Surname CreditScore Geography Gender Age Tenure Balance NumOfProducts HasCrCard IsActiveMember EstimatedSalary Exited 0 1 15634602 Hargrave 619 France Female 42 2 0.00 1 1 1 101348.88 1 1 2 15647311 Hill 608 Spain Female 41 1 83807.86 1 0 1 112542.58 0 2 3 15619304 Onio 502 France.. 2023. 1. 21.
[Pandas][DataFrame]S2_10_sorting_and_ordering In [1]: import pandas as pd In [2]: bank_df = pd.read_csv('bank customers.csv') bank_df Out[2]: RowNumber CustomerId Surname CreditScore Geography Gender Age Tenure Balance NumOfProducts HasCrCard IsActiveMember EstimatedSalary Exited 0 1 15634602 Hargrave 619 France Female 42 2 0.00 1 1 1 101348.88 1 1 2 15647311 Hill 608 Spain Female 41 1 83807.86 1 0 1 112542.58 0 2 3 15619304 Onio 502 France.. 2023. 1. 21.
[Pandas][DataFrame]S2_09_Broadcasting In [1]: import pandas as pd In [2]: bank_df = pd.read_csv('bank customers.csv') bank_df Out[2]: RowNumber CustomerId Surname CreditScore Geography Gender Age Tenure Balance NumOfProducts HasCrCard IsActiveMember EstimatedSalary Exited 0 1 15634602 Hargrave 619 France Female 42 2 0.00 1 1 1 101348.88 1 1 2 15647311 Hill 608 Spain Female 41 1 83807.86 1 0 1 112542.58 0 2 3 15619304 Onio 502 France.. 2023. 1. 21.
[Pandas][DataFrame]S2_08_integer_index_Based_elements_selection In [23]: import pandas as pd In [25]: bank_df = pd.read_csv('bank customers.csv') bank_df Out[25]: RowNumber CustomerId Surname CreditScore Geography Gender Age Tenure Balance NumOfProducts HasCrCard IsActiveMember EstimatedSalary Exited 0 1 15634602 Hargrave 619 France Female 42 2 0.00 1 1 1 101348.88 1 1 2 15647311 Hill 608 Spain Female 41 1 83807.86 1 0 1 112542.58 0 2 3 15619304 Onio 502 Fra.. 2023. 1. 21.
[Pandas][DataFrame]S2_07_Label_Based_elements_selection In [1]: import pandas as pd In [3]: bank_df = pd.read_csv('bank customers.csv', index_col='Surname') bank_df Out[3]: RowNumber CustomerId CreditScore Geography Gender Age Tenure Balance NumOfProducts HasCrCard IsActiveMember EstimatedSalary Exited Surname Hargrave 1 15634602 619 France Female 42 2 0.00 1 1 1 101348.88 1 Hill 2 15647311 608 Spain Female 41 1 83807.86 1 0 1 112542.58 0 Onio 3 1561.. 2023. 1. 21.
[Pandas][DataFrame]S2_06_Column_ADDING_DELETING In [1]: import pandas as pd In [2]: bank_df = pd.read_csv('bank customers.csv') bank_df Out[2]: RowNumber CustomerId Surname CreditScore Geography Gender Age Tenure Balance NumOfProducts HasCrCard IsActiveMember EstimatedSalary Exited 0 1 15634602 Hargrave 619 France Female 42 2 0.00 1 1 1 101348.88 1 1 2 15647311 Hill 608 Spain Female 41 1 83807.86 1 0 1 112542.58 0 2 3 15619304 Onio 502 France.. 2023. 1. 21.
[Pandas][DataFrame]S2_05_selecting_columns 데이터 출처: https://www.kaggle.com/datasets/mathchi/churn-for-bank-customers?resource=download In [1]: import pandas as pd In [3]: bank_df = pd.read_csv('bank customers.csv'); bank_df Out[3]: RowNumber CustomerId Surname CreditScore Geography Gender Age Tenure Balance NumOfProducts HasCrCard IsActiveMember EstimatedSalary Exited 0 1 15634602 Hargrave 619 France Female 42 2 0.00 1 1 1 101348.88 1 1 2.. 2023. 1. 21.
728x90