본문 바로가기
728x90

Data Analytics with python114

[datetime]S7_05_Practical_example3 In [68]: import pandas as pd import datetime as dt In [69]: avo_df = pd.read_csv('Avocado.csv') avo_df Out[69]: Date AveragePrice Total Volume type region 0 2015-12-27 1.33 64236.62 conventional Albany 1 2015-12-20 1.35 54876.98 conventional Albany 2 2015-12-13 0.93 118220.22 conventional Albany 3 2015-12-06 1.08 78992.15 conventional Albany 4 2015-11-29 1.28 51039.60 conventional Albany ... ..... 2023. 1. 21.
[datetime]S7_04_Practical_example2 In [68]: import pandas as pd import datetime as dt In [69]: avo_df = pd.read_csv('Avocado.csv') avo_df Out[69]: Date AveragePrice Total Volume type region 0 2015-12-27 1.33 64236.62 conventional Albany 1 2015-12-20 1.35 54876.98 conventional Albany 2 2015-12-13 0.93 118220.22 conventional Albany 3 2015-12-06 1.08 78992.15 conventional Albany 4 2015-11-29 1.28 51039.60 conventional Albany ... ..... 2023. 1. 21.
[datetime]S7_03_Practical_example1 In [59]: import pandas as pd import datetime as dt In [60]: avo_df = pd.read_csv('Avocado.csv') avo_df Out[60]: Date AveragePrice Total Volume type region 0 2015-12-27 1.33 64236.62 conventional Albany 1 2015-12-20 1.35 54876.98 conventional Albany 2 2015-12-13 0.93 118220.22 conventional Albany 3 2015-12-06 1.08 78992.15 conventional Albany 4 2015-11-29 1.28 51039.60 conventional Albany ... ..... 2023. 1. 21.
[datetime]S7_02_Timestamp https://pandas.pydata.org/docs/reference/api/pandas.Timestamp.html In [31]: import pandas as pd import datetime as dt In [33]: pd.Timestamp('2023, 3, 30') Out[33]: Timestamp('2023-03-30 00:00:00') In [34]: # Pandas Timestamp pd.Timestamp(dt.datetime(2022, 3, 31, 8, 0, 15,)) Out[34]: Timestamp('2022-03-31 08:00:15') In [35]: # Difference between two dates day_1 = pd.Timestamp('1990, 3, 31, 11') d.. 2023. 1. 21.
[datetime]S7_01_datetime In [11]: # date: define dates only without including time (month, day, year) # datetime : define times and dates together (month, day, year, hour, second, microsecond) import pandas as pd import datetime as dt In [12]: # A date date_ex = dt.date(2022,1,1) date_ex Out[12]: datetime.date(2022, 1, 1) In [13]: type(date_ex) Out[13]: datetime.date In [14]: # Convert it into string to view str(now) Ou.. 2023. 1. 21.
[seaborn]S6_02_pairplot,displot,heatmap(correlations) Seaborn https://seaborn.pydata.org/examples/index.html ● pandas: data manipulation using DataFrames ● numpy: statistical data analysis ● matplotlib: data visualization ● seaborn: statistical data visualization https://www.kaggle.com/datasets/yasserh/breast-cancer-dataset?resource=download In [58]: # Seaborn offers enhanced features compared to matplotlib # import libraries import pandas as pd import.. 2023. 1. 21.
[seaborn]S6_01_scatter&count_plot Seaborn https://seaborn.pydata.org/examples/index.html ● pandas: data manipulation using DataFrames ● numpy: statistical data analysis ● matplotlib: data visualization ● seaborn: statistical data visualization https://www.kaggle.com/datasets/yasserh/breast-cancer-dataset?resource=download In [58]: # Seaborn offers enhanced features compared to matplotlib # import libraries import pandas as pd import.. 2023. 1. 21.
[matplotlib]S5_appendix_Making_dataset(주식 일별 수익률 계산) 데이터 가공 참고 사이트: https://invest-in-yourself.tistory.com/278 In [55]: # ! pip install yfinance import yfinance as yf import pandas as pd import datetime In [56]: # Download price data from Yahoo finance start = '2013-04-29' end = '2021-07-06' p_Bitcoin = yf.download('BTC-USD', start = start, end=end ) p_Ethereum = yf.download('ETH-USD', start = start, end=end ) p_Litecoin = yf.download('LTC-USD', s.. 2023. 1. 21.
[matplotlib]S5_07_histogram In [39]: import pandas as pd import matplotlib.pyplot as plt import datetime In [45]: # 일별 수익률 데이터 return_df = pd.read_csv('crypto_daily_returns.csv'); return_df Out[45]: Date BTC ETH LTC 0 09/17/2014 0.000000 0.000000 0.000000 1 09/18/2014 -7.192558 NaN -7.379983 2 09/19/2014 -6.984265 NaN -7.629499 3 09/20/2014 3.573492 NaN -0.955003 4 09/21/2014 -2.465854 NaN -0.945300 ... ... ... ... ... 247.. 2023. 1. 21.
[matplotlib]S5_06_pie_chart In [1]: import matplotlib.pyplot as plt import pandas as pd import datetime In [2]: # data crypto_dict = {'allocation': [20,55,5,17,3]} explode = (0,0,0,0.2,0) In [3]: crypto_df = pd.DataFrame(data= crypto_dict, index=['BTC','ETH','LTC','XRP','ADA']) crypto_df Out[3]: allocation BTC 20 ETH 55 LTC 5 XRP 17 ADA 3 In [4]: # a pie chart crypto_df.plot.pie(y = 'allocation',explode = explode, figsize .. 2023. 1. 21.
[matplotlib]S5_05_Scatterplot In [ ]: import matplotlib.pyplot as plt import pandas as pd import datetime In [63]: # data return_df = pd.read_csv('coin_daily_returns.csv'); return_df Out[63]: Date BTC ETH LTC 0 2013-04-29 23:59 7.509441 NaN 0.392520 1 2013-04-30 23:59 -3.472222 NaN -2.430554 2 2013-05-01 23:59 -15.834534 NaN -11.388866 3 2013-05-02 23:59 -9.597868 NaN -10.794653 4 2013-05-03 23:59 -8.000000 NaN -10.191304 .... 2023. 1. 21.
[matplotlib]S5_04_Sub_plots In [41]: import matplotlib.pyplot as plt import pandas as pd import datetime In [49]: # data investment_df = pd.read_csv('coin_daily_prices.csv'); investment_df Out[49]: Date BTC-USD Price ETH-USD Price LTC-USD Price 0 2013-04-29 23:59 134.444000 NaN 4.366760 1 2013-04-30 23:59 144.000000 NaN 4.403520 2 2013-05-01 23:59 139.000000 NaN 4.289540 3 2013-05-02 23:59 116.379997 NaN 3.780020 4 2013-05.. 2023. 1. 21.
[matplotlib]S5_03_Multiple_plots In [41]: import matplotlib.pyplot as plt import pandas as pd import datetime In [49]: # data investment_df = pd.read_csv('coin_daily_prices.csv'); investment_df Out[49]: Date BTC-USD Price ETH-USD Price LTC-USD Price 0 2013-04-29 23:59 134.444000 NaN 4.366760 1 2013-04-30 23:59 144.000000 NaN 4.403520 2 2013-05-01 23:59 139.000000 NaN 4.289540 3 2013-05-02 23:59 116.379997 NaN 3.780020 4 2013-05.. 2023. 1. 21.
[matplotlib]S5_02_yahoo_finance In [41]: import matplotlib.pyplot as plt import pandas as pd import datetime In [43]: # download market data from Yahoo finance # !pip install yfinance import yfinance as yf yfinance https://www.learndatasci.com/tutorials/python-finance-part-yahoo-finance-api-pandas-matplotlib/ In [44]: # Ticker Symbols investments_list = ['BTC-USD'] # Specify the start and end dates start = datetime.datetime(20.. 2023. 1. 21.
[matplotlib]S5_01_single_line_plot data https://www.kaggle.com/datasets/sudalairajkumar/cryptocurrencypricehistory?resource=download&select=coin_Litecoin.csv In [29]: import matplotlib.pyplot as plt import pandas as pd import datetime In [32]: investment_df = pd.read_csv('coin_daily_prices.csv') investment_df Out[32]: Date BTC-USD Price ETH-USD Price LTC-USD Price 0 2013-04-29 23:59 134.444000 NaN 4.366760 1 2013-04-30 23:59 144... 2023. 1. 21.
[Pandas][DataFrame][MultiIndex]S4_05_Multi_indexing_operations2 In [ ]: import pandas as pd In [57]: # Multi-index DataFrame sales_df = pd.read_csv('Online Retail.csv', encoding = 'unicode_escape') sales_df.set_index(keys=['Country','InvoiceDate'], inplace = True) sales_df.sort_index(inplace=True) sales_df Out[57]: InvoiceNo StockCode Description Quantity UnitPrice CustomerID Country InvoiceDate Australia 1/10/2011 9:58 540557 22523 CHILDS GARDEN FORK PINK 9.. 2023. 1. 21.
[Pandas][DataFrame][MultiIndex]S4_04_Multi_indexing_operations1 In [ ]: import pandas as pd In [ ]: # Multi-index DataFrame sales_df = pd.read_csv('Online Retail.csv', encoding = 'unicode_escape') sales_df.set_index(keys=['Country','InvoiceDate'], inplace = True) sales_df Out[ ]: InvoiceNo StockCode Description Quantity UnitPrice CustomerID Country InvoiceDate United Kingdom 12/1/2010 8:26 536365 85123A WHITE HANGING HEART T-LIGHT HOLDER 6 2.55 17850.0 12/1/.. 2023. 1. 21.
[Pandas][DataFrame][MultiIndex]S4_03_Multi_index_dataframe groupby https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.groupby.html In [1]: import pandas as pd In [2]: sales_df = pd.read_csv('Online Retail.csv', encoding= 'unicode_escape') In [12]: sales_df.head() Out[12]: InvoiceNo StockCode Description Quantity InvoiceDate UnitPrice CustomerID Country 0 536365 85123A WHITE HANGING HEART T-LIGHT HOLDER 6 2010-12-01 08:26:00 2.55.. 2023. 1. 21.
[Pandas][DataFrame][MultiIndex]S4_02_grouping groupby https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.groupby.html In [1]: import pandas as pd In [2]: sales_df = pd.read_csv('Online Retail.csv', encoding= 'unicode_escape') In [12]: sales_df.head() Out[12]: InvoiceNo StockCode Description Quantity InvoiceDate UnitPrice CustomerID Country 0 536365 85123A WHITE HANGING HEART T-LIGHT HOLDER 6 2010-12-01 08:26:00 2.55.. 2023. 1. 21.
[Pandas][DataFrame][MultiIndex]S4_01_Explore_dataset Link to Dataset: https://www.kaggle.com/datasets/mysarahmadbhat/customersegmentation In [1]: import pandas as pd In [2]: sales_df = pd.read_csv('Online Retail.csv', encoding= 'unicode_escape') In [3]: sales_df.head() Out[3]: InvoiceNo StockCode Description Quantity InvoiceDate UnitPrice CustomerID Country 0 536365 85123A WHITE HANGING HEART T-LIGHT HOLDER 6 12/1/2010 8:26 2.55 17850.0 United Ki.. 2023. 1. 21.
728x90