728x90
plotly
In [1]:
# interactive graphs
import plotly.express as px
import pandas as pd
import numpy as np
choropleth_mapbox
In [2]:
# Read data.csv into the DataFrame used by the choropleth section below.
df_practice = pd.read_csv('data.csv') # May 2020 unemployment data (author's description)
In [3]:
# Show only the first row: enough to inspect the column layout of this wide frame.
df_practice.head(1)
Out[3]:
area_fips | area_title | may2020_empl_yy_pc | may2020_empl | naics_1111 | naics_1114 | naics_1119 | naics_1121 | naics_1129 | naics_1132 | ... | naics_5211 | naics_5251 | naics_7114 | naics_7213 | naics_3162 | naics_3341 | naics_3343 | naics_3352 | naics_3161 | naics_9999 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1001 | Autauga County, Alabama | -5.4 | 10580 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
1 rows × 307 columns
In [4]:
# Count missing values per column (isna is the same check as isnull).
df_practice.isna().sum()
Out[4]:
area_fips 0
area_title 0
may2020_empl_yy_pc 0
may2020_empl 0
naics_1111 0
..
naics_3341 0
naics_3343 0
naics_3352 0
naics_3161 0
naics_9999 0
Length: 307, dtype: int64
In [5]:
# Display any row that is an exact repeat of an earlier row.
df_practice.loc[df_practice.duplicated()]
Out[5]:
area_fips | area_title | may2020_empl_yy_pc | may2020_empl | naics_1111 | naics_1114 | naics_1119 | naics_1121 | naics_1129 | naics_1132 | ... | naics_5211 | naics_5251 | naics_7114 | naics_7213 | naics_3162 | naics_3341 | naics_3343 | naics_3352 | naics_3161 | naics_9999 |
---|
0 rows × 307 columns
In [6]:
# df_mean = df_practice.groupby('area_fips', as_index=False)['may2020_empl'].mean()
# df_mean
In [8]:
# Normalize the FIPS code: render each value as an integer zero-padded to 5 digits.
df_practice['area_fips'] = df_practice['area_fips'].map(lambda code: f"{int(code):05d}")
In [9]:
# Preview the first 10 rows (useful here for eyeballing the area_fips values).
df_practice.head(10)
Out[9]:
area_fips | area_title | may2020_empl_yy_pc | may2020_empl | naics_1111 | naics_1114 | naics_1119 | naics_1121 | naics_1129 | naics_1132 | ... | naics_5211 | naics_5251 | naics_7114 | naics_7213 | naics_3162 | naics_3341 | naics_3343 | naics_3352 | naics_3161 | naics_9999 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 01001 | Autauga County, Alabama | -5.4 | 10580 | 0.0 | 0.00 | 0.00 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
1 | 01003 | Baldwin County, Alabama | -10.8 | 69631 | 0.0 | 5.78 | 1.26 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
2 | 01005 | Barbour County, Alabama | -3.7 | 7868 | 0.0 | 0.00 | 10.56 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
3 | 01007 | Bibb County, Alabama | -3.8 | 4459 | 0.0 | 0.00 | 0.00 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
4 | 01009 | Blount County, Alabama | -4.6 | 8344 | 0.0 | 0.00 | 0.00 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
5 | 01011 | Bullock County, Alabama | -5.8 | 2827 | 0.0 | 0.00 | 0.00 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
6 | 01013 | Butler County, Alabama | -18.3 | 5885 | 0.0 | 0.00 | 0.00 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
7 | 01015 | Calhoun County, Alabama | -9.7 | 40426 | 0.0 | 0.00 | 0.00 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
8 | 01017 | Chambers County, Alabama | -9.1 | 7741 | 0.0 | 0.00 | 0.00 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
9 | 01019 | Cherokee County, Alabama | -12.0 | 4663 | 0.0 | 23.76 | 4.39 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
10 rows × 307 columns
In [10]:
from urllib.request import urlopen
import json

# GeoJSON with US county boundaries from the plotly datasets repository.
# NOTE(review): presumably its features are keyed by 5-digit county FIPS codes,
# which is why 'area_fips' is passed as `locations` — confirm against the file.
COUNTIES_GEOJSON_URL = 'https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json'

# Download and parse the GeoJSON; the `with` block closes the HTTP response.
with urlopen(COUNTIES_GEOJSON_URL) as response:
    counties = json.load(response)

# County-level choropleth colored by the 'may2020_empl' column.
fig = px.choropleth_mapbox(
    df_practice,
    geojson=counties,
    locations='area_fips',
    color='may2020_empl',
    color_continuous_scale='Viridis',
    range_color=(0, 100000),  # values above 100000 share the top color
    mapbox_style="carto-positron",
    zoom=2,
    center={'lat': 37.0902, 'lon': -95.7129},
    opacity=0.6,
    # `labels` must be a dict of column name -> display text; the original passed
    # a set literal, which is not a valid label mapping.
    labels={'may2020_empl': 'May 2020 employment'},
)
# Remove the outer margins so the map fills the output area.
fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})
fig.show()
Plotly animation
In [11]:
# Load us_simplified.csv for the animation section and preview its first 10 rows.
df_ani = pd.read_csv('us_simplified.csv')
df_ani.head(10)
Out[11]:
Date | Admin2 | Province/State | Confirmed | Deaths | Country/Region | |
---|---|---|---|---|---|---|
0 | 2020-01-22 | Autauga | Alabama | 0 | 0 | US |
1 | 2020-01-23 | Autauga | Alabama | 0 | 0 | US |
2 | 2020-01-24 | Autauga | Alabama | 0 | 0 | US |
3 | 2020-01-25 | Autauga | Alabama | 0 | 0 | US |
4 | 2020-01-26 | Autauga | Alabama | 0 | 0 | US |
5 | 2020-01-27 | Autauga | Alabama | 0 | 0 | US |
6 | 2020-01-28 | Autauga | Alabama | 0 | 0 | US |
7 | 2020-01-29 | Autauga | Alabama | 0 | 0 | US |
8 | 2020-01-30 | Autauga | Alabama | 0 | 0 | US |
9 | 2020-01-31 | Autauga | Alabama | 0 | 0 | US |
In [12]:
# Count missing values per column (isna is the same check as isnull).
df_ani.isna().sum()
Out[12]:
Date 0
Admin2 4896
Province/State 0
Confirmed 0
Deaths 0
Country/Region 0
dtype: int64
In [13]:
# Keep only rows with no missing values; the result is bound to a new name,
# so df_ani itself is left unchanged.
df_ani_updated = df_ani.dropna()
In [14]:
# List the distinct Province/State values present in the cleaned frame.
df_ani_updated['Province/State'].unique()
Out[14]:
array(['Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California',
'Colorado', 'Connecticut', 'Delaware', 'District of Columbia',
'Florida', 'Georgia', 'Hawaii', 'Idaho', 'Illinois', 'Indiana',
'Iowa', 'Kansas', 'Kentucky', 'Louisiana', 'Maine', 'Maryland',
'Massachusetts', 'Michigan', 'Minnesota', 'Mississippi',
'Missouri', 'Montana', 'Nebraska', 'Nevada', 'New Hampshire',
'New Jersey', 'New Mexico', 'New York', 'North Carolina',
'North Dakota', 'Ohio', 'Oklahoma', 'Oregon', 'Pennsylvania',
'Puerto Rico', 'Rhode Island', 'South Carolina', 'South Dakota',
'Tennessee', 'Texas', 'Utah', 'Vermont', 'Virginia', 'Washington',
'West Virginia', 'Wisconsin', 'Wyoming'], dtype=object)
In [15]:
# Lookup table: one row per Province/State, with an array of its unique Admin2 names.
county_search = (
    df_ani_updated
    .groupby('Province/State')['Admin2']
    .unique()
    .reset_index()
)
county_search.head()
Out[15]:
Province/State | Admin2 | |
---|---|---|
0 | Alabama | [Autauga, Baldwin, Barbour, Bibb, Blount, Bull... |
1 | Alaska | [Aleutians East, Aleutians West, Anchorage, Be... |
2 | Arizona | [Apache, Cochise, Coconino, Gila, Graham, Gree... |
3 | Arkansas | [Arkansas, Ashley, Baxter, Benton, Boone, Brad... |
4 | California | [Alameda, Alpine, Amador, Butte, Calaveras, Co... |
In [16]:
# Admin2 names recorded for New York.
county_search.loc[county_search['Province/State'] == 'New York', 'Admin2']
Out[16]:
32 [Albany, Allegany, Bronx, Broome, Cattaraugus,...
Name: Admin2, dtype: object
In [17]:
# Admin2 names recorded for New Jersey.
county_search.loc[county_search['Province/State'] == 'New Jersey', 'Admin2']
Out[17]:
30 [Atlantic, Bergen, Burlington, Camden, Cape Ma...
Name: Admin2, dtype: object
In [18]:
# States to keep for the animation.
scope = ['Texas', 'New Jersey']
# Restrict the cleaned frame to rows whose Province/State is in `scope`.
df_new = df_ani_updated.loc[df_ani_updated['Province/State'].isin(scope)]
In [19]:
# Areas (Admin2 names) to inspect.
scope_county = [
    'Anderson', 'Andrews', 'Angelina', 'Aransas',
    'Atlantic', 'Bergen', 'Burlington', 'Camden',
]
# Restrict the state-filtered frame to rows whose Admin2 is in `scope_county`.
df_county = df_new.loc[df_new['Admin2'].isin(scope_county)]
In [20]:
# Preview the first 10 rows of the filtered frame.
df_county.head(10)
Out[20]:
Date | Admin2 | Province/State | Confirmed | Deaths | Country/Region | |
---|---|---|---|---|---|---|
1504704 | 2020-01-22 | Atlantic | New Jersey | 0 | 0 | US |
1504705 | 2020-01-23 | Atlantic | New Jersey | 0 | 0 | US |
1504706 | 2020-01-24 | Atlantic | New Jersey | 0 | 0 | US |
1504707 | 2020-01-25 | Atlantic | New Jersey | 0 | 0 | US |
1504708 | 2020-01-26 | Atlantic | New Jersey | 0 | 0 | US |
1504709 | 2020-01-27 | Atlantic | New Jersey | 0 | 0 | US |
1504710 | 2020-01-28 | Atlantic | New Jersey | 0 | 0 | US |
1504711 | 2020-01-29 | Atlantic | New Jersey | 0 | 0 | US |
1504712 | 2020-01-30 | Atlantic | New Jersey | 0 | 0 | US |
1504713 | 2020-01-31 | Atlantic | New Jersey | 0 | 0 | US |
In [21]:
# Animated scatter of Confirmed vs. Deaths: one frame per Date, one marker per
# Admin2, colored by Province/State and sized by Confirmed.
px.scatter(
    df_county,
    x='Confirmed',
    y='Deaths',
    animation_frame='Date',
    animation_group='Admin2',
    color='Province/State',
    hover_name='Admin2',
    size='Confirmed',
    range_x=[1, 100000],
    range_y=[1, 5000],
    height=700,
)
728x90
'Data Analytics with python > [Data Analysis]' 카테고리의 다른 글
outlier (0) | 2023.02.18 |
---|---|
[Visualization] Plotly_Part1 (0) | 2023.01.22 |
[Visualization] seaborn (0) | 2023.01.22 |
[Visualization] matplotlib (0) | 2023.01.22 |
[Visualization] Basic_for _visualization (0) | 2023.01.22 |
댓글