import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
# Input locations: country boundary shapefile and the obesity CSV.
shapefile = '/Users/vanle/Documents/Obesity data/ne_110m_admin_0_countries/ne_110m_admin_0_countries.shp'
datafile = '/Users/vanle/Documents/Obesity data/obesity.csv'

# Keep only the name, three-letter code and geometry columns, under shorter names.
gdf = gpd.read_file(shapefile)[['ADMIN', 'ADM0_A3', 'geometry']].rename(
    columns={'ADMIN': 'country', 'ADM0_A3': 'country_code'}
)
gdf.head()


# Show the Antarctica row before removing it.
print(gdf[gdf['country'] == 'Antarctica'])

# Remove Antarctica by name rather than by a hard-coded row position, so the
# right row is dropped even if the shapefile's row order changes. The
# remaining rows keep their original index labels.
gdf = gdf[gdf['country'] != 'Antarctica']

        country country_code  \
159  Antarctica          ATA   

                                              geometry  
159  MULTIPOLYGON (((-48.66062 -78.04702, -48.15140...


# Print a summary of gdf (index range, columns, non-null counts, dtypes).
gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 176 entries, 0 to 176
Data columns (total 3 columns):
 #   Column        Non-Null Count  Dtype   
---  ------        --------------  -----   
 0   country       176 non-null    object  
 1   country_code  176 non-null    object  
 2   geometry      176 non-null    geometry
dtypes: geometry(1), object(2)
memory usage: 5.5+ KB


# Sanity check: print any rows of gdf still named Antarctica.
print(gdf.loc[gdf['country'].eq('Antarctica')])

Empty GeoDataFrame
Columns: [country, country_code, geometry]
Index: []


import pandas as pd

datafile = '/Users/vanle/Documents/Obesity data/obesity.csv'

# Skip the first line of the file and use our own column names instead.
obesity_columns = ['entity', 'code', 'year', 'per_cent_obesity']
df = pd.read_csv(datafile, names=obesity_columns, skiprows=1)
df.head()


# Obesity figures for 2016 only.
df_2016 = df.loc[df['year'] == 2016]

# Left merge so that every row of gdf is kept, even when the obesity data
# has no matching code.
merged = gdf.merge(
    df_2016,
    how='left',
    left_on='country_code',
    right_on='code',
)

# Replace NaN values with the string 'No data'.
merged.fillna('No data', inplace=True)


import json

# Serialise the merged frame, then round-trip it through the json module.
geojson_text = merged.to_json()
merged_json = json.loads(geojson_text)

# String form of the parsed JSON.
json_data = json.dumps(merged_json)


from bokeh.io import output_notebook, show, output_file
from bokeh.plotting import figure
from bokeh.models import GeoJSONDataSource, LinearColorMapper, ColorBar
from bokeh.palettes import brewer

# GeoJSON source holding the features to plot.
geosource = GeoJSONDataSource(geojson=json_data)

# Eight-colour YlGnBu brewer palette, taken in reverse order.
palette = brewer['YlGnBu'][8][::-1]

# Map values in [0, 40] linearly onto the palette.
color_mapper = LinearColorMapper(palette=palette, low=0, high=40)

# Custom tick labels for the colour bar: every 5% up to 35%, then '>40%'.
tick_labels = {str(pct): f'{pct}%' for pct in range(0, 40, 5)}
tick_labels['40'] = '>40%'

# Horizontal colour bar.
color_bar = ColorBar(
    color_mapper=color_mapper,
    label_standoff=8,
    width=500,
    height=20,
    border_line_color=None,
    location=(0, 0),
    orientation='horizontal',
    major_label_overrides=tick_labels,
)

# Figure without toolbar or grid lines.
p = figure(
    title='Share of adults who are obese, 2016',
    plot_height=600,
    plot_width=950,
    toolbar_location=None,
)
for grid in (p.xgrid, p.ygrid):
    grid.grid_line_color = None

# One patch per feature, filled according to its obesity percentage.
p.patches(
    'xs',
    'ys',
    source=geosource,
    fill_color={'field': 'per_cent_obesity', 'transform': color_mapper},
    line_color='black',
    line_width=0.25,
    fill_alpha=1,
)

# Colour bar goes underneath the map.
p.add_layout(color_bar, 'below')

# Render inline in the Jupyter notebook.
output_notebook()
show(p)


from bokeh.io import curdoc, output_notebook
from bokeh.models import Slider, HoverTool
from bokeh.layouts import widgetbox, row, column

#Define function that returns json_data for year selected by user.
    
def json_data(selectedYear):
    """Return the GeoJSON string for the year chosen by the user.

    Rows of the module-level ``df`` whose ``year`` equals ``selectedYear``
    are left-merged onto the module-level ``gdf`` (``country_code`` against
    ``code``), NaN values are replaced with the string 'No data', and the
    merged frame is serialised to a JSON string.
    """
    year_rows = df.loc[df['year'] == selectedYear]
    merged_year = gdf.merge(
        year_rows,
        how='left',
        left_on='country_code',
        right_on='code',
    )
    merged_year.fillna('No data', inplace=True)
    return json.dumps(json.loads(merged_year.to_json()))

# GeoJSON source, initially holding the 2016 data.
geosource = GeoJSONDataSource(geojson=json_data(2016))

# Eight-colour PRGn brewer palette, taken in reverse order so that the
# palette's first colour is used for the highest values.
palette = brewer['PRGn'][8][::-1]

# Map values in [0, 40] linearly onto the palette; nan_color is set to grey.
color_mapper = LinearColorMapper(
    palette=palette,
    low=0,
    high=40,
    nan_color='#d9d9d9',
)

# Custom tick labels for the colour bar: every 5% up to 35%, then '>40%'.
tick_labels = {str(pct): f'{pct}%' for pct in range(0, 40, 5)}
tick_labels['40'] = '>40%'

# Hover tool showing the country name and its obesity percentage.
hover = HoverTool(
    tooltips=[
        ('Country/region', '@country'),
        ('% obesity', '@per_cent_obesity'),
    ]
)

# Horizontal colour bar.
color_bar = ColorBar(
    color_mapper=color_mapper,
    label_standoff=8,
    width=500,
    height=20,
    border_line_color=None,
    location=(0, 0),
    orientation='horizontal',
    major_label_overrides=tick_labels,
)

# Figure with the hover tool as its only tool, no toolbar, no grid lines.
p = figure(
    title='Share of adults who are obese, 2016',
    plot_height=600,
    plot_width=950,
    toolbar_location=None,
    tools=[hover],
)
for grid in (p.xgrid, p.ygrid):
    grid.grid_line_color = None

# One patch per feature, filled according to its obesity percentage.
p.patches(
    'xs',
    'ys',
    source=geosource,
    fill_color={'field': 'per_cent_obesity', 'transform': color_mapper},
    line_color='black',
    line_width=0.25,
    fill_alpha=1,
)
p.add_layout(color_bar, 'below')

# Define the callback function: update_plot
def update_plot(attr, old, new):
    """Slider callback: redraw the map and title for the selected year.

    The ``attr``/``old``/``new`` arguments are not used; the year is read
    from the module-level ``slider``.
    """
    selected_year = slider.value
    geosource.geojson = json_data(selected_year)
    p.title.text = 'Share of adults who are obese, %d' % selected_year
    
# Year slider; update_plot runs whenever its value changes.
slider = Slider(title='Year', start=1975, end=2016, step=1, value=2016)
slider.on_change('value', update_plot)

# Stack the plot above the slider and add the layout to the current document.
# `column` is used for the slider container because `widgetbox` is deprecated
# and scheduled for removal in Bokeh 3.0.
layout = column(p, column(slider))
curdoc().add_root(layout)

# NOTE: Python callbacks registered with on_change only fire when the document
# is served by a Bokeh server application. In standalone notebook/HTML output
# the slider is drawn but moving it will not update the map.

# Display plot inline in Jupyter notebook
output_notebook()

# Display plot
show(layout)

#export file
#from bokeh.io import export_svg

#export_svg(p, filename='/Users/vanle/Documents/Obesity data/images/worldmap.pdf')

BokehDeprecationWarning: 'WidgetBox' is deprecated and will be removed in Bokeh 3.0, use 'bokeh.models.Column' instead

WARNING:bokeh.embed.util:
You are generating standalone HTML/JS output, but trying to use real Python
callbacks (i.e. with on_change or on_event). This combination cannot work.

Only JavaScript callbacks may be used with standalone output. For more
information on JavaScript callbacks with Bokeh, see:

    https://docs.bokeh.org/en/latest/docs/user_guide/interaction/callbacks.html

Alternatively, to use real Python callbacks, a Bokeh server application may
be used. For more information on building and running Bokeh applications, see:

    https://docs.bokeh.org/en/latest/docs/user_guide/server.html


import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator
import seaborn as sns
# Apply the matplotlib "seaborn" style sheet, then seaborn's default theme.
plt.style.use("seaborn")
sns.set()
# IPython magic (not plain Python): only valid inside a Jupyter/IPython session.
%matplotlib notebook


import plotly.express as px
import matplotlib.pyplot as plt
df10 = pd.read_csv('/Users/vanle/Documents/Obesity data/obese_pop.csv')

# Stacked area chart of obese_pop per year, one coloured band per `country` value.
fig = px.area(
    df10,
    x="year",
    y="obese_pop",
    color="country",
    color_discrete_sequence=['#9970ab', '#d9f0d3', '#1b7837', 'blue'],
    labels={
        "country": "Country Classification",
        "year": "Year",
        "obese_pop": "Number of Obese Adults",
    },
    title="Obese population from 1975 to 2016",
)

# Legend position (x and y must be between 0 and 1) and font.
legend_style = dict(
    x=0.05,
    y=0.9,
    traceorder="normal",
    font=dict(family="sans-serif", size=12, color="black"),
)

# Fully transparent plot and paper backgrounds.
transparent_background = {
    'plot_bgcolor': 'rgba(0,0,0,0)',
    'paper_bgcolor': 'rgba(0,0,0,0)',
}
fig.update_layout(transparent_background, legend=legend_style)
fig.show()
#fig.write_image('/Users/vanle/Documents/Obesity data/images/trans_world_pop.eps')


gender = pd.read_csv("/Users/vanle/Documents/Obesity data/obesity_gender02.csv")
# dropna() returns a new frame, so the result has to be assigned for rows
# with missing values to actually be removed from `gender`.
gender = gender.dropna()
gender


# Countries kept in the gender comparison chart. Names must match the
# `country` column of the data exactly ('Irag' was a typo for 'Iraq' and
# could never match).
countries = [
    'United States', 'Egypt', 'Turkey', 'Chile', 'Argentina', 'Mexico',
    'Canada', 'Australia', 'United Kingdom',
    'South Africa', 'Venezuela', 'Iraq', 'Czechia', 'Spain', 'Iran',
    'Poland', 'Russia', 'Algeria', 'Kazakhstan', 'Hungary', 'France',
    'Italy', 'Morocco', 'Germany', 'Brazil', 'Colombia', 'Peru', 'Ukraine',
    'Netherlands', 'Uzbekistan', 'Malaysia', 'Nigeria', 'Thailand', 'Sudan',
    'China', 'Kenya', 'Tanzania', 'Sri Lanka', 'Pakistan', 'Indonesia',
    'Philippines', 'South Korea',
    'Mozambique', 'Congo', 'Uganda', 'Nepal', 'Myanmar', 'India', 'Ethiopia',
    'Japan', 'Afghanistan', 'Bangladesh', 'North Korea', 'Vietnam',
]


# 2016 rows for the selected countries, largest `total` first.
gender_2016 = gender.loc[
    gender["year"] == 2016,
    ["country", 'gender', 'pct_obese', 'total'],
]
gender_2016 = (
    gender_2016
    .loc[gender_2016["country"].isin(countries)]
    .sort_values(by='total', ascending=False)
)
#gender_2016.head()


# Tick theme first, then the dark-grid style with the grid itself turned off.
sns.set(style="ticks")
sns.set_style("darkgrid", {'axes.grid': False})

# One row per country, one marker per gender, positioned by pct_obese.
g = sns.catplot(
    data=gender_2016,
    x="pct_obese",
    y="country",
    hue="gender",
    s=8,
    palette="PRGn",
    height=10,
    aspect=0.8,
)
g.set(xlabel='Prevalence of Obesity', ylabel=None)
plt.show()
#plt.savefig('/Users/vanle/Documents/Obesity data/images/pattern.eps', bbox_inches = 'tight',transparent=True)


df3 = pd.read_csv("/Users/vanle/Documents/Obesity data/country_obesity.csv")

# 2016 rows only, without any column containing NaN, ordered from lowest to
# highest pct_obese. A copy of `country` is kept and `code` becomes the index.
country_16 = (
    df3.loc[df3['year'] == 2016]
    .dropna(axis=1)
    .sort_values(by='pct_obese', ascending=True)
)
country_16['country_duplicate'] = country_16['country']
country_16 = country_16.set_index('code')


plt.figure(figsize=(12, 5))
ax = sns.barplot(x='country', y='pct_obese', data=country_16, edgecolor='none')

# No x label or x tick labels; the y axis is drawn on the right-hand side.
ax.set(xlabel=None)
ax.xaxis.set_ticklabels([])
ax.yaxis.set_label_position("right")
ax.yaxis.tick_right()

# Start with every bar in grey, then recolour the bars of interest.
for bar in ax.patches:
    bar.set_facecolor('#bfbfbf')

# country_16 is indexed by code, so get_loc gives the row position of a code,
# which is used here as the index of that country's bar.
sea_codes = ['VNM', 'KHM', 'LAO', 'MYS', 'PHL', 'SGP', 'IDN', 'THA', 'MMR']
for code in sea_codes:
    ax.patches[country_16.index.get_loc(code)].set_facecolor('#762a83')
ax.patches[country_16.index.get_loc('OWID_WRL')].set_facecolor('#5ca964')

# Annotations. The x positions are hard-coded bar positions
# (presumably those of Vietnam, Malaysia and World in the sorted data -- TODO confirm).
ax.axvline(x=0, linestyle='--', alpha=0.5, color='#762a83')
ax.text(x=1, y=40, s='Vietnam (2.1%)', alpha=0.7, color='#762a83')

ax.axvline(x=71, linestyle='--', alpha=0.5, color='#762a83')
ax.text(x=72, y=40, s='Malaysia (15.3%)', alpha=0.7, color='#762a83')

ax.axvline(x=63, linestyle='-', alpha=0.5, color='#018837')
ax.text(x=43, y=30, s='World (13.2%)', alpha=0.7, color='#018837')

# No ax.legend() call: none of the artists carries a label, so it only
# produced a "No artists with labels found" warning.

# Set the title and y label before tight_layout so they are included when
# the layout is computed.
ax.set_ylabel('Prevalence of obese adults')
ax.set_title("SEA countries vs Others (2016)")
plt.tight_layout()
plt.show()
#plt.savefig('/Users/vanle/Documents/Obesity data/images/SEA-Others.eps', bbox_inches = 'tight',transparent=True)

No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.


df3 = pd.read_csv("/Users/vanle/Documents/Obesity data/country_obesity.csv")

# Rows whose code is one of these nine country codes.
sea_codes = ['VNM', 'KHM', 'MMR', 'IDN', 'LAO', 'MYS', 'SGP', 'THA', 'PHL']
df4 = df3[df3["code"].isin(sea_codes)]


#fig = px.area(df6, x='year', y='pct_obese', color='country') #for area chart
# One line per country; the colour sequence alternates between two hues.
fig = px.line(
    df4,
    x='year',
    y='pct_obese',
    color='country',
    title='Obesity growing rate in SEA countries',
    color_discrete_sequence=[
        '#9970ab', '#5ca964', '#9970ab', '#5ca964', '#9970ab',
        '#9970ab', '#9970ab', '#5ca964', '#9970ab',
    ],
    labels={
        "country": "Country",
        "year": "Year",
        "pct_obese": "Prevalence of obese adults",
    },
)

# CAGR annotations placed at the 2016 end of three lines (y, text).
cagr_notes = [
    (10, "THA-CAGR: 6.9%"),
    (15.6, "MYS-CAGR: 6.2% "),
    (6.9, "IDN-CAGR: 7.19%"),
]
for y_pos, note in cagr_notes:
    fig.add_annotation(x=2016, y=y_pos, text=note, showarrow=False, yshift=10)

# Legend position (x and y must be between 0 and 1) and font.
legend_style = dict(
    x=0.05,
    y=0.9,
    traceorder="normal",
    font=dict(family="sans-serif", size=12, color="black"),
)
fig.update_layout(legend=legend_style)
fig.show()
#plt.savefig('/Users/vanle/Documents/Obesity data/images/SEA-growing.pdf', bbox_inches = 'tight')
#fig.write_image('/Users/vanle/Documents/Obesity data/images/SEA-growing.eps',width=1000, height=500)


df5 = pd.read_csv("/Users/vanle/Documents/Obesity data/SEA_age_sorted.csv")

# Rows with code 'THA' and gender 'Total'.
is_thailand = df5['code'].eq('THA')
is_total = df5['gender'].eq('Total')
tha = df5[is_thailand & is_total]


# One line per age group for the Thailand rows.
fig = px.line(
    tha,
    x="year",
    y="pct_obese",
    color='age',
    color_discrete_sequence=['#9970ab', 'blue', '#1b7837'],
    labels={
        'year': 'Year',
        'pct_obese': 'prevalence of obesity',
        'age': 'Age Group',
    },
    title='Obesity rate in Thailand for different age groups',
)

# Legend position (x and y must be between 0 and 1) and font.
legend_style = dict(
    x=0.05,
    y=0.9,
    traceorder="normal",
    font=dict(family="sans-serif", size=12, color="black"),
)
fig.update_layout(legend=legend_style)
fig.show()
#plt.savefig('/Users/vanle/Documents/Obesity data/images/SEA-growing.pdf', bbox_inches = 'tight')
#fig.write_image('/Users/vanle/Documents/Obesity data/images/thai.eps',width=1000, height=500)


import plotly.express as px
from plotly.subplots import make_subplots

def _gdp_share_sunburst(shares):
    """Build a sunburst figure from a character/parent/value mapping."""
    return px.sunburst(
        shares,
        names='character',
        parents='parent',
        values='value',
        branchvalues="total",
        color_discrete_sequence=['#9970ab', '#d9f0d3', '#5aae61'],
    )


# First sunburst: root "Total GDP 2016" with an unnamed child and "Obesity".
data1 = dict(
    character=["Total GDP 2016", ' ', "Obesity"],
    parent=["", "Total GDP 2016", "Total GDP 2016"],
    value=[519.6, 513, 6.6],
)
sb1 = _gdp_share_sunburst(data1)

# Second sunburst: root "Total GDP 2056" with an unnamed child and "Obesity".
data2 = dict(
    character=["Total GDP 2056", ' ', "Obesity"],
    parent=["", "Total GDP 2056", "Total GDP 2056"],
    value=[519.6, 494.24, 25.36],
)
sb2 = _gdp_share_sunburst(data2)

# Place the two sunburst traces side by side in a 1x2 subplot grid.
fig = make_subplots(
    rows=1,
    cols=2,
    specs=[[{"type": "sunburst"}, {"type": "sunburst"}]],
)
for column_number, sunburst in enumerate((sb1, sb2), start=1):
    fig.add_trace(sunburst.data[0], row=1, col=column_number)
fig.show()


# Obese population per country; the two lists are matched by position.
sea_country = ['Vietnam', 'Cambodia', 'Myanmar', 'Laos', 'Philippines',
               'Indonesia', 'Singapore', 'Thailand', 'Malaysia']
obese_pop = [1355023, 368182, 1952005, 206979, 3884363,
             11727354, 283080, 5379703, 3266162]
data = pd.DataFrame({'country': sea_country, 'obese_pop': obese_pop})

# Treemap with a constant "SEA" root; tile size and colour both follow obese_pop.
fig = px.treemap(
    data,
    path=[px.Constant("SEA"), 'country'],
    values='obese_pop',
    color='obese_pop',
    color_continuous_scale='PRGn',
)

# Title typo fixed ("Popuplation" -> "Population").
fig.update_layout(
    title="SEA Obesity Population Distribution",
    width=1000,
    height=600,
)

fig.show()
fig.write_image('/Users/vanle/Documents/Obesity data/images/treemap_sea.eps', width=720, height=400)

/Users/vanle/opt/anaconda3/lib/python3.9/site-packages/plotly/express/_core.py:1637: FutureWarning:

The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.

/Users/vanle/opt/anaconda3/lib/python3.9/site-packages/plotly/express/_core.py:1637: FutureWarning:

The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


df7 = pd.read_csv('/Users/vanle/Documents/Obesity data/annual-working-hours-per-worker.csv')

# 2017 rows, longest working hours first.
hours = df7[df7.Year == 2017]
hours = hours.sort_values(by='Average annual working hours per worker', ascending=False)
# reset_index() returns a new frame, so the result has to be assigned.
# After this the index of each row is its rank in the sorted order.
hours = hours.reset_index(drop=True)

# Show the Thailand row.
hours[hours.Code == 'THA']


# Tick theme first, then the dark-grid style with the grid itself turned off.
sns.set(style="ticks")
sns.set_style("darkgrid", {'axes.grid': False})

# One marker per entity, positioned by its average annual working hours.
g = sns.catplot(
    data=hours,
    x="Average annual working hours per worker",
    y="Entity",
    palette=['seagreen'],
    alpha=0.5,
    s=8,
    height=10,
    aspect=0.8,
)
g.set(xlabel='Average annual working hours per worker')
g.set(ylabel=None)

# Dashed reference line at row position 9 with its value as a text label
# (hard-coded; presumably the Thailand row -- TODO confirm against `hours`).
plt.axhline(y=9, color='r', linestyle='--')
plt.text(x=2225, y=8, s='2185.4456 hrs/year', alpha=0.7, color='r')

# Save before showing: once plt.show() has returned, the figure may already
# be closed and savefig would then write an empty canvas.
# NOTE: the PostScript backend does not support transparency, so partially
# transparent artists are rendered opaque in the .eps output.
plt.savefig('/Users/vanle/Documents/Obesity data/images/working_hour.eps', bbox_inches='tight', transparent=True)
plt.show()

The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.


food_ads = pd.read_csv('/Users/vanle/Documents/Obesity data/Food ads.csv')


food_ads


# Sunburst of advertisement rates: inner ring is the F&B category, outer ring
# the F&B type. The title now describes this data; the previous one was left
# over from an unrelated Starbucks example.
fig = px.sunburst(
    food_ads,
    path=["F&B category", "F&B types"],
    values='Rate of advertisements (ads per channel hour (n))',
    title="Rate of food advertisements by F&B category and type (ads per channel hour)",
    width=750,
    height=750,
)
fig.show()

/Users/vanle/opt/anaconda3/lib/python3.9/site-packages/plotly/express/_core.py:1637: FutureWarning:

The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.

/Users/vanle/opt/anaconda3/lib/python3.9/site-packages/plotly/express/_core.py:1637: FutureWarning:

The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.

	country	country_code	geometry
0	Fiji	FJI	MULTIPOLYGON (((180.00000 -16.06713, 180.00000...
1	United Republic of Tanzania	TZA	POLYGON ((33.90371 -0.95000, 34.07262 -1.05982...
2	Western Sahara	SAH	POLYGON ((-8.66559 27.65643, -8.66512 27.58948...
3	Canada	CAN	MULTIPOLYGON (((-122.84000 49.00000, -122.9742...
4	United States of America	USA	MULTIPOLYGON (((-122.84000 49.00000, -120.0000...

	country	year	code	pct_obese	gender	total
0	Afghanistan	2016	AFG	7.6	female	10.8
1	Afghanistan	2016	AFG	3.2	male	10.8
2	Albania	2016	ALB	21.6	male	43.4
3	Albania	2016	ALB	21.8	female	43.4
4	Algeria	2016	DZA	19.9	male	54.8
...	...	...	...	...	...	...
371	Yemen, Rep.	2016	YEM	22.0	female	34.0
372	Zambia	2016	ZMB	3.6	male	16.0
373	Zambia	2016	ZMB	12.4	female	16.0
374	Zimbabwe	2016	ZWE	25.3	female	30.0
375	Zimbabwe	2016	ZWE	4.7	male	30.0

	Topic	F&B category	F&B types	Rate of advertisements (ads per channel hour (n))
0	Total 997 Food ads	Non-core and unhealthy food	High sugar and/or low fibre cereals	0.1
1	Total 997 Food ads	Non-core and unhealthy food	Instant rice and noodle products	0.2
2	Total 997 Food ads	Non-core and unhealthy food	Sweet breads, cakes, muffins, etc.	0.1
3	Total 997 Food ads	Non-core and unhealthy food	Sweet snack foods	0.4
4	Total 997 Food ads	Non-core and unhealthy food	Savoury snack foods	0.4
5	Total 997 Food ads	Non-core and unhealthy food	Fruit juice/drinks	0.1
6	Total 997 Food ads	Non-core and unhealthy food	Full cream milks and yoghurts	0.5
7	Total 997 Food ads	Non-core and unhealthy food	Desserts and Sweets	0.1
8	Total 997 Food ads	Non-core and unhealthy food	Fast food	0.2
9	Total 997 Food ads	Non-core and unhealthy food	Other high fat/salt products	0.1
10	Total 997 Food ads	Non-core and unhealthy food	Sugar sweetened drinks	0.7
11	Total 997 Food ads	Core and healthy food	Milks and yoghurts	0.1
12	Total 997 Food ads	Core and healthy food	Bottled water	0.1
13	Total 997 Food ads	Miscellaneous food/food-related	Recipe additions	0.1
14	Total 997 Food ads	Miscellaneous food/food-related	Vitamin/mineral	0.2
15	Total 997 Food ads	Miscellaneous food/food-related	Tea and coffee	0.1
16	Total 997 Food ads	Miscellaneous food/food-related	Baby and toddler milk formulae	0.2
17	Total 997 Food ads	Miscellaneous food/food-related	Fast food	0.2
18	Total 997 Food ads	Miscellaneous food/food-related	Fast-food restaurant	0.1

	entity	code	year	per_cent_obesity
0	Afghanistan	AFG	1975	0.4
1	Afghanistan	AFG	1976	0.4
2	Afghanistan	AFG	1977	0.5
3	Afghanistan	AFG	1978	0.5
4	Afghanistan	AFG	1979	0.5

	index	Entity	Code	Year	Average annual working hours per worker
0	460	Cambodia	KHM	2017	2455.5508
1	2026	Myanmar	MMR	2017	2437.8633
2	1978	Mexico	MEX	2017	2255.0000
3	1886	Malaysia	MYS	2017	2238.2729
4	2606	Singapore	SGP	2017	2237.7263
...	...	...	...	...	...
61	1376	Iceland	ISL	2017	1493.3651
62	2101	Netherlands	NLD	2017	1430.0229
63	2225	Norway	NOR	2017	1417.4723
64	895	Denmark	DNK	2017	1400.3815
65	1159	Germany	DEU	2017	1353.8868