%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import colors
from matplotlib.pyplot import figure
import pandas as pd
from bokeh.plotting import figure, output_file, show
from bokeh.models import Legend
from bokeh.transform import factor_cmap, factor_mark
from bokeh.io import output_notebook
from bokeh.models.markers import marker_types
from bokeh.models import Legend
from bokeh.palettes import Category20c_20
from random import choice
from bokeh.models import HoverTool
import math
import random
import statistics
from bokeh.models import Span
from bokeh.models import Label
from bokeh.models import Slope
from bokeh.models import SingleIntervalTicker, LinearAxis
from bokeh.models.annotations import Title
from bokeh.models import ColumnDataSource, GMapOptions
from bokeh.plotting import gmap
from bokeh.tile_providers import CARTODBPOSITRON, get_provider
from bokeh.models import DataRange1d
from bokeh.io import export_png
from selenium import webdriver
# --- Load and prepare the data ------------------------------------------------
# Per-country latitude/longitude table; merged with the scores further down.
lat_lon = pd.read_csv(r"C:\Users\james\Downloads\Human Freedom\country_lat_lon.csv")
# Human Freedom Index table, decoded as latin-1.
df = pd.read_csv(r"C:\Users\james\Downloads\Human Freedom\Human Freedom.csv", encoding='latin-1')
df.head()

# Keep only the 2017 rows. The explicit copy makes the column assignments below
# operate on an independent frame instead of a filtered slice of the raw table
# (avoids pandas' chained-assignment warning and view/copy ambiguity).
df = df[df['year'] == 2017].copy()

# Every column from the fourth onwards is coerced to numeric; values that
# cannot be parsed become NaN.
value_columns = df.columns[3:]
df[value_columns] = df[value_columns].apply(pd.to_numeric, errors='coerce')
df.dtypes

# Scores are rounded to one decimal; the average is the row-wise mean of the
# two rounded scores, itself rounded to one decimal.
df['personal freedom score'] = df['personal freedom score'].round(1)
df['economic freedom score'] = df['economic freedom score'].round(1)
df['average score'] = df[['personal freedom score', 'economic freedom score']].mean(axis=1).round(1)
df[['countries', 'average score']]

# Rank 1 = highest average score; tied rows all receive the largest rank of the tie.
df['overall rank'] = df['average score'].rank(ascending=False, method="max")
df.columns

# Placeholder column, filled in by classification().
df['classification'] = ''
def classification(df, countriescolumn):
    """Write a freedom tier into ``df['classification']`` for every row.

    For each distinct value in ``df[countriescolumn]`` the mean of that
    country's ``'average score'`` values is compared against a descending
    ladder of thresholds, and the matching label is written to all of the
    country's rows.

    Args:
        df: DataFrame with an ``'average score'`` column and the column named
            by ``countriescolumn``. It is modified in place.
        countriescolumn: Name of the column holding the country names.

    Returns:
        The same ``df`` object, with ``'classification'`` filled in.

    Notes:
        A country whose mean score is NaN fails every threshold comparison and
        is therefore labelled ``"not free"``.
    """
    # (minimum mean score, label), checked from the highest tier downwards.
    tiers = (
        (8.75, "definition of freedom"),
        (8.5, "elite freedom"),
        (8.25, "very high freedom"),
        (8, "high freedom"),
        (7.5, "medium freedom"),
        (7, "average freedom"),
        (6.5, "slightly below average freedom"),
        (6, "mediocre freedom"),
        (5, "poor freedom"),
        (4, "very poor freedom"),
    )
    fallback = "not free"

    for country in df[countriescolumn].unique():
        # Boolean mask keyed on the frame's own index: correct for any index,
        # unlike positional offsets used as row labels.
        in_country = df[countriescolumn] == country
        overall = df.loc[in_country, 'average score'].mean()
        label = next((name for floor, name in tiers if overall >= floor), fallback)
        df.loc[in_country, 'classification'] = label
    return df
# Fill in the freedom tier for every row and peek at the first few labels.
df = classification(df, 'countries')
df['classification'].head(5)
# Attach coordinates by country name; the inner join keeps only countries that
# appear in both tables.
df_lat_lon = df.merge(lat_lon, how='inner', left_on='countries', right_on='country')
df_lat_lon.head()
def assign_color(df, countriescolumn):
    """Map each country to a colour name chosen from its mean average score.

    Args:
        df: DataFrame with an ``'average score'`` column and the column named
            by ``countriescolumn``.
        countriescolumn: Name of the column holding the country names.

    Returns:
        dict mapping every distinct value of ``df[countriescolumn]`` to a
        colour name. A country whose mean score is NaN fails every threshold
        comparison and gets ``'red'``.
    """
    # (minimum mean score, colour), checked from the highest band downwards.
    bands = (
        (8.75, 'black'),
        (8.5, 'limegreen'),
        (8.25, 'cyan'),
        (8, 'darkcyan'),
        (7.5, 'steelblue'),
        (7, 'blue'),
        (6.5, 'yellow'),
        (6, 'pink'),
        (5, 'magenta'),
        (4, 'orange'),
    )
    lowest = 'red'

    colour_by_country = {}
    for country in df[countriescolumn].unique():
        # Look the rows up through the caller-supplied column, not a fixed name.
        overall = df.loc[df[countriescolumn] == country, 'average score'].mean()
        colour_by_country[country] = next(
            (colour for floor, colour in bands if overall >= floor), lowest
        )
    return colour_by_country
colour_by_country = assign_color(df, 'countries')
# Preview: the first five countries in sorted order with their colours.
{name: colour_by_country[name] for name in sorted(colour_by_country)[:5]}
def assign_markers(df, column):
    """Return a shuffled pool of lower-case marker names for ``df[column]``.

    Each of the 26 marker names is repeated ``ceil(n / 26)`` times, where
    ``n`` is the number of distinct values in ``df[column]``, so the pool
    holds at least ``n`` entries. The pool is shuffled with the module-level
    ``random`` generator before it is returned.
    """
    shape_names = ['Asterisk', 'Circle', 'Circle_Cross', 'Circle_Dot', 'Circle_Y', 'Circle_X', 'Cross', 'Dash', 'Diamond', 'Diamond_Cross','Diamond_Dot', 'Dot', 'Hex', 'Hex_Dot',
                   'Inverted_Triangle', 'Plus', 'Square', 'Square_Cross', 'Square_Dot', 'Square_Pin', 'Square_X', 'Triangle', 'Triangle_Dot', 'Triangle_Pin', 'X', 'Y']
    shape_names = [name.lower() for name in shape_names]

    distinct_values = df[column].unique()
    repeats = int(math.ceil(len(distinct_values) / len(shape_names)))

    # Copies of one name stay adjacent until the shuffle below.
    pool = []
    for name in shape_names:
        pool.extend([name] * repeats)

    random.shuffle(pool)
    return pool
# Shuffled pool of marker names, sized to cover every distinct country in df.
markers = assign_markers(df, 'countries')
def tooltip_items(columns=[], fields=[]):
    """Set the hover tooltips on the module-level figure ``p``.

    ``columns`` are the labels shown in the tooltip and ``fields`` the
    matching field specifiers (e.g. ``"@countries"``). They are paired
    positionally with ``zip``, so surplus entries in the longer list are
    silently dropped. The default lists are only read, never mutated.

    Relies on a global ``p`` already existing when the function is called.
    """
    hover_tools = p.select(dict(type=HoverTool))
    label_field_pairs = [(label, field) for label, field in zip(columns, fields)]
    hover_tools.tooltips = label_field_pairs
# Send Bokeh output to the notebook.
output_notebook()

# clustered
tools_to_show = 'box_zoom,pan,save,hover,reset,tap,wheel_zoom'
# The built-in axes are switched off; axes with 1-unit ticks are added below.
p = figure(plot_width=1100, plot_height=475, tools=tools_to_show, x_axis_type=None, y_axis_type=None, x_range=(0, 10), y_range=(0, 10))
countries = df['countries'].unique()
colour_by_country = assign_color(df, 'countries')
markers = assign_markers(df, 'countries')

# tooltip_items() reads the global figure `p`, so it can only be called once the
# figure exists. Labels and fields are paired positionally: keep both lists the
# same length.
tooltip_items(
    ["country", "rank", "class", "overall score", "personal freedom score", "economic freedom score"],
    ["@countries", "@{overall rank}", "@classification", "@{average score}",
     "@{personal freedom score}", "@{economic freedom score}"],
)
# Bind `hover` to this figure's own hover tool before configuring it.
hover = p.select(dict(type=HoverTool))
hover.mode = 'mouse'

# One scatter renderer per country so each gets its own marker and colour.
for country, marker in zip(countries, markers):
    p.scatter("personal freedom score", "economic freedom score", source=df[df['countries'] == country], fill_alpha=0.4, size=12, marker=marker, color=colour_by_country[country])

# Axes with a major tick every 1 unit and 2 minor ticks per interval.
xaxis = LinearAxis(ticker=SingleIntervalTicker(interval=1, num_minor_ticks=2))
p.add_layout(xaxis, 'below')
yaxis = LinearAxis(ticker=SingleIntervalTicker(interval=1, num_minor_ticks=2))
p.add_layout(yaxis, 'left')

# Dashed reference lines at the mean of each score, with text labels beside them.
avg_personal_freedom = round(df['personal freedom score'].mean(), 1)
avg_economic_freedom = round(df['economic freedom score'].mean(), 1)
vline = Span(location=avg_personal_freedom, dimension='height', line_color='black', line_width=1.5, line_dash="dashed")
hline = Span(location=avg_economic_freedom, dimension='width', line_color='black', line_width=1.5, line_dash="dashed")
p.renderers.extend([vline, hline])
vlinetext = Label(x=avg_personal_freedom - 0.03, y=0.1,
                  text='AVG Personal Freedom: ' + str(avg_personal_freedom), text_font_size="11pt", angle=90, angle_units='deg')
hlinetext = Label(x=0.1, y=avg_economic_freedom + 0.03,
                  text='AVG Economic Freedom: ' + str(avg_economic_freedom), text_font_size="11pt")
p.add_layout(vlinetext)
p.add_layout(hlinetext)

# Least-squares line of economic vs. personal freedom. Rows missing either score
# are left out of the fit, because a single NaN makes polyfit return NaN.
fit_rows = df[['personal freedom score', 'economic freedom score']].dropna()
gradient, y_intercept = np.polyfit(fit_rows['personal freedom score'], fit_rows['economic freedom score'], 1)
slope = Slope(gradient=gradient, y_intercept=y_intercept,
              line_color='green', line_dash='dashed', line_width=3.5)
# NOTE(review): the label angle is the raw gradient interpreted as degrees, so the
# text is close to horizontal rather than parallel to the line; the larger plot
# further down uses gradient*100 instead. Confirm which is intended.
slopetext = Label(x=0.5, y=y_intercept + 0.5,
                  text='Slope: ' + str(round(gradient, 2)), text_font_size="11pt", angle=gradient, angle_units='deg')
p.add_layout(slope)
p.add_layout(slopetext)

p.xaxis.axis_label = 'personal freedom score'
p.yaxis.axis_label = 'economic freedom score'
p.axis.axis_label_text_font_size = '12pt'
p.axis.axis_label_text_font_style = 'bold'
p.title.text = "Human Freedom Index 2017"
p.title.align = "center"
p.title.text_font_size = "18pt"
show(p)
# Map view: one circle per country at its latitude/longitude, coloured by tier.
countries = df['countries'].unique()
colour_by_country = assign_color(df, 'countries')
map_options = GMapOptions(lat=0, lng=0, map_type="roadmap", zoom=1)
# NOTE(review): this Google Maps API key is committed in plain text. Consider
# rotating it and loading it from configuration instead of the source file.
p = gmap("AIzaSyBLk2OaT0XhP0ZJsSb9iDce1AGJ2eBUbfM", map_options)

# Coordinates come from the merged frame; colours are keyed by country name.
for c in countries:
    p.circle(x="longitude", y="latitude", size=10, fill_color=colour_by_country[c], fill_alpha=0.5, source=df_lat_lon[df_lat_lon['countries'] == c])

# The map is created without a hover tool, so add one explicitly.
hover = HoverTool()
p.add_tools(hover)
# Labels and fields are paired positionally: keep both lists the same length.
tooltip_items(
    ["country", "rank", "class", "overall score", "personal freedom score", "economic freedom score"],
    ["@countries", "@{overall rank}", "@classification", "@{average score}",
     "@{personal freedom score}", "@{economic freedom score}"],
)
hover.mode = 'mouse'

p.title.text = 'Human Freedom Index 2017'
p.title.text_font_size = "18pt"
p.title.align = "center"
show(p)
# clustered larger version with legend
tools_to_show = 'box_zoom,pan,save,hover,reset,tap,wheel_zoom'
# Tall canvas so the per-country legend on the right fits.
p = figure(plot_width=1100, plot_height=3800, tools=tools_to_show, x_axis_type=None, y_axis_type=None, x_range=(0, 10), y_range=(0, 10))
countries = df['countries'].unique()
colour_by_country = assign_color(df, 'countries')
markers = assign_markers(df, 'countries')

# tooltip_items() reads the global figure `p`; labels and fields are paired
# positionally, so both lists must have the same length.
tooltip_items(
    ["country", "rank", "class", "overall score", "personal freedom score", "economic freedom score"],
    ["@countries", "@{overall rank}", "@classification", "@{average score}",
     "@{personal freedom score}", "@{economic freedom score}"],
)
# Bind `hover` to this figure's hover tool, not one left over from an earlier plot.
hover = p.select(dict(type=HoverTool))
hover.mode = 'mouse'

# One renderer per country, collected so each gets its own legend entry.
legend_it = []
for country, marker in zip(countries, markers):
    c = p.scatter("personal freedom score", "economic freedom score", source=df[df['countries'] == country], fill_alpha=0.4, size=12, marker=marker, color=colour_by_country[country])
    legend_it.append((country, [c]))

# Axes with a major tick every 1 unit and 2 minor ticks per interval.
xaxis = LinearAxis(ticker=SingleIntervalTicker(interval=1, num_minor_ticks=2))
p.add_layout(xaxis, 'below')
yaxis = LinearAxis(ticker=SingleIntervalTicker(interval=1, num_minor_ticks=2))
p.add_layout(yaxis, 'left')

# Dashed reference lines at the mean of each score, with text labels beside them.
avg_personal_freedom = round(df['personal freedom score'].mean(), 1)
avg_economic_freedom = round(df['economic freedom score'].mean(), 1)
vline = Span(location=avg_personal_freedom, dimension='height', line_color='black', line_width=1.5, line_dash="dashed")
hline = Span(location=avg_economic_freedom, dimension='width', line_color='black', line_width=1.5, line_dash="dashed")
p.renderers.extend([vline, hline])
vlinetext = Label(x=avg_personal_freedom - 0.03, y=0.1,
                  text='AVG Personal Freedom: ' + str(avg_personal_freedom), text_font_size="11pt", angle=90, angle_units='deg')
hlinetext = Label(x=0.1, y=avg_economic_freedom + 0.03,
                  text='AVG Economic Freedom: ' + str(avg_economic_freedom), text_font_size="11pt")
p.add_layout(vlinetext)
p.add_layout(hlinetext)

# Least-squares line of economic vs. personal freedom. Rows missing either score
# are left out of the fit, because a single NaN makes polyfit return NaN.
fit_rows = df[['personal freedom score', 'economic freedom score']].dropna()
gradient, y_intercept = np.polyfit(fit_rows['personal freedom score'], fit_rows['economic freedom score'], 1)
slope = Slope(gradient=gradient, y_intercept=y_intercept,
              line_color='green', line_dash='dashed', line_width=3.5)
# NOTE(review): gradient*100 used as degrees looks like a hand-tuned tilt for this
# tall canvas rather than a derived angle; confirm intent.
slopetext = Label(x=0.5, y=y_intercept + 0.5,
                  text='Slope: ' + str(round(gradient, 2)), text_font_size="11pt", angle=gradient*100, angle_units='deg')
p.add_layout(slope)
p.add_layout(slopetext)

# Legend outside the plot area; clicking an entry hides that country's glyph.
legend = Legend(items=legend_it)
p.add_layout(legend, 'right')
p.legend.click_policy = "hide"
p.legend.label_text_font_size = '8pt'
p.legend.glyph_width = 17
p.legend.glyph_height = 17

p.xaxis.axis_label = 'personal freedom score'
p.yaxis.axis_label = 'economic freedom score'
p.axis.axis_label_text_font_size = '12pt'
p.axis.axis_label_text_font_style = 'bold'
p.title.text = "Human Freedom Index 2017"
p.title.align = "center"
p.title.text_font_size = "18pt"
show(p)
# Non-clustered with legend, personal freedom on x, economic freedom on y.
# Disabled draft: the code below sits inside a bare string literal, so it is
# never executed. It differs from the legend plot above by giving every country
# a random hex colour instead of a colour derived from its score.
''''
tools_to_show = 'box_zoom,pan,save,hover,reset,tap,wheel_zoom'
p = figure(plot_width=1150, plot_height=3800, tools=tools_to_show,x_axis_type=None, y_axis_type=None, x_range=(0, 10), y_range=(0, 10))
markers = ['Asterisk', 'Circle', 'Circle_Cross', 'Circle_Dot', 'Circle_Y', 'Circle_X', 'Cross', 'Dash', 'Diamond', 'Diamond_Cross','Diamond_Dot', 'Dot', 'Hex', 'Hex_Dot',
'Inverted_Triangle', 'Plus', 'Square', 'Square_Cross', 'Square_Dot', 'Square_Pin', 'Square_X', 'Triangle', 'Triangle_Dot', 'Triangle_Pin', 'X', 'Y']
markers = [x.lower() for x in markers]
countries = df['countries'].unique()
n_markers = len(countries) / len(markers)
markers = [i for i in markers for j in range(int(math.ceil(n_markers)))]
random.shuffle(markers)
colours = []
def colour():
hex_chars = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f']
randomColour = '#'
for i in range(0, 6):
randomColour = randomColour + choice(hex_chars)
return randomColour
countries = df['countries'].unique()
for c in countries:
colours.append(colour())
hover = p.select(dict(type=HoverTool))
hover.tooltips = [
("country", "@countries"),
("average score", "@{average score}"),
("personal freedom score", "$x"),
("economic freedom score", "$y"),
]
hover.mode = 'mouse'
legend_it = []
for country, marker, colour in zip(countries, markers, colours):
c = p.scatter("personal freedom score", "economic freedom score", source=df[df['countries']==country], fill_alpha=0.4, size=12, marker=marker, color=colour)
legend_it.append((country, [c]))
ticker = SingleIntervalTicker(interval=1, num_minor_ticks=2)
xaxis = LinearAxis(ticker=ticker)
p.add_layout(xaxis, 'below')
ticker = SingleIntervalTicker(interval=1, num_minor_ticks=2)
yaxis = LinearAxis(ticker=ticker)
p.add_layout(yaxis, 'left')
avg_personal_freedom = round(df['personal freedom score'].mean(), 1)
avg_economic_freedom = round(df['economic freedom score'].mean(), 1)
vline = Span(location=avg_personal_freedom, dimension='height', line_color='black', line_width=1.5, line_dash = "dashed")
hline = Span(location=avg_economic_freedom, dimension='width', line_color='black', line_width=1.5, line_dash = "dashed")
p.renderers.extend([vline, hline])
vlinetext = Label(x=avg_personal_freedom - 0.03, y=0.1,
text='AVG Personal Freedom: ' + str(avg_personal_freedom), text_font_size = "11pt", angle=90, angle_units='deg')
hlinetext = Label(x=0.1, y=avg_economic_freedom + 0.03,
text='AVG Economic Freedom: ' + str(avg_economic_freedom), text_font_size = "11pt")
p.add_layout(vlinetext)
p.add_layout(hlinetext)
gradient, y_intercept = np.polyfit(df['personal freedom score'], df['economic freedom score'], 1)
slope = Slope(gradient=gradient, y_intercept=y_intercept,
line_color='green', line_dash='dashed', line_width=3.5)
slopetext = Label(x=0.5, y=y_intercept + 0.5,
text='Slope: ' + str(round(gradient, 2)), text_font_size = "11pt", angle=gradient*100, angle_units='deg')
p.add_layout(slope)
p.add_layout(slopetext)
p.xaxis.axis_label = 'personal freedom score'
p.yaxis.axis_label = 'economic freedom score'
p.axis.axis_label_text_font_size = '12pt'
p.axis.axis_label_text_font_style = 'bold'
legend = Legend(items=legend_it)
p.add_layout(legend, 'right')
p.legend.click_policy="hide"
p.legend.label_text_font_size = '8pt'
p.legend.glyph_width = 17
p.legend.glyph_height = 17
p.title.text = "Human Freedom Index 2017"
p.title.align = "center"
p.title.text_font_size="18pt"
show(p)
'''
# Non-iterative version with non-interactive legend and non-custom legend location.
# Disabled draft: the code below sits inside a bare string literal, so it is
# never executed. It draws all countries with a single scatter call, using
# factor_mark / factor_cmap keyed on the country name.
'''tools_to_show = 'box_zoom,pan,save,hover,reset,tap,wheel_zoom'
p = figure(plot_width=1000, plot_height=1000, tools=tools_to_show)
markers = ['Asterisk', 'Circle', 'Circle_Cross', 'Circle_Dot', 'Circle_Y', 'Circle_X', 'Cross', 'Dash', 'Diamond', 'Diamond_Cross','Diamond_Dot', 'Dot', 'Hex', 'Hex_Dot',
'Inverted_Triangle', 'Plus', 'Square', 'Square_Cross', 'Square_Dot', 'Square_Pin', 'Square_X', 'Triangle', 'Triangle_Dot', 'Triangle_Pin', 'X', 'Y']
markers = [x.lower() for x in markers]
countries = df['countries'].unique()
n_markers = len(countries) / len(markers)
markers = [i for i in markers for j in range(int(math.ceil(n_markers)))]
random.shuffle(markers)
colours = []
def colour():
hex_chars = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f']
randomColour = '#'
for i in range(0, 6):
randomColour = randomColour + choice(hex_chars)
return randomColour
countries = df['countries'].unique()
for c in countries:
colours.append(colour())
hover = p.select(dict(type=HoverTool))
hover.tooltips = [
("country", "@countries"),
("personal freedom score", "$x"),
("economic freedom score", "$y"),
]
hover.mode = 'mouse'
sp = p.scatter("personal freedom score", "economic freedom score", source=df, legend_field="countries", fill_alpha=0.4, size=12,
marker=factor_mark('countries', markers, countries),
color=factor_cmap('countries', palette=colours, factors=countries))
p.legend.click_policy="hide"
show(p)
'''