In [101]:
%matplotlib inline
# --- standard library ---
import math
import os
import random
import statistics
import warnings
from random import choice

# --- scientific stack ---
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib import colors
from matplotlib.pyplot import figure

# --- bokeh ---
# Keep this group AFTER the matplotlib imports: `from bokeh.plotting import figure`
# must rebind `figure`, because the plotting cells call Bokeh's figure().
from bokeh.plotting import figure, output_file, show
from bokeh.models import Legend
from bokeh.transform import factor_cmap, factor_mark
from bokeh.io import output_notebook
from bokeh.models.markers import marker_types
from bokeh.models import Legend
from bokeh.palettes import Category20c_20
from bokeh.models import HoverTool
from bokeh.models import Span
from bokeh.models import Label
from bokeh.models import Slope
from bokeh.models import SingleIntervalTicker, LinearAxis
from bokeh.models.annotations import Title
from bokeh.models import ColumnDataSource, GMapOptions
from bokeh.plotting import gmap
from bokeh.tile_providers import CARTODBPOSITRON, get_provider
from bokeh.models import DataRange1d
from bokeh.io import export_png

# --- other third-party ---
from selenium import webdriver
In [73]:
# Per-country latitude/longitude lookup table; merged with the index data further down.
lat_lon_csv = r"C:\Users\james\Downloads\Human Freedom\country_lat_lon.csv"
lat_lon = pd.read_csv(lat_lon_csv)
In [74]:
# Human Freedom Index table. The file is decoded as latin-1.
freedom_csv = r"C:\Users\james\Downloads\Human Freedom\Human Freedom.csv"
df = pd.read_csv(freedom_csv, encoding='latin-1')
df.head()
Out[74]:
year countries region score rank quartile homicide disappearances safety and security foreign movement ... identity personal freedom score presonal freedom rank government legal trade entrepreneurship business regulation economic freedom score economic freedom rank
0 2017 Angola Sub-Saharan Africa 5.4 151 4 8.1 10 8.1 5 ... 5 5.98 121 6.8 3 3.2 8.7 4.9 4.83 158
1 2017 Albania Eastern Europe 7.84 38 1 9.1 10 9.3 10 ... 5.8 8.01 46 7.5 5.1 8.3 9.7 6.7 7.67 30
2 2017 Argentina Latin America & the Caribbean 6.86 77 2 8 5 8.8 10 ... 10 8.04 41 5.7 4 6.5 9.6 5.7 5.67 147
3 2017 Armenia Caucasus & Central Asia 7.42 54 2 9 10 9.1 5 ... 8.2 7.15 72 7.4 5.9 8.2 9.9 6.9 7.7 27
4 2017 United Arab Emirates Middle East & North Africa 6.13 128 4 9.6 5 7.7 0 ... 0 5.09 148 5.8 5.7 8 9.6 8.3 7.17 61

5 rows × 24 columns

In [75]:
# Keep only the 2017 rows. The explicit .copy() yields an independent frame, so the
# column assignment below does not write into a slice of the loaded table
# (this is what triggers pandas' SettingWithCopyWarning).
df = df[df['year'] == 2017].copy()

# Every column from the fourth one onwards is coerced to a numeric dtype;
# values that cannot be parsed become NaN (errors='coerce').
numeric_cols = df.columns[3:]
df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric, errors='coerce')
df.dtypes
Out[75]:
year                        int64
countries                  object
region                     object
score                     float64
rank                      float64
quartile                    int64
homicide                  float64
disappearances            float64
safety and security       float64
foreign movement          float64
religion                  float64
political association     float64
internet                  float64
expression                float64
identity                  float64
personal freedom score    float64
presonal freedom rank       int64
government                float64
legal                     float64
trade                     float64
entrepreneurship          float64
business regulation       float64
economic freedom score    float64
economic freedom rank       int64
dtype: object
In [76]:
# Round both component scores to one decimal, then derive their (rounded) row-wise mean.
score_cols = ['personal freedom score', 'economic freedom score']
for col in score_cols:
    df[col] = df[col].round(1)
df['average score'] = df[score_cols].mean(axis=1).round(1)
df[['countries', 'average score']]
Out[76]:
countries average score
0 Angola 5.4
1 Albania 7.8
2 Argentina 6.8
3 Armenia 7.4
4 United Arab Emirates 6.2
... ... ...
157 Vietnam 6.3
158 Yemen 4.3
159 South Africa 7.1
160 Zambia 6.4
161 Zimbabwe 5.6

162 rows × 2 columns

In [77]:
# Rank rows by average score, highest score first (ascending=False).
# method="max" gives every member of a tied group the largest rank in that group.
df['overall rank'] = df['average score'].rank(ascending=False, method="max")
In [78]:
# Show the column names, including the derived 'average score' and 'overall rank'.
df.columns
Out[78]:
Index(['year', 'countries', 'region', 'score', 'rank', 'quartile', 'homicide',
       'disappearances', 'safety and security', 'foreign movement', 'religion',
       'political association', 'internet', 'expression', 'identity',
       'personal freedom score', 'presonal freedom rank', 'government',
       'legal', 'trade', 'entrepreneurship', 'business regulation',
       'economic freedom score', 'economic freedom rank', 'average score',
       'overall rank'],
      dtype='object')
In [79]:
# Start with an empty label on every row; classification() below fills it in.
df['classification'] = ''

def classification(df, countriescolumn):
    """Label every row with the freedom band of its country.

    The band is chosen from the mean of 'average score' over all rows that
    share the same value in ``countriescolumn``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``countriescolumn`` and 'average score'.
    countriescolumn : str
        Name of the column that identifies the country.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with its 'classification' column created or
        overwritten in place.

    Notes
    -----
    Labels are assigned by index alignment, so the result is correct for any
    row index (for example after filtering), not only the default 0..n-1 one.
    A country whose mean score is NaN falls through every threshold and is
    labelled "not free".
    """
    # (inclusive lower bound, label), highest band first.
    bands = [
        (8.75, "definition of freedom"),
        (8.5, "elite freedom"),
        (8.25, "very high freedom"),
        (8, "high freedom"),
        (7.5, "medium freedom"),
        (7, "average freedom"),
        (6.5, "slightly below average freedom"),
        (6, "mediocre freedom"),
        (5, "poor freedom"),
        (4, "very poor freedom"),
    ]

    # Per-country mean, broadcast back onto the rows of df (same index as df).
    mean_by_country = df.groupby(countriescolumn)['average score'].mean()
    row_mean = df[countriescolumn].map(mean_by_country)

    # Start from the lowest band and let each higher threshold overwrite it,
    # so every row ends up with the highest band it qualifies for.
    labels = pd.Series("not free", index=df.index, dtype=object)
    for lower_bound, label in reversed(bands):
        labels.loc[row_mean >= lower_bound] = label

    df['classification'] = labels
    return df

# Label every row with its freedom band, then preview the first five labels.
df = classification(df, 'countries')
df['classification'].head(5)
Out[79]:
0                      poor freedom
1                    medium freedom
2    slightly below average freedom
3                   average freedom
4                  mediocre freedom
Name: classification, dtype: object
In [80]:
# Attach coordinates to each country. The join is an inner join, so rows whose
# 'countries' value has no matching 'country' in lat_lon are dropped.
df_lat_lon = df.merge(lat_lon, how='inner', left_on='countries', right_on='country')
df_lat_lon.head()
Out[80]:
year countries region score rank quartile homicide disappearances safety and security foreign movement ... entrepreneurship business regulation economic freedom score economic freedom rank average score overall rank classification latitude longitude country
0 2017 Angola Sub-Saharan Africa 5.40 151.0 4 8.1 10.0 8.1 5.0 ... 8.7 4.9 4.8 158 5.4 152.0 poor freedom -11.202692 17.873887 Angola
1 2017 Albania Eastern Europe 7.84 38.0 1 9.1 10.0 9.3 10.0 ... 9.7 6.7 7.7 30 7.8 40.0 medium freedom 41.153332 20.168331 Albania
2 2017 Argentina Latin America & the Caribbean 6.86 77.0 2 8.0 5.0 8.8 10.0 ... 9.6 5.7 5.7 147 6.8 90.0 slightly below average freedom -38.416097 -63.616672 Argentina
3 2017 Armenia Caucasus & Central Asia 7.42 54.0 2 9.0 10.0 9.1 5.0 ... 9.9 6.9 7.7 27 7.4 57.0 average freedom 40.069099 45.038189 Armenia
4 2017 United Arab Emirates Middle East & North Africa 6.13 128.0 4 9.6 5.0 7.7 0.0 ... 9.6 8.3 7.2 61 6.2 128.0 mediocre freedom 23.424076 53.847818 United Arab Emirates

5 rows × 30 columns

In [81]:
def assign_color(df, countriescolumn):
    """Map each country to a colour name based on its mean 'average score'.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``countriescolumn`` and 'average score'.
    countriescolumn : str
        Name of the column that identifies the country. It is used for both
        the list of countries and the score lookup.

    Returns
    -------
    dict
        ``{country: colour name}``, in order of first appearance in ``df``.
        A country whose mean score is NaN gets 'red'.
    """
    # (inclusive lower bound, colour), highest band first.
    bands = [
        (8.75, 'black'),
        (8.5, 'limegreen'),
        (8.25, 'cyan'),
        (8, 'darkcyan'),
        (7.5, 'steelblue'),
        (7, 'blue'),
        (6.5, 'yellow'),
        (6, 'pink'),
        (5, 'magenta'),
        (4, 'orange'),
    ]

    # One pass over the frame instead of re-filtering it for every country.
    mean_by_country = df.groupby(countriescolumn, sort=False)['average score'].mean()

    colour_by_country = {}
    for country in df[countriescolumn].unique():
        overall = mean_by_country.get(country, float('nan'))
        # First band whose threshold is met; NaN fails every comparison -> 'red'.
        colour_by_country[country] = next(
            (colour for lower_bound, colour in bands if overall >= lower_bound),
            'red',
        )
    return colour_by_country
    
# Build the country -> colour lookup and preview the first five entries in alphabetical order.
colour_by_country = assign_color(df, 'countries')
dict(sorted(colour_by_country.items())[:5])
Out[81]:
{'Albania': 'steelblue',
 'Algeria': 'magenta',
 'Angola': 'magenta',
 'Argentina': 'yellow',
 'Armenia': 'blue'}
In [82]:
def assign_markers(df, column, seed=None):
    """Return a shuffled list of Bokeh marker names, at least one per unique value.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``column``.
    column : str
        Column whose number of unique values determines how many markers are needed.
    seed : int, optional
        When given, the shuffle uses its own ``random.Random(seed)`` so the
        marker assignment is reproducible across runs. When omitted, the
        module-level ``random`` state is used.

    Returns
    -------
    list of str
        Lower-case marker names. Every name is repeated the same number of
        times, so the list length is a multiple of the number of marker names
        and is at least the number of unique values (empty when there are none).
    """
    marker_names = [
        'asterisk', 'circle', 'circle_cross', 'circle_dot', 'circle_y', 'circle_x',
        'cross', 'dash', 'diamond', 'diamond_cross', 'diamond_dot', 'dot', 'hex',
        'hex_dot', 'inverted_triangle', 'plus', 'square', 'square_cross',
        'square_dot', 'square_pin', 'square_x', 'triangle', 'triangle_dot',
        'triangle_pin', 'x', 'y',
    ]

    n_unique = len(df[column].unique())
    # Repeat the whole palette often enough to cover every unique value.
    repeats = int(math.ceil(n_unique / len(marker_names)))
    pool = [name for name in marker_names for _ in range(repeats)]

    shuffler = random if seed is None else random.Random(seed)
    shuffler.shuffle(pool)
    return pool

# Shuffled marker names for the countries (the list can be longer than the number of countries).
markers = assign_markers(df, 'countries')
In [83]:
def tooltip_items(columns=None, fields=None, plot=None):
    """Set the hover tooltips of a Bokeh figure from two parallel lists.

    Parameters
    ----------
    columns : sequence of str, optional
        Tooltip labels, in display order.
    fields : sequence of str, optional
        Bokeh field specifications (for example "@countries"), one per label.
    plot : optional
        Figure whose HoverTool(s) are updated. When omitted, the module-level
        figure ``p`` is used.

    Notes
    -----
    Labels and fields are paired by position. If the two lists differ in
    length the surplus entries are dropped and a warning is emitted, because a
    mismatch shifts every later label onto the wrong field.
    """
    labels = [] if columns is None else list(columns)
    specs = [] if fields is None else list(fields)

    if len(labels) != len(specs):
        warnings.warn(
            "tooltip_items: %d labels but %d fields; surplus entries are ignored"
            % (len(labels), len(specs)),
            stacklevel=2,
        )

    target = p if plot is None else plot
    # select() returns every HoverTool attached to the figure; the assignment applies to all of them.
    hover_tools = target.select(dict(type=HoverTool))
    hover_tools.tooltips = list(zip(labels, specs))

# NOTE: tooltip_items() configures the figure bound to the global `p`, so a figure must
# already exist when this cell runs. The plotting cells below repeat this call
# after creating their own figure.
# Labels and fields are paired by position, so both lists must have the same length.
tooltip_items(
    ["country", "rank", "class", "overall score", "personal freedom score", "economic freedom score"],
    ["@countries", "@{overall rank}", "@classification",
     "@{average score}", "@{personal freedom score}", "@{economic freedom score}"],
)
In [84]:
# Direct Bokeh output to the notebook (loads BokehJS).
output_notebook()
Loading BokehJS ...
In [106]:
# clustered
# Scatter of personal vs. economic freedom: one glyph per country, coloured by freedom band,
# with dashed average lines and a least-squares trend line.
plot_width, plot_height = 1100, 475
tools_to_show = 'box_zoom,pan,save,hover,reset,tap,wheel_zoom'
p = figure(plot_width=plot_width, plot_height=plot_height, tools=tools_to_show,
           x_axis_type=None, y_axis_type=None, x_range=(0, 10), y_range=(0, 10))

# Defined here rather than inherited from another cell, so this cell runs on a fresh kernel.
countries = df['countries'].unique()
colour_by_country = assign_color(df, 'countries')
markers = assign_markers(df, 'countries')

tooltip_items(
    ["country", "rank", "class", "overall score", "personal freedom score", "economic freedom score"],
    ["@countries", "@{overall rank}", "@classification",
     "@{average score}", "@{personal freedom score}", "@{economic freedom score}"],
)
# Configure the hover tool that belongs to THIS figure (created through tools_to_show).
hover = p.select(dict(type=HoverTool))
hover.mode = 'mouse'

# zip() stops at the shorter sequence; markers holds at least one entry per country.
for country, marker in zip(countries, markers):
    p.scatter("personal freedom score", "economic freedom score", source=df[df['countries'] == country],
              fill_alpha=0.4, size=12, marker=marker, color=colour_by_country[country])

# The default axes are disabled above; add axes with a tick every 1 unit and 2 minor ticks.
xaxis = LinearAxis(ticker=SingleIntervalTicker(interval=1, num_minor_ticks=2))
p.add_layout(xaxis, 'below')

yaxis = LinearAxis(ticker=SingleIntervalTicker(interval=1, num_minor_ticks=2))
p.add_layout(yaxis, 'left')

# Dashed reference lines at the mean of each score.
avg_personal_freedom = round(df['personal freedom score'].mean(), 1)
avg_economic_freedom = round(df['economic freedom score'].mean(), 1)
vline = Span(location=avg_personal_freedom, dimension='height', line_color='black', line_width=1.5, line_dash="dashed")
hline = Span(location=avg_economic_freedom, dimension='width', line_color='black', line_width=1.5, line_dash="dashed")
p.add_layout(vline)
p.add_layout(hline)

vlinetext = Label(x=avg_personal_freedom - 0.03, y=0.1,
                  text='AVG Personal Freedom: ' + str(avg_personal_freedom), text_font_size="11pt", angle=90, angle_units='deg')
hlinetext = Label(x=0.1, y=avg_economic_freedom + 0.03,
                  text='AVG Economic Freedom: ' + str(avg_economic_freedom), text_font_size="11pt")

p.add_layout(vlinetext)
p.add_layout(hlinetext)

# Least-squares line through the points. Rows with a missing score are left out,
# because a single NaN would make both fitted coefficients NaN.
fit_data = df[['personal freedom score', 'economic freedom score']].dropna()
gradient, y_intercept = np.polyfit(fit_data['personal freedom score'], fit_data['economic freedom score'], 1)

slope = Slope(gradient=gradient, y_intercept=y_intercept,
              line_color='green', line_dash='dashed', line_width=3.5)

# The label is rotated on screen, so the data-space gradient has to be converted to an
# angle. Both ranges span 0-10, so the only distortion is the canvas aspect ratio.
# Approximate: the space taken by axes and title is ignored.
slope_angle = math.degrees(math.atan(gradient * plot_height / plot_width))
slopetext = Label(x=0.5, y=y_intercept + 0.5,
                  text='Slope: ' + str(round(gradient, 2)), text_font_size="11pt", angle=slope_angle, angle_units='deg')

p.add_layout(slope)
p.add_layout(slopetext)

p.xaxis.axis_label = 'personal freedom score'
p.yaxis.axis_label = 'economic freedom score'
p.axis.axis_label_text_font_size = '12pt'
p.axis.axis_label_text_font_style = 'bold'

p.title.text = "Human Freedom Index 2017"
p.title.align = "center"
p.title.text_font_size = "18pt"
show(p)
Out[106]:
'C:\\Users\\james\\hn1.png'
In [105]:
# World map: one circle per country at its latitude/longitude, coloured by freedom band.
countries = df['countries'].unique()
colour_by_country = assign_color(df, 'countries')

# The Google Maps API key is read from the environment so that the credential
# is not stored in the notebook.
google_api_key = os.environ.get("GOOGLE_API_KEY")
if not google_api_key:
    raise RuntimeError("Set the GOOGLE_API_KEY environment variable to a Google Maps API key before running this cell.")

map_options = GMapOptions(lat=0, lng=0, map_type="roadmap", zoom=1)
p = gmap(google_api_key, map_options)

for c in countries:
    country_rows = df_lat_lon[df_lat_lon['countries'] == c]
    if country_rows.empty:
        # The inner merge dropped this country (no coordinates); nothing to draw.
        continue
    p.circle(x="longitude", y="latitude", size=10, fill_color=colour_by_country[c], fill_alpha=0.5, source=country_rows)

# The map figure is created without a hover tool, so add one before setting tooltips.
hover = HoverTool()
p.add_tools(hover)

# Labels and fields are paired by position, so both lists must have the same length.
tooltip_items(
    ["country", "rank", "class", "overall score", "personal freedom score", "economic freedom score"],
    ["@countries", "@{overall rank}", "@classification",
     "@{average score}", "@{personal freedom score}", "@{economic freedom score}"],
)

hover.mode = 'mouse'
p.title.text = 'Human Freedom Index 2017'
p.title.text_font_size = "18pt"
p.title.align = "center"
show(p)
Out[105]:
'C:\\Users\\james\\hn2.png'
In [104]:
# clustered larger version with legend
# Same scatter as the clustered plot, on a taller canvas, with a clickable per-country legend.
plot_width, plot_height = 1100, 3800
tools_to_show = 'box_zoom,pan,save,hover,reset,tap,wheel_zoom'
p = figure(plot_width=plot_width, plot_height=plot_height, tools=tools_to_show,
           x_axis_type=None, y_axis_type=None, x_range=(0, 10), y_range=(0, 10))

# Defined here rather than inherited from another cell, so this cell runs on a fresh kernel.
countries = df['countries'].unique()
colour_by_country = assign_color(df, 'countries')
markers = assign_markers(df, 'countries')

tooltip_items(
    ["country", "rank", "class", "overall score", "personal freedom score", "economic freedom score"],
    ["@countries", "@{overall rank}", "@classification",
     "@{average score}", "@{personal freedom score}", "@{economic freedom score}"],
)
# Configure the hover tool that belongs to THIS figure (created through tools_to_show).
hover = p.select(dict(type=HoverTool))
hover.mode = 'mouse'

# One renderer per country so that each legend entry can toggle its own glyph.
legend_it = []
for country, marker in zip(countries, markers):
    renderer = p.scatter("personal freedom score", "economic freedom score", source=df[df['countries'] == country],
                         fill_alpha=0.4, size=12, marker=marker, color=colour_by_country[country])
    legend_it.append((country, [renderer]))

# The default axes are disabled above; add axes with a tick every 1 unit and 2 minor ticks.
xaxis = LinearAxis(ticker=SingleIntervalTicker(interval=1, num_minor_ticks=2))
p.add_layout(xaxis, 'below')

yaxis = LinearAxis(ticker=SingleIntervalTicker(interval=1, num_minor_ticks=2))
p.add_layout(yaxis, 'left')

# Dashed reference lines at the mean of each score.
avg_personal_freedom = round(df['personal freedom score'].mean(), 1)
avg_economic_freedom = round(df['economic freedom score'].mean(), 1)
vline = Span(location=avg_personal_freedom, dimension='height', line_color='black', line_width=1.5, line_dash="dashed")
hline = Span(location=avg_economic_freedom, dimension='width', line_color='black', line_width=1.5, line_dash="dashed")
p.add_layout(vline)
p.add_layout(hline)

vlinetext = Label(x=avg_personal_freedom - 0.03, y=0.1,
                  text='AVG Personal Freedom: ' + str(avg_personal_freedom), text_font_size="11pt", angle=90, angle_units='deg')
hlinetext = Label(x=0.1, y=avg_economic_freedom + 0.03,
                  text='AVG Economic Freedom: ' + str(avg_economic_freedom), text_font_size="11pt")

p.add_layout(vlinetext)
p.add_layout(hlinetext)

# Least-squares line through the points. Rows with a missing score are left out,
# because a single NaN would make both fitted coefficients NaN.
fit_data = df[['personal freedom score', 'economic freedom score']].dropna()
gradient, y_intercept = np.polyfit(fit_data['personal freedom score'], fit_data['economic freedom score'], 1)

slope = Slope(gradient=gradient, y_intercept=y_intercept,
              line_color='green', line_dash='dashed', line_width=3.5)

# The label is rotated on screen, so the data-space gradient has to be converted to an
# angle. Both ranges span 0-10, so the only distortion is the canvas aspect ratio.
# Approximate: the space taken by axes, title and the legend on the right is ignored.
slope_angle = math.degrees(math.atan(gradient * plot_height / plot_width))
slopetext = Label(x=0.5, y=y_intercept + 0.5,
                  text='Slope: ' + str(round(gradient, 2)), text_font_size="11pt", angle=slope_angle, angle_units='deg')

p.add_layout(slope)
p.add_layout(slopetext)

# Legend outside the plot area; clicking an entry hides that country's glyph.
legend = Legend(items=legend_it)
p.add_layout(legend, 'right')
p.legend.click_policy = "hide"
p.legend.label_text_font_size = '8pt'
p.legend.glyph_width = 17
p.legend.glyph_height = 17

p.xaxis.axis_label = 'personal freedom score'
p.yaxis.axis_label = 'economic freedom score'
p.axis.axis_label_text_font_size = '12pt'
p.axis.axis_label_text_font_style = 'bold'

p.title.text = "Human Freedom Index 2017"
p.title.align = "center"
p.title.text_font_size = "18pt"

show(p)
Out[104]:
'C:\\Users\\james\\hn1.png'
In [ ]:
# Disabled cell (the code below is kept inside a string literal): non-clustered version with legend; personal freedom on x, economic freedom on y.
''''
tools_to_show = 'box_zoom,pan,save,hover,reset,tap,wheel_zoom'   
p = figure(plot_width=1150, plot_height=3800, tools=tools_to_show,x_axis_type=None, y_axis_type=None, x_range=(0, 10), y_range=(0, 10))

markers = ['Asterisk', 'Circle', 'Circle_Cross', 'Circle_Dot', 'Circle_Y', 'Circle_X', 'Cross', 'Dash', 'Diamond', 'Diamond_Cross','Diamond_Dot', 'Dot', 'Hex', 'Hex_Dot',
'Inverted_Triangle', 'Plus', 'Square', 'Square_Cross', 'Square_Dot', 'Square_Pin', 'Square_X', 'Triangle', 'Triangle_Dot', 'Triangle_Pin', 'X', 'Y']

markers = [x.lower() for x in markers]
countries = df['countries'].unique()

n_markers = len(countries) / len(markers)
markers = [i for i in markers for j in range(int(math.ceil(n_markers)))]
random.shuffle(markers)

colours = []

def colour():
    hex_chars = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f']
    randomColour = '#'
    for i in range(0, 6):
        randomColour = randomColour + choice(hex_chars)
    return randomColour

countries = df['countries'].unique()

for c in countries:
    colours.append(colour())

hover = p.select(dict(type=HoverTool))
hover.tooltips = [
    ("country", "@countries"),
    ("average score", "@{average score}"),
    ("personal freedom score", "$x"),
    ("economic freedom score", "$y"),
]
hover.mode = 'mouse'

legend_it = []

for country, marker, colour in zip(countries, markers, colours):
     c = p.scatter("personal freedom score", "economic freedom score", source=df[df['countries']==country],  fill_alpha=0.4, size=12, marker=marker, color=colour)
     legend_it.append((country, [c]))

ticker = SingleIntervalTicker(interval=1, num_minor_ticks=2)
xaxis = LinearAxis(ticker=ticker)
p.add_layout(xaxis, 'below')

ticker = SingleIntervalTicker(interval=1, num_minor_ticks=2)
yaxis = LinearAxis(ticker=ticker)
p.add_layout(yaxis, 'left')

avg_personal_freedom = round(df['personal freedom score'].mean(), 1)
avg_economic_freedom = round(df['economic freedom score'].mean(), 1)
vline = Span(location=avg_personal_freedom, dimension='height', line_color='black', line_width=1.5, line_dash = "dashed")
hline = Span(location=avg_economic_freedom, dimension='width', line_color='black', line_width=1.5, line_dash = "dashed")
p.renderers.extend([vline, hline])

vlinetext = Label(x=avg_personal_freedom - 0.03, y=0.1, 
                  text='AVG Personal Freedom: ' + str(avg_personal_freedom), text_font_size = "11pt", angle=90, angle_units='deg')
hlinetext = Label(x=0.1, y=avg_economic_freedom + 0.03, 
                  text='AVG Economic Freedom: ' + str(avg_economic_freedom), text_font_size = "11pt")

p.add_layout(vlinetext)
p.add_layout(hlinetext)

gradient, y_intercept = np.polyfit(df['personal freedom score'], df['economic freedom score'], 1)

slope = Slope(gradient=gradient, y_intercept=y_intercept,
              line_color='green', line_dash='dashed', line_width=3.5)

slopetext = Label(x=0.5, y=y_intercept + 0.5, 
                  text='Slope: ' + str(round(gradient, 2)), text_font_size = "11pt", angle=gradient*100, angle_units='deg')

p.add_layout(slope)
p.add_layout(slopetext)

p.xaxis.axis_label = 'personal freedom score'
p.yaxis.axis_label = 'economic freedom score'
p.axis.axis_label_text_font_size = '12pt'
p.axis.axis_label_text_font_style = 'bold'

legend = Legend(items=legend_it)
p.add_layout(legend, 'right')
p.legend.click_policy="hide"
p.legend.label_text_font_size = '8pt'
p.legend.glyph_width = 17
p.legend.glyph_height = 17

p.title.text = "Human Freedom Index 2017"
p.title.align = "center"
p.title.text_font_size="18pt"

show(p)
'''
In [ ]:
# Disabled cell (the code below is kept inside a string literal): non-iterative version with a non-interactive legend at the default (non-custom) location.

'''tools_to_show = 'box_zoom,pan,save,hover,reset,tap,wheel_zoom'   
p = figure(plot_width=1000, plot_height=1000, tools=tools_to_show)

markers = ['Asterisk', 'Circle', 'Circle_Cross', 'Circle_Dot', 'Circle_Y', 'Circle_X', 'Cross', 'Dash', 'Diamond', 'Diamond_Cross','Diamond_Dot', 'Dot', 'Hex', 'Hex_Dot',
'Inverted_Triangle', 'Plus', 'Square', 'Square_Cross', 'Square_Dot', 'Square_Pin', 'Square_X', 'Triangle', 'Triangle_Dot', 'Triangle_Pin', 'X', 'Y']

markers = [x.lower() for x in markers]
countries = df['countries'].unique()

n_markers = len(countries) / len(markers)
markers = [i for i in markers for j in range(int(math.ceil(n_markers)))]
random.shuffle(markers)

colours = []

def colour():
    hex_chars = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f']
    randomColour = '#'
    for i in range(0, 6):
        randomColour = randomColour + choice(hex_chars)
    return randomColour

countries = df['countries'].unique()

for c in countries:
    colours.append(colour())

hover = p.select(dict(type=HoverTool))
hover.tooltips = [
    ("country", "@countries"),
    ("personal freedom score", "$x"),
    ("economic freedom score", "$y"),
]
hover.mode = 'mouse'

sp = p.scatter("personal freedom score", "economic freedom score", source=df, legend_field="countries",  fill_alpha=0.4, size=12,
          marker=factor_mark('countries', markers, countries),
          color=factor_cmap('countries', palette=colours, factors=countries))



p.legend.click_policy="hide"
show(p)
'''
In [ ]: