In [2]:
'''


UNIVERSITY RANKINGS FROM THE TIMES 2011 - 2016


'''
Out[2]:
'\n\n\n\nUNIVERSITY RANKINGS FROM THE TIMES 2011 - 2016\n\n\n'
In [49]:
import pandas as pd
import plotly.offline as pyo
from plotly.offline import init_notebook_mode, plot_mpl
import plotly.graph_objects as go
import plotly.io as pio
import plotly.express as px
import plotly
import pandas as pd
from random import choice
from plotly.subplots import make_subplots
import math
import numpy as np
import ipywidgets
from ipywidgets import widgets
from ipywidgets import interactive, HBox, VBox
import statistics
pio.renderers.default='jupyterlab'
pyo.init_notebook_mode(connected=True)
In [50]:
df=pd.read_csv(r"C:\Users\james\Downloads\University Rankings\timesData.csv", encoding='latin-1')
In [51]:
df
Out[51]:
world_rank university_name country teaching international research citations income total_score num_students student_staff_ratio international_students female_male_ratio year latitude longitude
0 1 Harvard University United States 99.7 72.4 98.7 98.8 34.5 96.1 20152 8.9 25% NaN 2011 37.090240 -95.712891
1 2 California Institute of Technology United States 97.7 54.6 98.0 99.9 83.7 96 2243 6.9 27% 33 : 67 2011 37.090240 -95.712891
2 3 Massachusetts Institute of Technology United States 97.8 82.3 91.4 99.9 87.5 95.6 11074 9.0 33% 37 : 63 2011 37.090240 -95.712891
3 4 Stanford University United States 98.3 29.5 98.1 99.2 64.3 94.3 15596 7.8 22% 42:58:00 2011 37.090240 -95.712891
4 6 University of Cambridge United Kingdom 90.5 77.7 94.1 94.0 57.0 91.2 18812 11.8 34% 46:54:00 2011 55.378051 -3.435973
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2314 601-800 Yeungnam University South Korea 18.6 24.3 10.9 26.5 35.4 - 21958 15.3 3% 48:52:00 2016 35.907757 127.766922
2315 601-800 Yıldız Technical University Turkey 14.5 14.9 7.6 19.3 44.0 - 31268 28.7 2% 36 : 64 2016 38.963745 35.243322
2316 601-800 Yokohama City University Japan 24.0 16.1 10.2 36.4 37.9 - 4122 3.7 3% NaN 2016 36.204824 138.252924
2317 601-800 Yokohama National University Japan 20.1 23.3 16.0 13.5 40.4 - 10117 12.1 8% 28 : 72 2016 36.204824 138.252924
2318 601-800 Yuan Ze University Taiwan 16.2 17.7 18.3 28.6 39.8 - 8663 20.6 4% 43:57:00 2016 23.697810 120.960515

2319 rows × 16 columns

In [52]:
df.dtypes
Out[52]:
world_rank              object
university_name            object
country                    object
teaching                  float64
international              object
research                  float64
citations                 float64
income                    float64
total_score                object
num_students                int64
student_staff_ratio       float64
international_students     object
female_male_ratio          object
year                        int64
latitude                  float64
longitude                 float64
dtype: object
In [53]:
df['country'] = df['country'].astype(str)
df['country'].dtypes
Out[53]:
dtype('O')
In [54]:
df_with_count = df.groupby(['country', 'year', 'university_name'], as_index=False).mean()
df_with_count['count'] = ""
for c in df_with_count['country'].unique():
    for y in df_with_count['year'].unique():
        for i in  df_with_count[(df_with_count['country']==c) & (df_with_count['year']==y)].index:
            df_with_count.loc[i, 'count'] = 1

df_with_count['uni_count'] = ""
for u in df_with_count['university_name'].unique():
    for y in df_with_count['year'].unique():
        for i in  df_with_count[(df_with_count['university_name']==u) & (df_with_count['year']==y)].index:
            df_with_count.loc[i, 'uni_count'] = 1
            
df_with_count['count'] = pd.to_numeric(df_with_count['count']) 

df_with_count['running_country_sum_year'] = df_with_count.groupby(['country', 'year'])['count'].apply(lambda x: x.cumsum())

df_with_count['running_country_sum_year'] = pd.to_numeric(df_with_count['running_country_sum_year'])

df_with_count['max_country_sum_year'] = df_with_count.groupby(['country', 'year'])['running_country_sum_year'].transform(max)

df_with_count['max_country_sum_year'] = pd.to_numeric(df_with_count['max_country_sum_year'])

df_with_count['running_country_sum'] = df_with_count.groupby(['country'])['count'].apply(lambda x: x.cumsum())

df_with_count['running_country_sum'] = pd.to_numeric(df_with_count['running_country_sum'])

df_with_count['countries total times on list'] = df_with_count.groupby(['country'])['running_country_sum'].transform(max)

df_with_count['countries total times on list'] = pd.to_numeric(df_with_count['countries total times on list'])

df_with_count['uni_count'] = pd.to_numeric(df_with_count['uni_count'])
            
df_with_count['running_uni_sum'] = df_with_count.groupby(['university_name'])['uni_count'].apply(lambda x: x.cumsum())

df_with_count['running_uni_sum'] = pd.to_numeric(df_with_count['running_uni_sum'])

df_with_count['uni total times on list'] = df_with_count.groupby(['university_name'])['running_uni_sum'].transform(max)

df_with_count['uni total times on list'] = pd.to_numeric(df_with_count['uni total times on list'])
     
df_with_count = df_with_count.sort_values('income', ascending = False)



df_count_country_with_year= df_with_count.groupby(['country', 'year'], as_index=False).mean()
df_count_country_with_year['income'] = round(df_count_country_with_year['income'], 2)

df_count_country = df_with_count.groupby('country', as_index=False).mean()

df_university_count = df.groupby(['year', 'university_name', 'country'], as_index=False).mean()
df_university_count['count'] = ""
for u in df_university_count['university_name'].unique():
    for y in df_university_count['year'].unique():
        for i in  df_university_count[(df_university_count['university_name']==u) & (df_university_count['year']==y)].index:
            df_university_count.loc[i, 'count'] = int(len(df_with_count[(df_with_count['university_name']==u) & (df_with_count['year']==y)]['university_name']))
            
df_university_count['running_sum'] = df_university_count.groupby(['university_name'])['count'].apply(lambda x: x.cumsum())

df_university_count['total times on list'] = df_university_count.groupby(['university_name'])['running_sum'].transform(max)
In [82]:
df_with_count['income and research average'] = (df_with_count['research'] + df_with_count['income'])/2
df_count_country['income and research average'] = (df_count_country['research'] + df_count_country['income'])/2
df_university_count['income and research average'] = (df_university_count['research'] + df_university_count['income'])/2
df_count_country_with_year['income and research average'] = round((df_count_country_with_year['research'] + df_count_country_with_year['income'])/2, 2)
In [56]:
df_with_count_2016 = df_with_count.query('year==2016')
df_with_count_2016['income and research average'] = (df_with_count_2016['research'] + df_with_count_2016['income'])/2
df_with_count_2016 = df_with_count_2016.sort_values('income and research average', ascending=False)
<ipython-input-56-28503ba25fd6>:2: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

In [84]:
fig = make_subplots(
    rows=2, cols=2,
    specs=[[{'type': 'xy'}, {'type':'xy'}],
           [{"colspan": 2}, None]],
vertical_spacing=0.35)

fig.add_trace(go.Bar(
                x=df_with_count_2016['university_name'],
                y= df_with_count_2016['income and research average'],
                customdata=np.stack((list(round(df_with_count_2016['income'], 2)), 
                                     list(df_with_count_2016['uni total times on list']), list(df_with_count_2016['research']), df_with_count_2016['income and research average']), axis=-1),
                text=df_with_count_2016['country'],
                hovertemplate =
     '<br><b>University Name</b>: %{x}' +
    '<br><b># of Appearances from 2011 - 2016</b>: %{customdata[1]}' +
    '<br><b>Income Score/100</b>: %{customdata[0]}' +
    '<br><b>Research Score/100</b>: %{customdata[2]}' +
    '<br><b>Income and Research Score/100</b>: %{customdata[3]}'
    '<br><b>Country</b>: %{text}<extra></extra>',  
                marker=dict(color= df_with_count_2016['income and research average'],
                colorscale='Inferno'),
    showlegend=False)
        ,
                 row=1, col=1
)

fig.update_layout(xaxis={'categoryorder':'total descending'})

fig.add_annotation(
           x=df_with_count_2016['university_name'].loc[int(len(df_with_count_2016['university_name'])/2)],
           y=round(df_with_count_2016['income and research average'].mean(), 2)+5,
           xref="x1",
           yref="y1",
           text="Average of research and income / 100: " + str(round(df_with_count_2016['income and research average'].mean(), 2)),
           showarrow=False,
           font=dict(
               family="Courier New, monospace",
               size=20,
               color="white"
           ))
fig['layout']['annotations'][0].update({'font': {'size': 12}})

colours = []

def colour():
    hex_chars = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f']
    randomColour = '#'
    for i in range(0, 6):
        randomColour = randomColour + choice(hex_chars)
    return randomColour

df_with_count_2016 = df_with_count_2016.sort_values('countries total times on list', ascending = False)

countries = df_with_count_2016['country'].unique()

for c in countries:
    colours.append(colour())
    

for i, c in enumerate(countries):
  fig.add_trace(go.Scatter(y = df_with_count_2016[df_with_count_2016['country']==c]['research'], x = df_with_count_2016[df_with_count_2016['country']==c]['income'], mode = 'markers',
                                     marker = dict(
                size=df_with_count_2016[df_with_count_2016['country']==c]['num_students']/100,
                color = colours[i],
                line_color='rgb(40,40,40)',
                line_width=0.5,
                sizemode = 'area',
                colorscale='Inferno',
                reversescale=False),
                customdata=np.stack((list(round(df_with_count_2016[df_with_count_2016['country']==c]['income'], 2)), 
                                         list(df_with_count_2016[df_with_count_2016['country']==c]['uni total times on list']), list(df_with_count_2016[df_with_count_2016['country']==c]['research']), list(df_with_count_2016[df_with_count_2016['country']==c]['income and research average'])), axis=-1),
                    text=np.stack((list(df_with_count_2016[df_with_count_2016['country']==c]['university_name']), list(df_with_count_2016[df_with_count_2016['country']==c]['country'])), axis=-1),
                    hovertemplate =
        '<br><b>University Name</b>: %{text[0]}' +
        '<br><b># of Appearances from 2011 - 2016</b>: %{customdata[1]}' +
        '<br><b>Income Score/100</b>: %{customdata[0]}' +
        '<br><b>Research Score/100</b>: %{customdata[2]}' +
        '<br><b>Average Income and Research Score/100</b>: %{customdata[3]}'
        '<br><b>Country</b>: %{text[1]}<extra></extra>', 
                legendgroup = c,
                showlegend=True,
                name=str(c),
                opacity=.9),
            row=1, col=2)

fig['layout'].update(
    annotations=[
        dict(
     x=92, y=98, # annotation point
        xref='x2', 
        yref='y2',
        text="",
        showarrow=False,
        font=dict(
        color="white",
                size=14
        ),
 
    ),
     dict(
     x=92, y=98, # annotation point
        xref='x2', 
        yref='y2',
        text="High income, high research",
        showarrow=False,
        font=dict(
        color="white",
                size=14
        ),
 
    ),
    dict(
     x=8, y=5,
    xref='x2',
    yref='y2',
    text="Low income, low research",
    showarrow=False,
    font=dict(
    color="white",
            size=14
    ),
), dict(
    x=5, y=98,
    xref = 'x2',
    yref = 'y2',
    text="High research, low income",
    showarrow=False,
    font=dict(
    color="white",
            size=14
    ),
    ),
 dict(
     x=92, y=5, # annotation point
        xref='x2', 
        yref='y2',
        text="High income, low research",
        showarrow=False,
        font=dict(
        color="white",
                size=14
        ),
 
    ),
    ])

fig.add_annotation(
           x=5,
           y=round(df_with_count_2016['research'].mean(), 2)+5,
           xref="x2",
           yref="y2",
           text="Average income: " + str(round(df_with_count_2016['income'].mean(), 2)),
           showarrow=False,
           font=dict(
               family="Courier New, monospace",
               size=12,
               color="white"
           ))
fig['layout']['annotations'][1].update({'font': {'size': 12}})

fig.add_annotation(
           x=0,
           y=17,
           xref="x2",
           yref="y2",
           text="Average research: " +str(round(df_with_count_2016['research'].mean(), 2)),
            textangle=-90,
           showarrow=False,
           font=dict(
               family="Courier New, monospace",
               size=12,
               color="white"
           ))
fig['layout']['annotations'][1].update({'font': {'size': 14}})

df_with_count_2016_mean = df_with_count_2016.groupby('country', as_index=False).mean()

fig.add_trace(go.Choropleth(
        locations = df_with_count_2016_mean['country'],
        locationmode = 'country names',
    z= df_with_count_2016_mean['income and research average'],
        colorscale = 'Inferno',
    reversescale=False,
    marker_line_color='darkgray',
    marker_line_width=0.5,
    customdata=np.stack((list(round(df_with_count_2016_mean['income'], 2)), 
                                     list(df_with_count_2016_mean['max_country_sum_year']), list(round(df_with_count_2016_mean['research'], 2)), (list(round(df_with_count_2016_mean['income and research average'], 2)))), axis=-1),
                text=df_with_count_2016_mean['country'], 
                hovertemplate =
    '<br><b># of Appearances in 2016</b>: %{customdata[1]}' +
    '<br><b>Income Score/100</b>: %{customdata[0]}' +
    '<br><b>Research Score/100</b>: %{customdata[2]}' +
    '<br><b>Average Income and Research Score/100</b>: %{customdata[3]}'
    '<br><b>Country</b>: %{text}<extra></extra>',  
    showlegend=False,
    showscale=False
))


fig.add_shape(go.layout.Shape(type="line",
                                        yref="y1",
                                        xref="x1",
                                        x0=0,
                                        y0= statistics.mean([(x + y)/2 for x, y in zip(df_with_count_2016['income'], df_with_count_2016['research'])]),
                                        x1=len(df_with_count_2016['university_name']),
                                        y1= statistics.mean([(x + y)/2 for x, y in zip(df_with_count_2016['income'], df_with_count_2016['research'])]),
                                        line=dict(color='blue', width=3, dash='dash')
                             )
             )

fig.add_shape(go.layout.Shape(type="line",
                                        yref="y2",
                                        xref="x2",
                                        x0=0,
                                        y0=df_with_count_2016['research'].mean(),
                                        x1=100,
                                        y1=df_with_count_2016['research'].mean(),
                                        line=dict(color='blue', width=3, dash='dash')
                             )
             )

fig.add_shape(go.layout.Shape(type="line",
                                        yref="y2",
                                        xref="x2",
                                        x0=df_with_count_2016['income'].mean(),
                                        y0=0,
                                        x1=df_with_count_2016['income'].mean(),
                                        y1=100,
                                        line=dict(color='blue', width=3, dash='dash')))



fig.update_xaxes(showticklabels=False, row=1, col=1)

fig.update_layout(
    hoverlabel=dict(
        bgcolor="white", 
        font_size=16, 
        font_family="Rockwell"
    )
)

fig.update_layout(plot_bgcolor='grey')

fig.update_layout(height=2000, width=1250)

fig.update_layout(
    title={
        'text': "Income and Research Average Score / 100",
        'y':1,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    },
    font=dict(
        color="white",
        size=18)
)


fig.update_layout(
                  template='plotly_dark')

fig.show()
In [70]:
df_with_count_2016_mean.columns
Out[70]:
Index(['country', 'year', 'teaching', 'research', 'citations', 'income',
       'num_students', 'student_staff_ratio', 'latitude', 'longitude', 'count',
       'uni_count', 'running_country_sum_year', 'max_country_sum_year',
       'running_country_sum', 'countries total times on list',
       'running_uni_sum', 'uni total times on list',
       'income and research average'],
      dtype='object')
In [81]:
df_with_count_2016 = df_with_count[df_with_count['year']==2016]

f = go.FigureWidget([go.Scatter(y = df_with_count_2016['income'], x = df_with_count_2016['income'], mode = 'markers',
                                 marker = dict(
            size=df_with_count_2016['num_students']/100,
            color = df_with_count_2016['income'],
            line_color='rgb(40,40,40)',
            line_width=0.5,
            sizemode = 'area',
            colorscale='Inferno',
            reversescale=False,
            colorbar={'title': 'Income Score/100', "tickvals": list(range(0, 100, 10)), "nticks": 10, 'ticksuffix': '%'
                     },
        ),
           customdata=np.stack((list(round(df_with_count_2016['income'], 2)), 
                                     list(df_with_count_2016['uni total times on list']), list(round(df_with_count_2016['research'], 2)), list(df_with_count_2016['num_students'])), axis=-1),
                text=df_with_count_2016['university_name'],
                hovertemplate =
    '<br><b>University</b>: %{text}' +
    '<br><b>Times on List</b>: %{customdata[1]}' +
    '<br><b># Students</b>: %{customdata[3]}' +
    '<br><b>x</b>: %{x}' +
    '<br><b>y</b>: %{y}<extra></extra>'                              
                                ,                      
                                 )]) 
scatter = f.data[0]
N = len(df_with_count_2016)
scatter.x = scatter.x + np.random.rand(N)/10 *(df_with_count_2016['income'].max() - df_with_count_2016['income'].min())
scatter.y =  scatter.y + np.random.rand(N)/10 *(df_with_count_2016['income'].max() - df_with_count_2016['income'].min())
scatter.marker.opacity = 0.5




def update_axes(xaxis, yaxis):
    scatter = f.data[0]
    scatter.x = df_with_count_2016[xaxis]
    scatter.y = df_with_count_2016[yaxis]
    with f.batch_update():
        f.layout.xaxis.title = xaxis
        f.layout.yaxis.title = yaxis
        scatter.x = scatter.x + np.random.rand(N)/10 *(df_with_count_2016[xaxis].max() - df_with_count_2016[xaxis].min())
        scatter.y = scatter.y + np.random.rand(N)/10 *(df_with_count_2016[yaxis].max() - df_with_count_2016[yaxis].min())
        
        
axis_dropdowns = interactive(update_axes, yaxis = df_with_count_2016[['teaching', 'research',
       'citations', 'income', 'num_students', 'student_staff_ratio']].columns, xaxis = df_with_count_2016[['teaching', 'research',
       'citations', 'income', 'num_students', 'student_staff_ratio']].columns)


t = go.FigureWidget([go.Table(
    header=dict(values=['country', 'year', 'university_name', 'teaching', 'research',
       'citations', 'income', 'num_students', 'student_staff_ratio','uni total times on list'],
                fill = dict(color='#C2D4FF'),
                align = ['left'] * 5),
    cells=dict(values=[df_with_count_2016[col] for col in ['country', 'year', 'university_name', 'teaching', 'research',
       'citations', 'income', 'num_students', 'student_staff_ratio', 'uni total times on list']],
               fill = dict(color='#F5F8FF'),
               align = ['left'] * 5))])

def selection_fn(trace,points,selector):
    t.data[0].cells.values = [df_with_count_2016.iloc[points.point_inds][col] for col in ['country', 'year', 'university_name', 'teaching', 'research',
       'citations', 'income', 'num_students', 'student_staff_ratio', 'uni total times on list']]

scatter.on_selection(selection_fn)

VBox((HBox(axis_dropdowns.children),f,t))
In [83]:
fig = px.scatter_geo(df_count_country_with_year.sort_values('year', ascending=True), lat='latitude', lon='longitude', size=[1]*df_count_country_with_year['country'].size, color="income and research average",
                     hover_name="country", hover_data={
                     'income': True,
                     'research': True,
                     'income and research average': True,
                      'max_country_sum_year': True,
                      'count': False,
                     'latitude': False,
                     'longitude': False,
                     }, animation_frame="year", projection='natural earth', 
                     opacity=0.45
                    )

fig.update_layout(
    autosize=False,
    width=900,
    height=600,)

fig.update_layout(
    title_text='Universities by Country Median Income Score/100', title_x=0.5)

fig.update_layout(
                  template='plotly_dark')


fig.show()
In [38]:
df_count_country_with_year
Out[38]:
country year teaching research citations income num_students student_staff_ratio latitude longitude count uni_count running_country_sum_year max_country_sum_year running_country_sum countries total times on list running_uni_sum uni total times on list income and research average
0 Argentina 2016 16.000000 9.000000 12.500000 28.60 108373.000000 38.100000 -38.416097 -63.616672 1.0 1.0 1.0 1.0 1.0 1.0 1.000000 1.000000 18.800000
1 Australia 2011 49.283333 51.833333 69.800000 58.18 37779.333333 28.850000 -25.274398 133.775136 1.0 1.0 3.5 6.0 3.5 116.0 1.000000 6.000000 55.006667
2 Australia 2012 29.019048 31.952381 42.338095 49.14 26553.952381 26.147619 -25.274398 133.775136 1.0 1.0 11.0 21.0 17.0 116.0 1.285714 4.714286 40.546190
3 Australia 2013 34.968421 40.921053 53.805263 53.44 26095.947368 27.442105 -25.274398 133.775136 1.0 1.0 10.0 19.0 37.0 116.0 2.210526 5.052632 47.180526
4 Australia 2014 29.857895 34.389474 56.631579 54.26 26341.157895 28.173684 -25.274398 133.775136 1.0 1.0 10.0 19.0 56.0 116.0 3.157895 5.105263 44.324737
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
244 United States 2012 47.215000 45.938750 73.341250 47.13 23853.687500 14.303750 37.090240 -95.712891 1.0 1.0 40.5 80.0 69.5 493.0 1.350000 4.925000 46.534375
245 United States 2013 52.444304 51.430380 79.150633 47.65 24365.518987 13.617722 37.090240 -95.712891 1.0 1.0 40.0 79.0 149.0 493.0 2.227848 5.126582 49.540190
246 United States 2014 46.854217 44.490361 78.269880 47.79 23865.566265 13.593976 37.090240 -95.712891 1.0 1.0 42.0 83.0 230.0 493.0 3.084337 5.048193 46.140181
247 United States 2015 47.693103 45.572414 78.600000 48.83 23939.643678 13.504598 37.090240 -95.712891 1.0 1.0 44.0 87.0 315.0 493.0 3.885057 4.873563 47.201207
248 United States 2016 41.148889 37.815556 71.886667 44.56 22021.837037 14.394815 37.090240 -95.712891 1.0 1.0 68.0 135.0 426.0 493.0 3.637037 3.637037 41.187778

249 rows × 19 columns

In [73]:
df_with_count_2016_mean
Out[73]:
country year teaching research citations income num_students student_staff_ratio latitude longitude count uni_count running_country_sum_year max_country_sum_year running_country_sum countries total times on list running_uni_sum uni total times on list income and research average
0 Argentina 2016.0 16.000000 9.000000 12.500000 28.600000 108373.000000 38.100000 -38.416097 -63.616672 1.0 1.0 1.0 1.0 1.0 1.0 1.000000 1.000000 18.800000
1 Australia 2016.0 28.761290 32.832258 60.603226 48.806452 23987.967742 26.919355 -25.274398 133.775136 1.0 1.0 16.0 31.0 101.0 116.0 3.741935 3.741935 40.819355
2 Austria 2016.0 27.885714 22.842857 61.771429 47.771429 19211.000000 22.957143 47.516231 14.550072 1.0 1.0 4.0 7.0 27.0 30.0 4.285714 4.285714 35.307143
3 Belarus 2016.0 20.200000 8.700000 6.000000 28.000000 29303.000000 10.600000 53.709807 27.953389 1.0 1.0 1.0 1.0 1.0 1.0 1.000000 1.000000 18.350000
4 Belgium 2016.0 36.600000 43.171429 64.257143 73.528571 24237.285714 31.057143 50.503887 4.469936 1.0 1.0 4.0 7.0 33.0 36.0 5.142857 5.142857 58.350000
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
61 Uganda 2016.0 15.100000 10.600000 70.700000 28.000000 37340.000000 29.400000 1.373333 32.290275 1.0 1.0 1.0 1.0 1.0 1.0 1.000000 1.000000 19.300000
62 Ukraine 2016.0 24.800000 9.950000 2.950000 28.400000 18548.000000 8.950000 48.379433 31.165580 1.0 1.0 1.5 2.0 1.5 2.0 1.000000 1.000000 19.175000
63 United Arab Emirates 2016.0 15.600000 12.700000 17.500000 31.700000 7223.500000 13.300000 23.424076 53.847818 1.0 1.0 1.5 2.0 1.5 2.0 1.000000 1.000000 22.200000
64 United Kingdom 2016.0 31.898649 30.440541 60.759459 34.386486 17663.364865 17.120270 55.378051 -3.435973 1.0 1.0 37.5 74.0 248.5 285.0 3.851351 3.851351 32.413514
65 United States 2016.0 41.148889 37.815556 71.886667 44.557778 22021.837037 14.394815 37.090240 -95.712891 1.0 1.0 68.0 135.0 426.0 493.0 3.637037 3.637037 41.186667

66 rows × 19 columns

In [167]:
fig = px.bar(df_count_country, y='income and research average', x='country', text='countries total times on list', color='income and research average', color_continuous_scale='Jet')
fig.update_traces(texttemplate='%{text:}', textposition='outside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
fig.update_layout(xaxis={'categoryorder':'total descending'})
fig.update_layout(
    title_text='Country Average Income Score/100 and # of Universities from 2011 - 2016'
    , title_x=0.5)
fig.update_layout(
    autosize=False,
    width=900,
    height=600,)

fig.add_shape(
        # Line Vertical
        dict(
            type="line",
            x0=-0.7,
            y0=df_count_country['income and research average'].mean(),
            x1=len(df_count_country['country']),
            y1=df_count_country['income and research average'].mean(),
            line=dict(
                color="LightSeaGreen",
                width=4,
                dash="dashdot",
            )
))

fig.update_layout(
                  template='plotly_dark')

fig.add_annotation(
           x=df_count_country['country'].loc[int(len(df_count_country['country'])/2)+1],
           y=round(df_count_country['income and research average'].mean(), 2)+5,
           xref="x1",
           yref="y1",
           text="Average of research and income / 100: " + str(round(df_count_country['income and research average'].mean(), 2)),
           showarrow=False,
           font=dict(
               family="Courier New, monospace",
               size=12,
               color="white"
           ))
fig.show()
In [160]:
df_university_count_2 = df_university_count.groupby('university_name', as_index=False).mean()
fig = px.bar(df_university_count_2, y='income and research average', x='university_name', text='total times on list', color='income and research average', color_continuous_scale='jet',
              hover_name="university_name", hover_data={
                     'income and research average': True,
                     'total times on list': True,})

fig.update_layout(xaxis={'categoryorder':'total descending'})
fig.update_layout(
    title_text='Universities by Average Income Score/100 and # of Universities from 2011 - 2016', title_x=0.5
    )
fig.update_layout(
    autosize=False,
    width=900,
    height=700,
    font=dict(
        size=10,
    ))

fig.add_shape(
        dict(
            type="line",
            x0=-0.7,
            y0=df_university_count_2['income and research average'].mean(),
            x1=len(df_university_count_2['university_name']),
            y1=df_university_count_2['income and research average'].mean(),
            line=dict(
                color="LightSeaGreen",
                width=4,
                dash="dashdot",
            )
))

fig.add_annotation(
           x=df_university_count['university_name'].loc[int(len(df_university_count['university_name'])/2)],
           y=round(df_university_count['income and research average'].mean(), 2)+2,
           xref="x1",
           yref="y1",
           text="Average of research and income / 100: " + str(round(df_university_count['income and research average'].mean(), 2)),
           showarrow=False,
           font=dict(
               family="Courier New, monospace",
               size=12,
               color="white"
           ))

fig.update_layout(
                  template='plotly_dark')
fig.update_xaxes(tickangle=45)
fig.show()
In [134]:
df_all_years_groupby = df_with_count.groupby(['university_name', 'country'], as_index=False).mean()
df_all_years_groupby = df_all_years_groupby.sort_values('countries total times on list', ascending = False)
country_names=df_all_years_groupby ['country'].unique()

country_names = df_all_years_groupby ['country'].unique()

country_appearances = {}
n=len(country_names)
n_cols=3

def create_bar(df, row, col, c):
    fig.add_trace(
            go.Bar(
                x=df[df['country']==c]['university_name'],
                y=df[df['country']==c]['income and research average'],
                customdata=np.stack((list(round(df[df['country']==c]['income and research average'], 2)), 
                                     list(df[df['country']==c]['uni total times on list'])), axis=-1),
                text=df[df['country']==c]['country'],
                hovertemplate =
    '<br><b>University Name</b>: %{x}' +
    '<br><b># of Appearances</b>: %{customdata[1]}' +
    '<br><b>Average Income and Resarch Score/100</b>: %{y}<extra></extra>',
                name=str(c),
                marker=dict(color=df[df['country']==c]['income and research average'])
        ),
            row=row, col=col,)
def create_line(df, row, col, c):
        x = list(df[df['country']==c]['university_name'])
        y = [round(df[df['country']==c]['income and research average'].mean(), 2)] * len(df[df['country']==c]['university_name'])
        fig.add_trace(go.Scatter(
                x=x,
                y=y,
                hovertemplate =
    '<br><b>Average Income Score</b>: %{y:}</br><extra></extra>',
            showlegend=False,
            mode="lines"),
            row=row, col=col,)
        
def annotate(df, i):
    x = list(df[df['country']==c]['university_name'])
    y = [round(df[df['country']==c]['income and research average'].mean(), 2)] * len(df[df['country']==c]['university_name'])
    fig['layout']['yaxis'+str(i+1)].update(title='', range=[0, 100], dtick=20, autorange=False)
    fig.add_annotation(
       x=x[int(len(x)/2)-1],
       y=round(df[df['country']==c]['income and research average'].mean(), 2)+10,
       xref="x" + str(i+1),
       yref="y" + str(i+1),
       text="Average: " + str(round(df[df['country']==c]['income and research average'].mean(), 2)),
       showarrow=False,
       font=dict(
           family="Courier New, monospace",
           size=16,
           color="white"
       ))
    fig['layout']['annotations'][i].update({'font': {'size': 12}})
        
title_items = []
for c, t in zip(df_all_years_groupby['country'], df_all_years_groupby['countries total times on list']):
    country_appearances[c] = t
    
for k, v in country_appearances.items():
    title_items.append(str(k) + ', ' + str(int(v)) + ' appearances')

fig = make_subplots(math.ceil(len(country_names[:n])/n_cols), n_cols, shared_yaxes=True,
                   subplot_titles=title_items)

df_all_years_groupby = df_all_years_groupby.sort_values('income and research average', ascending = False)

row = 1
col = 1
for i, c in enumerate(country_names[:n]):
    if col <= n_cols:
        create_bar(df_all_years_groupby, row, col, c)
        create_line(df_all_years_groupby, row, col, c)
        fig['layout']['yaxis'+str(i+1)].update(title='', range=[0, 100], dtick=20, autorange=False)
        annotate(df_all_years_groupby, i)
        col += 1
    else: 
        col = 1
        row = row + 1
        create_bar(df_all_years_groupby, row, col, c)
        create_line(df_all_years_groupby, row, col, c)
        fig['layout']['yaxis'+str(i+1)].update(title='', range=[0, 100], dtick=20, autorange=False)
        annotate(df_all_years_groupby, i)
        col+=1


fig.update_layout(
    autosize=False,
    width = 1000,
    height=3000,
    font=dict(
        size=10,
    ))

fig.update_xaxes(showticklabels=False)
fig.update_layout(
    hoverlabel=dict(
        bgcolor="white", 
        font_size=16, 
        font_family="Rockwell"
    )
)

fig.update_layout(
                  template='plotly_dark')

fig.update_layout(
    title_text='Average University Income and Research Scores by All Years (2011 - 2016)', title_x=0.5
    )

fig.show()
In [135]:
# average of all years

fig = go.Figure()

fig.add_trace(go.Scattergeo(
        lon = df_count_country['longitude'],
        lat = df_count_country['latitude'],
        marker = dict(
            size=df_count_country['count']*10,
            color = df_count_country['income'],
            line_color='rgb(40,40,40)',
            line_width=0.5,
            sizemode = 'area',
            colorscale='Inferno',
            reversescale=False,
            colorbar={'title': 'Income Score/100', "tickvals": list(range(0, 100, 10)), "nticks": 10, 'ticksuffix': '%'
                     },
        ),
    text=df_count_country['country'],
    customdata=np.stack((list(round(df_count_country['income'], 2)), list(round(df_count_country['count'], 0))), axis=-1),
    hovertemplate =
    '<br><b>Country</b>: %{text}' +
    '<br><b># of Universities</b>: %{customdata[1]}' +
    '<br><b>Average Income Score/100</b>: %{customdata[0]}<extra></extra>',
    opacity=0.7
))
        
    
fig.update_layout(
    autosize=False,
    width=900,
    height=600,)

fig.update_layout(
    title_text='Universities by Country Median Income Score/100', title_x=0.5,
    geo=dict(
        showframe=False,
        showcoastlines=False,
        projection_type='natural earth'
    ))

fig.show()

my_list