%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import colors
import matplotlib.ticker as mtick
from matplotlib.ticker import PercentFormatter
from matplotlib.pyplot import figure
import seaborn as sns
import pandas as pd

# Load the menu nutrition table; `df` is the untouched source frame that the
# later cells read from.
# NOTE(review): this is an absolute, machine-specific path — the notebook only
# runs where this file exists at exactly this location.
menu_csv_path = r"C:\Users\james\Downloads\McDonalds Nutrition\menu.csv"
df = pd.read_csv(menu_csv_path)
df.head()

df.columns

Index(['Category', 'Item', 'Serving Size', 'Calories', 'Calories from Fat',
       'Total Fat', 'Total Fat (% Daily Value)', 'Saturated Fat',
       'Saturated Fat (% Daily Value)', 'Trans Fat', 'Cholesterol',
       'Cholesterol (% Daily Value)', 'Sodium', 'Sodium (% Daily Value)',
       'Carbohydrates', 'Carbohydrates (% Daily Value)', 'Dietary Fiber',
       'Dietary Fiber (% Daily Value)', 'Sugars', 'Protein',
       'Vitamin A (% Daily Value)', 'Vitamin C (% Daily Value)',
       'Calcium (% Daily Value)', 'Iron (% Daily Value)'],
      dtype='object')

# Histogram of the 'Calories' column: bars are shaded by their height relative
# to the tallest bar, and a dashed line marks the mean.
calories = df.loc[:, 'Calories']
n_bins = 8

fig, axs = plt.subplots(nrows=1, ncols=1, figsize=(20, 6))
fig.suptitle('Average Calories Histogram', fontsize=16)

# Dashed white line at the mean (white so it shows on the black axes background
# that is set at the end of this cell).
axs.axvline(calories.mean(), color='w', linestyle='dashed', linewidth=1)

# The 'green' fill is only a placeholder; every bar is recoloured just below.
N, bins, patches = axs.hist(calories, bins=n_bins, color='green', edgecolor='black')

# Map each bar's count (as a fraction of the largest count) onto viridis.
fracs = N / N.max()
norm = colors.Normalize(vmin=fracs.min(), vmax=fracs.max())
for bar, frac in zip(patches, fracs):
    bar.set_facecolor(plt.cm.viridis(norm(frac)))

# Bar heights stay as raw counts; the formatter displays them as a percentage
# of the total number of rows.
axs.yaxis.set_major_formatter(PercentFormatter(xmax=len(calories)))

axs.set_xlabel('Calories', fontsize=14)
axs.set_ylabel('Percentage', fontsize=14, labelpad=15)

axs.tick_params(labelsize=12)
axs.tick_params(axis='x', labelrotation=45)

# Y ticks every 15 counts, from 0 up to (not including) the top of the y-range.
y_upper = max(axs.get_ylim())
major_ticks = np.arange(0, y_upper, 15)
axs.set_yticks(major_ticks)

axs.set_facecolor('k')
fig.set_facecolor('w')

# Working copy used for reshaping. It is derived from the already-loaded `df`
# rather than reading the same CSV from its hard-coded path a second time;
# `drop` returns a new frame, so `df` itself is left untouched.
df2 = df.drop(columns=['Serving Size'])

# After the drop, the first two columns are 'Category' and 'Item'; everything
# from position 2 onward is a nutrition measurement and is cast to int.
# NOTE(review): astype(int) truncates toward zero, so any fractional amounts in
# the float columns (e.g. 'Total Fat') are lost — confirm this is intended.
measurement_columns = df2.columns[2:]
df2 = df2.astype({column: int for column in measurement_columns})
df2.head()

# Reshape df2 from wide (one column per measurement) to long form with the
# columns Category, Item, variable, value, ordered by value descending.
# `val_vars` holds the identifier columns that are kept as-is; every other
# column is unpivoted. Index.difference returns the remaining names sorted.
val_vars = ['Category', 'Item']
other_vars = df2.columns.difference(val_vars)
df2 = (
    df2.melt(id_vars=val_vars, value_vars=other_vars)
       .sort_values(by='value', ascending=False)
       .reset_index(drop=True)
)
df2.head()

# Box plot of calories per menu category with the individual items overlaid as
# jittered points, plus a dashed line at the overall median.
fig, ax = plt.subplots(figsize=(20, 6))
sns.set_style("whitegrid")

# Long-form rows that carry the 'Calories' measurement; shared by both plots
# and by the median computation.
calorie_rows = df2[df2['variable'] == 'Calories']

bplot = sns.boxplot(x='Category', y='value', data=calorie_rows, width=0.75, color='linen')
splot = sns.stripplot(x='Category', y='value', data=calorie_rows, jitter=True, marker='o', alpha=0.5, color='r')
plt.xticks(rotation='vertical', fontsize=12)

ax.set_xlabel('Categories', fontsize=14)
ax.set_ylabel('Calories', fontsize=14)
ax.set_title('Calories Boxplot and Jitter', fontsize=14)

# Median over all items (not per category), truncated to an int for the label.
mediancalories = int(calorie_rows['value'].median())
ax.axhline(mediancalories, color='k', linestyle='dashed', linewidth=2)
# The label sits 200 calories below the median line.
ax.text(0.27, mediancalories - 200, 'Median: ' f'{mediancalories:,}', size=12, color='k')

ax.set_facecolor('whitesmoke')
ax.grid(color='k')
fig.set_facecolor('w')

# Distinct categories and measurement names, in order of first appearance in
# the (value-sorted) long frame.
categories = df2['Category'].unique()
variables = df2['variable'].unique()

# Split the measurement names by whether the name contains a '%' sign
# (the "% Daily Value" columns) or not (absolute amounts).
variables_percentage = [name for name in variables if '%' in name]
variables_total = [name for name in variables if '%' not in name]

df2 = df2.sort_values('value', ascending=False)

categories

array(['Chicken & Fish', 'Breakfast', 'Beef & Pork', 'Smoothies & Shakes',
       'Salads', 'Snacks & Sides', 'Coffee & Tea', 'Desserts',
       'Beverages'], dtype=object)

# One violin + strip plot per "% Daily Value" variable for a single menu
# category (selected by index `g` into `categories`).
g = 0

# Two columns; enough rows for every variable (5 rows for 10 variables).
n_rows = max(1, (len(variables_percentage) + 1) // 2)
fig, axs = plt.subplots(n_rows, 2, figsize=(30, 30))
axs = axs.flatten()

for j, variable in enumerate(variables_percentage):
    # Rejection-sample a random RGB colour until the green and blue channels
    # are both >= 0.3. This is the same acceptance rule as the previous
    # k-index loop. NOTE(review): the red channel is unconstrained — confirm
    # that is intended.
    while True:
        color = np.random.rand(3)
        if (color[1:] >= 0.3).all():
            break

    value = df2.loc[(df2['Category'] == categories[g]) & (df2['variable'] == variable), 'value']

    # Pass the data as `y=` so the plot is vertical on every seaborn version
    # (a lone positional/x vector is drawn horizontally by newer releases, which
    # would not match the horizontal mean line below). The sampled colour is
    # now actually used instead of being discarded for a second random draw.
    sns.violinplot(y=value, ax=axs[j], color=color, saturation=0.075)
    sns.stripplot(y=value, marker='o', alpha=0.5, color='w', ax=axs[j])
    axs[j].set_title(variable, fontsize=14)

    # Mean truncated to an int, drawn as a dashed line and labelled near the
    # left edge of the axes.
    mean = int(value.mean())
    axs[j].axhline(mean, color='w', linestyle='dashed', linewidth=1)
    axs[j].text(-0.495, mean, 'AVG: ' f'{mean:,}',
                size=14, color='w')
    axs[j].tick_params(labelsize=12)
    axs[j].set_ylabel('Value', fontsize=14)
    axs[j].set_facecolor('k')

# Hide any grid cell that did not receive a plot.
for unused_ax in axs[len(variables_percentage):]:
    unused_ax.set_visible(False)

plt.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=0.1, hspace=0.5)
fig.suptitle(categories[g], y=0.93, fontsize = 16)
fig.set_facecolor('w')
plt.show()

# One violin + strip plot per absolute-amount variable (names without '%') for
# a single menu category (selected by index `g` into `categories`).
g = 1

# Size the grid from the data: the fixed 5x2 grid had 10 axes, but the column
# listing yields 11 non-percentage variables, so the 11th plot indexed past the
# end of `axs` and raised IndexError.
n_rows = max(1, (len(variables_total) + 1) // 2)
fig, axs = plt.subplots(n_rows, 2, figsize=(30, 40))
axs = axs.flatten()

for j, variable in enumerate(variables_total):
    # Rejection-sample a random RGB colour until the green and blue channels
    # are both >= 0.3. This is the same acceptance rule as the previous
    # k-index loop. NOTE(review): the red channel is unconstrained — confirm
    # that is intended.
    while True:
        color = np.random.rand(3)
        if (color[1:] >= 0.3).all():
            break

    value = df2.loc[(df2['Category'] == categories[g]) & (df2['variable'] == variable), 'value']

    # Pass the data as `y=` so the plot is vertical on every seaborn version
    # (a lone positional/x vector is drawn horizontally by newer releases, which
    # would not match the horizontal mean line below).
    sns.violinplot(y=value, ax=axs[j], color=color)
    sns.stripplot(y=value, marker='o', alpha=0.5, color='w', ax=axs[j])
    axs[j].set_title(variable, fontsize=10)

    # Mean truncated to an int, drawn as a dashed line and labelled near the
    # left edge of the axes.
    mean = int(value.mean())
    axs[j].axhline(mean, color='w', linestyle='dashed', linewidth=1)
    axs[j].text(-0.495, mean, 'AVG: ' f'{mean:,}', size=14, color='w')
    axs[j].set_facecolor('k')

# With an odd number of variables the last grid cell is empty; hide it.
for unused_ax in axs[len(variables_total):]:
    unused_ax.set_visible(False)

plt.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=0.1, hspace=0.5)
fig.suptitle(categories[g], y=0.93, fontsize = 16)
fig.set_facecolor('w')
plt.show()

# One violin + strip plot per menu category, all showing the SAME variable
# (selected by index `g` into `variables`), so the panels are comparable and
# match the figure title.
g = 1
selected_variable = variables[g]

# Two columns; enough rows for every category.
n_rows = max(1, (len(categories) + 1) // 2)
fig, axs = plt.subplots(n_rows, 2, figsize=(30, 30))
axs = axs.flatten()

for j, category in enumerate(categories):
    # Rejection-sample a random RGB colour until the green and blue channels
    # are both >= 0.3. This is the same acceptance rule as the previous
    # k-index loop. NOTE(review): the red channel is unconstrained — confirm
    # that is intended.
    while True:
        color = np.random.rand(3)
        if (color[1:] >= 0.3).all():
            break

    # Previously this filtered on variables[j], i.e. a different variable for
    # every category, while the figure was titled variables[g].
    value = df2.loc[(df2['Category'] == category) & (df2['variable'] == selected_variable), 'value']

    # Pass the data as `y=` so the plot is vertical on every seaborn version
    # (a lone positional/x vector is drawn horizontally by newer releases, which
    # would not match the horizontal mean line below). The sampled colour is
    # now actually used instead of being discarded for a second random draw.
    sns.violinplot(y=value, ax=axs[j], color=color, saturation=0.075)
    sns.stripplot(y=value, marker='o', alpha=0.5, color='w', ax=axs[j])
    axs[j].set_title(category, fontsize=14)

    # Mean truncated to an int, drawn as a dashed line and labelled near the
    # left edge of the axes.
    mean = int(value.mean())
    axs[j].axhline(mean, color='w', linestyle='dashed', linewidth=1)
    axs[j].text(-0.495, mean, 'AVG: ' f'{mean:,}',
                size=14, color='w')
    axs[j].tick_params(labelsize=12)
    axs[j].set_ylabel('Value', fontsize=14)
    axs[j].set_facecolor('k')

# Hide grid cells that did not receive a category (e.g. the 10th cell when
# there are 9 categories).
for unused_ax in axs[len(categories):]:
    unused_ax.set_visible(False)

plt.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=0.1, hspace=0.5)
fig.suptitle(selected_variable, y=0.93, fontsize = 16)
fig.set_facecolor('w')

plt.show()

# Pair plot of the absolute-amount columns, coloured by menu category.
# Start from the source frame without the text columns, then remove every
# "% Daily Value" column that is present.
df3 = df.drop(columns=['Item', 'Serving Size'])
df3 = df3.drop(columns=[name for name in variables_percentage if name in df3.columns])

g = sns.pairplot(data=df3, hue='Category', height=8)
g.fig.set_size_inches(15, 15)

# Replace the grid's own legend with a single-row legend at the top of the
# figure. NOTE(review): `_legend_data` and `_legend` are private seaborn
# attributes and may change between seaborn versions.
legend_entries = g._legend_data
handles = legend_entries.values()
labels = legend_entries.keys()
g._legend.remove()
g.fig.legend(handles=handles, labels=labels, loc='upper center', ncol=9)

# Called after the plot is drawn; it changes the seaborn theme/font scale
# settings from this point on.
sns.set(font_scale=1.5)

c:\users\james\appdata\local\programs\python\python38-32\lib\site-packages\seaborn\distributions.py:283: UserWarning: Data must have variance to compute a kernel density estimate.
  warnings.warn(msg, UserWarning)
c:\users\james\appdata\local\programs\python\python38-32\lib\site-packages\seaborn\distributions.py:283: UserWarning: Data must have variance to compute a kernel density estimate.
  warnings.warn(msg, UserWarning)
c:\users\james\appdata\local\programs\python\python38-32\lib\site-packages\seaborn\distributions.py:283: UserWarning: Data must have variance to compute a kernel density estimate.
  warnings.warn(msg, UserWarning)
c:\users\james\appdata\local\programs\python\python38-32\lib\site-packages\seaborn\distributions.py:283: UserWarning: Data must have variance to compute a kernel density estimate.
  warnings.warn(msg, UserWarning)

# Absolute-amount variables other than 'Calories' (Calories is the quantity
# being compared against in the heatmaps).
# Built as a NEW list: plain assignment only aliased `variables_total`, so the
# in-place `.remove('Calories')` also stripped it from the original list and
# raised ValueError when this cell was run a second time.
variables_total_2 = [name for name in variables_total if name != 'Calories']

# One heatmap per remaining variable, stacked vertically: rows are menu
# categories, columns are the distinct values of that variable, and each cell
# is the pivot-table aggregate (pandas default: mean) of 'Calories'.
fig, axs = plt.subplots(nrows=len(variables_total_2), ncols=1, figsize=(20, 50))
axs = axs.flatten()

for heatmap_ax, v2 in zip(axs, variables_total_2):
    df4 = df.pivot_table(index='Category', columns=v2, values='Calories')
    g = sns.heatmap(df4, ax=heatmap_ax, linewidths=0.005, cmap='OrRd')
    # Cells with no data are left unpainted, so they show this black background.
    g.set_facecolor('k')

plt.subplots_adjust(wspace=0.1, hspace=0.5)
fig.suptitle('Calories Heatmap VS Other Nutritional Categories', y=0.89, fontsize=16)

	Category	Item	Serving Size	Calories	Calories from Fat	Total Fat	Total Fat (% Daily Value)	Saturated Fat	Saturated Fat (% Daily Value)	...	Carbohydrates	Carbohydrates (% Daily Value)	Dietary Fiber	Dietary Fiber (% Daily Value)	Sugars	Protein	Vitamin A (% Daily Value)	Calcium (% Daily Value)	Iron (% Daily Value)
0	Breakfast	Egg McMuffin	4.8 oz (136 g)	300	120	13.0	20	5.0	25	...	31	10	4	17	3	17	10	25	15
1	Breakfast	Egg White Delight	4.8 oz (135 g)	250	70	8.0	12	3.0	15	...	30	10	4	17	3	18	6	25	8
2	Breakfast	Sausage McMuffin	3.9 oz (111 g)	370	200	23.0	35	8.0	42	...	29	10	4	17	2	14	8	25	10
3	Breakfast	Sausage McMuffin with Egg	5.7 oz (161 g)	450	250	28.0	43	10.0	52	...	30	10	4	17	2	21	15	30	15
4	Breakfast	Sausage McMuffin with Egg Whites	5.7 oz (161 g)	400	210	23.0	35	8.0	42	...	30	10	4	17	2	21	6	25	10

	Category	Item	variable	value
0	Chicken & Fish	Chicken McNuggets (40 piece)	Sodium	3600
1	Breakfast	Big Breakfast with Hotcakes and Egg Whites (La...	Sodium	2290
2	Breakfast	Big Breakfast with Hotcakes (Large Biscuit)	Sodium	2260
3	Breakfast	Big Breakfast with Hotcakes and Egg Whites (Re...	Sodium	2170
4	Breakfast	Big Breakfast with Hotcakes (Regular Biscuit)	Sodium	2150