%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import colors
import matplotlib.ticker as mtick
from matplotlib.ticker import PercentFormatter
from matplotlib.pyplot import figure
import seaborn as sns
import pandas as pd
# Read the menu CSV from the local "McDonalds Nutrition" download folder.
menu_csv = r"C:\Users\james\Downloads\McDonalds Nutrition\menu.csv"
df = pd.read_csv(menu_csv)
# Bare expressions: only rendered when they are the last statement of a
# notebook cell; kept so the interactive previews still work.
df.head()
df.columns
# ---------------------------------------------------------------------------
# Histogram of the 'Calories' column; each bar is shaded with the viridis
# colormap according to its height relative to the tallest bar.
# ---------------------------------------------------------------------------
calories = df['Calories']
n_bins = 8

fig, axs = plt.subplots(1, 1, figsize=(20, 6))
fig.suptitle('Average Calories Histogram', fontsize = 16)

# Dashed white vertical line at the mean of the column.
axs.axvline(calories.mean(), color='w', linestyle='dashed', linewidth=1)

# N holds the per-bin counts, patches the bar artists (one per bin).
N, bins, patches = axs.hist(calories, bins=n_bins, color='green', edgecolor='black')

# Bar heights as a fraction of the tallest bar, normalised to the colormap range.
fracs = N / N.max()
norm = colors.Normalize(fracs.min(), fracs.max())
for bar, height_frac in zip(patches, fracs):
    bar.set_facecolor(plt.cm.viridis(norm(height_frac)))

# Y axis: counts are displayed as a percentage of the number of rows.
axs.yaxis.set_major_formatter(PercentFormatter(xmax=len(calories)))
axs.set_ylabel('Percentage', fontsize=14, labelpad=15)
axs.set_xlabel('Calories', fontsize=14)

# Tick styling: larger labels everywhere, x labels rotated by 45 degrees.
axs.tick_params(labelsize=12)
axs.tick_params(axis='x', labelrotation=45)

# Fixed y ticks every 15 counts, from 0 up to the current upper y limit.
major_ticks = np.arange(0, max(axs.get_ylim()), 15)
axs.set_yticks(major_ticks)

# Black plotting area on a white figure background.
axs.set_facecolor('k')
fig.set_facecolor('w')
# ---------------------------------------------------------------------------
# Build df2: a long-format copy of the menu table with one row per
# (Category, Item, variable) and the measurement in 'value'.
# ---------------------------------------------------------------------------
df2 = pd.read_csv(r"C:\Users\james\Downloads\McDonalds Nutrition\menu.csv")
df2 = df2.drop(['Serving Size'], axis=1)

# Every column from the third one onwards is cast to int.
measure_cols = df2.columns[2:]
df2[measure_cols] = df2[measure_cols].astype(int)
df2.head()

# Identifier columns stay as-is; all remaining columns are melted into
# 'variable' / 'value' pairs, then rows are ordered by value, largest first.
val_vars=['Category', 'Item']
other_vars = df2.columns.difference(val_vars)
df2 = (
    df2.melt(id_vars=val_vars, value_vars=other_vars)
    .sort_values('value', ascending = False)
    .reset_index(drop=True)
)
df2.head()
# ---------------------------------------------------------------------------
# Box plot of calories per category with the individual points jittered on
# top, plus a dashed line at the overall median.
# ---------------------------------------------------------------------------
fig, ax = plt.subplots(figsize=(20, 6))
sns.set_style("whitegrid")

# Rows of the long-format frame that hold the 'Calories' measurements.
calorie_rows = df2[df2['variable'] == 'Calories']

bplot = sns.boxplot(y='value', x='Category', data=calorie_rows, width=0.75, color='linen')
splot = sns.stripplot(y='value', x ='Category', data=calorie_rows, jitter=True, marker='o', alpha=0.5, color='r')

# Titles and axis labels.
plt.title('Calories Boxplot and Jitter', fontsize=14)
plt.xticks(rotation='vertical', fontsize=12)
ax.set_xlabel('Categories', fontsize=14)
ax.set_ylabel('Calories', fontsize=14)

# Median over all 'Calories' rows (truncated to int), drawn as a dashed
# line with its label placed 200 units below the line.
mediancalories = int(calorie_rows['value'].median())
ax.axhline(mediancalories, color='k', linestyle='dashed', linewidth=2)
ax.text(0.27, mediancalories -200, 'Median: ' f'{mediancalories:,}', size=12, color='k')

# Background and grid colours.
ax.set_facecolor('whitesmoke')
ax.grid(color='k')
fig.set_facecolor('w')
# Distinct category and variable names, in order of first appearance in df2.
categories = df2['Category'].unique()
variables = df2['variable'].unique()

# Partition the variable names by whether the name contains a '%' character.
variables_percentage = [name for name in variables if '%' in name]
variables_total = [name for name in variables if '%' not in name]

df2 = df2.sort_values('value', ascending = False)
# Bare expression: shows the category array when run as a notebook cell.
categories
# ---------------------------------------------------------------------------
# Figure: one violin + strip panel per '%' variable, restricted to the rows
# of a single category (categories[g]).
# ---------------------------------------------------------------------------
g = 0  # index into `categories` of the category shown in this figure
n_cols = 2
# Keep the original 5x2 layout, but add rows when there are more than ten
# variables so that axs[j] below can never run past the end of the grid.
n_rows = max(5, (len(variables_percentage) + n_cols - 1) // n_cols)
fig, axs = plt.subplots(n_rows, n_cols, figsize=(30, 30))
axs = axs.flatten()

for j, variable in enumerate(variables_percentage):
    # Rejection-sample an RGB triple: accept it only when the second and
    # third channels are both >= 0.3 (the first channel is unconstrained).
    # This is the acceptance rule of the earlier index-tracking loop,
    # assuming its final `k <= 1` check ran after the per-channel scan.
    # NOTE(review): the intent may have been "at most one dark channel" -
    # confirm before changing the rule.
    while True:
        color = np.random.rand(3,)
        if color[1] >= 0.3 and color[2] >= 0.3:
            break

    # Values of this variable for the selected category.
    value = df2['value'].loc[(df2['Category'] == categories[g]) & (df2['variable'] == variable)]

    # Use the colour sampled above; previously it was computed and then
    # discarded in favour of a second, unchecked random colour.
    sns.violinplot(value, ax=axs[j], orient="v", color=color, saturation=0.075)
    sns.stripplot(value, marker='o', alpha=0.5, color='w', orient="v", ax=axs[j])
    axs[j].set_title(variable, fontsize=14)

    # Dashed white line and label at the mean (truncated to int).
    mean = int(value.mean())
    axs[j].axhline(mean, color='w', linestyle='dashed', linewidth=1)
    axs[j].text(-0.495, mean, 'AVG: ' f'{mean:,}',
                size=14, color='w')

    axs[j].tick_params(labelsize=12)
    axs[j].set_ylabel('Value', fontsize=14)
    axs[j].set_facecolor('k')

# Figure-level layout, title (the category name) and background.
plt.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=0.1, hspace=0.5)
fig.suptitle(categories[g], y=0.93, fontsize = 16)
fig.set_facecolor('w')
plt.show()
# ---------------------------------------------------------------------------
# Figure: one violin + strip panel per non-'%' variable, restricted to the
# rows of a single category (categories[g]).
# ---------------------------------------------------------------------------
g = 1  # index into `categories` of the category shown in this figure
n_cols = 2
# The grid used to be a fixed 5x2 (ten panels) while the loop runs over a
# data-dependent list; grow the row count when needed so axs[j] cannot
# raise IndexError. With ten or fewer variables the layout is unchanged.
n_rows = max(5, (len(variables_total) + n_cols - 1) // n_cols)
fig, axs = plt.subplots(n_rows, n_cols, figsize=(30, 40))
axs = axs.flatten()

for j, variable in enumerate(variables_total):
    # Rejection-sample an RGB triple: accept it only when the second and
    # third channels are both >= 0.3 (the first channel is unconstrained).
    # This is the acceptance rule of the earlier index-tracking loop,
    # assuming its final `k <= 1` check ran after the per-channel scan.
    # NOTE(review): the intent may have been "at most one dark channel" -
    # confirm before changing the rule.
    while True:
        color = np.random.rand(3,)
        if color[1] >= 0.3 and color[2] >= 0.3:
            break

    # Values of this variable for the selected category.
    value = df2['value'].loc[(df2['Category'] == categories[g]) & (df2['variable'] == variable)]

    sns.violinplot(value, ax=axs[j], orient="v", color=color)
    sns.stripplot(value, marker='o', alpha=0.5, color='w', orient="v", ax=axs[j])
    axs[j].set_title(variable, fontsize=10)

    # Dashed white line and label at the mean (truncated to int).
    mean = int(value.mean())
    axs[j].axhline(mean, color='w', linestyle='dashed', linewidth=1)
    axs[j].text(-0.495, mean, 'AVG: ' f'{mean:,}', size=14, color='w')
    axs[j].set_facecolor('k')

# Figure-level layout, title (the category name) and background.
plt.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=0.1, hspace=0.5)
fig.suptitle(categories[g], y=0.93, fontsize = 16)
fig.set_facecolor('w')
plt.show()
# ---------------------------------------------------------------------------
# Figure: one violin + strip panel per category, all showing the same
# variable (variables[g]), which is also used as the figure title.
# ---------------------------------------------------------------------------
g = 1  # index into `variables` of the variable shown in this figure
n_cols = 2
# Keep the original 5x2 layout, but add rows when there are more than ten
# categories so that axs[j] below can never run past the end of the grid.
n_rows = max(5, (len(categories) + n_cols - 1) // n_cols)
fig, axs = plt.subplots(n_rows, n_cols, figsize=(30, 30))
axs = axs.flatten()

for j, category in enumerate(categories):
    # Rejection-sample an RGB triple: accept it only when the second and
    # third channels are both >= 0.3 (the first channel is unconstrained).
    # This is the acceptance rule of the earlier index-tracking loop,
    # assuming its final `k <= 1` check ran after the per-channel scan.
    # NOTE(review): the intent may have been "at most one dark channel" -
    # confirm before changing the rule.
    while True:
        color = np.random.rand(3,)
        if color[1] >= 0.3 and color[2] >= 0.3:
            break

    # Select the titled variable for this category. The selection used to
    # index `variables` with the panel counter j, so every panel plotted a
    # different variable than the one named in the figure title.
    value = df2['value'].loc[(df2['Category'] == category) & (df2['variable'] == variables[g])]

    # Use the colour sampled above; previously it was computed and then
    # discarded in favour of a second, unchecked random colour.
    sns.violinplot(value, ax=axs[j], orient="v", color=color, saturation=0.075)
    sns.stripplot(value, marker='o', alpha=0.5, color='w', orient="v", ax=axs[j])
    axs[j].set_title(category, fontsize=14)

    # Dashed white line and label at the mean (truncated to int).
    mean = int(value.mean())
    axs[j].axhline(mean, color='w', linestyle='dashed', linewidth=1)
    axs[j].text(-0.495, mean, 'AVG: ' f'{mean:,}',
                size=14, color='w')

    axs[j].tick_params(labelsize=12)
    axs[j].set_ylabel('Value', fontsize=14)
    axs[j].set_facecolor('k')

# Figure-level layout, title (the variable name) and background.
plt.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=0.1, hspace=0.5)
fig.suptitle(variables[g], y=0.93, fontsize = 16)
fig.set_facecolor('w')
plt.show()
# ---------------------------------------------------------------------------
# Pair plot of the remaining columns of df, coloured by 'Category', with the
# default legend replaced by a horizontal one at the top of the figure.
# ---------------------------------------------------------------------------
df3 = df.drop(['Item', 'Serving Size'], axis=1)

# Drop every '%' variable that is actually present as a column of df3.
percent_cols_present = [name for name in variables_percentage if name in df3]
df3.drop(percent_cols_present, inplace=True, axis=1)

g = sns.pairplot(df3, hue='Category', height=8)
g.fig.set_size_inches(15,15)

# Reuse the grid's legend entries (label -> handle) for a figure-level
# legend, and remove the legend that pairplot created.
legend_entries = g._legend_data
handles = legend_entries.values()
labels = legend_entries.keys()
g._legend.remove()
g.fig.legend(handles=handles, labels=labels, loc='upper center', ncol=9)

sns.set(font_scale=1.5)
# ---------------------------------------------------------------------------
# One heatmap per non-'%' variable (other than 'Calories'): rows are
# categories, columns are the distinct values of that variable, and cells
# hold the pivot_table aggregate of 'Calories' (pivot_table's default
# aggregation).
# ---------------------------------------------------------------------------
# Build a new list instead of aliasing variables_total: the previous
# `variables_total_2 = variables_total` followed by `.remove('Calories')`
# also removed 'Calories' from variables_total itself and raised ValueError
# when this cell was executed a second time.
variables_total_2 = [name for name in variables_total if name != 'Calories']

# squeeze=False always returns a 2-D array of axes, so flatten() also works
# when only a single row is requested.
fig, axs = plt.subplots(len(variables_total_2), 1, figsize=(20, 50), squeeze=False)
axs = axs.flatten()

for j, v2 in enumerate(variables_total_2):
    df4 = df[['Category', 'Calories', v2]]
    df4 = df4.pivot_table(index='Category', columns=v2, values='Calories')
    g = sns.heatmap(df4, ax=axs[j], linewidths=0.005, cmap = 'OrRd')
    # Black background for this heatmap's axes.
    g.set_facecolor('k')

plt.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=0.1, hspace=0.5)
fig.suptitle('Calories Heatmap VS Other Nutritional Categories', y=0.89, fontsize = 16)