2023-07-29

Matplotlib Notes

This post collects useful notes on using matplotlib (plotting charts for data analysis). I’ve listed quite a lot of them for future reference.

The most commonly used methods I've encountered

1. Presettings

# import the packages used throughout these notes:
import calendar

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# check available predefined styles:
>> plt.style.available

>> ['Solarize_Light2',
    '_classic_test_patch',
    '_mpl-gallery',
    '_mpl-gallery-nogrid',
    'bmh',
    'classic',
    'dark_background',
    'fast',
    'fivethirtyeight',
    'ggplot',
    'grayscale',
    'seaborn-v0_8',
    ...
    'seaborn-v0_8-white',
    'seaborn-v0_8-whitegrid',
    'tableau-colorblind10']

# to use the style:
>> plt.style.use("seaborn-notebook")


# NOTE: to save a figure, call plt.savefig() BEFORE plt.show();
# saving after the figure has been shown can produce an empty image.

# We can use plt.savefig() to save to many different file formats, such as png, svg, or pdf.
# After plotting, we can call plt.savefig('name_of_graph.png'):
plt.savefig('image.png')   # png: high quality but a larger file
plt.savefig('image.png', dpi=300)  # a higher dpi gives a higher-resolution (and larger) image
plt.savefig('subfolder/filename.png')
# jpg: smaller file but lower quality. The JPEG quality is passed through
# pil_kwargs (the old `quality=` keyword is no longer accepted by savefig);
# it only applies to 'jpg' / 'jpeg' output.
plt.savefig('filename.jpg', pil_kwargs={'quality': 50})

2. More examples for plotting charts

# Create a Figure and an array of subplots with 1 chart;

fig, ax = plt.subplots(figsize=(20,8))
x_axis= np.arange(len(data.index))
ax.plot(x_axis, data.gdp)
ax.plot(x_axis, data.djia)
ax.set_xticks(x_axis)
ax.set_xticklabels(data.index.strftime("%Y-%m-%d"), rotation=90)   # change date format:
plt.show()


# Create a Figure and an array of subplots with 2 rows and 2 columns
fig, ax = plt.subplots(2, 2, figsize=(12,8))

# Addressing the top left Axes as index 0, 0, plot month and Seattle precipitation
ax[0, 0].plot(austin_weather.DATE, seattle_weather.groupby('DATE')["MLY-PRCP-NORMAL"].mean().values)

# In the top right (index 0,1), plot month and Seattle temperatures
ax[0, 1].plot(austin_weather.DATE, seattle_weather.groupby('DATE')["MLY-TAVG-NORMAL"].mean().values)

# In the bottom left (1, 0) plot month and Austin precipitations
ax[1, 0].plot(austin_weather.DATE, austin_weather["MLY-PRCP-NORMAL"])

# In the bottom right (1, 1) plot month and Austin temperatures
ax[1, 1].plot(austin_weather.DATE, austin_weather["MLY-TAVG-NORMAL"])


# Create a figure and an array of axes: 2 rows, 1 column with shared y axis
fig, ax = plt.subplots(2, 1, sharey=True)  # sharey=True, means share Y axis for all subcharts
fig.suptitle("Title text")
# Plot Seattle precipitation data in the top axes
ax[0].plot(seattle_weather.MONTH, seattle_weather["MLY-PRCP-NORMAL"], color = 'b')
ax[0].plot(seattle_weather.MONTH, seattle_weather["MLY-PRCP-25PCTL"], color = 'b', linestyle = '--')
ax[0].plot(seattle_weather.MONTH, seattle_weather["MLY-PRCP-75PCTL"], color = 'b', linestyle = '--')

# Plot Austin precipitation data in the bottom axes
ax[1].plot(austin_weather.MONTH, austin_weather["MLY-PRCP-NORMAL"], color = 'r')
ax[1].plot(austin_weather.MONTH, austin_weather["MLY-PRCP-25PCTL"], color = 'r', linestyle = '--')
ax[1].plot(austin_weather.MONTH, austin_weather["MLY-PRCP-75PCTL"], color = 'r', linestyle = '--')
plt.show()

# Moving average: draw one line per country by iterating over the groupby object
fig, ax = plt.subplots(figsize=(18, 8))
groupby_generator = rel.groupby('Country')
for country, country_rows in groupby_generator:
    ax.plot(country_rows['Year'], country_rows['moving_avg'], label=country)
# Build the legend once, after every labelled line has been added
ax.legend()
plt.show()

 
# Create a figure and an array of axes: 2 rows, 1 column with figure size = 18*12
fig, ax = plt.subplots(2, 1, figsize=(18, 12))
fig.suptitle("Title text")
recent = climate_change['2015':]       # slice once and reuse it for both charts
x_axis = np.arange(len(recent.index))  # create the positional x-axis first

ax[0].plot(x_axis, recent.relative_temp)
# Pass the positions to set_xticks first; ax[0].set_xticks([]) would hide the x-axis ticks.
ax[0].set_xticks(x_axis)
# The ticks must be set before the labels are changed
# (the two calls have to be used together to relabel the axis).
ax[0].set_xticklabels(recent.index, rotation=50)

ax[1].plot(x_axis, recent.co2)
ax[1].set_xticks(x_axis)
ax[1].set_xticklabels(recent.index, rotation=50)

ax[0].set_ylabel("Relative Temp")
ax[1].set_ylabel("CO2")
plt.show()


# A good example of plotting 3 bar charts horizontally in the same row:
fig, ax = plt.subplots(1, 3, figsize=(18, 7))
plt.suptitle('Accumulated Gold Medals among each years for China, South Korea and Japan', fontsize=18)
x_axis = np.arange(len(final.year.unique()))
for i in range(3):
    ax[i].bar(x_axis, final.loc[final.country==targets[i],'runningSUM'], label=targets[i], color = color_ls[i])
    ax[i].set_xticks(x_axis)
    ax[i].set_xticklabels(final.year.unique())
    ax[i].set_yticks([0, 25, 50, 100, 150, 200, 270])
    ax[i].set_xlabel('Year', fontsize=12)
    ax[i].legend()
    ax[i].grid(True, alpha=0.2)
    
ax[0].set_ylabel('Gold Medals Accumulated', fontsize=16) 
plt.show()


# good example to generate random color:
# Method 1 - nongroup
import random
fig, ax = plt.subplots(1,3, figsize=(12,6))
country_obj = final.Country.unique()
for i in range(len(country_obj)):
    x_axis = np.arange(len(final.loc[final.Country == country_obj[i], 'Year']))
    ax[i].bar(x_axis, final.loc[final.Country == country_obj[i], 'cum_medals'], label=country_obj[i],
              color=(random.uniform(0, 1), random.uniform(0, 1), random.uniform(0, 1)))  #uniform includes 1;
    ax[i].set_xticks(x_axis)
    ax[i].set_xticklabels(final.loc[final.Country == country_obj[i], 'Year'])
    ax[i].set_xlabel(country_obj[i])
    ax[i].legend()
plt.show()


# Method 2 - groupby
fig, ax = plt.subplots(1,3, figsize=(12,6))
groupby_country_obj = final.groupby('Country')
index = 0
for key, group in groupby_country_obj:
    x_axis = np.arange(len(group.Year))
    ax[index].bar(x_axis, group.cum_medals, label=key, 
                  color=(np.random.uniform(0,1),np.random.uniform(0,1),np.random.uniform(0,1)))
    ax[index].set_xticks(x_axis)
    ax[index].set_xticklabels(group.Year)
    ax[index].set_xlabel(key)
    ax[index].legend()
    index+=1
plt.show()



#@Horizontal-bar >>>>>>>> 

fig, ax = plt.subplots(len(final.Country.unique()), 1, figsize=(12, 18))
fig.suptitle('Cummulative Medals Earned for Countries', y=0.9, fontsize=15)
groupby_generator = final.groupby('Country')
index=0
for key, group in groupby_generator:
    rand_color=(np.random.uniform(0,1),np.random.uniform(0,1),np.random.uniform(0,1))
    y_axis =np.arange(len(group.Year))
    ax[index].barh(y_axis, group.cumsum_3_year, label=key, color=rand_color)
    ax[index].set_yticks(y_axis)
    ax[index].set_yticklabels(group.Year)
    ax[index].set_xlabel(key, fontsize=15,  color=rand_color)
    ax[index].legend()
    index+=1
plt.show()


# Initialize a Figure and Axes
fig, ax= plt.subplots(figsize=(12,8))
fig.suptitle("Title text")
# Plot the CO2 variable in blue
ax.plot(climate_change.index, climate_change.co2, color='b')
ax.set_ylabel('CO2 Level', fontsize=15, color='b')
ax.tick_params('y', colors='b')   # Change the tick paramaters for y axis, you can also pass on 'x' or 'both'; 
# Create a twin Axes that shares the x-axis
ax2 = ax.twinx()  # This is make the two plots share the same x axis and eventually made a dual y axis;

# Plot the relative temperature in red
ax2.plot(climate_change.index, climate_change.relative_temp, color='r')
ax2.set_ylabel('relative temperature', fontsize=15, color='r')
ax2.tick_params('y', colors='r')
plt.show()


# Using functions: 
# Define a function called plot_timeseries
def plot_timeseries(axes, x, y, color, xlabel, ylabel, label_font_size):
    """Draw a colour-coded series on ``axes``.

    The line, the y-axis label and the y-axis ticks all use ``color``;
    both axis labels use ``label_font_size``.
    """
    label_font = {"fontsize": label_font_size}
    axes.plot(x, y, color=color)
    axes.set_xlabel(xlabel, **label_font)
    axes.set_ylabel(ylabel, color=color, **label_font)
    # Colour only the y ticks so each twin axis is visually tied to its line
    axes.tick_params('y', colors=color)

fig, ax = plt.subplots(figsize=(12,10))
fig.suptitle("Title text")
# Plot the CO2 levels time-series in blue
plot_timeseries(ax, climate_change.index, climate_change['co2'], "blue", "Time (years)", "CO2 levels", 15)
# Create a twin Axes object that shares the x-axis
ax2 = ax.twinx()
# Plot the relative temperature data in red
plot_timeseries(ax2, climate_change.index, climate_change['relative_temp'], "red", "Time (years)", "Relative temperature (Celsius)", 15)

# Annotate the point where the relative temperature is > 1 degree.
# Both positions are given as (date, value) in data coordinates. Using
# pd.Timestamp for both avoids raw date floats, whose meaning depends on
# matplotlib's date epoch (12000 was "days since 1970-01-01", i.e. 2002-11-09).
ax2.annotate(text=">1 degree",
             xy=(pd.Timestamp('2015-10-06'), 1),        # the point the arrow points at
             xytext=(pd.Timestamp('2002-11-09'), 1.3),  # where the text is placed (the arrow starts here)
             arrowprops={"arrowstyle": "->", 'color': 'green'}  # the style of the arrow
             )
plt.show()



### --------:::::::::::::::::::::::Line chart with Error bars attached :::::::::::::::::::::
fig, ax = plt.subplots()   # This is a special plot which plots line chart but each point would also have a small bar of error (the std in this case):
# Add Austin temperature data in each month with error bars
ax.errorbar(x = austin_weather['DATE'], y = austin_weather['MLY-TAVG-NORMAL'], yerr=austin_weather['MLY-TAVG-STDDEV'])
# Set the y-axis label
ax.set_ylabel('Temperature (Fahrenheit)')
plt.show()


'''::::::::::: Scatter Chart:::::::::::'''

fig, ax=plt.subplots(figsize=(8,6))
ax.scatter(climate_change['co2'], climate_change['relative_temp'], c=climate_change.index) # c=climate_change.index for render each point by index(datetime) for each point to get a different color; very beautiful;
ax.set_xlabel("CO2 (ppm)")
ax.set_ylabel("Relative temperature (C)")
plt.show()



'''::::::::::: Bar Chart:::::::::::'''

fig, ax = plt.subplots(figsize=(10,8))
fig.suptitle("Medals for different countries")
# Plot a bar-chart of gold medals as a function of country
ax.bar(medals.index, medals['Gold'])
# Set the x-axis tick labels to the country names
ax.set_xticks(medals.index)   # should set the ticks first then change the xticklabels format;
ax.set_xticklabels(medals.index, rotation=90)
# Set the y-axis label
ax.set_ylabel('Number of medals')
plt.show()


### Stacked bar chart::::::::::::::::::::::::Stacked bar chart :::::::::::::::::::::
fig, ax = plt.subplots(figsize=(10,8))
fig.suptitle("Medals for different countries")
# Plot a bar-chart of gold medals as a function of country
ax.bar(medals.index, medals['Gold'], label='Gold')
# Stack bars for "Silver" on top with label "Silver"
ax.bar(medals.index, medals['Silver'], bottom=medals['Gold'], label='Silver')
# Stack bars for "Bronze" on top of that with label "Bronze"
ax.bar(medals.index, medals['Bronze'], bottom=medals['Gold']+medals['Silver'], label='Bronze')
# Set the x-axis tick labels to the country names
ax.set_xticks(medals.index)
ax.set_xticklabels(medals.index, rotation=90)
# Set the y-axis label
ax.set_ylabel('Number of medals')
# Display the legend
ax.legend()
plt.show()


### ::::::::Side-by-side (grouped) bar chart :::::::::::::::::::::
fig, ax = plt.subplots(figsize=(12,8))
fig.suptitle("Medals for different countries")
x_axis = np.array(range(len(medals.index))) 
bar_width=0.3
# Plot a bar-chart of gold, Silver, Bronze medals as a function of country
ax.bar(x_axis-bar_width, medals['Gold'], width=bar_width, label='Gold')
ax.bar(x_axis, medals['Silver'], width=bar_width, label='Silver')
ax.bar(x_axis+bar_width, medals['Bronze'], width=bar_width, label='Bronze')

# Set the x-axis tick labels to the country names
ax.set_xticks(x_axis)
ax.set_xticklabels(medals.index, rotation=45)
# Set the y-axis label
ax.set_ylabel('Number of medals')
# Display the legend
ax.legend()
ax.grid(alpha=0.5)
plt.show()


### A number of bars chart::::::::::::::::::::::::Error bar chart :::::::::::::::::::::
fig, ax = plt.subplots()
# Add a bar for the rowing "Height" column mean/std
ax.bar("Rowing", mens_rowing['Height'].mean(), yerr=mens_rowing['Height'].std())  # you can using 'string' for x_axis???
# Add a bar for the gymnastics "Height" column mean/std
ax.bar("Gymnastics",mens_gymnastics['Height'].mean(), yerr=mens_gymnastics['Height'].std())
# Label the y-axis
ax.set_ylabel("Height (cm)")
plt.show()


### A number of bars chart:::::::::::::::::::::::: Iterate to automating plot error bar :::::::::::::::::::::
fig, ax = plt.subplots(figsize=(15, 8))
bar_width=0.7
# Loop over the different sports branches
for i in sports:
  # Extract the rows only for this sport
  sport_df = summer2016.loc[summer2016['Sport'] == i, 'Weight']
  # Add a bar for the "Weight" mean with std y error bar
  ax.bar(i, sport_df.mean(), yerr=sport_df.std() ,width=bar_width)

ax.set_xticks(sports)
ax.set_xticklabels(sports, rotation=90)
ax.set_ylabel("Weight")
# Save the figure to file
plt.savefig("sports_weights.png")


'''------ Histogram Chart:::::::::::'''
fig, ax = plt.subplots(figsize=(8,6))
# Plot a histogram of "Weight" for mens_rowing
ax.hist(mens_rowing['Weight'], label="Rowing", bins = 5, histtype='step', alpha=0.8)
# Compare to histogram of "Weight" for mens_gymnastics
ax.hist(mens_gymnastics['Weight'], label="Gymnastics", bins = 5, histtype='step', alpha=0.8)
# Set the x-axis label to "Weight (kg)"
ax.set_xlabel("Weight (kg)")
# Set the y-axis label to "# of observations"
ax.set_ylabel("# of observations")
ax.legend()
plt.show()

'''--------- Boxplot Chart:::::::::::'''
fig, ax = plt.subplots()
arr_data_inputs=[mens_rowing['Height'], mens_gymnastics['Height']]
xticklabels=["Rowing", "Gymnastics"]
# Add a boxplot for the "Height" column in the DataFrames
ax.boxplot(arr_data_inputs)   # x is for inputing data, it can be one data set, an be an array of a collection of datasets(list of lists);
# Add x-axis tick labels:
ax.set_xticklabels(xticklabels)
# Add a y-axis label
ax.set_ylabel('Height (cm)')
plt.show()


''''******************** Line Chart************************'''
#different color using the keyword color with either an HTML color name or a HEX code:
plt.plot(days, money_spent, color='green')
plt.plot(days, money_spent_2, color='#AAAAAA')

# Dashed:
plt.plot(x_values, y_values, linestyle='--')
# Dotted:
plt.plot(x_values, y_values, linestyle=':')
# No line:
plt.plot(x_values, y_values, linestyle='')

# A circle dot line: round markers
plt.plot(x_values, y_values, marker='o')
# A square dot line:
plt.plot(x_values, y_values, marker='s')
# A star dot line:
plt.plot(x_values, y_values, marker='*')


#Line with Shaded Error
#plt.fill_between(x_values, y_lower, y_upper, alpha=0.2)
line1.fill_between(months, y_lower, y_upper, alpha=0.2)

#if we want to display a plot from x=0 to x=3 and from y=2 to y=5, we would call:
plt.axis([0, 3, 2, 5]) #plt.axis([min_x, max_x, min_y, max_y])
'''
we instead set it to these values, with both the x and y min and max values reversed:
plt.axis([3, 0, 5, 2])
What this will do is not throw an error, but instead the graph will be essentially drawn inverted, both on the x axis and the y axis. The x axis will be for x values from 3 to 0, and the y axis will be for values 5 to 2, both in decreasing order.'''

plt.xlabel("Time")   # can be written: ax.set_xlabel("Year")
plt.ylabel("Dollars spent on coffee")  # can be written: ax.set_ylabel("Test average")

# The Axes equivalent is ax.set_title("..."); it CAN NOT be written as ax.title()  **** IMPORTANT
plt.title("My Last Twelve Years of Coffee Drinking")

#The command plt.subplot() needs three arguments to be passed into it:
# 1st argument: the number of rows of subplots in the figure
# 2nd argument: the number of columns of subplots in the figure
# 3rd argument: the index (starting at 1) of the subplot we want to create
# Left subplot: 1 row, 2 columns, index 1
plt.subplot(1, 2, 1)
plt.plot(months, temperature)
plt.title("temperature over months")

# Right subplot: same grid, index 2
plt.subplot(1, 2, 2)
plt.plot(temperature, flights_to_hawaii, 'o')  # 'o' draws round markers only, no line
plt.plot(temperature, flights_to_hawaii, '*')  # '*' draws star markers only, no line
plt.title("flights_to_hawaii over temperature")
# Call show() once, after both subplots are drawn, so they appear in one figure
plt.show()

# **** GOOD Line chart ***: 
# Line Graph: Time Series Analysis for table : hourly_viwer_us
plt.figure(figsize=(12,8))
line_chart=plt.subplot()
y_lower = [i*0.85 for i in hourly_viwer_us.viewers]
y_upper = [i*1.15 for i in hourly_viwer_us.viewers]
line_chart.plot(hourly_viwer_us.hour, hourly_viwer_us.viewers)
line_chart.fill_between(hourly_viwer_us.hour, y_lower, y_upper, alpha=0.2)
line_chart.set_xlabel("Hour")
line_chart.set_ylabel("Viewers")
line_chart.set_title("Time Series")
line_chart.legend(["2015-01-01"])
line_chart.set_xticks(hourly_viwer_us.hour)
#line_chart.set_yticks("Hour")
plt.show()

# **** GOOD Line chart ***: 
plt.plot(discount_rate,npvs_a, linewidth = 2.0, color = "red", label = "Project A")
plt.plot(discount_rate,npvs_b, linewidth = 2.0, color = "blue", label = "Project B")
plt.axhline(y=0, linewidth = 0.5, color = "black")   # -- adding a horizontal line; 
plt.title('NPV Profile for Projects A and B')
plt.xlabel('Discount Rate')
plt.ylabel('Net Present Value')
plt.legend()
plt.show()

# OR:

# Select the style before creating the figure so it applies to the whole figure
plt.style.use("ggplot")
plt.figure(figsize=(10, 8))
# Format string "^--g" is '[marker][line][color]': triangle markers, dashed line, green
plt.plot(uk_df.Age, uk_df.SalaryInUSD, "^--g", label="UK")
# Each country's salaries are plotted against that country's own Age column
plt.plot(us_df.Age, us_df.SalaryInUSD, label="US", marker="*", linestyle=":", color="y")
plt.plot(au_df.Age, au_df.SalaryInUSD, label="AU", linestyle="-.")
plt.plot(ch_df.Age, ch_df.SalaryInUSD, label="CH", linestyle=":", marker=".", color="#5a7d9a")
plt.title("Median Salary (in USD) Earned by IT worker by Age in UK, US, AU, CH")
plt.legend()  # no labels passed here: they come from the label= parameter of each plot() call above
plt.xlabel("Age")
plt.ylabel("Median Salary (in USD)")
plt.grid(True, alpha=0.3)
plt.show()


# pandas how to draw customized DATE in X axis::::::::: customized x axis:
plt.figure(figsize=(16,12))
x_axis = np.array(final.year)
plt.plot(x_axis, final.Births)
plt.plot(x_axis, final.Deaths)
plt.xticks([x_axis[i] for i in range(0, len(x_axis), 2)],  # here to customize the number of x axis you want to display:
           ["year {}".format(x_axis[i]) for i in range(0, len(x_axis), 2)], rotation=45)
		   # here to replace the actually string of the label of x axis you want to display:
plt.legend(['Births', 'Death'])
plt.grid(alpha=0.5)
plt.show()

# pandas: how to draw customized DATE labels on the x axis
plt.figure(figsize=(20, 12))
x_axis = list(range(len(final.Date)))   # positional x values, one per row of final.Births / final.Deaths
plt.plot(x_axis, final.Births)
plt.plot(x_axis, final.Deaths)
tick_positions = x_axis[::2]            # customize how many ticks are displayed: every 2nd row
# Replace the tick text with formatted dates; .iloc is positional, so this
# also works when `final` does not have the default 0..n-1 index.
tick_labels = [final.Date.iloc[i].strftime("%d-%B-%Y") for i in tick_positions]
plt.xticks(tick_positions, tick_labels, rotation=90)
plt.legend(['Births', 'Death'])
plt.grid(alpha=0.5)
plt.show()

# display the name of each month by using the calendar package:
plt.figure(figsize=(10,8))
plt.plot(f1.index, f1)
plt.xticks(f1.index, [calendar.month_name[i] for i in f1.index], rotation=45)
plt.show()


###### Equivalent methods for plot and axes

plt.xlabel()
ax.set_xlabel()

plt.xticks() 
plt.xticks(rotation='vertical')   # to make the xlabels verticical show  
plt.xticks(rotation=50)
ax.set_xticks() # preferred way

plt.xticks(x_positions, chart_labels)

# When labels are particularly long, use the rotation keyword to rotate them by some degrees.
# pyplot has no xticklabels(); plt.xticks() takes the positions and the labels together:
plt.xticks([0.1, 0.6, 0.8], ['10%', '60%', '80%'], rotation=30)
# Axes equivalent: set the tick positions first, then their labels
ax.set_xticks([0.1, 0.6, 0.8])
ax.set_xticklabels(['10%', '60%', '80%'], rotation=30)

plt.rcParams['ytick.labelsize'] = 8         # to change the yticks font size
plt.rcParams['xtick.labelsize'] = 8.5
plt.rcParams['font.sans-serif']=['SimHei']  # to show normal chinese fonts
plt.rcParams['axes.unicode_minus']=False    # to show negative sign

plt.title("flights_to_hawaii over temperature")
ax.set_title("flights_to_hawaii over temperature")

plt.subplots_adjust(bottom =0.2, wspace=0.5)  #  to change the space between subplot.
'''
left — the left-side margin, with a default of 0.125. You can increase this number to make room for a y-axis label
right — the right-side margin, with a default of 0.9. You can increase this to make more room for the figure, or decrease it to make room for a legend
bottom — the bottom margin, with a default of 0.1. You can increase this to make room for tick mark labels or an x-axis label
top — the top margin, with a default of 0.9
wspace — the horizontal space between adjacent subplots, with a default of 0.2
hspace — the vertical space between adjacent subplots, with a default of 0.2
'''
#Adding legend labels:
plt.legend(['parabola', 'cubic'], loc=6)
plt.show()
# OR:
plt.plot([0, 1, 2, 3, 4], [0, 1, 4, 9, 16],label="parabola")
plt.plot([0, 1, 2, 3, 4], [0, 1, 8, 27, 64],label="cubic")
plt.legend() # Still need this command!
plt.show()

#In order to be sure that you don’t have any stray lines, you can use the command plt.close('all') to clear all existing plots before you plot a new one.
plt.close('all')

#use the command plt.close('all') to clear all existing plots before you plot a new one.
#To create a figure with a width of 4 inches, and height of 10 inches, we would use:
plt.figure(figsize=(4, 10))

# another way to work:
fig = plt.figure()
fig.add_subplot(1,1,1)
plt.scatter(hours_reported,exam_scores)
plt.title("Orion in 2D")


'''+++++++++++++++++++++++++++++++++++++++++++ Scatter plot: =================================== ::::::: '''
gdp_cap=[974.5803384, 5937.029525999998, 6223.367465, 4797.231267, 12779.37964, 34435.367439999995, 36126.4927, 29796.04834, 1391.253792, 33692.60508, 1441.284873]
life_exp = [43.828, 76.423, 72.301, 42.731, 75.32, 81.235, 79.829, 75.635, 64.062, 79.441, 56.728, 65.554, 74.852, 50.728, 72.39, 73.005, 52.295, 49.58, 59.723]
pop = [31.889923, 3.600523, 33.333216, 12.420476, 40.301927, 20.434176, 8.199783, 0.708573, 150.448339, 10.392226, 8.078314, 9.119152, 4.552198, 1.639131, 190.010647]
col = ['red', 'green', 'blue', 'blue', 'yellow', 'black', 'green', 'red', 'red', 'green', 'blue', 'yellow', 'green', 'blue', 'yellow', 'green', 'blue', 'blue', 'red', 'blue']
# The sample lists above have different lengths (11, 19, 15 and 20), but
# plt.scatter() needs x, y, s and c to have one entry per point, so keep only
# the leading entries that all four lists share.
n_points = min(len(gdp_cap), len(life_exp), len(pop), len(col))
gdp_cap = gdp_cap[:n_points]
life_exp = life_exp[:n_points]
pop = pop[:n_points]
col = col[:n_points]
# Import numpy as np
import numpy as np
# Store pop as a numpy array and double it so the markers are easier to see
np_pop = np.array(pop) * 2
# s is the marker size in points**2; c is a color or a list of colors, one per marker
plt.scatter(gdp_cap, life_exp, s=np_pop, c=col)
# Previous customizations
plt.xscale('log')  # logarithmic x scale spreads out the low-GDP points
plt.xlabel('GDP per Capita [in USD]')
plt.ylabel('Life Expectancy [in years]')
plt.title('World Development in 2007')
plt.xticks([1000, 10000, 100000],['1k', '10k', '100k'])
plt.show()



# Scatter chart:
# scatter:
girls_grades = [89, 90, 70, 89, 100, 80, 90, 100, 80, 34]
boys_grades = [30, 29, 49, 48, 100, 48, 38, 45, 20, 30]
grades_range = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]

ax9=plt.subplot()
ax9.scatter(grades_range, girls_grades, color='r')
ax9.scatter(grades_range, boys_grades, color='b')
ax9.set_xlabel('Grades Range')
ax9.set_ylabel('Grades Scored')
ax9.set_title('scatter plot')
plt.show()



# To plot scatter chart -------------------------------------------:
fig=plt.figure(figsize=(18,14))
fig.suptitle("Latitude and Longitude for Countries categorized by Population and Continent", 
             y=0.92, # y=0.92 is the y position of the title ploted;
             fontsize=25)

scatter = plt.scatter(merged_df10.avg_longitude, merged_df10.avg_latitude, 
            s=merged_df10.pop_size.values/30000,
           c=continent_color_lst, marker='.')

# Shortcut: plot one empty series per continent so that the legend shows
# every continent label together with its colour.
for continent, colour in continent_color.items():
    plt.scatter([], [], c=colour, label=continent, s=500)
    
plt.xlabel('Average Longitude', fontsize=15)
plt.ylabel('Average Latitude', fontsize=15)
plt.legend(loc=2, markerscale=0.5,scatterpoints=1, fontsize=15)
plt.grid(True, alpha=0.3, linestyle='--')
#plt.savefig("{}-Q13.png".format(999999))
plt.show()

''''******************** Bar Chart************************'''
# A bar chart is good for categorical data; it is not as good for data with time on the x-axis, such as one value per year.
# plot a single figure:
plt.figure(figsize=(8,6))
plt.bar(published_year.index , published_year.sum_published)
plt.xticks(published_year.index, published_year.year_publised)
plt.show()


drinks = ["cappuccino", "latte", "chai", "americano", "mocha", "espresso"]
sales =  [91, 76, 56, 66, 52, 27]

plt.subplot()  # always put this line before the bar plot method. 
plt.bar(range(len(sales)), sales)



# This is the preferred way: keep the Axes object and call its methods
ax1 = plt.subplot()
ax1.bar(range(len(sales)), sales)
ax1.set_xticks(range(len(drinks)))
ax1.set_xticklabels(drinks, rotation=20)
ax1.set_title("Drink Sales")  # set_title() requires the title text as its first argument

plt.show()

# Side-By-Side Bars: 
# China Data (blue bars)
n = 1  # This is our first dataset (out of 2)
t = 2 # Number of datasets
d = 7 # Number of sets of bars
w = 0.8 # Width of each bar
x_values1 = [t*element + w*n for element in range(d)]

# US Data (orange bars)
n = 2  # This is our second dataset (out of 2)
t = 2 # Number of datasets
d = 7 # Number of sets of bars
w = 0.8 # Width of each bar
x_values2 = [t*element + w*n for element in range(d)]

def create_x(t, w, n, d):
    """Return the x positions of one dataset's bars in a side-by-side bar chart.

    t: number of datasets, w: width of each bar,
    n: which dataset this is (1-based), d: number of sets of bars.
    """
    offset = w * n  # shift of this dataset's bar inside every set
    return [t * set_index + offset for set_index in range(d)]
	
ax2 = plt.subplot()
ax2.bar(x_values1, sales1)
ax2.bar(x_values2, sales2)
ax2.set_xticks([(i+j)/2 for i, j in zip(x_values1,x_values2) ])
ax2.set_xticklabels(drinks, rotation = 10)
plt.show()



# -------------------------------------Best Side-By-Side Bars Method =======: 
""" *************************************** Side-By-Side Bars: ----------- Better method """   
plt.figure(figsize=(20,10), facecolor ="white")
x_indexes = np.arange(len(us_df.Age))
bar_width = 0.25
plt.bar(x_indexes-bar_width, us_df.CompensationInUSD, width=bar_width, label = "US")
plt.bar(x_indexes, uk_df.CompensationInUSD, width=bar_width, label = "UK")
plt.bar(x_indexes+bar_width, au_df.CompensationInUSD, width=bar_width, label = "AU")
#plt.bar(x_indexes+2*bar_width, ca_df.CompensationInUSD, width=bar_width, label = "CA")
plt.legend()
plt.xticks(ticks=x_indexes, labels=us_df.Age)
plt.xlabel("Ages")
plt.ylabel("Median Salary (in USD)")
plt.title("Median Salary (in USD) Earned by IT worker by Age in UK, US, AU, CH")
plt.show()




# *************************************** Side-By-Side Bars: ------- 
fig = plt.figure(figsize=(14,10))
#plt.rcParams['ytick.labelsize'] = 13
#plt.rcParams['xtick.labelsize'] = 13
fig.suptitle('Covid19 Economic Indicators Comparison Among Low, Middle, High Income Country Groups', fontsize=15) 

x_axis = np.arange(4)  # one position per indicator
x_labels = ["Average Covid19 Economic \n Exposure Index \n Ex Aid and FDI",
           "Average Covid19 Economic \n Exposure Index \n Ex Aid and FDI and Food Import",
           "Average Foreign \n Direct Investment", "Average Foreign Direct Investment\n(Net Inflows Percent of GDP)"]
bar_width = 0.25
# Shift each group by one bar width so the three bars sit side by side
plt.bar(x_axis-bar_width, pivot_df9.LIC, width=bar_width, label='LIC')
plt.bar(x_axis, pivot_df9.MIC, width=bar_width, label='MIC')  # label matches the MIC column
plt.bar(x_axis+bar_width, pivot_df9.HIC, width=bar_width, label='HIC')
plt.xticks(x_axis, x_labels)
plt.ylabel('Values', fontsize=15, loc='top')
plt.grid(True, alpha=0.5, axis='y')
plt.legend(prop={'size': 15}, loc=9)
plt.show()

# *************************************** Side-By-Side Bars: ------- Example 3
    # Plot the Chart:
    fig = plt.figure(figsize=(14,10))
    fig.suptitle('Comparison of Average Covid19 Economic Indicators \n for Low, Middle, High Income Country Groups', fontsize=15) 

    x_axis = np.arange(4)  # one position per indicator
    x_labels = ["Average Covid19 Economic \n Exposure Index \n Ex Aid and FDI",
               "Average Covid19 Economic \n Exposure Index \n Ex Aid and FDI and Food Import",
               "Average Foreign \n Direct Investment", "Average Foreign Direct Investment\n(Net Inflows Percent of GDP)"]
    bar_width = 0.25
    # Shift each group by one bar width so the three bars sit side by side
    plt.bar(x_axis-bar_width, pivot_df9.LIC, width=bar_width, label='LIC')
    plt.bar(x_axis, pivot_df9.MIC, width=bar_width, label='MIC')  # label matches the MIC column
    plt.bar(x_axis+bar_width, pivot_df9.HIC, width=bar_width, label='HIC')
    plt.xticks(x_axis, x_labels)
    plt.ylabel('Average Index Value', fontsize=15, loc='top')
    plt.grid(True, alpha=0.5, axis='y')
    plt.legend(prop={'size': 15}, loc=9, ncol=3)   # ncol=3 lays the legend entries out horizontally in 3 columns
	


#horizontal bar chart:
'''----------------------------------------horizontal bar chart:-----------------------'''
plt.style.use("fivethirtyeight")
plt.figure(figsize=(12,8))
# Materialise the dict views as lists: matplotlib expects sequences, not
# dict_keys / dict_values objects.
language_names = list(dict_language_ten_common.keys())
language_counts = list(dict_language_ten_common.values())
y_axis = range(len(language_names))
plt.barh(y_axis, language_counts)
plt.gca().invert_yaxis()  # invert the y axis order (upside down); same as ax.invert_yaxis()

plt.yticks(ticks=y_axis, labels=language_names)
plt.title("Most Popular Languages")
plt.xlabel("Number of People Who Use")
plt.grid(alpha=0.5)
plt.show()

# horizontal bar chart ----------------- horizontal Stacked bar:
plt.figure(figsize=(15,50))
bar_width = 0.8
y_axis = np.arange(len(ny_df.neighbourhood.unique()))
plt.barh(y_axis, f_lst[2].avg_days_occupied_2019, height = bar_width, color = 'lightblue')
plt.barh(y_axis, f_lst[1].avg_days_occupied_2019, height = bar_width, 
         left=np.array(f_lst[2].avg_days_occupied_2019), color='lightcoral')
plt.barh(y_axis, f_lst[3].avg_days_occupied_2019, height = bar_width, color='orange',
        left=np.array(f_lst[2].avg_days_occupied_2019)+np.array(f_lst[1].avg_days_occupied_2019))
plt.barh(y_axis, f_lst[0].avg_days_occupied_2019, height = bar_width, color='steelblue',
        left=np.array(f_lst[2].avg_days_occupied_2019)+\
         np.array(f_lst[1].avg_days_occupied_2019)+np.array(f_lst[3].avg_days_occupied_2019))
plt.yticks(y_axis, ny_df.neighbourhood.unique())
plt.xlabel('Avg Days Occupied in 2019')

plt.show()


# Stacked Bars
ax = plt.subplot()
ax.bar(range(len(drinks)), sales1)
ax.bar(range(len(drinks)), sales2, bottom = sales1)
ax.legend(["Location 1", "Location 2"])
ax.set_xticks(range(len(drinks)))
ax.set_xticklabels(drinks, rotation = 10)
plt.show()


## Stacked Bars :
plt.figure(figsize=(12,10))
x_axis = list(range(len(shared_room_df.neighbourhood)))
# Convert to np.array() first so the stacking offsets are plain element-wise sums
shared_reviews = np.array(shared_room_df.sub_reviews)
private_reviews = np.array(private_room_df.sub_reviews)
entire_reviews = np.array(entire_room_df.sub_reviews)

plt.bar(x_axis, shared_room_df.sub_reviews, color='red')
plt.bar(x_axis, private_room_df.sub_reviews, bottom=shared_reviews, color='orange')
# No line-continuation backslash is needed inside the parentheses
plt.bar(x_axis, entire_room_df.sub_reviews, color='steelblue',
        bottom=shared_reviews + private_reviews)
plt.xticks(ticks=x_axis, labels=shared_room_df.neighbourhood, rotation=90)
plt.ylabel('Number Of Reviews')
plt.xlabel('Neighbourhood')

# adding figures only for the entire_room
total_values = entire_reviews + shared_reviews + private_reviews
for i, (entire_count, stack_top) in enumerate(zip(entire_room_df.sub_reviews.values, total_values)):
    # x_axis[i] - 0.25 is the x position of the text and stack_top + 500 its y
    # position (just above the full stack); "{:,}" formats 1200 as 1,200.
    plt.text(x_axis[i] - 0.25, stack_top + 500, "{:,}".format(entire_count), rotation=90, color='white')

plt.show()



## ERROR Bar:
drinks = ["cappuccino", "latte", "chai", "americano", "mocha", "espresso"]
ounces_of_milk = [6, 9, 4, 0, 9, 0]
error = [0.6, 0.9, 0.4, 0, 0.9, 0]

# Plot the bar graph here
chart = plt.subplot()
error = [0.1*i for i in ounces_of_milk]
# the following error would give different upper and lower errors:
#error = ([0.1*i for i in ounces_of_milk], [0.2*i for i in ounces_of_milk])
chart.bar(range(len(drinks)), ounces_of_milk, yerr = error, capsize = 5)
chart.set_xticks(range(len(drinks)))
chart.set_xticklabels(drinks)



# Pie Chart:
pie1 = plt.subplot()
colors = ["red", "orange", "yellow", "green"]   # Optional
pie1.pie(payment_method_freqs, labels=payment_method_names, colors=colors)
pie1.axis('equal')
# The legend entries must describe the wedges drawn above, so reuse the same
# names that were passed as labels.
pie1.legend(payment_method_names)
plt.show()  # render the figure, like the other snippets in these notes

'''
Format strings for the autopct argument of pie():
'%0.2f' — 2 decimal places, like 4.08
'%0.2f%%' — 2 decimal places, but with a percent sign at the end, like 4.08%. You need two consecutive percent signs because the first one acts as an escape character, so that the second one gets displayed on the chart.
'%d%%' — truncated to an int (the decimal part is dropped, not rounded) and with a percent sign at the end, like 4%.
'''

# ***** A Good Pie Chart example *******:
# https://matplotlib.org/stable/gallery/pie_and_polar_charts/bar_of_pie.html#sphx-glr-gallery-pie-and-polar-charts-bar-of-pie-py

# https://matplotlib.org/3.1.0/gallery/pie_and_polar_charts/pie_and_donut_labels.html

plt.figure(figsize=(12,8))
pie_chart = plt.subplot()
#colors=["slateblue", "orange", "yellow", "green","red","blue","indigo","violet","purple","pink", "black"] 
colors = ['lightskyblue', 'gold', 'lightcoral', 'gainsboro', 'royalblue', 'lightpink', 'darkseagreen', 'sienna', 'khaki', 'gold', 'violet', 'yellowgreen']
# One offset per wedge; only the first wedge is pulled out.
# NOTE(review): assumes new_country_LOL_viewers has exactly 11 rows — confirm.
explode = (0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
pie_chart.pie(
    new_country_LOL_viewers.viewers,
    labels=new_country_LOL_viewers.country,
    explode=explode,                     # pull the first wedge out of the pie so it stands out
    shadow=True,
    colors=colors,
    autopct='%0.1f%%',                   # percentage text with one decimal place
    startangle=335,                      # rotate the pie so the labels do not cover each other
    pctdistance=1.2,                     # position of the percentage values
    labeldistance=1.1,                   # position of the country labels
    wedgeprops={"edgecolor": "black"},   # edge colour of every wedge
)
pie_chart.axis("equal")
pie_chart.legend(new_country_LOL_viewers.country)
pie_chart.set_title("League of Legends Viewers' Whereabouts")
plt.show()


# A wonderful chart: 
def percentage_in_world(val):
    """Turn a wedge's percentage of the pie into a fraction of the world population.

    `val` is the percentage value handed over by the pie's `autopct` callback.
    The result is val/100 of the total 'Population' in south_american_df,
    divided by world_pop.
    """
    # NOTE(review): this sums south_american_df while the pie plots
    # south_american_final[0:12] — confirm both cover the same rows.
    return val / 100. * south_american_df['Population'].sum() / world_pop


fig = plt.figure(figsize=(14,10))  # start a fresh figure for this chart
fig.suptitle('Percentage of the world population living in South American Countries', fontsize=15) 

colors = [
    'teal', 'gold', 'powderblue', 'limegreen', 'royalblue', 'lightpink',
    'blueviolet', 'darkseagreen', 'wheat', 'red', 'aqua', 'violet',
]
explode = [0, 0.1, 0, 0, 0, 0.05, 0.35, -0.05, 0, 0.35, 0.1, 0.1]

# Only the first 12 rows are plotted.
plotted_countries = south_american_final.Country[0:12]
plt.pie(
    south_american_final.Population[0:12],
    shadow=False,
    labels=plotted_countries,
    labeldistance=1.02,
    colors=colors,
    normalize=True,
    explode=explode,
    startangle=15,
    rotatelabels=True,
    # Show each wedge as its share of the world population, not of the pie.
    autopct=lambda p: '{:.2%}'.format(percentage_in_world(p)),
    textprops={'fontsize': 12.5},
    pctdistance=1.33,
)

plt.axis('equal')
plt.legend(plotted_countries, prop={'size': 12})  # legend font size 12
#plt.title('Percentage of the world population living in South American Countries')

plt.show()



# Histogram:
# density=True draws a probability density instead of raw counts.
ax.hist(exam_scores1, bins=[0, 25, 50, 75, 100], density=True)

h_chart = plt.subplot()
h_chart.hist(customer_amount.price, bins=40, range=(0, 200))
# Axis labels and title set in one call.
h_chart.set(
    xlabel="Total Spent",
    ylabel="Number of Customers",
    title="Spending Per Customer",
)
plt.show()


# Histogram with annotate
'''Compute and draw the histogram of *x*.  The return value is a tuple
(*n*, *bins*, *patches*) or ([*n0*, *n1*, ...], *bins*, [*patches0*,
*patches1*,...]) if the input contains multiple data.  See the
documentation of the *weights* parameter to draw a histogram of
already-binned data.'''
# plt.hist() hands back the per-bin counts, the bin edges and the drawn patches
# (one patch looks like Rectangle(xy=(-0.0443595, 0), width=0.0090739, height=2, angle=0)).
counts, bins, patches = plt.hist(df['Daily Log Rate of Return'].dropna())

# annotate: write each bar's count just above the bar
for count, patch in zip(counts, patches):
    label = str(int(count))  # the annotation text must be a string
    # Anchor point as floats: +0.003 shifts the text to the right of the bar's
    # left edge (horizontal), +1 lifts it above the top of the bar (vertical).
    anchor = (patch.get_x() + 0.003, patch.get_height() + 1)
    plt.annotate(label, xy=anchor)
plt.show()