Matplotlib Notes

This post collects useful notes on matplotlib (plotting charts for data analysis). I've listed quite a lot of snippets here for future reference.

The most commonly used methods I've encountered

1. Presettings

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# import the package:
import matplotlib.pyplot as plt

# check the available predefined styles:
>> plt.style.available

>> ['Solarize_Light2',
'_classic_test_patch',
'_mpl-gallery',
'_mpl-gallery-nogrid',
'bmh',
'classic',
'dark_background',
'fast',
'fivethirtyeight',
'ggplot',
'grayscale',
'seaborn-v0_8',
...
'seaborn-v0_8-white',
'seaborn-v0_8-whitegrid',
'tableau-colorblind10']

# to use the style:
>> plt.style.use("seaborn-notebook")


# NOTE: call plt.savefig() BEFORE plt.show(); saving after plt.show() has returned can produce a blank image.

# plt.savefig() can write many different file formats, such as png, svg, or pdf (chosen from the file extension). After plotting, call e.g. plt.savefig('name_of_graph.png'):
plt.savefig('image.png')  # png: high quality but a larger file
plt.savefig('image.png', dpi=300)  # dpi sets the resolution: a higher dpi gives a sharper but larger image
plt.savefig('subfolder/filename.png')  # save into a sub-folder
plt.savefig('filename.jpg', pil_kwargs={'quality': 50})  # jpg is a smaller but lower-quality image; JPEG quality goes through pil_kwargs (the old top-level `quality` keyword is no longer accepted) and only applies to 'jpg' / 'jpeg'

2. More examples for plotting charts

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
# Create a Figure and an array of subplots with 1 chart;

fig, ax = plt.subplots(figsize=(20,8))
x_axis= np.arange(len(data.index))
ax.plot(x_axis, data.gdp)
ax.plot(x_axis, data.djia)
ax.set_xticks(x_axis)
ax.set_xticklabels(data.index.strftime("%Y-%m-%d"), rotation=90) # change date format:
plt.show()


# Create a Figure and an array of subplots with 2 rows and 2 columns
fig, ax = plt.subplots(2, 2, figsize=(12,8))

# Addressing the top left Axes as index 0, 0, plot month and Seattle precipitation
ax[0, 0].plot(austin_weather.DATE, seattle_weather.groupby('DATE')["MLY-PRCP-NORMAL"].mean().values)

# In the top right (index 0,1), plot month and Seattle temperatures
ax[0, 1].plot(austin_weather.DATE, seattle_weather.groupby('DATE')["MLY-TAVG-NORMAL"].mean().values)

# In the bottom left (1, 0) plot month and Austin precipitations
ax[1, 0].plot(austin_weather.DATE, austin_weather["MLY-PRCP-NORMAL"])

# In the bottom right (1, 1) plot month and Austin temperatures
ax[1, 1].plot(austin_weather.DATE, austin_weather["MLY-TAVG-NORMAL"])


# Create a figure and an array of axes: 2 rows, 1 column with shared y axis
fig, ax = plt.subplots(2, 1, sharey=True) # sharey=True, means share Y axis for all subcharts
fig.suptitle("Title text")
# Plot Seattle precipitation data in the top axes
ax[0].plot(seattle_weather.MONTH, seattle_weather["MLY-PRCP-NORMAL"], color = 'b')
ax[0].plot(seattle_weather.MONTH, seattle_weather["MLY-PRCP-25PCTL"], color = 'b', linestyle = '--')
ax[0].plot(seattle_weather.MONTH, seattle_weather["MLY-PRCP-75PCTL"], color = 'b', linestyle = '--')

# Plot Austin precipitation data in the bottom axes
ax[1].plot(austin_weather.MONTH, austin_weather["MLY-PRCP-NORMAL"], color = 'r')
ax[1].plot(austin_weather.MONTH, austin_weather["MLY-PRCP-25PCTL"], color = 'r', linestyle = '--')
ax[1].plot(austin_weather.MONTH, austin_weather["MLY-PRCP-75PCTL"], color = 'r', linestyle = '--')
plt.show()

# Moving average: draw one line per country by iterating over the groupby object
fig, ax = plt.subplots(figsize=(18, 8))
for country, country_rows in rel.groupby('Country'):
    # each iteration yields (group key, rows belonging to that key)
    ax.plot(country_rows['Year'], country_rows['moving_avg'], label=country)
ax.legend()  # build the legend once, after every line has received its label
plt.show()


# Create a figure and an array of axes: 2 rows, 1 column with figure size = 18*12
fig, ax=plt.subplots(2, 1, figsize=(18,12))
fig.suptitle("Title text")
x_axis=np.array(range(len(climate_change['2015':].index))) # create the x-axis first;
ax[0].plot(x_axis, climate_change['2015':].relative_temp)
ax[0].set_xticks(x_axis) # pass on the x-axis first to xticks; IF ax[0].set_xticks([]) will hide the x_axis ticks;
ax[0].set_xticklabels(climate_change['2015':].index, rotation=50) # must set the ticks first then change the labels; (二者必须合起来用才能改label)

ax[1].plot(x_axis, climate_change['2015':].co2)
ax[1].set_xticks(x_axis)
ax[1].set_xticklabels(climate_change['2015':].index, rotation=50)

ax[0].set_ylabel("Relative Temp")
ax[1].set_ylabel("CO2")
plt.show()


# A good example: plot 3 bar charts side by side in the same row
fig, ax = plt.subplots(1, 3, figsize=(18, 7))
plt.suptitle('Accumulated Gold Medals among each years for China, South Korea and Japan', fontsize=18)
years = final.year.unique()  # computed once and reused for positions and labels
x_axis = np.arange(len(years))
for i in range(3):
    # one subplot per target country, each with its own colour
    ax[i].bar(x_axis, final.loc[final.country == targets[i], 'runningSUM'],
              label=targets[i], color=color_ls[i])
    ax[i].set_xticks(x_axis)       # set the tick positions first ...
    ax[i].set_xticklabels(years)   # ... then replace their labels with the years
    ax[i].set_yticks([0, 25, 50, 100, 150, 200, 270])  # same y ticks on every subplot for easy comparison
    ax[i].set_xlabel('Year', fontsize=12)
    ax[i].legend()
    ax[i].grid(True, alpha=0.2)

# only the left-most subplot carries the y-axis label
ax[0].set_ylabel('Gold Medals Accumulated', fontsize=16)
plt.show()


# good example to generate a random color:
# Method 1 - plain loop over the unique countries (no groupby)
import random

# NOTE: the grid is hard-coded to 3 columns, so this expects at most 3 unique countries
fig, ax = plt.subplots(1, 3, figsize=(12, 6))
country_obj = final.Country.unique()
for i, country in enumerate(country_obj):
    country_rows = final.loc[final.Country == country]  # filter once, reuse below
    x_axis = np.arange(len(country_rows))
    # an RGB colour is a tuple of three floats in [0, 1]; uniform includes 1
    rand_color = (random.uniform(0, 1), random.uniform(0, 1), random.uniform(0, 1))
    ax[i].bar(x_axis, country_rows['cum_medals'], label=country, color=rand_color)
    ax[i].set_xticks(x_axis)
    ax[i].set_xticklabels(country_rows['Year'])
    ax[i].set_xlabel(country)
    ax[i].legend()
plt.show()


# Method 2 - groupby
# NOTE: the grid is hard-coded to 3 columns, so this expects at most 3 groups
fig, ax = plt.subplots(1, 3, figsize=(12, 6))
groupby_country_obj = final.groupby('Country')
# enumerate() supplies the subplot index, so no manual counter is needed
for index, (key, group) in enumerate(groupby_country_obj):
    x_axis = np.arange(len(group.Year))
    # an RGB colour is a tuple of three floats drawn from [0, 1)
    rand_color = (np.random.uniform(0, 1), np.random.uniform(0, 1), np.random.uniform(0, 1))
    ax[index].bar(x_axis, group.cum_medals, label=key, color=rand_color)
    ax[index].set_xticks(x_axis)
    ax[index].set_xticklabels(group.Year)
    ax[index].set_xlabel(key)
    ax[index].legend()
plt.show()



# Horizontal bars: one subplot per country, stacked vertically

fig, ax = plt.subplots(len(final.Country.unique()), 1, figsize=(12, 18))
fig.suptitle('Cummulative Medals Earned for Countries', y=0.9, fontsize=15)
groupby_generator = final.groupby('Country')
# enumerate() supplies the subplot index, so no manual counter is needed
for index, (key, group) in enumerate(groupby_generator):
    # the same random colour is used for the bars and for the x-label of this subplot
    rand_color = (np.random.uniform(0, 1), np.random.uniform(0, 1), np.random.uniform(0, 1))
    y_axis = np.arange(len(group.Year))
    ax[index].barh(y_axis, group.cumsum_3_year, label=key, color=rand_color)
    ax[index].set_yticks(y_axis)            # set the tick positions first ...
    ax[index].set_yticklabels(group.Year)   # ... then label them with the years
    ax[index].set_xlabel(key, fontsize=15, color=rand_color)
    ax[index].legend()
plt.show()


# Initalize a Figure and Axes
fig, ax= plt.subplots(figsize=(12,8))
fig.suptitle("Title text")
# Plot the CO2 variable in blue
ax.plot(climate_change.index, climate_change.co2, color='b')
ax.set_ylabel('CO2 Level', fontsize=15, color='b')
ax.tick_params('y', colors='b') # Change the tick paramaters for y axis, you can also pass on 'x' or 'both';
# Create a twin Axes that shares the x-axis
ax2 = ax.twinx() # This is make the two plots share the same x axis and eventually made a dual y axis;

# Plot the relative temperature in red
ax2.plot(climate_change.index, climate_change.relative_temp, color='r')
ax2.set_ylabel('relative temperature', fontsize=15, color='r')
ax2.tick_params('y', colors='r')
plt.show()


# Using functions:
# Define a function called plot_timeseries
def plot_timeseries(axes, x, y, color, xlabel, ylabel, label_font_size):
    """Draw ``y`` against ``x`` on ``axes`` and colour the y-axis to match the line.

    Args:
        axes: Object to draw on; must provide ``plot``, ``set_xlabel``,
            ``set_ylabel`` and ``tick_params`` (e.g. a matplotlib Axes).
        x: Values for the x-axis.
        y: Values for the y-axis.
        color: Colour used for the line, the y-axis label and the y ticks.
        xlabel: Text of the x-axis label.
        ylabel: Text of the y-axis label.
        label_font_size: Font size applied to both axis labels.

    Returns:
        None. The function only draws on ``axes``.
    """
    axes.plot(x, y, color=color)
    axes.set_xlabel(xlabel, fontsize=label_font_size)
    # Label and ticks share the line colour so that, on a twin-axes chart,
    # each y-axis can be matched to its line at a glance.
    axes.set_ylabel(ylabel, color=color, fontsize=label_font_size)
    axes.tick_params('y', colors=color)

fig, ax = plt.subplots(figsize=(12,10))
fig.suptitle("Title text")
# Plot the CO2 levels time-series in blue
plot_timeseries(ax, climate_change.index, climate_change['co2'], "blue", "Time (years)", "CO2 levels", 15)
# Create a twin Axes object that shares the x-axis
ax2 = ax.twinx()
# Plot the relative temperature data in red
plot_timeseries(ax2, climate_change.index, climate_change['relative_temp'], "red", "Time (years)", "Relative temperature (Celsius)", 15)

# Annotate point with relative temperature >1 degree
ax2.annotate(text=">1 degree", xy=(pd.Timestamp('2015-10-06'),1), # This is the position the text is pointing at (the point arrow point at); float numer pd.Timestamp('2015-10-06') is essentially a float
xytext=(12000, 1.3), # This is the position the text is placed at (the point arrow starts from); float numer
arrowprops={"arrowstyle":"->", 'color':'green'} # This is the style of the arrow
)
plt.show()



### --------:::::::::::::::::::::::Line chart with Error bars attached :::::::::::::::::::::
fig, ax = plt.subplots() # This is a special plot which plots line chart but each point would also have a small bar of error (the std in this case):
# Add Austin temperature data in each month with error bars
ax.errorbar(x = austin_weather['DATE'], y = austin_weather['MLY-TAVG-NORMAL'], yerr=austin_weather['MLY-TAVG-STDDEV'])
# Set the y-axis label
ax.set_ylabel('Temperature (Fahrenheit)')
plt.show()


'''::::::::::: Scatter Chart:::::::::::'''

fig, ax=plt.subplots(figsize=(8,6))
ax.scatter(climate_change['co2'], climate_change['relative_temp'], c=climate_change.index) # c=climate_change.index for render each point by index(datetime) for each point to get a different color; very beautiful;
ax.set_xlabel("CO2 (ppm)")
ax.set_ylabel("Relative temperature (C)")
plt.show()



'''::::::::::: Bar Chart:::::::::::'''

fig, ax = plt.subplots(figsize=(10,8))
fig.suptitle("Medals for different countries")
# Plot a bar-chart of gold medals as a function of country
ax.bar(medals.index, medals['Gold'])
# Set the x-axis tick labels to the country names
ax.set_xticks(medals.index) # should set the ticks first then change the xticklabels format;
ax.set_xticklabels(medals.index, rotation=90)
# Set the y-axis label
ax.set_ylabel('Number of medals')
plt.show()


### Stacked bar chart::::::::::::::::::::::::Stacked bar chart :::::::::::::::::::::
fig, ax = plt.subplots(figsize=(10,8))
fig.suptitle("Medals for different countries")
# Plot a bar-chart of gold medals as a function of country
ax.bar(medals.index, medals['Gold'], label='Gold')
# Stack bars for "Silver" on top with label "Silver"
ax.bar(medals.index, medals['Silver'], bottom=medals['Gold'], label='Silver')
# Stack bars for "Bronze" on top of that with label "Bronze"
ax.bar(medals.index, medals['Bronze'], bottom=medals['Gold']+medals['Silver'], label='Bronze')
# Set the x-axis tick labels to the country names
ax.set_xticks(medals.index)
ax.set_xticklabels(medals.index, rotation=90)
# Set the y-axis label
ax.set_ylabel('Number of medals')
# Display the legend
ax.legend()
plt.show()


### ::::::::Grouped (side-by-side) bar chart :::::::::::::::::::::
fig, ax = plt.subplots(figsize=(12,8))
fig.suptitle("Medals for different countries")
x_axis = np.array(range(len(medals.index)))
bar_width=0.3
# Plot a bar-chart of gold, Silver, Bronze medals as a function of country
ax.bar(x_axis-bar_width, medals['Gold'], width=bar_width, label='Gold')
ax.bar(x_axis, medals['Silver'], width=bar_width, label='Silver')
ax.bar(x_axis+bar_width, medals['Bronze'], width=bar_width, label='Bronze')

# Set the x-axis tick labels to the country names
ax.set_xticks(x_axis)
ax.set_xticklabels(medals.index, rotation=45)
# Set the y-axis label
ax.set_ylabel('Number of medals')
# Display the legend
ax.legend()
ax.grid(alpha=0.5)
plt.show()


### A number of bars chart::::::::::::::::::::::::Error bar chart :::::::::::::::::::::
fig, ax = plt.subplots()
# Add a bar for the rowing "Height" column mean/std
ax.bar("Rowing", mens_rowing['Height'].mean(), yerr=mens_rowing['Height'].std()) # you can using 'string' for x_axis???
# Add a bar for the gymnastics "Height" column mean/std
ax.bar("Gymnastics",mens_gymnastics['Height'].mean(), yerr=mens_gymnastics['Height'].std())
# Label the y-axis
ax.set_ylabel("Height (cm)")
plt.show()


### A number of bars chart:::::::::::::::::::::::: Iterate to automating plot error bar :::::::::::::::::::::
fig, ax = plt.subplots(figsize=(15, 8))
bar_width=0.7
# Loop over the different sports branches
for sport in sports:
    # Extract the "Weight" values of the rows for this sport only
    sport_weights = summer2016.loc[summer2016['Sport'] == sport, 'Weight']
    # Add a bar for the "Weight" mean, with the std as the y error bar
    ax.bar(sport, sport_weights.mean(), yerr=sport_weights.std(), width=bar_width)

ax.set_xticks(sports)
ax.set_xticklabels(sports, rotation=90)
ax.set_ylabel("Weight")
# Save the figure to file
plt.savefig("sports_weights.png")


'''------ Histogram Chart:::::::::::'''
fig, ax = plt.subplots(figsize=(8,6))
# Plot a histogram of "Weight" for mens_rowing
ax.hist(mens_rowing['Weight'], label="Rowing", bins = 5, histtype='step', alpha=0.8)
# Compare to histogram of "Weight" for mens_gymnastics
ax.hist(mens_gymnastics['Weight'], label="Gymnastics", bins = 5, histtype='step', alpha=0.8)
# Set the x-axis label to "Weight (kg)"
ax.set_xlabel("Weight (kg)")
# Set the y-axis label to "# of observations"
ax.set_ylabel("# of observations")
ax.legend()
plt.show()

'''--------- Boxplot Chart:::::::::::'''
fig, ax = plt.subplots()
arr_data_inputs=[mens_rowing['Height'], mens_gymnastics['Height']]
xticklabels=["Rowing", "Gymnastics"]
# Add a boxplot for the "Height" column in the DataFrames
ax.boxplot(arr_data_inputs) # x is for inputing data, it can be one data set, an be an array of a collection of datasets(list of lists);
# Add x-axis tick labels:
ax.set_xticklabels(xticklabels)
# Add a y-axis label
ax.set_ylabel('Height (cm)')
plt.show()


''''******************** Line Chart************************'''
#different color using the keyword color with either an HTML color name or a HEX code:
plt.plot(days, money_spent, color='green')
plt.plot(days, money_spent_2, color='#AAAAAA')

# Dashed:
plt.plot(x_values, y_values, linestyle='--')
# Dotted:
plt.plot(x_values, y_values, linestyle=':')
# No line:
plt.plot(x_values, y_values, linestyle='')

# A circle dot line: round markers
plt.plot(x_values, y_values, marker='o')
# A square dot line:
plt.plot(x_values, y_values, marker='s')
# A star dot line:
plt.plot(x_values, y_values, marker='*')


#Line with Shaded Error
#plt.fill_between(x_values, y_lower, y_upper, alpha=0.2)
line1.fill_between(months, y_lower, y_upper, alpha=0.2)

#if we want to display a plot from x=0 to x=3 and from y=2 to y=5, we would call:
plt.axis([0, 3, 2, 5]) #plt.axis([min_x, max_x, min_y, max_y])
'''
we instead set it to these values, with both the x and y min and max values reversed:
plt.axis([3, 0, 5, 2])
What this will do is not throw an error, but instead the graph will be essentially drawn inverted, both on the x axis and the y axis. The x axis will be for x values from 3 to 0, and the y axis will be for values 5 to 2, both in decreasing order.'''

plt.xlabel("Time") # can be written: ax.set_xlabel("Year")
plt.ylabel("Dollars spent on coffee") # can be written: ax.set_ylabel("Test average")

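# On an Axes object the equivalent is ax.set_title("..."); it CAN NOT be written as ax.title(), because ax.title is the title's Text object, not a method **** IMPORTANT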
plt.title("My Last Twelve Years of Coffee Drinking")

#The command plt.subplot() needs three arguments to be passed into it:
#1 is the number of rows of subplots can be displayed in the chart
#2 is the number of columns of subplots can be displayed in the chart
#3 is the index of the subplot we want to create (counted from 1, row by row)
plt.subplot(1,2,1)
plt.plot(months, temperature)
plt.title("temperature over months")
plt.show()

plt.subplot(1,2,2)
plt.plot(temperature, flights_to_hawaii, 'o') #'o' just a dot not line.
plt.plot(temperature, flights_to_hawaii, '*') #'*' just a start dot not line.
plt.title("flights_to_hawaii over temperature")
plt.show()

# **** GOOD Line chart ***:
# Line Graph: Time Series Analysis for table : hourly_viwer_us
plt.figure(figsize=(12,8))
line_chart=plt.subplot()
y_lower = [i*0.85 for i in hourly_viwer_us.viewers]
y_upper = [i*1.15 for i in hourly_viwer_us.viewers]
line_chart.plot(hourly_viwer_us.hour, hourly_viwer_us.viewers)
line_chart.fill_between(hourly_viwer_us.hour, y_lower, y_upper, alpha=0.2)
line_chart.set_xlabel("Hour")
line_chart.set_ylabel("Viewers")
line_chart.set_title("Time Series")
line_chart.legend(["2015-01-01"])
line_chart.set_xticks(hourly_viwer_us.hour)
#line_chart.set_yticks("Hour")
plt.show()

# **** GOOD Line chart ***:
plt.plot(discount_rate,npvs_a, linewidth = 2.0, color = "red", label = "Project A")
plt.plot(discount_rate,npvs_b, linewidth = 2.0, color = "blue", label = "Project B")
plt.axhline(y=0, linewidth = 0.5, color = "black") # -- adding a horizontal line;
plt.title('NPV Profile for Projects A and B')
plt.xlabel('Discount Rate')
plt.ylabel('Net Present Value')
plt.legend()
plt.show()

# OR:

plt.style.use("ggplot")  # select the style before creating the figure so the figure itself picks it up
plt.figure(figsize=(10, 8))
plt.plot(uk_df.Age, uk_df.SalaryInUSD, "^--g", label="UK")  # "^--g" is the '[marker][line][color]' format string
# each country's salaries are plotted against that same country's ages
plt.plot(us_df.Age, us_df.SalaryInUSD, label="US", marker="*", linestyle=":", color="y")
plt.plot(au_df.Age, au_df.SalaryInUSD, label="AU", linestyle="-.")
plt.plot(ch_df.Age, ch_df.SalaryInUSD, label="CH", linestyle=":", marker=".", color="#5a7d9a")
plt.title("Median Salary (in USD) Earned by IT worker by Age in UK, US, AU, CH")
plt.legend()  # no labels passed here: they come from the label= arguments of the plot calls above
plt.xlabel("Age")
plt.ylabel("Median Salary (in USD)")
plt.grid(True, alpha=0.3)
plt.show()


# pandas how to draw customized DATE in X axis::::::::: customized x axis:
plt.figure(figsize=(16,12))
x_axis = np.array(final.year)
plt.plot(x_axis, final.Births)
plt.plot(x_axis, final.Deaths)
plt.xticks([x_axis[i] for i in range(0, len(x_axis), 2)], # here to customize the number of x axis you want to display:
["year {}".format(x_axis[i]) for i in range(0, len(x_axis), 2)], rotation=45)
# here to replace the actually string of the label of x axis you want to display:
plt.legend(['Births', 'Death'])
plt.grid(alpha=0.5)
plt.show()

# pandas how to draw customized DATE in X axis::::::::: customized x axis:
plt.figure(figsize=(20,12))
x_axis = [i for i in range(0, len(final.Date))] # x_axis must match the date of the y axis: final.Births
plt.plot(x_axis, final.Births)
plt.plot(x_axis, final.Deaths)
plt.xticks([i for i in range(0, len(final.Date), 2)], # here to customize the number of x axis you want to display:
[final.Date[i].strftime("%d-%B-%Y") for i in range(0, len(final.Date), 2)], rotation=90)
# here to replace the actually string of the label of x axis you want to display:
plt.legend(['Births', 'Death'])
plt.grid(alpha=0.5)
plt.show()

# display the month names on the x axis by using the calendar package:
import calendar  # standard library; calendar.month_name[1] == 'January'

plt.figure(figsize=(10, 8))
plt.plot(f1.index, f1)
# presumably f1.index holds month numbers 1-12 (calendar.month_name[0] is an empty string) - verify against the data
plt.xticks(f1.index, [calendar.month_name[i] for i in f1.index], rotation=45)
plt.show()


###### Equivalent methods for plot and axes

plt.xlabel()
ax.set_xlabel()

plt.xticks()
plt.xticks(rotation='vertical') # to make the xlabels verticical show
plt.xticks(rotation=50)
ax.set_xticks() # preferred way

plt.xticks(x_positions, chart_labels)

# when labels are particularly long, use the rotation keyword to rotate them by some degrees:
plt.xticks([0.1, 0.6, 0.8], ['10%', '60%', '80%'], rotation=30)  # pyplot has no xticklabels(); xticks(ticks, labels) sets positions and labels together
ax.set_xticks([0.1, 0.6, 0.8])  # Axes API: set the tick positions first ...
ax.set_xticklabels(['10%', '60%', '80%'], rotation=30)  # ... then replace their labels

plt.rcParams['ytick.labelsize'] = 8 # to change the yticks font size
plt.rcParams['xtick.labelsize'] = 8.5
plt.rcParams['font.sans-serif']=['SimHei'] # to show normal chinese fonts
plt.rcParams['axes.unicode_minus']=False # to show negative sign

plt.title("flights_to_hawaii over temperature")
ax.set_title("flights_to_hawaii over temperature")

plt.subplots_adjust(bottom =0.2, wspace=0.5) # to change the space between subplot.
'''
left — the left-side margin, with a default of 0.125. You can increase this number to make room for a y-axis label
right — the right-side margin, with a default of 0.9. You can increase this to make more room for the figure, or decrease it to make room for a legend
bottom — the bottom margin, with a default of 0.1. You can increase this to make room for tick mark labels or an x-axis label
top — the top margin, with a default of 0.9
wspace — the horizontal space between adjacent subplots, with a default of 0.2
hspace — the vertical space between adjacent subplots, with a default of 0.2
'''
#Adding a labels:
plt.legend(['parabola', 'cubic'], loc=6)
plt.show()
# OR:
plt.plot([0, 1, 2, 3, 4], [0, 1, 4, 9, 16],label="parabola")
plt.plot([0, 1, 2, 3, 4], [0, 1, 8, 27, 64],label="cubic")
plt.legend() # Still need this command!
plt.show()

#In order to be sure that you don’t have any stray lines, you can use the command plt.close('all') to clear all existing plots before you plot a new one.
plt.close('all')

#use the command plt.close('all') to clear all existing plots before you plot a new one.
#To create a figure with a width of 4 inches, and height of 10 inches, we would use:
plt.figure(figsize=(4, 10))

# another way to work:
fig = plt.figure()
fig.add_subplot(1,1,1)
plt.scatter(hours_reported,exam_scores)
plt.title("Orion in 2D")


'''+++++++++++++++++++++++++++++++++++++++++++ Scatter plot: =================================== ::::::: '''
gdp_cap=[974.5803384, 5937.029525999998, 6223.367465, 4797.231267, 12779.37964, 34435.367439999995, 36126.4927, 29796.04834, 1391.253792, 33692.60508, 1441.284873]
life_exp = [43.828, 76.423, 72.301, 42.731, 75.32, 81.235, 79.829, 75.635, 64.062, 79.441, 56.728, 65.554, 74.852, 50.728, 72.39, 73.005, 52.295, 49.58, 59.723]
pop = [31.889923, 3.600523, 33.333216, 12.420476, 40.301927, 20.434176, 8.199783, 0.708573, 150.448339, 10.392226, 8.078314, 9.119152, 4.552198, 1.639131, 190.010647]
col = ['red', 'green', 'blue', 'blue', 'yellow', 'black', 'green', 'red', 'red', 'green', 'blue', 'yellow', 'green', 'blue', 'yellow', 'green', 'blue', 'blue', 'red', 'blue']
# Import numpy as np
import numpy as np
# Store pop as a numpy array: np_pop
np_pop = np.array(pop)
# Double np_pop
np_pop = np_pop*2
# Update: set s argument to np_pop
plt.scatter(gdp_cap, life_exp, s = np_pop, c=col) # s is The marker size in points**2, c=col is array-like or list of colors or color, optional The marker colors;
# Previous customizations
plt.xscale('log') # using logarithmic scale to make the graph wider;
plt.xlabel('GDP per Capita [in USD]')
plt.ylabel('Life Expectancy [in years]')
plt.title('World Development in 2007')
plt.xticks([1000, 10000, 100000],['1k', '10k', '100k'])
plt.show()



# Scatter chart:
# scatter:
girls_grades = [89, 90, 70, 89, 100, 80, 90, 100, 80, 34]
boys_grades = [30, 29, 49, 48, 100, 48, 38, 45, 20, 30]
grades_range = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]

ax9=plt.subplot()
ax9.scatter(grades_range, girls_grades, color='r')
ax9.scatter(grades_range, boys_grades, color='b')
ax9.set_xlabel('Grades Range')
ax9.set_ylabel('Grades Scored')
ax9.set_title('scatter plot')
plt.show()



# To plot scatter chart -------------------------------------------:
fig=plt.figure(figsize=(18,14))
fig.suptitle("Latitude and Longitude for Countries categorized by Population and Continent",
y=0.92, # y=0.92 is the y position of the title ploted;
fontsize=25)

scatter = plt.scatter(merged_df10.avg_longitude, merged_df10.avg_latitude,
s=merged_df10.pop_size.values/30000,
c=continent_color_lst, marker='.')

for continent, color in continent_color.items():
    # Shortcut: draw an empty scatter for every continent so that the legend
    # shows each colour together with its continent label.
    plt.scatter([], [], c=color, label=continent, s=500)

plt.xlabel('Average Longitude', fontsize=15)
plt.ylabel('Average Latitude', fontsize=15)
plt.legend(loc=2, markerscale=0.5,scatterpoints=1, fontsize=15)
plt.grid(True, alpha=0.3, linestyle='--')
#plt.savefig("{}-Q13.png".format(999999))
plt.show()

''''******************** Bar Chart************************'''
# A bar chart is good for categorical data; it is not well suited to data with time on the x-axis (e.g. one value per year).
# plot a single figure:
plt.figure(figsize=(8,6))
plt.bar(published_year.index , published_year.sum_published)
plt.xticks(published_year.index, published_year.year_publised)
plt.show()


drinks = ["cappuccino", "latte", "chai", "americano", "mocha", "espresso"]
sales = [91, 76, 56, 66, 52, 27]

plt.subplot() # always put this line before the bar plot method.
plt.bar(range(len(sales)), sales)



# The preferred way: work on the Axes object returned by plt.subplot()
ax1 = plt.subplot()
ax1.bar(range(len(sales)), sales)
ax1.set_xticks(range(len(drinks)))          # set the tick positions first ...
ax1.set_xticklabels(drinks, rotation=20)    # ... then label them with the drink names
ax1.set_title("Sales by drink")  # set_title() needs the title text; calling it with no argument raises TypeError

plt.show()

# Side-By-Side Bars:
# China Data (blue bars)
n = 1 # This is our first dataset (out of 2)
t = 2 # Number of datasets
d = 7 # Number of sets of bars
w = 0.8 # Width of each bar
x_values1 = [t*element + w*n for element in range(d)]

# US Data (orange bars)
n = 2 # This is our second dataset (out of 2)
t = 2 # Number of datasets
d = 7 # Number of sets of bars
w = 0.8 # Width of each bar
x_values2 = [t*element + w*n for element in range(d)]

def create_x(t, w, n, d):
    """Return the x positions of one dataset's bars in a side-by-side bar chart.

    Args:
        t: Number of datasets being compared.
        w: Width of each bar.
        n: 1-based position of this dataset among the ``t`` datasets.
        d: Number of sets of bars, i.e. how many positions to generate.

    Returns:
        A list of ``d`` positions, ``t*x + w*n`` for ``x`` in ``0 .. d-1``.
    """
    return [t*x + w*n for x in range(d)]

ax2 = plt.subplot()
ax2.bar(x_values1, sales1)
ax2.bar(x_values2, sales2)
ax2.set_xticks([(i+j)/2 for i, j in zip(x_values1,x_values2) ])
ax2.set_xticklabels(drinks, rotation = 10)
plt.show()



# -------------------------------------Best Side-By-Side Bars Method =======:
""" *************************************** Side-By-Side Bars: ----------- Better method """
plt.figure(figsize=(20,10), facecolor ="white")
x_indexes = np.arange(len(us_df.Age))
bar_width = 0.25
plt.bar(x_indexes-bar_width, us_df.CompensationInUSD, width=bar_width, label = "US")
plt.bar(x_indexes, uk_df.CompensationInUSD, width=bar_width, label = "UK")
plt.bar(x_indexes+bar_width, au_df.CompensationInUSD, width=bar_width, label = "AU")
#plt.bar(x_indexes+2*bar_width, ca_df.CompensationInUSD, width=bar_width, label = "CA")
plt.legend()
plt.xticks(ticks=x_indexes, labels=us_df.Age)
plt.xlabel("Ages")
plt.ylabel("Median Salary (in USD)")
plt.title("Median Salary (in USD) Earned by IT worker by Age in UK, US, AU, CH")
plt.show()




# *************************************** Side-By-Side Bars: -------
fig = plt.figure(figsize=(14,10))
#plt.rcParams['ytick.labelsize'] = 13
#plt.rcParams['xtick.labelsize'] = 13
fig.suptitle('Covid19 Economic Indicators Comparison Among Low, Middle, High Income Country Groups', fontsize=15)

x_axis = np.array([i for i in range(4)])
x_labels = ["Average Covid19 Economic \n Exposure Index \n Ex Aid and FDI",
"Average Covid19 Economic \n Exposure Index \n Ex Aid and FDI and Food Import",
"Average Foreign \n Direct Investment", "Average Foreign Direct Investment\n(Net Inflows Percent of GDP)"]
bar_width = 0.25
# three bars per indicator: shift the outer groups by one bar width around each x position
plt.bar(x_axis-bar_width, pivot_df9.LIC, width=bar_width, label='LIC')
plt.bar(x_axis, pivot_df9.MIC, width=bar_width, label='MIC')  # legend label now matches the MIC column it plots
plt.bar(x_axis+bar_width, pivot_df9.HIC, width=bar_width, label='HIC')
plt.xticks(x_axis, x_labels)
plt.ylabel('Values', fontsize=15, loc='top')
plt.grid(True, alpha=0.5, axis='y')
plt.legend(prop={'size': 15}, loc=9)
plt.show()

# *************************************** Side-By-Side Bars: ------- Example 3
# Plot the Chart:
fig = plt.figure(figsize=(14,10))
fig.suptitle('Comparison of Average Covid19 Economic Indicators \n for Low, Middle, High Income Country Groups', fontsize=15)

x_axis = np.array([i for i in range(4)])
x_labels = ["Average Covid19 Economic \n Exposure Index \n Ex Aid and FDI",
"Average Covid19 Economic \n Exposure Index \n Ex Aid and FDI and Food Import",
"Average Foreign \n Direct Investment", "Average Foreign Direct Investment\n(Net Inflows Percent of GDP)"]
bar_width = 0.25
# three bars per indicator: shift the outer groups by one bar width around each x position
plt.bar(x_axis-bar_width, pivot_df9.LIC, width=bar_width, label='LIC')
plt.bar(x_axis, pivot_df9.MIC, width=bar_width, label='MIC')  # legend label now matches the MIC column it plots
plt.bar(x_axis+bar_width, pivot_df9.HIC, width=bar_width, label='HIC')
plt.xticks(x_axis, x_labels)
plt.ylabel('Average Index Value', fontsize=15, loc='top')
plt.grid(True, alpha=0.5, axis='y')
plt.legend(prop={'size': 15}, loc=9, ncol=3) # ncol=3 make the legends horizontal in 3 columns



#horizontal bar chart:
'''----------------------------------------horizontal bar chart:-----------------------'''
# One horizontal bar per entry of dict_language_ten_common: the keys become the
# y tick labels and the values become the bar lengths.
plt.style.use("fivethirtyeight")
plt.figure(figsize=(12, 8))

y_axis = range(len(dict_language_ten_common))  # bar i is drawn at y position i
plt.barh(y_axis, dict_language_ten_common.values())
# Flip the y axis so position 0 (the first entry) ends up at the top;
# ax.invert_yaxis() on an Axes object does the same thing.
plt.gca().invert_yaxis()
plt.yticks(ticks=y_axis, labels=dict_language_ten_common.keys())

plt.xlabel("Number of People Who Use")
plt.title("Most Popular Languages")
plt.grid(alpha=0.5)
plt.show()

# horizontal bar chart ----------------- horizontal Stacked bar:
# Four segments are stacked left-to-right on every row. Each segment starts
# where the previous ones end, so a running `left` offset is carried along.
plt.figure(figsize=(15, 50))
bar_width = 0.8
neighbourhood_names = ny_df.neighbourhood.unique()
y_axis = np.arange(len(neighbourhood_names))

# (frame, colour) pairs in drawing order, from the leftmost segment to the rightmost.
stack_order = [
    (f_lst[2], 'lightblue'),
    (f_lst[1], 'lightcoral'),
    (f_lst[3], 'orange'),
    (f_lst[0], 'steelblue'),
]
left_edge = np.zeros(len(y_axis))  # the first segment starts at x = 0
for frame, segment_color in stack_order:
    # np.array() makes the offset addition element-wise by position.
    segment_widths = np.array(frame.avg_days_occupied_2019)
    plt.barh(y_axis, segment_widths, height=bar_width, left=left_edge, color=segment_color)
    left_edge = left_edge + segment_widths

plt.yticks(y_axis, neighbourhood_names)
plt.xlabel('Avg Days Occupied in 2019')

plt.show()


# Stacked Bars
# Two series on the same x positions; `bottom=sales1` makes the second series
# start where the first one ends, which stacks them.
ax = plt.subplot()
bar_positions = range(len(drinks))
ax.bar(bar_positions, sales1)
ax.bar(bar_positions, sales2, bottom=sales1)
ax.set_xticks(bar_positions)
ax.set_xticklabels(drinks, rotation=10)
ax.legend(["Location 1", "Location 2"])
plt.show()


## Stacked Bars :
# Three room types stacked per neighbourhood (shared at the bottom, private in
# the middle, entire on top), with the entire-room count written above each stack.
plt.figure(figsize=(12, 10))
x_axis = list(range(len(shared_room_df.neighbourhood)))

# Convert each column to a NumPy array once: np.array() is required so the
# additions below are element-wise by position.
shared_reviews = np.array(shared_room_df.sub_reviews)
private_reviews = np.array(private_room_df.sub_reviews)
entire_reviews = np.array(entire_room_df.sub_reviews)

plt.bar(x_axis, shared_reviews, color='red')
plt.bar(x_axis, private_reviews, bottom=shared_reviews, color='orange')
plt.bar(x_axis, entire_reviews, color='steelblue',
        bottom=shared_reviews + private_reviews)
plt.xticks(ticks=x_axis, labels=shared_room_df.neighbourhood, rotation=90)
plt.ylabel('Number Of Reviews')
plt.xlabel('Neighbourhood')

# adding figures only for the entire_room
total_values = entire_reviews + shared_reviews + private_reviews
for x_pos, entire_count, stack_top in zip(x_axis, entire_reviews, total_values):
    # plt.text(x, y, text): x_pos - 0.25 is the x coordinate of the text,
    # stack_top + 500 is its y coordinate (500 above the top of the whole stack),
    # and "{:,}" adds thousands separators, e.g. 1200 -> 1,200.
    plt.text(x_pos - 0.25, stack_top + 500, "{:,}".format(entire_count), rotation=90, color='white')

plt.show()



## ERROR Bar:
# Bar chart with an error bar on every bar.
drinks = ["cappuccino", "latte", "chai", "americano", "mocha", "espresso"]
ounces_of_milk = [6, 9, 4, 0, 9, 0]

# Plot the bar graph here
chart = plt.subplot()
# Symmetric error of 10% of each value, i.e. about [0.6, 0.9, 0.4, 0, 0.9, 0].
error = [0.1 * i for i in ounces_of_milk]
# the following error would give different lower and upper errors (a pair: lower first, then upper):
#error = ([0.1*i for i in ounces_of_milk], [0.2*i for i in ounces_of_milk])
chart.bar(range(len(drinks)), ounces_of_milk, yerr=error, capsize=5)  # capsize is the length of the error-bar caps
chart.set_xticks(range(len(drinks)))
chart.set_xticklabels(drinks)
plt.show()



# Pie Chart:
# One wedge per payment method, sized by its frequency.
pie1 = plt.subplot()
colors = ["red", "orange", "yellow", "green"]  # Optional
pie1.pie(payment_method_freqs, labels=payment_method_names, colors=colors)
pie1.axis('equal')  # equal aspect ratio so the pie is drawn as a circle
# The legend entries must describe the wedges drawn above, so use the same names as the labels.
pie1.legend(payment_method_names)
plt.show()

'''
Format strings for the `autopct` argument of pie(), which labels each wedge with its percentage:
'%0.2f' — 2 decimal places, like 4.08
'%0.2f%%' — 2 decimal places, but with a percent sign at the end, like 4.08%. You need two consecutive percent signs because a single % starts a format specifier; %% is the escape that displays one literal % on the chart.
'%d%%' — truncated to an int (not rounded) and with a percent sign at the end, like 4%. Use '%.0f%%' to round to the nearest int instead.
'''

# ***** A Good Pie Chart example *******:
# https://matplotlib.org/stable/gallery/pie_and_polar_charts/bar_of_pie.html#sphx-glr-gallery-pie-and-polar-charts-bar-of-pie-py
#
# https://matplotlib.org/3.1.0/gallery/pie_and_polar_charts/pie_and_donut_labels.html

plt.figure(figsize=(12, 8))
pie_chart = plt.subplot()
#colors=["slateblue", "orange", "yellow", "green","red","blue","indigo","violet","purple","pink", "black"]
colors = ['lightskyblue', 'gold', 'lightcoral', 'gainsboro', 'royalblue', 'lightpink', 'darkseagreen', 'sienna', 'khaki', 'gold', 'violet', 'yellowgreen']
# explode needs exactly one entry per wedge; only the first wedge is pulled out.
# Deriving the length from the data keeps it valid if the number of countries changes.
explode = [0.1] + [0] * (len(new_country_LOL_viewers.viewers) - 1)
pie_chart.pie(
    new_country_LOL_viewers.viewers,
    labels=new_country_LOL_viewers.country,
    explode=explode,  # This is to make the first wedge stand out from the chart
    shadow=True,
    colors=colors,
    autopct='%0.1f%%',  # percentage with one decimal place, e.g. 12.3%
    startangle=335,  # this is to rotate the pie chart to make sure the labels are not covered
    pctdistance=1.2,  # This is the position of the percentage values
    labeldistance=1.1,  # This is the position of the country labels
    wedgeprops={"edgecolor": "black"},  # to define the edge color of each wedge
)
pie_chart.axis("equal")
pie_chart.legend(new_country_LOL_viewers.country)
pie_chart.set_title("League of Legends Viewers' Whereabouts")
plt.show()


# A wonderful chart:
# Figure-level setup: a new figure with a figure-wide title, plus the wedge palette.
fig = plt.figure(figsize=(14, 10))
fig.suptitle(
    'Percentage of the world population living in South American Countries',
    fontsize=15,
)

colors = [
    'teal', 'gold', 'powderblue', 'limegreen', 'royalblue', 'lightpink',
    'blueviolet', 'darkseagreen', 'wheat', 'red', 'aqua', 'violet',
]

def percentage_in_world(val):
    """Turn a pie-wedge percentage into a fraction of the world population.

    ``val`` is a percentage (0-100) of the whole pie, which is what an
    ``autopct`` callable receives. It is scaled by the total
    ``south_american_df['Population']`` and divided by ``world_pop``, so the
    result is a fraction (format it with ``'{:.2%}'`` to display a percentage).

    Reads the module-level names ``south_american_df`` and ``world_pop``.
    NOTE(review): the pie below is drawn from ``south_american_final``; the
    result is only exact if that data sums to the same total - confirm.
    """
    return val / 100. * south_american_df['Population'].sum() / world_pop

# Per-wedge radial offsets (one per plotted country); a negative value pulls the wedge inwards.
explode = [0, 0.1, 0, 0, 0, 0.05, 0.35, -0.05, 0, 0.35, 0.1, 0.1]
country_names = south_american_final.Country[0:12]
plt.pie(
    south_american_final.Population[0:12],
    labels=country_names,
    labeldistance=1.02,
    colors=colors,
    explode=explode,
    shadow=False,
    normalize=True,
    startangle=15,
    rotatelabels=True,
    # autopct receives each wedge's share of the pie in percent; it is converted
    # with percentage_in_world and shown as a percentage with two decimals.
    autopct=lambda pct: '{:.2%}'.format(percentage_in_world(pct)),
    pctdistance=1.33,
    textprops={'fontsize': 12.5},
)

plt.axis('equal')
plt.legend(country_names, prop={'size': 12})  # legend font size 12
#plt.title('Percentage of the world population living in South American Countries')

plt.show()



# Histogram:
# density=True draws a probability density (the bar areas sum to 1) instead of raw counts.
ax.hist(exam_scores1, bins=[0, 25, 50, 75, 100], density=True)

# Customer spending split into 40 equal-width bins over the range 0-200.
h_chart = plt.subplot()
h_chart.hist(customer_amount.price, bins=40, range=(0, 200))
h_chart.set_title("Spending Per Customer")
h_chart.set_xlabel("Total Spent")
h_chart.set_ylabel("Number of Customers")
plt.show()


# Histogram with annotate
'''Compute and draw the histogram of *x*. The return value is a tuple
(*n*, *bins*, *patches*) or ([*n0*, *n1*, ...], *bins*, [*patches0*,
*patches1*,...]) if the input contains multiple data. See the
documentation of the *weights* parameter to draw a histogram of
already-binned data.'''
counts, bins, patches = plt.hist(df['Daily Log Rate of Return'].dropna())
# plt.hist returns the per-bin counts, the bin edges, and the patches
# (one patch per bar, like Rectangle(xy=(-0.0443595, 0), width=0.0090739, height=2, angle=0))

# annotate: write each bar's count just above the bar
for count, patch in zip(counts, patches):
    plt.annotate(
        str(int(count)),  # the annotation text must be a string
        # xy is the (x, y) position of the text, as floats:
        #   0.003 added to the bar's left edge moves the text to the right (horizontal)
        #   1 added to the bar's height moves the text up above the bar (vertical)
        xy=(patch.get_x() + 0.003, patch.get_height() + 1),
    )
plt.show()