matplotlib cheatsheet

so many options
visualization
matplotlib
cheatsheet
Published

January 13, 2021

Introduction to Data Visualization with Matplotlib

matplotlib cheatsheet in pdf

pdf lecture in github

Introduction to Matplotlib

Introduction to data visualization with Matplotlib

# Introducing the pyplot interface
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
plt.show()

# Adding data to axes
ax.plot(seattle_weather["MONTH"], seattle_weather["MLY-TAVG-NORMAL"])
plt.show()

Customizing your plots


# Adding markers
ax.plot(seattle_weather["MONTH"],
seattle_weather["MLY-PRCP-NORMAL"],
marker="o")
plt.show()

# Choosing markers
ax.plot(seattle_weather["MONTH"],
seattle_weather["MLY-PRCP-NORMAL"],
marker="v")
plt.show()

markers

# Setting the linestyle
fig, ax = plt.subplots()
ax.plot(seattle_weather["MONTH"],
seattle_weather["MLY-TAVG-NORMAL"],
marker="v", linestyle="--")
plt.show()

line style

# Eliminating lines with linestyle
fig, ax = plt.subplots()
ax.plot(seattle_weather["MONTH"],
seattle_weather["MLY-TAVG-NORMAL"],
marker="v", linestyle="None")
plt.show()

# Choosing color
fig, ax = plt.subplots()
ax.plot(seattle_weather["MONTH"],
seattle_weather["MLY-TAVG-NORMAL"],
marker="v", linestyle="--", color="r")
plt.show()

# Customizing the axes labels
ax.set_xlabel("Time (months)")
plt.show()

# Setting the y axis label
ax.set_xlabel("Time (months)")
ax.set_ylabel("Average temperature (Fahrenheit degrees)")
plt.show()

# Adding a title
ax.set_title("Weather in Seattle")
plt.show()

Small multiples

# Small multiples with plt.subplots
fig, ax = plt.subplots(3, 2)
plt.show()

# Adding data to subplots
# With a 3x2 grid, ax is a 2-D array of Axes; index it by [row, column].
ax.shape  # -> (3, 2)
ax[0, 0].plot(seattle_weather["MONTH"], seattle_weather["MLY-PRCP-NORMAL"],
              color='b')
plt.show()

# Subplots with data
fig, ax = plt.subplots(2, 1)
ax[0].plot(seattle_weather["MONTH"], seattle_weather["MLY-PRCP-NORMAL"],color='b')
ax[0].plot(seattle_weather["MONTH"], seattle_weather["MLY-PRCP-25PCTL"],linestyle='--', color='b')
ax[0].plot(seattle_weather["MONTH"], seattle_weather["MLY-PRCP-75PCTL"],linestyle='--', color='b')
ax[1].plot(austin_weather["MONTH"], austin_weather["MLY-PRCP-NORMAL"],color='r')
ax[1].plot(austin_weather["MONTH"], austin_weather["MLY-PRCP-25PCTL"],linestyle='--', color='r')
ax[1].plot(austin_weather["MONTH"], austin_weather["MLY-PRCP-75PCTL"],linestyle='--', color='r')
ax[0].set_ylabel("Precipitation (inches)")
ax[1].set_ylabel("Precipitation (inches)")
ax[1].set_xlabel("Time (months)")
plt.show()

# Sharing the y-axis range
fig, ax = plt.subplots(2, 1, sharey=True)

Plotting time-series

Plotting time-series data


# DateTimeIndex
climate_change.index
# Output (abridged):
# DatetimeIndex(['1958-03-06', '1958-04-06', '1958-05-06', '1958-06-06',
#                ...],
#               dtype='datetime64[ns]', name='date', length=706, freq=None)
               
               
# Plotting time-series data
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
ax.plot(climate_change.index, climate_change['co2'])
ax.set_xlabel('Time')
ax.set_ylabel('CO2 (ppm)')
plt.show()

# Zooming in on a decade
sixties = climate_change["1960-01-01":"1969-12-31"]
fig, ax = plt.subplots()
ax.plot(sixties.index, sixties['co2'])
ax.set_xlabel('Time')
ax.set_ylabel('CO2 (ppm)')
plt.show()

# Zooming in on one year
sixty_nine = climate_change["1969-01-01":"1969-12-31"]
fig, ax = plt.subplots()
ax.plot(sixty_nine.index, sixty_nine['co2'])
ax.set_xlabel('Time')
ax.set_ylabel('CO2 (ppm)')
plt.show()

Plotting time-series with different variables

# Plotting two time-series together
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
ax.plot(climate_change.index, climate_change["co2"])
ax.plot(climate_change.index, climate_change["relative_temp"])
ax.set_xlabel('Time')
ax.set_ylabel('CO2 (ppm) / Relative temperature')
plt.show()

# Using twin axes
fig, ax = plt.subplots()
ax.plot(climate_change.index, climate_change["co2"])
ax.set_xlabel('Time')
ax.set_ylabel('CO2 (ppm)')
ax2 = ax.twinx()
ax2.plot(climate_change.index, climate_change["relative_temp"])
ax2.set_ylabel('Relative temperature (Celsius)')
plt.show()

# Separating variables by color
fig, ax = plt.subplots()
ax.plot(climate_change.index, climate_change["co2"], color='blue')
ax.set_xlabel('Time')
ax.set_ylabel('CO2 (ppm)', color='blue')
ax2 = ax.twinx()
ax2.plot(climate_change.index, climate_change["relative_temp"],
color='red')
ax2.set_ylabel('Relative temperature (Celsius)', color='red')
plt.show()

# Coloring the ticks
fig, ax = plt.subplots()
ax.plot(climate_change.index, climate_change["co2"],
color='blue')
ax.set_xlabel('Time')
ax.set_ylabel('CO2 (ppm)', color='blue')
ax.tick_params('y', colors='blue')
ax2 = ax.twinx()
ax2.plot(climate_change.index,
climate_change["relative_temp"],
color='red')
ax2.set_ylabel('Relative temperature (Celsius)',
color='red')
ax2.tick_params('y', colors='red')
plt.show()

# A function that plots time-series
def plot_timeseries(axes, x, y, color, xlabel, ylabel):
    """Plot ``y`` against ``x`` on ``axes`` using a single colour theme.

    The line, the y-axis label and the y-axis ticks are all given
    ``color``; the x-axis label is set without a colour argument.

    Args:
        axes: Object exposing ``plot``, ``set_xlabel``, ``set_ylabel`` and
            ``tick_params`` (e.g. a Matplotlib Axes).
        x: Values for the horizontal axis.
        y: Values for the vertical axis.
        color: Colour applied to the line, y-label and y-ticks.
        xlabel: Text for the x-axis label.
        ylabel: Text for the y-axis label.
    """
    themed = {"color": color}
    axes.plot(x, y, **themed)
    axes.set_xlabel(xlabel)
    axes.set_ylabel(ylabel, **themed)
    # Colour only the y ticks so each twin axis can be told apart.
    axes.tick_params('y', colors=color)
# Using our function
fig, ax = plt.subplots()
plot_timeseries(ax, climate_change.index, climate_change['co2'],
                'blue', 'Time', 'CO2 (ppm)')
# The temperature series goes on the twin axes (ax2), not on ax again;
# otherwise both series share one y-scale and ax2 stays empty.
ax2 = ax.twinx()
plot_timeseries(ax2, climate_change.index, climate_change['relative_temp'],
                'red', 'Time', 'Relative temperature (Celsius)')
plt.show()

Annotating time-series data

# Annotation
fig, ax = plt.subplots()
plot_timeseries(ax, climate_change.index, climate_change['co2'],
                'blue', 'Time', 'CO2 (ppm)')
ax2 = ax.twinx()
plot_timeseries(ax2, climate_change.index,
                climate_change['relative_temp'],
                'red', 'Time', 'Relative temperature (Celsius)')
# xy is the (x, y) data point being annotated. The pandas class is spelled
# ``Timestamp``; ``pd.TimeStamp`` raises AttributeError.
ax2.annotate(">1 degree",
             xy=(pd.Timestamp("2015-10-06"), 1))
plt.show()

# Positioning the text
ax2.annotate(">1 degree",
xy=(pd.Timestamp('2015-10-06'), 1),
xytext=(pd.Timestamp('2008-10-06'), -0.2))

# Adding arrows to annotation
ax2.annotate(">1 degree",
xy=(pd.Timestamp('2015-10-06'), 1),
xytext=(pd.Timestamp('2008-10-06'), -0.2),
arrowprops={})

# Customizing arrow properties
ax2.annotate(">1 degree",
xy=(pd.Timestamp('2015-10-06'), 1),
xytext=(pd.Timestamp('2008-10-06'), -0.2),
arrowprops={"arrowstyle":"->", "color":"gray"})

Customizing annotations

Quantitative comparisons and statistical visualizations

Quantitative comparisons: bar-charts


# Olympic medals: visualizing the data
medals = pd.read_csv('medals_by_country_2016.csv', index_col=0)
fig, ax = plt.subplots()
ax.bar(medals.index, medals["Gold"])
plt.show()

# Interlude: rotate the tick labels
fig, ax = plt.subplots()
ax.bar(medals.index, medals["Gold"])
ax.set_xticklabels(medals.index, rotation=90)
ax.set_ylabel("Number of medals")
plt.show()

# Olympic medals: visualizing the other medals : stacked bar chart
# plt.subplots must be called; without the parentheses the unpacking into
# fig, ax fails because a function object is not iterable.
fig, ax = plt.subplots()
ax.bar(medals.index, medals["Gold"])
# bottom= starts the Silver bars where the Gold bars end.
ax.bar(medals.index, medals["Silver"], bottom=medals["Gold"])
ax.set_xticklabels(medals.index, rotation=90)
ax.set_ylabel("Number of medals")
plt.show()

# Olympic medals: visualizing all three
# plt.subplots must be called; without the parentheses the unpacking into
# fig, ax fails because a function object is not iterable.
fig, ax = plt.subplots()
ax.bar(medals.index, medals["Gold"])
ax.bar(medals.index, medals["Silver"], bottom=medals["Gold"])
# Bronze sits on top of the combined Gold + Silver height.
ax.bar(medals.index, medals["Bronze"],
       bottom=medals["Gold"] + medals["Silver"])
ax.set_xticklabels(medals.index, rotation=90)
ax.set_ylabel("Number of medals")
plt.show()

# Adding a legend
# plt.subplots must be called; without the parentheses the unpacking into
# fig, ax fails because a function object is not iterable.
fig, ax = plt.subplots()
# Each bar series gets a label= so that ax.legend() has entries to show.
ax.bar(medals.index, medals["Gold"], label="Gold")
ax.bar(medals.index, medals["Silver"], bottom=medals["Gold"],
       label="Silver")
ax.bar(medals.index, medals["Bronze"],
       bottom=medals["Gold"] + medals["Silver"],
       label="Bronze")
ax.set_xticklabels(medals.index, rotation=90)
ax.set_ylabel("Number of medals")
ax.legend()
plt.show()

Quantitative comparisons: histograms


# Introducing histograms
fig, ax = plt.subplots()
ax.hist(mens_rowing["Height"])
ax.hist(mens_gymnastic["Height"])
ax.set_xlabel("Height (cm)")
ax.set_ylabel("# of observations")
plt.show()

# Labels are needed
ax.hist(mens_rowing["Height"], label="Rowing")
ax.hist(mens_gymnastic["Height"], label="Gymnastics")
ax.set_xlabel("Height (cm)")
ax.set_ylabel("# of observations")
ax.legend()
plt.show()

# Customizing histograms: setting the number of bins
ax.hist(mens_rowing["Height"], label="Rowing", bins=5)
ax.hist(mens_gymnastic["Height"], label="Gymnastics", bins=5)
ax.set_xlabel("Height (cm)")
ax.set_ylabel("# of observations")
ax.legend()
plt.show()

# Customizing histograms: setting bin boundaries
ax.hist(mens_rowing["Height"], label="Rowing",
bins=[150, 160, 170, 180, 190, 200, 210])
ax.hist(mens_gymnastic["Height"], label="Gymnastics",
bins=[150, 160, 170, 180, 190, 200, 210])
ax.set_xlabel("Height (cm)")
ax.set_ylabel("# of observations")
ax.legend()
plt.show()

# Customizing histograms: transparency
# "Transparency" here comes from histtype="step": per the Matplotlib docs
# this draws each histogram as an unfilled outline, so the two overlapping
# distributions do not hide each other. Both calls use the same bin edges.
ax.hist(mens_rowing["Height"], label="Rowing",
bins=[150, 160, 170, 180, 190, 200, 210],
histtype="step")
ax.hist(mens_gymnastic["Height"], label="Gymnastics",
bins=[150, 160, 170, 180, 190, 200, 210],
histtype="step")
ax.set_xlabel("Height (cm)")
ax.set_ylabel("# of observations")
ax.legend()
plt.show()

Statistical plotting

# Adding error bars to bar charts
fig, ax = plt.subplots()
ax.bar("Rowing",mens_rowing["Height"].mean(),
yerr=mens_rowing["Height"].std())
ax.bar("Gymnastics",mens_gymnastics["Height"].mean(),
yerr=mens_gymnastics["Height"].std())
ax.set_ylabel("Height (cm)")
plt.show()

# Adding error bars to plots
fig, ax = plt.subplots()
ax.errorbar(seattle_weather["MONTH"],
seattle_weather["MLY-TAVG-NORMAL"],
yerr=seattle_weather["MLY-TAVG-STDDEV"])

ax.errorbar(austin_weather["MONTH"],
austin_weather["MLY-TAVG-NORMAL"],
yerr=austin_weather["MLY-TAVG-STDDEV"])

ax.set_ylabel("Temperature (Fahrenheit)")
plt.show()

# Adding boxplots
fig, ax = plt.subplots()
ax.boxplot([mens_rowing["Height"],
mens_gymnastics["Height"]])
ax.set_xticklabels(["Rowing", "Gymnastics"])
ax.set_ylabel("Height (cm)")
plt.show()

Quantitative comparisons: scatter plots


# Introducing scatter plots
fig, ax = plt.subplots()
ax.scatter(climate_change["co2"], climate_change["relative_temp"])
ax.set_xlabel("CO2 (ppm)")
ax.set_ylabel("Relative temperature (Celsius)")
plt.show()

# Customizing scatter plots
eighties = climate_change["1980-01-01":"1989-12-31"]
nineties = climate_change["1990-01-01":"1999-12-31"]
fig, ax = plt.subplots()
ax.scatter(eighties["co2"], eighties["relative_temp"],
color="red", label="eighties")
ax.scatter(nineties["co2"], nineties["relative_temp"],
color="blue", label="nineties")
ax.legend()
ax.set_xlabel("CO2 (ppm)")
ax.set_ylabel("Relative temperature (Celsius)")
plt.show()

# Encoding a third variable by color
fig, ax = plt.subplots()
ax.scatter(climate_change["co2"], climate_change["relative_temp"],
c=climate_change.index)
ax.set_xlabel("CO2 (ppm)")
ax.set_ylabel("Relative temperature (Celsius)")
plt.show()

Sharing visualizations with others

Preparing your figures to share with others


# Choosing a style
# plt.style.use changes the style for figures created afterwards in this
# session, until another style is selected.
plt.style.use("ggplot")
fig, ax = plt.subplots()
ax.plot(seattle_weather["MONTH"], seattle_weather["MLY-TAVG-NORMAL"])
ax.plot(austin_weather["MONTH"], austin_weather["MLY-TAVG-NORMAL"])
ax.set_xlabel("Time (months)")
ax.set_ylabel("Average temperature (Fahrenheit degrees)")
plt.show()
                                                  
# Back to the default
plt.style.use("default")                                                  

available styles

# The "bmh" style
plt.style.use("bmh")
fig, ax = plt.subplots()
ax.plot(seattle_weather["MONTH"], seattle_weather["MLY-TAVG-NORMAL"])
ax.plot(austin_weather["MONTH"], austin_weather["MLY-TAVG-NORMAL"])
ax.set_xlabel("Time (months)")
ax.set_ylabel("Average temperature (Fahrenheit degrees)")
plt.show()
                                                  
# Seaborn styles
# NOTE(review): Matplotlib 3.6 renamed its bundled seaborn styles to
# "seaborn-v0_8-<name>"; on recent versions this may need to be
# "seaborn-v0_8-colorblind" - confirm against the installed version.
plt.style.use("seaborn-colorblind")
fig, ax = plt.subplots()
ax.plot(seattle_weather["MONTH"], seattle_weather["MLY-TAVG-NORMAL"])
ax.plot(austin_weather["MONTH"], austin_weather["MLY-TAVG-NORMAL"])
ax.set_xlabel("Time (months)")
ax.set_ylabel("Average temperature (Fahrenheit degrees)")
plt.show()

Saving your visualizations


# Saving the figure to file
fig, ax = plt.subplots()
ax.bar(medals.index, medals["Gold"])
ax.set_xticklabels(medals.index, rotation=90)
ax.set_ylabel("Number of medals")
fig.savefig("gold_medals.png")

# Different file formats
# The output format is inferred from the file extension.
fig.savefig("gold_medals.jpg")
# JPEG quality is forwarded to Pillow through pil_kwargs; the bare
# quality= keyword was deprecated in Matplotlib 3.3 and later removed.
fig.savefig("gold_medals.jpg", pil_kwargs={"quality": 50})
fig.savefig("gold_medals.svg")

# Resolution
fig.savefig("gold_medals.png", dpi=300)

# Size
fig.set_size_inches([5, 3])

# Another aspect ratio
fig.set_size_inches([3, 5])

Automating figures from data


# Getting unique values of a column
sports = summer_2016_medals["Sport"].unique()

# Bar-chart of heights for all sports
fig, ax = plt.subplots()
for sport in sports:
    # Select only the rows belonging to the current sport.
    sport_df = summer_2016_medals[summer_2016_medals["Sport"] == sport]
    # One bar per sport: mean height, with the standard deviation as error bar.
    ax.bar(sport, sport_df["Height"].mean(),
           yerr=sport_df["Height"].std())
ax.set_ylabel("Height (cm)")
ax.set_xticklabels(sports, rotation=90)
plt.show()