matplotlib cheatsheet
so much options
- Introduction to Data Visualization with Matplotlib
Introduction to Data Visualization with Matplotlib
matplotlib cheatsheet in pdf
Customizing your plots
# Adding markers
ax.plot(seattle_weather["MONTH"],
seattle_weather["MLY-PRCP-NORMAL"],
marker="o")
plt.show()
# Choosing markers
ax.plot(seattle_weather["MONTH"],
seattle_weather["MLY-PRCP-NORMAL"],
marker="v")
plt.show()
# Setting the linestyle
fig, ax = plt.subplots()
ax.plot(seattle_weather["MONTH"],
seattle_weather["MLY-TAVG-NORMAL"],
marker="v", linestyle="--")
plt.show()
# Eliminating lines with linestyle
fig, ax = plt.subplots()
ax.plot(seattle_weather["MONTH"],
seattle_weather["MLY-TAVG-NORMAL"],
marker="v", linestyle="None")
plt.show()
# Choosing color
fig, ax = plt.subplots()
ax.plot(seattle_weather["MONTH"],
seattle_weather["MLY-TAVG-NORMAL"],
marker="v", linestyle="--", color="r")
plt.show()
# Customizing the axes labels
ax.set_xlabel("Time (months)")
plt.show()
# Setting the y axis label
ax.set_xlabel("Time (months)")
ax.set_ylabel("Average temperature (Fahrenheit degrees)")
plt.show()
# Adding a title
ax.set_title("Weather in Seattle")
plt.show()
Small multiples
# Small multiples with plt.subplots
fig, ax = plt.subplots(3, 2)
plt.show()
# Adding data to subplots
ax.shape
(3, 2)
ax[0, 0].plot(seattle_weather["MONTH"],seattle_weather["MLY-PRCP-NORMAL"],color='b')
plt.show()
# Subplots with data
fig, ax = plt.subplots(2, 1)
ax[0].plot(seattle_weather["MONTH"], seattle_weather["MLY-PRCP-NORMAL"],color='b')
ax[0].plot(seattle_weather["MONTH"], seattle_weather["MLY-PRCP-25PCTL"],linestyle='--', color='b')
ax[0].plot(seattle_weather["MONTH"], seattle_weather["MLY-PRCP-75PCTL"],linestyle='--', color='b')
ax[1].plot(austin_weather["MONTH"], austin_weather["MLY-PRCP-NORMAL"],color='r')
ax[1].plot(austin_weather["MONTH"], austin_weather["MLY-PRCP-25PCTL"],linestyle='--', color='r')
ax[1].plot(austin_weather["MONTH"], austin_weather["MLY-PRCP-75PCTL"],linestyle='--', color='r')
ax[0].set_ylabel("Precipitation (inches)")
ax[1].set_ylabel("Precipitation (inches)")
ax[1].set_xlabel("Time (months)")
plt.show()
# Sharing the y-axis range
fig, ax = plt.subplots(2, 1, sharey=True)
Plotting time-series data
# DateTimeIndex
climate_change.index
DatetimeIndex(['1958-03-06', '1958-04-06', '1958-05-06', '1958-06-06',
dtype='datetime64[ns]', name='date', length=706, freq=None)
# Plotting time-series data
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
ax.plot(climate_change.index, climate_change['co2'])
ax.set_xlabel('Time')
ax.set_ylabel('CO2 (ppm)')
plt.show()
# Zooming in on a decade
sixties = climate_change["1960-01-01":"1969-12-31"]
fig, ax = plt.subplots()
ax.plot(sixties.index, sixties['co2'])
ax.set_xlabel('Time')
ax.set_ylabel('CO2 (ppm)')
plt.show()
# Zooming in on one year
sixty_nine = climate_change["1969-01-01":"1969-12-31"]
fig, ax = plt.subplots()
ax.plot(sixty_nine.index, sixty_nine['co2'])
ax.set_xlabel('Time')
ax.set_ylabel('CO2 (ppm)')
plt.show()
Plotting time-series with different variables
# Plotting two time-series together
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
ax.plot(climate_change.index, climate_change["co2"])
ax.plot(climate_change.index, climate_change["relative_temp"])
ax.set_xlabel('Time')
ax.set_ylabel('CO2 (ppm) / Relative temperature')
plt.show()
# Using twin axes
fig, ax = plt.subplots()
ax.plot(climate_change.index, climate_change["co2"])
ax.set_xlabel('Time')
ax.set_ylabel('CO2 (ppm)')
ax2 = ax.twinx()
ax2.plot(climate_change.index, climate_change["relative_temp"])
ax2.set_ylabel('Relative temperature (Celsius)')
plt.show()
# Separating variables by color
fig, ax = plt.subplots()
ax.plot(climate_change.index, climate_change["co2"], color='blue')
ax.set_xlabel('Time')
ax.set_ylabel('CO2 (ppm)', color='blue')
ax2 = ax.twinx()
ax2.plot(climate_change.index, climate_change["relative_temp"],
color='red')
ax2.set_ylabel('Relative temperature (Celsius)', color='red')
plt.show()
# Coloring the ticks
fig, ax = plt.subplots()
ax.plot(climate_change.index, climate_change["co2"],
color='blue')
ax.set_xlabel('Time')
ax.set_ylabel('CO2 (ppm)', color='blue')
ax.tick_params('y', colors='blue')
ax2 = ax.twinx()
ax2.plot(climate_change.index,
climate_change["relative_temp"],
color='red')
ax2.set_ylabel('Relative temperature (Celsius)',
color='red')
ax2.tick_params('y', colors='red')
plt.show()
# A function that plots time-series
def plot_timeseries(axes, x, y, color, xlabel, ylabel):
axes.plot(x, y, color=color)
axes.set_xlabel(xlabel)
axes.set_ylabel(ylabel, color=color)
axes.tick_params('y', colors=color)
# Using our function
fig, ax = plt.subplots()
plot_timeseries(ax, climate_change.index, climate_change['co2'],'blue', 'Time', 'CO2 (ppm)')
ax2 = ax.twinx()
plot_timeseries(ax, climate_change.index,climate_change['relative_temp'],'red', 'Time', 'Relative temperature (Celsius)')
plt.show()
Annotating time-series data
# Annotation
fig, ax = plt.subplots()
plot_timeseries(ax, climate_change.index, climate_change['co2'],
'blue', 'Time', 'CO2 (ppm)')
ax2 = ax.twinx()
plot_timeseries(ax2, climate_change.index,
climate_change['relative_temp'],
'red', 'Time', 'Relative temperature (Celsius)')
ax2.annotate(">1 degree",
xy=[pd.TimeStamp("2015-10-06"), 1])
plt.show()
# Positioning the text
ax2.annotate(">1 degree",
xy=(pd.Timestamp('2015-10-06'), 1),
xytext=(pd.Timestamp('2008-10-06'), -0.2))
# Adding arrows to annotation
ax2.annotate(">1 degree",
xy=(pd.Timestamp('2015-10-06'), 1),
xytext=(pd.Timestamp('2008-10-06'), -0.2),
arrowprops={})
# Customizing arrow properties
ax2.annotate(">1 degree",
xy=(pd.Timestamp('2015-10-06'), 1),
xytext=(pd.Timestamp('2008-10-06'), -0.2),
arrowprops={"arrowstyle":"->", "color":"gray"})
Quantitative comparisons: bar-charts
# Olympic medals: visualizing the data
medals = pd.read_csv('medals_by_country_2016.csv', index_col=0)
fig, ax = plt.subplots()
ax.bar(medals.index, medals["Gold"])
plt.show()
# Interlude: rotate the tick labels
fig, ax = plt.subplots()
ax.bar(medals.index, medals["Gold"])
ax.set_xticklabels(medals.index, rotation=90)
ax.set_ylabel("Number of medals")
plt.show()
# Olympic medals: visualizing the other medals : stacked bar chart
fig, ax = plt.subplots
ax.bar(medals.index, medals["Gold"])
ax.bar(medals.index, medals["Silver"], bottom=medals["Gold"])
ax.set_xticklabels(medals.index, rotation=90)
ax.set_ylabel("Number of medals")
plt.show()
# Olympic medals: visualizing all three
fig, ax = plt.subplots
ax.bar(medals.index, medals["Gold"])
ax.bar(medals.index, medals["Silver"], bottom=medals["Gold"])
ax.bar(medals.index, medals["Bronze"],
bottom=medals["Gold"] + medals["Silver"])
ax.set_xticklabels(medals.index, rotation=90)
ax.set_ylabel("Number of medals")
plt.show()
# Adding a legend
fig, ax = plt.subplots
ax.bar(medals.index, medals["Gold"], label="Gold")
ax.bar(medals.index, medals["Silver"], bottom=medals["Gold"],
label="Silver")
ax.bar(medals.index, medals["Bronze"],
bottom=medals["Gold"] + medals["Silver"],
label="Bronze")
ax.set_xticklabels(medals.index, rotation=90)
ax.set_ylabel("Number of medals")
ax.legend()
plt.show()
Quantitative comparisons: histograms
# Introducing histograms
fig, ax = plt.subplots()
ax.hist(mens_rowing["Height"])
ax.hist(mens_gymnastic["Height"])
ax.set_xlabel("Height (cm)")
ax.set_ylabel("# of observations")
plt.show()
# Labels are needed
ax.hist(mens_rowing["Height"], label="Rowing")
ax.hist(mens_gymnastic["Height"], label="Gymnastics")
ax.set_xlabel("Height (cm)")
ax.set_ylabel("# of observations")
ax.legend()
plt.show()
# Customizing histograms: setting the number of bins
ax.hist(mens_rowing["Height"], label="Rowing", bins=5)
ax.hist(mens_gymnastic["Height"], label="Gymnastics", bins=5)
ax.set_xlabel("Height (cm)")
ax.set_ylabel("# of observations")
ax.legend()
plt.show()
# Customizing histograms: setting bin boundaries
ax.hist(mens_rowing["Height"], label="Rowing",
bins=[150, 160, 170, 180, 190, 200, 210])
ax.hist(mens_gymnastic["Height"], label="Gymnastics",
bins=[150, 160, 170, 180, 190, 200, 210])
ax.set_xlabel("Height (cm)")
ax.set_ylabel("# of observations")
ax.legend()
plt.show()
# Customizing histograms: transparency
ax.hist(mens_rowing["Height"], label="Rowing",
bins=[150, 160, 170, 180, 190, 200, 210],
histtype="step")
ax.hist(mens_gymnastic["Height"], label="Gymnastics",
bins=[150, 160, 170, 180, 190, 200, 210],
histtype="step")
ax.set_xlabel("Height (cm)")
ax.set_ylabel("# of observations")
ax.legend()
plt.show()
Statistical plotting
# Adding error bars to bar charts
fig, ax = plt.subplots()
ax.bar("Rowing",mens_rowing["Height"].mean(),
yerr=mens_rowing["Height"].std())
ax.bar("Gymnastics",mens_gymnastics["Height"].mean(),
yerr=mens_gymnastics["Height"].std())
ax.set_ylabel("Height (cm)")
plt.show()
# Adding error bars to plots
fig, ax = plt.subplots()
ax.errorbar(seattle_weather["MONTH"],
seattle_weather["MLY-TAVG-NORMAL"],
yerr=seattle_weather["MLY-TAVG-STDDEV"])
ax.errorbar(austin_weather["MONTH"],
austin_weather["MLY-TAVG-NORMAL"],
yerr=austin_weather["MLY-TAVG-STDDEV"])
ax.set_ylabel("Temperature (Fahrenheit)")
plt.show()
# Adding boxplots
fig, ax = plt.subplots()
ax.boxplot([mens_rowing["Height"],
mens_gymnastics["Height"]])
ax.set_xticklabels(["Rowing", "Gymnastics"])
ax.set_ylabel("Height (cm)")
plt.show()
Quantitative comparisons: scatter plots
# Introducing scatter plots
fig, ax = plt.subplots()
ax.scatter(climate_change["co2"], climate_change["relative_temp"])
ax.set_xlabel("CO2 (ppm)")
ax.set_ylabel("Relative temperature (Celsius)")
plt.show()
# Customizing scatter plots
eighties = climate_change["1980-01-01":"1989-12-31"]
nineties = climate_change["1990-01-01":"1999-12-31"]
fig, ax = plt.subplots()
ax.scatter(eighties["co2"], eighties["relative_temp"],
color="red", label="eighties")
ax.scatter(nineties["co2"], nineties["relative_temp"],
color="blue", label="nineties")
ax.legend()
ax.set_xlabel("CO2 (ppm)")
ax.set_ylabel("Relative temperature (Celsius)")
plt.show()
# Encoding a third variable by color
fig, ax = plt.subplots()
ax.scatter(climate_change["co2"], climate_change["relative_temp"],
c=climate_change.index)
ax.set_xlabel("CO2 (ppm)")
ax.set_ylabel("Relative temperature (Celsius)")
plt.show()
Preparing your figures to share with others
# Choosing a style
plt.style.use("ggplot")
fig, ax = plt.subplots()
ax.plot(seattle_weather["MONTH"], seattle_weather["MLY-TAVG-NORMAL"
ax.plot(austin_weather["MONTH"], austin_weather["MLY-TAVG-NORMAL"])
ax.set_xlabel("Time (months)")
ax.set_ylabel("Average temperature (Fahrenheit degrees)")
plt.show()
# Back to the default
plt.style.use("default")
# The "bmh" style
plt.style.use("bmh")
fig, ax = plt.subplots()
ax.plot(seattle_weather["MONTH"], seattle_weather["MLY-TAVG-NORMAL"
ax.plot(austin_weather["MONTH"], austin_weather["MLY-TAVG-NORMAL"])
ax.set_xlabel("Time (months)")
ax.set_ylabel("Average temperature (Fahrenheit degrees)")
plt.show()
# Seaborn styles
plt.style.use("seaborn-colorblind")
fig, ax = plt.subplots()
ax.plot(seattle_weather["MONTH"], seattle_weather["MLY-TAVG-NORMAL"
ax.plot(austin_weather["MONTH"], austin_weather["MLY-TAVG-NORMAL"])
ax.set_xlabel("Time (months)")
ax.set_ylabel("Average temperature (Fahrenheit degrees)")
plt.show()
Saving your visualizations
# Saving the figure to file
fig, ax = plt.subplots()
ax.bar(medals.index, medals["Gold"])
ax.set_xticklabels(medals.index, rotation=90)
ax.set_ylabel("Number of medals")
fig.savefig("gold_medals.png")
# Different file formats
fig.savefig("gold_medals.jpg")
fig.savefig("gold_medals.jpg", quality=50)
fig.savefig("gold_medals.svg")
# Resolution
fig.savefig("gold_medals.png", dpi=300)
# Size
fig.set_size_inches([5, 3])
# Another aspect ratio
fig.set_size_inches([3, 5])
Automating figures from data
# Getting unique values of a column
sports = summer_2016_medals["Sport"].unique()
# Bar-chart of heights for all sports
fig, ax = plt.subplots()
for sport in sports:
sport_df = summer_2016_medals[summer_2016_medals["Sport"] == spor
ax.bar(sport, sport_df["Height"].mean(),
yerr=sport_df["Height"].std())
ax.set_ylabel("Height (cm)")
ax.set_xticklabels(sports, rotation=90)
plt.show()