Introduction to Data Visualization with Matplotlib
matplotlib cheatsheet in pdf
Introduction to Matplotlib
Introduction to data visualization with Matplotlib
# Introducing the pyplot interface
import matplotlib.pyplot as plt

# Create a Figure and a single Axes, then render the (still empty) plot.
fig, ax = plt.subplots()
plt.show()
# Adding data to axes
# x = month, y = monthly average temperature normal.
ax.plot(seattle_weather["MONTH"], seattle_weather["MLY-TAVG-NORMAL"])
plt.show()
Customizing your plots
# Adding markers
# marker="o" draws a circle at every data point.
ax.plot(seattle_weather["MONTH"],
        seattle_weather["MLY-PRCP-NORMAL"],
        marker="o")
plt.show()
# Choosing markers
# marker="v" draws a downward-pointing triangle at every data point.
ax.plot(seattle_weather["MONTH"],
        seattle_weather["MLY-PRCP-NORMAL"],
        marker="v")
plt.show()
# Setting the linestyle
fig, ax = plt.subplots()
# linestyle="--" connects the markers with a dashed line.
ax.plot(seattle_weather["MONTH"],
        seattle_weather["MLY-TAVG-NORMAL"],
        marker="v", linestyle="--")
plt.show()
# Eliminating lines with linestyle
fig, ax = plt.subplots()
# linestyle="None" (a string, not the None object) keeps only the markers.
ax.plot(seattle_weather["MONTH"],
        seattle_weather["MLY-TAVG-NORMAL"],
        marker="v", linestyle="None")
plt.show()
# Choosing color
fig, ax = plt.subplots()
# color="r" is the single-letter shorthand for red.
ax.plot(seattle_weather["MONTH"],
        seattle_weather["MLY-TAVG-NORMAL"],
        marker="v", linestyle="--", color="r")
plt.show()
# Customizing the axes labels
# Label the x-axis of the current Axes.
ax.set_xlabel("Time (months)")
plt.show()
# Setting the y axis label
ax.set_xlabel("Time (months)")
ax.set_ylabel("Average temperature (Fahrenheit degrees)")
plt.show()
# Adding a title
ax.set_title("Weather in Seattle")
plt.show()
Small multiples
# Small multiples with plt.subplots
# Request a grid of 3 rows x 2 columns; `ax` is then an array of Axes.
fig, ax = plt.subplots(3, 2)
plt.show()
# Adding data to subplots
ax.shape  # -> (3, 2)
# Index the Axes array with [row, column] to pick the subplot to draw on.
ax[0, 0].plot(seattle_weather["MONTH"], seattle_weather["MLY-PRCP-NORMAL"], color='b')
plt.show()
# Subplots with data
# Two stacked subplots: Seattle on top (blue), Austin below (red).
fig, ax = plt.subplots(2, 1)

# Solid line = normal precipitation; dashed lines = 25th/75th percentiles.
ax[0].plot(seattle_weather["MONTH"], seattle_weather["MLY-PRCP-NORMAL"], color='b')
ax[0].plot(seattle_weather["MONTH"], seattle_weather["MLY-PRCP-25PCTL"], linestyle='--', color='b')
ax[0].plot(seattle_weather["MONTH"], seattle_weather["MLY-PRCP-75PCTL"], linestyle='--', color='b')

ax[1].plot(austin_weather["MONTH"], austin_weather["MLY-PRCP-NORMAL"], color='r')
ax[1].plot(austin_weather["MONTH"], austin_weather["MLY-PRCP-25PCTL"], linestyle='--', color='r')
ax[1].plot(austin_weather["MONTH"], austin_weather["MLY-PRCP-75PCTL"], linestyle='--', color='r')

# Both subplots get a y label; only the bottom one gets the x label.
ax[0].set_ylabel("Precipitation (inches)")
ax[1].set_ylabel("Precipitation (inches)")
ax[1].set_xlabel("Time (months)")
plt.show()
# Sharing the y-axis range
# sharey=True makes both subplots use the same y-axis limits.
fig, ax = plt.subplots(2, 1, sharey=True)
Plotting time-series
Plotting time-series data
# DatetimeIndex
climate_change.index
# Output (truncated):
# DatetimeIndex(['1958-03-06', '1958-04-06', '1958-05-06', '1958-06-06',
#                ...
#               dtype='datetime64[ns]', name='date', length=706, freq=None)
# Plotting time-series data
import matplotlib.pyplot as plt

fig, ax = plt.subplots()
# The index of `climate_change` supplies the x values.
ax.plot(climate_change.index, climate_change['co2'])
ax.set_xlabel('Time')
ax.set_ylabel('CO2 (ppm)')
plt.show()
# Zooming in on a decade
# Slice the rows by date strings to keep only the 1960s.
sixties = climate_change["1960-01-01":"1969-12-31"]
fig, ax = plt.subplots()
ax.plot(sixties.index, sixties['co2'])
ax.set_xlabel('Time')
ax.set_ylabel('CO2 (ppm)')
plt.show()
# Zooming in on one year
# Slice the rows by date strings to keep only 1969.
sixty_nine = climate_change["1969-01-01":"1969-12-31"]
fig, ax = plt.subplots()
ax.plot(sixty_nine.index, sixty_nine['co2'])
ax.set_xlabel('Time')
ax.set_ylabel('CO2 (ppm)')
plt.show()
Plotting time-series with different variables
# Plotting two time-series together
import matplotlib.pyplot as plt

fig, ax = plt.subplots()
# Both series are drawn on the same Axes, so they share one y scale.
ax.plot(climate_change.index, climate_change["co2"])
ax.plot(climate_change.index, climate_change["relative_temp"])
ax.set_xlabel('Time')
ax.set_ylabel('CO2 (ppm) / Relative temperature')
plt.show()
# Using twin axes
fig, ax = plt.subplots()
ax.plot(climate_change.index, climate_change["co2"])
ax.set_xlabel('Time')
ax.set_ylabel('CO2 (ppm)')

# twinx() creates a second Axes sharing the x-axis but with its own y-axis.
ax2 = ax.twinx()
ax2.plot(climate_change.index, climate_change["relative_temp"])
ax2.set_ylabel('Relative temperature (Celsius)')
plt.show()
# Separating variables by color
fig, ax = plt.subplots()
# Each series and its y label share a color so they can be told apart.
ax.plot(climate_change.index, climate_change["co2"], color='blue')
ax.set_xlabel('Time')
ax.set_ylabel('CO2 (ppm)', color='blue')

ax2 = ax.twinx()
ax2.plot(climate_change.index, climate_change["relative_temp"],
         color='red')
ax2.set_ylabel('Relative temperature (Celsius)', color='red')
plt.show()
# Coloring the ticks
fig, ax = plt.subplots()
ax.plot(climate_change.index, climate_change["co2"],
        color='blue')
ax.set_xlabel('Time')
ax.set_ylabel('CO2 (ppm)', color='blue')
# Color the y-axis ticks to match the series drawn on this Axes.
ax.tick_params('y', colors='blue')

ax2 = ax.twinx()
ax2.plot(climate_change.index,
         climate_change["relative_temp"],
         color='red')
ax2.set_ylabel('Relative temperature (Celsius)',
               color='red')
ax2.tick_params('y', colors='red')
plt.show()
# A function that plots time-series
def plot_timeseries(axes, x, y, color, xlabel, ylabel):
    """Draw one series on `axes` and style its y-axis in a matching color.

    Parameters
    ----------
    axes : object exposing plot / set_xlabel / set_ylabel / tick_params
        The Axes to draw on.
    x, y : sequences
        Data passed straight through to ``axes.plot``.
    color : str
        Color used for the line, the y-axis label and the y-axis ticks.
    xlabel, ylabel : str
        Axis label texts.

    Returns
    -------
    None
    """
    axes.plot(x, y, color=color)
    axes.set_xlabel(xlabel)
    # Only the y-axis label and ticks are colored; the x label is left as is.
    axes.set_ylabel(ylabel, color=color)
    axes.tick_params('y', colors=color)


# Using our function
fig, ax = plt.subplots()
plot_timeseries(ax, climate_change.index, climate_change['co2'],
                'blue', 'Time', 'CO2 (ppm)')
ax2 = ax.twinx()
# The second series goes on the twin Axes (ax2) so it gets its own y scale.
plot_timeseries(ax2, climate_change.index, climate_change['relative_temp'],
                'red', 'Time', 'Relative temperature (Celsius)')
plt.show()
Annotating time-series data
# Annotation
fig, ax = plt.subplots()
plot_timeseries(ax, climate_change.index, climate_change['co2'],
                'blue', 'Time', 'CO2 (ppm)')
ax2 = ax.twinx()
plot_timeseries(ax2, climate_change.index,
                climate_change['relative_temp'],
                'red', 'Time', 'Relative temperature (Celsius)')
# xy is the data point being annotated: (date on the x-axis, value on ax2's y-axis).
ax2.annotate(">1 degree",
             xy=(pd.Timestamp("2015-10-06"), 1))
plt.show()
# Positioning the text
# xytext places the label away from the annotated point given by xy.
ax2.annotate(">1 degree",
             xy=(pd.Timestamp('2015-10-06'), 1),
             xytext=(pd.Timestamp('2008-10-06'), -0.2))
# Adding arrows to annotation
# An empty arrowprops dict draws an arrow from the text to xy with default styling.
ax2.annotate(">1 degree",
             xy=(pd.Timestamp('2015-10-06'), 1),
             xytext=(pd.Timestamp('2008-10-06'), -0.2),
             arrowprops={})
# Customizing arrow properties
# arrowprops keys select the arrow style and its color.
ax2.annotate(">1 degree",
             xy=(pd.Timestamp('2015-10-06'), 1),
             xytext=(pd.Timestamp('2008-10-06'), -0.2),
             arrowprops={"arrowstyle": "->", "color": "gray"})
Quantitative comparisons and statistical visualizations
Quantitative comparisons: bar-charts
# Olympic medals: visualizing the data
# index_col=0 makes the first CSV column the index, which becomes the bar labels.
medals = pd.read_csv('medals_by_country_2016.csv', index_col=0)
fig, ax = plt.subplots()
ax.bar(medals.index, medals["Gold"])
plt.show()
# Interlude: rotate the tick labels
fig, ax = plt.subplots()
ax.bar(medals.index, medals["Gold"])
# Rotate the x tick labels by 90 degrees so they do not overlap.
ax.set_xticklabels(medals.index, rotation=90)
ax.set_ylabel("Number of medals")
plt.show()
# Olympic medals: visualizing the other medals : stacked bar chart
# plt.subplots must be *called*; unpacking the bare function raises TypeError.
fig, ax = plt.subplots()
ax.bar(medals.index, medals["Gold"])
# bottom= starts the silver bars where the gold bars end, stacking them.
ax.bar(medals.index, medals["Silver"], bottom=medals["Gold"])
ax.set_xticklabels(medals.index, rotation=90)
ax.set_ylabel("Number of medals")
plt.show()
# Olympic medals: visualizing all three
# plt.subplots must be *called*; unpacking the bare function raises TypeError.
fig, ax = plt.subplots()
ax.bar(medals.index, medals["Gold"])
ax.bar(medals.index, medals["Silver"], bottom=medals["Gold"])
# Bronze sits on top of the combined gold + silver height.
ax.bar(medals.index, medals["Bronze"],
       bottom=medals["Gold"] + medals["Silver"])
ax.set_xticklabels(medals.index, rotation=90)
ax.set_ylabel("Number of medals")
plt.show()
# Adding a legend
# plt.subplots must be *called*; unpacking the bare function raises TypeError.
fig, ax = plt.subplots()
# label= gives each bar series the name that ax.legend() displays.
ax.bar(medals.index, medals["Gold"], label="Gold")
ax.bar(medals.index, medals["Silver"], bottom=medals["Gold"],
       label="Silver")
ax.bar(medals.index, medals["Bronze"],
       bottom=medals["Gold"] + medals["Silver"],
       label="Bronze")
ax.set_xticklabels(medals.index, rotation=90)
ax.set_ylabel("Number of medals")
ax.legend()
plt.show()
Quantitative comparisons: histograms
# Introducing histograms
fig, ax = plt.subplots()
ax.hist(mens_rowing["Height"])
# NOTE(review): was `mens_gymnastic`; renamed to match `mens_gymnastics`
# used by the error-bar and boxplot cells — confirm against the data-loading code.
ax.hist(mens_gymnastics["Height"])
ax.set_xlabel("Height (cm)")
ax.set_ylabel("# of observations")
plt.show()
# Labels are needed
# label= names each histogram so ax.legend() can tell them apart.
ax.hist(mens_rowing["Height"], label="Rowing")
# NOTE(review): was `mens_gymnastic`; renamed to match `mens_gymnastics`
# used by the error-bar and boxplot cells — confirm against the data-loading code.
ax.hist(mens_gymnastics["Height"], label="Gymnastics")
ax.set_xlabel("Height (cm)")
ax.set_ylabel("# of observations")
ax.legend()
plt.show()
# Customizing histograms: setting the number of bins
# An integer `bins` sets how many equal-width bins are used.
ax.hist(mens_rowing["Height"], label="Rowing", bins=5)
# NOTE(review): was `mens_gymnastic`; renamed to match `mens_gymnastics`
# used by the error-bar and boxplot cells — confirm against the data-loading code.
ax.hist(mens_gymnastics["Height"], label="Gymnastics", bins=5)
ax.set_xlabel("Height (cm)")
ax.set_ylabel("# of observations")
ax.legend()
plt.show()
# Customizing histograms: setting bin boundaries
# A list passed as `bins` gives the explicit bin edges.
ax.hist(mens_rowing["Height"], label="Rowing",
        bins=[150, 160, 170, 180, 190, 200, 210])
# NOTE(review): was `mens_gymnastic`; renamed to match `mens_gymnastics`
# used by the error-bar and boxplot cells — confirm against the data-loading code.
ax.hist(mens_gymnastics["Height"], label="Gymnastics",
        bins=[150, 160, 170, 180, 190, 200, 210])
ax.set_xlabel("Height (cm)")
ax.set_ylabel("# of observations")
ax.legend()
plt.show()
# Customizing histograms: transparency
# histtype="step" draws only the outline, so overlapping histograms stay visible.
ax.hist(mens_rowing["Height"], label="Rowing",
        bins=[150, 160, 170, 180, 190, 200, 210],
        histtype="step")
# NOTE(review): was `mens_gymnastic`; renamed to match `mens_gymnastics`
# used by the error-bar and boxplot cells — confirm against the data-loading code.
ax.hist(mens_gymnastics["Height"], label="Gymnastics",
        bins=[150, 160, 170, 180, 190, 200, 210],
        histtype="step")
ax.set_xlabel("Height (cm)")
ax.set_ylabel("# of observations")
ax.legend()
plt.show()
Statistical plotting
# Adding error bars to bar charts
fig, ax = plt.subplots()
# Bar height = mean height; yerr = standard deviation shown as an error bar.
ax.bar("Rowing", mens_rowing["Height"].mean(),
       yerr=mens_rowing["Height"].std())
ax.bar("Gymnastics", mens_gymnastics["Height"].mean(),
       yerr=mens_gymnastics["Height"].std())
ax.set_ylabel("Height (cm)")
plt.show()
# Adding error bars to plots
fig, ax = plt.subplots()
# errorbar draws the series with a vertical error bar of size yerr at each point.
ax.errorbar(seattle_weather["MONTH"],
            seattle_weather["MLY-TAVG-NORMAL"],
            yerr=seattle_weather["MLY-TAVG-STDDEV"])

ax.errorbar(austin_weather["MONTH"],
            austin_weather["MLY-TAVG-NORMAL"],
            yerr=austin_weather["MLY-TAVG-STDDEV"])

ax.set_ylabel("Temperature (Fahrenheit)")
plt.show()
# Adding boxplots
fig, ax = plt.subplots()
# One box per list element, in the order given.
ax.boxplot([mens_rowing["Height"],
            mens_gymnastics["Height"]])
# Tick labels follow the same order as the boxes above.
ax.set_xticklabels(["Rowing", "Gymnastics"])
ax.set_ylabel("Height (cm)")
plt.show()
Quantitative comparisons: scatter plots
# Introducing scatter plots
fig, ax = plt.subplots()
# Each point pairs a CO2 reading (x) with the relative temperature (y).
ax.scatter(climate_change["co2"], climate_change["relative_temp"])
ax.set_xlabel("CO2 (ppm)")
ax.set_ylabel("Relative temperature (Celsius)")
plt.show()
# Customizing scatter plots
# Slice the rows by date strings into two decades to compare.
eighties = climate_change["1980-01-01":"1989-12-31"]
nineties = climate_change["1990-01-01":"1999-12-31"]

fig, ax = plt.subplots()
ax.scatter(eighties["co2"], eighties["relative_temp"],
           color="red", label="eighties")
ax.scatter(nineties["co2"], nineties["relative_temp"],
           color="blue", label="nineties")
ax.legend()

ax.set_xlabel("CO2 (ppm)")
ax.set_ylabel("Relative temperature (Celsius)")
plt.show()
# Encoding a third variable by color
fig, ax = plt.subplots()
# c= maps each point's index value to a color, encoding a third variable.
ax.scatter(climate_change["co2"], climate_change["relative_temp"],
           c=climate_change.index)
ax.set_xlabel("CO2 (ppm)")
ax.set_ylabel("Relative temperature (Celsius)")
plt.show()