Bachelors degrees by gender

URL: http://matplotlib.org/examples/showcase/bachelors_degrees_by_gender.html

Most examples work equivalently across multiple plotting backends; this example is also available for:

In [1]:
import numpy as np
import holoviews as hv
# Load the HoloViews extension for the matplotlib plotting backend
hv.extension('matplotlib')
# IPython line magic (not plain Python; presumably registered by the HoloViews
# extension -- confirm): request SVG as the figure output format
%output fig='svg'

Define data

In [2]:
import pandas as pd
from matplotlib.cbook import get_sample_data

# `matplotlib.mlab.csv2rec` was removed in Matplotlib 3.1, so the sample CSV is
# read with pandas instead.  `asfileobj=False` makes get_sample_data return the
# path of the file rather than an open handle that would never be closed.
fname = get_sample_data('percent_bachelors_degrees_women_usa.csv', asfileobj=False)
gender_degree_data = pd.read_csv(fname)

# csv2rec normalised the header names (lower case, spaces -> underscores).
# Keep that convention so code referring to the 'year' column keeps working.
gender_degree_data.columns = [col.strip().lower().replace(' ', '_')
                              for col in gender_degree_data.columns]

# Plot title (ends with a newline character)
title = (
    "Percentage of Bachelor's degrees conferred to women in "
    "the U.S.A. by major (1970-2011)\n"
)

# Hex colors used for the curves and their labels, in plotting order
color_sequence = """
    #1f77b4 #aec7e8
    #ff7f0e #ffbb78
    #2ca02c #98df8a
    #d62728 #ff9896
    #9467bd #c5b0d5
    #8c564b #c49c94
    #e377c2 #f7b6d2
    #7f7f7f #c7c7c7
    #bcbd22 #dbdb8d
    #17becf #9edae5
""".split()

# Vertical nudges for individual degree labels, added to the label's y position.
# Degrees that are not listed here get no offset.
y_offsets = {
    'Foreign Languages': 0.5,
    'English': -0.5,
    'Communications and Journalism': 0.75,
    'Art and Performance': -0.25,
    'Agriculture': 1.25,
    'Social Sciences and History': 0.25,
    'Business': -0.75,
    'Math and Statistics': 0.75,
    'Architecture': -0.75,
    'Computer Science': 0.75,
    'Engineering': -0.25,
}

# Load the data into a dataframe and use pd.melt to unpivot the degree columns
# into long form: one row per (year, Degree) pair with its 'conferred' value.
df = pd.DataFrame(gender_degree_data)
df = pd.melt(df, id_vars='year', var_name='Degree', value_name='conferred')

# Column names are expected in the form 'art_and_performance'.  str.title()
# alone would yield 'Art And Performance', which never matches the y_offsets
# keys (spelled 'Art and Performance'), so those label offsets were silently
# ignored.  Lower-case the conjunction again so the names line up.
df['Degree'] = [d.replace('_', ' ').title().replace(' And ', ' and ')
                for d in df.Degree]

# Define a formatter that works for both bokeh and matplotlib
def percent_format(x):
    """Format a tick value as a whole-number percentage, e.g. ``50 -> '50%'``.

    Parameters
    ----------
    x : number
        The value to format.

    Returns
    -------
    str
        ``x`` rounded to zero decimals, followed by a percent sign.  If ``x``
        does not support the ``'f'`` format spec, it is converted with ``%d``
        instead.
    """
    try:
        return '{:0.0f}%'.format(x)
    except (TypeError, ValueError):
        # A literal percent sign must be escaped as '%%' in %-formatting;
        # the previous '%d%' raised "ValueError: incomplete format".
        return '%d%%' % x

# Value dimension: the 'conferred' column, rendered through percent_format
# and pinned to the range 0-90
value_dim = hv.Dimension('conferred', range=(0, 90), value_format=percent_format)

# Wrap the long-form dataframe in a Dataset with 'conferred' as the only value dimension
ds = hv.Dataset(df, vdims=[value_dim])

# One Curve over 'year' per Degree, then overlay them all in a single plot
curves_by_degree = ds.to(hv.Curve, 'year', groupby='Degree')
curves = curves_by_degree.overlay()

# Largest year in the dataset; the text annotations are anchored at this x position
max_year = ds['year'].max()

def get_labels():
    """Build the text annotations, one per degree.

    Each label is an hv.Text placed at x = max_year and at the degree's value
    for that year plus its entry in y_offsets (0 when the degree has none).
    Degrees are paired with colors from color_sequence in order.

    Returns
    -------
    hv.NdOverlay
        The labels, keyed by degree name.
    """
    labels = {}
    for deg, col in zip(df.Degree.unique(), color_sequence):
        y_pos = ds[max_year, deg] + y_offsets.get(deg, 0)
        text = hv.Text(max_year, y_pos, deg, halign='left', fontsize=10)
        labels[deg] = text(style=dict(color=col))
    return hv.NdOverlay(labels)

Display in matplotlib

In [3]:
# Plot hook: draws a custom grid along the y-axis and hides the (ugly) axis spines
def cb(plot, element):
    """Restyle the matplotlib axis found in ``plot.handles['axis']``.

    Turns on a thin, dashed, semi-transparent black grid for the major ticks
    of the y-axis and hides the bottom and left spines.  ``element`` is
    accepted but not used.
    """
    axis = plot.handles['axis']
    axis.grid(True, 'major', 'y', ls='--', lw=.5, c='k', alpha=.3)
    for side in ('bottom', 'left'):
        axis.spines[side].set_visible(False)

# Option tree for the matplotlib backend; the assignments below set the
# 'plot' and 'style' option groups for Curve elements
options = hv.Store.options(backend='matplotlib')

# Plot options: no frame, grid or legend; the cb hook runs on the rendered plot
options.Curve = hv.Options(
    'plot',
    aspect=0.7,
    fig_size=350,
    xticks=5,
    bgcolor='white',
    labelled=[],
    show_frame=False,
    show_grid=False,
    show_legend=False,
    final_hooks=[cb],
)

# Style options: cycle through color_sequence, with a line width of 2
options.Curve = hv.Options(
    'style',
    linewidth=2,
    color=hv.Cycle(values=color_sequence),
)

# Overlay the curves with their labels and attach the title;
# as the last expression of the cell this is what gets displayed
overlay = curves * get_labels()
overlay.relabel(title)
Out[3]:

Download this notebook from GitHub (right-click to download).