Source code for holoviews.interface.pandas

"""
The interface subpackage provides View and Plot types for wrapping
external objects. Currently only a Pandas compatibility wrapper is
provided, which allows Pandas DataFrames to be integrated into the
HoloViews composition and animation framework. Additionally, it
provides methods to apply operations to the underlying data and
convert it to standard HoloViews View types.
"""

from __future__ import absolute_import

import numpy as np

try:
    import pandas as pd
    from ..core.data import PandasInterface
except:
    pd = None
    PandasInterface = None

import param

from ..core import ViewableElement, NdMapping, Dataset, NdOverlay,\
    NdLayout, GridSpace, HoloMap
from ..element import (Chart, Table, Curve, Scatter, Bars, Points,
                       VectorField, HeatMap, Scatter3D, Surface)


class DataFrameView(Dataset):
    """
    DataFrameView provides a convenient compatibility wrapper around
    Pandas DataFrames. It provides several core functions:

    * Allows integrating several Pandas plot types with the HoloViews
      plotting system (includes plot, boxplot, histogram and
      scatter_matrix).

    * Provides several convenient wrapper methods to apply DataFrame
      methods and slice data. This includes:

        1) The apply method, which takes the DataFrame method to be
           applied as the first argument and passes any supplied args
           or kwargs along.

        2) The select and __getitem__ method which allow for selecting
           and slicing the data using NdMapping.
    """

    plot_type = param.ObjectSelector(default=None,
                                     objects=['plot', 'boxplot',
                                              'hist', 'scatter_matrix',
                                              'autocorrelation_plot',
                                              None],
                                     doc="""Selects which Pandas plot type to use,
                                            when visualizing the ViewableElement.""")

    x = param.String(doc="""Dimension to visualize along the x-axis.""")

    x2 = param.String(doc="""Dimension to visualize along a second
                             dependent axis.""")

    y = param.String(doc="""Dimension to visualize along the y-axis.""")

    group = param.String(default='DFrame', constant=True)

    vdims = param.List(doc="DataFrameView has no value dimension.")

    def __init__(self, data, dimensions=None, kdims=None, clone_override=False,
                 index=None, columns=None, dtype=None, copy=True, **params):
        """
        Wraps the supplied data in a DataFrame and declares one key
        dimension per column.

        data:           A DataFrame, or anything accepted by the
                        pd.DataFrame constructor.
        dimensions:     Optional mapping from column name to the
                        dimension that should replace it.
        kdims:          Optional key dimensions, one per column.
        clone_override: If True, kdims are matched to the columns by
                        their ``name`` attribute and columns without a
                        match keep their column label.
        index, columns, dtype:
                        Forwarded to pd.DataFrame when data is not
                        already a DataFrame.
        copy:           If True, an existing DataFrame is copied
                        rather than wrapped directly.

        Raises an Exception if pandas could not be imported and a
        ValueError if the number of kdims does not match the number
        of columns.
        """
        if pd is None:
            raise Exception("Pandas is required for the Pandas interface.")
        if not isinstance(data, pd.DataFrame):
            data = pd.DataFrame(data, index=index, columns=columns, dtype=dtype)
        elif copy:
            data = pd.DataFrame(data, copy=True)

        column_names = list(data.columns)
        if clone_override:
            # Tolerate a missing kdims list: every column then simply
            # keeps its own label as the dimension.
            dim_dict = {d.name: d for d in (kdims or [])}
            dims = [dim_dict.get(k, k) for k in column_names]
        elif kdims:
            if len(kdims) != len(column_names):
                raise ValueError("Supplied key dimensions do not match data columns")
            # Copy so the overrides applied below never modify the
            # list object owned by the caller.
            dims = list(kdims)
        else:
            dims = list(column_names)

        overrides = {} if dimensions is None else dimensions
        for name, dim in overrides.items():
            if name in data.columns:
                dims[column_names.index(name)] = dim

        ViewableElement.__init__(self, data, kdims=dims, **params)
        self.interface = PandasInterface
        self.data.columns = self.dimensions('key', True)

    def groupby(self, dimensions, container_type=NdMapping):
        """
        Groups the data by the supplied dimensions and returns a
        container of the requested type, holding one clone of this
        object per group with the grouped columns dropped.

        Raises an Exception if any of the supplied dimensions cannot
        be found on this object.
        """
        # Looked up once rather than once per membership test.
        available = self.dimensions()
        invalid_dims = [d for d in dimensions if d not in available]
        if invalid_dims:
            raise Exception('Following dimensions could not be found %s.' % invalid_dims)

        index_dims = [self.get_dimension(d) for d in dimensions]
        group_names = [dim.name for dim in index_dims]

        mapping_data = []
        for key, group in self.data.groupby(group_names):
            remaining = group.drop(dimensions, axis=1)
            remaining_dims = [self.get_dimension(c) for c in remaining.columns]
            mapping_data.append((key, self.clone(remaining, kdims=remaining_dims)))
        return container_type(mapping_data, kdims=index_dims)

    def apply(self, name, *args, **kwargs):
        """
        Applies the Pandas dframe method corresponding to the supplied
        name with the supplied args and kwargs.
        """
        return self.clone(getattr(self.data, name)(*args, **kwargs),
                          clone_override=True)

    def overlay(self, dimensions):
        """
        Splits the supplied dimensions out into an NdOverlay.
        """
        return self.groupby(dimensions, NdOverlay)

    def layout(self, dimensions=None, cols=4):
        """
        Splits the supplied dimensions out into an NdLayout arranged
        with the requested number of columns.
        """
        dimensions = [] if dimensions is None else dimensions
        # Honour the cols argument; it was previously ignored in
        # favour of a hard-coded 4.
        return self.groupby(dimensions, NdLayout).cols(cols)

    def grid(self, dimensions):
        """
        Splits the supplied dimensions out into a GridSpace.
        """
        if len(dimensions) > 2:
            raise Exception('Grids hold a maximum of two dimensions.')
        return self.groupby(dimensions, GridSpace)

    def holomap(self, kdims=None):
        """
        Splits the supplied dimensions out into a HoloMap.
        """
        kdims = [] if kdims is None else kdims
        return self.groupby(kdims, HoloMap)


def is_type(df, baseType):
    """
    Tests, for each column of the supplied DataFrame, whether the
    scalar type of the column's dtype is a subclass of baseType.

    Returns a DataFrame indexed by the column labels of df with a
    single boolean column named "numeric".
    """
    test = [issubclass(np.dtype(d).type, baseType) for d in df.dtypes]
    return pd.DataFrame(data=test, index=df.columns, columns=["numeric"])


def is_number(df):
    """
    Reports which columns of the supplied DataFrame have a dtype
    derived from np.number, in the format returned by is_type.

    Returns False instead if the check fails with an error, for
    example when df is not a DataFrame.
    """
    try:
        return is_type(df, np.number)
    except Exception:
        # Deliberately best-effort, but KeyboardInterrupt and
        # SystemExit must still propagate to the caller.
        return False
[docs]class DFrame(DataFrameView): """ DFrame is a specialized Dataset type useful as an interface for pandas plots. """