Statistical Visualizations¶

ggplotly includes statistical transformations for smoothing, density estimation, and summary statistics.

Smoothed Lines¶

LOESS Smoothing¶

Local regression smoothing (default):

In [1]:

Copied!





import pandas as pd
import numpy as np
from ggplotly import *

# Seed NumPy's global RNG so the noise added below is reproducible.
np.random.seed(42)
# 100 evenly spaced x values on [0, 10]; y = sin(x) plus normal noise (sd 0.3).
df = pd.DataFrame({
    'x': np.linspace(0, 10, 100),
    'y': np.sin(np.linspace(0, 10, 100)) + np.random.normal(0, 0.3, 100)
})

# Scatter of the noisy points with a blue LOESS smooth layered on top.
(ggplot(df, aes(x='x', y='y'))
 + geom_point(alpha=0.5)
 + geom_smooth(method='loess', color='blue'))
import pandas as pd
import numpy as np
from ggplotly import *

# Seed NumPy's global RNG so the noise added below is reproducible.
np.random.seed(42)
# 100 evenly spaced x values on [0, 10]; y = sin(x) plus normal noise (sd 0.3).
df = pd.DataFrame({
    'x': np.linspace(0, 10, 100),
    'y': np.sin(np.linspace(0, 10, 100)) + np.random.normal(0, 0.3, 100)
})

# Scatter of the noisy points with a blue LOESS smooth layered on top.
(ggplot(df, aes(x='x', y='y'))
 + geom_point(alpha=0.5)
 + geom_smooth(method='loess', color='blue'))

Out[1]:

Linear Regression¶

In [2]:

Copied!

# Reuses `df` from the LOESS example; method='lm' draws the linear-regression fit in red.
(ggplot(df, aes(x='x', y='y'))
 + geom_point(alpha=0.5)
 + geom_smooth(method='lm', color='red'))
# Reuses `df` from the LOESS example; method='lm' draws the linear-regression fit in red.
(ggplot(df, aes(x='x', y='y'))
 + geom_point(alpha=0.5)
 + geom_smooth(method='lm', color='red'))

Out[2]:

Confidence Intervals¶

In [3]:

Copied!





# se=True asks geom_smooth to show the confidence interval around the green LOESS curve.
(ggplot(df, aes(x='x', y='y'))
 + geom_point(alpha=0.5)
 + geom_smooth(method='loess', se=True, color='green')
 + labs(title='LOESS with Confidence Interval'))
# se=True asks geom_smooth to show the confidence interval around the green LOESS curve.
(ggplot(df, aes(x='x', y='y'))
 + geom_point(alpha=0.5)
 + geom_smooth(method='loess', se=True, color='green')
 + labs(title='LOESS with Confidence Interval'))

Out[3]:

Smooth by Group¶

In [4]:

Copied!





# Two groups of 50 rows on the same x grid (np.tile repeats the grid twice):
# the first 50 rows ('A') follow sin(x), the last 50 ('B') follow cos(x),
# each with normal noise (sd 0.3).
df = pd.DataFrame({
    'x': np.tile(np.linspace(0, 10, 50), 2),
    'y': np.concatenate([
        np.sin(np.linspace(0, 10, 50)) + np.random.normal(0, 0.3, 50),
        np.cos(np.linspace(0, 10, 50)) + np.random.normal(0, 0.3, 50)
    ]),
    'group': ['A'] * 50 + ['B'] * 50
})

# color is mapped to 'group' in the plot-level aes, so it applies to both layers.
(ggplot(df, aes(x='x', y='y', color='group'))
 + geom_point(alpha=0.5)
 + geom_smooth(method='loess', se=True))
# Two groups of 50 rows on the same x grid (np.tile repeats the grid twice):
# the first 50 rows ('A') follow sin(x), the last 50 ('B') follow cos(x),
# each with normal noise (sd 0.3).
df = pd.DataFrame({
    'x': np.tile(np.linspace(0, 10, 50), 2),
    'y': np.concatenate([
        np.sin(np.linspace(0, 10, 50)) + np.random.normal(0, 0.3, 50),
        np.cos(np.linspace(0, 10, 50)) + np.random.normal(0, 0.3, 50)
    ]),
    'group': ['A'] * 50 + ['B'] * 50
})

# color is mapped to 'group' in the plot-level aes, so it applies to both layers.
(ggplot(df, aes(x='x', y='y', color='group'))
 + geom_point(alpha=0.5)
 + geom_smooth(method='loess', se=True))

Out[4]:

Smooth Parameters¶

Parameter	Default	Description
`method`	'loess'	'loess', 'lm', 'lowess'
`se`	True	Show confidence interval
`span`	0.75	Smoothing span for LOESS (0-1)
`level`	0.95	Confidence level

Density Plots¶

Basic Density¶

In [5]:

Copied!

# 500 draws from a standard normal, shown as a light-blue filled density curve.
df = pd.DataFrame({'x': np.random.randn(500)})
(ggplot(df, aes(x='x')) + geom_density(fill='lightblue', alpha=0.5))
# 500 draws from a standard normal, shown as a light-blue filled density curve.
df = pd.DataFrame({'x': np.random.randn(500)})
(ggplot(df, aes(x='x')) + geom_density(fill='lightblue', alpha=0.5))

Out[5]:

Overlapping Densities¶

In [6]:

Copied!





# Group 'A': 500 normal draws with mean 0, sd 1; group 'B': 500 with mean 2, sd 1.5.
# The concatenation order matches the order of the 'group' labels.
df = pd.DataFrame({
    'x': np.concatenate([np.random.normal(0, 1, 500), np.random.normal(2, 1.5, 500)]),
    'group': ['A'] * 500 + ['B'] * 500
})

# fill is mapped to 'group'; alpha=0.5 is passed to the density layer.
(ggplot(df, aes(x='x', fill='group')) + geom_density(alpha=0.5))
# Group 'A': 500 normal draws with mean 0, sd 1; group 'B': 500 with mean 2, sd 1.5.
# The concatenation order matches the order of the 'group' labels.
df = pd.DataFrame({
    'x': np.concatenate([np.random.normal(0, 1, 500), np.random.normal(2, 1.5, 500)]),
    'group': ['A'] * 500 + ['B'] * 500
})

# fill is mapped to 'group'; alpha=0.5 is passed to the density layer.
(ggplot(df, aes(x='x', fill='group')) + geom_density(alpha=0.5))

Out[6]:

Density with Histogram¶

In [7]:

Copied!

# 1000 standard-normal draws.
df = pd.DataFrame({'x': np.random.randn(1000)})

# The histogram's y is set to after_stat('density') so its bars and the red
# density curve are drawn against the same y scale.
(ggplot(df, aes(x='x'))
 + geom_histogram(aes(y=after_stat('density')), bins=30, fill='lightgray', color='white')
 + geom_density(color='red', size=2))
# 1000 standard-normal draws.
df = pd.DataFrame({'x': np.random.randn(1000)})

# The histogram's y is set to after_stat('density') so its bars and the red
# density curve are drawn against the same y scale.
(ggplot(df, aes(x='x'))
 + geom_histogram(aes(y=after_stat('density')), bins=30, fill='lightgray', color='white')
 + geom_density(color='red', size=2))

Out[7]:

Empirical CDF¶

Cumulative distribution function:

In [8]:

Copied!

# 200 standard-normal draws; geom_step with stat='ecdf' plots their empirical CDF.
df = pd.DataFrame({'x': np.random.randn(200)})
(ggplot(df, aes(x='x')) + geom_step(stat='ecdf') + labs(title='Empirical CDF'))
# 200 standard-normal draws; geom_step with stat='ecdf' plots their empirical CDF.
df = pd.DataFrame({'x': np.random.randn(200)})
(ggplot(df, aes(x='x')) + geom_step(stat='ecdf') + labs(title='Empirical CDF'))

Out[8]:

Compare Distributions¶

In [9]:

Copied!





# Group 'A': 200 normal draws with mean 0, sd 1; group 'B': 200 with mean 1, sd 0.5.
df = pd.DataFrame({
    'x': np.concatenate([np.random.normal(0, 1, 200), np.random.normal(1, 0.5, 200)]),
    'group': ['A'] * 200 + ['B'] * 200
})

# color is mapped to 'group' so the two ECDF step curves can be compared.
(ggplot(df, aes(x='x', color='group'))
 + geom_step(stat='ecdf')
 + labs(title='Comparing CDFs'))
# Group 'A': 200 normal draws with mean 0, sd 1; group 'B': 200 with mean 1, sd 0.5.
df = pd.DataFrame({
    'x': np.concatenate([np.random.normal(0, 1, 200), np.random.normal(1, 0.5, 200)]),
    'group': ['A'] * 200 + ['B'] * 200
})

# color is mapped to 'group' so the two ECDF step curves can be compared.
(ggplot(df, aes(x='x', color='group'))
 + geom_step(stat='ecdf')
 + labs(title='Comparing CDFs'))

Out[9]:

Summary Statistics¶

Mean Points¶

In [10]:

Copied!





# 30 observations per category. np.repeat is used for BOTH the labels and the
# offsets so each row's offset lines up with its label (A -> 0, B -> 2, C -> 1).
# np.tile would cycle 0, 2, 1 row by row and give every category the same mix
# of offsets, so the category means would not differ.
df = pd.DataFrame({
    'category': np.repeat(['A', 'B', 'C'], 30),
    'value': np.random.randn(90) + np.repeat([0, 2, 1], 30)
})

# Raw points per category, with stat_summary(fun='mean') drawn as a large red point.
(ggplot(df, aes(x='category', y='value'))
 + geom_point(alpha=0.3)
 + stat_summary(fun='mean', geom='point', color='red', size=15))
# 30 observations per category. np.repeat is used for BOTH the labels and the
# offsets so each row's offset lines up with its label (A -> 0, B -> 2, C -> 1).
# np.tile would cycle 0, 2, 1 row by row and give every category the same mix
# of offsets, so the category means would not differ.
df = pd.DataFrame({
    'category': np.repeat(['A', 'B', 'C'], 30),
    'value': np.random.randn(90) + np.repeat([0, 2, 1], 30)
})

# Raw points per category, with stat_summary(fun='mean') drawn as a large red point.
(ggplot(df, aes(x='category', y='value'))
 + geom_point(alpha=0.3)
 + stat_summary(fun='mean', geom='point', color='red', size=15))

Out[10]:

Mean with Error Bars¶

In [11]:

Copied!





# Reuses the category/value `df` built in the previous cell.
# Layers: jittered raw points, the mean as a red point, and a red error bar whose
# limits are given by fun_min / fun_max (mean - std and mean + std of the values
# passed to the lambdas).
# NOTE(review): whether the lambdas receive a pandas Series or a NumPy array
# (which changes the default ddof of .std()) is not visible here — confirm.
(ggplot(df, aes(x='category', y='value'))
 + geom_jitter(width=0.2, alpha=0.3)
 + stat_summary(fun='mean', geom='point', color='red', size=10)
 + stat_summary(fun='mean', fun_min=lambda x: x.mean() - x.std(),
                fun_max=lambda x: x.mean() + x.std(), geom='errorbar', color='red'))
# Reuses the category/value `df` built in the previous cell.
# Layers: jittered raw points, the mean as a red point, and a red error bar whose
# limits are given by fun_min / fun_max (mean - std and mean + std of the values
# passed to the lambdas).
# NOTE(review): whether the lambdas receive a pandas Series or a NumPy array
# (which changes the default ddof of .std()) is not visible here — confirm.
(ggplot(df, aes(x='category', y='value'))
 + geom_jitter(width=0.2, alpha=0.3)
 + stat_summary(fun='mean', geom='point', color='red', size=10)
 + stat_summary(fun='mean', fun_min=lambda x: x.mean() - x.std(),
                fun_max=lambda x: x.mean() + x.std(), geom='errorbar', color='red'))

Out[11]:

Contour Plots¶

Contour Lines¶

In [12]:

Copied!





# Build a gridded surface: z = exp(-(x^2 + y^2)) on a 50 x 50 grid over [-3, 3] x [-3, 3]
x = np.linspace(-3, 3, 50)
y = np.linspace(-3, 3, 50)
X, Y = np.meshgrid(x, y)
Z = np.exp(-(X**2 + Y**2))

# Long format: one row per grid point, with columns x, y and z.
df = pd.DataFrame({
    'x': X.flatten(),
    'y': Y.flatten(),
    'z': Z.flatten()
})

# The z aesthetic supplies the surface height that geom_contour draws contours of.
(ggplot(df, aes(x='x', y='y', z='z')) + geom_contour())
# Build a gridded surface: z = exp(-(x^2 + y^2)) on a 50 x 50 grid over [-3, 3] x [-3, 3]
x = np.linspace(-3, 3, 50)
y = np.linspace(-3, 3, 50)
X, Y = np.meshgrid(x, y)
Z = np.exp(-(X**2 + Y**2))

# Long format: one row per grid point, with columns x, y and z.
df = pd.DataFrame({
    'x': X.flatten(),
    'y': Y.flatten(),
    'z': Z.flatten()
})

# The z aesthetic supplies the surface height that geom_contour draws contours of.
(ggplot(df, aes(x='x', y='y', z='z')) + geom_contour())

Out[12]:

Filled Contours¶

In [13]:

Copied!

# Reuses the gridded `df` (x, y, z) from the contour-lines cell, drawn as filled contours.
(ggplot(df, aes(x='x', y='y', z='z'))
 + geom_contour_filled()
 + labs(title='Filled Contour Plot'))
# Reuses the gridded `df` (x, y, z) from the contour-lines cell, drawn as filled contours.
(ggplot(df, aes(x='x', y='y', z='z'))
 + geom_contour_filled()
 + labs(title='Filled Contour Plot'))

Out[13]:

Contour with Points¶

In [14]:

Copied!





# Sample points: 50 (x, y) pairs drawn uniformly from [-2, 2) in each coordinate
points = pd.DataFrame({
    'x': np.random.uniform(-2, 2, 50),
    'y': np.random.uniform(-2, 2, 50)
})

# The contour layer uses the gridded `df`; geom_point is given its own frame via
# data=points. `points` has no 'z' column, unlike the plot-level aes.
(ggplot(df, aes(x='x', y='y', z='z'))
 + geom_contour_filled(alpha=0.7)
 + geom_point(data=points, color='white', size=5))
# Sample points: 50 (x, y) pairs drawn uniformly from [-2, 2) in each coordinate
points = pd.DataFrame({
    'x': np.random.uniform(-2, 2, 50),
    'y': np.random.uniform(-2, 2, 50)
})

# The contour layer uses the gridded `df`; geom_point is given its own frame via
# data=points. `points` has no 'z' column, unlike the plot-level aes.
(ggplot(df, aes(x='x', y='y', z='z'))
 + geom_contour_filled(alpha=0.7)
 + geom_point(data=points, color='white', size=5))

Out[14]:

Error Bars¶

Basic Error Bars¶

In [15]:

Copied!





# Pre-computed summary table: bar height (y) plus explicit lower/upper limits per bar.
df_err = pd.DataFrame({
    'x': ['A', 'B', 'C', 'D'],
    'y': [10, 15, 12, 18],
    'ymin': [8, 13, 10, 15],
    'ymax': [12, 17, 14, 21]
})

# geom_col draws the bars from y; geom_errorbar uses the ymin / ymax aesthetics.
(ggplot(df_err, aes(x='x', y='y', ymin='ymin', ymax='ymax'))
 + geom_col(fill='steelblue', alpha=0.7)
 + geom_errorbar(width=0.2))
# Pre-computed summary table: bar height (y) plus explicit lower/upper limits per bar.
df_err = pd.DataFrame({
    'x': ['A', 'B', 'C', 'D'],
    'y': [10, 15, 12, 18],
    'ymin': [8, 13, 10, 15],
    'ymax': [12, 17, 14, 21]
})

# geom_col draws the bars from y; geom_errorbar uses the ymin / ymax aesthetics.
(ggplot(df_err, aes(x='x', y='y', ymin='ymin', ymax='ymax'))
 + geom_col(fill='steelblue', alpha=0.7)
 + geom_errorbar(width=0.2))

Out[15]:

Error Bars from Standard Error¶

In [16]:

Copied!





# Recreate the category data: 30 rows per category, with the offsets repeated in
# the same order as the labels so A, B and C are centred on 0, 2 and 1.
# (np.tile would interleave the offsets and make the three groups look alike.)
df = pd.DataFrame({
    'category': np.repeat(['A', 'B', 'C'], 30),
    'value': np.random.randn(90) + np.repeat([0, 2, 1], 30)
})

# Compute statistics: per-category mean, standard deviation and row count
summary = df.groupby('category')['value'].agg(['mean', 'std', 'count']).reset_index()
# Standard error of the mean, then mean -/+ one standard error as the bar limits
summary['se'] = summary['std'] / np.sqrt(summary['count'])
summary['ymin'] = summary['mean'] - summary['se']
summary['ymax'] = summary['mean'] + summary['se']

# Bars at the category means with black error bars spanning ymin..ymax
(ggplot(summary, aes(x='category', y='mean', ymin='ymin', ymax='ymax'))
 + geom_col(fill='steelblue', alpha=0.7)
 + geom_errorbar(width=0.2, color='black'))
# Recreate the category data: 30 rows per category, with the offsets repeated in
# the same order as the labels so A, B and C are centred on 0, 2 and 1.
# (np.tile would interleave the offsets and make the three groups look alike.)
df = pd.DataFrame({
    'category': np.repeat(['A', 'B', 'C'], 30),
    'value': np.random.randn(90) + np.repeat([0, 2, 1], 30)
})

# Compute statistics: per-category mean, standard deviation and row count
summary = df.groupby('category')['value'].agg(['mean', 'std', 'count']).reset_index()
# Standard error of the mean, then mean -/+ one standard error as the bar limits
summary['se'] = summary['std'] / np.sqrt(summary['count'])
summary['ymin'] = summary['mean'] - summary['se']
summary['ymax'] = summary['mean'] + summary['se']

# Bars at the category means with black error bars spanning ymin..ymax
(ggplot(summary, aes(x='category', y='mean', ymin='ymin', ymax='ymax'))
 + geom_col(fill='steelblue', alpha=0.7)
 + geom_errorbar(width=0.2, color='black'))

Out[16]:

Violin with Box Plot¶

Combine a violin plot and a box plot to get an overview of each distribution:

In [17]:

Copied!





# 100 rows per category. The scale and shift vectors are repeated in the same
# order as the labels, so A has mean 0 / sd 1, B has mean 2 / sd 2 and C has
# mean 1 / sd 1.5. (np.tile would cycle the values row by row and give all
# three categories the same mixture.)
df = pd.DataFrame({
    'category': np.repeat(['A', 'B', 'C'], 100),
    'value': np.random.randn(300) * np.repeat([1, 2, 1.5], 100) + np.repeat([0, 2, 1], 100)
})

# Violins filled by category, with a narrow white box plot layered on top.
(ggplot(df, aes(x='category', y='value', fill='category'))
 + geom_violin(alpha=0.5)
 + geom_boxplot(width=0.1, fill='white'))
# 100 rows per category. The scale and shift vectors are repeated in the same
# order as the labels, so A has mean 0 / sd 1, B has mean 2 / sd 2 and C has
# mean 1 / sd 1.5. (np.tile would cycle the values row by row and give all
# three categories the same mixture.)
df = pd.DataFrame({
    'category': np.repeat(['A', 'B', 'C'], 100),
    'value': np.random.randn(300) * np.repeat([1, 2, 1.5], 100) + np.repeat([0, 2, 1], 100)
})

# Violins filled by category, with a narrow white box plot layered on top.
(ggplot(df, aes(x='category', y='value', fill='category'))
 + geom_violin(alpha=0.5)
 + geom_boxplot(width=0.1, fill='white'))

Out[17]:

Scatter with Marginal Distributions¶

In [18]:

Copied!





# 200 independent standard-normal draws for each of x and y.
df = pd.DataFrame({
    'x': np.random.randn(200),
    'y': np.random.randn(200)
})

# Main scatter with rug
# NOTE(review): sides='bl' presumably means bottom and left, as in ggplot2 — confirm.
(ggplot(df, aes(x='x', y='y'))
 + geom_point(alpha=0.5)
 + geom_rug(sides='bl', alpha=0.3))
# 200 independent standard-normal draws for each of x and y.
df = pd.DataFrame({
    'x': np.random.randn(200),
    'y': np.random.randn(200)
})

# Main scatter with rug
# NOTE(review): sides='bl' presumably means bottom and left, as in ggplot2 — confirm.
(ggplot(df, aes(x='x', y='y'))
 + geom_point(alpha=0.5)
 + geom_rug(sides='bl', alpha=0.3))

Out[18]:

Quantile-Quantile Plots¶

Compare the sample distribution to a theoretical distribution:

In [19]:

Copied!





from scipy import stats

# Generate data: re-seed the global RNG, then take 100 standard-normal draws
np.random.seed(42)
df = pd.DataFrame({'values': np.random.randn(100)})

# Basic Q-Q plot using geom_qq and geom_qq_line
# The `sample` aesthetic names the column to plot; no distribution is passed here,
# so both layers use their default reference distribution.
(ggplot(df, aes(sample='values'))
 + geom_qq()
 + geom_qq_line()
 + labs(title='Q-Q Plot', x='Theoretical Quantiles', y='Sample Quantiles'))
from scipy import stats

# Generate data: re-seed the global RNG, then take 100 standard-normal draws
np.random.seed(42)
df = pd.DataFrame({'values': np.random.randn(100)})

# Basic Q-Q plot using geom_qq and geom_qq_line
# The `sample` aesthetic names the column to plot; no distribution is passed here,
# so both layers use their default reference distribution.
(ggplot(df, aes(sample='values'))
 + geom_qq()
 + geom_qq_line()
 + labs(title='Q-Q Plot', x='Theoretical Quantiles', y='Sample Quantiles'))

Out[19]:

Q-Q Plot Against t-Distribution¶

Compare heavy-tailed data to a t-distribution:

In [20]:

Copied!





# Generate t-distributed data: 100 draws from a Student's t with 5 degrees of freedom
# (`stats` is scipy.stats, imported in the previous Q-Q cell)
np.random.seed(42)
df_t = pd.DataFrame({'values': stats.t.rvs(df=5, size=100)})

# Q-Q plot against t-distribution with df=5
# The same distribution and dparams are passed to both the points and the
# reference line so they use the same theoretical quantiles.
(ggplot(df_t, aes(sample='values'))
 + geom_qq(distribution=stats.t, dparams={'df': 5})
 + geom_qq_line(distribution=stats.t, dparams={'df': 5})
 + labs(title='Q-Q Plot Against t(5) Distribution'))
# Generate t-distributed data: 100 draws from a Student's t with 5 degrees of freedom
# (`stats` is scipy.stats, imported in the previous Q-Q cell)
np.random.seed(42)
df_t = pd.DataFrame({'values': stats.t.rvs(df=5, size=100)})

# Q-Q plot against t-distribution with df=5
# The same distribution and dparams are passed to both the points and the
# reference line so they use the same theoretical quantiles.
(ggplot(df_t, aes(sample='values'))
 + geom_qq(distribution=stats.t, dparams={'df': 5})
 + geom_qq_line(distribution=stats.t, dparams={'df': 5})
 + labs(title='Q-Q Plot Against t(5) Distribution'))

Out[20]:

2D Density / Hexbin¶

For large scatter plots, show the density of the points instead of the individual points (the example below uses filled contours):

In [21]:

Copied!





# Large dataset: 10,000 independent standard-normal (x, y) pairs
df = pd.DataFrame({
    'x': np.random.randn(10000),
    'y': np.random.randn(10000)
})

# 2D density using contour
# Unlike the gridded contour examples, no z aesthetic is mapped here.
# NOTE(review): presumably geom_contour_filled estimates the 2D density from
# x and y itself in this case — confirm against the ggplotly docs.
(ggplot(df, aes(x='x', y='y'))
 + geom_contour_filled()
 + labs(title='2D Density'))
# Large dataset: 10,000 independent standard-normal (x, y) pairs
df = pd.DataFrame({
    'x': np.random.randn(10000),
    'y': np.random.randn(10000)
})

# 2D density using contour
# Unlike the gridded contour examples, no z aesthetic is mapped here.
# NOTE(review): presumably geom_contour_filled estimates the 2D density from
# x and y itself in this case — confirm against the ggplotly docs.
(ggplot(df, aes(x='x', y='y'))
 + geom_contour_filled()
 + labs(title='2D Density'))

Out[21]:

Regression Diagnostics¶

In [22]:

Copied!





# Fit a model and plot residuals
from sklearn.linear_model import LinearRegression

# Synthetic data: y = 2x on 100 evenly spaced x values in [0, 10], plus normal noise (sd 2)
df = pd.DataFrame({
    'x': np.linspace(0, 10, 100),
    'y': 2 * np.linspace(0, 10, 100) + np.random.normal(0, 2, 100)
})

# Fit on the single feature x (df[['x']] keeps it two-dimensional for scikit-learn),
# then store fitted values and residuals (observed minus fitted) as new columns.
model = LinearRegression()
model.fit(df[['x']], df['y'])
df['predicted'] = model.predict(df[['x']])
df['residual'] = df['y'] - df['predicted']

# Residuals vs Fitted
# NOTE(review): the reference line's intercept (0) is passed through `data` here;
# confirm this is the argument geom_hline expects for the y-intercept.
(ggplot(df, aes(x='predicted', y='residual'))
 + geom_point(alpha=0.5)
 + geom_hline(data=0, color='red', linetype='dash')
 + labs(title='Residuals vs Fitted', x='Fitted Values', y='Residuals'))
# Fit a model and plot residuals
from sklearn.linear_model import LinearRegression

# Synthetic data: y = 2x on 100 evenly spaced x values in [0, 10], plus normal noise (sd 2)
df = pd.DataFrame({
    'x': np.linspace(0, 10, 100),
    'y': 2 * np.linspace(0, 10, 100) + np.random.normal(0, 2, 100)
})

# Fit on the single feature x (df[['x']] keeps it two-dimensional for scikit-learn),
# then store fitted values and residuals (observed minus fitted) as new columns.
model = LinearRegression()
model.fit(df[['x']], df['y'])
df['predicted'] = model.predict(df[['x']])
df['residual'] = df['y'] - df['predicted']

# Residuals vs Fitted
# NOTE(review): the reference line's intercept (0) is passed through `data` here;
# confirm this is the argument geom_hline expects for the y-intercept.
(ggplot(df, aes(x='predicted', y='residual'))
 + geom_point(alpha=0.5)
 + geom_hline(data=0, color='red', linetype='dash')
 + labs(title='Residuals vs Fitted', x='Fitted Values', y='Residuals'))

Out[22]:

Waterfall Charts¶

Waterfall charts show how an initial value is affected by intermediate positive or negative values.

Basic Waterfall¶

In [23]:

Copied!





# Basic waterfall showing quarterly changes
# One row per bar: the 'absolute' row is the starting value, 'relative' rows are
# changes from the previous value, and the 'total' row (entered with value 0)
# shows the cumulative total at that point.
waterfall_df = pd.DataFrame({
    'category': ['Q1 Sales', 'Q2 Growth', 'Q3 Decline', 'Q4 Recovery', 'Year Total'],
    'value': [100, 50, -30, 20, 0],
    'measure': ['absolute', 'relative', 'relative', 'relative', 'total']
})

# The measure aesthetic tells geom_waterfall how to treat each row.
(ggplot(waterfall_df, aes(x='category', y='value', measure='measure'))
 + geom_waterfall()
 + labs(title='Quarterly Sales Waterfall'))
# Basic waterfall showing quarterly changes
# One row per bar: the 'absolute' row is the starting value, 'relative' rows are
# changes from the previous value, and the 'total' row (entered with value 0)
# shows the cumulative total at that point.
waterfall_df = pd.DataFrame({
    'category': ['Q1 Sales', 'Q2 Growth', 'Q3 Decline', 'Q4 Recovery', 'Year Total'],
    'value': [100, 50, -30, 20, 0],
    'measure': ['absolute', 'relative', 'relative', 'relative', 'total']
})

# The measure aesthetic tells geom_waterfall how to treat each row.
(ggplot(waterfall_df, aes(x='category', y='value', measure='measure'))
 + geom_waterfall()
 + labs(title='Quarterly Sales Waterfall'))

Out[23]:

Financial Statement Waterfall¶

Common use case for income statement analysis:

In [24]:

Copied!





# Income statement waterfall
# Revenue is the 'absolute' starting value; costs are negative 'relative' rows;
# Gross Profit, Operating Income and Net Income are 'total' rows entered as 0.
income_df = pd.DataFrame({
    'item': ['Revenue', 'COGS', 'Gross Profit', 'Operating Expenses', 'Operating Income', 'Taxes', 'Net Income'],
    'amount': [1000, -400, 0, -300, 0, -75, 0],
    'type': ['absolute', 'relative', 'total', 'relative', 'total', 'relative', 'total']
})

# Here the measure aesthetic is mapped to the column named 'type'.
(ggplot(income_df, aes(x='item', y='amount', measure='type'))
 + geom_waterfall()
 + labs(title='Income Statement Breakdown', y='Amount ($K)'))
# Income statement waterfall
# Revenue is the 'absolute' starting value; costs are negative 'relative' rows;
# Gross Profit, Operating Income and Net Income are 'total' rows entered as 0.
income_df = pd.DataFrame({
    'item': ['Revenue', 'COGS', 'Gross Profit', 'Operating Expenses', 'Operating Income', 'Taxes', 'Net Income'],
    'amount': [1000, -400, 0, -300, 0, -75, 0],
    'type': ['absolute', 'relative', 'total', 'relative', 'total', 'relative', 'total']
})

# Here the measure aesthetic is mapped to the column named 'type'.
(ggplot(income_df, aes(x='item', y='amount', measure='type'))
 + geom_waterfall()
 + labs(title='Income Statement Breakdown', y='Amount ($K)'))

Out[24]:

Custom Colors¶

In [25]:

Copied!





# Custom colors for different bar types
# Reuses `waterfall_df` from the basic waterfall cell; only the three colour
# arguments are overridden.
(ggplot(waterfall_df, aes(x='category', y='value', measure='measure'))
 + geom_waterfall(
     increasing_color='#17becf',  # Cyan for increases
     decreasing_color='#ff7f0e',  # Orange for decreases
     total_color='#1f77b4'        # Blue for totals
 )
 + labs(title='Waterfall with Custom Colors'))
# Custom colors for different bar types
# Reuses `waterfall_df` from the basic waterfall cell; only the three colour
# arguments are overridden.
(ggplot(waterfall_df, aes(x='category', y='value', measure='measure'))
 + geom_waterfall(
     increasing_color='#17becf',  # Cyan for increases
     decreasing_color='#ff7f0e',  # Orange for decreases
     total_color='#1f77b4'        # Blue for totals
 )
 + labs(title='Waterfall with Custom Colors'))

Out[25]:

Waterfall Parameters¶

Parameter	Default	Description
`increasing_color`	'#2ca02c'	Color for positive changes (green)
`decreasing_color`	'#d62728'	Color for negative changes (red)
`total_color`	'#1f77b4'	Color for totals (blue)
`connector_visible`	True	Show connector lines
`connector_color`	gray	Color of connectors
`text_position`	'outside'	Position of value labels
`orientation`	'v'	'v' (vertical) or 'h' (horizontal)

The measure aesthetic controls bar types:

'absolute': Starting value (resets running total)
'relative': Change from previous value (default)
'total': Shows cumulative total at this point

Theoretical Distribution Overlays¶

Normal Distribution Overlay¶

Compare histogram to fitted normal distribution using geom_norm():

In [26]:

Copied!





# Auto-fit normal distribution to data
# Re-seed, then take 1000 standard-normal draws.
np.random.seed(42)
df = pd.DataFrame({'x': np.random.randn(1000)})

# Histogram on the density scale; geom_norm is called without mean/sd, so the
# red curve is the normal fitted to the data.
(ggplot(df, aes(x='x'))
 + geom_histogram(aes(y=after_stat('density')), bins=30, fill='lightblue', color='white')
 + geom_norm(color='red', size=2)
 + labs(title='Histogram with Fitted Normal'))
# Auto-fit normal distribution to data
# Re-seed, then take 1000 standard-normal draws.
np.random.seed(42)
df = pd.DataFrame({'x': np.random.randn(1000)})

# Histogram on the density scale; geom_norm is called without mean/sd, so the
# red curve is the normal fitted to the data.
(ggplot(df, aes(x='x'))
 + geom_histogram(aes(y=after_stat('density')), bins=30, fill='lightblue', color='white')
 + geom_norm(color='red', size=2)
 + labs(title='Histogram with Fitted Normal'))

Out[26]:

Scale to Match Histogram Counts¶

Use scale='count' to automatically scale the normal curve to match histogram counts (no need for density scaling on histogram):

In [27]:

Copied!





# With count histogram - no density scaling needed
# Same seed and sample as the fitted-normal example (1000 standard-normal draws).
np.random.seed(42)
df = pd.DataFrame({'x': np.random.randn(1000)})

# The histogram keeps its default y; scale='count' rescales the normal curve
# to match the histogram counts instead.
(ggplot(df, aes(x='x'))
 + geom_histogram(bins=30, fill='lightblue', color='white')
 + geom_norm(scale='count', color='red', size=2)
 + labs(title='Normal Curve Scaled to Histogram Counts'))
# With count histogram - no density scaling needed
# Same seed and sample as the fitted-normal example (1000 standard-normal draws).
np.random.seed(42)
df = pd.DataFrame({'x': np.random.randn(1000)})

# The histogram keeps its default y; scale='count' rescales the normal curve
# to match the histogram counts instead.
(ggplot(df, aes(x='x'))
 + geom_histogram(bins=30, fill='lightblue', color='white')
 + geom_norm(scale='count', color='red', size=2)
 + labs(title='Normal Curve Scaled to Histogram Counts'))

Out[27]:

Explicit Normal Parameters¶

Overlay a standard normal (mean=0, sd=1) regardless of data:

In [28]:

Copied!





# Compare data to standard normal
# 500 standard-normal draws scaled by 1.5 and shifted by 0.5, so the sample is
# deliberately not N(0, 1).
df = pd.DataFrame({'x': np.random.randn(500) * 1.5 + 0.5})

# Blue dashed curve: normal with the explicit mean=0, sd=1.
# Red curve: geom_norm without parameters, i.e. the normal fitted to the data.
(ggplot(df, aes(x='x'))
 + geom_histogram(aes(y=after_stat('density')), bins=25, fill='lightgray', color='white')
 + geom_norm(mean=0, sd=1, color='blue', linetype='dashed', size=2)
 + geom_norm(color='red', size=2)
 + labs(title='Data vs Standard Normal', 
        subtitle='Red=fitted, Blue=N(0,1)'))
# Compare data to standard normal
# 500 standard-normal draws scaled by 1.5 and shifted by 0.5, so the sample is
# deliberately not N(0, 1).
df = pd.DataFrame({'x': np.random.randn(500) * 1.5 + 0.5})

# Blue dashed curve: normal with the explicit mean=0, sd=1.
# Red curve: geom_norm without parameters, i.e. the normal fitted to the data.
(ggplot(df, aes(x='x'))
 + geom_histogram(aes(y=after_stat('density')), bins=25, fill='lightgray', color='white')
 + geom_norm(mean=0, sd=1, color='blue', linetype='dashed', size=2)
 + geom_norm(color='red', size=2)
 + labs(title='Data vs Standard Normal', 
        subtitle='Red=fitted, Blue=N(0,1)'))

Out[28]:

Custom Distribution with stat_function¶

Use stat_function() to overlay any distribution from scipy.stats:

In [29]:

Copied!





from scipy import stats

# Exponential distribution: 500 draws with scale=2
df = pd.DataFrame({'x': np.random.exponential(scale=2, size=500)})

# Density-scaled histogram with the exponential pdf for the same scale (2)
# overlaid through stat_function.
(ggplot(df, aes(x='x'))
 + geom_histogram(aes(y=after_stat('density')), bins=30, fill='lightgreen', color='white')
 + stat_function(fun=lambda x: stats.expon.pdf(x, scale=2), color='darkgreen', size=2)
 + labs(title='Exponential Distribution Overlay'))
from scipy import stats

# Exponential distribution: 500 draws with scale=2
df = pd.DataFrame({'x': np.random.exponential(scale=2, size=500)})

# Density-scaled histogram with the exponential pdf for the same scale (2)
# overlaid through stat_function.
(ggplot(df, aes(x='x'))
 + geom_histogram(aes(y=after_stat('density')), bins=30, fill='lightgreen', color='white')
 + stat_function(fun=lambda x: stats.expon.pdf(x, scale=2), color='darkgreen', size=2)
 + labs(title='Exponential Distribution Overlay'))

Out[29]:

Multiple Distributions¶

In [30]:

Copied!





# Compare Student's t with different degrees of freedom
# No data needed - just provide xlim to define the x range
# stat_function uses geom_line by default
# `stats` is scipy.stats, imported in an earlier cell. Every layer uses the same
# xlim=(-4, 4) so the three pdfs are evaluated over the same range.
# NOTE(review): `name` is presumably the label shown for each curve — confirm.
(ggplot()
 + stat_function(fun=lambda x: stats.norm.pdf(x), color='black', size=2, xlim=(-4, 4), name='Normal')
 + stat_function(fun=lambda x: stats.t.pdf(x, df=3), color='red', size=2, xlim=(-4, 4), name='t(3)')
 + stat_function(fun=lambda x: stats.t.pdf(x, df=10), color='blue', size=2, xlim=(-4, 4), name='t(10)')
 + labs(title='Normal vs t-distributions'))
# Compare Student's t with different degrees of freedom
# No data needed - just provide xlim to define the x range
# stat_function uses geom_line by default
# `stats` is scipy.stats, imported in an earlier cell. Every layer uses the same
# xlim=(-4, 4) so the three pdfs are evaluated over the same range.
# NOTE(review): `name` is presumably the label shown for each curve — confirm.
(ggplot()
 + stat_function(fun=lambda x: stats.norm.pdf(x), color='black', size=2, xlim=(-4, 4), name='Normal')
 + stat_function(fun=lambda x: stats.t.pdf(x, df=3), color='red', size=2, xlim=(-4, 4), name='t(3)')
 + stat_function(fun=lambda x: stats.t.pdf(x, df=10), color='blue', size=2, xlim=(-4, 4), name='t(10)')
 + labs(title='Normal vs t-distributions'))

Out[30]:

In [ ]: