Statistical Visualizations¶
ggplotly includes statistical transformations for smoothing, density estimation, and summary statistics.
In [1]:
Copied!
import numpy as np
import pandas as pd
from ggplotly import *

# Seed the global NumPy RNG so the noisy samples are reproducible.
np.random.seed(42)
# Noisy sine wave sampled at 100 evenly spaced points on [0, 10].
grid = np.linspace(0, 10, 100)
noise = np.random.normal(0, 0.3, 100)
df = pd.DataFrame({'x': grid, 'y': np.sin(grid) + noise})
# Faded raw points with a blue LOESS smoother layered on top.
(ggplot(df, aes(x='x', y='y'))
    + geom_point(alpha=0.5)
    + geom_smooth(method='loess', color='blue'))
import numpy as np
import pandas as pd
from ggplotly import *

# Seed the global NumPy RNG so the noisy samples are reproducible.
np.random.seed(42)
# Noisy sine wave sampled at 100 evenly spaced points on [0, 10].
grid = np.linspace(0, 10, 100)
noise = np.random.normal(0, 0.3, 100)
df = pd.DataFrame({'x': grid, 'y': np.sin(grid) + noise})
# Faded raw points with a blue LOESS smoother layered on top.
(ggplot(df, aes(x='x', y='y'))
    + geom_point(alpha=0.5)
    + geom_smooth(method='loess', color='blue'))
Out[1]:
Linear Regression¶
In [2]:
Copied!
# Reuses `df` (the noisy sine data built in the first cell).
# method='lm' swaps the LOESS curve for a linear-regression fit.
(ggplot(df, aes(x='x', y='y'))
+ geom_point(alpha=0.5)
+ geom_smooth(method='lm', color='red'))
# Reuses `df` (the noisy sine data built in the first cell).
# method='lm' swaps the LOESS curve for a linear-regression fit.
(ggplot(df, aes(x='x', y='y'))
+ geom_point(alpha=0.5)
+ geom_smooth(method='lm', color='red'))
Out[2]:
Confidence Intervals¶
In [3]:
Copied!
# LOESS smoother on the same `df`, explicitly requesting the confidence
# band with se=True; labs() adds the plot title.
(ggplot(df, aes(x='x', y='y'))
+ geom_point(alpha=0.5)
+ geom_smooth(method='loess', se=True, color='green')
+ labs(title='LOESS with Confidence Interval'))
# LOESS smoother on the same `df`, explicitly requesting the confidence
# band with se=True; labs() adds the plot title.
(ggplot(df, aes(x='x', y='y'))
+ geom_point(alpha=0.5)
+ geom_smooth(method='loess', se=True, color='green')
+ labs(title='LOESS with Confidence Interval'))
Out[3]:
Smooth by Group¶
In [4]:
Copied!
# Two noisy curves on a shared 50-point grid: sine for group A, cosine for
# group B. The noise for A is drawn before the noise for B.
grid = np.linspace(0, 10, 50)
series_a = np.sin(grid) + np.random.normal(0, 0.3, 50)
series_b = np.cos(grid) + np.random.normal(0, 0.3, 50)
df = pd.DataFrame({
    'x': np.tile(grid, 2),
    'y': np.concatenate([series_a, series_b]),
    'group': ['A'] * 50 + ['B'] * 50,
})
# Mapping color to `group` gives each group its own points and smoother.
(ggplot(df, aes(x='x', y='y', color='group'))
    + geom_point(alpha=0.5)
    + geom_smooth(method='loess', se=True))
# Two noisy curves on a shared 50-point grid: sine for group A, cosine for
# group B. The noise for A is drawn before the noise for B.
grid = np.linspace(0, 10, 50)
series_a = np.sin(grid) + np.random.normal(0, 0.3, 50)
series_b = np.cos(grid) + np.random.normal(0, 0.3, 50)
df = pd.DataFrame({
    'x': np.tile(grid, 2),
    'y': np.concatenate([series_a, series_b]),
    'group': ['A'] * 50 + ['B'] * 50,
})
# Mapping color to `group` gives each group its own points and smoother.
(ggplot(df, aes(x='x', y='y', color='group'))
    + geom_point(alpha=0.5)
    + geom_smooth(method='loess', se=True))
Out[4]:
In [5]:
Copied!
# 500 draws from the standard normal, shown as a semi-transparent filled
# density curve.
df = pd.DataFrame({'x': np.random.randn(500)})
(ggplot(df, aes(x='x')) + geom_density(fill='lightblue', alpha=0.5))
# 500 draws from the standard normal, shown as a semi-transparent filled
# density curve.
df = pd.DataFrame({'x': np.random.randn(500)})
(ggplot(df, aes(x='x')) + geom_density(fill='lightblue', alpha=0.5))
Out[5]:
Overlapping Densities¶
In [6]:
Copied!
# Group A ~ N(0, 1) and group B ~ N(2, 1.5), 500 draws each (A drawn first).
samples_a = np.random.normal(0, 1, 500)
samples_b = np.random.normal(2, 1.5, 500)
df = pd.DataFrame({
    'x': np.concatenate([samples_a, samples_b]),
    'group': ['A'] * 500 + ['B'] * 500,
})
# One density per group; alpha keeps the overlap region visible.
ggplot(df, aes(x='x', fill='group')) + geom_density(alpha=0.5)
# Group A ~ N(0, 1) and group B ~ N(2, 1.5), 500 draws each (A drawn first).
samples_a = np.random.normal(0, 1, 500)
samples_b = np.random.normal(2, 1.5, 500)
df = pd.DataFrame({
    'x': np.concatenate([samples_a, samples_b]),
    'group': ['A'] * 500 + ['B'] * 500,
})
# One density per group; alpha keeps the overlap region visible.
ggplot(df, aes(x='x', fill='group')) + geom_density(alpha=0.5)
Out[6]:
Density with Histogram¶
In [7]:
Copied!
# 1000 standard-normal draws.
df = pd.DataFrame({'x': np.random.randn(1000)})
# The histogram's y is mapped to after_stat('density') so the bars are on a
# density scale, matching the density curve drawn on top of them.
(ggplot(df, aes(x='x'))
+ geom_histogram(aes(y=after_stat('density')), bins=30, fill='lightgray', color='white')
+ geom_density(color='red', size=2))
# 1000 standard-normal draws.
df = pd.DataFrame({'x': np.random.randn(1000)})
# The histogram's y is mapped to after_stat('density') so the bars are on a
# density scale, matching the density curve drawn on top of them.
(ggplot(df, aes(x='x'))
+ geom_histogram(aes(y=after_stat('density')), bins=30, fill='lightgray', color='white')
+ geom_density(color='red', size=2))
Out[7]:
Empirical CDF¶
Cumulative distribution function:
In [8]:
Copied!
# 200 standard-normal draws; geom_step with stat='ecdf' draws their
# empirical CDF as a step curve.
df = pd.DataFrame({'x': np.random.randn(200)})
(ggplot(df, aes(x='x')) + geom_step(stat='ecdf') + labs(title='Empirical CDF'))
# 200 standard-normal draws; geom_step with stat='ecdf' draws their
# empirical CDF as a step curve.
df = pd.DataFrame({'x': np.random.randn(200)})
(ggplot(df, aes(x='x')) + geom_step(stat='ecdf') + labs(title='Empirical CDF'))
Out[8]:
Compare Distributions¶
In [9]:
Copied!
# Two samples with different location and spread: A ~ N(0, 1), B ~ N(1, 0.5).
sample_a = np.random.normal(0, 1, 200)
sample_b = np.random.normal(1, 0.5, 200)
df = pd.DataFrame({
    'x': np.concatenate([sample_a, sample_b]),
    'group': ['A'] * 200 + ['B'] * 200,
})
# One ECDF step curve per group, distinguished by color.
(ggplot(df, aes(x='x', color='group'))
    + geom_step(stat='ecdf')
    + labs(title='Comparing CDFs'))
# Two samples with different location and spread: A ~ N(0, 1), B ~ N(1, 0.5).
sample_a = np.random.normal(0, 1, 200)
sample_b = np.random.normal(1, 0.5, 200)
df = pd.DataFrame({
    'x': np.concatenate([sample_a, sample_b]),
    'group': ['A'] * 200 + ['B'] * 200,
})
# One ECDF step curve per group, distinguished by color.
(ggplot(df, aes(x='x', color='group'))
    + geom_step(stat='ecdf')
    + labs(title='Comparing CDFs'))
Out[9]:
In [10]:
Copied!
# Three categories with 30 observations each. The per-category offsets use
# np.repeat (not np.tile) so they line up with the repeated labels: A is
# centred on 0, B on 2 and C on 1. With np.tile the offsets would cycle
# 0, 2, 1, 0, 2, 1, ... and every category would get the same mixture.
df = pd.DataFrame({
    'category': np.repeat(['A', 'B', 'C'], 30),
    'value': np.random.randn(90) + np.repeat([0, 2, 1], 30),
})
# Faded raw observations with a large red point marking each category mean.
(ggplot(df, aes(x='category', y='value'))
    + geom_point(alpha=0.3)
    + stat_summary(fun='mean', geom='point', color='red', size=15))
# Three categories with 30 observations each. The per-category offsets use
# np.repeat (not np.tile) so they line up with the repeated labels: A is
# centred on 0, B on 2 and C on 1. With np.tile the offsets would cycle
# 0, 2, 1, 0, 2, 1, ... and every category would get the same mixture.
df = pd.DataFrame({
    'category': np.repeat(['A', 'B', 'C'], 30),
    'value': np.random.randn(90) + np.repeat([0, 2, 1], 30),
})
# Faded raw observations with a large red point marking each category mean.
(ggplot(df, aes(x='category', y='value'))
    + geom_point(alpha=0.3)
    + stat_summary(fun='mean', geom='point', color='red', size=15))
Out[10]:
Mean with Error Bars¶
In [11]:
Copied!
# Reuses the category/value `df` built in the previous cell.
# Layers: jittered raw points, a red point at each category mean, and red
# error bars running from mean - std (fun_min) to mean + std (fun_max).
(ggplot(df, aes(x='category', y='value'))
+ geom_jitter(width=0.2, alpha=0.3)
+ stat_summary(fun='mean', geom='point', color='red', size=10)
+ stat_summary(fun='mean', fun_min=lambda x: x.mean() - x.std(),
fun_max=lambda x: x.mean() + x.std(), geom='errorbar', color='red'))
# Reuses the category/value `df` built in the previous cell.
# Layers: jittered raw points, a red point at each category mean, and red
# error bars running from mean - std (fun_min) to mean + std (fun_max).
(ggplot(df, aes(x='category', y='value'))
+ geom_jitter(width=0.2, alpha=0.3)
+ stat_summary(fun='mean', geom='point', color='red', size=10)
+ stat_summary(fun='mean', fun_min=lambda x: x.mean() - x.std(),
fun_max=lambda x: x.mean() + x.std(), geom='errorbar', color='red'))
Out[11]:
In [12]:
Copied!
# Evaluate the Gaussian bump z = exp(-(x^2 + y^2)) on a 50 x 50 grid over
# [-3, 3] x [-3, 3]. This is a gridded surface, not a density estimated
# from samples.
x = np.linspace(-3, 3, 50)
y = np.linspace(-3, 3, 50)
X, Y = np.meshgrid(x, y)
Z = np.exp(-(X**2 + Y**2))
# Flatten the grid to long format: one row per (x, y, z) point.
df = pd.DataFrame({
'x': X.flatten(),
'y': Y.flatten(),
'z': Z.flatten()
})
# The z aesthetic supplies the surface height that the contours follow.
(ggplot(df, aes(x='x', y='y', z='z')) + geom_contour())
# Evaluate the Gaussian bump z = exp(-(x^2 + y^2)) on a 50 x 50 grid over
# [-3, 3] x [-3, 3]. This is a gridded surface, not a density estimated
# from samples.
x = np.linspace(-3, 3, 50)
y = np.linspace(-3, 3, 50)
X, Y = np.meshgrid(x, y)
Z = np.exp(-(X**2 + Y**2))
# Flatten the grid to long format: one row per (x, y, z) point.
df = pd.DataFrame({
'x': X.flatten(),
'y': Y.flatten(),
'z': Z.flatten()
})
# The z aesthetic supplies the surface height that the contours follow.
(ggplot(df, aes(x='x', y='y', z='z')) + geom_contour())
Out[12]:
Filled Contours¶
In [13]:
Copied!
# Same gridded `df` as the previous cell, drawn with geom_contour_filled
# instead of geom_contour.
(ggplot(df, aes(x='x', y='y', z='z'))
+ geom_contour_filled()
+ labs(title='Filled Contour Plot'))
# Same gridded `df` as the previous cell, drawn with geom_contour_filled
# instead of geom_contour.
(ggplot(df, aes(x='x', y='y', z='z'))
+ geom_contour_filled()
+ labs(title='Filled Contour Plot'))
Out[13]:
Contour with Points¶
In [14]:
Copied!
# 50 points scattered uniformly over [-2, 2] x [-2, 2] (x drawn before y).
points_x = np.random.uniform(-2, 2, 50)
points_y = np.random.uniform(-2, 2, 50)
points = pd.DataFrame({'x': points_x, 'y': points_y})
# Filled contours of the gridded `df`, with the scatter layer reading its
# own data frame via data=points.
(ggplot(df, aes(x='x', y='y', z='z'))
    + geom_contour_filled(alpha=0.7)
    + geom_point(data=points, color='white', size=5))
# 50 points scattered uniformly over [-2, 2] x [-2, 2] (x drawn before y).
points_x = np.random.uniform(-2, 2, 50)
points_y = np.random.uniform(-2, 2, 50)
points = pd.DataFrame({'x': points_x, 'y': points_y})
# Filled contours of the gridded `df`, with the scatter layer reading its
# own data frame via data=points.
(ggplot(df, aes(x='x', y='y', z='z'))
    + geom_contour_filled(alpha=0.7)
    + geom_point(data=points, color='white', size=5))
Out[14]:
In [15]:
Copied!
# Pre-computed bar heights (y) with explicit lower/upper bounds (ymin/ymax).
# Note the last interval is asymmetric: 18 with bounds 15 and 21 is
# symmetric, the others are +/- 2.
df_err = pd.DataFrame({
'x': ['A', 'B', 'C', 'D'],
'y': [10, 15, 12, 18],
'ymin': [8, 13, 10, 15],
'ymax': [12, 17, 14, 21]
})
# ymin/ymax are mapped in the top-level aes so the error-bar layer can use
# them; geom_col draws the bars themselves.
(ggplot(df_err, aes(x='x', y='y', ymin='ymin', ymax='ymax'))
+ geom_col(fill='steelblue', alpha=0.7)
+ geom_errorbar(width=0.2))
# Pre-computed bar heights (y) with explicit lower/upper bounds (ymin/ymax).
# Note the last interval is asymmetric: 18 with bounds 15 and 21 is
# symmetric, the others are +/- 2.
df_err = pd.DataFrame({
'x': ['A', 'B', 'C', 'D'],
'y': [10, 15, 12, 18],
'ymin': [8, 13, 10, 15],
'ymax': [12, 17, 14, 21]
})
# ymin/ymax are mapped in the top-level aes so the error-bar layer can use
# them; geom_col draws the bars themselves.
(ggplot(df_err, aes(x='x', y='y', ymin='ymin', ymax='ymax'))
+ geom_col(fill='steelblue', alpha=0.7)
+ geom_errorbar(width=0.2))
Out[15]:
Error Bars from Standard Error¶
In [16]:
Copied!
# Recreate the category data. Offsets use np.repeat so they line up with the
# repeated labels (A centred on 0, B on 2, C on 1); np.tile would cycle the
# offsets across rows and give every category the same mixture.
df = pd.DataFrame({
    'category': np.repeat(['A', 'B', 'C'], 30),
    'value': np.random.randn(90) + np.repeat([0, 2, 1], 30),
})
# Per-category mean, standard deviation and sample size.
summary = df.groupby('category')['value'].agg(['mean', 'std', 'count']).reset_index()
# Standard error of the mean, and the mean +/- 1 SE interval for the bars.
summary['se'] = summary['std'] / np.sqrt(summary['count'])
summary['ymin'] = summary['mean'] - summary['se']
summary['ymax'] = summary['mean'] + summary['se']
(ggplot(summary, aes(x='category', y='mean', ymin='ymin', ymax='ymax'))
    + geom_col(fill='steelblue', alpha=0.7)
    + geom_errorbar(width=0.2, color='black'))
# Recreate the category data. Offsets use np.repeat so they line up with the
# repeated labels (A centred on 0, B on 2, C on 1); np.tile would cycle the
# offsets across rows and give every category the same mixture.
df = pd.DataFrame({
    'category': np.repeat(['A', 'B', 'C'], 30),
    'value': np.random.randn(90) + np.repeat([0, 2, 1], 30),
})
# Per-category mean, standard deviation and sample size.
summary = df.groupby('category')['value'].agg(['mean', 'std', 'count']).reset_index()
# Standard error of the mean, and the mean +/- 1 SE interval for the bars.
summary['se'] = summary['std'] / np.sqrt(summary['count'])
summary['ymin'] = summary['mean'] - summary['se']
summary['ymax'] = summary['mean'] + summary['se']
(ggplot(summary, aes(x='category', y='mean', ymin='ymin', ymax='ymax'))
    + geom_col(fill='steelblue', alpha=0.7)
    + geom_errorbar(width=0.2, color='black'))
Out[16]:
Violin with Box Plot¶
Combine violin and box for distribution overview:
In [17]:
Copied!
# Three categories with 100 observations each and distinct spread/location:
# A ~ N(0, 1), B ~ N(2, 2), C ~ N(1, 1.5). The scale and offset vectors use
# np.repeat so they line up with the repeated labels; np.tile would cycle
# them across rows and give every category the same mixture.
df = pd.DataFrame({
    'category': np.repeat(['A', 'B', 'C'], 100),
    'value': np.random.randn(300) * np.repeat([1, 2, 1.5], 100) + np.repeat([0, 2, 1], 100),
})
# Violin for the distribution shape, with a narrow white box plot on top.
(ggplot(df, aes(x='category', y='value', fill='category'))
    + geom_violin(alpha=0.5)
    + geom_boxplot(width=0.1, fill='white'))
# Three categories with 100 observations each and distinct spread/location:
# A ~ N(0, 1), B ~ N(2, 2), C ~ N(1, 1.5). The scale and offset vectors use
# np.repeat so they line up with the repeated labels; np.tile would cycle
# them across rows and give every category the same mixture.
df = pd.DataFrame({
    'category': np.repeat(['A', 'B', 'C'], 100),
    'value': np.random.randn(300) * np.repeat([1, 2, 1.5], 100) + np.repeat([0, 2, 1], 100),
})
# Violin for the distribution shape, with a narrow white box plot on top.
(ggplot(df, aes(x='category', y='value', fill='category'))
    + geom_violin(alpha=0.5)
    + geom_boxplot(width=0.1, fill='white'))
Out[17]:
Scatter with Marginal Distributions¶
In [18]:
Copied!
# 200 independent standard-normal (x, y) pairs; x is drawn before y.
coords_x = np.random.randn(200)
coords_y = np.random.randn(200)
df = pd.DataFrame({'x': coords_x, 'y': coords_y})
# Main scatter with rug marks along the axes selected by sides='bl'.
(ggplot(df, aes(x='x', y='y'))
    + geom_point(alpha=0.5)
    + geom_rug(sides='bl', alpha=0.3))
# 200 independent standard-normal (x, y) pairs; x is drawn before y.
coords_x = np.random.randn(200)
coords_y = np.random.randn(200)
df = pd.DataFrame({'x': coords_x, 'y': coords_y})
# Main scatter with rug marks along the axes selected by sides='bl'.
(ggplot(df, aes(x='x', y='y'))
    + geom_point(alpha=0.5)
    + geom_rug(sides='bl', alpha=0.3))
Out[18]:
Quantile-Quantile Plots¶
Compare distribution to theoretical:
In [19]:
Copied!
# NOTE: `stats` is not used in this cell; the t-distribution Q-Q example in
# the next cell relies on this import.
from scipy import stats
# Generate data: 100 standard-normal draws with a fixed seed.
np.random.seed(42)
df = pd.DataFrame({'values': np.random.randn(100)})
# Basic Q-Q plot using geom_qq and geom_qq_line.
# The `sample` aesthetic names the column whose quantiles are compared with
# the theoretical ones.
(ggplot(df, aes(sample='values'))
+ geom_qq()
+ geom_qq_line()
+ labs(title='Q-Q Plot', x='Theoretical Quantiles', y='Sample Quantiles'))
# NOTE: `stats` is not used in this cell; the t-distribution Q-Q example in
# the next cell relies on this import.
from scipy import stats
# Generate data: 100 standard-normal draws with a fixed seed.
np.random.seed(42)
df = pd.DataFrame({'values': np.random.randn(100)})
# Basic Q-Q plot using geom_qq and geom_qq_line.
# The `sample` aesthetic names the column whose quantiles are compared with
# the theoretical ones.
(ggplot(df, aes(sample='values'))
+ geom_qq()
+ geom_qq_line()
+ labs(title='Q-Q Plot', x='Theoretical Quantiles', y='Sample Quantiles'))
Out[19]:
Q-Q Plot Against t-Distribution¶
Compare heavy-tailed data to a t-distribution:
In [20]:
Copied!
# Generate t-distributed data (5 degrees of freedom, 100 draws).
# Relies on `stats` (scipy.stats) imported in the previous cell.
np.random.seed(42)
df_t = pd.DataFrame({'values': stats.t.rvs(df=5, size=100)})
# Q-Q plot against t-distribution with df=5.
# The same distribution/dparams pair is passed to both layers so the points
# and the reference line refer to the same theoretical distribution.
(ggplot(df_t, aes(sample='values'))
+ geom_qq(distribution=stats.t, dparams={'df': 5})
+ geom_qq_line(distribution=stats.t, dparams={'df': 5})
+ labs(title='Q-Q Plot Against t(5) Distribution'))
# Generate t-distributed data (5 degrees of freedom, 100 draws).
# Relies on `stats` (scipy.stats) imported in the previous cell.
np.random.seed(42)
df_t = pd.DataFrame({'values': stats.t.rvs(df=5, size=100)})
# Q-Q plot against t-distribution with df=5.
# The same distribution/dparams pair is passed to both layers so the points
# and the reference line refer to the same theoretical distribution.
(ggplot(df_t, aes(sample='values'))
+ geom_qq(distribution=stats.t, dparams={'df': 5})
+ geom_qq_line(distribution=stats.t, dparams={'df': 5})
+ labs(title='Q-Q Plot Against t(5) Distribution'))
Out[20]:
2D Density / Hexbin¶
For large scatter plots, show density:
In [21]:
Copied!
# Large dataset: 10000 independent standard-normal (x, y) pairs, with x
# drawn before y.
cloud_x = np.random.randn(10000)
cloud_y = np.random.randn(10000)
df = pd.DataFrame({'x': cloud_x, 'y': cloud_y})
# 2D density using contour: only x and y are mapped (no z aesthetic).
(ggplot(df, aes(x='x', y='y'))
    + geom_contour_filled()
    + labs(title='2D Density'))
# Large dataset: 10000 independent standard-normal (x, y) pairs, with x
# drawn before y.
cloud_x = np.random.randn(10000)
cloud_y = np.random.randn(10000)
df = pd.DataFrame({'x': cloud_x, 'y': cloud_y})
# 2D density using contour: only x and y are mapped (no z aesthetic).
(ggplot(df, aes(x='x', y='y'))
    + geom_contour_filled()
    + labs(title='2D Density'))
Out[21]:
Regression Diagnostics¶
In [22]:
Copied!
# Fit a straight line to noisy linear data and inspect the residuals.
from sklearn.linear_model import LinearRegression

# y = 2x plus Gaussian noise (sd 2) on 100 evenly spaced x values.
x_values = np.linspace(0, 10, 100)
df = pd.DataFrame({
    'x': x_values,
    'y': 2 * x_values + np.random.normal(0, 2, 100),
})
# The single-column feature frame is built once and used for fit and predict.
features = df[['x']]
model = LinearRegression()
model.fit(features, df['y'])
df['predicted'] = model.predict(features)
df['residual'] = df['y'] - df['predicted']
# Residuals vs fitted values, with a dashed red reference line at zero.
(ggplot(df, aes(x='predicted', y='residual'))
    + geom_point(alpha=0.5)
    + geom_hline(data=0, color='red', linetype='dash')
    + labs(title='Residuals vs Fitted', x='Fitted Values', y='Residuals'))
# Fit a straight line to noisy linear data and inspect the residuals.
from sklearn.linear_model import LinearRegression

# y = 2x plus Gaussian noise (sd 2) on 100 evenly spaced x values.
x_values = np.linspace(0, 10, 100)
df = pd.DataFrame({
    'x': x_values,
    'y': 2 * x_values + np.random.normal(0, 2, 100),
})
# The single-column feature frame is built once and used for fit and predict.
features = df[['x']]
model = LinearRegression()
model.fit(features, df['y'])
df['predicted'] = model.predict(features)
df['residual'] = df['y'] - df['predicted']
# Residuals vs fitted values, with a dashed red reference line at zero.
(ggplot(df, aes(x='predicted', y='residual'))
    + geom_point(alpha=0.5)
    + geom_hline(data=0, color='red', linetype='dash')
    + labs(title='Residuals vs Fitted', x='Fitted Values', y='Residuals'))
Out[22]:
In [23]:
Copied!
# Basic waterfall showing quarterly changes.
# `measure` tags each row: 'absolute' is the starting value, 'relative' a
# change from the previous value, 'total' the cumulative total at that point.
# The 0 on the 'total' row is presumably a placeholder - confirm it is ignored.
waterfall_df = pd.DataFrame({
'category': ['Q1 Sales', 'Q2 Growth', 'Q3 Decline', 'Q4 Recovery', 'Year Total'],
'value': [100, 50, -30, 20, 0],
'measure': ['absolute', 'relative', 'relative', 'relative', 'total']
})
(ggplot(waterfall_df, aes(x='category', y='value', measure='measure'))
+ geom_waterfall()
+ labs(title='Quarterly Sales Waterfall'))
# Basic waterfall showing quarterly changes.
# `measure` tags each row: 'absolute' is the starting value, 'relative' a
# change from the previous value, 'total' the cumulative total at that point.
# The 0 on the 'total' row is presumably a placeholder - confirm it is ignored.
waterfall_df = pd.DataFrame({
'category': ['Q1 Sales', 'Q2 Growth', 'Q3 Decline', 'Q4 Recovery', 'Year Total'],
'value': [100, 50, -30, 20, 0],
'measure': ['absolute', 'relative', 'relative', 'relative', 'total']
})
(ggplot(waterfall_df, aes(x='category', y='value', measure='measure'))
+ geom_waterfall()
+ labs(title='Quarterly Sales Waterfall'))
Out[23]:
Financial Statement Waterfall¶
Common use case for income statement analysis:
In [24]:
Copied!
# Income statement waterfall.
# Revenue is the 'absolute' starting value; each cost line is a negative
# 'relative' change; Gross Profit, Operating Income and Net Income are
# 'total' rows (amount 0) that show the running total at that point.
income_df = pd.DataFrame({
'item': ['Revenue', 'COGS', 'Gross Profit', 'Operating Expenses', 'Operating Income', 'Taxes', 'Net Income'],
'amount': [1000, -400, 0, -300, 0, -75, 0],
'type': ['absolute', 'relative', 'total', 'relative', 'total', 'relative', 'total']
})
# The measure aesthetic can point at any column name - here it is 'type'.
(ggplot(income_df, aes(x='item', y='amount', measure='type'))
+ geom_waterfall()
+ labs(title='Income Statement Breakdown', y='Amount ($K)'))
# Income statement waterfall.
# Revenue is the 'absolute' starting value; each cost line is a negative
# 'relative' change; Gross Profit, Operating Income and Net Income are
# 'total' rows (amount 0) that show the running total at that point.
income_df = pd.DataFrame({
'item': ['Revenue', 'COGS', 'Gross Profit', 'Operating Expenses', 'Operating Income', 'Taxes', 'Net Income'],
'amount': [1000, -400, 0, -300, 0, -75, 0],
'type': ['absolute', 'relative', 'total', 'relative', 'total', 'relative', 'total']
})
# The measure aesthetic can point at any column name - here it is 'type'.
(ggplot(income_df, aes(x='item', y='amount', measure='type'))
+ geom_waterfall()
+ labs(title='Income Statement Breakdown', y='Amount ($K)'))
Out[24]:
Custom Colors¶
In [25]:
Copied!
# Custom colors for different bar types.
# Reuses `waterfall_df` from the basic quarterly waterfall example.
(ggplot(waterfall_df, aes(x='category', y='value', measure='measure'))
+ geom_waterfall(
increasing_color='#17becf', # Cyan for increases
decreasing_color='#ff7f0e', # Orange for decreases
total_color='#1f77b4' # Blue for totals
)
+ labs(title='Waterfall with Custom Colors'))
# Custom colors for different bar types.
# Reuses `waterfall_df` from the basic quarterly waterfall example.
(ggplot(waterfall_df, aes(x='category', y='value', measure='measure'))
+ geom_waterfall(
increasing_color='#17becf', # Cyan for increases
decreasing_color='#ff7f0e', # Orange for decreases
total_color='#1f77b4' # Blue for totals
)
+ labs(title='Waterfall with Custom Colors'))
Out[25]:
Waterfall Parameters¶
| Parameter | Default | Description |
|---|---|---|
| `increasing_color` | `'#2ca02c'` | Color for positive changes (green) |
| `decreasing_color` | `'#d62728'` | Color for negative changes (red) |
| `total_color` | `'#1f77b4'` | Color for totals (blue) |
| `connector_visible` | `True` | Show connector lines |
| `connector_color` | `gray` | Color of connectors |
| `text_position` | `'outside'` | Position of value labels |
| `orientation` | `'v'` | `'v'` (vertical) or `'h'` (horizontal) |
The measure aesthetic controls bar types:
- `'absolute'`: Starting value (resets running total)
- `'relative'`: Change from previous value (default)
- `'total'`: Shows cumulative total at this point
In [26]:
Copied!
# Auto-fit normal distribution to data: geom_norm() is called without
# mean/sd, and the histogram is put on a density scale to match the curve.
np.random.seed(42)
df = pd.DataFrame({'x': np.random.randn(1000)})
(ggplot(df, aes(x='x'))
+ geom_histogram(aes(y=after_stat('density')), bins=30, fill='lightblue', color='white')
+ geom_norm(color='red', size=2)
+ labs(title='Histogram with Fitted Normal'))
# Auto-fit normal distribution to data: geom_norm() is called without
# mean/sd, and the histogram is put on a density scale to match the curve.
np.random.seed(42)
df = pd.DataFrame({'x': np.random.randn(1000)})
(ggplot(df, aes(x='x'))
+ geom_histogram(aes(y=after_stat('density')), bins=30, fill='lightblue', color='white')
+ geom_norm(color='red', size=2)
+ labs(title='Histogram with Fitted Normal'))
Out[26]:
Scale to Match Histogram Counts¶
Use scale='count' to automatically scale the normal curve to match histogram counts (no need for density scaling on histogram):
In [27]:
Copied!
# With count histogram - no density scaling needed.
# The histogram keeps its default count y-axis; scale='count' on geom_norm
# rescales the curve to match it instead.
np.random.seed(42)
df = pd.DataFrame({'x': np.random.randn(1000)})
(ggplot(df, aes(x='x'))
+ geom_histogram(bins=30, fill='lightblue', color='white')
+ geom_norm(scale='count', color='red', size=2)
+ labs(title='Normal Curve Scaled to Histogram Counts'))
# With count histogram - no density scaling needed.
# The histogram keeps its default count y-axis; scale='count' on geom_norm
# rescales the curve to match it instead.
np.random.seed(42)
df = pd.DataFrame({'x': np.random.randn(1000)})
(ggplot(df, aes(x='x'))
+ geom_histogram(bins=30, fill='lightblue', color='white')
+ geom_norm(scale='count', color='red', size=2)
+ labs(title='Normal Curve Scaled to Histogram Counts'))
Out[27]:
Explicit Normal Parameters¶
Overlay a standard normal (mean=0, sd=1) regardless of data:
In [28]:
Copied!
# Compare data to standard normal.
# The sample is drawn with mean 0.5 and sd 1.5, so it deliberately differs
# from N(0, 1).
df = pd.DataFrame({'x': np.random.randn(500) * 1.5 + 0.5})
# Two geom_norm layers: the dashed blue one has mean/sd pinned to N(0, 1),
# the red one is left to fit the data.
(ggplot(df, aes(x='x'))
+ geom_histogram(aes(y=after_stat('density')), bins=25, fill='lightgray', color='white')
+ geom_norm(mean=0, sd=1, color='blue', linetype='dashed', size=2)
+ geom_norm(color='red', size=2)
+ labs(title='Data vs Standard Normal',
subtitle='Red=fitted, Blue=N(0,1)'))
# Compare data to standard normal.
# The sample is drawn with mean 0.5 and sd 1.5, so it deliberately differs
# from N(0, 1).
df = pd.DataFrame({'x': np.random.randn(500) * 1.5 + 0.5})
# Two geom_norm layers: the dashed blue one has mean/sd pinned to N(0, 1),
# the red one is left to fit the data.
(ggplot(df, aes(x='x'))
+ geom_histogram(aes(y=after_stat('density')), bins=25, fill='lightgray', color='white')
+ geom_norm(mean=0, sd=1, color='blue', linetype='dashed', size=2)
+ geom_norm(color='red', size=2)
+ labs(title='Data vs Standard Normal',
subtitle='Red=fitted, Blue=N(0,1)'))
Out[28]:
Custom Distribution with stat_function¶
Use stat_function() to overlay any distribution from scipy.stats:
In [29]:
Copied!
# scipy.stats was already imported for the Q-Q examples; repeating the
# import keeps this cell self-contained.
from scipy import stats
# Exponential distribution: 500 draws with scale=2.
df = pd.DataFrame({'x': np.random.exponential(scale=2, size=500)})
# The overlaid pdf uses the same scale=2 as the generator, so the curve is
# the true generating density rather than a fit.
(ggplot(df, aes(x='x'))
+ geom_histogram(aes(y=after_stat('density')), bins=30, fill='lightgreen', color='white')
+ stat_function(fun=lambda x: stats.expon.pdf(x, scale=2), color='darkgreen', size=2)
+ labs(title='Exponential Distribution Overlay'))
# scipy.stats was already imported for the Q-Q examples; repeating the
# import keeps this cell self-contained.
from scipy import stats
# Exponential distribution: 500 draws with scale=2.
df = pd.DataFrame({'x': np.random.exponential(scale=2, size=500)})
# The overlaid pdf uses the same scale=2 as the generator, so the curve is
# the true generating density rather than a fit.
(ggplot(df, aes(x='x'))
+ geom_histogram(aes(y=after_stat('density')), bins=30, fill='lightgreen', color='white')
+ stat_function(fun=lambda x: stats.expon.pdf(x, scale=2), color='darkgreen', size=2)
+ labs(title='Exponential Distribution Overlay'))
Out[29]:
Multiple Distributions¶
In [30]:
Copied!
# Compare Student's t with different degrees of freedom against the
# standard normal. Relies on `stats` imported in an earlier cell.
# No data needed - just provide xlim to define the x range
# stat_function uses geom_line by default
# Each layer passes the same xlim=(-4, 4) so all three curves share one x
# range; `name` labels the individual curve.
(ggplot()
+ stat_function(fun=lambda x: stats.norm.pdf(x), color='black', size=2, xlim=(-4, 4), name='Normal')
+ stat_function(fun=lambda x: stats.t.pdf(x, df=3), color='red', size=2, xlim=(-4, 4), name='t(3)')
+ stat_function(fun=lambda x: stats.t.pdf(x, df=10), color='blue', size=2, xlim=(-4, 4), name='t(10)')
+ labs(title='Normal vs t-distributions'))
# Compare Student's t with different degrees of freedom against the
# standard normal. Relies on `stats` imported in an earlier cell.
# No data needed - just provide xlim to define the x range
# stat_function uses geom_line by default
# Each layer passes the same xlim=(-4, 4) so all three curves share one x
# range; `name` labels the individual curve.
(ggplot()
+ stat_function(fun=lambda x: stats.norm.pdf(x), color='black', size=2, xlim=(-4, 4), name='Normal')
+ stat_function(fun=lambda x: stats.t.pdf(x, df=3), color='red', size=2, xlim=(-4, 4), name='t(3)')
+ stat_function(fun=lambda x: stats.t.pdf(x, df=10), color='blue', size=2, xlim=(-4, 4), name='t(10)')
+ labs(title='Normal vs t-distributions'))
Out[30]:
In [ ]:
Copied!