Hypothesis test for the mean (known variance)

We use one giraffe-themed story in three variants (adult neck length roughly \(1.8\)–\(2.4\) m): - left-tailed test; - right-tailed test; - two-tailed test.

The observed \(\bar x\) values are plausible, while the conservative \(\mu_0\) values are slightly exaggerated—but not as extreme as in the lecture examples, so the numbers stay interpretable. You can plug in the lecture’s more extreme nulls and see what changes.

Assumptions for the \(Z\)-test for a mean: - independent observations; - either a normal population or a large enough sample; - known population standard deviation: \(\sigma = 0.35\) m (the same in all three scenarios); - fixed significance level: \(\alpha = 0.05\).

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm

# Known population standard deviation σ of neck length, in meters.
# The same value is reused by all three test scenarios below.
SIGMA = 0.35


def _fill_tail(ax, x, y, mask, **kwargs):
    if mask.any():
        ax.fill_between(x[mask], y[mask], **kwargs)


def plot_hypothesis_test(mu0, sigma, n, x_bar, alpha=0.05, kind='left', title=''):
    """Run a Z-test for the mean and draw it on the original and the Z scale.

    The top panel shows the density of the sample mean under H0,
    N(mu0, sigma**2 / n); the bottom panel shows the standard normal density.
    Both panels mark the rejection region (red), the p-value area (blue) and
    the observation (navy); dashed connectors join matching points of the two
    panels.  The cutoff(s), the cutoff-based decision, the z-score, the
    p-value and the p-value-based decision are printed, and the figure is
    displayed with ``plt.show()``.  Nothing is returned.

    Parameters
    ----------
    mu0 : float
        Mean under the null hypothesis.
    sigma : float
        Known population standard deviation; must be positive.
    n : int
        Sample size; must be positive.
    x_bar : float
        Observed sample mean.
    alpha : float, default 0.05
        Significance level, strictly between 0 and 1.
    kind : {'left', 'right', 'two-sided'}, default 'left'
        Direction of the alternative hypothesis.
    title : str, default ''
        Figure title; no title is set when empty.

    Raises
    ------
    ValueError
        If ``kind`` is not one of the supported values, or if ``sigma``,
        ``n`` or ``alpha`` is outside its valid range.
    """
    # Validate every argument before any figure exists, so that a bad call
    # fails fast instead of leaving an orphaned, half-drawn figure open.
    if kind not in ('left', 'right', 'two-sided'):
        raise ValueError("kind must be 'left', 'right', or 'two-sided'")
    if not sigma > 0:
        raise ValueError('sigma must be positive')
    if not n > 0:
        raise ValueError('n must be positive')
    if not 0 < alpha < 1:
        raise ValueError('alpha must be strictly between 0 and 1')

    from matplotlib.patches import ConnectionPatch

    se = sigma / np.sqrt(n)  # standard error of the sample mean
    z_score = (x_bar - mu0) / se
    z_abs = abs(z_score)

    fig, (ax_top, ax_bottom) = plt.subplots(2, 1, figsize=(8, 9), height_ratios=[1, 1])
    fig.subplots_adjust(hspace=0.38)

    # Both grids cover +/- 4 standard errors around the null value:
    # original units on top, standardized units below.
    x_grid = np.linspace(mu0 - 4 * se, mu0 + 4 * se, 1000)
    y_top = norm.pdf(x_grid, loc=mu0, scale=se)
    z_grid = np.linspace(-4, 4, 1000)
    y_bot = norm.pdf(z_grid)

    ax_top.plot(x_grid, y_top, lw=2, color='0.25', label=r'$\bar{X} \mid H_0$')
    ax_bottom.plot(z_grid, y_bot, lw=2, color='0.25', label=r'$Z$')

    def link(x_val, z_val, color):
        """Draw a dashed connector from ``x_val`` (top) to ``z_val`` (bottom)."""
        # The x-axis transform takes x in data units and y in axes fraction,
        # so y = 0 anchors each end on the bottom edge of its axes.
        con = ConnectionPatch(
            xyA=(x_val, 0),
            coordsA=ax_top.get_xaxis_transform(),
            xyB=(z_val, 0),
            coordsB=ax_bottom.get_xaxis_transform(),
            color=color,
            ls='--',
            lw=1.8,
            axesA=ax_top,
            axesB=ax_bottom,
        )
        ax_bottom.add_artist(con)

    reject_style = dict(color='tomato', alpha=0.35)
    p_style = dict(color='steelblue', alpha=0.4)
    cut_style = dict(color='tomato', ls='--', lw=2)

    if kind == 'two-sided':
        z_half = norm.ppf(1 - alpha / 2)
        K_L = mu0 - z_half * se
        K_R = mu0 + z_half * se
        z_crit_l, z_crit_r = -z_half, z_half
        # sf() is the upper-tail probability; unlike 1 - cdf() it does not
        # cancel to exactly 0 when the observation is far out in the tail.
        p_value = 2 * norm.sf(z_abs)
        reject = z_abs > z_half
        # Mirror the observation around mu0 to delimit both p-value tails.
        x_cut_l = mu0 - z_abs * se
        x_cut_r = mu0 + z_abs * se
        print(f'K_L = {K_L:.3f}, K_R = {K_R:.3f}')
        print('Decision (cutoffs):', 'Reject H0' if reject else 'Do not reject H0')
        ax_top.axvline(K_L, label=rf'$K_L={K_L:.3f}$', **cut_style)
        ax_top.axvline(K_R, label=rf'$K_R={K_R:.3f}$', **cut_style)
        ax_bottom.axvline(z_crit_l, label=rf'$-z_{{\alpha/2}}={z_crit_l:.3f}$', **cut_style)
        ax_bottom.axvline(z_crit_r, label=rf'$+z_{{\alpha/2}}={z_crit_r:.3f}$', **cut_style)
        link(K_L, z_crit_l, 'tomato')
        link(K_R, z_crit_r, 'tomato')
        ax_top.axvline(x_cut_l, color='steelblue', ls=':', lw=1.2)
        ax_top.axvline(x_cut_r, color='steelblue', ls=':', lw=1.2)
        ax_bottom.axvline(-z_abs, color='steelblue', ls=':', lw=1.2)
        ax_bottom.axvline(z_abs, color='steelblue', ls=':', lw=1.2)
        # Only the first patch of each colour carries a label, so the legend
        # shows one entry per region rather than one per tail.
        _fill_tail(ax_top, x_grid, y_top, x_grid <= K_L,
                   label=r'Rejection region ($\alpha$)', **reject_style)
        _fill_tail(ax_top, x_grid, y_top, x_grid >= K_R, **reject_style)
        _fill_tail(ax_bottom, z_grid, y_bot, z_grid <= z_crit_l, **reject_style)
        _fill_tail(ax_bottom, z_grid, y_bot, z_grid >= z_crit_r, **reject_style)
        p_label = rf'p-value = {p_value:.4f} (two tails)'
        _fill_tail(ax_top, x_grid, y_top, x_grid <= x_cut_l, label=p_label, **p_style)
        _fill_tail(ax_top, x_grid, y_top, x_grid >= x_cut_r, **p_style)
        _fill_tail(ax_bottom, z_grid, y_bot, z_grid <= -z_abs, **p_style)
        _fill_tail(ax_bottom, z_grid, y_bot, z_grid >= z_abs, **p_style)
    else:
        # The two one-sided tests differ only in which tail is used.
        left = kind == 'left'
        z_crit = norm.ppf(alpha) if left else norm.ppf(1 - alpha)
        K = mu0 + z_crit * se
        # Upper tail via sf() to avoid the cancellation in 1 - cdf().
        p_value = norm.cdf(z_score) if left else norm.sf(z_score)
        reject = x_bar < K if left else x_bar > K
        in_tail = np.less_equal if left else np.greater_equal
        print(f'K = {K:.3f}')
        print('Decision (cutoff K):', 'Reject H0' if reject else 'Do not reject H0')
        ax_top.axvline(K, label=rf'$K={K:.3f}$', **cut_style)
        ax_bottom.axvline(z_crit, label=rf'$z_\alpha={z_crit:.3f}$', **cut_style)
        link(K, z_crit, 'tomato')
        _fill_tail(ax_top, x_grid, y_top, in_tail(x_grid, K),
                   label=r'Rejection region ($\alpha$)', **reject_style)
        _fill_tail(ax_bottom, z_grid, y_bot, in_tail(z_grid, z_crit), **reject_style)
        p_label = rf'p-value = {p_value:.4f}'
        _fill_tail(ax_top, x_grid, y_top, in_tail(x_grid, x_bar), label=p_label, **p_style)
        _fill_tail(ax_bottom, z_grid, y_bot, in_tail(z_grid, z_score), **p_style)

    # Reference lines at the null value, then the observation on both scales.
    ax_top.axvline(mu0, color='gray', ls=':', lw=1)
    ax_bottom.axvline(0, color='gray', ls=':', lw=1)
    ax_top.axvline(x_bar, color='navy', lw=2.5, label=rf'$\bar{{x}}={x_bar:.3f}$')
    ax_bottom.axvline(z_score, color='navy', lw=2.5, label=rf'$z_{{score}}={z_score:.3f}$')
    link(x_bar, z_score, 'navy')

    ax_top.set_title('Original scale')
    ax_bottom.set_title('Standard normal scale')
    ax_top.set_xlabel(r'$\bar{X}$')
    ax_bottom.set_xlabel(r'$Z$')
    for ax in (ax_top, ax_bottom):
        ax.set_ylabel('Density')
    for ax in (ax_top, ax_bottom):
        ax.legend(loc='best', fontsize=9)
    for ax in (ax_top, ax_bottom):
        ax.grid(alpha=0.2)
    if title:
        fig.suptitle(title, y=1.01, fontsize=12)

    print(f'$z_{{score}}$ = {z_score:.4f}')
    print(f'$p$-value = {p_value:.6f}')
    print('Decision (score / p-value):', 'Reject H0' if p_value < alpha else 'Do not reject H0')
    plt.show()

1. Left-tailed test

Problem setup: - \(X\) — adult giraffe neck length (m); in nature the typical range is about \(1.8\)–\(2.4\) m; - independent sample of size \(n = 20\); - observed mean \(\bar x = 2.1\); - conservative null: \(\mu_0 = 2.38\) — researchers believe giraffes have long necks on average; - test at \(\alpha = 0.05\): is there enough evidence for \(H_1: \mu < \mu_0\)?

Theory: - hypotheses: \(H_0: \mu = \mu_0\) vs. \(H_1: \mu < \mu_0\); - under \(H_0\): \[ \bar X \sim \mathcal N\!\left(\mu_0, \frac{\sigma^2}{n}\right); \] - the cutoff \(K\) satisfies \[ P_{H_0}(\bar X < K) = \alpha; \] - standardize: \[ P\!\left(\frac{\bar X-\mu_0}{\sigma/\sqrt n} < \frac{K-\mu_0}{\sigma/\sqrt n}\right)=\alpha \Rightarrow P(Z<z_\alpha)=\alpha; \] - since \(\alpha < 0.5\), write the quantile as \(-|z_{\alpha}|\) to avoid sign confusion; - on the original scale: \[ \frac{K-\mu_0}{\sigma/\sqrt n}=-|z_{\alpha}| \Rightarrow K=\mu_0 - |z_\alpha| \frac{\sigma}{\sqrt n}; \] - reject \(H_0\) if \(\bar x < K\).

\(z\)-score and \(p\)-value (left-tailed test)

\(z\)-score — the observation on the standardized scale: \[ z_{\text{score}} = \frac{\bar{x}-\mu_0}{\sigma/\sqrt{n}}. \] The cutoff \(K\) maps to the critical point \(z_\alpha\): \[ \frac{K-\mu_0}{\sigma/\sqrt{n}} = z_\alpha. \] This monotone map preserves order: \(\bar{x} < K \Leftrightarrow z_{\text{score}} < z_\alpha\).

Score rule: reject \(H_0\) if \(z_{\text{score}} < z_\alpha\) (for \(\alpha<0.5\), \(z_\alpha\) is negative).

\(p\)-value — under \(H_0\), the probability of a result at least as extreme as observed (here: not larger than \(\bar{x}\)): \[ p\text{-value} = P_{H_0}(\bar{X} \le \bar{x}) = P_{H_0}(Z \le z_{\text{score}}). \]

\(p\)-value rule: reject \(H_0\) if \(p\text{-value} < \alpha\).

# Scenario 1: left-tailed alternative (kind='left').
# The inputs are bound to module-level names before the call.
mu0, x_bar = 2.38, 2.1
sigma, n = SIGMA, 20
alpha = 0.05

plot_hypothesis_test(mu0, sigma, n, x_bar, alpha=alpha, kind='left', title='Left-tailed test')

2. Right-tailed test

Problem setup: - \(X\) — adult giraffe neck length (m); - independent sample of size \(n = 20\); - observed mean \(\bar x = 2.1\); - conservative null: \(\mu_0 = 1.88\) — researchers believe giraffes have short necks on average; - test at \(\alpha = 0.05\): is there enough evidence for \(H_1: \mu > \mu_0\)?

Theory: - hypotheses: \(H_0: \mu = \mu_0\) vs. \(H_1: \mu > \mu_0\); - under \(H_0\): \[ \bar X \sim \mathcal N\!\left(\mu_0, \frac{\sigma^2}{n}\right); \] - the cutoff \(K\) satisfies \[ P_{H_0}(\bar X > K) = \alpha; \] - standardize: \[ P\!\left(\frac{\bar X-\mu_0}{\sigma/\sqrt n} > \frac{K-\mu_0}{\sigma/\sqrt n}\right)=\alpha \Rightarrow P(Z>z_{\alpha})=\alpha; \] - here \(z_\alpha\) denotes the upper-tail critical value, i.e. the \((1-\alpha)\)-quantile of \(\mathcal N(0,1)\) (in the left-tailed section the same symbol denoted the lower \(\alpha\)-quantile); for \(\alpha < 0.5\) it is positive, so there is no sign ambiguity here; - on the original scale: \[ \frac{K-\mu_0}{\sigma/\sqrt n}=z_{\alpha} \Rightarrow K=\mu_0+z_{\alpha}\frac{\sigma}{\sqrt n}; \] - reject \(H_0\) if \(\bar x > K\).

\(z\)-score and \(p\)-value (right-tailed test)

\(z\)-score: \[ z_{\text{score}} = \frac{\bar{x}-\mu_0}{\sigma/\sqrt{n}}. \] The cutoff \(K\) maps to \(z_\alpha\): \[ \frac{K-\mu_0}{\sigma/\sqrt{n}} = z_\alpha. \] Order is preserved: \(\bar{x} > K \Leftrightarrow z_{\text{score}} > z_\alpha\).

Score rule: reject \(H_0\) if \(z_{\text{score}} > z_\alpha\).

\(p\)-value — probability under \(H_0\) of a result at least as extreme to the right: \[ p\text{-value} = P_{H_0}(\bar{X} \ge \bar{x}) = P_{H_0}(Z \ge z_{\text{score}}). \]

\(p\)-value rule: reject \(H_0\) if \(p\text{-value} < \alpha\).

# Scenario 2: right-tailed alternative (kind='right').
# The inputs are bound to module-level names before the call.
mu0, x_bar = 1.88, 2.1
sigma, n = SIGMA, 20
alpha = 0.05

plot_hypothesis_test(mu0, sigma, n, x_bar, alpha=alpha, kind='right', title='Right-tailed test')

3. Two-tailed test

Problem setup: - \(X\) — adult giraffe neck length (m); - independent sample of size \(n = 20\); - observed mean \(\bar x = 2.1\); - conservative null: \(\mu_0 = 2.3\); - test at \(\alpha = 0.05\): is there enough evidence for \(H_1: \mu \neq \mu_0\)?

Theory: - hypotheses: \(H_0: \mu = \mu_0\) vs. \(H_1: \mu \neq \mu_0\); - under \(H_0\): \[ \bar X \sim \mathcal N\!\left(\mu_0, \frac{\sigma^2}{n}\right); \] - cutoffs: \[ P_{H_0}(\bar X < K_L)=\frac{\alpha}{2},\qquad P_{H_0}(\bar X > K_R)=\frac{\alpha}{2}; \] - standardize: \[ P\!\left(\frac{K_L-\mu_0}{\sigma/\sqrt n} < Z < \frac{K_R-\mu_0}{\sigma/\sqrt n}\right)=1-\alpha \Rightarrow P(-z_{\alpha/2}<Z<z_{\alpha/2})=1-\alpha; \] - on the original scale: \[ K_L=\mu_0-z_{\alpha/2}\frac{\sigma}{\sqrt n},\qquad K_R=\mu_0+z_{\alpha/2}\frac{\sigma}{\sqrt n}; \] - reject \(H_0\) if \(\bar x < K_L\) or \(\bar x > K_R\).

\(z\)-score and \(p\)-value (two-tailed test)

\(z\)-score: \[ z_{\text{score}} = \frac{\bar{x}-\mu_0}{\sigma/\sqrt{n}}. \] Cutoffs \(K_L, K_R\) map to \(\pm z_{\alpha/2}\): \[ K_L = \mu_0 - z_{\alpha/2}\frac{\sigma}{\sqrt{n}}, \qquad K_R = \mu_0 + z_{\alpha/2}\frac{\sigma}{\sqrt{n}}. \]

Score rule: reject \(H_0\) if \(|z_{\text{score}}| > z_{\alpha/2}\).

\(p\)-value — total probability in both tails at least as far from \(\mu_0\) as the observation: \[ p\text{-value} = 2\,P_{H_0}(Z \ge |z_{\text{score}}|). \]

\(p\)-value rule: reject \(H_0\) if \(p\text{-value} < \alpha\).

# Scenario 3: two-sided alternative (kind='two-sided').
# The inputs are bound to module-level names before the call.
mu0, x_bar = 2.3, 2.1
sigma, n = SIGMA, 20
alpha = 0.05

plot_hypothesis_test(mu0, sigma, n, x_bar, alpha=alpha, kind='two-sided', title='Two-tailed test')