Source code for mathdistops.pnorm

from scipy.special import erf
import math
import matplotlib.pyplot as plt
import scipy.stats as stats
import numpy as np
import altair as alt
import pandas as pd


[docs]
def pnorm(q, mean=0, std_dev=1, graph=True):
    """
    Evaluate the normal CDF at a quantile and optionally plot the PDF and CDF.

    The cumulative probability is computed from the error function as
    ``0.5 * (1 + erf(z / sqrt(2)))`` where ``z = (q - mean) / std_dev``.

    Parameters
    ----------
    q: float
        The quantile at which to evaluate the CDF.
    mean: float
        The mean (average) of the normal distribution. Default is 0.
    std_dev: float
        The standard deviation of the normal distribution. Must be strictly
        positive. Default is 1.
    graph: bool
        Whether to build the PDF and CDF charts. Default is True.

    Returns
    -------
    result : pandas.DataFrame or tuple
        If `graph` is True (default), returns a tuple ``(results_df, chart)``
        where ``results_df`` is a one-row pandas DataFrame with the columns
        ``'Z-score'`` and ``'Cumulative probability'`` and ``chart`` is an
        altair chart made of two side-by-side layered plots (PDF with the
        area up to `q` shaded, and CDF).
        If `graph` is False, returns only the pandas DataFrame.

    Raises
    ------
    TypeError:
        If any of 'q', 'mean', 'std_dev' is not a Python ``int``/``float`` or
        a NumPy integer/floating scalar.
    ValueError:
        If 'std_dev' is zero, negative or NaN, as the standard deviation must
        be a positive number.

    Example
    --------
    >>> pnorm(1, mean=0, std_dev=1, graph=False)
       Z-score  Cumulative probability
    0      1.0                0.841345
    """

    # Check types first so that a non-numeric argument is always reported as
    # a TypeError with the documented message, instead of failing inside (or
    # being misreported by) the `std_dev` comparison below.
    numeric_types = (int, float, np.integer, np.floating)
    if not all(isinstance(param, numeric_types) for param in (q, mean, std_dev)):
        raise TypeError("Input parameters must be numerical.")

    # `not std_dev > 0` instead of `std_dev <= 0`: every comparison with NaN
    # is False, so this form also rejects a NaN standard deviation.
    if not std_dev > 0:
        raise ValueError("Standard deviation cannot be zero or negative.")

    # Standardise the quantile and evaluate the normal CDF via erf.
    z = (q - mean) / std_dev
    prob = 1/2 * (1 + erf(z / math.sqrt(2)))

    results_df = pd.DataFrame({'Z-score': [z], 'Cumulative probability': [prob]})

    if not graph:
        return results_df

    # Sample both curves over mean +/- 3 standard deviations.
    x_values = np.linspace(mean - 3 * std_dev, mean + 3 * std_dev, 100)
    y_values_pdf = stats.norm.pdf(x_values, mean, std_dev)
    y_values_cdf = stats.norm.cdf(x_values, mean, std_dev)
    data = {'x': x_values, 'pdf': y_values_pdf, 'cdf': y_values_cdf, 'q': q}
    df = pd.DataFrame(data)

    # PDF curve
    chart = alt.Chart(
        df,
        title=alt.Title(
            text='Probability Density Function',
            subtitle=f'for q = {q:.4g},mean = {mean:.4g},sd = {std_dev:.4g}')
    ).mark_line().encode(
        x='x',
        y='pdf'
    ).properties(
        width=250,
        height=250
    )

    # Shade the area under the PDF to the left of the quantile, i.e. the
    # region whose area is the cumulative probability.
    # NOTE(review): the threshold is passed as a plain Python float as a
    # precaution so that NumPy scalar inputs are embedded in the filter
    # expression as an ordinary number literal - confirm against the Altair
    # version in use.
    shade_area = alt.Chart(df).mark_area(opacity=0.3, color='lightblue').encode(
        x=alt.X('x', title='X'),
        y=alt.Y('pdf', title='f(X)')
    ).transform_filter(
        alt.datum.x <= float(q)
    ).properties(
        width=250,
        height=250
    )

    # Dashed vertical line at the quantile (reused in both panels).
    vertline = alt.Chart(pd.DataFrame({'z': [q]})).mark_rule(strokeDash=[3, 3]).encode(
        x='z'
    )

    # CDF curve
    cdf_chart = alt.Chart(
        df,
        title=alt.Title(
            text="Cumulative Distribution Chart",
            subtitle= f'for q = {q:.4g}, mean = {mean:.4g}, sd = {std_dev:.4g}')
    ).mark_line().encode(
        x=alt.X('x').title("x"),
        y=alt.Y('cdf').title('probability'),
        color=alt.value('orange'),
        opacity=alt.value(0.5),
    ).properties(
        width=250,
        height=250
    )

    # Dashed horizontal line at the cumulative probability of the quantile.
    horizontalline = alt.Chart(pd.DataFrame({'p': [prob]})).mark_rule(strokeDash=[3, 3]).encode(
        y='p'
    )

    # Left panel: shaded PDF; right panel: CDF with both guide lines.
    result_graph = (shade_area + chart + vertline) |(cdf_chart + vertline + horizontalline)

    return results_df, result_graph
Source code for mathdistops.pnorm

Table of Contents

Search