# Source code for mathdistops.qnorm

import matplotlib.pyplot as plt
import numpy as np
from scipy.special import erfinv
import scipy.stats as stats
import math
import altair as alt
import pandas as pd

def _qnorm_charts(q, p, mean, std_dev):
    """Build the side-by-side PDF / CDF altair charts used by ``qnorm``.

    Parameters
    ----------
    q : float
        The quantile; marked with a dashed vertical rule on both panels and
        used as the upper bound of the shaded PDF area.
    p : float
        The probability; marked with a dashed horizontal rule on the CDF panel.
    mean : float
        Mean of the normal distribution.
    std_dev : float
        Standard deviation of the normal distribution.

    Returns
    -------
    altair chart
        Horizontal concatenation of the layered PDF panel (shaded area, curve,
        quantile rule) and the layered CDF panel (curve, quantile rule,
        probability rule).
    """
    # The curves are sampled on a fixed window of three standard deviations
    # around the mean, at 100 evenly spaced points.
    x_values = np.linspace(mean - 3 * std_dev, mean + 3 * std_dev, 100)
    df = pd.DataFrame({
        'x': x_values,
        'pdf': stats.norm.pdf(x_values, mean, std_dev),
        'cdf': stats.norm.cdf(x_values, mean, std_dev),
        # Scalar broadcast to every row; not referenced by any encoding below
        # but kept so the data embedded in the charts is unchanged.
        'q': q,
    })

    # PDF curve
    pdf_title = alt.Title(
        text='Probability Density Function',
        subtitle=f'for q = {q:.4g},mean = {mean:.4g},sd = {std_dev:.4g}',
    )
    pdf_chart = alt.Chart(df, title=pdf_title).mark_line().encode(
        x='x',
        y='pdf',
    ).properties(
        width=250,
        height=250,
    )

    # Shaded area under the PDF curve for all x up to the quantile
    shade_area = alt.Chart(df).mark_area(opacity=0.3, color='lightblue').encode(
        x=alt.X('x', title='X'),
        y=alt.Y('pdf', title='f(X)'),
    ).transform_filter(
        alt.datum.x <= q
    ).properties(
        width=250,
        height=250,
    )

    # Dashed vertical line at the quantile (shared by both panels)
    vertline = alt.Chart(pd.DataFrame({'z': [q]})).mark_rule(
        strokeDash=[3, 3]
    ).encode(
        x='z',
    )

    # CDF curve
    cdf_title = alt.Title(
        text="Cumulative Distribution Chart",
        subtitle=f'for q = {q:.4g}, mean = {mean:.4g}, sd = {std_dev:.4g}',
    )
    cdf_chart = alt.Chart(df, title=cdf_title).mark_line().encode(
        x=alt.X('x').title("x"),
        y=alt.Y('cdf').title('probability'),
        color=alt.value('orange'),
        opacity=alt.value(0.5),
    ).properties(
        width=250,
        height=250,
    )

    # Dashed horizontal line at the requested probability
    horizontalline = alt.Chart(pd.DataFrame({'p': [p]})).mark_rule(
        strokeDash=[3, 3]
    ).encode(
        y='p',
    )

    # Left panel: PDF layers; right panel: CDF layers
    return (shade_area + pdf_chart + vertline) | (cdf_chart + vertline + horizontalline)


def qnorm(p, mean=0, std_dev=1, graph=True):
    """
    Quantile (Inverse Cumulative Distribution Function) of the normal distribution.

    Parameters
    ----------
    p : float
        The probability for which to find the quantile. Must lie in [0, 1].
    mean : float, optional
        The mean (average) of the normal distribution. Default is 0.
    std_dev : float, optional
        The standard deviation of the normal distribution. Default is 1.
    graph : bool, optional
        Whether to plot the PDF and CDF graphs. Default is True.

    Returns
    -------
    result : pandas.DataFrame or tuple
        If `graph` is True (default), returns a tuple consisting of a pandas
        DataFrame and a concatenated altair chart made of two layered panels,
        PDF and CDF.
        If `graph` is False, returns a pandas DataFrame with a single
        ``'Quantile'`` column holding the computed quantile.

    Raises
    ------
    TypeError
        If any of the input parameters ('p', 'mean', 'std_dev') are not
        numerical (Python ``int``/``float`` or a NumPy integer/floating scalar).
    ValueError
        If 'p' is not within the range [0, 1] (this includes NaN).
        If 'std_dev' is zero, negative or NaN, as standard deviation must be
        positive.

    Example
    -------
    >>> qnorm(0.8413447460685429, mean=0, std_dev=1, graph=False)
       Quantile
    0       1.0
    """
    # NumPy integer scalars (e.g. np.int64) are not subclasses of int, so they
    # are listed explicitly to be accepted as numerical input.
    numeric_types = (int, float, np.integer, np.floating)
    if not all(isinstance(param, numeric_types) for param in (p, mean, std_dev)):
        raise TypeError("Input parameters must be numerical.")

    # Written as negated positive conditions so that NaN, for which every
    # comparison is False, is rejected instead of slipping through.
    if not 0 <= p <= 1:
        raise ValueError("Parameter 'p' stands for probability, which should have a value between 0 and 1 only.")
    if not std_dev > 0:
        raise ValueError("Standard deviation cannot be zero or negative.")

    # Inverse normal CDF: q = mean + sd * sqrt(2) * erfinv(2p - 1)
    q = mean + std_dev * math.sqrt(2) * erfinv(2 * p - 1)

    results_df = pd.DataFrame({'Quantile': [q]})
    if not graph:
        return results_df

    return results_df, _qnorm_charts(q, p, mean, std_dev)