Get the Books

Enjoying these notebooks and want to support the work? Check out the practical books on Data Science, Visualisation, and Evolutionary Algorithms.

Get the books

Normal Distribution Test

SciPy stats.normaltest

In [7]:
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
In [8]:
# Two samples of 30 integer observations each, used by every later cell.
heights_male = np.array([
    100, 256, 238, 116, 286, 253, 112, 165, 246, 130,
    217, 269, 155, 136, 189, 235, 255, 113, 280, 222,
    259, 177, 294, 290, 225, 113, 163, 137, 172, 127,
])

heights_female = np.array([
    126, 172, 137, 163, 113, 225, 290, 294, 175, 259,
    220, 280, 111, 255, 235, 189, 136, 150, 269, 214,
    130, 243, 165, 110, 253, 286, 116, 238, 255, 99,
])

# Report the arithmetic mean of each sample.
print("mean heights (male): {}".format(heights_male.mean()))
print("mean heights (female): {}".format(heights_female.mean()))
mean heights (male): 197.66666666666666
mean heights (female): 196.93333333333334
In [11]:
# Compare the two samples with SciPy's Wilcoxon signed-rank test and report
# the outcome at the 0.05 significance level.
# NOTE(review): the signed-rank test works on element-wise paired differences;
# if the two groups are independent, `stats.mannwhitneyu` is the usual
# choice — confirm the study design.
# NOTE(review): a p-value above the threshold means the null hypothesis was
# not rejected, which is weaker than "accepted"; the wording is left unchanged
# so the recorded output still matches.
s, p = stats.wilcoxon(heights_female, heights_male)

print(
    "null hypothesis rejected, significant difference between the data-sets"
    if p < 0.05
    else "null hypothesis accepted, no significant difference between the data-sets"
)

print("p value = {}".format(p))
null hypothesis accepted, no significant difference between the data-sets
p value = 0.9425801920860144
In [13]:
# Normalised histogram of the male sample.
# `density=True` replaces the old `normed=1` keyword, which Matplotlib
# deprecated and later removed; with `normed` this cell raises on current
# Matplotlib releases.
plt.hist(heights_male, color="Magenta", density=True)
plt.xlim(100, 300)
plt.xlabel('Height')
plt.ylabel('Density')
plt.show()
In [14]:
# Normalised histogram of the female sample.
# `density=True` replaces the old `normed=1` keyword, which Matplotlib
# deprecated and later removed; with `normed` this cell raises on current
# Matplotlib releases.
plt.hist(heights_female, color="yellow", density=True)
plt.xlim(100, 300)
plt.xlabel('Height')
plt.ylabel('Density')
plt.show()
In [15]:
# Standard error of the mean for growing prefixes of the male sample:
# the first 3 observations, the first 4, ... up to the whole array.
sample_sizes = range(3, len(heights_male) + 1)

# ddof=1 gives the sample standard deviation (n - 1 denominator). NumPy's
# default ddof=0 is the population formula, which underestimates the
# standard error, most visibly for the small prefixes plotted here.
SEM = [
    heights_male[:n].std(ddof=1) / np.sqrt(n)
    for n in sample_sizes
]

plt.plot(sample_sizes, SEM, marker='o', color='cyan')

plt.ylabel("Standard Error of the Mean ($SE_M$)")
plt.xlabel("Sample size $(n)$")
plt.title("Relationship between $SE_M$ and $n$")
plt.show()
In [18]:
# Run SciPy's `normaltest` on each sample and print the result objects
# (test statistic and p-value), male first and then female.
for heights in (heights_male, heights_female):
    print(stats.normaltest(heights))
NormaltestResult(statistic=13.548310785013712, pvalue=0.0011429354242245898)
NormaltestResult(statistic=13.278600632632264, pvalue=0.001307942069480237)

Support this work

You can support this work by getting the e-books. This notebook will always be available for free in its online format.