Get the Books
Enjoying these notebooks and want to support the work? Check out the practical books on Data Science, Visualisation, and Evolutionary Algorithms.
Get the books
Normal Distribution Test
SciPy stats.normaltest
In [7]:
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
In [8]:
# Two hard-coded samples of 30 observations each, used by every later cell.
# (Units are not stated anywhere in the notebook.)
heights_male = np.array([
    100, 256, 238, 116, 286, 253, 112, 165, 246, 130,
    217, 269, 155, 136, 189, 235, 255, 113, 280, 222,
    259, 177, 294, 290, 225, 113, 163, 137, 172, 127,
])
heights_female = np.array([
    126, 172, 137, 163, 113, 225, 290, 294, 175, 259,
    220, 280, 111, 255, 235, 189, 136, 150, 269, 214,
    130, 243, 165, 110, 253, 286, 116, 238, 255, 99,
])

# Report the arithmetic mean of each sample.
male_mean = heights_male.mean()
female_mean = heights_female.mean()
print("mean heights (male): {}".format(male_mean))
print("mean heights (female): {}".format(female_mean))
In [11]:
# Wilcoxon signed-rank test comparing the two samples element by element.
# NOTE(review): stats.wilcoxon is a *paired* test; if the male and female
# observations are independent groups, a rank-sum / Mann-Whitney U test is
# the usual choice -- confirm the intended design before relying on this.
alpha = 0.05  # significance threshold for the decision below
s, p = stats.wilcoxon(heights_female, heights_male)

# Choose the message from the p-value, then report the p-value itself.
verdict = (
    "null hypothesis rejected, significant difference between the data-sets"
    if p < alpha
    else "null hypothesis accepted, no significant difference between the data-sets"
)
print(verdict)
print("p value = {}".format(p))
In [13]:
# Normalised histogram of the male sample.
# `density=True` replaces the `normed` keyword, which was deprecated in
# Matplotlib 2.1 and removed in 3.1 (passing `normed` now raises an error).
plt.hist(heights_male, color="Magenta", density=True)
plt.xlim(100, 300)  # same x-range as the female histogram so the two are comparable
plt.xlabel('Height')
plt.ylabel('Density')
plt.show()
In [14]:
# Normalised histogram of the female sample.
# `density=True` replaces the `normed` keyword, which was deprecated in
# Matplotlib 2.1 and removed in 3.1 (passing `normed` now raises an error).
plt.hist(heights_female, color="yellow", density=True)
plt.xlim(100, 300)  # same x-range as the male histogram so the two are comparable
plt.xlabel('Height')
plt.ylabel('Density')
plt.show()
In [15]:
# Standard error of the mean for growing prefixes of the male sample:
# the first 3 observations, the first 4, ... up to the full sample.
sample_sizes = range(3, len(heights_male) + 1)

# ddof=1 gives the sample (Bessel-corrected) standard deviation, which is the
# estimator used for SE_M = s / sqrt(n); NumPy's default ddof=0 is the
# population formula and understates the error for small n.
SEM = [
    heights_male[:n].std(ddof=1) / np.sqrt(n)
    for n in sample_sizes
]

plt.plot(sample_sizes, SEM, marker='o', color='cyan')
plt.ylabel("Standard Error of the Mean ($SE_M$)")
plt.xlabel("Sample size $(n)$")
plt.title("Relationship between $SE_M$ and $n$")
plt.show()
In [18]:
# Run SciPy's normality test on each sample and print the result object
# (statistic and p-value), male first, then female.
for sample in (heights_male, heights_female):
    result = stats.normaltest(sample)
    print(result)
Support this work
You can support this work by getting the e-books. This notebook will always be available for free in its online format.
Plotapi, beautiful by default.
Let plotapi do the heavy lifting – enabling beautiful interactive visualisations with a single line of code (instead of hundreds).
Get Plotapi