#!/usr/bin/env python3
"""Bootstrap the sampling distribution of the median of one data column.

The script reads ``abalone.txt`` (comma-separated), takes the column at
position 1 (presumably the length measurement -- verify against the data
file), draws a random sample of ``SAMPLE_SIZE`` records without
replacement, and resamples that sample with replacement ``N_RESAMPLES``
times.  It prints the population median and standard deviation, the mean
and standard deviation of the bootstrap medians, and how far the
population median lies from the bootstrap mean in standard-error units.
Finally it shows a density histogram of the bootstrap medians.
"""
import random
import statistics

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Not used below; retained so that nothing relying on these names breaks.
from matplotlib import colors  # noqa: F401
from matplotlib.ticker import PercentFormatter  # noqa: F401

DATA_FILE = 'abalone.txt'
SAMPLE_SIZE = 100    # records in the initial sample and in every resample
N_RESAMPLES = 3000   # number of bootstrap resamples
HIST_BINS = 30


def _bootstrap_medians(sample, n_resamples):
    """Return the medians of ``n_resamples`` resamples of ``sample``.

    Each resample has the same length as ``sample`` and is drawn with
    replacement (the default of ``np.random.choice``).
    """
    draws = np.random.choice(sample, size=(n_resamples, len(sample)))
    return np.median(draws, axis=1).tolist()


def _distance_in_stderr(center, estimate, stderr):
    """Return ``|center - estimate|`` expressed in units of ``stderr``.

    A zero ``stderr`` (all bootstrap medians identical) yields 0.0 when the
    two values coincide and ``inf`` otherwise, instead of dividing by zero.
    """
    gap = abs(center - estimate)
    if stderr == 0:
        return 0.0 if gap == 0 else float('inf')
    return gap / stderr


def main():
    """Run the bootstrap, print the summary statistics and show the plot.

    Raises:
        ValueError: from ``random.sample`` if the column holds fewer than
            ``SAMPLE_SIZE`` records.
    """
    fish = pd.read_csv(DATA_FILE, sep=',')
    fish_len = fish.iloc[:, 1]

    popmedian = fish_len.median()
    popstd = fish_len.std()
    print("popmedian {median] = %10.4f" % popmedian)
    print("popstd {sigma} =%10.4f" % popstd)

    # One sample without replacement stands in for the population; every
    # bootstrap resample is then drawn from this sample, not from the file.
    sample = random.sample(fish_len.tolist(), SAMPLE_SIZE)
    medians = _bootstrap_medians(sample, N_RESAMPLES)

    mean_of_medians = statistics.mean(medians)
    stderr_x = statistics.stdev(medians)
    print("sample_median {median_x] = %10.4f" % mean_of_medians)
    print("stderr {sigma_x} =%10.4f" % stderr_x)

    # Report the computed distance itself; the original printed popmedian
    # here and discarded the value it had just calculated.
    distance = _distance_in_stderr(popmedian, mean_of_medians, stderr_x)
    print("distance of popmedian from sample_median in stderr units =%10.4f"
          % distance)
    print("popmedian inside the ~68%% interval "
          "-1stderr < popmedian < +1stderr: %s" % (distance < 1))

    plt.hist(medians, density=True, bins=HIST_BINS)  # density=False gives counts
    plt.ylabel('Probability')
    # Built from the constants so the label cannot drift from the real counts.
    plt.xlabel('Draw %d samples of %d records at random with replacement'
               % (N_RESAMPLES, SAMPLE_SIZE))
    plt.show()


if __name__ == "__main__":
    main()