#!/usr/bin/env python3

import matplotlib.pyplot as plt
import numpy as np
from matplotlib import colors
from matplotlib.ticker import PercentFormatter
import pandas as pd
import random
import statistics

# Load the data set and compute the population-level statistics that the
# resampling results further down are compared against.
# NOTE(review): read_csv treats the first row of the file as a header by
# default; if 'abalone.txt' has no header line the first record is lost and
# header=None should be passed -- confirm against the data file.
fish = pd.read_csv('abalone.txt', sep=',')
# The second column (positional index 1) is the measurement being analysed.
fish_len = fish.iloc[:, 1]

# Accumulators that the code below appends to.
sample_median = []      # median of each resample (when popstd unknown)
T = []                  # medians standardised with popmedian and popstd
Z = []                  # when popstd is known (not filled in the visible code)

popmedian = fish_len.median()
popstd = fish_len.std()
print("popmedian {median} = %10.4f" % popmedian)
print("popstd {sigma} =%10.4f" % popstd)

# Bootstrap the median: draw one simple random sample of n records from the
# population (without replacement), then resample that sample with replacement
# 3000 times and record the median of every resample.
n = 100
y = random.sample(fish_len.tolist(), n)
sample_median.extend(
    np.median(np.random.choice(y, n)) for _ in range(3000)
)

# Standardise every resampled median against the population median, scaling
# by popstd / sqrt(n).
T.extend(
    (resampled - popmedian) / (popstd / np.sqrt(n))
    for resampled in sample_median
)

# Summarise the distribution of the resampled medians.
Sx = statistics.mean(sample_median)         # average of the resampled medians
stderr_x = statistics.stdev(sample_median)  # their spread, used as the standard error
print("sample_median {median_x} = %10.4f" % Sx)
print("stderr {sigma_x} =%10.4f" % stderr_x)

# Absolute distance between the population median and the average resampled
# median, expressed in units of the standard error above.
f = abs(popmedian - Sx) / stderr_x

# Report f itself; the previous version computed f but printed popmedian here.
print("distance of true population median from the sample_median mean, "
      "in stderr units =%10.4f" % f)
print("the fraction of true population median lie in the interval: "
      "68% -1stderr < popmedian < +1stderr")


# Histogram of the resampled medians.
x = sample_median
# x = T  # alternative: plot the standardised medians instead
# density=True normalises the bars so the histogram integrates to 1;
# density=False would show raw counts.
plt.hist(x, density=True, bins=30)
plt.ylabel('Probability density')
# Take the counts from the data so the label cannot drift from the loop above
# (it previously said 2000 while 3000 resamples were drawn).
plt.xlabel('Draw %d samples of %d records at random with replacement'
           % (len(x), n))
plt.show()