#!/usr/bin/env python3

import matplotlib.pyplot as plt
import numpy as np
from matplotlib import colors
from matplotlib.ticker import PercentFormatter
import pandas as pd
import random

# Bootstrap demonstration: take one random sample of records from column 1 of
# the data file, resample it with replacement many times, and plot a histogram
# of the standardised resample means.

SAMPLE_SIZE = 100    # records in the base sample and in every resample
N_RESAMPLES = 3000   # number of resamples drawn with replacement
N_DISCARD = 10       # leading resample means left out of the histogram

fish = pd.read_csv('abalone.txt', sep=',')
print(fish.head(10))
# Column at position 1 -- presumably the length measurement; confirm against
# the layout of the data file.
fish_len = fish.iloc[:, 1]

# Reference mean / standard deviation taken over the whole column.
# NOTE(review): Series.std() defaults to ddof=1 (the sample estimate).
popmean = fish_len.mean()
popstd = fish_len.std()
print("popmean {mu} = %10.4f" % popmean)
print("popstd {sigma} =%10.4f" % popstd)

# Base sample, drawn WITHOUT replacement; raises ValueError when the column
# holds fewer than SAMPLE_SIZE records.
y = random.sample(fish_len.tolist(), SAMPLE_SIZE)

# One row per resample (drawn WITH replacement from the base sample); the row
# means are the bootstrap sample means.
sample_mean = np.random.choice(y, size=(N_RESAMPLES, SAMPLE_SIZE)).mean(axis=1)

# Standardise with the standard error of a mean of SAMPLE_SIZE records.
# NOTE(review): the resamples come from `y`, yet they are centred on the
# full-column mean, so the histogram is offset by (mean(y) - popmean) -- confirm
# this is the intended comparison.
# NOTE(review): the first N_DISCARD means are skipped, as in the original
# script; the reason is not evident from the code.
std_error = popstd / np.sqrt(SAMPLE_SIZE)
p_x = (sample_mean[N_DISCARD:] - popmean) / std_error

# density=True normalises the bars so that the total area is 1, i.e. the
# y-axis is a probability density rather than a count or a probability.
plt.hist(p_x, density=True, bins=50)
plt.ylabel('Probability density')
plt.xlabel('Draw %d samples of %d records at random with replacement'
           % (N_RESAMPLES, SAMPLE_SIZE))
plt.show()