""" file descstats.py author Ernesto P. Adorio UPDEPP (U.P. Clarkfield) desc basic stats version 0.0.1 aug. 15, 2010 """ from quantile import quantile from math import sqrt, exp, log def mean(X, npasses = 1): """ arithmetic mean of vector X. Arguments: npasses : 1- no correction term. 2- with correction term. """ n = float(len(X)) avg = sum(X)/n if npasses > 1: return avg + sum([(x - avg) for x in X])/ n return avg def rms(X): """ root mean square average of vector X. """ return sqrt(sum([x*x for x in X])/len(X)) def hm(X): """ harmonic mean of vector X """ if min(X)<= 0.0: return None return len(X) / sum([1.0/x for x in X]) def gm(X): """ geometric mean of X """ if min(X) <= 0.0: return None return exp(sum([log(x) for x in X])/len(X)) def pvar(X , npasses = 1): """ Population variance of X, divisor is n. """ sxsqr = sum([x*x for x in X]) xbar = mean(X, npasses) n = len(X) if npasses == 2: return sum([(x - xbar)**2 for x in X])/n else: return sxsqr/n - (xbar)**2 def svar(X, npasses = 1): """ Sample variance of X, divisor is n-1. """ n = len(X) return pvar(X, npasses) * n / (n-1.0) def pstd(X, npasses = 1): """ Population standard deviation. """ return sqrt(pvar(X, npasses)) def sstd(X, npasses = 1): """ Sample standard deviation. """ return sqrt((svar(X, npasses))) avg = mean am = mean pstddev = pstd psdev = pstd ssdev = sstd sstddev = sstd def median(X): """ Median of X """ return quantile(X, 0.5) def mad(X, xbar = None): """ Mean absolute deviation of X from mean. """ if xbar is None: xbar = mean(X) return float(sum([abs(x-xbar) for x in X]))/len(X) madmean = mad def madmed(X, xmed = None): """ Mean absolute deviation of X from median. """ if xmed is None: xmed = median(X) return float(sum([abs(x- xmed) for x in X]))/len(X) def Q1(X): """ First quartile of X """ return quantile(X, 0.25) def Q3(X): """ Third quartile of X """ return quantile(X, 0.75) def midhinge(X): """ average of first and third quartile of X. """ return 0.5 * (Q1(X) + Q2(X)) def trimean(X): return 0.25 * (Q1(X) + 2 * median(X) + Q3(X)) def IQR(X): """ Interquartile range of X. """ return Q3(X) - Q1(x) def summary(X): """ Tuple of quartiles of X. """ return (min(X), Q1(X), median(X), Q3(x), max(X)) def skew(X, npasses = 1, xbar=None, sdev = None): """ Skewness of X. """ if xbar is None: xbar = mean(X, npasses) if sdev is None: sdev = sstd(X, npasses) S = sum([(x - xbar)**3 for x in X])/((len(X) -1.0)* sdev**3) return S def kurt(X, npasses = 1, xbar = None, sdev = None): """ Excess Kurtosis of X. """ if xbar is None: xbar = mean(X, npasses) if sdev is None: sdev = sstd(X, npasses) K = sum([(x - xbar)**4 for x in X])/((len(X)-1.0) * sdev**4) - 3.0 return K if __name__ == "__main__": import scipy.stats as stat X = stat.norm.rvs(loc = 5, scale = 2, size=10) print "mean = ", mean(X) print "rms= ", rms(X) print "hm=", hm(X) print "gm=", gm(X) print "min=", min(X) print "Q1 = ", Q1(X) print "median=", median(X) print "Q3 = ", Q3(X) print "max=", max(X) print "skew=", skew(X) print "kurt =", kurt(X)When the above code was run, it printed the following: python descstats.py mean = 4.84676942423 rms= 5.03778343707 hm= 4.32905902716 gm= 4.61426927173 min= 2.0131960908 Q1 = 4.13817781222 median= 4.92637729064 Q3 = 5.47954029648 max= 7.31749490871 skew= -0.233219987989 kurt = -0.280050258646 The quantile.py may be found in the article Statistics: Computing quantiles with Python.
TBD. lower and upper hinges.Some authors define lower and upper hinges as synonymous with Q1 and Q3 respectively but we will check with the more authoritative R software later.
R follows Tukey 1977. See boxplot.stats:
ReplyDeleteThe two ‘hinges’ are versions of the first and third quartile, i.e., close to quantile(x, c(1,3)/4). The hinges equal the quartiles for odd n (where n <- length(x)) and differ for even n. Whereas the quartiles only equal observations for n %% 4 == 1 (n = 1 mod 4), the hinges do so additionally for n %% 4 == 2 (n = 2 mod 4), and are in the middle of two observations otherwise.