Here is an incomplete Python program (we will come back to make it more comprehensive).
""" xfreqdescstats.py Descriptive statistics on frequency data (x, freq). We assume that freq consists of positive integers. version 0.0.1 aug.15, 2010 basic functions, w/out skew and kurtosis. author Dr. Ernesto Adorio UPDEPP(UP Clarkfield, Philippines) email ernesto.adorio@gmail.com """ from math import * def xfi(X,freq, i): """ element at position i for (data, freq) inputs WARNING: The X and its associated freq must be sorted by the elements in X. """ n = sum(freq) if i < 0 or i >= n: return None tot = 0 for x,f in zip(X,freq): tot += f if i < tot: return x return None def xfquantile(x,freq, q, qtype = 7, issorted = False): """ Args: x - input data freq - frequency data q - quantile qtype - algorithm issorted- True if x already sorted. Compute quantiles from input array x given q.For median, specify q=0.5. References: http://reference.wolfram.com/mathematica/ref/Quantile.html http://wiki.r-project.org/rwiki/doku.php?id=rdoc:stats:quantile Author: Ernesto P.Adorio Ph.D. UP Extension Program in Pampanga, Clark Field. """ if not issorted: xf = sorted(zip(X,freq)) y, yfreq=zip(*xf) #unzip!! else: y, yfreq = x, freq if not (1 <= qtype <= 9): return None # error! # Parameters for the Hyndman and Fan algorithm abcd = [(0, 0, 1, 0), # inverse empirical distrib.function., R type 1 (0.5, 0, 1, 0), # similar to type 1, averaged, R type 2 (0.5, 0, 0, 0), # nearest order statistic,(SAS) R type 3 (0, 0, 0, 1), # California linear interpolation, R type 4 (0.5, 0, 0, 1), # hydrologists method, R type 5 (0, 1, 0, 1), # mean-based estimate(Weibull method), (SPSS,Minitab), type 6 (1, -1, 0, 1), # mode-based method,(S, S-Plus), R type 7 (1.0/3, 1.0/3, 0, 1), # median-unbiased , R type 8 (3/8.0, 0.25, 0, 1) # normal-unbiased, R type 9. ] a, b, c, d = abcd[qtype-1] n = sum(yfreq) g, j = modf( a + (n+b) * q -1) if j < 0: return y[0] elif j > n: return y[n] j = int(floor(j)) if g == 0: return xfi(y, yfreq, j) else: return xfi(y, yfreq, j) + (xfi(y, yfreq, j+1)-xfi(y, yfreq, j))* (c + d * g) def xfmean(X, freq): """ Arithmetic mean of X. """ n = float(sum(freq)) return sum([x*f for x,f in zip(X, freq)])/ n def xfhm(X, freq): """ Harmonic mean of X """ n = float(sum(freq)) return n / sum([f/x for x, freq in zip(X, freq)]) def xfgm(X, freq): """ Geometric mean of X. """ n = float(sum(freq)) return exp( sum([f * log(x) for x,f in zip(X, freq)])/n) def xfrms(X, freq): """ Root mean square of x. """ n = float(sum(freq)) return sqrt(sum([x*x * f for x, f in zip(X, freq)])/ n) def xfpvar(X, freq): """ Pop Variance of X. """ n = float(sum(freq)) xbar = xfmean(X, freq) sum([f * (x - xbar)**2 for x, f in zip(X, freq)])/n def xfsvar(X, freq): """ Sample variance of X. """ n = float(sum(freq)) return xfpvar(X, freq) * n/(n-1.0) def xfpstd(X, freq): """ Pop. std. deviation of X """ return sqrt(xfpvar(X, freq)) def xfsstd(X, freq): """ Sample std. deviation of X """ return sqrt(xfsvar(X, freq)) def xfmedian(X, freq): return xfquantile(X, freq, 0.50) def xfQ1(X, freq): return xfquantile(X, freq, 0.25) def xfQ3(X, freq): return xfquantile(X, freq, 0.75) def xfdisttable(X, freq): """ Returns a table of tuples consisting of columns for X, freq, rf and crf. """ n = float(sum(freq)) rf = [f/n for f in freq] crf = rf[:] for i in range(1, len(rf)): # Scipy has cumsum! crf[i] += crf[i-1] return zip(X, freq, rf, crf) if __name__ == "__main__": X = [1, 2, 5, 7] freq = [5,10, 9, 11] #for i in range(sum(freq)): # print i, xfi(X, freq, i) print xfQ1(X, freq), xfmedian(X, freq) ,xfQ3(X, freq) for row in xfdisttable(X, freq): print row
We will add more functions to the above Python code and we shall always be doing tests to see if it is correct. Add comments for your wishlist and suggested modifications to make the code robust.
No comments:
Post a Comment