"""
file    stem.py
author  Ernesto Adorio, Ph.D.
        UPDEPP, UP at Clarkfield
        ernesto.adorio@gmail.com
version 0.0.4 2010.08.16 with freq. counts on stems.

Text-mode stem-and-leaf plots.  The code runs unchanged on Python 2 and
Python 3: every print() call receives exactly one string argument.
"""

from math import *


def _truncate_digits(value):
    """Truncate a non-negative number to an int, ignoring float noise.

    A value within a relative 1e-9 of a whole number is snapped to that
    whole number first, so that e.g. 28.999999999999996 (0.29 * 100)
    yields 29 rather than 28.
    """
    nearest = round(value)
    if abs(value - nearest) <= 1e-9 * max(1.0, abs(nearest)):
        return int(nearest)
    return int(value)


def stemleafpairs(X, stempos= 0, leafwidth=1):
    """
    Split every value of X into a (stem, leaf) pair.

    Arguments:
      X         - data array (any iterable of real numbers).
      stempos   - position of last digit of stem, counted from the decimal
                  point: 0 is the units digit, 1 the first decimal place,
                  -1 the tens digit, and so on.
      leafwidth - number of digits in leaves.

    Return value:
      a list of (stem, leaf) pairs of ints, in the order of X.  The stem
      carries the sign of the value; the leaf is always non-negative.
      Digits beyond the leaf are truncated, not rounded.

    Note: values in the open interval (-1, 0) * 10**-stempos get stem 0,
    the same stem as small positive values, because an int cannot
    represent "-0".
    """
    leaf_base = 10 ** leafwidth
    # Scale so that the last leaf digit lands on the units position, then
    # peel the leaf off with divmod.  Working from one scaled value avoids
    # the error of subtracting the stem and re-multiplying the fraction.
    exponent = stempos + leafwidth
    pairs = []
    for x in X:
        magnitude = abs(x)
        if exponent >= 0:
            scaled = magnitude * 10 ** exponent
        else:
            # Dividing by an exact power of ten is more accurate than
            # multiplying by an inexact 0.1, 0.01, ...
            scaled = magnitude / float(10 ** -exponent)
        stem, leaf = divmod(_truncate_digits(scaled), leaf_base)
        if x < 0:
            stem = -stem
        pairs.append((stem, leaf))
    return pairs


def stemleafplot(Pairs,scale=1,sortQ=False,sep = "", stemwidth=4,leafwidth=1, withcounts= 0):
    """
    Given a list of Pairs (stem, leaf), prints it out.

    Arguments:
      Pairs      - list of (stem, leaf) pairs; it is not modified.
      scale      - 1 for standard stems, 2 with upper half-stems (leaves
                   of at least 5, 50, ... go on an extra row marked "*").
      sortQ      - True if data is to be sorted and False if unsorted.
      sep        - string written after every leaf.
      stemwidth  - printing width of stem.
      leafwidth  - number of digits in display of leaves.
      withcounts - 0 - no count, 1 - simple count, 2 - cumulative count.
                   Counts are shown per printed row, half-stem rows
                   included.

    Every stem from the smallest to the largest gets a row, even when it
    has no leaves.  Nothing is printed for an empty list.

    Raises:
      ValueError - if scale is not 1 or 2, or withcounts is not 0, 1 or 2.
    """
    if scale not in (1, 2):
        raise ValueError("scale must be 1 or 2, got %r" % (scale,))
    if withcounts not in (0, 1, 2):
        raise ValueError("withcounts must be 0, 1 or 2, got %r" % (withcounts,))

    # Work on a copy so the caller's list keeps its order.
    pairs = sorted(Pairs) if sortQ else list(Pairs)
    if not pairs:
        return

    stems = [stem for stem, _ in pairs]
    minstem, maxstem = min(stems), max(stems)

    # Group the leaves by row key: "12" for a stem, "12*" for its upper half.
    half = 5 * 10 ** max(leafwidth - 1, 0)
    rows = {}
    for stem, leaf in pairs:
        key = str(stem)
        if scale == 2 and leaf >= half:
            key += "*"
        rows.setdefault(key, []).append(leaf)

    # The actual printing of output.
    suffixes = ("", "*") if scale == 2 else ("",)
    totcount = 0
    for i in range(minstem, maxstem + 1):
        for suffix in suffixes:
            key = str(i) + suffix
            leaves = rows.get(key, [])
            count = len(leaves)
            totcount += count
            if withcounts == 1:
                label = "%*s(%4d)|" % (stemwidth, key, count)
            elif withcounts == 2:
                label = "%*s(%4d)|" % (stemwidth, key, totcount)
            else:
                label = "%*s|" % (stemwidth, key)
            if leaves:
                text = "".join("%0*d" % (leafwidth, int(leaf)) + sep
                               for leaf in leaves)
                print(label + " " + text)
            else:
                print(label)


def stem(X, stempos=1, scale=1,sortQ=False,sep = "", stemwidth=4,leafwidth=1, withcounts= 2):
    """
    Print a stem-and-leaf plot of the data array X.

    Splits X with stemleafpairs(X, stempos, leafwidth) and hands the
    pairs to stemleafplot(); see those functions for the meaning of the
    arguments.
    """
    Pairs = stemleafpairs(X, stempos, leafwidth)
    stemleafplot(Pairs, scale=scale, sortQ=sortQ, sep=sep,
                 stemwidth=stemwidth, leafwidth=leafwidth,
                 withcounts=withcounts)


if __name__ == "__main__":
    # Demo: 1000 standard normal deviates.  scipy is needed only here.
    import scipy.stats as stat
    X = stat.norm.rvs(size=1000)
    stem(X)
When the above program runs with default arguments, it outputs the following:
-34( 1)| 6
-33( 1)|
-32( 2)| 1
-31( 2)|
-30( 2)|
-29( 2)|
-28( 4)| 08
-27( 4)|
-26( 5)| 4
-25( 7)| 77
-24( 10)| 422
-23( 13)| 260
-22( 19)| 694086
-21( 21)| 75
-20( 27)| 163143
-19( 28)| 5
-18( 33)| 36358
-17( 43)| 6893302148
-16( 52)| 322560070
-15( 59)| 1331906
-14( 70)| 49124950854
-13( 91)| 401253263854452827628
-12( 109)| 572452780211853243
-11( 130)| 077133985997876037339
-10( 150)| 04430092495280373731
-9( 175)| 4293411168169465118423909
-8( 190)| 818880143091624
-7( 224)| 5161410514534334310234657019034655
-6( 253)| 59607404249373790654201441626
-5( 291)| 53836210159491267372596227328789414461
-4( 329)| 52652987680976425075914021503791325718
-3( 354)| 2038247991214802390377294
-2( 396)| 520316774771912137458409442885171857745958
-1( 443)| 51714559553431520132437128841770795660895852361
0( 529)| 44036499255722225220796139361838505887904141332066633213167414864454127175508117523351
1( 569)| 5590562775582330092803462031362997581151
2( 604)| 37111480345836150687428470279322769
3( 652)| 446677471660096619850750047910077556028146374802
4( 687)| 27892444604915179645262316905991017
5( 713)| 34250730580367963818408036
6( 752)| 616035247612979989663910648454898469322
7( 791)| 548156635043052102545471356231163311052
8( 813)| 6445159984577578411322
9( 838)| 2022629067693963656545664
10( 863)| 5431874466621296418005519
11( 881)| 710747499048405915
12( 901)| 46507445580872987358
13( 917)| 1622476524280308
14( 929)| 335731472657
15( 944)| 669096613933089
16( 958)| 26144266812863
17( 968)| 6245893442
18( 972)| 5365
19( 982)| 5123787375
20( 985)| 584
21( 988)| 211
22( 990)| 00
23( 995)| 37063
24( 996)| 7
25( 997)| 7
26( 998)| 4
27( 998)|
28( 999)| 0
29( 999)|
30(1000)| 2
The code looks complicated; I may rewrite it later. I would appreciate it if someone else could simplify the code!
No comments:
Post a Comment