"""
file stem.py
author Ernesto Adorio, Ph.D.
UPDEPP, UP at Clarkfield
ernesto.adorio@gmail.com
version 0.0.4 2010.08.16 with freq. counts on stems.
"""
from math import *
def stemleafpairs(X, stempos=0, leafwidth=1):
    """
    Split every value of X into an integer (stem, leaf) pair.

    Arguments:
      X         - iterable of numbers.
      stempos   - position of the last digit of the stem, counted from
                  the decimal point: 0 ends the stem at the units digit,
                  1 at the first decimal place, -1 at the tens digit.
      leafwidth - number of digits in leaves.
    Return value:
      a list of (stem, leaf) tuples in the order of X.  The stem carries
      the sign of the value; the leaf is always non-negative.

    Note: a negative value whose stem truncates to zero (e.g. -0.4 with
    stempos=0) gets stem 0, the same stem as small positive values.
    """
    leaf10 = 10 ** leafwidth
    output = []
    for x in X:
        # Shift the decimal point so that the stem is the integer part.
        # A negative stempos moves the point to the left, i.e. divides.
        if stempos >= 0:
            scaled = x * 10 ** stempos
        else:
            scaled = float(x) / 10 ** (-stempos)
        frac, whole = modf(scaled)
        # Leaves are truncated, not rounded, but binary representation
        # noise (2.3 is stored as 2.2999999999999998) must not cost a
        # digit, so values within 1e-9 of the next leaf are snapped up.
        leaf = int(round(abs(frac) * leaf10, 9))
        if leaf >= leaf10:
            # The tolerance must never push a leaf out of its digit range.
            leaf -= 1
        output.append((int(whole), leaf))
    return output
def stemleafplot(Pairs, scale=1, sortQ=False, sep="", stemwidth=4, leafwidth=1, withcounts=0):
    """
    Given a list of Pairs (stem, leaf), prints it out as a stem-and-leaf plot.

    Arguments:
      Pairs      - list of (stem, leaf) pairs; it is never modified.
      scale      - 1 for standard stems, 2 with upper half-stems (leaves
                   in the upper half of the leaf range go on an extra row
                   whose stem is marked with "*").  With any other value
                   no leaves are grouped and every stem row prints empty.
      sortQ      - True if data is to be sorted and False if unsorted.
      sep        - text written after every leaf.
      stemwidth  - printing width of stem.
      leafwidth  - number of digits in display of leaves.
      withcounts - 0 - no count,
                   1 - simple count (leaves on that row),
                   2 - cumulative count (leaves up to and including
                       that row).
                   Any other value is treated as 0.

    Two header lines ("minstem= ..." and "maxstem= ...") are printed
    before the plot.  Nothing at all is printed for an empty list.
    """
    # Local import: sys.stdout.write produces the same layout under
    # Python 2 and Python 3, unlike the print statement.
    import sys

    # Work on a copy so that sorting never reorders the caller's list.
    pairs = sorted(Pairs) if sortQ else list(Pairs)
    if not pairs:
        return

    write = sys.stdout.write
    minstem = min(pairs)[0]
    write("minstem= %s\n" % minstem)
    maxstem = max(pairs)[0]
    write("maxstem= %s\n" % maxstem)

    # Group the leaves by row label: "7" for a stem, "7*" for its upper half.
    rows = {}
    if scale in (1, 2):
        half = int("5" + "0" * (leafwidth - 1))
        for stem_value, leaf in pairs:
            label = str(stem_value)
            if scale == 2 and leaf >= half:
                label += "*"
            rows.setdefault(label, []).append(leaf)

    # The actual printing of output; stems without data still get a row.
    running_total = 0
    for stem_value in range(minstem, maxstem + 1):
        labels = [str(stem_value)]
        if scale == 2:
            labels.append(labels[0] + "*")
        for label in labels:
            leaves = rows.get(label, [])
            running_total += len(leaves)
            # Half-stem rows carry the same count column as full rows so
            # the "|" bars line up and the cumulative count is complete.
            if withcounts == 1:
                head = "%*s(%4d)|" % (stemwidth, label, len(leaves))
            elif withcounts == 2:
                head = "%*s(%4d)|" % (stemwidth, label, running_total)
            else:
                head = "%*s|" % (stemwidth, label)
            if leaves:
                digits = "".join(
                    "%0*d" % (leafwidth, int(leaf)) + sep for leaf in leaves
                )
                write(head + " " + digits + "\n")
            else:
                write(head + "\n")
def stem(X, stempos=1, scale=1, sortQ=False, sep="", stemwidth=4, leafwidth=1, withcounts=2):
    """
    Print a stem-and-leaf plot of the data in X.

    X is first converted with stemleafpairs(X, stempos, leafwidth); the
    resulting pairs and all remaining arguments are then handed to
    stemleafplot() unchanged.
    """
    plot_options = dict(
        scale=scale,
        sortQ=sortQ,
        sep=sep,
        stemwidth=stemwidth,
        leafwidth=leafwidth,
        withcounts=withcounts,
    )
    stemleafplot(stemleafpairs(X, stempos, leafwidth), **plot_options)
if __name__ == "__main__":
    # Demo: plot 1000 random draws from scipy's norm distribution using
    # the default arguments of stem().
    import scipy.stats as stat

    sample = stat.norm.rvs(size=1000)
    stem(sample)
When the above program runs with default arguments, it outputs the following:
-34( 1)| 6
-33( 1)|
-32( 2)| 1
-31( 2)|
-30( 2)|
-29( 2)|
-28( 4)| 08
-27( 4)|
-26( 5)| 4
-25( 7)| 77
-24( 10)| 422
-23( 13)| 260
-22( 19)| 694086
-21( 21)| 75
-20( 27)| 163143
-19( 28)| 5
-18( 33)| 36358
-17( 43)| 6893302148
-16( 52)| 322560070
-15( 59)| 1331906
-14( 70)| 49124950854
-13( 91)| 401253263854452827628
-12( 109)| 572452780211853243
-11( 130)| 077133985997876037339
-10( 150)| 04430092495280373731
-9( 175)| 4293411168169465118423909
-8( 190)| 818880143091624
-7( 224)| 5161410514534334310234657019034655
-6( 253)| 59607404249373790654201441626
-5( 291)| 53836210159491267372596227328789414461
-4( 329)| 52652987680976425075914021503791325718
-3( 354)| 2038247991214802390377294
-2( 396)| 520316774771912137458409442885171857745958
-1( 443)| 51714559553431520132437128841770795660895852361
0( 529)| 44036499255722225220796139361838505887904141332066633213167414864454127175508117523351
1( 569)| 5590562775582330092803462031362997581151
2( 604)| 37111480345836150687428470279322769
3( 652)| 446677471660096619850750047910077556028146374802
4( 687)| 27892444604915179645262316905991017
5( 713)| 34250730580367963818408036
6( 752)| 616035247612979989663910648454898469322
7( 791)| 548156635043052102545471356231163311052
8( 813)| 6445159984577578411322
9( 838)| 2022629067693963656545664
10( 863)| 5431874466621296418005519
11( 881)| 710747499048405915
12( 901)| 46507445580872987358
13( 917)| 1622476524280308
14( 929)| 335731472657
15( 944)| 669096613933089
16( 958)| 26144266812863
17( 968)| 6245893442
18( 972)| 5365
19( 982)| 5123787375
20( 985)| 584
21( 988)| 211
22( 990)| 00
23( 995)| 37063
24( 996)| 7
25( 997)| 7
26( 998)| 4
27( 998)|
28( 999)| 0
29( 999)|
30(1000)| 2
The code looks complicated; I may rewrite it later. I would appreciate it if someone else could simplify the code!
No comments:
Post a Comment