## Sunday, August 15, 2010

### Python, Statistics: Stem and Leaf Part 4

I am publishing stem and leaf article here. Wordpress (at Digital Explorations) chokes on our latest work! (fixed) Our old stem and leaf code has been updated to include frequency counts. Here is the latest incarnation. Here is the latest incarnation, following or last past discussion in Part 3

"""
file     stem.py
UPDEPP, UP at Clarkfield
version  0.0.4   2010.08.16 with freq. counts on stems.
"""

from   math import *

def stemleafpairs(X, stempos= 0, leafwidth=1):
"""
X - data array
stempos   - position of last digit of stem,
from decimal point.
leafwidth - number of digits in leaves.

Return value:
a list of stem-leaf pairs
"""
stem10 = pow(10, stempos)
leaf10 = 10**leafwidth

output = []
for x in X:
y = x
if stempos > 0:
leaf, stem = modf (x * stem10)
else:
leaf, stem = modf(x/ stem10)

leaf = abs(leaf * leaf10)
#print x, int(stem), round(leaf) # decomment after testing!
output.append((int(stem), int(leaf) ))
return output

def stemleafplot(Pairs,scale=1,sortQ=False,sep = "", stemwidth=4,leafwidth=1, withcounts= 0):
"""
Given a list of Pairs (stem, leaf), prints it out.

Arguments:
Pairs     - liest of (stem-leaf) pairs
scale     - 1 for standard stems, 2 with upper half-stems.
sortQ     - True if data is to be sorted and false if unsorted.
stemwidth - printing width of stem
leafwidth - number of digits in display of leaves.
withcounts - 0 - no count,
1 - simple count.
2 - cumulative count.
"""
if sortQ:
Pairs.sort()

minstem, minleaf = min(Pairs)
print "minstem=", minstem
maxstem, maxleaf = max(Pairs)
print "maxstem=", maxstem

# Transform list into a dictionary.
stemleaves = {}
if scale == 1:
for (stem, leaf) in Pairs:
stem = str(stem)
if stem not in stemleaves:
stemleaves[stem]= [leaf]
else:
stemleaves[stem].append(leaf)
elif scale == 2:
half = int("5" + "0" * (leafwidth-1))
for (stem, leaf) in Pairs:
stem = str(stem)
if leaf >= half:
stem = stem + "*"
if stem in stemleaves:
stemleaves[stem].append(leaf)
else:
stemleaves[stem] = [leaf]

# the actual printing of output.
totcount = 0
for i in range(minstem, maxstem+1):
key = str(i)
count = 0
if key in stemleaves:
count = len(stemleaves[key])

totcount += count
if withcounts == 0:
print "%*s|" %(stemwidth,key),
elif withcounts == 1:
print "%*s(%4d)|" %(stemwidth,key, count),
elif withcounts == 2:
print "%*s(%4d)|" %(stemwidth,key, totcount),
if key in stemleaves:
leaves = ""
for leaf in stemleaves[key]:
leaves += "%0*d" %(leafwidth, int(leaf))+sep
print leaves
else:
print

if scale == 2:
key = key + "*"
print "%*s|" % (stemwidth, key),
if key in stemleaves:
leaves = ""
for leaf in stemleaves[key]:
leaves += "%0*d" %(leafwidth, int(leaf))+sep
print leaves
else:
print

def stem(X, stempos=1, scale=1,sortQ=False,sep = "", stemwidth=4,leafwidth=1, withcounts= 2):
Pairs = stemleafpairs (X, stempos, leafwidth)
stemleafplot(Pairs,scale=scale,sortQ=sortQ,sep = sep, stemwidth=stemwidth,leafwidth=leafwidth, withcounts= withcounts)

if __name__ =="__main__":
import scipy.stats as stat
X = stat.norm.rvs(size=1000)
stem(X)

When the above program runs with default arguments, it outputs the following:

-34( 1)| 6
-33( 1)|
-32( 2)| 1
-31( 2)|
-30( 2)|
-29( 2)|
-28( 4)| 08
-27( 4)|
-26( 5)| 4
-25( 7)| 77
-24( 10)| 422
-23( 13)| 260
-22( 19)| 694086
-21( 21)| 75
-20( 27)| 163143
-19( 28)| 5
-18( 33)| 36358
-17( 43)| 6893302148
-16( 52)| 322560070
-15( 59)| 1331906
-14( 70)| 49124950854
-13( 91)| 401253263854452827628
-12( 109)| 572452780211853243
-11( 130)| 077133985997876037339
-10( 150)| 04430092495280373731
-9( 175)| 4293411168169465118423909
-8( 190)| 818880143091624
-7( 224)| 5161410514534334310234657019034655
-6( 253)| 59607404249373790654201441626
-5( 291)| 53836210159491267372596227328789414461
-4( 329)| 52652987680976425075914021503791325718
-3( 354)| 2038247991214802390377294
-2( 396)| 520316774771912137458409442885171857745958
-1( 443)| 51714559553431520132437128841770795660895852361
0( 529)| 44036499255722225220796139361838505887904141332066633213167414864454127175508117523351
1( 569)| 5590562775582330092803462031362997581151
2( 604)| 37111480345836150687428470279322769
3( 652)| 446677471660096619850750047910077556028146374802
4( 687)| 27892444604915179645262316905991017
5( 713)| 34250730580367963818408036
6( 752)| 616035247612979989663910648454898469322
7( 791)| 548156635043052102545471356231163311052
8( 813)| 6445159984577578411322
9( 838)| 2022629067693963656545664
10( 863)| 5431874466621296418005519
11( 881)| 710747499048405915
12( 901)| 46507445580872987358
13( 917)| 1622476524280308
14( 929)| 335731472657
15( 944)| 669096613933089
16( 958)| 26144266812863
17( 968)| 6245893442
18( 972)| 5365
19( 982)| 5123787375
20( 985)| 584
21( 988)| 211
22( 990)| 00
23( 995)| 37063
24( 996)| 7
25( 997)| 7
26( 998)| 4
27( 998)|
28( 999)| 0
29( 999)|
30(1000)| 2

The code looks complicated, I may rewrite it later. I will appreciate someone else to simplify, simplify the code!