Sunday, August 15, 2010

Python, Statistics: Stem and Leaf Part 4

I am publishing stem and leaf article here. Wordpress (at Digital Explorations) chokes on our latest work! (fixed) Our old stem and leaf code has been updated to include frequency counts. Here is the latest incarnation. Here is the latest incarnation, following or last past discussion in Part 3

"""
file     stem.py
author   Ernesto Adorio, Ph.D.
         UPDEPP, UP at Clarkfield
         ernesto.adorio@gmail.com
version  0.0.4   2010.08.16 with freq. counts on stems.
"""


from   math import *


def stemleafpairs(X, stempos= 0, leafwidth=1):
    """
    X - data array
    stempos   - position of last digit of stem,
                  from decimal point.
    leafwidth - number of digits in leaves.

    Return value:
     a list of stem-leaf pairs
    """
    stem10 = pow(10, stempos)
    leaf10 = 10**leafwidth

    output = []
    for x in X:
        y = x
        if stempos > 0:
           leaf, stem = modf (x * stem10)
        else:
           leaf, stem = modf(x/ stem10)

        leaf = abs(leaf * leaf10)
        #print x, int(stem), round(leaf) # decomment after testing!
        output.append((int(stem), int(leaf) ))
    return output


def stemleafplot(Pairs,scale=1,sortQ=False,sep = "", stemwidth=4,leafwidth=1, withcounts= 0):
    """
    Given a list of Pairs (stem, leaf), prints it out.
    
    Arguments:
     Pairs     - liest of (stem-leaf) pairs
     scale     - 1 for standard stems, 2 with upper half-stems.
     sortQ     - True if data is to be sorted and false if unsorted.
     stemwidth - printing width of stem
     leafwidth - number of digits in display of leaves.
     withcounts - 0 - no count,
                  1 - simple count.
                  2 - cumulative count.
    """
    if sortQ:
        Pairs.sort()

    minstem, minleaf = min(Pairs)
    print "minstem=", minstem
    maxstem, maxleaf = max(Pairs)
    print "maxstem=", maxstem

    # Transform list into a dictionary.
    stemleaves = {}
    if scale == 1:
      for (stem, leaf) in Pairs:
       stem = str(stem)
       if stem not in stemleaves:
          stemleaves[stem]= [leaf]
       else:
          stemleaves[stem].append(leaf)
    elif scale == 2:
       half = int("5" + "0" * (leafwidth-1)) 
       for (stem, leaf) in Pairs:
           stem = str(stem)
           if leaf >= half:
              stem = stem + "*"
           if stem in stemleaves:
               stemleaves[stem].append(leaf)
           else:
               stemleaves[stem] = [leaf]

    # the actual printing of output.
    totcount = 0
    for i in range(minstem, maxstem+1):
        key = str(i)
        count = 0
        if key in stemleaves:
           count = len(stemleaves[key])
        
        totcount += count
        if withcounts == 0:
           print "%*s|" %(stemwidth,key),
        elif withcounts == 1:
           print "%*s(%4d)|" %(stemwidth,key, count),
        elif withcounts == 2:
           print "%*s(%4d)|" %(stemwidth,key, totcount),
        if key in stemleaves:
           leaves = ""
           for leaf in stemleaves[key]:
               leaves += "%0*d" %(leafwidth, int(leaf))+sep
           print leaves
        else:
           print 

        if scale == 2:
           key = key + "*"
           print "%*s|" % (stemwidth, key),
           if key in stemleaves:
               leaves = ""
               for leaf in stemleaves[key]:
                   leaves += "%0*d" %(leafwidth, int(leaf))+sep
               print leaves
           else:
               print

def stem(X, stempos=1, scale=1,sortQ=False,sep = "", stemwidth=4,leafwidth=1, withcounts= 2):
    Pairs = stemleafpairs (X, stempos, leafwidth)
    stemleafplot(Pairs,scale=scale,sortQ=sortQ,sep = sep, stemwidth=stemwidth,leafwidth=leafwidth, withcounts= withcounts)

if __name__ =="__main__":    
   import scipy.stats as stat
   X = stat.norm.rvs(size=1000)
   stem(X)

When the above program runs with default arguments, it outputs the following:




-34( 1)| 6
-33( 1)|
-32( 2)| 1
-31( 2)|
-30( 2)|
-29( 2)|
-28( 4)| 08
-27( 4)|
-26( 5)| 4
-25( 7)| 77
-24( 10)| 422
-23( 13)| 260
-22( 19)| 694086
-21( 21)| 75
-20( 27)| 163143
-19( 28)| 5
-18( 33)| 36358
-17( 43)| 6893302148
-16( 52)| 322560070
-15( 59)| 1331906
-14( 70)| 49124950854
-13( 91)| 401253263854452827628
-12( 109)| 572452780211853243
-11( 130)| 077133985997876037339
-10( 150)| 04430092495280373731
-9( 175)| 4293411168169465118423909
-8( 190)| 818880143091624
-7( 224)| 5161410514534334310234657019034655
-6( 253)| 59607404249373790654201441626
-5( 291)| 53836210159491267372596227328789414461
-4( 329)| 52652987680976425075914021503791325718
-3( 354)| 2038247991214802390377294
-2( 396)| 520316774771912137458409442885171857745958
-1( 443)| 51714559553431520132437128841770795660895852361
0( 529)| 44036499255722225220796139361838505887904141332066633213167414864454127175508117523351
1( 569)| 5590562775582330092803462031362997581151
2( 604)| 37111480345836150687428470279322769
3( 652)| 446677471660096619850750047910077556028146374802
4( 687)| 27892444604915179645262316905991017
5( 713)| 34250730580367963818408036
6( 752)| 616035247612979989663910648454898469322
7( 791)| 548156635043052102545471356231163311052
8( 813)| 6445159984577578411322
9( 838)| 2022629067693963656545664
10( 863)| 5431874466621296418005519
11( 881)| 710747499048405915
12( 901)| 46507445580872987358
13( 917)| 1622476524280308
14( 929)| 335731472657
15( 944)| 669096613933089
16( 958)| 26144266812863
17( 968)| 6245893442
18( 972)| 5365
19( 982)| 5123787375
20( 985)| 584
21( 988)| 211
22( 990)| 00
23( 995)| 37063
24( 996)| 7
25( 997)| 7
26( 998)| 4
27( 998)|
28( 999)| 0
29( 999)|
30(1000)| 2



The code looks complicated, I may rewrite it later. I will appreciate someone else to simplify, simplify the code!

No comments:

Post a Comment