/*-------------------------------------------------------------------* | Name: sieve.sas | | Title: Sieve diagrams for two-way tables | | | | Ref: Reidwyl & Schuepbach (1983). | | | *-------------------------------------------------------------------* * Author: Michael Friendly * * Created: 14 Apr 1991 14:12:14 (c) 1991-1998 * * Revised: 13 May 2004 12:05:13 * * Version: 1.4 * * 1.2 Add colors global variable * * 1.3 Added filltype='OBSP' to print cell freq in cell * * 1.4 Made colors consistent with mosaics * * Default font now depends on device driver * * Added global HTEXT variable * * Upcased FILLTYPE and MARGINS * * * * From ``Visualizing Categorical Data'', Michael Friendly (2000) * *-------------------------------------------------------------------*/ /*= =Description: The SIEVE program is a collection of SAS/IML modules for drawing sieve (or parquet) diagrams for a two-way contingency table. =Usage: proc iml; %include iml(sieve); run sieve( f, vnames, lnames, title ); ==Parameters: The required parameters for the RUN SIEVE statement are: * f two-way contingency table, size r x c * vnames variable names, a 1x2 character matrix. vnames[,1]=row variable, vnames[,2]=column variable * lnames category names, a 2 x max(r,c) character matrix. lnames[1,]=row categories, vnames[2,]=column categories. * title plot title ==Global variables: * filltype 'OBS': fill cells in proportion to observed frequencies 'OBSP': like OBS, but also write obs. freq. in cell 'DEV': fill in proportion to deviations from independence. 'EXL': no fill, write expected frequency in cell 'EXP': expfill, write expected frequency in cell * margins '' : nothing in margins 'TOTALS': row/col totals in margins * font font for text * colors names of two colors to use for the positive and negative residuals. [Default: {BLUE RED}] * htext height for text labels =*/ start sieve (f, vnames, lnames, title ) global(filltype, margins, colors, gout, name, font, htext); if type(filltype) ^= 'C' then filltype='OBS'; if type(margins ) ^= 'C' then margins =''; if type(colors) ^='C' then colors= {BLUE RED}; if type(gout ) ^= 'C' then gout='WORK.GSEG'; if type(name ) ^= 'C' then name='SEIVE'; if type(lnames ) = 'N' then lnames = trim(left(char(lnames))); if type(htext ) ^= 'N' then htext=1.1; margins = upcase(margins); filltype = upcase(filltype); *-- Set default font based on device driver name; if type(font ) ^= 'C' then do; call execute('device = upcase("&sysdevic");'); if index(device,'PS') > 0 then font= 'hwpsl009'; /* Helvetica for PS drivers */ else /* if device='WIN' then */ font = 'SWISS'; end; filltype = upcase(filltype); margins = upcase(margins); verbose=0; print filltype margins colors htext; *show filltype; nr= nrow(f); nc= ncol(f); nrc=nr#nc; r = f[,+]; * row totals; c = f[+,]; * col totals; n = c[+]; * grand total; e = r * c / n; * expected frequencies; d = (f - e) / sqrt(e); * standard deviates; print "Observed Frequencies", f [r=(lnames[1,]) c=(lnames[2,]) format=7.0]; print "Expected Frequencies", e [r=(lnames[1,]) c=(lnames[2,]) format=7.1]; print "Standardized Pearson Deviates", d [r=(lnames[1,]) c=(lnames[2,]) format=7.2]; chisq = chisq(f,e); df = (nr-1)#(nc-1); prob = 1 - probchi(chisq,df); print "Test of Independence", df ' ' chisq[r={'G.F.' 'L.R.'} format=9.3] prob [format=8.4]; rl= (100-nr+1) # r / n; cl= (100-nc+1) # c / n; cy = 101; do i = 1 to nr; cx = 0; cy = cy - rl[i,] - 1; do j = 1 to nc; if j= 1 then cx = 0; else cx = sum(cl[,1:(j-1)]) + j-1; boxes = boxes // ( cx || cy || cl[j] || rl[i] ); end; end; * print boxes[c={'BotX' 'BotY' 'LenX' 'LenY'} format=8.3]; call gstart(gout); call gopen(name,1); call gwindow({ -20 -20 110 110}); if margins = 'TOTALS' /* y-values for col labels */ then lcl = {-11 -17 -6}; /* lnames, vnames, totals */ else lcl = {-6 -13}; *-- category names; height = htext; call gset('FONT',font); lrx = 15; run gheight(lnames,font,lrx-1,1.75, len,height); * print lnames len[format=8.2] ; *-- row labels; lry = boxes[nc#(1:nr),2] + rl/2; lrx = repeat((-lrx), nr, 1); labelx = labelx // ( lrx || lry || repeat((0||height),nr,1) ); labels = labels // ( shape(trim(lnames[1,]), nr,1) ); *-- col labels; lcx = boxes[1:nc,1] + (cl/2)`; lcy = repeat(lcl[1], nc, 1); labelx = labelx // ( (lcx-(len[2,1:nc]`)/2) || lcy || repeat((0||height),nc,1) ); labels = labels // ( shape(trim(lnames[2,]), nc,1) ); *-- variable names; ht = 1.1 # height; call gstrlen(len,vnames,ht,font); center = ( 100 - len)/2; labelx = labelx // ( -16.5 || center[1] || 90 || ht) // ( center[2] || lcl[2] || 0 || ht); labels = labels // trim(vnames[1]) // trim(vnames[2]); *-- title; if length(title)>1 then do; ht = 1.8; run gheight(title,font,100,2.2,len,ht); center = ( 100 - len)/2; labelx = labelx // ( center || 105 || 0 || ht ); labels = labels // trim(title); end; *-- cell frequencies; if filltype='OBS' then do; f1 = loc(f = 1); if ncol(f1)>0 then value = repeat({"1"}, ncol(f1), 1); end; if filltype='EXL' | filltype='EXP' then do; f1 = 1:nrc; value = compress(char(shape(e, nrc, 1),5,1)); end; if filltype = 'OBSP' then do; f1 = 1:nrc; value = compress(char(shape(f, nrc, 1),5,0)); end; if /*filltype^='EXP' & */ ncol(f1)>0 then do; ht = height - .05; center = boxes[f1,] * ( I(2) // .5#I(2) ); call gstrlen(len, value,ht,font); center[,1] = center[,1] - len/2; labelx = labelx // ( center || repeat((0||ht), ncol(f1), 1) ); labels = labels // value; end; if margins = 'TOTALS' then do; ht = height - .05; center = repeat({101},nr,1) || lry; labelx = labelx // ( center || repeat((0||ht),nr,1) ); labels = labels // char(shape(r, nr, 1),4,0); value = char( shape( c||n , nc+1, 1), 4,0 ); call gstrlen(len, value,ht,font); lcx = lcx // {101}; len[nc+1,]=0; center = (lcx -len/2) || repeat(lcl[3],nc+1,1); labelx = labelx // ( center || repeat((0||ht),nc+1,1) ); labels = labels // value; end; reset fw=7; labelx = round(labelx,.001); * print labelx[c={'X' 'Y' 'Angle' 'Ht'}] labels[format=$25.]; d = shape(d, nrc,1); if filltype = 'EXL' then fill = shape({0}, nrc, 1); if filltype = 'DEV' then fill = shape( d , nrc, 1); if filltype = 'OBS' then fill = shape( f , nrc, 1); if filltype = 'OBSP' then fill = shape( f , nrc, 1); if filltype = 'EXP' then do; d = j(nrc,1,0); fill = shape( e , nrc, 1); end; run gboxes(boxes, labels, labelx, fill, d ); call gshow; finish; start gheight(label, font, maxlen, maxht, len, height); *-- determine height maxlen | max < .80#maxlen then do; height = min(maxht, height # .1 # int(10 # maxlen / max)); call gstrlen(len,label,height,font); end; finish; start gboxes( boxes, labels, labelx, fill,dev); call gopen('sieve'); *-- locate the 4 corners of each box; ll = boxes[,{1 2}]; lr = boxes[,{1 3}][,+] || boxes[,2] ; ul = boxes[,1] || boxes[,{2 4}][,+] ; ur = boxes[,{1 3}][,+] || boxes[,{2 4}][,+]; xy = ll || ul || ur || lr; max = max(ur[,1]) || max(ur[,2]); do i=1 to nrow(boxes); box = shape(xy[i,], 4); color='BLACK'; pat = 'EMPTY'; call gpoly( box[,1], box[,2], 1, color, pat, color); end; run fillbox(boxes,fill,dev); height = 1; do f=1 to nrow(labels); lxya = labelx[f,]; ht = lxya[,4]; labl = labels[f ]; call gscript( lxya[,1], lxya[,2], labl, lxya[,3], 0, ht); end; finish; start fillbox(boxes, fill, sign) global(filltype, colors); *-- fill each box proportional to abs(fill); w = boxes[,3]; h = boxes[,4]; totarea = sum(h#w); totfill = sum(abs(fill)); scale = 1; if totfill>.1 & filltype='DEV' /* ^all( fill = int(fill) ) */ then scale = sqrt(totarea / totfill); * print totarea totfill scale; if filltype='DEV' /* standardized deviations */ then do; s = sqrt(100 / (abs(fill)) ) ; s = s + 100#(abs(fill)<2) ; end; else s = sqrt(h # w / (abs(fill)#scale) ); *space between lines; nxl = int(w/s); nyl = int(h/s); * print 'Width, Height, Square', w h s fill nxl nyl; do i = 1 to nrow(boxes); if sign[i] >= 0 then do; line = 1; color = colors[1]; /* 'BLUE' */ end; else do; line = 3; color = colors[2]; /* 'RED ' */ end; if filltype='EXP' then line=34; if nxl[i]>0 then do; from = ((boxes[i,1] + (1:nxl[i]) # s[i]))`|| shape(boxes[i,2], nxl[i], 1); to = from[,1] || shape(sum(boxes[i,{2 4}]), nxl[i], 1); call gdrawl( from, to, line, color ); end; if nyl[i]>0 then do; from = shape(boxes[i,1], nyl[i], 1) || ((boxes[i,2] + (1:nyl[i]) # s[i]))` ; to = shape(sum(boxes[i,{1 3}]), nyl[i], 1) || from[,2] ; call gdrawl( from, to, line, color ); end; end; finish; start chisq(obs, fit); *-- Find Pearson and likelihood ratio chisquares; gf = sum ( (obs - fit)##2 / ( fit + (fit=0) ) ); lr = 2 # sum ( obs # log ( (obs+(obs=0)) / (fit + (fit=0)) ) ); return (gf // lr); finish;