/****************************************************************/
/*          S A S   P R O G R A M                               */
/*                                                              */
/*    NAME: Bill StJohn                                         */
/*   TITLE: batters2.sas                                        */
/* PRODUCT: SAS                                                 */
/*  SYSTEM: UNIX                                                */
/*    KEYS:                                                     */
/*   PROCS: PRINT, MEANS, UNIVARIATE, FREQ, CHART               */
/*    DATA: batters2.dat                                        */
/*                                                              */
/*     REF:                                                     */
/*    MISC:                                                     */
/*    DESC: This SAS program manipulates data from a file       */
/*          listing observations of U.S. pro baseball players   */
/*          (non-pitchers) who were born in California or       */
/*          Pennsylvania.                                       */
/*          All but one of the PROCs have been commented out.   */
/*          Viewers are encouraged to download this file and    */
/*          the data file (batters2.dat) and alter the comments */
/*          to run the other procs.                             */
/*          I got the data from a web site:                     */
/*                                                              */
/*          http://www.baseball-reference.com/bio/              */
/*                                                              */
/****************************************************************/

/* NODATE is a system option, so it belongs on the OPTIONS      */
/* statement. Listed on the DATA statement it would be taken as */
/* the name of a second output data set (WORK.NODATE).          */
options linesize=140 pagesize=200 nodate;

/* FILENAME is a global statement; declare the fileref before   */
/* the DATA step that reads from it.                            */
filename batters2 './batters2.dat';

/* Read the fixed-column player file into WORK.BATTERS2, derive */
/* career length, home-run rate and a pre/post-1950 flag, and   */
/* keep only players with enough playing time.                  */
data batters2;
    infile batters2;

    input st      $ 1-2      /* home state of the player             */
          na      $ 3-22     /* name of the player                   */
          yr_1st  24-27      /* career first year                    */
          dec1st  24-26      /* career first decade: first three     */
                             /* digits of yr_1st (same columns,      */
                             /* minus the last digit)                */
          yr_last 29-32      /* career last year                     */
          ab      38-43      /* career at bats                       */
          hr      62-65      /* career home runs                     */
          rbi     66-70      /* career rbi                           */
          bat_avg 83-86;     /* career batting average               */
    /* bp $ 109-130; */

    /* term = career length in years. */
    term = yr_last - yr_1st;

    /* Disregard all players with fewer than 100 at-bats or      */
    /* careers shorter than five years. This runs before hr_rate */
    /* is computed so that rows with a zero or missing ab never  */
    /* reach the division below.                                 */
    if ab < 100 | term < 5 then delete;

    /* hr_rate = (home runs per at-bat) * 100 */
    hr_rate = hr / ab * 100;

    /* Declare the length explicitly. Without this the length is */
    /* taken from the first assignment (7 characters) and the    */
    /* 8-character POST value would be truncated.                */
    length post1950 $ 8;

    /* Indicate whether the player began the career before 1950  */
    /* or in 1950 and later.                                     */
    if yr_1st < 1950 then post1950 = 'PRE ''50';
    else post1950 = 'POST ''50';
run;

/***********************************************/
/* NOTE: Several of the following PROCs        */
/* (procedures) are commented out.             */
/* Feel free to add or remove comments based   */
/* on which procs you want to test.            */
/***********************************************/

/*
proc print data=batters2;
run;

proc means data=batters2 mean minimum maximum maxdec=3;
    class st post1950;
    var bat_avg hr_rate;
    title1 'COMPARING BATTING AVERAGES & HOME RUNS AMONG';
    title2 'PLAYERS BORN IN CALIF. & PENN., SUB-GROUPING';
    title3 'BY PRE- OR POST-1950 CAREER START.';
run;

proc univariate data=batters2 plot;
    var bat_avg;
    id st;
run;

proc freq data=batters2;
    tables dec1st term;
    label dec1st = 'Rookie Decade'
          term = 'Years played';
run;

proc chart data=batters2;
    hbar st / group=post1950;
    label bat_avg='batting avg.'
          st='state';
run;
*/

/* Horizontal bar chart of batting averages, one group of bars  */
/* per home state, with bar midpoints from 0 to .4 in steps of  */
/* .025.                                                        */
proc chart data=batters2;
    hbar bat_avg / group=st
                   midpoints= 0 to .4 by .025;
    label st='state'
          bat_avg='Batting avg.';
    title1 'COMPARISON OF BATTING AVERAGES AMONG';
    title2 'PLAYERS BORN IN TWO U.S. STATES:';
    title3 'CALIFORNIA AND PENNSYLVANIA';
run;