* Calculate IV for binned variables;
/*-----------------------------------------------------------------------------
  IVcal_qcbnum
  Computes Weight of Evidence (WOE) and Information Value (IV) for one binned
  variable and appends the results to two accumulator tables in WORK.

  Parameters
    dsn=   Input data set. It is rewritten in place: the columns TargetBinary,
           good and bad are added and the data set is sorted by keyv.
    tgt=   Target variable. good is set to tgt and bad to 1 - tgt, so a 0/1
           coding is assumed (not checked here).
    qvar=  Variable whose per-group mean and median are reported as x_mean and
           x_median; its name is also stored in the varname column.
    keyv=  Single grouping (bin) variable. It is renamed to vgroup in the
           output, so a list of variables is not supported.

  Outputs
    IV        one row appended per call: varname, IV, Num_Group.
    vargroup  one row appended per group: counts, rates, WOE, IV, x_mean,
              x_median. Missing values of keyv form their own group.
    Both tables are created on the first call if they do not exist yet.
    The macro variables <qvar>_IV_tot and <qvar>_numgrp are set as before.

  Notes
    WOE is set to 0 (so the group contributes 0 to IV) when either pgood or
    pbad is 0.
    The intermediate tables <dsn>_mean, <dsn>_mean0 and <dsn>_IV are deleted
    at the end.
-----------------------------------------------------------------------------*/
%macro IVcal_qcbnum(dsn=, tgt=, qvar=, keyv=);
  /* Keep the totals private so a same-named variable of the caller is safe. */
  %local _totgood _totbad;

  /* Derive the good/bad indicators from the target. */
  data &dsn.;
    set &dsn.;
    TargetBinary = &tgt.;
    good = &tgt.;
    bad = 1-&tgt.;
  run;

  proc sort data = &dsn.; by &keyv.; run;

  /* _type_=0 holds the overall totals, _type_=1 one row per keyv group. */
  proc means data = &dsn. missing noprint;
    class &keyv.;
    var good bad &qvar.;
    output out = &dsn._mean
      sum(good)=goodct sum(bad)=badct
      mean(&qvar.)=x_mean median(&qvar.)=x_median;
  run;

  /* INTO converts numbers with BEST8. by default, which truncates large     */
  /* counts; an explicit wide format keeps the full value.                   */
  proc sql noprint;
    select goodct format=best32., badct format=best32.
      into :_totgood, :_totbad
      from &dsn._mean
      where _type_ = 0;
  quit;
  /*%put <<<_totgood = &_totgood. and _totbad = &_totbad.>>>;*/

  data &dsn._mean0 (drop=_type_ rename=(&keyv. = vgroup _freq_=volume));
    /*retain IV_cum 0;*/
    /* The RETAIN statement is placed first to fix the column order. */
    retain varname &keyv. _freq_ pvolume goodct badct pgood pbad goodrate badrate IV WOE logodds tgt_rate;
    set &dsn._mean ;
    format pvolume goodrate badrate pgood pbad percent10.2;
    format logodds WOE IV 8.3;
    format tgt_rate percent10.2;
    if _type_=1;
    varname = "&qvar.";
    pvolume = _freq_/(&_totgood. + &_totbad.);
    goodrate = goodct/(goodct+badct);
    badrate = badct/(goodct+badct);
    if goodrate ne 0 and badrate ne 0 then logodds = log(goodrate/badrate);
    else logodds = .;
    pgood = goodct/&_totgood.;
    pbad = badct/&_totbad.;
    /* WOE stays 0 when a group has no goods or no bads (log is undefined). */
    WOE = 0;
    if (pbad > 0 and pgood > 0) then WOE = log(pgood/pbad);
    IV = (pgood-pbad)*woe;
    Tgt_Rate = goodrate;
    /*IV_cum = IV_cum + IV;*/
  run;

  /* Total IV and number of groups for this variable. The FROM clause is now */
  /* terminated with a semicolon so that QUIT really ends PROC SQL instead   */
  /* of being read as a table alias.                                         */
  proc sql noprint;
    select sum(IV) format=best32., count(*)
      into :&qvar._IV_tot, :&qvar._numgrp
      from &dsn._mean0;
  quit;

  data &dsn._IV;
    varname = "&qvar.";
    IV = &&&qvar._IV_tot.;
    Num_Group = &&&qvar._numgrp.;
  run;

  /* Append to the accumulators. The base table is read only when it already */
  /* exists, so the first call creates it instead of failing.                */
  %if %sysfunc(exist(&dsn._IV)) %then %do;
    data IV;
      set
        %if %sysfunc(exist(IV)) %then %do; IV %end;
        &dsn._IV;
    run;
  %end;
  %if %sysfunc(exist(&dsn._mean0)) %then %do;
    data vargroup;
      set
        %if %sysfunc(exist(vargroup)) %then %do; vargroup %end;
        &dsn._mean0;
    run;
  %end;

  /* Delete temporary data */
  proc datasets lib=work nolist;
    delete &dsn._mean &dsn._mean0 &dsn._IV;
  run;
  quit;
%mend IVcal_qcbnum;