查看原文
其他

异常值处理

66号学苑 2022-09-08

The following article is from 风控建模 Author Monica

本段代码用于异常值处理(缩尾处理):取值大于99分位数的,替换为99分位数;取值小于1分位数的,替换为1分位数。

代码如下:

其中,宏 yq_ycz 的参数含义如下:


lib =逻辑库;

out=输出的数据集;

open_data=包含99分位数和1分位数的数据集(每个变量一行,含 name、name1、Estimate1、Estimate99 四列);

input_data=要处理异常值的数据集。



/* Copy model_data5 into model_data6, leaving out the variables listed in the
   macro variable py_var (py_var is defined outside this section). */
data model_data6;

set model_data5;

drop &py_var;

run;

/* Outlier handling: values above the 99th percentile are set to the 99th
   percentile, values below the 1st percentile are set to the 1st percentile. */

/* Write the variable metadata (name, type, label) of model_data6 to model_var6. */
proc contents data = model_data6 noprint out = model_var6(keep = name type label);
run;

/* Collect the numeric variables (type = 1 in PROC CONTENTS output) into the
   macro variable num_var as a blank-separated list, skipping the names in the
   exclusion list below. A blank separator is required because the list is
   later used in a VAR statement. */
proc sql noprint;
    select name
        into :num_var separated by ' '
        from model_var6
        where name not in ("default", "account_place_p", "education",
                           "house_product_type", "insurance_money_type",
                           "position", "submit_time1", "submit_time2",
                           "unit_type")
          and type = 1;
quit;

/* Echo the resulting variable list to the log for inspection. */
%put &num_var.;

/* Compute the 1st and 99th percentiles: capture the PROC UNIVARIATE
   Quantiles table (and the ExtremeObs table) as data sets through ODS OUTPUT. */

/* Suppress listing output and the results window while the procedure runs. */
ods listing close;
ods results off;

ods output
    extremeobs = num_extremobs2
    quantiles  = num_quantiles;

proc univariate data = model_data6;
    var &num_var.;
run;

/* Stop capturing and restore the output destinations. */
ods output close;
ods results on;
ods listing;

 

/* Split the quantile table: rows whose quantile label contains "99%" are
   written to num_quantiles_99, rows whose label contains "1%" are written to
   num_quantiles_1. */
data num_quantiles_99 num_quantiles_1;
    set num_quantiles;
    if index(quantile, "99%") then output num_quantiles_99;
    if index(quantile, "1%") then output num_quantiles_1;
run;

/* Add a suffix to the numeric variables: name1 is the original name followed
   by "_2" and is later used as the name of the capped output variable.
   name1 is left blank for non-numeric variables.
   NOTE(review): a name longer than 30 characters would yield a name1 that
   exceeds the 32-character limit for SAS variable names. */
data model_var7;
    set model_var6;
    if type = 1 then name1 = compress(name) || "_" || "2";
run;

/* Attach the percentile estimates to the numeric variables:
   tmp_num_991 holds the 99th percentile per variable,
   tmp_num_11 holds the 1st percentile per variable.
   Variables without an estimate are dropped by the WHERE clause. */
proc sql;
    create table tmp_num_991 as
        select a.name, a.name1, b.estimate
        from model_var7 a
             left join num_quantiles_99 b
             on a.name = b.varname
        where a.type = 1 and b.estimate ^= .
    ;

    create table tmp_num_11 as
        select a.name, a.name1, b.estimate
        from model_var7 a
             left join num_quantiles_1 b
             on a.name = b.varname
        where a.type = 1 and b.estimate ^= .
    ;
quit;

/* Sort both percentile tables by variable name so they can be match-merged. */
proc sort data = tmp_num_11;
    by name;
run;

proc sort data = tmp_num_991;
    by name;
run;

/* Combine the two tables into one row per variable with the columns
   name, name1, Estimate1 (1st percentile) and Estimate99 (99th percentile).
   The BY statement pairs rows by variable name; without it the merge would
   pair rows purely by position and could mix up bounds between variables if
   the two tables ever differed in their rows. */
data tmp_num;
    merge tmp_num_11(rename = (Estimate = Estimate1))
          tmp_num_991(rename = (Estimate = Estimate99));
    by name;
    /* Clear all variable labels. */
    attrib _all_ label='';
run;


/*
 * yq_ycz: cap numeric variables at pre-computed lower and upper bounds.
 *
 * Parameters
 *   lib        = libref that holds input_data; also used when checking for and
 *                deleting an existing output data set.
 *   out        = name of the output data set. It receives one capped column
 *                (named by name1) per row of open_data.
 *   open_data  = data set with one row per variable and the columns
 *                name (character, source variable), name1 (character, output
 *                variable), Estimate1 (numeric lower bound) and Estimate99
 *                (numeric upper bound).
 *   input_data = data set in lib whose variables are capped.
 *
 * Side effects: creates one intermediate data set out_1, out_2, ... per row
 * of open_data; the caller is expected to delete them afterwards.
 *
 * Missing input values are kept missing. SAS orders missing values below
 * every number, so a plain "less than lower bound" test would otherwise
 * replace them with the lower bound.
 *
 * NOTE(review): the output is written to the one-level name given in out
 * (normally WORK), while the existence check uses lib. The two agree only
 * when lib is the default library - confirm the intended target.
 */
%macro yq_ycz(lib =,out=,open_data=,input_data=);
    /* Keep all helper macro variables local so callers are not affected. */
    %local dsid rc nobs i vn_name vn_name1 vn_low vn_high
           value1 value2 value3 value4;

    /* Remove an output data set left over from a previous run. */
    %if %sysfunc(exist(&lib..&out.)) ne 0 %then %do;
        proc datasets lib = &lib. nolist;
            delete &out.;
        quit;
    %end;

    %let dsid = %sysfunc(open(&open_data.));

    %if &dsid. gt 0 %then %do;
        %let nobs = %sysfunc(attrn(&dsid., nobs));

        /* Column positions do not change between rows: look them up once. */
        %let vn_name  = %sysfunc(varnum(&dsid., name));
        %let vn_name1 = %sysfunc(varnum(&dsid., name1));
        %let vn_low   = %sysfunc(varnum(&dsid., estimate1));
        %let vn_high  = %sysfunc(varnum(&dsid., Estimate99));

        %if &vn_name. = 0 or &vn_name1. = 0 or &vn_low. = 0 or &vn_high. = 0 %then %do;
            %put ERROR: yq_ycz expects the columns name, name1, Estimate1 and Estimate99 in &open_data..;
        %end;
        %else %do;
            %do i = 1 %to &nobs.;
                %let rc = %sysfunc(fetchobs(&dsid., &i.));

                %let value1 = %sysfunc(getvarc(&dsid., &vn_name.));          /* source variable */
                %let value2 = %sysfunc(getvarc(&dsid., &vn_name1.));         /* output variable */
                /* BEST32. keeps more significant digits than the default BEST12. */
                %let value3 = %sysfunc(getvarn(&dsid., &vn_low.), best32.);  /* 1st percentile */
                %let value4 = %sysfunc(getvarn(&dsid., &vn_high.), best32.); /* 99th percentile */

                /* Cap one variable and keep only the capped column. */
                data out_&i.;
                    set &lib..&input_data.(keep = &value1.);
                    if missing(&value1.) then &value2. = &value1.;
                    else if &value1. < &value3. then &value2. = &value3.;
                    else if &value1. > &value4. then &value2. = &value4.;
                    else &value2. = &value1.;
                    keep &value2.;
                    label &value2. = "&value2.";
                run;

                /* The first column starts the output; later columns are
                   appended side by side with a one-to-one (positional) merge. */
                %if &i. = 1 %then %do;
                    data &out.;
                        set out_&i.;
                        keep &value2.;
                    run;
                %end;
                %else %do;
                    data &out.;
                        merge &out. out_&i.;
                    run;
                %end;
            %end;
        %end;

        %let rc = %sysfunc(close(&dsid.));
    %end;
    %else %do;
        %put ERROR: yq_ycz could not open &open_data.. %sysfunc(sysmsg());
    %end;
%mend;

/* Cap every variable listed in tmp_num and collect the capped columns in outdata_num. */
%yq_ycz(lib=work,out = outdata_num,open_data = tmp_num,input_data = model_data6);

/* Delete the per-variable intermediate data sets (out_1, out_2, ...) from WORK. */
proc datasets lib = work nolist;
    delete out_: / memtype = data;
quit;


来源|风控建模

作者|Monica

更多精彩,戳这里:


|这是一份可以让你很牛很牛的风控技能包|

|信用评分卡课程(一):数据处理|

|信用评分卡课程(二):特征变量|

|最最常用的分类模型评价指标整理|

|概率图模型在反欺诈的应用|


您可能也对以下帖子感兴趣

文章有问题?点此查看未经处理的缓存