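用Python函数将异常值替换为下限和上限 from sklearn import datasets import pandas as pd import numpy as np dt = datasets.load_diabetes() data = pd.DataFrame(data= np.c_[dt['data'], dt['target']], columns= dt['feature_names'] + ['target'] ) data = data.drop('sex', axis = 1) # mean +- 2sigma # function to calculate outlier of a variable def out1(x): mu = np.average(x) sigma = np.std(x) LL = mu - 2*sigma # Lower limit UL = mu + 2*sigma # Upper limit out = [1 if (a >= UL) | (a <= LL) else 0 for a in x] …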

函数用Python中的下限和上限替换异常值 从sklearn导入数据集 作为pd进口熊猫 将numpy作为np导入 dt=数据集。加载糖尿病() data=pd.DataFrame(data=np.c_dt['data'],dt['target']],列= dt['feature_names']+['target']) data=data.drop('sex',axis=1) #平均值+-2sigma #函数计算变量的异常值 def输出1(x): mu=np.平均值(x) 西格玛=np.标准(x) LL=mu-2*西格玛#下限 UL=mu+2*sigma#上限 out=[1如果(a>=UL)|(a=mu+2*sigma和=,python,pandas,machine-learning,statistics,Python,Pandas,Machine Learning,Statistics,提前感谢!使用 使用 我认为clip在所有列中应用相同的LL和UL。我有没有办法使其与特定列的LL和UL一起工作?我认为clip在所有列中应用相同的LL和UL。我有没有办法使其与特定列的LL和UL一起工作? from sklearn import datasets import pandas as pd import numpy as np dt = datasets.load_diabetes() data = pd.DataFrame(data= np.c_[dt['data'], dt[

提前感谢!

使用

使用


我认为 clip 会对所有列应用相同的 LL 和 UL。有没有办法让它对每一列使用各自的 LL 和 UL?
from sklearn import datasets
import pandas as pd
import numpy as np

# Load the diabetes dataset, put the feature matrix and the target side by
# side in one frame, then discard the 'sex' column.
dt = datasets.load_diabetes()
data = pd.DataFrame(
    data=np.c_[dt['data'], dt['target']],
    columns=dt['feature_names'] + ['target'],
).drop(columns='sex')

# Outlier rule: mean +- 2 * standard deviation
def out1(x):
    """Flag the values of ``x`` that lie on or beyond mean +- 2*std.

    Returns a list with one entry per element of ``x``: 1 when the element
    is >= the upper limit or <= the lower limit, 0 otherwise.  The centre
    and spread are whatever ``np.average(x)`` and ``np.std(x)`` report.
    """
    centre = np.average(x)
    spread = np.std(x)
    lower = centre - 2*spread
    upper = centre + 2*spread
    # Both comparisons are inclusive, so a value exactly on a limit counts.
    return [1 if (value <= lower or value >= upper) else 0 for value in x]

# Report how many values out1 flags as outliers in each column.
print(data.apply(lambda column: sum(out1(column))))


# Function to replace outliers with LL / UL

def out_impute(x):
    """Return ``x`` with values outside mean +- 2*std pulled back to the limit.

    Values below the lower limit become the lower limit and values above the
    upper limit become the upper limit; everything in between is unchanged.
    The limits are computed from ``x`` itself, so calling this through
    ``DataFrame.apply`` gives every column its own pair of limits.

    The result is whatever ``np.clip`` returns for ``x`` (a Series when
    ``x`` is a pandas Series, otherwise a NumPy array).
    """
    mu = np.average(x)
    sigma = np.std(x)
    LL = mu - 2*sigma # Lower limit
    UL = mu + 2*sigma # Upper limit
    # Clipping replaces the former placeholder string that was returned here.
    xnew = np.clip(x, LL, UL)
    return xnew

data1 = data.apply(out_impute)  # Create new data with imputed values

# The same limits computed for every column at once.  LL and UL are Series
# indexed by column name, so clip(..., axis=1) applies each column's own
# bounds instead of one shared pair of scalars.
mu = data.mean()
sigma = data.std(ddof=0)  # ddof=0 matches the np.std call in out_impute
LL = mu - 2*sigma # Lower limit
UL = mu + 2*sigma # Upper limit
data_clipped = data.clip(LL, UL, axis=1)