SQL Server 2012:将一个组均匀拆分,使差异在1%以内

SQL Server 2012:将一个组均匀拆分,使差异在1%以内,sql-server-2012,ssms-2016,SQL Server 2012,SSMS 2016,我的任务是针对每个门店位置,将其客户分成两个规模相当的组。要求的结果集中,每个门店位置的两个组在客户数量、订单数量和订购金额上彼此相差都不超过1%。下面是我写的代码,它运行得相当不错,大多数时候都能得到预期的结果,但有时(我认为是由于组中存在异常值)差异会超过1%。 If OBJECT_ID('tempdb.dbo.#Orders') IS NOT NULL DROP TABLE #Orders Select StoreID

我的任务是针对每个门店位置,将其客户分成两个规模相当的组。要求的结果集中,每个门店位置的两个组在客户数量、订单数量和订购金额上彼此相差都不超过1%。

下面是我写的代码,它运行得相当不错,大多数时候都能得到预期的结果,但有时(我认为是由于组中存在异常值)两组之间的差异会超过1%。

-- Purpose: for every store, split its customers into two groups ('A' / 'B')
-- and report each group's customer count, order count and amount sold so the
-- two halves can be compared.
--
-- Approach: rank the customers of a store by amount sold (ascending) and
-- alternate the ranks between the groups (even rank -> 'A', odd rank -> 'B').
-- NOTE(review): because the ranking is ascending, 'A' receives the larger
-- member of every consecutive pair, so 'A' totals are biased upward.

-- Step 1: one row per (store, customer) with that customer's totals.
IF OBJECT_ID('tempdb.dbo.#Orders') IS NOT NULL DROP TABLE #Orders;

SELECT
     StoreID
    ,CustomerID
    -- Count orders; the previous SUM(OrderID) added up identifier values,
    -- which is not an order count.
    -- DISTINCT keeps the count correct even if CustomerOrders holds several
    -- rows per order (grain not visible here -- TODO confirm).
    ,COUNT(DISTINCT OrderID) AS Orders
    ,SUM(OrderAmount)        AS AmountSold
INTO #Orders
FROM CustomerOrders
GROUP BY
     StoreID
    ,CustomerID;

-- Step 2: rank customers inside each store.
IF OBJECT_ID('tempdb.dbo.#OrderRanking') IS NOT NULL DROP TABLE #OrderRanking;

SELECT
     O.StoreID
    ,O.CustomerID
    ,O.Orders
    ,O.AmountSold
    -- CustomerID is the final tiebreaker so customers with equal totals are
    -- always ranked the same way and the split is reproducible.
    ,ROW_NUMBER() OVER (
         PARTITION BY O.StoreID
         ORDER BY O.AmountSold, O.Orders, O.CustomerID
     ) AS Ranking
INTO #OrderRanking
FROM #Orders AS O;

-- Step 3: totals per store and group.
SELECT
     R.StoreID
    ,COUNT(R.CustomerID) AS CustomerCount
    ,SUM(R.Orders)       AS Orders
    ,SUM(R.AmountSold)   AS Amountsold
    -- Bracketed alias: the 'string' AS-alias form is deprecated in SQL Server.
    ,CASE WHEN R.Ranking % 2 = 0 THEN 'A' ELSE 'B' END AS [Grouping]
FROM #OrderRanking AS R
GROUP BY
     R.StoreID
    ,CASE WHEN R.Ranking % 2 = 0 THEN 'A' ELSE 'B' END;
有没有更好的方法来拆分这些组,以确保差异不超过1%?或者有没有办法循环尝试多种不同的拆分,直到找到满足1%要求的拆分?如果采用循环,还需要一个保护机制,以防在无法满足要求时陷入无限循环——例如在循环X次之后,直接采用最接近的那次拆分。

我正在使用SQL Server 2012和SSMS 2016。谢谢你能提供的帮助

编辑:我曾试图把代码改写成与公司无关的形式,但在这个过程中把代码弄乱了。我已经意识到这一点,并对代码进行了调整,以展示真正需要的内容。

编辑2:我自己也取得了一些进展,想更新这个问题

所以我在做更多的工作,我能够让它在每次运行代码时按随机顺序排序,并让它显示每个组的方差。现在我想添加的是一种循环X次的方法,并保持总方差最低的一个。这个周末我可能会尝试更多的东西。但现在,下面是我提到的新代码

-- Purpose: split each store's customers into two groups ('A' / 'B') in a
-- random order and report, per store, how far group A is from group B (in
-- percent of group B) for customer count, order count and amount sold.
-- Every run produces a different random split.

-- Step 1: one row per (store, customer) with totals and a random sort key.
IF OBJECT_ID('tempdb.dbo.#Orders') IS NOT NULL DROP TABLE #Orders;

SELECT
     Agg.StoreID
    ,Agg.CustomerID
    ,Agg.Orders
    ,Agg.AmountSold
    -- RAND() without a seed is evaluated once per query, so every row would
    -- receive the same value and the later ORDER BY Random would not shuffle
    -- anything. Seeding from NEWID() yields an independent value per row.
    ,RAND(CHECKSUM(NEWID())) AS Random
INTO #Orders
FROM
(
    SELECT
         StoreID
        ,CustomerID
        -- Count orders; the previous SUM(OrderID) added up identifier values.
        -- DISTINCT guards against several rows per order in CustomerOrders
        -- (grain not visible here -- TODO confirm).
        ,COUNT(DISTINCT OrderID) AS Orders
        ,SUM(OrderAmount)        AS AmountSold
    FROM CustomerOrders
    GROUP BY
         StoreID
        ,CustomerID
) AS Agg;

-- Step 2: random rank of each customer inside its store.
IF OBJECT_ID('tempdb.dbo.#OrderRanking') IS NOT NULL DROP TABLE #OrderRanking;

SELECT
     O.StoreID
    ,O.CustomerID
    ,O.Orders
    ,O.AmountSold
    ,O.Random
    ,ROW_NUMBER() OVER (PARTITION BY O.StoreID ORDER BY O.Random) AS Ranking
INTO #OrderRanking
FROM #Orders AS O;

-- Step 3: totals per store and group (even rank -> 'A', odd rank -> 'B').
IF OBJECT_ID('tempdb.dbo.#Split') IS NOT NULL DROP TABLE #Split;

SELECT
     R.StoreID
    ,COUNT(R.CustomerID) AS CustomerCount
    ,SUM(R.Orders)       AS Orders
    ,SUM(R.AmountSold)   AS Amountsold
    -- Bracketed alias: the 'string' AS-alias form is deprecated in SQL Server.
    ,CASE WHEN R.Ranking % 2 = 0 THEN 'A' ELSE 'B' END AS [Grouping]
INTO #Split
FROM #OrderRanking AS R
GROUP BY
     R.StoreID
    ,CASE WHEN R.Ranking % 2 = 0 THEN 'A' ELSE 'B' END;

-- Step 4: signed percentage difference (A - B) / B * 100 per store.
-- NULLIF turns a zero group-B total into NULL so the result is NULL instead
-- of the statement failing with a divide-by-zero error.
SELECT
     P.StoreID
    ,(P.CustomersA - P.CustomersB) / NULLIF(P.CustomersB, 0) * 100 AS CustomerCountVar
    ,(P.OrdersA    - P.OrdersB)    / NULLIF(P.OrdersB, 0)    * 100 AS OrderVar
    ,(P.AmountA    - P.AmountB)    / NULLIF(P.AmountB, 0)    * 100 AS AmountsoldVar
FROM
(
    -- Pivot the (at most) two #Split rows of a store into one row.
    SELECT
         S.StoreID
        ,CAST(MAX(CASE WHEN S.[Grouping] = 'A' THEN S.CustomerCount ELSE 0 END) AS DECIMAL(18,2)) AS CustomersA
        ,CAST(MAX(CASE WHEN S.[Grouping] = 'B' THEN S.CustomerCount ELSE 0 END) AS DECIMAL(18,2)) AS CustomersB
        ,CAST(MAX(CASE WHEN S.[Grouping] = 'A' THEN S.Orders        ELSE 0 END) AS DECIMAL(18,2)) AS OrdersA
        ,CAST(MAX(CASE WHEN S.[Grouping] = 'B' THEN S.Orders        ELSE 0 END) AS DECIMAL(18,2)) AS OrdersB
        ,CAST(MAX(CASE WHEN S.[Grouping] = 'A' THEN S.Amountsold    ELSE 0 END) AS DECIMAL(18,2)) AS AmountA
        ,CAST(MAX(CASE WHEN S.[Grouping] = 'B' THEN S.Amountsold    ELSE 0 END) AS DECIMAL(18,2)) AS AmountB
    FROM #Split AS S
    GROUP BY S.StoreID
) AS P;

所以,我们真的不可能像大家预期的那样始终保持在1%以内,但正如我所说的,我们可以尝试在X次尝试后尽可能接近。我已经想出了如何做到这一点。下面是我目前使用的代码,设置为10次尝试,但可以更改为任何适合业务的数字

-- #TestB: one row per customer of a store, holding the customer's totals,
-- random sort key, rank and the store's combined variance for that split.
IF OBJECT_ID('tempdb.dbo.#TestB') IS NOT NULL
    DROP TABLE #TestB;

CREATE TABLE #TestB
(
    StoreID     INT,
    CustomerID  VARCHAR(11),
    Orders      INT,
    AmountSold  FLOAT,
    Random      FLOAT,
    Ranking     BIGINT,
    CombinedVar DECIMAL(18,2)
);

-- #BestPrep: same layout as #TestB.
IF OBJECT_ID('tempdb.dbo.#BestPrep') IS NOT NULL
    DROP TABLE #BestPrep;

CREATE TABLE #BestPrep
(
    StoreID     INT,
    CustomerID  VARCHAR(11),
    Orders      INT,
    AmountSold  FLOAT,
    Random      FLOAT,
    Ranking     BIGINT,
    CombinedVar DECIMAL(18,2)
);


-- Retry loop: try up to @GiveUp random A/B splits of every store's customers.
--   * #TestB    receives, once per store, the first split that is within
--               tolerance ((OrderVar < 1 and AmountsoldVar < 1) or CombinedVar < 2).
--   * #BestPrep always holds, per store, the single attempt with the lowest
--               CombinedVar seen so far (the fallback when no attempt is
--               within tolerance).
-- Both tables must already exist with the columns
-- (StoreID, CustomerID, Orders, AmountSold, Random, Ranking, CombinedVar).
-- The loop stops early once every store has a split in #TestB.

DECLARE @GiveUp INT;   -- number of attempts still allowed

SET @GiveUp = 10;

-- The per-customer totals do not change between attempts, so they are
-- aggregated once; this also makes every attempt work on the same snapshot.
IF OBJECT_ID('tempdb.dbo.#Orders') IS NOT NULL DROP TABLE #Orders;

SELECT
     StoreID
    ,CustomerID
    -- Count orders; the previous SUM(OrderID) added up identifier values.
    -- DISTINCT guards against several rows per order in CustomerOrders
    -- (grain not visible here -- TODO confirm).
    ,COUNT(DISTINCT OrderID) AS Orders
    ,SUM(OrderAmount)        AS AmountSold
    ,CAST(0 AS FLOAT)        AS Random      -- placeholder, reshuffled per attempt
INTO #Orders
FROM CustomerOrders
GROUP BY
     StoreID
    ,CustomerID;

WHILE @GiveUp > 0
BEGIN

    -- New random sort key for every customer. RAND() without a seed is
    -- evaluated once per statement (same value on every row); seeding it
    -- from NEWID() gives each row its own value.
    UPDATE #Orders
    SET Random = RAND(CHECKSUM(NEWID()));

    -- Random rank of each customer inside its store.
    IF OBJECT_ID('tempdb.dbo.#OrderRanking') IS NOT NULL DROP TABLE #OrderRanking;

    SELECT
         O.StoreID
        ,O.CustomerID
        ,O.Orders
        ,O.AmountSold
        ,O.Random
        ,ROW_NUMBER() OVER (PARTITION BY O.StoreID ORDER BY O.Random) AS Ranking
    INTO #OrderRanking
    FROM #Orders AS O;

    -- Totals per store and group (even rank -> 'A', odd rank -> 'B').
    IF OBJECT_ID('tempdb.dbo.#Split') IS NOT NULL DROP TABLE #Split;

    SELECT
         R.StoreID
        ,COUNT(R.CustomerID) AS CustomerCount
        ,SUM(R.Orders)       AS Orders
        ,SUM(R.AmountSold)   AS Amountsold
        ,CASE WHEN R.Ranking % 2 = 0 THEN 'A' ELSE 'B' END AS [Grouping]
    INTO #Split
    FROM #OrderRanking AS R
    GROUP BY
         R.StoreID
        ,CASE WHEN R.Ranking % 2 = 0 THEN 'A' ELSE 'B' END;

    -- Absolute percentage differences |A - B| / B * 100 per store.
    -- CombinedVar = OrderVar + AmountsoldVar (customer count is not included).
    -- NULLIF turns a zero group-B total into NULL instead of raising a
    -- divide-by-zero error; a NULL variance never satisfies the tolerance test.
    IF OBJECT_ID('tempdb.dbo.#Var') IS NOT NULL DROP TABLE #Var;

    SELECT
         P.StoreID
        ,ABS((P.CustomersA - P.CustomersB) / NULLIF(P.CustomersB, 0) * 100) AS CustomerCountVar
        ,ABS((P.OrdersA    - P.OrdersB)    / NULLIF(P.OrdersB, 0)    * 100) AS OrderVar
        ,ABS((P.AmountA    - P.AmountB)    / NULLIF(P.AmountB, 0)    * 100) AS AmountsoldVar
        ,ABS((P.OrdersA    - P.OrdersB)    / NULLIF(P.OrdersB, 0)    * 100)
         +
         ABS((P.AmountA    - P.AmountB)    / NULLIF(P.AmountB, 0)    * 100) AS CombinedVar
    INTO #Var
    FROM
    (
        -- Pivot the (at most) two #Split rows of a store into one row.
        SELECT
             S.StoreID
            ,CAST(MAX(CASE WHEN S.[Grouping] = 'A' THEN S.CustomerCount ELSE 0 END) AS DECIMAL(18,2)) AS CustomersA
            ,CAST(MAX(CASE WHEN S.[Grouping] = 'B' THEN S.CustomerCount ELSE 0 END) AS DECIMAL(18,2)) AS CustomersB
            ,CAST(MAX(CASE WHEN S.[Grouping] = 'A' THEN S.Orders        ELSE 0 END) AS DECIMAL(18,2)) AS OrdersA
            ,CAST(MAX(CASE WHEN S.[Grouping] = 'B' THEN S.Orders        ELSE 0 END) AS DECIMAL(18,2)) AS OrdersB
            ,CAST(MAX(CASE WHEN S.[Grouping] = 'A' THEN S.Amountsold    ELSE 0 END) AS DECIMAL(18,2)) AS AmountA
            ,CAST(MAX(CASE WHEN S.[Grouping] = 'B' THEN S.Amountsold    ELSE 0 END) AS DECIMAL(18,2)) AS AmountB
        FROM #Split AS S
        GROUP BY S.StoreID
    ) AS P;

    -- Keep this attempt for every store that is within tolerance and has no
    -- accepted split yet. The check is per store: matching on CustomerID alone
    -- would skip a customer that also buys at another, already accepted store.
    -- (Previously an IF EXISTS guarded only the DROP of an intermediate
    -- #TestA table, so an attempt without any qualifying store failed when
    -- that table was re-created.)
    INSERT INTO #TestB (StoreID, CustomerID, Orders, AmountSold, Random, Ranking, CombinedVar)
    SELECT
         R.StoreID
        ,R.CustomerID
        ,R.Orders
        ,R.AmountSold
        ,R.Random
        ,R.Ranking
        ,V.CombinedVar
    FROM #OrderRanking AS R
        INNER JOIN #Var AS V
            ON V.StoreID = R.StoreID
    WHERE ((V.OrderVar < 1 AND V.AmountsoldVar < 1) OR V.CombinedVar < 2)
      AND NOT EXISTS (SELECT * FROM #TestB AS T WHERE T.StoreID = R.StoreID);

    -- Fallback bookkeeping: throw away a store's stored attempt when this
    -- attempt is strictly better (or the stored one has no usable variance)...
    DELETE B
    FROM #BestPrep AS B
        INNER JOIN #Var AS V
            ON V.StoreID = B.StoreID
    WHERE V.CombinedVar < B.CombinedVar
       OR (B.CombinedVar IS NULL AND V.CombinedVar IS NOT NULL);

    -- ...and store this attempt for every store that has none stored.
    -- Ties keep the earlier attempt, so a store never holds two attempts.
    INSERT INTO #BestPrep (StoreID, CustomerID, Orders, AmountSold, Random, Ranking, CombinedVar)
    SELECT
         R.StoreID
        ,R.CustomerID
        ,R.Orders
        ,R.AmountSold
        ,R.Random
        ,R.Ranking
        ,V.CombinedVar
    FROM #OrderRanking AS R
        INNER JOIN #Var AS V
            ON V.StoreID = R.StoreID
    WHERE NOT EXISTS (SELECT * FROM #BestPrep AS B WHERE B.StoreID = R.StoreID);

    -- Nothing left to improve once every store has an accepted split.
    IF NOT EXISTS
       (
           SELECT *
           FROM #Var AS V
           WHERE NOT EXISTS (SELECT * FROM #TestB AS T WHERE T.StoreID = V.StoreID)
       )
        BREAK;

    SET @GiveUp = @GiveUp - 1;

END;




-- Final result: one split per store.
--   * Stores with an accepted split in #TestB return exactly that split.
--   * All other stores return their closest attempt from #BestPrep (the rows
--     whose CombinedVar equals the store's minimum).
-- Previously the closest attempt was returned for every store and then
-- UNIONed with #TestB, so a store whose closest attempt differed from its
-- accepted one listed each customer twice with conflicting rankings.

-- Lowest CombinedVar recorded per store.
IF OBJECT_ID('tempdb.dbo.#BestPrep2') IS NOT NULL DROP TABLE #BestPrep2;

SELECT
     A.StoreID
    ,MIN(A.CombinedVar) AS CombinedVar
INTO #BestPrep2
FROM #BestPrep AS A
GROUP BY
     A.StoreID;

SELECT
     A.StoreID
    ,A.CustomerID
    ,A.Orders
    ,A.AmountSold
    ,A.Random
    ,A.Ranking
    ,A.CombinedVar
FROM #BestPrep AS A
    INNER JOIN #BestPrep2 AS B
        ON  A.StoreID     = B.StoreID
        AND A.CombinedVar = B.CombinedVar
WHERE NOT EXISTS (SELECT * FROM #TestB AS T WHERE T.StoreID = A.StoreID)
-- UNION (not UNION ALL) is kept on purpose: it collapses byte-identical rows
-- in case #BestPrep recorded the very same attempt more than once.
UNION
SELECT
     T.StoreID
    ,T.CustomerID
    ,T.Orders
    ,T.AmountSold
    ,T.Random
    ,T.Ranking
    ,T.CombinedVar
FROM #TestB AS T;

这听起来是一个奇怪的要求,这只是一个技术练习,还是现实中的需求?你是有一个固定的数据集可供使用,还是数据会随时间变化?正如你所说,任何数据集中的离群值都可能使结果偏离1%,我不确定你能否保证做到。

这是一个现实中的需求,数据在不断变化。我同意,由于离群值的存在,1%可能无法保证,所以我才想到循环X次并选出最接近1%的结果,只是不知道如何编码。

我不确定是否完全理解了你的需求……但使用RANK可能比使用DENSE_RANK更好,甚至可以为销售量和订单分别创建单独的排名,然后取平均值。你也可以看看NTILE……