Sql server 文件表分隔字符串拆分
编辑-为了清楚起见,我将把目标句放在顶部。测试 我的问题是,是否有办法获得相同的性能 作为临时表而不使用临时表Sql server 文件表分隔字符串拆分,sql-server,tsql,user-defined-functions,filetable,Sql Server,Tsql,User Defined Functions,Filetable,编辑-为了清楚起见,我将把目标句放在顶部。测试 我的问题是,是否有办法获得相同的性能 作为临时表而不使用临时表 <?xml version="1.0" encoding="utf-16"?> <ShowPlanXML xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" Version="1.2" Build="12.0.4100.1"
<?xml version="1.0" encoding="utf-16"?>
<ShowPlanXML xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" Version="1.2" Build="12.0.4100.1" xmlns="http://schemas.microsoft.com/sqlserver/2004/07/showplan">
<BatchSequence>
<Batch>
<Statements>
<StmtSimple StatementCompId="1" StatementEstRows="17486" StatementId="1" StatementOptmLevel="TRIVIAL" CardinalityEstimationModelVersion="120" StatementSubTreeCost="0.166487" StatementText="Select --TextData,
 dbo.udf_StringSplit(TextData, 1, '|'), dbo.udf_StringSplit(TextData, 2, '|'), dbo.udf_StringSplit(TextData, 3, '|'),
 dbo.udf_StringSplit(TextData, 4, '|'), dbo.udf_StringSplit(TextData, 5, '|'), dbo.udf_StringSplit(TextData, 6, '|'),
 dbo.udf_StringSplit(TextData, 7, '|')--, TextData
 From #Temp" StatementType="SELECT" QueryHash="0xC4D6F0215D332F3D" QueryPlanHash="0xC50CFAF9494B5DBE" RetrievedFromCache="true">
<StatementSetOptions ANSI_NULLS="true" ANSI_PADDING="true" ANSI_WARNINGS="true" ARITHABORT="true" CONCAT_NULL_YIELDS_NULL="true" NUMERIC_ROUNDABORT="false" QUOTED_IDENTIFIER="true" />
<QueryPlan DegreeOfParallelism="0" NonParallelPlanReason="CouldNotGenerateValidParallelPlan" CachedPlanSize="24" CompileTime="1" CompileCPU="1" CompileMemory="168">
<MemoryGrantInfo SerialRequiredMemory="0" SerialDesiredMemory="0" />
<OptimizerHardwareDependentProperties EstimatedAvailableMemoryGrant="838735" EstimatedPagesCached="419367" EstimatedAvailableDegreeOfParallelism="4" />
<RelOp AvgRowSize="28023" EstimateCPU="0.0017486" EstimateIO="0" EstimateRebinds="0" EstimateRewinds="0" EstimatedExecutionMode="Row" EstimateRows="17486" LogicalOp="Compute Scalar" NodeId="0" Parallel="false" PhysicalOp="Compute Scalar" EstimatedTotalSubtreeCost="0.166487">
<OutputList>
<ColumnReference Column="Expr1003" />
<ColumnReference Column="Expr1004" />
<ColumnReference Column="Expr1005" />
<ColumnReference Column="Expr1006" />
<ColumnReference Column="Expr1007" />
<ColumnReference Column="Expr1008" />
<ColumnReference Column="Expr1009" />
</OutputList>
<RunTimeInformation>
<RunTimeCountersPerThread Thread="0" ActualRows="17486" ActualEndOfScans="1" ActualExecutions="1" />
</RunTimeInformation>
<ComputeScalar>
<DefinedValues>
<DefinedValue>
<ColumnReference Column="Expr1003" />
<ScalarOperator ScalarString="[DmProd01].[dbo].[udf_StringSplit]([tempdb].[dbo].[#Temp].[TextData],(1),'|')">
<UserDefinedFunction FunctionName="[DmProd01].[dbo].[udf_StringSplit]">
<ScalarOperator>
<Identifier>
<ColumnReference Database="[tempdb]" Schema="[dbo]" Table="[#Temp]" Column="TextData" />
</Identifier>
</ScalarOperator>
<ScalarOperator>
<Const ConstValue="(1)" />
</ScalarOperator>
<ScalarOperator>
<Const ConstValue="'|'" />
</ScalarOperator>
</UserDefinedFunction>
</ScalarOperator>
</DefinedValue>
<DefinedValue>
<ColumnReference Column="Expr1004" />
<ScalarOperator ScalarString="[DmProd01].[dbo].[udf_StringSplit]([tempdb].[dbo].[#Temp].[TextData],(2),'|')">
<UserDefinedFunction FunctionName="[DmProd01].[dbo].[udf_StringSplit]">
<ScalarOperator>
<Identifier>
<ColumnReference Database="[tempdb]" Schema="[dbo]" Table="[#Temp]" Column="TextData" />
</Identifier>
</ScalarOperator>
<ScalarOperator>
<Const ConstValue="(2)" />
</ScalarOperator>
<ScalarOperator>
<Const ConstValue="'|'" />
</ScalarOperator>
</UserDefinedFunction>
</ScalarOperator>
</DefinedValue>
<DefinedValue>
<ColumnReference Column="Expr1005" />
<ScalarOperator ScalarString="[DmProd01].[dbo].[udf_StringSplit]([tempdb].[dbo].[#Temp].[TextData],(3),'|')">
<UserDefinedFunction FunctionName="[DmProd01].[dbo].[udf_StringSplit]">
<ScalarOperator>
<Identifier>
<ColumnReference Database="[tempdb]" Schema="[dbo]" Table="[#Temp]" Column="TextData" />
</Identifier>
</ScalarOperator>
<ScalarOperator>
<Const ConstValue="(3)" />
</ScalarOperator>
<ScalarOperator>
<Const ConstValue="'|'" />
</ScalarOperator>
</UserDefinedFunction>
</ScalarOperator>
</DefinedValue>
<DefinedValue>
<ColumnReference Column="Expr1006" />
<ScalarOperator ScalarString="[DmProd01].[dbo].[udf_StringSplit]([tempdb].[dbo].[#Temp].[TextData],(4),'|')">
<UserDefinedFunction FunctionName="[DmProd01].[dbo].[udf_StringSplit]">
<ScalarOperator>
<Identifier>
<ColumnReference Database="[tempdb]" Schema="[dbo]" Table="[#Temp]" Column="TextData" />
</Identifier>
</ScalarOperator>
<ScalarOperator>
<Const ConstValue="(4)" />
</ScalarOperator>
<ScalarOperator>
<Const ConstValue="'|'" />
</ScalarOperator>
</UserDefinedFunction>
</ScalarOperator>
</DefinedValue>
<DefinedValue>
<ColumnReference Column="Expr1007" />
<ScalarOperator ScalarString="[DmProd01].[dbo].[udf_StringSplit]([tempdb].[dbo].[#Temp].[TextData],(5),'|')">
<UserDefinedFunction FunctionName="[DmProd01].[dbo].[udf_StringSplit]">
<ScalarOperator>
<Identifier>
<ColumnReference Database="[tempdb]" Schema="[dbo]" Table="[#Temp]" Column="TextData" />
</Identifier>
</ScalarOperator>
<ScalarOperator>
<Const ConstValue="(5)" />
</ScalarOperator>
<ScalarOperator>
<Const ConstValue="'|'" />
</ScalarOperator>
</UserDefinedFunction>
</ScalarOperator>
</DefinedValue>
<DefinedValue>
<ColumnReference Column="Expr1008" />
<ScalarOperator ScalarString="[DmProd01].[dbo].[udf_StringSplit]([tempdb].[dbo].[#Temp].[TextData],(6),'|')">
<UserDefinedFunction FunctionName="[DmProd01].[dbo].[udf_StringSplit]">
<ScalarOperator>
<Identifier>
<ColumnReference Database="[tempdb]" Schema="[dbo]" Table="[#Temp]" Column="TextData" />
</Identifier>
</ScalarOperator>
<ScalarOperator>
<Const ConstValue="(6)" />
</ScalarOperator>
<ScalarOperator>
<Const ConstValue="'|'" />
</ScalarOperator>
</UserDefinedFunction>
</ScalarOperator>
</DefinedValue>
<DefinedValue>
<ColumnReference Column="Expr1009" />
<ScalarOperator ScalarString="[DmProd01].[dbo].[udf_StringSplit]([tempdb].[dbo].[#Temp].[TextData],(7),'|')">
<UserDefinedFunction FunctionName="[DmProd01].[dbo].[udf_StringSplit]">
<ScalarOperator>
<Identifier>
<ColumnReference Database="[tempdb]" Schema="[dbo]" Table="[#Temp]" Column="TextData" />
</Identifier>
</ScalarOperator>
<ScalarOperator>
<Const ConstValue="(7)" />
</ScalarOperator>
<ScalarOperator>
<Const ConstValue="'|'" />
</ScalarOperator>
</UserDefinedFunction>
</ScalarOperator>
</DefinedValue>
</DefinedValues>
<RelOp AvgRowSize="4011" EstimateCPU="0.0193131" EstimateIO="0.145426" EstimateRebinds="0" EstimateRewinds="0" EstimatedExecutionMode="Row" EstimateRows="17486" LogicalOp="Table Scan" NodeId="1" Parallel="false" PhysicalOp="Table Scan" EstimatedTotalSubtreeCost="0.164739" TableCardinality="17486">
<OutputList>
<ColumnReference Database="[tempdb]" Schema="[dbo]" Table="[#Temp]" Column="TextData" />
</OutputList>
<RunTimeInformation>
<RunTimeCountersPerThread Thread="0" ActualRows="17486" ActualEndOfScans="1" ActualExecutions="1" />
</RunTimeInformation>
<TableScan Ordered="false" ForcedIndex="false" ForceScan="false" NoExpandHint="false" Storage="RowStore">
<DefinedValues>
<DefinedValue>
<ColumnReference Database="[tempdb]" Schema="[dbo]" Table="[#Temp]" Column="TextData" />
</DefinedValue>
</DefinedValues>
<Object Database="[tempdb]" Schema="[dbo]" Table="[#Temp]" IndexKind="Heap" Storage="RowStore" />
</TableScan>
</RelOp>
</ComputeScalar>
</RelOp>
</QueryPlan>
</StmtSimple>
</Statements>
</Batch>
</BatchSequence>
</ShowPlanXML>
我觉得这应该是一个简单的问题,但我被卡住了。我正在SQL2014中试验文件表。我知道有几种方法可以很好地工作,但目标是确定从文件表中提取文本子字符串的可行性
此测试共有35000个文本文件,每个文件只包含一行如下所示的文本,平均约为100字节的非Unicode文本
Aaa|Bbb|Ccc|Ddd|Eee|Fff|Ggg
所需的输出是每个文件一行,并将分隔字符串拆分为七列
我发现了一个快速字符串解析器函数,但与varchar列相比,在filestream上运行会对性能产生显著影响
运行此查询需要18秒。我试图让从filestream到varchar的转换只执行一次,但我认为调用UDF可能会导致每一行(文件)都发生这种情况
我试过将其作为视图、cte和子查询。唯一有帮助的是创建一个临时表。创建临时表需要1秒,查询需要1秒。因此,对于35k行,总查询时间为2秒,而不是18秒
-- Stage the FileTable content in a temp table so the blob-to-varchar conversion
-- is materialised once per file, then split the staged text into seven columns.

-- Guarded drop: a bare DROP TABLE fails when #Temp does not exist yet
-- (for example on the first run in a new session).
IF OBJECT_ID('tempdb..#Temp', 'U') IS NOT NULL
    DROP TABLE #Temp;

SELECT
    file_type,
    CONVERT(varchar(8000), file_stream) AS TextData
INTO #Temp
FROM HumanaInputFiles
WHERE file_type = 'adr';

-- One scalar UDF call per output column; each call receives the already
-- converted varchar value from #Temp.
SELECT
    dbo.udf_StringSplit(TextData, 1, '|'),
    dbo.udf_StringSplit(TextData, 2, '|'),
    dbo.udf_StringSplit(TextData, 3, '|'),
    dbo.udf_StringSplit(TextData, 4, '|'),
    dbo.udf_StringSplit(TextData, 5, '|'),
    dbo.udf_StringSplit(TextData, 6, '|'),
    dbo.udf_StringSplit(TextData, 7, '|')
FROM #Temp;
我读过很多关于filetables和temp table与单查询性能的文章和博客,但我似乎无法理解。这可能与sargable或统计数据有关?任何建议都将不胜感激
这是我在MSDN博客/论坛上找到的UDF,是我迄今为止发现的性能最好的一个
-- Returns one field of a delimited string.
--   @TEXT      : the delimited input text
--   @COLUMN    : 1-based index of the field to return
--   @SEPARATOR : single-character field delimiter
-- Returns an empty string when @COLUMN is larger than the number of fields.
-- Note: the end of the text is located with LEN(), which ignores trailing spaces.
ALTER FUNCTION [dbo].[udf_StringSplit](
    @TEXT varchar(8000)
    ,@COLUMN tinyint
    ,@SEPARATOR char(1)
)
RETURNS varchar(8000)
AS
BEGIN
    -- Position just past the last character of the text.
    DECLARE @PastEnd int = LEN(@TEXT) + 1;
    -- Fields still to be reached, counting the wanted one.
    DECLARE @Remaining tinyint = @COLUMN;
    DECLARE @FieldStart int = 1;
    DECLARE @FieldEnd int = CHARINDEX(@SEPARATOR, @TEXT, @FieldStart);

    -- Hop from separator to separator until the wanted field is reached
    -- or no further separator exists.
    WHILE (@Remaining > 1 AND @FieldEnd > 0)
    BEGIN
        SET @FieldStart = @FieldEnd + 1;
        SET @FieldEnd = CHARINDEX(@SEPARATOR, @TEXT, @FieldStart);
        SET @Remaining = @Remaining - 1;
    END;

    -- Ran out of separators before reaching the field: yield an empty slice.
    IF @Remaining > 1
        SET @FieldStart = @PastEnd;

    -- No closing separator: the field runs to the end of the text.
    IF @FieldEnd = 0
        SET @FieldEnd = @PastEnd;

    RETURN SUBSTRING(@TEXT, @FieldStart, @FieldEnd - @FieldStart);
END
这是临时表的执行计划
<?xml version="1.0" encoding="utf-16"?>
<ShowPlanXML xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" Version="1.2" Build="12.0.4100.1" xmlns="http://schemas.microsoft.com/sqlserver/2004/07/showplan">
<BatchSequence>
<Batch>
<Statements>
<StmtSimple StatementCompId="1" StatementEstRows="17486" StatementId="1" StatementOptmLevel="TRIVIAL" CardinalityEstimationModelVersion="120" StatementSubTreeCost="0.166487" StatementText="Select --TextData,
 dbo.udf_StringSplit(TextData, 1, '|'), dbo.udf_StringSplit(TextData, 2, '|'), dbo.udf_StringSplit(TextData, 3, '|'),
 dbo.udf_StringSplit(TextData, 4, '|'), dbo.udf_StringSplit(TextData, 5, '|'), dbo.udf_StringSplit(TextData, 6, '|'),
 dbo.udf_StringSplit(TextData, 7, '|')--, TextData
 From #Temp" StatementType="SELECT" QueryHash="0xC4D6F0215D332F3D" QueryPlanHash="0xC50CFAF9494B5DBE" RetrievedFromCache="true">
<StatementSetOptions ANSI_NULLS="true" ANSI_PADDING="true" ANSI_WARNINGS="true" ARITHABORT="true" CONCAT_NULL_YIELDS_NULL="true" NUMERIC_ROUNDABORT="false" QUOTED_IDENTIFIER="true" />
<QueryPlan DegreeOfParallelism="0" NonParallelPlanReason="CouldNotGenerateValidParallelPlan" CachedPlanSize="24" CompileTime="1" CompileCPU="1" CompileMemory="168">
<MemoryGrantInfo SerialRequiredMemory="0" SerialDesiredMemory="0" />
<OptimizerHardwareDependentProperties EstimatedAvailableMemoryGrant="838735" EstimatedPagesCached="419367" EstimatedAvailableDegreeOfParallelism="4" />
<RelOp AvgRowSize="28023" EstimateCPU="0.0017486" EstimateIO="0" EstimateRebinds="0" EstimateRewinds="0" EstimatedExecutionMode="Row" EstimateRows="17486" LogicalOp="Compute Scalar" NodeId="0" Parallel="false" PhysicalOp="Compute Scalar" EstimatedTotalSubtreeCost="0.166487">
<OutputList>
<ColumnReference Column="Expr1003" />
<ColumnReference Column="Expr1004" />
<ColumnReference Column="Expr1005" />
<ColumnReference Column="Expr1006" />
<ColumnReference Column="Expr1007" />
<ColumnReference Column="Expr1008" />
<ColumnReference Column="Expr1009" />
</OutputList>
<RunTimeInformation>
<RunTimeCountersPerThread Thread="0" ActualRows="17486" ActualEndOfScans="1" ActualExecutions="1" />
</RunTimeInformation>
<ComputeScalar>
<DefinedValues>
<DefinedValue>
<ColumnReference Column="Expr1003" />
<ScalarOperator ScalarString="[DmProd01].[dbo].[udf_StringSplit]([tempdb].[dbo].[#Temp].[TextData],(1),'|')">
<UserDefinedFunction FunctionName="[DmProd01].[dbo].[udf_StringSplit]">
<ScalarOperator>
<Identifier>
<ColumnReference Database="[tempdb]" Schema="[dbo]" Table="[#Temp]" Column="TextData" />
</Identifier>
</ScalarOperator>
<ScalarOperator>
<Const ConstValue="(1)" />
</ScalarOperator>
<ScalarOperator>
<Const ConstValue="'|'" />
</ScalarOperator>
</UserDefinedFunction>
</ScalarOperator>
</DefinedValue>
<DefinedValue>
<ColumnReference Column="Expr1004" />
<ScalarOperator ScalarString="[DmProd01].[dbo].[udf_StringSplit]([tempdb].[dbo].[#Temp].[TextData],(2),'|')">
<UserDefinedFunction FunctionName="[DmProd01].[dbo].[udf_StringSplit]">
<ScalarOperator>
<Identifier>
<ColumnReference Database="[tempdb]" Schema="[dbo]" Table="[#Temp]" Column="TextData" />
</Identifier>
</ScalarOperator>
<ScalarOperator>
<Const ConstValue="(2)" />
</ScalarOperator>
<ScalarOperator>
<Const ConstValue="'|'" />
</ScalarOperator>
</UserDefinedFunction>
</ScalarOperator>
</DefinedValue>
<DefinedValue>
<ColumnReference Column="Expr1005" />
<ScalarOperator ScalarString="[DmProd01].[dbo].[udf_StringSplit]([tempdb].[dbo].[#Temp].[TextData],(3),'|')">
<UserDefinedFunction FunctionName="[DmProd01].[dbo].[udf_StringSplit]">
<ScalarOperator>
<Identifier>
<ColumnReference Database="[tempdb]" Schema="[dbo]" Table="[#Temp]" Column="TextData" />
</Identifier>
</ScalarOperator>
<ScalarOperator>
<Const ConstValue="(3)" />
</ScalarOperator>
<ScalarOperator>
<Const ConstValue="'|'" />
</ScalarOperator>
</UserDefinedFunction>
</ScalarOperator>
</DefinedValue>
<DefinedValue>
<ColumnReference Column="Expr1006" />
<ScalarOperator ScalarString="[DmProd01].[dbo].[udf_StringSplit]([tempdb].[dbo].[#Temp].[TextData],(4),'|')">
<UserDefinedFunction FunctionName="[DmProd01].[dbo].[udf_StringSplit]">
<ScalarOperator>
<Identifier>
<ColumnReference Database="[tempdb]" Schema="[dbo]" Table="[#Temp]" Column="TextData" />
</Identifier>
</ScalarOperator>
<ScalarOperator>
<Const ConstValue="(4)" />
</ScalarOperator>
<ScalarOperator>
<Const ConstValue="'|'" />
</ScalarOperator>
</UserDefinedFunction>
</ScalarOperator>
</DefinedValue>
<DefinedValue>
<ColumnReference Column="Expr1007" />
<ScalarOperator ScalarString="[DmProd01].[dbo].[udf_StringSplit]([tempdb].[dbo].[#Temp].[TextData],(5),'|')">
<UserDefinedFunction FunctionName="[DmProd01].[dbo].[udf_StringSplit]">
<ScalarOperator>
<Identifier>
<ColumnReference Database="[tempdb]" Schema="[dbo]" Table="[#Temp]" Column="TextData" />
</Identifier>
</ScalarOperator>
<ScalarOperator>
<Const ConstValue="(5)" />
</ScalarOperator>
<ScalarOperator>
<Const ConstValue="'|'" />
</ScalarOperator>
</UserDefinedFunction>
</ScalarOperator>
</DefinedValue>
<DefinedValue>
<ColumnReference Column="Expr1008" />
<ScalarOperator ScalarString="[DmProd01].[dbo].[udf_StringSplit]([tempdb].[dbo].[#Temp].[TextData],(6),'|')">
<UserDefinedFunction FunctionName="[DmProd01].[dbo].[udf_StringSplit]">
<ScalarOperator>
<Identifier>
<ColumnReference Database="[tempdb]" Schema="[dbo]" Table="[#Temp]" Column="TextData" />
</Identifier>
</ScalarOperator>
<ScalarOperator>
<Const ConstValue="(6)" />
</ScalarOperator>
<ScalarOperator>
<Const ConstValue="'|'" />
</ScalarOperator>
</UserDefinedFunction>
</ScalarOperator>
</DefinedValue>
<DefinedValue>
<ColumnReference Column="Expr1009" />
<ScalarOperator ScalarString="[DmProd01].[dbo].[udf_StringSplit]([tempdb].[dbo].[#Temp].[TextData],(7),'|')">
<UserDefinedFunction FunctionName="[DmProd01].[dbo].[udf_StringSplit]">
<ScalarOperator>
<Identifier>
<ColumnReference Database="[tempdb]" Schema="[dbo]" Table="[#Temp]" Column="TextData" />
</Identifier>
</ScalarOperator>
<ScalarOperator>
<Const ConstValue="(7)" />
</ScalarOperator>
<ScalarOperator>
<Const ConstValue="'|'" />
</ScalarOperator>
</UserDefinedFunction>
</ScalarOperator>
</DefinedValue>
</DefinedValues>
<RelOp AvgRowSize="4011" EstimateCPU="0.0193131" EstimateIO="0.145426" EstimateRebinds="0" EstimateRewinds="0" EstimatedExecutionMode="Row" EstimateRows="17486" LogicalOp="Table Scan" NodeId="1" Parallel="false" PhysicalOp="Table Scan" EstimatedTotalSubtreeCost="0.164739" TableCardinality="17486">
<OutputList>
<ColumnReference Database="[tempdb]" Schema="[dbo]" Table="[#Temp]" Column="TextData" />
</OutputList>
<RunTimeInformation>
<RunTimeCountersPerThread Thread="0" ActualRows="17486" ActualEndOfScans="1" ActualExecutions="1" />
</RunTimeInformation>
<TableScan Ordered="false" ForcedIndex="false" ForceScan="false" NoExpandHint="false" Storage="RowStore">
<DefinedValues>
<DefinedValue>
<ColumnReference Database="[tempdb]" Schema="[dbo]" Table="[#Temp]" Column="TextData" />
</DefinedValue>
</DefinedValues>
<Object Database="[tempdb]" Schema="[dbo]" Table="[#Temp]" IndexKind="Heap" Storage="RowStore" />
</TableScan>
</RelOp>
</ComputeScalar>
</RelOp>
</QueryPlan>
</StmtSimple>
</Statements>
</Batch>
</BatchSequence>
</ShowPlanXML>
这是视图的执行计划
<?xml version="1.0" encoding="utf-16"?>
<ShowPlanXML xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" Version="1.2" Build="12.0.4100.1" xmlns="http://schemas.microsoft.com/sqlserver/2004/07/showplan">
<BatchSequence>
<Batch>
<Statements>
<StmtSimple StatementCompId="1" StatementEstRows="17486" StatementId="1" StatementOptmLevel="FULL" StatementOptmEarlyAbortReason="GoodEnoughPlanFound" CardinalityEstimationModelVersion="120" StatementSubTreeCost="0.905265" StatementText="Select --TextData,
 dbo.udf_StringSplit(TextData, 1, '|'), dbo.udf_StringSplit(TextData, 2, '|'), dbo.udf_StringSplit(TextData, 3, '|'),
 dbo.udf_StringSplit(TextData, 4, '|'), dbo.udf_StringSplit(TextData, 5, '|'), dbo.udf_StringSplit(TextData, 6, '|'),
 dbo.udf_StringSplit(TextData, 7, '|')--, TextData
 From vAddresses" StatementType="SELECT" QueryHash="0xB4F8A0B288802C4E" QueryPlanHash="0x28DA02D774B1AF53" RetrievedFromCache="true">
<StatementSetOptions ANSI_NULLS="true" ANSI_PADDING="true" ANSI_WARNINGS="true" ARITHABORT="true" CONCAT_NULL_YIELDS_NULL="true" NUMERIC_ROUNDABORT="false" QUOTED_IDENTIFIER="true" />
<QueryPlan DegreeOfParallelism="0" NonParallelPlanReason="CouldNotGenerateValidParallelPlan" CachedPlanSize="32" CompileTime="3" CompileCPU="3" CompileMemory="520">
<Warnings>
<PlanAffectingConvert ConvertIssue="Cardinality Estimate" Expression="CONVERT(varchar(8000),[DmProd01].[dbo].[HumanaInputFiles].[file_stream],0)" />
</Warnings>
<MemoryGrantInfo SerialRequiredMemory="0" SerialDesiredMemory="0" />
<OptimizerHardwareDependentProperties EstimatedAvailableMemoryGrant="838735" EstimatedPagesCached="419367" EstimatedAvailableDegreeOfParallelism="4" />
<RelOp AvgRowSize="28023" EstimateCPU="0.0017486" EstimateIO="0" EstimateRebinds="0" EstimateRewinds="0" EstimatedExecutionMode="Row" EstimateRows="17486" LogicalOp="Compute Scalar" NodeId="0" Parallel="false" PhysicalOp="Compute Scalar" EstimatedTotalSubtreeCost="0.905265">
<OutputList>
<ColumnReference Column="Expr1004" />
<ColumnReference Column="Expr1005" />
<ColumnReference Column="Expr1006" />
<ColumnReference Column="Expr1007" />
<ColumnReference Column="Expr1008" />
<ColumnReference Column="Expr1009" />
<ColumnReference Column="Expr1010" />
</OutputList>
<RunTimeInformation>
<RunTimeCountersPerThread Thread="0" ActualRows="17486" ActualEndOfScans="1" ActualExecutions="1" />
</RunTimeInformation>
<ComputeScalar>
<DefinedValues>
<DefinedValue>
<ColumnReference Column="Expr1004" />
<ScalarOperator ScalarString="[DmProd01].[dbo].[udf_StringSplit]([Expr1011],(1),'|')">
<UserDefinedFunction FunctionName="[DmProd01].[dbo].[udf_StringSplit]">
<ScalarOperator>
<Identifier>
<ColumnReference Column="Expr1011" />
</Identifier>
</ScalarOperator>
<ScalarOperator>
<Const ConstValue="(1)" />
</ScalarOperator>
<ScalarOperator>
<Const ConstValue="'|'" />
</ScalarOperator>
</UserDefinedFunction>
</ScalarOperator>
</DefinedValue>
<DefinedValue>
<ColumnReference Column="Expr1005" />
<ScalarOperator ScalarString="[DmProd01].[dbo].[udf_StringSplit]([Expr1011],(2),'|')">
<UserDefinedFunction FunctionName="[DmProd01].[dbo].[udf_StringSplit]">
<ScalarOperator>
<Identifier>
<ColumnReference Column="Expr1011" />
</Identifier>
</ScalarOperator>
<ScalarOperator>
<Const ConstValue="(2)" />
</ScalarOperator>
<ScalarOperator>
<Const ConstValue="'|'" />
</ScalarOperator>
</UserDefinedFunction>
</ScalarOperator>
</DefinedValue>
<DefinedValue>
<ColumnReference Column="Expr1006" />
<ScalarOperator ScalarString="[DmProd01].[dbo].[udf_StringSplit]([Expr1011],(3),'|')">
<UserDefinedFunction FunctionName="[DmProd01].[dbo].[udf_StringSplit]">
<ScalarOperator>
<Identifier>
<ColumnReference Column="Expr1011" />
</Identifier>
</ScalarOperator>
<ScalarOperator>
<Const ConstValue="(3)" />
</ScalarOperator>
<ScalarOperator>
<Const ConstValue="'|'" />
</ScalarOperator>
</UserDefinedFunction>
</ScalarOperator>
</DefinedValue>
<DefinedValue>
<ColumnReference Column="Expr1007" />
<ScalarOperator ScalarString="[DmProd01].[dbo].[udf_StringSplit]([Expr1011],(4),'|')">
<UserDefinedFunction FunctionName="[DmProd01].[dbo].[udf_StringSplit]">
<ScalarOperator>
<Identifier>
<ColumnReference Column="Expr1011" />
</Identifier>
</ScalarOperator>
<ScalarOperator>
<Const ConstValue="(4)" />
</ScalarOperator>
<ScalarOperator>
<Const ConstValue="'|'" />
</ScalarOperator>
</UserDefinedFunction>
</ScalarOperator>
</DefinedValue>
<DefinedValue>
<ColumnReference Column="Expr1008" />
<ScalarOperator ScalarString="[DmProd01].[dbo].[udf_StringSplit]([Expr1011],(5),'|')">
<UserDefinedFunction FunctionName="[DmProd01].[dbo].[udf_StringSplit]">
<ScalarOperator>
<Identifier>
<ColumnReference Column="Expr1011" />
</Identifier>
</ScalarOperator>
<ScalarOperator>
<Const ConstValue="(5)" />
</ScalarOperator>
<ScalarOperator>
<Const ConstValue="'|'" />
</ScalarOperator>
</UserDefinedFunction>
</ScalarOperator>
</DefinedValue>
<DefinedValue>
<ColumnReference Column="Expr1009" />
<ScalarOperator ScalarString="[DmProd01].[dbo].[udf_StringSplit]([Expr1011],(6),'|')">
<UserDefinedFunction FunctionName="[DmProd01].[dbo].[udf_StringSplit]">
<ScalarOperator>
<Identifier>
<ColumnReference Column="Expr1011" />
</Identifier>
</ScalarOperator>
<ScalarOperator>
<Const ConstValue="(6)" />
</ScalarOperator>
<ScalarOperator>
<Const ConstValue="'|'" />
</ScalarOperator>
</UserDefinedFunction>
</ScalarOperator>
</DefinedValue>
<DefinedValue>
<ColumnReference Column="Expr1010" />
<ScalarOperator ScalarString="[DmProd01].[dbo].[udf_StringSplit]([Expr1011],(7),'|')">
<UserDefinedFunction FunctionName="[DmProd01].[dbo].[udf_StringSplit]">
<ScalarOperator>
<Identifier>
<ColumnReference Column="Expr1011" />
</Identifier>
</ScalarOperator>
<ScalarOperator>
<Const ConstValue="(7)" />
</ScalarOperator>
<ScalarOperator>
<Const ConstValue="'|'" />
</ScalarOperator>
</UserDefinedFunction>
</ScalarOperator>
</DefinedValue>
</DefinedValues>
<RelOp AvgRowSize="4019" EstimateCPU="0.0034972" EstimateIO="0" EstimateRebinds="0" EstimateRewinds="0" EstimatedExecutionMode="Row" EstimateRows="17486" LogicalOp="Compute Scalar" NodeId="1" Parallel="false" PhysicalOp="Compute Scalar" EstimatedTotalSubtreeCost="0.88673">
<OutputList>
<ColumnReference Column="Expr1011" />
</OutputList>
<ComputeScalar>
<DefinedValues>
<DefinedValue>
<ColumnReference Column="Expr1011" />
<ScalarOperator ScalarString="CONVERT(varchar(8000),[DmProd01].[dbo].[HumanaInputFiles].[file_stream],0)">
<Convert DataType="varchar" Length="8000" Style="0" Implicit="false">
<ScalarOperator>
<Identifier>
<ColumnReference Database="[DmProd01]" Schema="[dbo]" Table="[HumanaInputFiles]" Column="file_stream" />
</Identifier>
</ScalarOperator>
</Convert>
</ScalarOperator>
</DefinedValue>
</DefinedValues>
<RelOp AvgRowSize="4043" EstimateCPU="0.0386262" EstimateIO="0.844606" EstimateRebinds="0" EstimateRewinds="0" EstimatedExecutionMode="Row" EstimateRows="17486" LogicalOp="Table Scan" NodeId="2" Parallel="false" PhysicalOp="Table Scan" EstimatedTotalSubtreeCost="0.883233" TableCardinality="34972">
<OutputList>
<ColumnReference Database="[DmProd01]" Schema="[dbo]" Table="[HumanaInputFiles]" Column="file_stream" />
</OutputList>
<RunTimeInformation>
<RunTimeCountersPerThread Thread="0" ActualRows="17486" ActualEndOfScans="1" ActualExecutions="1" />
</RunTimeInformation>
<TableScan Ordered="false" ForcedIndex="false" ForceScan="false" NoExpandHint="false" Storage="RowStore">
<DefinedValues>
<DefinedValue>
<ColumnReference Database="[DmProd01]" Schema="[dbo]" Table="[HumanaInputFiles]" Column="file_stream" />
</DefinedValue>
</DefinedValues>
<Object Database="[DmProd01]" Schema="[dbo]" Table="[HumanaInputFiles]" IndexKind="Heap" Storage="RowStore" />
<Predicate>
<ScalarOperator ScalarString="[DmProd01].[dbo].[HumanaInputFiles].[file_type]=N'adr'">
<Compare CompareOp="EQ">
<ScalarOperator>
<Identifier>
<ColumnReference Database="[DmProd01]" Schema="[dbo]" Table="[HumanaInputFiles]" Column="file_type" ComputedColumn="true" />
</Identifier>
</ScalarOperator>
<ScalarOperator>
<Const ConstValue="N'adr'" />
</ScalarOperator>
</Compare>
</ScalarOperator>
</Predicate>
</TableScan>
</RelOp>
</ComputeScalar>
</RelOp>
</ComputeScalar>
</RelOp>
</QueryPlan>
</StmtSimple>
</Statements>
</Batch>
</BatchSequence>
</ShowPlanXML>
-- Create 35,000 unique sample rows: each row number is prefixed to the same
-- pipe-delimited string, so every TextData value is distinct.
SELECT
    nums.N,
    CONCAT(nums.N, seed.TextData) AS TextData
INTO #Temp
FROM (VALUES ('Aaa|Bbb|Ccc|Ddd|Eee|Fff|Ggg')) AS seed (TextData)
CROSS APPLY (
    -- Row source only: the cross join of spt_values with itself supplies
    -- enough rows to number 1..35000.
    SELECT TOP 35000
        ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS N
    FROM master..spt_values AS v1
    CROSS JOIN master..spt_values AS v2
) AS nums
-- For every staged row, parse TextData on '|' and pivot the first seven
-- items (by RetSeq) into the columns Pos1..Pos7.
SELECT
    parsed.Pos1,
    parsed.Pos2,
    parsed.Pos3,
    parsed.Pos4,
    parsed.Pos5,
    parsed.Pos6,
    parsed.Pos7
FROM #Temp AS src
CROSS APPLY (
    -- The aggregate has no GROUP BY, so it yields exactly one row per source row.
    SELECT
        MAX(CASE WHEN item.RetSeq = 1 THEN item.RetVal END) AS Pos1,
        MAX(CASE WHEN item.RetSeq = 2 THEN item.RetVal END) AS Pos2,
        MAX(CASE WHEN item.RetSeq = 3 THEN item.RetVal END) AS Pos3,
        MAX(CASE WHEN item.RetSeq = 4 THEN item.RetVal END) AS Pos4,
        MAX(CASE WHEN item.RetSeq = 5 THEN item.RetVal END) AS Pos5,
        MAX(CASE WHEN item.RetSeq = 6 THEN item.RetVal END) AS Pos6,
        MAX(CASE WHEN item.RetSeq = 7 THEN item.RetVal END) AS Pos7
    FROM [dbo].[udf-Str-Parse-8K](src.TextData, '|') AS item
) AS parsed
-- Inline table-valued splitter.
--   @String    : text to split
--   @Delimiter : delimiter (may be longer than one character)
-- Returns one row per item:
--   RetSeq : 1-based ordinal of the item within @String
--   RetVal : the item with leading and trailing spaces trimmed
-- The position generator below yields at most 10,000 candidate positions.
CREATE FUNCTION [dbo].[udf-Str-Parse-8K] (@String varchar(max),@Delimiter varchar(25))
RETURNS TABLE
AS
RETURN (
    WITH
    -- Ten constant rows used as the base of the number generator.
    Ten (N) AS (
        SELECT 1
        FROM (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) AS v (N)
    ),
    -- Character positions 1..DATALENGTH(@String) (zero rows when @String is NULL).
    CharPos (N) AS (
        SELECT TOP (ISNULL(DATALENGTH(@String), 0))
            ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
        FROM (
            SELECT 1 AS N
            FROM Ten AS a
            CROSS JOIN Ten AS b
            CROSS JOIN Ten AS c
            CROSS JOIN Ten AS d
        ) AS grid
    ),
    -- Start position of every item: position 1 plus the position right after
    -- each occurrence of the delimiter.
    ItemStart (N) AS (
        SELECT 1
        UNION ALL
        SELECT p.N + DATALENGTH(@Delimiter)
        FROM CharPos AS p
        WHERE SUBSTRING(@String, p.N, DATALENGTH(@Delimiter)) = @Delimiter
    ),
    -- Item length: distance to the next delimiter, or 8000 when none follows.
    ItemSpan (N, L) AS (
        SELECT
            s.N,
            ISNULL(NULLIF(CHARINDEX(@Delimiter, @String, s.N), 0) - s.N, 8000)
        FROM ItemStart AS s
    )
    SELECT
        RetSeq = ROW_NUMBER() OVER (ORDER BY span.N),
        RetVal = LTRIM(RTRIM(SUBSTRING(@String, span.N, span.L)))
    FROM ItemSpan AS span
);
--Original Source http://www.sqlservercentral.com/articles/Tally+Table/72993/
-- Example call; the expected result set (RetSeq, RetVal) is listed right below.
Select * from [dbo].[udf-Str-Parse-8K]('Aaa|Bbb|Ccc|Ddd|Eee|Fff|Ggg','|')
RetSeq RetVal
1 Aaa
2 Bbb
3 Ccc
4 Ddd
5 Eee
6 Fff
7 Ggg
USE tempdb
GO
IF OBJECT_ID('dbo.SourceTable') IS NOT NULL
    DROP TABLE dbo.SourceTable
GO
-- Demo table holding one pipe-delimited string per row.
CREATE TABLE dbo.SourceTable (
    ID INT IDENTITY PRIMARY KEY,
    Col1 VARCHAR(100) NOT NULL
);
INSERT dbo.SourceTable (Col1) VALUES ('Aaa|Bbb|Ccc|Ddd|Eee|Fff|Ggg')
INSERT dbo.SourceTable (Col1) VALUES ('hhh|iii|JJJ|kkk')

-- Split Col1 by turning it into <root><i>item</i>...</root> and shredding the
-- <i> nodes; the result has one row per item.
SELECT
    b.ID,
    c.XmlCol.value('.', 'VARCHAR(100)') AS ItemVal
FROM (
    SELECT
        a.ID,
        CONVERT(XML,
            '<root><i>'
            -- Entitize the raw text first (& -> &amp;, < -> &lt;, ...): without this,
            -- any value containing XML special characters makes CONVERT(XML, ...) fail.
            -- .value() decodes the entities again when the items are read back.
            + REPLACE((SELECT a.Col1 AS [*] FOR XML PATH('')), '|', '</i><i>')
            + '</i></root>'
        ) AS Col1AsXML
    FROM dbo.SourceTable a
) b
OUTER APPLY b.Col1AsXML.nodes('root/i') c(XmlCol)
--OPTION(FORCE ORDER)
-- Inline table-valued splitter driven by a generated tally (numbers) table.
--   @InputStr  : text to split
--   @Delimiter : delimiter string
-- Returns one row per item in the column TextLine.
CREATE FUNCTION [UDF_Split] (
    @InputStr NVARCHAR(Max),
    @Delimiter NVARCHAR(255)
)
RETURNS TABLE
AS RETURN(
    WITH
        -- Row generator: each level cross-joins the previous one with itself.
        Rows2   AS (SELECT 0 AS g UNION ALL SELECT 0),                    -- 2
        Rows4   AS (SELECT 0 AS g FROM Rows2 AS a CROSS JOIN Rows2 AS b), -- 4
        Rows16  AS (SELECT 0 AS g FROM Rows4 AS a CROSS JOIN Rows4 AS b), -- 16
        Rows256 AS (SELECT 0 AS g FROM Rows16 AS a CROSS JOIN Rows16 AS b), -- 256
        Rows64K AS (SELECT 0 AS g FROM Rows256 AS a CROSS JOIN Rows256 AS b), -- 65,536
        Rows4G  AS (SELECT 0 AS g FROM Rows64K AS a CROSS JOIN Rows64K AS b), -- 4,294,967,296
        Tally (n) AS (SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM Rows4G)
    SELECT
        -- Appending the delimiter guarantees CHARINDEX finds a terminator for the last item.
        SUBSTRING(
            @InputStr,
            t.n,
            CHARINDEX(@Delimiter, @InputStr + @Delimiter, t.n) - t.n
        ) AS TextLine
    FROM Tally AS t
    WHERE
        -- NOTE(review): the upper bound adds LEN(@InputStr) twice; this looks like a
        -- typo for LEN(@InputStr) + LEN(@Delimiter). Kept as-is because changing it
        -- would alter the result for an empty @InputStr - confirm intent before fixing.
        t.n BETWEEN 1 AND LEN(@InputStr) + LEN(@InputStr)
        -- Prefixing the delimiter makes position 1 qualify as the start of the first item.
        AND SUBSTRING(@Delimiter + @InputStr, t.n, LEN(@Delimiter)) = @Delimiter
);
-- Build #TestData: 100,000 rows, each with a FilePath made of seven random
-- numbers (1000..10998) separated by '|'.
IF OBJECT_ID('tempdb..#TestData', 'U') IS NOT NULL
    DROP TABLE #TestData;

WITH
    cte_n1 (n) AS (SELECT 1 FROM (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) n (n)), -- 10 rows
    cte_n2 (n) AS (SELECT 1 FROM cte_n1 a CROSS JOIN cte_n1 b),                           -- 100 rows
    cte_n3 (n) AS (SELECT 1 FROM cte_n2 a CROSS JOIN cte_n2 b),                           -- 10,000 rows
    cte_Tally (n) AS (
        SELECT TOP 100000
            ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
        FROM
            cte_n3 a CROSS JOIN cte_n3 b
    )
SELECT
    -- ISNULL makes the SELECT INTO column NOT NULL so it can carry the primary key below.
    ID = ISNULL(CAST(t.n AS INT), 0),
    FilePath = CAST(fp.FilePath AS VARCHAR(1000))
INTO #TestData
FROM
    cte_Tally t
    -- The modulo is applied BEFORE ABS: ABS(CHECKSUM(NEWID())) overflows when
    -- CHECKSUM returns -2147483648, while ABS(x % 9999) is always in range and
    -- yields the same value as ABS(x) % 9999 for every other input.
    CROSS APPLY ( VALUES (CONCAT(
        ABS(CHECKSUM(NEWID()) % 9999) + 1000, '|',
        ABS(CHECKSUM(NEWID()) % 9999) + 1000, '|',
        ABS(CHECKSUM(NEWID()) % 9999) + 1000, '|',
        ABS(CHECKSUM(NEWID()) % 9999) + 1000, '|',
        ABS(CHECKSUM(NEWID()) % 9999) + 1000, '|',
        ABS(CHECKSUM(NEWID()) % 9999) + 1000, '|',
        ABS(CHECKSUM(NEWID()) % 9999) + 1000
        )
    ) ) fp (FilePath);

ALTER TABLE #TestData ADD PRIMARY KEY CLUSTERED (ID);
--=============================================================================
-- Split FilePath into seven columns with a cascade of CHARINDEX lookups.
-- The variable is declared as @ID (it was @id) so that its casing matches the
-- assignment below; variable names are case-sensitive on instances that use a
-- case-sensitive collation.
DECLARE -- Dump values into variables to eliminate display rendering from execution time.
    @ID INT,
    @Col_1 VARCHAR(5),
    @Col_2 VARCHAR(5),
    @Col_3 VARCHAR(5),
    @Col_4 VARCHAR(5),
    @Col_5 VARCHAR(5),
    @Col_6 VARCHAR(5),
    @Col_7 VARCHAR(5);

SELECT
    @ID = td.ID,
    -- Each column is the text between two consecutive delimiter positions;
    -- ABS keeps the length argument of SUBSTRING non-negative.
    @Col_1 = SUBSTRING(td.FilePath, 1, ABS(d1.DelimLocation - 1)),
    @Col_2 = SUBSTRING(td.FilePath, d1.DelimLocation + 1, ABS(d2.DelimLocation - d1.DelimLocation - 1)),
    @Col_3 = SUBSTRING(td.FilePath, d2.DelimLocation + 1, ABS(d3.DelimLocation - d2.DelimLocation - 1)),
    @Col_4 = SUBSTRING(td.FilePath, d3.DelimLocation + 1, ABS(d4.DelimLocation - d3.DelimLocation - 1)),
    @Col_5 = SUBSTRING(td.FilePath, d4.DelimLocation + 1, ABS(d5.DelimLocation - d4.DelimLocation - 1)),
    @Col_6 = SUBSTRING(td.FilePath, d5.DelimLocation + 1, ABS(d6.DelimLocation - d5.DelimLocation - 1)),
    @Col_7 = SUBSTRING(td.FilePath, d6.DelimLocation + 1, 1000)
FROM
    #TestData td
    -- dc: number of '|' characters in the row.
    CROSS APPLY ( VALUES (LEN(td.FilePath) - LEN(REPLACE(td.FilePath, '|', ''))) ) dc (DelimiterCount)
    -- d1..d6: position of the n-th delimiter, or 1000 when the row has fewer than n delimiters.
    CROSS APPLY ( VALUES (IIF(dc.DelimiterCount < 1, 1000, CHARINDEX('|', td.FilePath, 1))) ) d1 (DelimLocation)
    CROSS APPLY ( VALUES (IIF(dc.DelimiterCount < 2, 1000, CHARINDEX('|', td.FilePath, d1.DelimLocation + 1))) ) d2 (DelimLocation)
    CROSS APPLY ( VALUES (IIF(dc.DelimiterCount < 3, 1000, CHARINDEX('|', td.FilePath, d2.DelimLocation + 1))) ) d3 (DelimLocation)
    CROSS APPLY ( VALUES (IIF(dc.DelimiterCount < 4, 1000, CHARINDEX('|', td.FilePath, d3.DelimLocation + 1))) ) d4 (DelimLocation)
    CROSS APPLY ( VALUES (IIF(dc.DelimiterCount < 5, 1000, CHARINDEX('|', td.FilePath, d4.DelimLocation + 1))) ) d5 (DelimLocation)
    CROSS APPLY ( VALUES (IIF(dc.DelimiterCount < 6, 1000, CHARINDEX('|', td.FilePath, d5.DelimLocation + 1))) ) d6 (DelimLocation)
    CROSS APPLY dbo.MakeParallel() mp; -- Forces a parallel execution plan.
    -- http://dataeducation.com/next-level-parallel-plan-forcing-an-alternative-to-8649/
--=============================================================================
-- Split FilePath with dbo.DelimitedSplit8K and pivot items 1..7 into columns.
-- The variable is declared as @ID (it was @id) so that its casing matches the
-- assignment below; variable names are case-sensitive on instances that use a
-- case-sensitive collation.
DECLARE -- Dump values into variables to eliminate display rendering from execution time.
    @ID INT,
    @Col_1 VARCHAR(5),
    @Col_2 VARCHAR(5),
    @Col_3 VARCHAR(5),
    @Col_4 VARCHAR(5),
    @Col_5 VARCHAR(5),
    @Col_6 VARCHAR(5),
    @Col_7 VARCHAR(5);

SELECT
    @ID = td.ID,
    -- Conditional aggregation: each MAX picks the single Item whose ItemNumber matches.
    @Col_1 = MAX(CASE WHEN sc.ItemNumber = 1 THEN sc.Item END),
    @Col_2 = MAX(CASE WHEN sc.ItemNumber = 2 THEN sc.Item END),
    @Col_3 = MAX(CASE WHEN sc.ItemNumber = 3 THEN sc.Item END),
    @Col_4 = MAX(CASE WHEN sc.ItemNumber = 4 THEN sc.Item END),
    @Col_5 = MAX(CASE WHEN sc.ItemNumber = 5 THEN sc.Item END),
    @Col_6 = MAX(CASE WHEN sc.ItemNumber = 6 THEN sc.Item END),
    @Col_7 = MAX(CASE WHEN sc.ItemNumber = 7 THEN sc.Item END)
FROM
    #TestData td
    CROSS APPLY dbo.DelimitedSplit8K(td.FilePath, '|') sc
    CROSS APPLY dbo.MakeParallel() mp -- Forces a parallel execution plan.
    -- http://dataeducation.com/next-level-parallel-plan-forcing-an-alternative-to-8649/
GROUP BY
    td.ID;