Sql server PowerShell-使用数据透视导入CSV
我设法创建了一个PowerShell脚本,以下载并加载到SQL Server数据库中一些包含数据透视的文件:该系列的每一天都是一列,其中包含当天的值 问题是执行需要很长时间,加载每个平均有250行的文件大约需要15秒。我认为问题在于,当我试图通过“索引”获取字段的值时,因为我没有找到一种聪明的方法 有没有更好的方法可以使用PowerShell将这种每天都会更改的CSV数据插入数据库 我对PowerShell脚本了解不多,但我成功创建了以下脚本: 我从这里下载的文件: 处理文件的脚本:Sql server PowerShell-使用数据透视导入CSV,sql-server,powershell,Sql Server,Powershell,我设法创建了一个PowerShell脚本,以下载并加载到SQL Server数据库中一些包含数据透视的文件:该系列的每一天都是一列,其中包含当天的值 问题是执行需要很长时间,加载每个平均有250行的文件大约需要15秒。我认为问题在于,当我试图通过“索引”获取字段的值时,因为我没有找到一种聪明的方法 有没有更好的方法可以使用PowerShell将这种每天都会更改的CSV数据插入数据库 我对PowerShell脚本了解不多,但我成功创建了以下脚本: 我从这里下载的文件: 处理文件的脚本: Clea
Clear-Host
$date = (Get-Date).Date.AddDays(-1).ToString('MM-dd-yyyy')
$path = 'C:\Dataset\'
$items = (Get-ChildItem -Path ($path + "time_series*.csv") | Select FullName) #| Select -First 1
$sql_instance_name = '.'
$db_name = 'COVID-19'
foreach ($item in $items)
{
$dt_start = Get-Date
$schema = "stg"
$table = (Split-Path -Path $item.FullName -Leaf).Split('.')[0]
Write-Host "File:"(Split-Path -Path $item.FullName -Leaf) -ForegroundColor Yellow
Write-Host "Schema: $schema" -ForegroundColor Yellow
Write-Host "Table: [$table]" -ForegroundColor Yellow
$header = (Get-Content $item.FullName | Select -First 1).replace(",", "|,|")
$i = 0; $new_header = @();
foreach ($column in $header.Replace('|', '').split(','))
{
$new_header += "Column_$i"
$i++
}
$drop_table = "if (object_id('stg.[$table]')) is not null drop table $schema.[$table];"
Invoke-Sqlcmd -Database $db_name -Query $drop_table -ServerInstance $sql_instance_name
$create_table = ("if (object_id('stg.[$table]')) is null
create table $schema.[$table] (" +
" id int identity constraint [pk_$table] primary key," +
" [" + $header + "] varchar(500),`n`tload_date datetime`n);").Replace('|,|', "] varchar(500), [")
Invoke-Sqlcmd -Database $db_name -Query $create_table -ServerInstance $sql_instance_name
$csv = Import-Csv -Path $item.FullName -Header $new_header | Select -Skip 1
$insert = $null
foreach ($row in $csv)
{
$query = "insert into stg.[" + (Split-Path -Path $item.FullName -Leaf).Split('.')[0] + "] values ("
foreach ($column in $new_header)
{
<# Perhaps this part slows down the process by the means of
getting the value for the column (I couldn't find a way to
simply reference the column by index like $csv.column[$i]
till the last one)
#>
$value = ($row | Select $column)
$query += "nullif('" + ($value | % { $_.$(( $value | gm | ? { $_.membertype -eq "noteproperty"} )[0].name) }).Replace("'", "''") + "',''),"
}
$query += " current_timestamp);"
$insert = $query
#Write-Host $row.Column_1
Invoke-Sqlcmd -Database $db_name -Query $insert -ServerInstance $sql_instance_name
}
Write-Host "Lines:"$csv.count -ForegroundColor Yellow
#Start-Sleep -Seconds 2
$dt_end = Get-Date
Write-Host "Elapsed time:"(New-TimeSpan -Start $dt_start -End $dt_end).TotalSeconds -ForegroundColor Red
Write-Host ("-" * 50)
}
如果2016+,您可以下载JSON数据 如果
File: time_series_covid19_confirmed_global.csv
Schema: stg
Table: [time_series_covid19_confirmed_global]
Lines: 264
Elapsed time: 14,3725288
--------------------------------------------------
File: time_series_covid19_deaths_global.csv
Schema: stg
Table: [time_series_covid19_deaths_global]
Lines: 264
Elapsed time: 14,1963788
--------------------------------------------------
File: time_series_covid19_recovered_global.csv
Schema: stg
Table: [time_series_covid19_recovered_global]
Lines: 250
Elapsed time: 13,5150064
--------------------------------------------------
exec master..xp_cmdshell 'powershell.exe Invoke-WebRequest "https://opendata.ecdc.europa.eu/covid19/casedistribution/json/" -OutFile "c:\working\covid.json"',no_output
Declare @json varchar(max);
Select @json = BulkColumn FROM OPENROWSET(BULK 'c:\working\covid.json', SINGLE_BLOB) x;
;with cte as (
Select [DataDate] = try_convert(date,DateRep,105)
,[CountryCd]= try_convert(varchar(50),countryterritoryCode)
,[Country] = try_convert(varchar(150),countriesAndTerritories)
,[Cases] = try_convert(int ,cases)
,[Deaths] = try_convert(int ,deaths)
,[Pop] = try_convert(int ,[popData2018])
,rtc = sum(try_convert(int ,cases)) over (partition by countryterritoryCode order by try_convert(date,DateRep,105))
,rtd = sum(try_convert(int ,deaths)) over (partition by countryterritoryCode order by try_convert(date,DateRep,105))
From (
Select Idx= B.[Key]
,C.*
From OpenJSON(@json ) A
Cross Apply OpenJson(A.value) B
Cross Apply OpenJson(B.value) C
) src
Pivot (max(value) for [Key] in ( [dateRep],[cases],[deaths],[countriesAndTerritories],[geoId],[countryterritoryCode],[popData2018] ) ) pvt
)
Select DataDate
,CountryCd
,Country
,Cases = format(Cases,'#,###')
,Deaths= format(Deaths,'#,###')
,Pop = format(Pop/1000000.0,'#,##0.0')+'MM'
,RTC = format(RTC,'#,###')
,RTD = format(RTD,'#,###')
,Mort = format((rtd*100.0) / nullif(rtc,0),'##0.00')+'%'
,PctPop= format((cases*100.0) / nullif(Pop,0),'##0.0000')+'%'
From cte
Where DataDate='2020-04-11'
Order By try_convert(money,RTC) desc