Warning: file_get_contents(/data/phpspider/zhask/data//catemap/2/powershell/13.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Sql server PowerShell-使用数据透视导入CSV_Sql Server_Powershell - Fatal编程技术网

Sql server PowerShell-使用数据透视导入CSV

Sql server PowerShell-使用数据透视导入CSV,sql-server,powershell,Sql Server,Powershell,我设法创建了一个PowerShell脚本,以下载并加载到SQL Server数据库中一些包含数据透视的文件:该系列的每一天都是一列,其中包含当天的值 问题是执行需要很长时间,加载每个平均有250行的文件大约需要15秒。我认为问题在于,当我试图通过“索引”获取字段的值时,因为我没有找到一种聪明的方法 有没有更好的方法可以使用PowerShell将这种每天都会更改的CSV数据插入数据库 我对PowerShell脚本了解不多,但我成功创建了以下脚本: 我从这里下载的文件: 处理文件的脚本: Clea

我写了一个PowerShell脚本,用来下载一些透视(pivot)格式的文件并将其加载到SQL Server数据库中:时间序列中的每一天都是一列,列中存放当天的值。

问题是执行时间很长:每个文件平均只有250行,但加载一个文件大约需要15秒。我认为问题出在我按“索引”获取字段值的方式上,因为我没有找到更聪明的办法。

有没有更好的方法,可以使用PowerShell把这种每天都会增加列的CSV数据插入数据库?

我对PowerShell脚本了解不多,但我成功创建了以下脚本:

我从这里下载的文件:

处理文件的脚本:

<#
    Loads every time_series*.csv file found in $path into its own staging
    table in SQL Server. The table is named after the file (text before the
    first '.') and is dropped and re-created on every run, because the pivoted
    files gain one new date column per day.

    Each CSV column becomes a varchar(500) column named after the header
    field; empty cells are stored as NULL and every row is stamped with
    current_timestamp in load_date.

    Limitations:
      * The header line is split on ',' only, so a quoted header field that
        itself contains a comma is not supported.
      * Values are embedded as escaped SQL literals (single quotes doubled).
        Only use this with trusted files.
#>
Clear-Host

$path = 'C:\Dataset\'
$items = Get-ChildItem -Path ($path + "time_series*.csv") #| Select -First 1

$sql_instance_name = '.'
$db_name = 'COVID-19'
$schema = "stg"

# Number of INSERT statements sent to the server per Invoke-Sqlcmd call.
# One call per row is what made the original load slow; note that a failing
# statement now aborts the rest of its batch rather than a single row.
$batch_size = 200

foreach ($item in $items)
{
    $dt_start = Get-Date
    $table = $item.Name.Split('.')[0]

    Write-Host "File:" $item.Name -ForegroundColor Yellow
    Write-Host "Schema: $schema" -ForegroundColor Yellow
    Write-Host "Table: [$table]" -ForegroundColor Yellow

    # Only the header line is needed here, so stop reading after one line.
    $header_line = Get-Content -Path $item.FullName -TotalCount 1
    if ([string]::IsNullOrEmpty($header_line))
    {
        Write-Warning "Skipping empty file: $($item.FullName)"
        continue
    }
    $columns = $header_line.Split(',')

    # Positional property names (Column_0, Column_1, ...) for Import-Csv, so
    # header fields such as '1/22/20' never have to be valid property names.
    $new_header = @(0..($columns.Count - 1) | ForEach-Object { "Column_$_" })

    # A ']' inside a bracket-quoted identifier must be doubled.
    $column_defs = ($columns | ForEach-Object { "[" + $_.Replace(']', ']]') + "] varchar(500)" }) -join ', '

    $drop_table = "if (object_id('$schema.[$table]')) is not null drop table $schema.[$table];"

    Invoke-Sqlcmd -Database $db_name -Query $drop_table -ServerInstance $sql_instance_name

    $create_table = ("if (object_id('$schema.[$table]')) is null
    create table $schema.[$table] (" +
    " id int identity constraint [pk_$table] primary key, " +
    $column_defs + ",`n`tload_date datetime`n);")

    Invoke-Sqlcmd -Database $db_name -Query $create_table -ServerInstance $sql_instance_name

    # @() keeps .Count reliable even when the file has zero or one data row.
    $csv = @(Import-Csv -Path $item.FullName -Header $new_header | Select-Object -Skip 1)

    $statements = New-Object 'System.Collections.Generic.List[string]'
    foreach ($row in $csv)
    {
        # $row.$column reads the property directly by name; no Select-Object
        # or Get-Member pipeline is needed per cell.
        $values = foreach ($column in $new_header)
        {
            $cell = $row.$column
            if ($null -eq $cell)
            {
                # Row is shorter than the header: Import-Csv leaves the
                # missing trailing properties as $null.
                'null'
            }
            else
            {
                "nullif('" + $cell.Replace("'", "''") + "','')"
            }
        }

        $statements.Add("insert into $schema.[$table] values (" + ($values -join ',') + ", current_timestamp);")

        if ($statements.Count -ge $batch_size)
        {
            Invoke-Sqlcmd -Database $db_name -Query ($statements -join "`n") -ServerInstance $sql_instance_name
            $statements.Clear()
        }
    }

    # Send whatever is left over from the last, partially filled batch.
    if ($statements.Count -gt 0)
    {
        Invoke-Sqlcmd -Database $db_name -Query ($statements -join "`n") -ServerInstance $sql_instance_name
    }

    Write-Host "Lines:" $csv.Count -ForegroundColor Yellow
    #Start-Sleep -Seconds 2
    $dt_end = Get-Date
    Write-Host "Elapsed time:"(New-TimeSpan -Start $dt_start -End $dt_end).TotalSeconds -ForegroundColor Red
    Write-Host ("-" * 50)
}

如果使用的是SQL Server 2016或更高版本,您可以下载JSON数据,并在数据库中直接解析。

如果
File: time_series_covid19_confirmed_global.csv
Schema: stg
Table: [time_series_covid19_confirmed_global]
Lines: 264
Elapsed time: 14,3725288
--------------------------------------------------
File: time_series_covid19_deaths_global.csv
Schema: stg
Table: [time_series_covid19_deaths_global]
Lines: 264
Elapsed time: 14,1963788
--------------------------------------------------
File: time_series_covid19_recovered_global.csv
Schema: stg
Table: [time_series_covid19_recovered_global]
Lines: 250
Elapsed time: 13,5150064
--------------------------------------------------
-- Downloads the ECDC case-distribution JSON to disk through xp_cmdshell,
-- loads it into a variable, shreds it with OPENJSON, pivots the key/value
-- pairs of each record into columns, and reports one day's figures per
-- country together with running totals.
-- Requires xp_cmdshell to be enabled and OPENJSON / TRY_CONVERT support.
exec master..xp_cmdshell 'powershell.exe Invoke-WebRequest "https://opendata.ecdc.europa.eu/covid19/casedistribution/json/" -OutFile "c:\working\covid.json"',no_output

Declare @json varchar(max); 
Select @json = BulkColumn FROM  OPENROWSET(BULK 'c:\working\covid.json', SINGLE_BLOB) x; 

-- Day to report on.
Declare @report_date date = '2020-04-11';

;with cte as (
-- Style 105 parses dd-mm-yyyy dates.
-- rtc / rtd are running totals of cases / deaths per country code, in date order.
Select [DataDate] = try_convert(date,DateRep,105)
      ,[CountryCd]= try_convert(varchar(50),countryterritoryCode)
      ,[Country]  = try_convert(varchar(150),countriesAndTerritories)
      ,[Cases]    = try_convert(int ,cases)
      ,[Deaths]   = try_convert(int ,deaths)
      ,[Pop]      = try_convert(int ,[popData2018])
      ,rtc        = sum(try_convert(int ,cases)) over (partition by countryterritoryCode order by try_convert(date,DateRep,105))
      ,rtd        = sum(try_convert(int ,deaths)) over (partition by countryterritoryCode order by try_convert(date,DateRep,105))
 From (
-- Three OPENJSON levels; presumably A = top-level property holding the
-- record array, B = one record (B.[Key] is its array index), C = that
-- record's properties -- confirm against the downloaded file.
-- Only [Key] and value are projected from C: OPENJSON also returns a [type]
-- column, and PIVOT groups by every source column it does not consume, so
-- selecting C.* would split a record whose values have different JSON types
-- (string / number / null) across several output rows.
Select Idx= B.[Key]
       ,C.[Key]
       ,C.value
 From  OpenJSON(@json  ) A
 Cross Apply OpenJson(A.value) B
 Cross Apply OpenJson(B.value) C
) src
Pivot (max(value) for [Key] in ( [dateRep],[cases],[deaths],[countriesAndTerritories],[geoId],[countryterritoryCode],[popData2018]  ) ) pvt
)
Select DataDate
      ,CountryCd
      ,Country
      ,Cases = format(Cases,'#,###')
      ,Deaths= format(Deaths,'#,###')
      ,Pop   = format(Pop/1000000.0,'#,##0.0')+'MM'
      ,RTC   = format(RTC,'#,###')
      ,RTD   = format(RTD,'#,###')
      ,Mort  = format((rtd*100.0) / nullif(rtc,0),'##0.00')+'%'
      ,PctPop= format((cases*100.0) / nullif(Pop,0),'##0.0000')+'%'
 From  cte 
 Where DataDate=@report_date
 -- Sort on the numeric running total from the CTE, not on the formatted
 -- RTC string of the select list.
 Order By cte.rtc desc