R:在 Shiny 中高效绘制大数据图中的数据点 — 目标

R 在Shining中从大数据图高效地绘制数据点 目标,r,shiny,large-data,R,Shiny,Large Data,实现一个闪亮的应用程序,有效地可视化和调整上传的数据集。每套可包含100000至200000行。数据调整完成后,可以下载调整后的数据。分步骤: 数据上传 数据选择与可视化 数据(点)删除 下载选项 问题 虽然该应用程序基本上可以正常工作,但数据可视化和删除需要花费太多时间 代码 样本数据 生成了一些示例数据。数据可以上传到Shining应用程序上。示例数据分布与我的实际数据不相似。实际数据包含清晰可识别的异常值,看起来像一个有峰值的光谱 a = sample(1:1e12, 1e5, repla

实现一个 Shiny 应用程序,高效地可视化和调整上传的数据集。每个数据集可包含 100000 至 200000 行。数据调整完成后,可以下载调整后的数据。步骤如下:

  • 数据上传
  • 数据选择与可视化
  • 数据(点)删除
  • 下载选项
  • 问题:虽然该应用程序基本上可以正常工作,但数据可视化和数据点删除需要花费太多时间。

    代码 — 样本数据:下面生成了一些示例数据,这些数据可以上传到 Shiny 应用程序中。示例数据的分布与我的实际数据并不相似;实际数据包含清晰可辨的异常值,看起来像一条带有峰值的光谱。

    # Generate example data sets that can be uploaded to the Shiny app.
    # `a` and `b` are two independent draws of 1e5 values from 1..1e12
    # (with replacement).
    a <- sample(1:1e12, 1e5, replace = TRUE)
    b <- sample(1:1e12, 1e5, replace = TRUE)
    dummy1 <- data.frame(Frequency = a, Amplitude = a)
    dummy2 <- data.frame(Frequency = b, Amplitude = b)
    dummy3 <- data.frame(Frequency = a, Amplitude = b)
    # Sample data
    # row.names = FALSE: the app reads column 1 as Frequency and column 2 as
    # Amplitude by position, so the files must not start with a row-name column.
    write.csv(dummy1, "dummy1.csv", row.names = FALSE)
    write.csv(dummy2, "dummy2.csv", row.names = FALSE)
    # Each data set gets its own file; dummy3 used to overwrite 'dummy2.csv'.
    write.csv(dummy3, "dummy3.csv", row.names = FALSE)
    
    Shiny 应用:该应用程序读取上传的数据并进行绘图。(上面生成的示例数据可以上传到该应用程序中。)可以删除部分数据点并下载新的数据。

    # Packages
    library(shiny)
    library(ggplot2)
    library(data.table)
    # UI
    # Page layout: the first row holds the multi-select used to choose which
    # uploaded data sets are shown; the second row holds the brushable plot and
    # the download button. The select starts with a single empty choice and is
    # filled in by the server.
    ui <- fluidPage(
      fluidRow(
        selectInput(
          inputId = "selection",
          label = "Set Selection:",
          choices = "",
          selected = "",
          multiple = TRUE
        )
      ),
      fluidRow(
        plotOutput(outputId = "plot", brush = "plot_brush_"),
        downloadButton(outputId = "download", label = "Download the data")
      )
    )
    
    # Server
    #
    # Shiny server function of the spectrum clean-up app.
    #
    # Arguments: `session`, `input`, `output` -- the objects Shiny passes to a
    # server function.
    #
    # Behaviour:
    #   * shows a modal dialog asking for one or more CSV uploads;
    #   * reads column 1 as Frequency and column 2 as Amplitude of every file and
    #     tags the rows with the upload index (factor `Indicator`);
    #   * plots the data sets chosen in the "selection" input;
    #   * removes the brushed points of the chosen data sets;
    #   * offers the remaining rows as a CSV download.
    server <- function(session, input, output) {
      # Pop up for data upload
      query_modal <- modalDialog(
        title = "Upload Spectrum",
        fileInput("file", "file", multiple = TRUE, accept = c(".csv")),
        easyClose = FALSE
      )
      showModal(query_modal)

      # Working copy of the uploaded data. It lives in a single reactiveValues
      # object created once per session, instead of one created inside a
      # reactive expression.
      store <- reactiveValues(data = NULL)

      ## Upload
      observeEvent(input$file, {
        uploads <- input$file
        # seq_len() instead of 1:length(): safe for zero rows, and lapply()
        # avoids growing a list inside a loop.
        sets <- lapply(seq_len(nrow(uploads)), function(nr) {
          raw <- read.csv(uploads$datapath[nr])
          data.table(
            Frequency = as.numeric(raw[[1]]),
            Amplitude = as.numeric(raw[[2]]),
            Indicator = as.factor(nr)
          )
        })
        combined <- rbindlist(sets)
        store$data <- combined

        ## Input selection
        # Offer every uploaded set and pre-select all of them. The former
        # default 'Entire' was not one of the choices, so nothing was selected
        # and the plot started out empty.
        set_ids <- levels(combined$Indicator)
        updateSelectInput(
          session,
          "selection",
          "Set Selection:",
          choices = set_ids,
          selected = set_ids
        )
      })

      ## Plot
      output$plot <- renderPlot({
        req(store$data)
        shown <- store$data[Indicator %in% input$selection]
        ggplot(shown, aes(Frequency, Amplitude, color = Indicator)) +
          geom_point(show.legend = TRUE)
      })

      ## Download
      output$download <- downloadHandler(
        filename = function() {
          # Strip the whole extension; the previous gsub('.{1}$', ...) removed
          # only the last character ("dummy1.csv" -> "dummy1.csmanipulated.csv").
          stem <- tools::file_path_sans_ext(input$file$name[1])
          paste0(stem, "manipulated", ".csv")
        },
        content = function(fname) {
          req(store$data)
          kept <- store$data[, .SD, .SDcols = c("Frequency", "Amplitude", "Indicator")]
          write.csv(kept, fname, row.names = FALSE)
        }
      )

      ## Adjust
      # Runs only when the brush changes. The previous observe() referenced an
      # undefined `mt`, and it both read and wrote the data, so every change
      # re-triggered it and forced additional redraws of the (slow) plot.
      observeEvent(input$plot_brush_, {
        flagged <- brushedPoints(store$data, brush = input$plot_brush_, allRows = TRUE)
        # Only points of the currently displayed sets may be removed; `%in% TRUE`
        # keeps rows whose brush test is NA.
        remove_row <- flagged$selected_ %in% TRUE &
          flagged$Indicator %in% input$selection
        # Filtering in place keeps the row order and skips the update (and the
        # redraw it would cause) when the brush caught nothing.
        if (any(remove_row)) {
          store$data <- store$data[!remove_row]
        }
      })
    }
    
    # Run app: build the Shiny app object from the `ui` and `server` defined above
    shinyApp(ui = ui, server = server)
    
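    # Packages
    library(shiny)
    library(ggplot2)
    library(data.table)
    # UI
    ui = fluidPage(
        fluidRow(selectInput("selection", "Set Selection:", choices = '', selected = '', multiple = TRUE)),
        fluidRow(plotOutput(outputId = "plot", brush = "plot_brush_"),
                 downloadButton('download',"Download the data"))
    )
    # Server
    server = function(session, input, output){
        # Pop up for data upload
        query_modal = modalDialog(title = "Upload Spectrum",
                                  fileInput("file",
                                  "file",
                                  multiple = TRUE,
                                  accept = c(".csv")),
                                  easyClose = FALSE)
        showModal(query_modal)
        ## Upload
        mt1 = reactive({
           req(input$file)
           cs = list()
           for(nr in 1:length(input$file[ , 1])){
              c = read.csv(input$file[[nr, 'datapath']])
              cs[[nr]] = data.table(Frequency = as.numeric(c[[1]]),
                                    Amplitude = as.numeric(c[[2]]),
                                    Indicator = as.factor(nr))}
            c = do.call(rbind, cs)
            c = reactiveValues(data = c)
            return(c)})
        ## Input selection
        observeEvent(
          mt1(),
          updateSelectInput(
            session,
            "selection",
            "Set Selection:",
            choices = levels(mt1()$data$Indicator),
            selected = 'Entire'))
        ## Plot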
    
    output$plot …(原文代码在此处被截断)

    您可以通过 reticulate 包在 R 中使用 Python 的绘图库 matplotlib:

  • 设置包和库:
  • 测试安装:

  • 您需要手动处理颜色,因为 matplotlib 不是 ggplot2。

    所有与 Shiny 相关的代码都与问题无关。您遇到的是一个纯粹的绘图/ggplot2 问题:绘制大量的点很慢。您需要重新设计绘图,采用更高效的数据可视化方式。绘制 1e5 个点并不明智,会出现严重的过度绘制(overplotting)。如果您不想采用更高效的可视化方式,我的回答可能会有用。“我现在更改数据本身”——如果绘图仍然完全相同,那问题是什么?图中的 1e5 个点根本无法区分。此外,您还应该看看其他选项(例如 hexbin 图)。@Roland,我同意这是一个绘图问题。但是,绘制数据点是必要的,因为该图既是用来判断要删除哪些数据点的可视化工具,也是执行数据删除的数据操作工具。我喜欢你的近似方法。我必须了解数据的规模和精度,这没有问题。我还不明白为什么 R 的 ggplot 或 base plot 比 Python 的 matplotlib 花费更多时间。我想,除了近似之外,最好还能在以近似、简化数据绘制的图之上,对原始数据运行同样的选择功能。@Roland,我确认 1e5 个点对于 matplotlib 和 Matlab 来说都不是什么大问题,请参阅我的回答,可能会有用。当我运行这个 Shiny 应用程序时,无法通过数据选择手动删除数据点。尽管如此,通过 reticulate 集成 matplotlib 还是很方便的。
    # One-time setup: install the reticulate package from the package repository.
    # The statements below download and install software, so run them once, in order.
    install.packages('reticulate')
    
    # Install python environment (Miniconda, managed by reticulate)
    reticulate::install_miniconda() 
    # if Python is already installed, you can specify the path with use_python(path)
    
    # Install matplotlib library into the Python environment used by reticulate
    reticulate::py_install('matplotlib')
    
    # Test the installation: plot 100000 random x/y values with matplotlib from R
    # and save the figure as 'test.png' in the working directory.
    library(reticulate)
    mpl <- import("matplotlib")
    mpl$use("Agg") # Stable non interactive backend; selected before pyplot is imported
    mpl$rcParams['agg.path.chunksize'] = 0 # Disable error check on too many points
    
    # Python modules used below: pyplot for drawing, numpy for the random numbers
    plt <- import("matplotlib.pyplot")
    np <- import("numpy")
    
    # generate lines cloud: two vectors of 100000 standard-normal samples
    xx = np$random$randn(100000L)
    yy = np$random$randn(100000L)
    
    # Open a figure, draw yy against xx, write it to disk, then close the
    # current figure again.
    plt$figure()
    plt$plot(xx,yy)
    plt$savefig('test.png')
    plt$close(plt$gcf())
    
    # Packages
    library(shiny)
    library(ggplot2)
    library(data.table)
    # UI
    # Page layout: the first row holds the multi-select used to choose which
    # uploaded data sets are drawn; the second row holds the image rendered by
    # matplotlib and the download button. The select starts with a single empty
    # choice and is filled in by the server.
    ui <- fluidPage(
      fluidRow(
        selectInput(
          inputId = "selection",
          label = "Set Selection:",
          choices = "",
          selected = "",
          multiple = TRUE
        )
      ),
      fluidRow(
        imageOutput(outputId = "image"),
        downloadButton(outputId = "download", label = "Download the data")
      )
    )
    
    # Server
    #
    # Shiny server function of the matplotlib-based variant of the app.
    #
    # Arguments: `session`, `input`, `output` -- the objects Shiny passes to a
    # server function.
    #
    # Behaviour:
    #   * shows a modal dialog asking for one or more CSV uploads;
    #   * reads column 1 as Frequency and column 2 as Amplitude of every file and
    #     tags the rows with the upload index (factor `Indicator`);
    #   * draws the chosen data sets with matplotlib (through reticulate) into a
    #     temporary PNG that is sent to the "image" output;
    #   * offers the data as a CSV download.
    server <- function(session, input, output) {

      # Setup Python objects. The backend is selected before pyplot is imported,
      # in the same order as the stand-alone matplotlib test script.
      mpl <- reticulate::import("matplotlib")
      mpl$use("Agg")
      mpl$rcParams["agg.path.chunksize"] <- 0
      plt <- reticulate::import("matplotlib.pyplot")

      # Pop up for data upload
      query_modal <- modalDialog(
        title = "Upload Spectrum",
        fileInput("file", "file", multiple = TRUE, accept = c(".csv")),
        easyClose = FALSE
      )
      showModal(query_modal)

      # Working copy of the uploaded data. It lives in a single reactiveValues
      # object created once per session, instead of one created inside a
      # reactive expression.
      store <- reactiveValues(data = NULL)

      ## Upload
      observeEvent(input$file, {
        uploads <- input$file
        # seq_len() instead of 1:length(): safe for zero rows, and lapply()
        # avoids growing a list inside a loop.
        sets <- lapply(seq_len(nrow(uploads)), function(nr) {
          raw <- read.csv(uploads$datapath[nr])
          data.table(
            Frequency = as.numeric(raw[[1]]),
            Amplitude = as.numeric(raw[[2]]),
            Indicator = as.factor(nr)
          )
        })
        combined <- rbindlist(sets)
        store$data <- combined

        ## Input selection
        # Offer every uploaded set and pre-select all of them. The former
        # default 'Entire' was not one of the choices, so nothing was selected
        # and the image started out empty.
        set_ids <- levels(combined$Indicator)
        updateSelectInput(
          session,
          "selection",
          "Set Selection:",
          choices = set_ids,
          selected = set_ids
        )
      })

      ## Render matplotlib image
      output$image <- renderImage({
        req(store$data)

        # Size of the output element. These are reactive values, so this
        # expression re-runs whenever they change.
        width <- session$clientData$output_image_width
        height <- session$clientData$output_image_height

        # A temp file to save the output.
        outfile <- tempfile(fileext = ".png")

        # Generate the image file
        shown <- store$data[Indicator %in% input$selection]
        fig <- plt$figure()
        # Close the figure even when plotting or saving fails, so failed
        # renders do not leave open figures behind.
        tryCatch(
          {
            plt$plot(shown$Frequency, shown$Amplitude)
            plt$savefig(outfile)
          },
          finally = plt$close(fig)
        )

        # Return a list containing the filename
        list(
          src = outfile,
          width = width,
          height = height,
          alt = "This is alternate text"
        )
      }, deleteFile = TRUE)

      ## Download
      output$download <- downloadHandler(
        filename = function() {
          # Strip the whole extension; the previous gsub('.{1}$', ...) removed
          # only the last character ("dummy1.csv" -> "dummy1.csmanipulated.csv").
          stem <- tools::file_path_sans_ext(input$file$name[1])
          paste0(stem, "manipulated", ".csv")
        },
        content = function(fname) {
          req(store$data)
          kept <- store$data[, .SD, .SDcols = c("Frequency", "Amplitude", "Indicator")]
          write.csv(kept, fname, row.names = FALSE)
        }
      )

      ## Adjust
      # Removes brushed points of the displayed sets when a brush named
      # "plot_brush_" reports a selection. The UI of this variant defines no
      # brush on the image output, so as written this never fires. The previous
      # observe() ran anyway with a NULL brush, appended a `selected_` column to
      # the data and thereby triggered a redundant second render of the image.
      observeEvent(input$plot_brush_, {
        flagged <- brushedPoints(store$data, brush = input$plot_brush_, allRows = TRUE)
        # `%in% TRUE` keeps rows whose brush test is NA.
        remove_row <- flagged$selected_ %in% TRUE &
          flagged$Indicator %in% input$selection
        if (any(remove_row)) {
          store$data <- store$data[!remove_row]
        }
      })
    }
    
    # Run app: build the Shiny app object from the `ui` and `server` defined above
    shinyApp(ui = ui, server = server)