在ruby中解析CSV之前或期间删除逗号

在ruby中解析CSV之前或期间删除逗号,ruby,csv,Ruby,Csv,我正在导入一个正在解析的CSV文件,并将分隔符设置为“|”,我想删除逗号或对其进行注释,以免它们弄乱列 这是我认为应该有一个代码来删除的部分,' namespace :postonce do desc "Check postonce ftp files and post loads and trucks." task :post => :environment do files = %x[ls /home/web2_postonce/].split("\n") fi

我正在导入一个正在解析的CSV文件,并将分隔符设置为“|”,我想删除逗号或对其进行注释,以免它们弄乱列

这是我认为应该加入删除逗号代码的部分:

# Excerpt of the postonce:post rake task: stages the incoming files, then parses
# each staged file row by row. The excerpt stops before the blocks are closed.
namespace :postonce do
  desc "Check postonce ftp files and post loads and trucks."
  task :post => :environment do
    # Re-encode every incoming file to UTF-8 into the working directory, then
    # move the original file into the backup directory.
    files = %x[ls /home/web2_postonce/].split("\n")
    files.each do |file|
      %x[ iconv -t UTF-8 /home/web2_postonce/#{file} > /home/deployer/postonce/#{file} ]
      %x[ mv /home/web2_postonce/#{file} /home/deployer/postonce_backup/ ]
    end
    files = %x[ ls /home/deployer/postonce/ ].split("\n")
    files.each do |file|
      begin
        # NOTE(review): no :col_sep is given, so CSV.read splits every line on
        # the default "," before the "|" parsing below ever runs.
        lines = CSV.read("/home/deployer/postonce/#{file}")
      rescue Exception => e
        log.error e
        next
      end
      # Only the first comma-separated cell of each row (index 0) is re-parsed
      # with "|" as the separator; text after a comma in the raw line is not used.
      h = lines.shift
      header = CSV.parse_line(h[0], { :col_sep => "|" } )
      lines.each do |line|
        fields = CSV.parse_line(line[0],{:col_sep => "|"})
        # Pair each header name with the value at the same position in this row.
        post = Hash[header.zip fields]
     if post["EmailAddress"].blank?
       log.error "Blank Email #{post["EmailAddress"]}"
        else
       log.debug "Email #{post["EmailAddress"]}"
        end
下面是提取文件并将文件解析为列的完整代码

require 'resque'
require 'logger'
# Log file for this task; "%F %T" formats timestamps as "YYYY-MM-DD HH:MM:SS".
log = Logger.new("#{Rails.root}/log/PostOnce.log")
log.datetime_format = "%F %T"
# Rake task postonce:post — stages the incoming files, parses each one as
# "|"-separated rows, creates missing Company/User records, and enqueues one
# MajorPoster job per load or truck row.
namespace :postonce do
  desc "Check postonce ftp files and post loads and trucks."
  task :post => :environment do
    # Re-encode every incoming file to UTF-8 into the working directory, then
    # move the original file into the backup directory.
    files = %x[ls /home/web2_postonce/].split("\n")
    files.each do |file|
      %x[ iconv -t UTF-8 /home/web2_postonce/#{file} > /home/deployer/postonce/#{file} ]
      %x[ mv /home/web2_postonce/#{file} /home/deployer/postonce_backup/ ]
    end
    files = %x[ ls /home/deployer/postonce/ ].split("\n")
    files.each do |file|
      begin
        # NOTE(review): no :col_sep is given, so CSV.read splits every line on
        # the default "," before the "|" parsing below ever runs.
        lines = CSV.read("/home/deployer/postonce/#{file}")
      rescue Exception => e
        log.error e
        next
      end
      # Only the first comma-separated cell of each row (index 0) is re-parsed
      # with "|" as the separator; text after a comma in the raw line is not used.
      h = lines.shift
      header = CSV.parse_line(h[0], { :col_sep => "|" } )
      lines.each do |line|
        fields = CSV.parse_line(line[0],{:col_sep => "|"})
        # Pair each header name with the value at the same position in this row.
        post = Hash[header.zip fields]
     if post["EmailAddress"].blank?
       log.error "Blank Email #{post["EmailAddress"]}"
        else
       # NOTE(review): a present email is also logged at error level here.
       log.error "Email #{post["EmailAddress"]}"
        end
        # Append the "~PostOnce~" marker to Notes, or use it alone when Notes is blank.
        if post["Notes"].blank?
          post["Notes"] = "~PostOnce~"
        else
          post["Notes"] = post["Notes"]+" ~PostOnce~" 
        end
        # Create the company when no Company with this name exists yet.
        if Company.where(:name => post["Company"]).first.nil?
          c = Company.new
          c.name = post["Company"]
          c.dispatch = post["Customer_Phone"]
          c.save
        end
        # NOTE(review): ["EmailAddress"] is a literal one-element array, not
        # post["EmailAddress"], so this lookup uses the string "EmailAddress"
        # rather than the address from the current row.
        if User.where(:email => ["EmailAddress"]).first.blank?
          u = User.new
          c = Company.where(:name => post["Company"]).first unless Company.where(:name => post["Company"]).first.nil?
          u.company_id = c.id
          # Username is the part of the email address before the "@".
          u.username = post["EmailAddress"].gsub(/@.*/,"") unless post["EmailAddress"].nil?
          # The current timestamp string is used as the password value.
          u.password = Time.now.to_s
          u.email = post["EmailAddress"]
          u.dispatch = post["Customer_Phone"]
          u.save
        end
        #If Load
        if file.start_with?("PO_loads")
          record = Hash.new
          begin
            # .first is nil when no user has this email; the resulting error on
            # .id is logged and the row is skipped.
            record[:user_id] = User.where(:email => post["EmailAddress"]).first.id
          rescue Exception => e
            log.error e
            next
          end
          record[:origin] = "#{post["Starting_City"]}, #{post["Starting_State"]}"
          record[:dest] = "#{post["Destination_City"]}, #{post["Destination_State"]}"
          record[:pickup] = Time.parse(post["Pickup_Date_Time"])
          record[:ltl] = false
          # NOTE(review): `=` assigns "FULL" instead of comparing with `==`; the
          # assigned string is truthy, so the `unless` body never runs and :ltl
          # stays false. The key "#Load_Type_Full" also differs in case from the
          # "#Load_type_full" column in the sample header.
          record[:ltl] = true unless post["#Load_Type_Full"] = "FULL"
          begin
            record[:equipment_id] = Equipment.where(:code => post["Type_of_Equipment"]).first.id
          rescue Exception => e
            # Falls back to the hard-coded equipment id 34 when the lookup fails.
            record[:equipment_id] = 34
          end
          record[:comments] = post["Notes"]
          record[:weight] = post["Weight"] 
          record[:length] = post["Length"]
          record[:rate] = post["Payment_amount"]
          # Blank out the rate when the payment amount is "Call" or "CALL".
          record[:rate] = '' if post["Payment_amount"] == 'Call' or post["Payment_amount"] == 'CALL'
          Resque.enqueue(MajorPoster, record) 
        #If Truck
        elsif file.start_with?("PO_trucks")
          record = Hash.new
          begin
            # Same lookup as for loads: a missing user is logged and the row skipped.
            record[:user_id] = User.where(:email => post["EmailAddress"]).first.id
          rescue Exception => e
            log.error e
            next
          end
          record[:origin] = "#{post["Starting_City"]}, #{post["Starting_State"]}"
          record[:dest] = "#{post["Destination_City"]}, #{post["Destination_State"]}"
          record[:available] = Time.parse(post["Pickup_Date_Time"])
          # The truck posting expires 8 days after it becomes available.
          record[:expiration] = record[:available] + 8.days
          begin
            record[:equipment_id] = Equipment.where(:code => post["Type_of_Equipment"]).first.id
          rescue Exception => e
            # Falls back to the hard-coded equipment id 34 when the lookup fails.
            record[:equipment_id] = 34
          end
          record[:comments] = post["Notes"]
          Resque.enqueue(MajorPoster, record) 
        end
      end
    #  %x[rm /home/deployer/postonce/#{file}]
    end
  end
end
这是我正在尝试加载的数据示例,逗号出现在“客户联系人”(Customer_Contact)和“备注”(Notes)字段中;这些数据是通过FTP发送给我们的:

Member_ID|Action_type|Entry_Number|Pickup_Date_Time|Starting_City|Starting_State|Destination_City|Destination_State|Type_of_Equipment|Length|Quantity|#Load_type_full|Extra_Stops|Payment_amount|Weight|Distance|Notes|Customer_Phone|Extension|Customer_Contact|EmailAddress|Company|
SUMMIT|L-delete|16491978|20140213|PEWAMO|MI|DENVER|CO|FT|45|1|FULL|0|Call|46000|||866-807-4968||DISPATCH, Dispatch|IANP@SUMMITTRANS.NET|SUMMIT TRANSPORTATION SERVICES INC.|
SUMMIT|L-delete|16490693|20140213|PEWAMO|MI|DENVER|CO|V|48|1|FULL|0|Call|44000|||866-807-4968||DISPATCH|IANP@SUMMITTRANS.NET|SUMMIT TRANSPORTATION SERVICES INC.|
SUMMIT|L-delete|16490699|20140214|PEWAMO|MI|DENVER|CO|V|48|1|FULL|0|Call|44000|||866-807-4968||DISPATCH|IANP@SUMMITTRANS.NET|SUMMIT TRANSPORTATION SERVICES INC.|
megacorpwv|L-Delete|16491928|20140214|WAITE PARK|MN|DOLTON|IL|R||1|FULL|0|CALL|0|0|(859) 538-1660  x2007|877-670-2837|||snewman@megacorplogistics.com|MEGACORP LOGISTICS 03|
我的日志输出如下。如您所见,我在第一条记录的一个字段中手动添加了一个逗号,结果它被当成了分隔符:

2014-02-13 12:29:41 ERROR -- Blank Email 
2014-02-13 12:29:41 ERROR -- undefined method `id' for nil:NilClass
2014-02-13 12:29:41 DEBUG -- Email IANP@SUMMITTRANS.NET
2014-02-13 12:29:42 DEBUG -- Email IANP@SUMMITTRANS.NET
2014-02-13 12:29:42 DEBUG -- Email snewman@megacorplogistics.com

我认为您的问题在于只解析了数组“h”和“line”的第一个元素。请尝试从这两行中删除“[0]”。并不是只有电子邮件为空,而是除了会员ID(Member_ID)之外的所有字段都是空的。

# Suggested change: pass h and line whole instead of indexing their first element with [0].
header = CSV.parse_line(h, { :col_sep => "|" } )
lines.each do |line|
fields = CSV.parse_line(line,{:col_sep => "|"})
啊,好的。Phillip Hallstrom已经发现了问题所在:它出在CSV.read语句上。默认情况下,CSV.read会使用逗号“,”作为分隔符。CSV.read所做的是把每一行读成数组的一个元素,然后再把每一行解析成另一个数组。因此,如果您的文件如下所示:

a|b|c|d|e
apple|ball, bearing|cantelope|date|elephant
它将在CSV.read上返回以下数组

[["a|b|c|d|e"], ["apple|ball", " bearing|cantelope|date|elephant"]]
您可以看到,CSV.read正在尝试在您有机会指定分隔符之前执行完整解析


改用普通文件I/O逐行读取,或者修改代码、在CSV.read语句中指定分隔符,都可以解决这个问题。但是您能提供更多的示例数据和预期输出吗?如果您的CSV是以“|”分隔的,为什么不一开始就执行
CSV.read("文件", :col_sep => "|")
呢?我已经更新了帖子。我在帖子中添加了完整代码,这样您可以看到我是如何解析所有内容的,目前只是在尝试调试。没有逗号时它工作得很好,但一旦有逗号就会出错。