如何使用Python从导入的csv计算lat/long点之间的距离?

如何使用Python从导入的csv计算lat/long点之间的距离?,python,pandas,numpy,csv,haversine,Python,Pandas,Numpy,Csv,Haversine,我正在尝试导入一个包含四列位置数据(lat/long)的.csv,计算点之间的距离,将距离写入一个新列,将函数循环到下一组坐标,并将输出数据帧写入一个新的.csv。我已经编写了以下代码,并且已经完成了完成这些步骤后,我发现一个错误。 示例数据: lat1 lon1 lat2 lon2 33.58144 -57.73018 32.44873 -99.46281 25.46212 -46.62017 34.64971 -96.70271

我正在尝试导入一个包含四列位置数据(lat/long)的 .csv 文件,计算每行两点之间的距离,将距离写入一个新列,对下一组坐标重复同样的计算,最后将输出数据帧写入一个新的 .csv 文件。我已经编写了以下代码,但执行完这些步骤后出现了一个错误。

示例数据:

lat1       lon1        lat2       lon2
33.58144   -57.73018   32.44873   -99.46281
25.46212   -46.62017   34.64971   -96.70271
39.97521   -80.27027   68.69710   -83.27182
42.74529   -73.73028   36.17318   -28.18201
import pandas as pd
import numpy as np
# Input CSV to read and output CSV to write.
input_file = "input.csv"
output_file = "output.csv"
df = pd.read_csv(input_file)                       #load the CSV into a DataFrame
# NOTE(review): pandas reports convert_objects as deprecated (FutureWarning)
# and recommends pd.to_numeric for numeric conversion.
df = df.convert_objects(convert_numeric = True)

def dist_from_coordinates(lat1, lon1, lat2, lon2, radius=6371):
    """Return the great-circle distance between two lat/long points.

    Uses the haversine formula. Inputs are in decimal degrees and may be
    scalars or NumPy arrays / pandas Series of matching shape, since every
    step is a NumPy element-wise operation.

    Args:
        lat1, lon1: Latitude and longitude of the first point, in degrees.
        lat2, lon2: Latitude and longitude of the second point, in degrees.
        radius: Sphere radius; the result is expressed in the same unit.
            Defaults to 6371, the Earth radius in km.

    Returns:
        The distance along the sphere's surface, in the unit of ``radius``.
    """
    # Half-angle differences, converted from degrees to radians.
    half_d_lat = np.radians(lat2 - lat1) / 2.
    half_d_lon = np.radians(lon2 - lon1) / 2.

    r_lat1 = np.radians(lat1)
    r_lat2 = np.radians(lat2)

    # Haversine of the central angle between the two points.
    a = (np.sin(half_d_lat) ** 2
         + np.cos(r_lat1) * np.cos(r_lat2) * np.sin(half_d_lon) ** 2)

    # Rounding can push `a` marginally outside [0, 1] for (near-)antipodal
    # points, which would make arcsin return NaN; clamp before inverting.
    a = np.clip(a, 0.0, 1.0)

    return 2 * radius * np.arcsin(np.sqrt(a))

# Compute one distance per DataFrame row and collect the results in order.
new_column = []                    #list that collects one distance per row
for index,row in df.iterrows():
  lat1 = row['lat1'] #'lat1' value of the current row
  lon1 = row['lon1'] #'lon1' value of the current row
  lat2 = row['lat2'] #'lat2' value of the current row
  lon2 = row['lon2'] #'lon2' value of the current row
  value = dist_from_coordinates(lat1, lon1, lat2, lon2)  #distance between the row's two points
  new_column.append(value)   #keep the distance for this row

df.insert(4,"Distance",new_column)  #insert the distances as column "Distance" at 0-based position 4

# NOTE(review): 'ab' opens the file in binary append mode. Under Python 3
# to_csv writes str, so this raises
# "TypeError: a bytes-like object is required, not 'str'"; a text mode is needed.
with open(output_file,'ab') as f:
  df.to_csv(f,index = False)       #write the frame, without the index, to output.csv
FutureWarning: convert_objects is deprecated.  To re-infer data dtypes for object columns, use DataFrame.infer_objects()
For all other conversions use the data-type specific converters pd.to_datetime, pd.to_timedelta and pd.to_numeric.
  after removing the cwd from sys.path.
代码:

lat1       lon1        lat2       lon2
33.58144   -57.73018   32.44873   -99.46281
25.46212   -46.62017   34.64971   -96.70271
39.97521   -80.27027   68.69710   -83.27182
42.74529   -73.73028   36.17318   -28.18201
import pandas as pd
import numpy as np
# Input CSV to read and output CSV to write.
input_file = "input.csv"
output_file = "output.csv"
df = pd.read_csv(input_file)                       #load the CSV into a DataFrame
# NOTE(review): pandas reports convert_objects as deprecated (FutureWarning)
# and recommends pd.to_numeric for numeric conversion.
df = df.convert_objects(convert_numeric = True)

def dist_from_coordinates(lat1, lon1, lat2, lon2, radius=6371):
    """Return the great-circle distance between two lat/long points.

    Uses the haversine formula. Inputs are in decimal degrees and may be
    scalars or NumPy arrays / pandas Series of matching shape, since every
    step is a NumPy element-wise operation.

    Args:
        lat1, lon1: Latitude and longitude of the first point, in degrees.
        lat2, lon2: Latitude and longitude of the second point, in degrees.
        radius: Sphere radius; the result is expressed in the same unit.
            Defaults to 6371, the Earth radius in km.

    Returns:
        The distance along the sphere's surface, in the unit of ``radius``.
    """
    # Half-angle differences, converted from degrees to radians.
    half_d_lat = np.radians(lat2 - lat1) / 2.
    half_d_lon = np.radians(lon2 - lon1) / 2.

    r_lat1 = np.radians(lat1)
    r_lat2 = np.radians(lat2)

    # Haversine of the central angle between the two points.
    a = (np.sin(half_d_lat) ** 2
         + np.cos(r_lat1) * np.cos(r_lat2) * np.sin(half_d_lon) ** 2)

    # Rounding can push `a` marginally outside [0, 1] for (near-)antipodal
    # points, which would make arcsin return NaN; clamp before inverting.
    a = np.clip(a, 0.0, 1.0)

    return 2 * radius * np.arcsin(np.sqrt(a))

# Compute one distance per DataFrame row and collect the results in order.
new_column = []                    #list that collects one distance per row
for index,row in df.iterrows():
  lat1 = row['lat1'] #'lat1' value of the current row
  lon1 = row['lon1'] #'lon1' value of the current row
  lat2 = row['lat2'] #'lat2' value of the current row
  lon2 = row['lon2'] #'lon2' value of the current row
  value = dist_from_coordinates(lat1, lon1, lat2, lon2)  #distance between the row's two points
  new_column.append(value)   #keep the distance for this row

df.insert(4,"Distance",new_column)  #insert the distances as column "Distance" at 0-based position 4

# NOTE(review): 'ab' opens the file in binary append mode. Under Python 3
# to_csv writes str, so this raises
# "TypeError: a bytes-like object is required, not 'str'"; a text mode is needed.
with open(output_file,'ab') as f:
  df.to_csv(f,index = False)       #write the frame, without the index, to output.csv
FutureWarning: convert_objects is deprecated.  To re-infer data dtypes for object columns, use DataFrame.infer_objects()
For all other conversions use the data-type specific converters pd.to_datetime, pd.to_timedelta and pd.to_numeric.
  after removing the cwd from sys.path.
输出:

lat1       lon1        lat2       lon2
33.58144   -57.73018   32.44873   -99.46281
25.46212   -46.62017   34.64971   -96.70271
39.97521   -80.27027   68.69710   -83.27182
42.74529   -73.73028   36.17318   -28.18201
import pandas as pd
import numpy as np
# Input CSV to read and output CSV to write.
input_file = "input.csv"
output_file = "output.csv"
df = pd.read_csv(input_file)                       #load the CSV into a DataFrame
# NOTE(review): pandas reports convert_objects as deprecated (FutureWarning)
# and recommends pd.to_numeric for numeric conversion.
df = df.convert_objects(convert_numeric = True)

def dist_from_coordinates(lat1, lon1, lat2, lon2, radius=6371):
    """Return the great-circle distance between two lat/long points.

    Uses the haversine formula. Inputs are in decimal degrees and may be
    scalars or NumPy arrays / pandas Series of matching shape, since every
    step is a NumPy element-wise operation.

    Args:
        lat1, lon1: Latitude and longitude of the first point, in degrees.
        lat2, lon2: Latitude and longitude of the second point, in degrees.
        radius: Sphere radius; the result is expressed in the same unit.
            Defaults to 6371, the Earth radius in km.

    Returns:
        The distance along the sphere's surface, in the unit of ``radius``.
    """
    # Half-angle differences, converted from degrees to radians.
    half_d_lat = np.radians(lat2 - lat1) / 2.
    half_d_lon = np.radians(lon2 - lon1) / 2.

    r_lat1 = np.radians(lat1)
    r_lat2 = np.radians(lat2)

    # Haversine of the central angle between the two points.
    a = (np.sin(half_d_lat) ** 2
         + np.cos(r_lat1) * np.cos(r_lat2) * np.sin(half_d_lon) ** 2)

    # Rounding can push `a` marginally outside [0, 1] for (near-)antipodal
    # points, which would make arcsin return NaN; clamp before inverting.
    a = np.clip(a, 0.0, 1.0)

    return 2 * radius * np.arcsin(np.sqrt(a))

# Compute one distance per DataFrame row and collect the results in order.
new_column = []                    #list that collects one distance per row
for index,row in df.iterrows():
  lat1 = row['lat1'] #'lat1' value of the current row
  lon1 = row['lon1'] #'lon1' value of the current row
  lat2 = row['lat2'] #'lat2' value of the current row
  lon2 = row['lon2'] #'lon2' value of the current row
  value = dist_from_coordinates(lat1, lon1, lat2, lon2)  #distance between the row's two points
  new_column.append(value)   #keep the distance for this row

df.insert(4,"Distance",new_column)  #insert the distances as column "Distance" at 0-based position 4

# NOTE(review): 'ab' opens the file in binary append mode. Under Python 3
# to_csv writes str, so this raises
# "TypeError: a bytes-like object is required, not 'str'"; a text mode is needed.
with open(output_file,'ab') as f:
  df.to_csv(f,index = False)       #write the frame, without the index, to output.csv
FutureWarning: convert_objects is deprecated.  To re-infer data dtypes for object columns, use DataFrame.infer_objects()
For all other conversions use the data-type specific converters pd.to_datetime, pd.to_timedelta and pd.to_numeric.
  after removing the cwd from sys.path.
因此,在完成操作后,output.csv文件是一个单独的文件,其中包含前面的4列以及第5列(距离)。您可以使用for循环来执行此操作。我在这里展示的方法读取每一行并计算距离,然后将其附加到一个空列表中,该列表是新的列“distance”,并最终创建output.csv

错误:

lat1       lon1        lat2       lon2
33.58144   -57.73018   32.44873   -99.46281
25.46212   -46.62017   34.64971   -96.70271
39.97521   -80.27027   68.69710   -83.27182
42.74529   -73.73028   36.17318   -28.18201
import pandas as pd
import numpy as np
# Input CSV to read and output CSV to write.
input_file = "input.csv"
output_file = "output.csv"
df = pd.read_csv(input_file)                       #load the CSV into a DataFrame
# NOTE(review): pandas reports convert_objects as deprecated (FutureWarning)
# and recommends pd.to_numeric for numeric conversion.
df = df.convert_objects(convert_numeric = True)

def dist_from_coordinates(lat1, lon1, lat2, lon2, radius=6371):
    """Return the great-circle distance between two lat/long points.

    Uses the haversine formula. Inputs are in decimal degrees and may be
    scalars or NumPy arrays / pandas Series of matching shape, since every
    step is a NumPy element-wise operation.

    Args:
        lat1, lon1: Latitude and longitude of the first point, in degrees.
        lat2, lon2: Latitude and longitude of the second point, in degrees.
        radius: Sphere radius; the result is expressed in the same unit.
            Defaults to 6371, the Earth radius in km.

    Returns:
        The distance along the sphere's surface, in the unit of ``radius``.
    """
    # Half-angle differences, converted from degrees to radians.
    half_d_lat = np.radians(lat2 - lat1) / 2.
    half_d_lon = np.radians(lon2 - lon1) / 2.

    r_lat1 = np.radians(lat1)
    r_lat2 = np.radians(lat2)

    # Haversine of the central angle between the two points.
    a = (np.sin(half_d_lat) ** 2
         + np.cos(r_lat1) * np.cos(r_lat2) * np.sin(half_d_lon) ** 2)

    # Rounding can push `a` marginally outside [0, 1] for (near-)antipodal
    # points, which would make arcsin return NaN; clamp before inverting.
    a = np.clip(a, 0.0, 1.0)

    return 2 * radius * np.arcsin(np.sqrt(a))

# Compute one distance per DataFrame row and collect the results in order.
new_column = []                    #list that collects one distance per row
for index,row in df.iterrows():
  lat1 = row['lat1'] #'lat1' value of the current row
  lon1 = row['lon1'] #'lon1' value of the current row
  lat2 = row['lat2'] #'lat2' value of the current row
  lon2 = row['lon2'] #'lon2' value of the current row
  value = dist_from_coordinates(lat1, lon1, lat2, lon2)  #distance between the row's two points
  new_column.append(value)   #keep the distance for this row

df.insert(4,"Distance",new_column)  #insert the distances as column "Distance" at 0-based position 4

# NOTE(review): 'ab' opens the file in binary append mode. Under Python 3
# to_csv writes str, so this raises
# "TypeError: a bytes-like object is required, not 'str'"; a text mode is needed.
with open(output_file,'ab') as f:
  df.to_csv(f,index = False)       #write the frame, without the index, to output.csv
FutureWarning: convert_objects is deprecated.  To re-infer data dtypes for object columns, use DataFrame.infer_objects()
For all other conversions use the data-type specific converters pd.to_datetime, pd.to_timedelta and pd.to_numeric.
  after removing the cwd from sys.path.
TypeError                                 Traceback (most recent call last)
<ipython-input-...> in <module>
     33
     34 with open(output_file,'ab') as f:
---> 35   df.to_csv(f,index = False)       #creates the output.csv
~/anaconda3/lib/python3.7/site-packages/pandas/core/generic.py in to_csv(self, path_or_buf, sep, na_rep, float_format, columns, header, index, index_label, mode, encoding, compression, quoting, quotechar, line_terminator, chunksize, tupleize_cols, date_format, doublequote, escapechar, decimal)
   3018                                  doublequote=doublequote,
   3019                                  escapechar=escapechar, decimal=decimal)
-> 3020         formatter.save()
   3021
   3022         if path_or_buf is None:
~/anaconda3/lib/python3.7/site-packages/pandas/io/formats/csvs.py in save(self)
    170                 self.writer = UnicodeWriter(f, **writer_kwargs)
    171
--> 172             self._save()
    173
    174         finally:
~/anaconda3/lib/python3.7/site-packages/pandas/io/formats/csvs.py in _save(self)
    272     def _save(self):
    273
--> 274         self._save_header()
    275
    276         nrows = len(self.data_index)
~/anaconda3/lib/python3.7/site-packages/pandas/io/formats/csvs.py in _save_header(self)
    240         if not has_mi_columns or has_aliases:
    241             encoded_labels += list(write_cols)
--> 242             writer.writerow(encoded_labels)
    243         else:
    244             # write out mi
TypeError: a bytes-like object is required, not 'str'
类似问题:

lat1       lon1        lat2       lon2
33.58144   -57.73018   32.44873   -99.46281
25.46212   -46.62017   34.64971   -96.70271
39.97521   -80.27027   68.69710   -83.27182
42.74529   -73.73028   36.17318   -28.18201
import pandas as pd
import numpy as np
# Input CSV to read and output CSV to write.
input_file = "input.csv"
output_file = "output.csv"
df = pd.read_csv(input_file)                       #load the CSV into a DataFrame
# NOTE(review): pandas reports convert_objects as deprecated (FutureWarning)
# and recommends pd.to_numeric for numeric conversion.
df = df.convert_objects(convert_numeric = True)

def dist_from_coordinates(lat1, lon1, lat2, lon2, radius=6371):
    """Return the great-circle distance between two lat/long points.

    Uses the haversine formula. Inputs are in decimal degrees and may be
    scalars or NumPy arrays / pandas Series of matching shape, since every
    step is a NumPy element-wise operation.

    Args:
        lat1, lon1: Latitude and longitude of the first point, in degrees.
        lat2, lon2: Latitude and longitude of the second point, in degrees.
        radius: Sphere radius; the result is expressed in the same unit.
            Defaults to 6371, the Earth radius in km.

    Returns:
        The distance along the sphere's surface, in the unit of ``radius``.
    """
    # Half-angle differences, converted from degrees to radians.
    half_d_lat = np.radians(lat2 - lat1) / 2.
    half_d_lon = np.radians(lon2 - lon1) / 2.

    r_lat1 = np.radians(lat1)
    r_lat2 = np.radians(lat2)

    # Haversine of the central angle between the two points.
    a = (np.sin(half_d_lat) ** 2
         + np.cos(r_lat1) * np.cos(r_lat2) * np.sin(half_d_lon) ** 2)

    # Rounding can push `a` marginally outside [0, 1] for (near-)antipodal
    # points, which would make arcsin return NaN; clamp before inverting.
    a = np.clip(a, 0.0, 1.0)

    return 2 * radius * np.arcsin(np.sqrt(a))

# Compute one distance per DataFrame row and collect the results in order.
new_column = []                    #list that collects one distance per row
for index,row in df.iterrows():
  lat1 = row['lat1'] #'lat1' value of the current row
  lon1 = row['lon1'] #'lon1' value of the current row
  lat2 = row['lat2'] #'lat2' value of the current row
  lon2 = row['lon2'] #'lon2' value of the current row
  value = dist_from_coordinates(lat1, lon1, lat2, lon2)  #distance between the row's two points
  new_column.append(value)   #keep the distance for this row

df.insert(4,"Distance",new_column)  #insert the distances as column "Distance" at 0-based position 4

# NOTE(review): 'ab' opens the file in binary append mode. Under Python 3
# to_csv writes str, so this raises
# "TypeError: a bytes-like object is required, not 'str'"; a text mode is needed.
with open(output_file,'ab') as f:
  df.to_csv(f,index = False)       #write the frame, without the index, to output.csv
FutureWarning: convert_objects is deprecated.  To re-infer data dtypes for object columns, use DataFrame.infer_objects()
For all other conversions use the data-type specific converters pd.to_datetime, pd.to_timedelta and pd.to_numeric.
  after removing the cwd from sys.path.

您应该应用以下更正:

将
df = df.convert_objects(convert_numeric=True)
替换为
df[:] = df[:].apply(pd.to_numeric, errors='coerce')

同样,通过
with open(output_file,'ab') as f:
打开文件是以二进制模式打开的,而应该使用
with open(output_file,'w') as f:


那么它应该可以工作。

也许我误解了,但是为什么不使用
df = pd.read_csv(input_file, delim_whitespace=True)
并去掉包含
convert_objects 的下一行呢?