Warning: file_get_contents(/data/phpspider/zhask/data//catemap/8/python-3.x/15.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Python 每个循环打开新选项卡_Python_Python 3.x_Selenium_Selenium Webdriver - Fatal编程技术网

Python 每个循环打开新选项卡

Python 每个循环打开新选项卡,python,python-3.x,selenium,selenium-webdriver,Python,Python 3.x,Selenium,Selenium Webdriver,大家好,我是用python编码的新手,我有这段代码,用于从facebook用户那里刮取数据,但当我开始项目时,csv中的每一行都会在“新建”选项卡中打开 import requests from bs4 import BeautifulSoup from selenium import webdriver import pandas as pd import time def dataset(): dff = pd.read_csv(r"names.csv")

大家好,我是用python编码的新手,我有这段代码,用于从facebook用户那里刮取数据,但当我开始项目时,csv中的每一行都会在“新建”选项卡中打开

import requests 
from bs4 import BeautifulSoup
from selenium import webdriver
import pandas as pd
import time
def dataset():
    dff = pd.read_csv(r"names.csv")
    dff=dff[dff.name.str.contains("[a-z,A-z]")==True]
    dff=dff[dff.name.str.contains("[@,0-9,/,-]")==True]
    dname=pd.concat([dff]) 
    return dname['name']
def facebookscrap(user):
    sections = {
        'photo_url': {'src':'//div[@id="objects_container"]//a/img[@alt][1]'},
    }
    driver = webdriver.Firefox(executable_path='facebook/geckodriver')
    driver.get("https://www.facebook.com")
    element = driver.find_element_by_id("email")
    element.send_keys('email')
    element = driver.find_element_by_id("pass")
    element.send_keys('password')
    element = driver.find_element_by_id("loginbutton")
    element.click()
    h=[]
    lh=[]
    mkk=[]
    for username in [l]:
        try:
            driver.get("https://mbasic.facebook.com/" + username)
name=driver.find_element_by_xpath('/html/body/div/div/div[2]/div/div[1]/div[1]/div[2]/div[1]/span/div/span/strong')
            d = {'name': name.text}
            x = driver.find_element_by_xpath
            xs = driver.find_elements_by_xpath
            for k,v in sections.items():
                    try:
                        if 'src' in v:
                            d[str(k)] = x(v['src']).get_attribute('src')
                        elif 'txt' in v:
                            d[str(k)] = x(v['txt']).text
                        elif 'href' in v:
                            d[str(k)] = x(v['href']).get_attribute('href')[8:].split('?')[0]
                        elif 'table' in v:
                            d['details'] = []
                            rows = xs(v['table']+'td[1])')
                            for i in range (1, len(rows)+1):
                                deets_key = x(v['table']+'td[1])'+'['+str(i)+']').text
                                deets_val = x(v['table']+'td[2])'+'['+str(i)+']').text
                                d['details'].append({deets_key:deets_val})
                        elif 'workedu' in v:
                            d[str(k)] = []
                            base = v['workedu']
                            rows = xs(base)
                            for i in range (1, len(rows)+1):
                                dd = {}
                                dd['link'] = x(base+'['+str(i)+']'+'/div/div[1]//a').get_attribute('href')[8:].split('&')[0].split('/')[0]
                                dd['org'] = x(base+'['+str(i)+']'+'/div/div[1]//a').text
                                dd['lines'] = []
                                lines = xs(base+'['+str(i)+']'+'/div/div[1]/div')
                                for l in range (2, len(lines)+1):
                                    line = x(base+'['+str(i)+']'+'/div/div[1]/div'+'['+str(l)+']').text
                                    dd['lines'].append(line)
                                d[str(k)].append(dd)
                        elif 'fam' in v:
                            d[str(k)] = []
                            base = v['fam']
                            rows = xs(base)
                            for i in range (1, len(rows)+1):
                                d[str(k)].append({
                                    'name': x(base+'['+str(i)+']'+'//h3[1]').text,
                                    'rel': x(base+'['+str(i)+']'+'//h3[2]').text,
                                    'alias': x(base+'['+str(i)+']'+'//h3[1]/a').get_attribute('href')[8:].split('?')[0]
                                })
                        elif 'life_events' in k:
                            d[str(k)] = []
                            base = v['years']
                            years = xs(base)
                            for i in range (1,len(years)+1):
                                year = x(base+'['+str(i)+']'+'/div[1]').text
                                events = xs(base+'['+str(i)+']'+'/div/div/a')
                                for e in range(1,len(events)+1):
                                    event = x('('+base+'['+str(i)+']'+'/div/div/a)'+'['+str(e)+']')
                                    d[str(k)].append({
                                        'year': year,
                                        'title': event.text,
                                        'link': event.get_attribute('href')[8:].split('refid')[0]
                                    })
                    except Exception:
                        pass
            lh.append(d)
            info_str = ""
            for key in d.keys():
                    h=[]
                    info_str = info_str + key.upper()+": "
                    if type(d[key]) is list:
                        info_str += "\n"
                        for itm in d[key]:
                            if type(itm) is dict:
                                #print(itm)
                                for kff in itm.keys():
                                    info_str = info_str + "\t"+kff.upper()+": "+str(itm[kff])+"\n"
                    else:
                        info_str = info_str + d[key]+"\n"
                    h.append(info_str)
                    mkk.append(info_str)
        except:
            pass
    return(pd.DataFrame(lh))
def ifd():
    if(len(fbdb())==0):
        df['id']=[i for i in range(len(df))]
    else:
        df['id'] = [i+1 for i in range(max(fbdb().id),len(df)+max(fbdb().id))]
    return df['id']
def connectdb():
    from pymongo import MongoClient as client
    connect = client('mongodb://localhost:27017/')
    db=connect.osint
    return db
def update():   
        y= connectdb()
        fd=y['fund_facebook']
        import json
        records = json.loads(df.T.to_json()).values()
        for r in records:
            fd.insert(r)
def fbdb():
    y= connectdb()
    df=y['fund_facebook']
    k = []
    for x in df.find():
        k.append(x)
    df = pd.DataFrame(k)
    try:
        df=df.drop('_id',axis=1)
    except:
        pass
    return df
if __name__=='__main__':
    start = time.time()
    for i in dataset():
        df=facebookscrap(i)
        time.sleep(30)
        ifd()
        update()
所需输出 寻找问题在哪里 在selenium的同一选项卡中打开所有链接,如将链接复制到地址栏并将其打开 问题在哪里? 对不起,我的英语不好

for username in l


事实上,
“Hello world”.split()
等于
[“Hello”,“world”]
直接在字符串上迭代时,“Hello world”就像在
[“H”,“e”,“l”,“l”,“o”,“w”,“o”,“r”,“l”,“d”]
上迭代一样

CSV看起来像什么?
for username in l.split()