如何用 curl 抓取需要先点击"同意"按钮的页面?

Curl 如何卷曲需要首先单击同意按钮的页面?,curl,web-scraping,cookies,puppeteer,wget,Curl,Web Scraping,Cookies,Puppeteer,Wget,我喜欢像这样卷曲URL。(当您没有网站cookie时,URL需要欧盟国家/地区的同意。) 我拼凑了一个木偶剧剧本来做这件事,但在我看来它很重而且很脆弱。有更好的解决办法吗 #!/usr/bin/env node const url = process.argv[2]; const puppeteer = require('puppeteer'); (async () => { const browser = await puppeteer.launch() const page

我想用 curl 抓取类似这样的 URL。(如果没有该网站的 cookie,从欧盟国家/地区访问时,这些 URL 会先要求用户同意。)

我拼凑了一个 Puppeteer 脚本来做这件事,但我觉得它既笨重又脆弱。有更好的解决办法吗?

#!/usr/bin/env node

const url = process.argv[2];
const puppeteer = require('puppeteer');

/**
 * Load the URL given as the first CLI argument in a headless browser, click
 * through the consent wizard, wait a fixed delay, and print the resulting
 * page HTML to stdout.
 *
 * Environment:
 *   cfTimeout - seconds to wait after accepting consent before the HTML is
 *               captured (defaults to 20; invalid values fall back to 20).
 *
 * Exit status is non-zero when no URL is given or any step fails.
 */
(async () => {
  if (!url) {
    console.error('usage: <script> <url>');
    process.exitCode = 1;
    return;
  }

  // First entry of the consent text list, and the primary button of the
  // consent form; each is waited for before being clicked.
  const consentItem =
    '.con-wizard > .wizard-body > #consent-text > .content-list > .list-item:nth-child(1)';
  const consentSubmit =
    '.con-wizard > .wizard-body > .actions > .consent-form > .primary';

  const defaultSeconds = 20;
  const requestedSeconds = Number(process.env.cfTimeout || defaultSeconds);
  const seconds =
    Number.isFinite(requestedSeconds) && requestedSeconds >= 0
      ? requestedSeconds
      : defaultSeconds;

  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();

    await page.goto(url);

    await page.waitForSelector(consentItem);
    await page.click(consentItem);

    await page.waitForSelector(consentSubmit);
    await page.click(consentSubmit);

    // Plain timer instead of the deprecated page.waitFor(); gives the page
    // time to settle after the consent form is submitted.
    await new Promise((resolve) => setTimeout(resolve, seconds * 1000));

    const html = await page.content();
    console.log(html);
  } finally {
    // Always shut the browser down, even when a selector never appears or
    // navigation fails; otherwise the browser process is left running.
    await browser.close();
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});

通过监视 Chrome 网络面板中的请求,我成功地只用 curl 实现了这一点:

# techcrunch-curl URL
#
# Fetch URL with curl and write the response to stdout. If the request ends up
# on the consent.yahoo.com "collectConsent" page, submit the consent form for
# that session (with redirects followed) and print that response instead.
#
# zsh only: relies on the $match array set by [[ =~ ]] and on =() process
# substitution. Requires an external `url-encode.py` command on PATH that reads
# the URL on stdin (presumably printing it percent-encoded -- confirm).
# Aborts with an error when URL is missing or empty.
function techcrunch-curl() {
    local url="${1:?}"
    # Probe where the URL finally lands. --location is required: without it
    # curl does not follow redirects and %{url_effective} is just the input URL,
    # so the consent branch below could never be taken.
    local con="$(curl --silent --show-error --location -o /dev/null -w '%{url_effective}' "$url")"
    if [[ "$con" =~ 'https://consent\.yahoo\.com/v2/collectConsent\?sessionId=(.*)' ]] ; then
        # First capture group of the match above: the consent session id.
        local sid="${match[1]}"

        # POST the consent form for this session. --cookie-jar =() hands curl a
        # temporary file from zsh, so cookies set along the redirect chain are
        # kept for this invocation. The headers and form fields (including the
        # repeated agree=agree) were copied from a browser request; it is not
        # known which of them the server actually requires.
        curl -o /dev/stdout --fail --location --cookie-jar =() 'https://consent.yahoo.com/v2/collectConsent?sessionId='$sid \
            -H 'Connection: keep-alive' \
            -H 'Pragma: no-cache' \
            -H 'Cache-Control: no-cache' \
            -H 'Origin: https://consent.yahoo.com' \
            -H 'Upgrade-Insecure-Requests: 1' \
            -H 'DNT: 1' \
            -H 'Content-Type: application/x-www-form-urlencoded' \
            -H 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36' \
            -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9' \
            -H 'Sec-Fetch-Site: same-origin' \
            -H 'Sec-Fetch-Mode: navigate' \
            -H 'Sec-Fetch-User: ?1' \
            -H 'Sec-Fetch-Dest: document' \
            -H 'Referer: https://consent.yahoo.com/v2/collectConsent?sessionId='$sid \
            -H 'Accept-Language: en-US,en;q=0.9' \
            --data-raw 'sessionId='$sid'&originalDoneUrl='"$(<<<$url url-encode.py)"'&namespace=techcrunch&agree=agree&agree=agree' \
            --compressed
    else
        # No consent wall detected: fetch the page directly.
        curl "$url"
    fi
}
function techcrunch-curl() {
    local url="${1:?}"
    local con="$(curl -o /dev/null -w %{url_effective} $url)"
    if [[ "$con" =~ 'https://consent\.yahoo\.com/v2/collectConsent\?sessionId=(.*)' ]] ; then
        local sid="${match[1]}"

        curl -o /dev/stdout --fail --location --cookie-jar =() 'https://consent.yahoo.com/v2/collectConsent?sessionId='$sid \
            -H 'Connection: keep-alive' \
            -H 'Pragma: no-cache' \
            -H 'Cache-Control: no-cache' \
            -H 'Origin: https://consent.yahoo.com' \
            -H 'Upgrade-Insecure-Requests: 1' \
            -H 'DNT: 1' \
            -H 'Content-Type: application/x-www-form-urlencoded' \
            -H 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36' \
            -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9' \
            -H 'Sec-Fetch-Site: same-origin' \
            -H 'Sec-Fetch-Mode: navigate' \
            -H 'Sec-Fetch-User: ?1' \
            -H 'Sec-Fetch-Dest: document' \
            -H 'Referer: https://consent.yahoo.com/v2/collectConsent?sessionId='$sid \
            -H 'Accept-Language: en-US,en;q=0.9' \
            --data-raw 'sessionId='$sid'&originalDoneUrl='"$(<<<$url url-encode.py)"'&namespace=techcrunch&agree=agree&agree=agree' \
            --compressed
    else
        curl $url
    fi
}

我通过监视 Chrome 网络面板中的请求,成功地只用 curl 实现了这一点:

# techcrunch-curl URL
#
# Fetch URL with curl and write the response to stdout. If the request ends up
# on the consent.yahoo.com "collectConsent" page, submit the consent form for
# that session (with redirects followed) and print that response instead.
#
# zsh only: relies on the $match array set by [[ =~ ]] and on =() process
# substitution. Requires an external `url-encode.py` command on PATH that reads
# the URL on stdin (presumably printing it percent-encoded -- confirm).
# Aborts with an error when URL is missing or empty.
function techcrunch-curl() {
    local url="${1:?}"
    # Probe where the URL finally lands. --location is required: without it
    # curl does not follow redirects and %{url_effective} is just the input URL,
    # so the consent branch below could never be taken.
    local con="$(curl --silent --show-error --location -o /dev/null -w '%{url_effective}' "$url")"
    if [[ "$con" =~ 'https://consent\.yahoo\.com/v2/collectConsent\?sessionId=(.*)' ]] ; then
        # First capture group of the match above: the consent session id.
        local sid="${match[1]}"

        # POST the consent form for this session. --cookie-jar =() hands curl a
        # temporary file from zsh, so cookies set along the redirect chain are
        # kept for this invocation. The headers and form fields (including the
        # repeated agree=agree) were copied from a browser request; it is not
        # known which of them the server actually requires.
        curl -o /dev/stdout --fail --location --cookie-jar =() 'https://consent.yahoo.com/v2/collectConsent?sessionId='$sid \
            -H 'Connection: keep-alive' \
            -H 'Pragma: no-cache' \
            -H 'Cache-Control: no-cache' \
            -H 'Origin: https://consent.yahoo.com' \
            -H 'Upgrade-Insecure-Requests: 1' \
            -H 'DNT: 1' \
            -H 'Content-Type: application/x-www-form-urlencoded' \
            -H 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36' \
            -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9' \
            -H 'Sec-Fetch-Site: same-origin' \
            -H 'Sec-Fetch-Mode: navigate' \
            -H 'Sec-Fetch-User: ?1' \
            -H 'Sec-Fetch-Dest: document' \
            -H 'Referer: https://consent.yahoo.com/v2/collectConsent?sessionId='$sid \
            -H 'Accept-Language: en-US,en;q=0.9' \
            --data-raw 'sessionId='$sid'&originalDoneUrl='"$(<<<$url url-encode.py)"'&namespace=techcrunch&agree=agree&agree=agree' \
            --compressed
    else
        # No consent wall detected: fetch the page directly.
        curl "$url"
    fi
}
函数techcrunch-curl(){
本地url=“${1:?}”
local con=“$(curl-o/dev/null-w%{url\u effective}$url)”
如果[[“$con”=~”https://consent\.yahoo\.com/v2/collectApprove\?sessionId=(.*)];然后
本地sid=“${match[1]}”
curl-o/dev/stdout--fail--location--cookie jar=()'https://consent.yahoo.com/v2/collectConsent?sessionId=“$sid\
-H'连接:保持活力'\
-布拉格马:没有缓存\
-H'缓存控制:无缓存'\
-H'来源:https://consent.yahoo.com' \
-H'升级不安全请求:1'\
-H'DNT:1'\
-H'内容类型:应用程序/x-www-form-urlencoded'\
-H'用户代理:Mozilla/5.0(Macintosh;英特尔Mac OS X 10_15_0)AppleWebKit/537.36(KHTML,如Gecko)Chrome/86.0.4240.75 Safari/537.36'\
-H'接受:text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed exchange;v=b3;q=0.9'\
-H'秒获取站点:相同来源'\
-H'秒提取模式:导航'\
-H'秒获取用户:?1'\
-H'秒获取目的地:文档'\
-H’Referer:https://consent.yahoo.com/v2/collectConsent?sessionId=“$sid\
-H'接受语言:en-US,en;q=0.9'\
--原始数据'sessionId='$sid'&originalDoneUrl=''$(