Warning: file_get_contents(/data/phpspider/zhask/data//catemap/8/swift/18.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Swift 使用regex或alternative快速抓取网页_Swift_Regex_Web Scraping_Swift3 - Fatal编程技术网

Swift 使用regex或alternative快速抓取网页

Swift 使用regex或alternative快速抓取网页,swift,regex,web-scraping,swift3,Swift,Regex,Web Scraping,Swift3,先看下面的更新 我正试图为reddit上指定的子reddit刮取所有版主。 API只允许您获取sub-reddit的所有版主用户名,因此最初我已经获取了所有这些用户名,然后对每个配置文件执行了额外的请求,以获取化身url。这最终超过了API限制 因此,我只想获取以下页面的源代码,并在收集每个页面上的10个用户名和头像url的同时进行分页。这最终将以更少的请求轮询网站。我了解如何做分页部分,但现在我试图了解如何收集用户名和相邻的化身URL 因此,请使用以下url: https://www.redd

先看下面的更新

我正在尝试抓取 Reddit 上某个指定 subreddit 的所有版主。API 只允许获取该 subreddit 的所有版主用户名,因此我最初先获取了全部用户名,然后再为每个用户的个人资料单独发起一次请求,以获取头像 URL。结果请求数超过了 API 的限制。

因此,我打算直接获取下面这个页面的源代码,逐页翻页,并从每一页中收集 10 个用户名及其头像 URL。这样最终可以用更少的请求访问网站。分页部分我知道怎么做,但现在想弄清楚如何收集用户名及其相邻的头像 URL。

因此,请使用以下url:

https://www.reddit.com/r/videos/about/moderators/
所以我会调出整个页面的源代码

将所有mods用户名和url添加到mod对象中,然后添加到数组中

在我得到的字符串上使用正则表达式是个好主意吗

这是到目前为止我的代码,任何帮助都会很好:

    /// Fetches the r/videos moderators page and prints the usernames and
    /// avatar URLs found in its HTML.
    ///
    /// The request is asynchronous: this function returns as soon as the task
    /// has been started, and the results are printed later from the
    /// completion handler.
    func tester() {
        let url = URL(string: "https://www.reddit.com/r/videos/about/moderators")!

        let task = URLSession.shared.dataTask(with: url) { data, response, error in
            guard let data = data, error == nil else {
                // `error` is Optional; describe it explicitly instead of
                // interpolating the Optional directly.
                print("\(String(describing: error))")
                return
            }

            // `String(data:encoding:)` is failable, so the body must be
            // unwrapped before it can be searched.
            guard let html = String(data: data, encoding: .utf8) else {
                print("response body is not valid UTF-8")
                return
            }

            // NSRegularExpression measures ranges in UTF-16 code units, so the
            // search range is derived from the string's indices rather than
            // from its Character count.
            let wholeRange = NSRange(html.startIndex..<html.endIndex, in: html)

            // Returns the text of capture group 1 for every match of `pattern`.
            func firstGroups(of pattern: String) -> [String] {
                guard let regex = try? NSRegularExpression(pattern: pattern, options: .caseInsensitive) else {
                    return []
                }
                return regex.matches(in: html, options: [], range: wholeRange).compactMap { match in
                    Range(match.range(at: 1), in: html).map { String(html[$0]) }
                }
            }

            // A bare character class matches exactly one character, which is
            // why the original patterns never matched; the `+` quantifier lets
            // them match a whole username / attribute value.
            let usernames = firstGroups(of: "href=\"/user/([a-z0-9_-]+)\"")
            let avatarURLs = firstGroups(of: "><img src=\"([^\"]+)\" style")

            print("\(usernames)")
            print("\(avatarURLs)")
        }

        task.resume()
    }

通过调用简单的正则表达式,您应该能够将所有名称和URL拉入两个单独的数组,方法如下:

/// Downloads the r/pics moderators page and prints two arrays: every
/// `href="/user/…"` match and every `><img src="…" style` match found in the
/// returned HTML.
///
/// Returns immediately; the printing happens in the data task's completion
/// handler. Nothing is printed when the request fails or the body is not
/// UTF-8 text.
func tester() {
    let moderatorsPage = URL(string: "https://www.reddit.com/r/pics/about/moderators")!

    URLSession.shared.dataTask(with: moderatorsPage) { body, _, failure in
        guard failure == nil,
              let body = body,
              let markup = String(data: body, encoding: .utf8)
        else { return }

        let userLinkPattern = "href=\"/user/(.*?)\""
        let avatarTagPattern = "><img src=\"(.*?)\" style"

        print(markup.matching(regex: userLinkPattern))
        print(markup.matching(regex: avatarTagPattern))
    }.resume()
}

extension String {
    /// Returns every non-overlapping substring of the receiver that matches
    /// `regex`, in order of appearance.
    ///
    /// Each element is the text of the whole match, not of a capture group.
    ///
    /// - Parameter regex: An `NSRegularExpression` pattern.
    /// - Returns: The matched substrings, or an empty array when the pattern
    ///   is invalid or nothing matches.
    func matching(regex: String) -> [String] {
        guard let expression = try? NSRegularExpression(pattern: regex, options: []) else { return [] }

        // NSRegularExpression measures ranges in UTF-16 code units, whereas
        // `count` counts Characters. Using `count` cuts the search short for
        // any text containing emoji or other multi-unit characters.
        let searchRange = NSRange(startIndex..<endIndex, in: self)

        // `Range(_:in:)` is failable; skip a match that cannot be mapped back
        // to string indices instead of crashing on a force-unwrap.
        return expression.matches(in: self, options: [], range: searchRange).compactMap { match in
            Range(match.range, in: self).map { String(self[$0]) }
        }
    }
}
func测试仪(){
让url=url(字符串:https://www.reddit.com/r/pics/about/moderators")!
让task=URLSession.shared.dataTask(with:url){data,response,中的错误
guard let data=data,error==nil else{return}
guard let htmlString=String(数据:数据,编码:.utf8)else{return}
让names=htmlString.matching(regex:“href=\”/user/(.*)\“”)
让imageUrls=htmlString.matching(regex:“>[String]{
guard let regex=try?NSRegularExpression(模式:regex,选项:[])否则{return[]}
让result=regex.matches(in:self,选项:[],范围:NSMakeRange(0,self.count))
返回result.map{
返回字符串(self[Range($0.Range,in:self)!)
}
}
}

您可以为每个版主创建一个对象,然后根据需要获取要使用的名称和 URL。

您应该能够通过调用一个简单的正则表达式将所有名称和URL拉入两个单独的数组,方法如下:

/// Downloads the r/pics moderators page and prints two arrays: every
/// `href="/user/…"` match and every `><img src="…" style` match found in the
/// returned HTML.
///
/// Returns immediately; the printing happens in the data task's completion
/// handler. Nothing is printed when the request fails or the body is not
/// UTF-8 text.
func tester() {
    let moderatorsPage = URL(string: "https://www.reddit.com/r/pics/about/moderators")!

    URLSession.shared.dataTask(with: moderatorsPage) { body, _, failure in
        guard failure == nil,
              let body = body,
              let markup = String(data: body, encoding: .utf8)
        else { return }

        let userLinkPattern = "href=\"/user/(.*?)\""
        let avatarTagPattern = "><img src=\"(.*?)\" style"

        print(markup.matching(regex: userLinkPattern))
        print(markup.matching(regex: avatarTagPattern))
    }.resume()
}

extension String {
    /// Returns every non-overlapping substring of the receiver that matches
    /// `regex`, in order of appearance.
    ///
    /// Each element is the text of the whole match, not of a capture group.
    ///
    /// - Parameter regex: An `NSRegularExpression` pattern.
    /// - Returns: The matched substrings, or an empty array when the pattern
    ///   is invalid or nothing matches.
    func matching(regex: String) -> [String] {
        guard let expression = try? NSRegularExpression(pattern: regex, options: []) else { return [] }

        // NSRegularExpression measures ranges in UTF-16 code units, whereas
        // `count` counts Characters. Using `count` cuts the search short for
        // any text containing emoji or other multi-unit characters.
        let searchRange = NSRange(startIndex..<endIndex, in: self)

        // `Range(_:in:)` is failable; skip a match that cannot be mapped back
        // to string indices instead of crashing on a force-unwrap.
        return expression.matches(in: self, options: [], range: searchRange).compactMap { match in
            Range(match.range, in: self).map { String(self[$0]) }
        }
    }
}
func测试仪(){
让url=url(字符串:https://www.reddit.com/r/pics/about/moderators")!
让task=URLSession.shared.dataTask(with:url){data,response,中的错误
guard let data=data,error==nil else{return}
guard let htmlString=String(数据:数据,编码:.utf8)else{return}
让names=htmlString.matching(regex:“href=\”/user/(.*)\“”)
让imageUrls=htmlString.matching(regex:“>[String]{
guard let regex=try?NSRegularExpression(模式:regex,选项:[])否则{return[]}
让result=regex.matches(in:self,选项:[],范围:NSMakeRange(0,self.count))
返回result.map{
返回字符串(self[Range($0.Range,in:self)!)
}
}
}

您可以为每个版主创建一个对象,然后根据需要获取要使用的名称和 URL。

使用第三方库不是比重新发明轮子更好吗?我找不到一个在最近 2-3 年内更新过的好用的库。而且页面代码中的用户和头像没有任何 div ID,所以我不知道插件怎样才能很好地帮助获取数据?这些 ID 似乎是随机的,每次重新加载都会重新生成。当然,我希望被证明是错的!使用第三方库而不是重新发明轮子不是更好吗?我找不到一个在最近 2-3 年内更新过的好用的库。还有,代码中的用户和头像没有任何 div id,因此我不认为插件可以很好地帮助获取数据?id 似乎是随机的,在重新加载时重新生成。当然,我希望被证明是错误的!
/// Fetches the r/pics moderators page and prints, for each `href="/user/…"`
/// link found in the HTML, the username followed by the `src` value of the
/// next `><img src="…"` tag after it.
///
/// The request is asynchronous: this function returns as soon as the task has
/// been started, and the printing happens in the completion handler.
func tester() {
    let url = URL(string: "https://www.reddit.com/r/pics/about/moderators")!

    let task = URLSession.shared.dataTask(with: url) { data, response, error in
        guard let data = data, error == nil else {
            print("data was nil")
            return
        }

        guard let htmlString = String(data: data, encoding: .utf8) else {
            print("cannot cast data into string")
            return
        }

        // The delimiters never change, so they are defined once outside the loop.
        let leftSideOfUsernameValue = "href=\"/user/"
        let rightSideOfUsernameValue = "\""
        let leftSideOfAvatarURLValue = "><img src=\""
        // A double-quoted attribute value ends at the next quote; requiring
        // `">` would only work when `src` is the last attribute of the tag.
        let rightSideOfAvatarURLValue = "\""

        // Returns the text between the first `left` found at or after `start`
        // and the first `right` that follows it, plus the index just past
        // `right` so the caller can continue searching from there.
        func slice(after start: String.Index, from left: String, to right: String) -> (text: Substring, end: String.Index)? {
            guard let leftRange = htmlString.range(of: left, range: start..<htmlString.endIndex),
                  let rightRange = htmlString.range(of: right, range: leftRange.upperBound..<htmlString.endIndex)
            else { return nil }
            return (htmlString[leftRange.upperBound..<rightRange.lowerBound], rightRange.upperBound)
        }

        // Walk the page with a moving cursor. Searching from the start on
        // every pass would return the same first match each time, and the
        // loop ends by itself once no further username link is found.
        var cursor = htmlString.startIndex
        while let username = slice(after: cursor, from: leftSideOfUsernameValue, to: rightSideOfUsernameValue) {
            print(username.text)

            guard let avatarURL = slice(after: username.end, from: leftSideOfAvatarURLValue, to: rightSideOfAvatarURLValue) else {
                print("Error")
                return
            }
            print(avatarURL.text)

            cursor = avatarURL.end
        }
    }

    task.resume()
}
           // NOTE(review): loose fragment. `avatarURL`, `rightSideOfAvatarURLValue`
           // and `htmlString` are not declared in these lines; presumably this is
           // meant to sit at the end of a parsing loop so the next pass starts
           // after the avatar that was just read. Confirm against the caller.
           //
           // Joins the avatar URL and its right-hand delimiter into one string.
           let endString = String(avatarURL + rightSideOfAvatarURLValue)
            // NOTE(review): `endString.endIndex` comes from `endString`, not from
            // `htmlString`. String indices are presumably only meaningful for the
            // string that produced them, so this may not point just past the
            // avatar inside `htmlString`. Verify before relying on it.
            // `offsetBy: 0` leaves the index where it is.
            let endIndex = htmlString.index(endString.endIndex, offsetBy: 0)
            // Takes everything in `htmlString` from `endIndex` to the end.
            let substringer = htmlString[endIndex...]
            // Replaces `htmlString` with that remainder. This needs `htmlString`
            // to be a `var`; TODO confirm how it is declared at the use site.
            htmlString = String(substringer)
/// Downloads the r/pics moderators page and prints two arrays: every
/// `href="/user/…"` match and every `><img src="…" style` match found in the
/// returned HTML.
///
/// Returns immediately; the printing happens in the data task's completion
/// handler. Nothing is printed when the request fails or the body is not
/// UTF-8 text.
func tester() {
    let moderatorsPage = URL(string: "https://www.reddit.com/r/pics/about/moderators")!

    URLSession.shared.dataTask(with: moderatorsPage) { body, _, failure in
        guard failure == nil,
              let body = body,
              let markup = String(data: body, encoding: .utf8)
        else { return }

        let userLinkPattern = "href=\"/user/(.*?)\""
        let avatarTagPattern = "><img src=\"(.*?)\" style"

        print(markup.matching(regex: userLinkPattern))
        print(markup.matching(regex: avatarTagPattern))
    }.resume()
}

extension String {
    /// Returns every non-overlapping substring of the receiver that matches
    /// `regex`, in order of appearance.
    ///
    /// Each element is the text of the whole match, not of a capture group.
    ///
    /// - Parameter regex: An `NSRegularExpression` pattern.
    /// - Returns: The matched substrings, or an empty array when the pattern
    ///   is invalid or nothing matches.
    func matching(regex: String) -> [String] {
        guard let expression = try? NSRegularExpression(pattern: regex, options: []) else { return [] }

        // NSRegularExpression measures ranges in UTF-16 code units, whereas
        // `count` counts Characters. Using `count` cuts the search short for
        // any text containing emoji or other multi-unit characters.
        let searchRange = NSRange(startIndex..<endIndex, in: self)

        // `Range(_:in:)` is failable; skip a match that cannot be mapped back
        // to string indices instead of crashing on a force-unwrap.
        return expression.matches(in: self, options: [], range: searchRange).compactMap { match in
            Range(match.range, in: self).map { String(self[$0]) }
        }
    }
}