以vb6为例,获取给定的网址的外链的链接地址和链接文本,这里过滤了网站本身的网址,还有一些客服代码 也可以以常见的搜索引擎蜘蛛模拟访问 VB6需要引用 :在“项目”菜单上单击“引用“Microsoft VBScript Regular Expressions 5.5
Public Function HtmlGetLinks(url, zhizu) ' 从网址内获取外链地址 参数:网址,蜘蛛类型 'On Error Resume Next Dim str, reg, objMatches, key1 url2 = Replace(url, "http://", "") url = "http://" & url2 str = GetUrlHtmlUTF(url, "auto", zhizu) Set reg = New RegExp reg.IgnoreCase = True reg.Global = True reg.Pattern = "href=[""'s]?http://([^s]+)(.*?)b[""'s]?(.*?)>(.*?)</a>" Set objMatches = reg.Execute(str) If objMatches.Count > 0 Then For i = 0 To objMatches.Count - 1 url1 = objMatches(i).SubMatches(0) url1 = Replace(url1, """", "") url1 = Replace(url1, "'", "") Keyword = objMatches(i).SubMatches(3) Keyword = LCase(Keyword) Keyword = Replace(Keyword, "'", """") If InStr(Keyword, "src") > 0 Then Keyword = FindStrMulti(Keyword, "src=""", """", "") Keyword = "图片:" & Keyword Else Keyword = RemoveHTML(Keyword) End If If InStr(url1, url2) = 0 And InStr(Keyword, "wpa.qq.com") = 0 And InStr(url1, "51.la") = 0 And InStr(url1, "cnzz.com") = 0 And InStr(url1, "taobao.com") = 0 And InStr(url1, "beian.gov.cn") = 0 Then key1 = key1 & (Keyword & "|" & url1) & vbCrLf End If DoEvents Next End If HtmlGetLinks = key1 End Function
Public Function GetUrlHtmlUTF(url, CodeBase, zhizu) Dim xmlHTTP1, GetCode, GetCode1 Set xmlHTTP1 = CreateObject("Microsoft.XMLHTTP") xmlHTTP1.Open "get", url, True If zhizu <> "" Then Select Case zhizu Case "baidu" xmlHTTP1.setRequestHeader "UserAgent:", "Baiduspider+(+http://www.baidu.com/search/spider.htm)" Case "google" xmlHTTP1.setRequestHeader "UserAgent:", "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)" Case "yahoo" xmlHTTP1.setRequestHeader "UserAgent:", "Mozilla/5.0 (compatible; Yahoo! Slurp China; http://misc.yahoo.com.cn/help.html)" Case "yahoo" xmlHTTP1.setRequestHeader "UserAgent:", "Mozilla/5.0 (compatible; Yahoo! Slurp China; http://misc.yahoo.com.cn/help.html)" Case "youdao" xmlHTTP1.setRequestHeader "UserAgent:", "Mozilla/5.0 (compatible; YodaoBot/1.0; http://www.yodao.com/help/webmaster/spider/; )" Case "soso" xmlHTTP1.setRequestHeader "UserAgent:", "Sosospider+(+http://help.soso.com/webspider.htm)" Case "sogou" xmlHTTP1.setRequestHeader "UserAgent:", "Sogou web spider/4.0(+http://www.sogou.com/docs/help/webmasters.htm#07)" End Select End If xmlHTTP1.send While xmlHTTP1.ReadyState <> 4 DoEvents Wend GetCode = xmlHTTP1.ResponseBody If CodeBase = "auto" Then GetCode1 = StrConv(Left(GetCode, 500), vbUnicode) If InStr(GetCode1, "charset=gb2312") > 0 Then GetCode = StrConv(GetCode, vbUnicode) Else If CStr(GetCode) <> "" Then GetCode = BytesToBstr(GetCode, "UTF-8") End If Else If CStr(GetCode) <> "" Then GetCode = BytesToBstr(GetCode, CodeBase) End If GetUrlHtmlUTF = GetCode Set ObjXML = Nothing End Function (编辑:晋中站长网)
【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时与联系站长删除相关内容!
|