代码语言
.
CSharp
.
JS
Java
Asp.Net
C
MSSQL
PHP
Css
PLSQL
Python
Shell
EBS
ASP
Perl
ObjC
VB.Net
VBS
MYSQL
GO
Delphi
AS
DB2
Domino
Rails
ActionScript
Scala
代码分类
文件
系统
字符串
数据库
网络相关
图形/GUI
多媒体
算法
游戏
Jquery
Extjs
Android
HTML5
菜单
网页交互
WinForm
控件
企业应用
安全与加密
脚本/批处理
开放平台
其它
【
CSharp
】
获取网页标题title的代码
作者:
Dezai.CN
/ 发布于
2011/9/21
/
663
using System;
using System.Net;
using System.Collections;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Web;
using System.Web.SessionState;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.HtmlControls;
using System.Data.OleDb;
using System.IO;
using System.Text;
using System.Globalization;
using System.Text.RegularExpressions;

namespace GETURL
{
    /// <summary>
    /// Web Forms code-behind that downloads the page at a user-supplied URL and
    /// reports the host's DNS information, the document title, and whether the
    /// document's meta tags carry http-equiv/content attributes.
    /// The URL comes from the <c>searchurl</c> text box, or from the <c>url</c>
    /// query-string parameter on page load.
    /// </summary>
    public class getText : System.Web.UI.Page
    {
        protected System.Web.UI.WebControls.Label label_mess;
        protected System.Web.UI.WebControls.Label content;
        protected System.Web.UI.WebControls.Label MetaUrl;
        protected System.Web.UI.WebControls.Label GetTitle;
        protected System.Web.UI.WebControls.Label label_Title;
        protected System.Web.UI.WebControls.Label startSpider;
        protected System.Web.UI.WebControls.TextBox searchurl;
        protected System.Web.UI.WebControls.Button urlButton;
        protected System.Web.UI.WebControls.Label txtsiteurl;

        // Host segment: dot-separated labels of word characters and hyphens,
        // with an optional trailing dot.
        private static readonly Regex HostNamePattern =
            new Regex(@"^(\w+(-\w+)*)(\.(\w+(-\w+)*))*(\?\S*)?(|\.)$");

        // Dotted-quad IPv4 literal. The dots are escaped so that only a real
        // '.' separates the octets.
        private static readonly Regex Ipv4Pattern = new Regex(
            @"^(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\." +
            @"(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\." +
            @"(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\." +
            @"(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])$");

        /// <summary>
        /// Runs the spider immediately when the page is requested with a
        /// non-empty <c>url</c> query-string parameter.
        /// </summary>
        private void Page_Load(Object sender, EventArgs e)
        {
            string requestedUrl = Request.QueryString["url"];
            if (requestedUrl != null && requestedUrl != "")
            {
                searchurl.Text = requestedUrl;
                getHTTP(sender, e);
            }
        }

        /// <summary>
        /// Validates the URL in <c>searchurl</c>, fetches it, and fills the result
        /// labels (<c>label_mess</c>, <c>content</c>, <c>GetTitle</c>, <c>MetaUrl</c>).
        /// Validation problems and fetch failures are reported through the labels;
        /// no exception escapes this method.
        /// </summary>
        /// <param name="sender">Event source; not used.</param>
        /// <param name="e">Event data; not used.</param>
        public void getHTTP(Object sender, EventArgs e)
        {
            startSpider.Text = "";
            GetTitle.Text = "";
            MetaUrl.Text = "";

            if (searchurl.Text == "")
            {
                ShowInputError("<hr>Please enter the URL");
                return;
            }

            txtsiteurl.Text = "--spider url'Result!";
            string aUrl = searchurl.Text;

            // "http://host/path" splits into ["http:", "", "host", "path", ...].
            string[] urlParts = aUrl.Split('/');

            // Scheme comparison ignores case so "HTTP://..." is accepted as well.
            if (urlParts[0].ToLower(CultureInfo.InvariantCulture) != "http:")
            {
                ShowInputError("<hr>Url form must match'http://' ahead!!");
                return;
            }
            if (urlParts.Length < 3)
            {
                ShowInputError("<hr>Please write the whole url text!");
                return;
            }
            if (urlParts[2] == "" || urlParts[1] != "")
            {
                ShowInputError("<hr>Host Domain must Exists!!!");
                return;
            }

            string hosturl = urlParts[2];
            try
            {
                if (!HostNamePattern.IsMatch(hosturl))
                {
                    label_mess.Text = "";
                    content.Text = "<hr>Host domain Wrong!";
                    return;
                }

                // Everything that originates from the user or from the remote
                // site is HTML-encoded before it is placed into label markup.
                label_mess.Text = "<hr>You spidering the site:<font color=red size='3'>"
                    + HttpUtility.HtmlEncode(aUrl) + "</font>";

                bool hostIsIp = Ipv4Pattern.IsMatch(hosturl);

                // Assigned (not appended) so results do not pile up across postbacks.
                content.Text = DescribeHost(hosturl, hostIsIp);

                // Created inside the try so a malformed URL is reported like any
                // other failure. The original assigned ContentType only after the
                // response had been obtained, where it could not influence the
                // request; that assignment is dropped.
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(aUrl);

                string html;
                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                using (Stream body = response.GetResponseStream())
                using (StreamReader reader = new StreamReader(body, ChooseEncoding(aUrl, hosturl, hostIsIp)))
                {
                    html = reader.ReadToEnd();
                }

                // Parse once, on the complete document, rather than on every
                // partial chunk while reading.
                string title = ExtractTitle(html);
                if (title != null)
                {
                    // Decode first so entities already present in the title are
                    // not double-encoded.
                    GetTitle.Text = "Spider Title :<font color='red' size='3'>"
                        + HttpUtility.HtmlEncode(HttpUtility.HtmlDecode(title))
                        + "</font>--" + (html.Length / 1024) + "KB<hr color=red>";
                }
                else
                {
                    GetTitle.Text = "No Title Document!";
                }

                MetaUrl.Text = DescribeMetaTags(html);
            }
            catch (WebException exp)
            {
                ReportSpiderFailure(exp, exp.Message + " (" + exp.Status + ")");
            }
            catch (Exception exp)
            {
                // Deliberately broad: the page is best-effort and must not crash
                // on DNS, URI-format or parsing errors.
                ReportSpiderFailure(exp, exp.Message);
            }
        }

        /// <summary>Shows a validation message and clears the result area.</summary>
        private void ShowInputError(string message)
        {
            label_mess.Text = message;
            content.Text = "";
        }

        /// <summary>
        /// Shows the failure notice to the user and records the exception in the
        /// page trace (console output is not visible from a web page).
        /// </summary>
        private void ReportSpiderFailure(Exception exp, string detail)
        {
            Trace.Warn("getHTTP", detail, exp);
            content.Text = "The Urls Spider has Time Out!,Try Again...";
            startSpider.Text = "Or you Check the Url is true you want Test!";
        }

        /// <summary>
        /// Builds the host-information markup: the reverse-resolved host name for
        /// an IPv4 literal, otherwise the domain and its resolved addresses.
        /// </summary>
        private static string DescribeHost(string host, bool hostIsIp)
        {
            // NOTE(review): GetHostByAddress/GetHostByName are kept from the
            // original; newer framework versions mark them obsolete in favour of
            // Dns.GetHostEntry - confirm the target framework before switching.
            StringBuilder markup = new StringBuilder();
            if (hostIsIp)
            {
                IPHostEntry entry = Dns.GetHostByAddress(host);
                markup.Append("Host Name: <font color=red size='3'> ");
                markup.Append(HttpUtility.HtmlEncode(entry.HostName));
                markup.Append("</font><hr color='red'>");
                return markup.ToString();
            }

            markup.Append("Host domain: <font color=red size='3'> ");
            markup.Append(HttpUtility.HtmlEncode(host));
            markup.Append("</font><hr color='red'>");

            IPAddress[] addresses = Dns.GetHostByName(host).AddressList;
            string[] addressTexts = new string[addresses.Length];
            for (int i = 0; i < addresses.Length; i++)
            {
                addressTexts[i] = addresses[i].ToString();
            }

            // Separated so multiple addresses stay readable.
            markup.Append("Host IP: <font color=red size=3>");
            markup.Append(String.Join(", ", addressTexts));
            markup.Append("</font><hr color=red>");
            return markup.ToString();
        }

        /// <summary>
        /// Picks the text encoding used to decode the response. This reproduces
        /// the original hard-coded heuristic: UTF-8 for the microsoft.com hosts
        /// and for non-.htm/.html resources on the two listed IP hosts, the
        /// system default encoding for everything else.
        /// </summary>
        private static Encoding ChooseEncoding(string url, string host, bool hostIsIp)
        {
            Encoding utf8 = Encoding.GetEncoding("utf-8");

            if (!hostIsIp)
            {
                string lowerHost = host.ToLower(CultureInfo.InvariantCulture);
                return (lowerHost == "microsoft.com" || lowerHost == "www.microsoft.com")
                    ? utf8
                    : Encoding.Default;
            }

            if (host != "127.0.0.1" && host != "172.19.23.14")
            {
                return Encoding.Default;
            }

            // The extension is whatever follows the last '.' before any query string.
            int queryStart = url.IndexOf('?');
            string path = queryStart == -1 ? url : url.Substring(0, queryStart);
            string extension = path.Substring(path.LastIndexOf('.') + 1)
                                   .ToLower(CultureInfo.InvariantCulture);

            return (extension == "htm" || extension == "html") ? Encoding.Default : utf8;
        }

        /// <summary>
        /// Returns the text between the first opening title tag (with or without
        /// attributes) and the next closing title tag, or <c>null</c> when the
        /// document has no complete title element. Matching ignores case.
        /// </summary>
        private static string ExtractTitle(string html)
        {
            // Offsets found in the lower-cased copy are applied to the original
            // text so the title keeps its casing.
            string lower = html.ToLower(CultureInfo.InvariantCulture);

            int openTag = lower.IndexOf("<title");
            if (openTag < 0)
            {
                return null;
            }

            int openTagEnd = lower.IndexOf('>', openTag);
            if (openTagEnd < 0)
            {
                return null;
            }

            int closeTag = lower.IndexOf("</title", openTagEnd + 1);
            if (closeTag < 0)
            {
                return null;
            }

            return html.Substring(openTagEnd + 1, closeTag - openTagEnd - 1);
        }

        /// <summary>
        /// Inspects the span from the first to the last meta tag and returns the
        /// status text for the <c>MetaUrl</c> label: "Not Charset Code" when there
        /// is no meta tag, "OK" when the span contains both an http-equiv and a
        /// content attribute, otherwise "NO". Matching ignores case.
        /// </summary>
        private static string DescribeMetaTags(string html)
        {
            string lower = html.ToLower(CultureInfo.InvariantCulture);

            int firstMeta = lower.IndexOf("<meta");
            if (firstMeta < 0)
            {
                return "Not Charset Code";
            }

            int lastMeta = lower.LastIndexOf("<meta");
            int lastMetaEnd = lower.IndexOf('>', lastMeta);
            string metaSpan = lastMetaEnd < 0
                ? lower.Substring(firstMeta)
                : lower.Substring(firstMeta, lastMetaEnd - firstMeta + 1);

            // Spaces are removed so `http-equiv = "..."` is recognised too.
            string compact = metaSpan.Replace(" ", "");
            bool hasBoth = compact.IndexOf("http-equiv=") != -1
                        && compact.IndexOf("content=") != -1;

            return hasBoth ? "OK" : "NO";
        }

        #region WeisNet WebTools
        /// <summary>Wires up the page events before the base initialization runs.</summary>
        override protected void OnInit(EventArgs e)
        {
            InitializeComponent();
            base.OnInit(e);
        }

        /// <summary>Subscribes <c>Page_Load</c> to the page's Load event.</summary>
        private void InitializeComponent()
        {
            this.Load += new System.EventHandler(this.Page_Load);
        }
        #endregion
    }
}
/*---------------------------------------------------Power By WeisNet System-----------------------------------------------*/
试试其它关键字
获取网页标题
同语言下
.
文件IO 操作类库
.
Check图片类型[JPEG(.jpg 、.jpeg),TIF,GIF,BMP,PNG,P
.
机器名和IP取得(IPV4 IPV6)
.
Tiff转换Bitmap
.
linqHelper
.
MadieHelper.cs
.
RegHelper.cs
.
如果关闭一个窗体后激活另一个窗体的事件或方法
.
创建日志通用类
.
串口辅助开发类
可能有用的
.
C#实现的html内容截取
.
List 切割成几份 工具类
.
SQL查询 多列合并成一行用逗号隔开
.
一行一行读取txt的内容
.
C#动态修改文件夹名称(FSO实现,不移动文件)
.
c# 移动文件或文件夹
.
c#图片添加水印
.
Java PDF转换成图片并输出给前台展示
.
网站后台修改图片尺寸代码
.
处理大图片在缩略图时的展示
Dezai.CN
贡献的其它代码
(
4037
)
.
多线程Socket服务器模块
.
生成随机密码
.
清除浮动样式
.
弹出窗口居中
.
抓取url的函数
.
使用base HTTP验证
.
div模拟iframe嵌入效果
.
通过header转向的方法
.
Session操作类
.
执行sqlite输入插入操作后获得自动编号的ID
Copyright © 2004 - 2024 dezai.cn. All Rights Reserved
站长博客
粤ICP备13059550号-3