搜索网络,发现C#通常有三种方法获取网页内容,使用WebClient、WebBrowser或者HttpWebRequest/HttpWebResponse。。。
方法一:使用WebClient (引用自:http://fbljava.blog.163.com/blog/static/265211742008712105145244/)
static void Main(string[] args) | |
{ | |
try { | |
WebClient MyWebClient = new WebClient(); | |
MyWebClient.Credentials = CredentialCache.DefaultCredentials;//获取或设置用于向Internet资源的请求进行身份验证的网络凭据 | |
Byte[] pageData = MyWebClient.DownloadData(“http://www.163.com”); //从指定网站下载数据 | |
string pageHtml = Encoding.Default.GetString(pageData); //如果获取网站页面采用的是GB2312,则使用这句 | |
//string pageHtml = Encoding.UTF8.GetString(pageData); //如果获取网站页面采用的是UTF-8,则使用这句 | |
Console.WriteLine(pageHtml);//在控制台输入获取的内容 | |
using (StreamWriter sw = new StreamWriter("c:\\test\\ouput.html"))//将获取的内容写入文本 | |
{ | |
sw.Write(pageHtml); | |
} | |
Console.ReadLine(); //让控制台暂停,否则一闪而过了 | |
} | |
catch(WebException webEx) { | |
Console.WriteLine(webEx.Message.ToString()); | |
} | |
} |
方法二:使用WebBrowser (引用自:http://topic.csdn.net/u/20091225/14/4ea221cd-4c1e-4931-a6db-1fd4ee7398ef.html)
WebBrowser web = new WebBrowser(); | |
web.Navigate("http://www.xjflcp.com/ssc/"); | |
web.DocumentCompleted += new WebBrowserDocumentCompletedEventHandler(web_DocumentCompleted); | |
void web_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e) | |
{ | |
WebBrowser web = (WebBrowser)sender; | |
HtmlElementCollection ElementCollection = web.Document.GetElementsByTagName("Table"); | |
foreach (HtmlElement item in ElementCollection) | |
{ | |
File.AppendAllText("Kaijiang_xj.txt", item.InnerText); | |
} | |
} |
方法三:使用HttpWebRequest/HttpWebResponse (引用自:http://hi.baidu.com/onlyafar/blog/item/7ac4c6bf92d4810019d81f98.html)
HttpWebRequest httpReq; | |
HttpWebResponse httpResp; | |
string strBuff = ""; | |
char[] cbuffer = new char[256]; | |
int byteRead = 0; | |
string filename = @"c:\log.txt"; | |
///定义写入流操作 | |
public void WriteStream() | |
{ | |
Uri httpURL = new Uri(txtURL.Text); | |
///HttpWebRequest类继承于WebRequest,并没有自己的构造函数,需通过WebRequest的Creat方法 建立,并进行强制的类型转换 | |
httpReq = (HttpWebRequest)WebRequest.Create(httpURL); | |
///通过HttpWebRequest的GetResponse()方法建立HttpWebResponse,强制类型转换 | |
httpResp = (HttpWebResponse) httpReq.GetResponse(); | |
///GetResponseStream()方法获取HTTP响应的数据流,并尝试取得URL中所指定的网页内容 | |
///若成功取得网页的内容,则以System.IO.Stream形式返回,若失败则产生ProtoclViolationException错 误。在此正确的做法应将以下的代码放到一个try块中处理。这里简单处理 | |
Stream respStream = httpResp.GetResponseStream(); | |
///返回的内容是Stream形式的,所以可以利用StreamReader类获取GetResponseStream的内容,并以 | |
StreamReader类的Read方法依次读取网页源程序代码每一行的内容,直至行尾(读取的编码格式:UTF8) | |
StreamReader respStreamReader = new StreamReader(respStream,Encoding.UTF8); | |
byteRead = respStreamReader.Read(cbuffer,0,256); | |
while (byteRead != 0) | |
{ | |
string strResp = new string(cbuffer,0,byteRead); | |
strBuff = strBuff + strResp; | |
byteRead = respStreamReader.Read(cbuffer,0,256); | |
} | |
respStream.Close(); | |
txtHTML.Text = strBuff; | |
} |