黄色a级一级片,国产精品特级片,免费在线观看黄网站

本文實例講述了C#.Net基於正則表達式抓取百度百家文章列表的方法。分享給大家供大家參考，具體如下：

工作之餘，學習了一下正則表達式，鑒於實踐是檢驗真理的唯一標準，於是便寫了一個利用正則表達式抓取百度百家文章的例子，具體過程請看下面源碼：

一、獲取百度百家網頁內容

				?

									/// <summary>
									/// Downloads the page at http://baijia.baidu.com/ and passes its
									/// HTML to <c>AnalysisHtml</c> to extract the article list.
									/// </summary>
									/// <returns>The list produced by <c>AnalysisHtml</c> for the downloaded HTML.</returns>
									/// <remarks>
									/// Any exception raised while requesting or reading the page propagates
									/// to the caller with its original stack trace.
									/// </remarks>
									public List<string[]> GetUrl()
									{
									  string url = "http://baijia.baidu.com/";
									  WebRequest webRequest = WebRequest.Create(url);

									  string result;
									  // The using statements release the response and the reader even when
									  // reading fails part-way, which explicit Close() calls did not.
									  using (WebResponse webResponse = webRequest.GetResponse())
									  using (StreamReader reader = new StreamReader(webResponse.GetResponseStream()))
									  {
									    result = reader.ReadToEnd();
									  }

									  // No try/catch here: a catch that only rethrows adds nothing, and
									  // "throw ex;" would discard the original stack trace.
									  return AnalysisHtml(result);
									}

二、通過正則表達式篩選

				?

									/// <summary>
									/// Extracts article entries from HTML using a regular expression.
									/// </summary>
									/// <param name="htmlContent">The HTML text to search.</param>
									/// <returns>
									/// One three-element array per match: [0] the title, [1] the abstract
									/// text, [2] the link URL. The list is empty when nothing matches.
									/// </returns>
									public List<string[]> AnalysisHtml(string htmlContent)
									{
									  List<string[]> list = new List<string[]>();

									  // Named groups: Title (text of the <h3><a> link), Abstract (text at the
									  // start of the "feeds-item-text" paragraph), Url (href of the
									  // "feeds-item-more" link).
									  string strPattern = "<h3><a\\s*.*>(?<Title>[^<]+)</a></h3>.*\\s*<p\\s*class=\"feeds-item-text\">(?<Abstract>[^<]+)<a\\s*href=\"(?<Url>.*)\"\\s*target=\"_blank\"\\s*class=\"feeds-item-more\"\\s*mon=\".*\\s*\">.*\\s*</a></p>";

									  Regex regex = new Regex(strPattern, RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.CultureInvariant);

									  // Matches() yields an empty collection when nothing matches, so a
									  // separate IsMatch() pre-check would only scan the input a second time.
									  foreach (Match match in regex.Matches(htmlContent))
									  {
									    // Read groups by name rather than by position so the mapping stays
									    // correct if the pattern is edited later.
									    string[] str = new string[3];
									    str[0] = match.Groups["Title"].Value;    // title of the list entry
									    str[1] = match.Groups["Abstract"].Value; // abstract / content text
									    str[2] = match.Groups["Url"].Value;      // address the entry links to
									    list.Add(str);
									  }

									  return list;
									}