C# 使用正則表達式替換 HTML 標簽

Blake36Q 9年前發布 | 839 次閱讀 C# c# 正則表達式
using System.Text.RegularExpressions;     //包含正則表達式   

/// <summary>
/// Strips HTML markup from a string and returns the remaining text, HTML-encoded and trimmed.
/// </summary>
/// <remarks>
/// Processing order: script elements, all remaining tags, line breaks followed by
/// whitespace, comment remnants, a fixed set of character entities, any other numeric
/// entity (dropped), stray angle brackets and CR/LF pairs, and finally HTML encoding.
/// </remarks>
/// <param name="Htmlstring">The HTML fragment to clean. May be null or empty.</param>
/// <returns>
/// The HTML-encoded plain text, or an empty string when <paramref name="Htmlstring"/>
/// is null or empty.
/// </returns>
public static string NoHTML(string Htmlstring)
{
  if (string.IsNullOrEmpty(Htmlstring))
  {
    return string.Empty;
  }

  const RegexOptions options = RegexOptions.IgnoreCase;

  // Remove script elements. Singleline lets '.' cross line breaks so that
  // multi-line script bodies are removed instead of leaking into the text.
  Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", options | RegexOptions.Singleline);

  // Remove every remaining tag.
  Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", options);

  // Collapse a line break together with the whitespace that follows it.
  Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", options);

  // Remove what is left of HTML comments.
  Htmlstring = Regex.Replace(Htmlstring, @"-->", "", options);
  Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", options);

  // Decode the supported character entities.
  Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"", options);
  Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<", options);
  Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">", options);
  Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", " ", options);
  Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\u00A1", options);
  Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\u00A2", options);
  Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\u00A3", options);
  Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\u00A9", options);

  // Decode '&amp;' only after the other named entities: decoding it first would
  // turn "&amp;lt;" into "&lt;" and then into "<" (double decoding).
  Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&", options);

  // Drop any numeric entity that was not handled above.
  Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "", options);

  // Strings are immutable: the results must be assigned back to take effect.
  Htmlstring = Htmlstring.Replace("<", "");
  Htmlstring = Htmlstring.Replace(">", "");
  Htmlstring = Htmlstring.Replace("\r\n", "");

  // WebUtility.HtmlEncode does not need an active ASP.NET request, unlike
  // HttpContext.Current.Server, which is null outside of one.
  return System.Net.WebUtility.HtmlEncode(Htmlstring).Trim();
}

/// <summary>
/// Replaces all HTML tags with an empty string and decodes a fixed set of character
/// entities, returning the remaining plain text.
/// </summary>
/// <remarks>
/// Unlike an encoder, this method returns decoded text: the result is not HTML-encoded.
/// Stray angle brackets and CR/LF pairs left after processing are removed.
/// </remarks>
/// <param name="strHtml">The HTML fragment to strip. May be null or empty.</param>
/// <returns>
/// The text with markup removed, or an empty string when <paramref name="strHtml"/>
/// is null or empty.
/// </returns>
public static string StripHTML(string strHtml)
{
  if (string.IsNullOrEmpty(strHtml))
  {
    return string.Empty;
  }

  // Each row is { pattern, replacement }; rows are applied in order.
  // Keeping both in one table prevents the two halves from drifting apart.
  string[,] rules =
  {
    // Script elements; (?s) lets '.' cross line breaks so multi-line bodies are removed.
    { @"(?s)<script[^>]*?>.*?</script>", "" },
    // Opening, closing and self-closing tags, with optional namespace prefix and
    // quoted or bare attribute values (\7 refers back to the opening quote).
    // NOTE(review): the nested quantifiers here may backtrack heavily on malformed
    // input; consider a match timeout if this is fed untrusted HTML.
    { @"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""'])(\\[""'tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?>", "" },
    // A line break together with the whitespace that follows it.
    { @"([\r\n])[\s]+", "" },
    { @"&(quot|#34);", "\"" },
    { @"&(lt|#60);", "<" },
    { @"&(gt|#62);", ">" },
    { @"&(nbsp|#160);", " " },
    { @"&(iexcl|#161);", "\u00A1" },
    { @"&(cent|#162);", "\u00A2" },
    { @"&(pound|#163);", "\u00A3" },
    { @"&(copy|#169);", "\u00A9" },
    // '&amp;' is decoded after the other named entities so that "&amp;lt;" yields
    // the literal text "&lt;" rather than being decoded twice into "<".
    { @"&(amp|#38);", "&" },
    // Any other numeric entity is dropped.
    { @"&#(\d+);", "" },
    // Turn a comment terminator into a line break so the next rule can delete
    // the comment from its opener through the end of that line.
    { @"-->", "\r\n" },
    { @"<!--.*\n", "" }
  };

  string strOutput = strHtml;
  for (int i = 0; i < rules.GetLength(0); i++)
  {
    strOutput = Regex.Replace(strOutput, rules[i, 0], rules[i, 1], RegexOptions.IgnoreCase);
  }

  // Strings are immutable: the results must be assigned back to take effect.
  strOutput = strOutput.Replace("<", "");
  strOutput = strOutput.Replace(">", "");
  strOutput = strOutput.Replace("\r\n", "");
  return strOutput;
}
 本文由用戶 Blake36Q 自行上傳分享,僅供網友學習交流。所有權歸原作者,若您的權利被侵害,請聯系管理員。
 轉載本站原創文章,請注明出處,并保留原始鏈接、圖片水印。
 本站是一個以用戶分享為主的開源技術平臺,歡迎各類分享!