提取HTML代碼中文字的C#函數(shù)
更新時(shí)間:2007年03月16日 00:00:00 作者:
/// <summary>
/// 去除HTML標(biāo)記
/// </summary>
/// <param name="strHtml">包括HTML的源碼 </param>
/// <returns>已經(jīng)去除后的文字</returns>
public static string StripHTML(string strHtml)
{
string [] aryReg ={
@"<script[^>]*?>.*?</script>",
@"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""'])(\\[""'tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?>",
@"([\r\n])[\s]+",
@"&(quot|#34);",
@"&(amp|#38);",
@"&(lt|#60);",
@"&(gt|#62);",
@"&(nbsp|#160);",
@"&(iexcl|#161);",
@"&(cent|#162);",
@"&(pound|#163);",
@"&(copy|#169);",
@"&#(\d+);",
@"-->",
@"<!--.*\n"
};
string [] aryRep = {
"",
"",
"",
"\"",
"&",
"<",
">",
" ",
"\xa1",//chr(161),
"\xa2",//chr(162),
"\xa3",//chr(163),
"\xa9",//chr(169),
"",
"\r\n",
""
};
string newReg =aryReg[0];
string strOutput=strHtml;
for(int i = 0;i<aryReg.Length;i++)
{
Regex regex = new Regex(aryReg[i],RegexOptions.IgnoreCase );
strOutput = regex.Replace(strOutput,aryRep[i]);
}
strOutput.Replace("<","");
strOutput.Replace(">","");
strOutput.Replace("\r\n","");
return strOutput;
}
/// 去除HTML標(biāo)記
/// </summary>
/// <param name="strHtml">包括HTML的源碼 </param>
/// <returns>已經(jīng)去除后的文字</returns>
public static string StripHTML(string strHtml)
{
string [] aryReg ={
@"<script[^>]*?>.*?</script>",
@"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""'])(\\[""'tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?>",
@"([\r\n])[\s]+",
@"&(quot|#34);",
@"&(amp|#38);",
@"&(lt|#60);",
@"&(gt|#62);",
@"&(nbsp|#160);",
@"&(iexcl|#161);",
@"&(cent|#162);",
@"&(pound|#163);",
@"&(copy|#169);",
@"&#(\d+);",
@"-->",
@"<!--.*\n"
};
string [] aryRep = {
"",
"",
"",
"\"",
"&",
"<",
">",
" ",
"\xa1",//chr(161),
"\xa2",//chr(162),
"\xa3",//chr(163),
"\xa9",//chr(169),
"",
"\r\n",
""
};
string newReg =aryReg[0];
string strOutput=strHtml;
for(int i = 0;i<aryReg.Length;i++)
{
Regex regex = new Regex(aryReg[i],RegexOptions.IgnoreCase );
strOutput = regex.Replace(strOutput,aryRep[i]);
}
strOutput.Replace("<","");
strOutput.Replace(">","");
strOutput.Replace("\r\n","");
return strOutput;
}
相關(guān)文章
c# AcceptEx與完成端口(IOCP)結(jié)合的示例
這篇文章主要介紹了c# AcceptEx與完成端口(IOCP)結(jié)合的示例,幫助大家更好的理解和學(xué)習(xí)使用c#,感興趣的朋友可以了解下2021-03-03
淺析c#范型中的特殊關(guān)鍵字where & default
以下是對(duì)c#范型中的特殊關(guān)鍵字where和default進(jìn)行了詳細(xì)的介紹,需要的朋友可以過來參考下2013-09-09
C# 調(diào)用exe傳參,并獲取打印值的實(shí)例
這篇文章主要介紹了C# 調(diào)用exe傳參,并獲取打印值的實(shí)例,具有很好的參考價(jià)值,希望對(duì)大家有所幫助。一起跟隨小編過來看看吧2021-04-04
c#學(xué)習(xí)教程之JSON文件及解析實(shí)例
json作為互聯(lián)網(wǎng)上輕量便捷的數(shù)據(jù)傳輸格式,越來越受到重視,下面這篇文章主要給大家介紹了關(guān)于c#學(xué)習(xí)教程之JSON文件及解析的相關(guān)資料,文中通過實(shí)例代碼介紹的非常詳細(xì),需要的朋友可以參考下2022-08-08

