April 21, 2005

Top 10 Ranks Google C SHARP

Below are two C# functions that retrieve the top 10 ranked results from Google for a keyword.


--------------------------------


/// <summary>
/// Runs a Google web search for <paramref name="sKeyWord"/> and returns the
/// result links extracted from the first result page that could be downloaded.
/// </summary>
/// <param name="sKeyWord">Search phrase; it is URL-encoded before being sent.</param>
/// <returns>
/// The links gathered by <c>ExtractHrefFromSearchPages</c>. The collection is
/// empty when no page could be downloaded within the retry limit.
/// </returns>
/// <remarks>
/// A null page from <c>GetPage</c> is treated as a failed download. The same
/// result offset is retried a bounded number of times, so a persistent failure
/// can neither loop forever nor shift the search to lower-ranked result pages.
/// </remarks>
public static StringCollection SearchGoogleForKeyWord(string sKeyWord)
{
    // Consecutive failed downloads tolerated before giving up.
    const int nMaxFailedFetches = 5;
    // Google lists ten organic results per page; "start" is a result offset.
    const int nResultsPerPage = 10;

    int nCount = 0;
    int nFailedFetches = 0;
    bool bBreak = false;
    string sURL = "";

    StringCollection strCollection = new StringCollection();

    while (!bBreak)
    {
        string url = "http://www.google.com/search?q=";
        url += System.Web.HttpUtility.UrlEncode(sKeyWord);
        url += "&hl=en&lr=&ie=UTF-8&start=";
        url += (nCount * nResultsPerPage).ToString();
        url += "&sa=N";

        string sPage = GetPage(url);
        if (sPage == null)
        {
            // Retry the same offset; stop once the failure budget is spent.
            nFailedFetches++;
            if (nFailedFetches >= nMaxFailedFetches)
            {
                break;
            }
            continue;
        }

        // Only move on to the next result page after a successful download.
        nFailedFetches = 0;
        nCount++;

        sURL = "";
        // The extractor's return value decides whether another page is fetched.
        bBreak = ExtractHrefFromSearchPages(sPage, strCollection, ref sURL);
    }
    return strCollection;
}


// Matches an href attribute and captures its value in group 1, either the text
// between double quotes or, for unquoted attributes, the run of non-whitespace
// characters that follows. Built once instead of being recompiled on every call.
private static readonly Regex HrefRegex = new Regex(
    "href\\s*=\\s*(?:\"(?<1>[^\"]*)\"|(?<1>\\S+))",
    RegexOptions.IgnoreCase | RegexOptions.Compiled);

// A link containing any of these fragments is skipped rather than collected.
private static readonly string[] ExcludedLinkFragments = new string[]
{
    "oi=news",
    "google.com",
    "&spell=1",
    "&q=+site",
    "/search",
    "www.googleadservices.com",
    "dictionary.reference.com",
    "language_tools",
    "/swr?q=",
    "/help/",
    "search?q=cache",
    "q=related:",
    "/advanced_search?q=",
    "/preferences?q=",
    "/quality_form?q=",
    "/ads/",
    "/services/",
    "/about.html",
    "/url?q="
};

/// <summary>
/// Scans the markup in <paramref name="str"/> for href attributes and appends
/// every link that survives the exclusion filters to
/// <paramref name="strCollection"/>, skipping values already present.
/// </summary>
/// <param name="str">Markup of a downloaded search result page.</param>
/// <param name="strCollection">Collection that receives the accepted links.</param>
/// <param name="sURL">Not read or modified by this method.</param>
/// <returns>Always <c>true</c>.</returns>
/// <remarks>
/// Scanning stops at the first link containing "&amp;filter=0". Links must start
/// with "http://". A link that contains '&gt;' (possible when the attribute was
/// unquoted) is cut at that character and lower-cased before it is stored.
/// </remarks>
public static bool ExtractHrefFromSearchPages(string str,
    StringCollection strCollection, ref string sURL)
{
    for (Match m = HrefRegex.Match(str); m.Success; m = m.NextMatch())
    {
        string sFound = m.Groups[1].Value;

        // Nothing after this link is collected.
        if (sFound.IndexOf("&filter=0") >= 0)
        {
            break;
        }

        if (IsExcludedLink(sFound))
        {
            continue;
        }

        // An unquoted href capture runs on into the tag's closing '>' and the
        // link text; keep only the URL part.
        int nPos = sFound.IndexOf('>');
        if (nPos != -1)
        {
            sFound = sFound.Substring(0, nPos)
                .ToLower(System.Globalization.CultureInfo.InvariantCulture);
        }

        // De-duplicate on the value that is actually stored, so two raw
        // captures that normalise to the same URL are added only once.
        if (!strCollection.Contains(sFound))
        {
            strCollection.Add(sFound);
        }
    }
    return true;
}

// Returns true when the link matches an exclusion rule or is not an absolute http:// URL.
private static bool IsExcludedLink(string sLink)
{
    foreach (string sFragment in ExcludedLinkFragments)
    {
        if (sLink.IndexOf(sFragment) >= 0)
        {
            return true;
        }
    }

    if (sLink == "/options/")
    {
        return true;
    }

    // Relative links and other schemes are dropped.
    return sLink.IndexOf("http://") != 0;
}

No comments: