Below are two C# functions that retrieve the top 10 ranked search results from Google.
-------------------------------
/// <summary>
/// Runs a Google web search for <paramref name="sKeyWord"/> and returns the result
/// links extracted from the downloaded results page(s).
/// </summary>
/// <param name="sKeyWord">Search phrase; it is URL-encoded before being placed in the query string.</param>
/// <returns>
/// The links collected by <c>ExtractHrefFromSearchPages</c>. The collection is empty
/// when no results page could be downloaded.
/// </returns>
/// <remarks>
/// Paging continues until <c>ExtractHrefFromSearchPages</c> returns true. As that method
/// is written in this file it always returns true, so in practice only the first
/// results page (start=0) is processed.
/// </remarks>
public static StringCollection SearchGoogleForKeyWord(string sKeyWord)
{
    // Upper bound on consecutive failed downloads, so an unreachable server
    // cannot keep this method spinning forever.
    const int nMaxFetchAttempts = 3;

    StringCollection strCollection = new StringCollection();

    // Everything in the URL except the result offset is the same for every page.
    string sBaseUrl = "http://www.google.com/search?q="
        + System.Web.HttpUtility.UrlEncode(sKeyWord)
        + "&hl=en&lr=&ie=UTF-8&start=";

    int nPage = 0;
    int nFailedAttempts = 0;

    while (nFailedAttempts < nMaxFetchAttempts)
    {
        // Google pages its results ten at a time via the "start" offset.
        string sPage = GetPage(sBaseUrl + (nPage * 10).ToString() + "&sa=N");

        // NOTE(review): GetPage is defined elsewhere; a null result is treated here
        // as "download failed" - confirm against its implementation.
        if (sPage == null)
        {
            // Retry the SAME page. Advancing to the next offset here would return
            // lower-ranked results in place of the top ones.
            nFailedAttempts++;
            continue;
        }

        nFailedAttempts = 0;
        nPage++;

        // Required by the callee's signature; its value is not used afterwards.
        string sURL = "";
        if (ExtractHrefFromSearchPages(sPage, strCollection, ref sURL))
        {
            break;
        }
    }

    return strCollection;
}
// Matches href="..." (quoted) or href=... (unquoted, up to the next whitespace) and
// captures the target in group 1. Built once: constructing a RegexOptions.Compiled
// regex on every call is expensive.
private static readonly Regex s_hrefPattern = new Regex(
    "href\\s*=\\s*(?:\"(?<1>[^\"]*)\"|(?<1>\\S+))",
    RegexOptions.IgnoreCase | RegexOptions.Compiled);

// A link containing any of these fragments is not an organic search result
// (Google's own pages, ads, cache/related links, navigation, ...) and is skipped.
private static readonly string[] s_skippedHrefFragments = new string[]
{
    "oi=news",
    "google.com",
    "&spell=1",
    "&q=+site",
    "/search",
    "www.googleadservices.com",
    "dictionary.reference.com",
    "language_tools",
    "/swr?q=",
    "/help/",
    "search?q=cache",
    "q=related:",
    "/advanced_search?q=",
    "/preferences?q=",
    "/quality_form?q=",
    "/ads/",
    "/services/",
    "/about.html",
    "/url?q="
};

/// <summary>
/// Scans the HTML of a search results page for <c>href</c> attributes and appends
/// every link that looks like an external result to <paramref name="strCollection"/>.
/// </summary>
/// <param name="str">HTML text of the results page.</param>
/// <param name="strCollection">Receives the extracted links; a link already present is not added again.</param>
/// <param name="sURL">Not read or written by this method; kept so existing callers still compile.</param>
/// <returns>Always true.</returns>
/// <remarks>
/// Only links that start with "http://" are kept, so relative links (for example
/// "/options/") and "https://" links are skipped. Scanning stops at the first link
/// containing "&amp;filter=0".
/// </remarks>
public static bool ExtractHrefFromSearchPages(string str,
    StringCollection strCollection, ref string sURL)
{
    for (Match m = s_hrefPattern.Match(str); m.Success; m = m.NextMatch())
    {
        string sFound = m.Groups[1].Value;

        // Nothing after this link is collected.
        if (sFound.IndexOf("&filter=0", System.StringComparison.Ordinal) >= 0)
        {
            break;
        }

        if (!sFound.StartsWith("http://", System.StringComparison.Ordinal)
            || ContainsSkippedHrefFragment(sFound))
        {
            continue;
        }

        // An unquoted href runs up to the next whitespace, so the capture can include
        // the tag's closing '>' and the link text; cut that off.
        int nPos = sFound.IndexOf('>');
        if (nPos != -1)
        {
            // NOTE(review): lower-casing only happens on this truncation path (as in
            // the original) and also lower-cases the URL path - confirm this is wanted.
            sFound = sFound.Substring(0, nPos).ToLowerInvariant();
        }

        // De-duplicate on the value that is actually stored; checking the raw capture
        // would let the same link in again whenever truncation changed it.
        if (!strCollection.Contains(sFound))
        {
            strCollection.Add(sFound);
        }
    }

    return true;
}

// Returns true when sHref contains any fragment from s_skippedHrefFragments.
private static bool ContainsSkippedHrefFragment(string sHref)
{
    foreach (string sFragment in s_skippedHrefFragments)
    {
        if (sHref.IndexOf(sFragment, System.StringComparison.Ordinal) >= 0)
        {
            return true;
        }
    }
    return false;
}
April 21, 2005
Top 10 Ranks Google C SHARP
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment