From 994ab898dbccdbcbdb70985cdb2192dee519a207 Mon Sep 17 00:00:00 2001
From: cxl
Date: Mon, 16 Apr 2012 16:14:15 +0000
Subject: [PATCH] reference: GuiWebCrawler

git-svn-id: svn://ultimatepp.org/upp/trunk@4792 f0d560ea-af0d-0410-9eb7-867de7ffcac7
---
Note: line breaks, indentation and the <...> tokens in this copy were
restored by hand; GuiWebCrawler.cpp also gained comments and a guard on
work.Remove(), so its blob hash no longer matches the index line below.

 reference/GuiWebCrawler/GuiWebCrawler.cpp | 169 ++++++++++++++++++++++
 reference/GuiWebCrawler/GuiWebCrawler.lay |   6 +
 reference/GuiWebCrawler/GuiWebCrawler.upp |  11 ++
 reference/GuiWebCrawler/init              |   5 +
 reference/SQL_MySql/main.cpp              |   3 +-
 5 files changed, 192 insertions(+), 2 deletions(-)
 create mode 100644 reference/GuiWebCrawler/GuiWebCrawler.cpp
 create mode 100644 reference/GuiWebCrawler/GuiWebCrawler.lay
 create mode 100644 reference/GuiWebCrawler/GuiWebCrawler.upp
 create mode 100644 reference/GuiWebCrawler/init

diff --git a/reference/GuiWebCrawler/GuiWebCrawler.cpp b/reference/GuiWebCrawler/GuiWebCrawler.cpp
new file mode 100644
index 000000000..90a490ef4
--- /dev/null
+++ b/reference/GuiWebCrawler/GuiWebCrawler.cpp
@@ -0,0 +1,169 @@
+#include <CtrlLib/CtrlLib.h>
+
+using namespace Upp;
+
+#define LAYOUTFILE <GuiWebCrawler/GuiWebCrawler.lay>
+#include <CtrlCore/lay.h>
+
+// Crawler window: fetches pages with many concurrent HttpRequests and lists
+// in-flight requests ('work'), completed ones ('finished') and the chain of
+// pages leading to the selected finished URL ('path').
+struct WebCrawler : public WithCrawlerLayout<TopWindow> {
+	BiVector<int>          todo; // indices into 'url' still waiting to be fetched
+	VectorMap<String, int> url;  // URL -> index of the URL whose page linked to it
+
+	struct Work {
+		HttpRequest http; // the request being driven by Run()
+		int         urli; // index of the requested URL in 'url'
+	};
+	Array<Work> http;  // requests currently in flight
+	int64       total; // sum of the lengths of all responses received so far
+
+	void ExtractUrls(const String& html, int srci);
+	void ShowPath();
+	void OpenURL(ArrayCtrl *a);
+
+	typedef WebCrawler CLASSNAME;
+
+public:
+	void Run();
+
+	WebCrawler();
+};
+
+// Returns true for characters that ExtractUrls accepts as part of a URL.
+bool IsUrlChar(int c)
+{
+	return c == ':' || c == '.' || IsAlNum(c) || c == '_' || c == '%' || c == '/';
+}
+
+// Scans 'html' for "http://" links and queues every URL not seen before,
+// recording 'srci' (index of the page being scanned) as its source.
+void WebCrawler::ExtractUrls(const String& html, int srci)
+{
+	int q = 0;
+	while(q < html.GetCount()) {
+		q = html.Find("http://", q);
+		if(q < 0)
+			return;
+		int b = q;
+		while(q < html.GetCount() && IsUrlChar(html[q]))
+			q++;
+		String u = html.Mid(b, q - b);
+		if(url.Find(u) < 0) {
+			todo.AddTail(url.GetCount()); // index the new entry is about to get
+			url.Add(u, srci);
+		}
+	}
+}
+
+// Asks for a seed URL, opens the window and crawls until the window is closed.
+void WebCrawler::Run()
+{
+	String seed = "www.ultimatepp.org";
+	if(!EditText(seed, "GuiWebSpider", "Seed URL"))
+		return;
+	todo.AddTail(0);
+	url.Add(seed, 0); // the seed is its own source; ShowPath stops at index 0
+	Open();
+	while(IsOpen()) {
+		ProcessEvents();
+		// Start queued requests, keeping fewer than 60 in flight.
+		while(todo.GetCount() && http.GetCount() < 60) {
+			int i = todo.Head();
+			todo.DropHead();
+			Work& w = http.Add();
+			w.urli = i;
+			w.http.Url(url.GetKey(i))
+			      .UserAgent("Mozilla/5.0 (Windows NT 6.1; WOW64; rv:11.0) Gecko/20100101 Firefox/11.0")
+			      .Timeout(0);
+			work.Add(url.GetKey(i));
+			work.HeaderTab(0).SetText(Format("URL (%d)", work.GetCount()));
+		}
+		// Collect the request sockets and wait on them with a timeout of 10 (presumably ms).
+		SocketWaitEvent we;
+		for(int i = 0; i < http.GetCount(); i++)
+			we.Add(http[i].http);
+		we.Wait(10);
+		int i = 0;
+		while(i < http.GetCount()) {
+			Work& w = http[i];
+			w.http.Do();
+			String u = url.GetKey(w.urli);
+			int q = work.Find(u);
+			if(w.http.InProgress()) {
+				if(q >= 0)
+					work.Set(q, 1, w.http.GetPhaseName());
+				i++;
+			}
+			else {
+				String html = w.http;
+				total += html.GetCount();
+				finished.Add(u, w.http.IsError() ? String().Cat() << w.http.GetErrorDesc()
+				                                 : String().Cat() << w.http.GetStatusCode()
+				                                   << ' ' << w.http.GetReasonPhrase()
+				                                   << " (" << html.GetCount() << " bytes)",
+				             w.urli);
+				finished.HeaderTab(0).SetText(Format("Finished (%d)", finished.GetCount()));
+				finished.HeaderTab(1).SetText(Format("Response (%` KB)", total >> 10));
+				if(w.http.IsSuccess()) {
+					ExtractUrls(html, w.urli);
+					Title(AsString(url.GetCount()) + " URLs found");
+				}
+				// 'w' refers to the element removed here; it must not be used afterwards.
+				http.Remove(i);
+				if(q >= 0) // q is negative when Find() did not locate the row (same guard as above)
+					work.Remove(q);
+			}
+		}
+	}
+}
+
+// Fills 'path' with the chain of URLs from the seed to the selected finished row.
+void WebCrawler::ShowPath()
+{
+	path.Clear();
+	if(!finished.IsCursor())
+		return;
+	int i = finished.Get(2); // third value stored by Run(): index into 'url'
+	Vector<String> p;
+	for(;;) {
+		p.Add(url.GetKey(i));
+		if(i == 0) // index 0 is the seed
+			break;
+		i = url[i]; // step to the page this URL was found on
+	}
+	for(int i = p.GetCount() - 1; i >= 0; i--)
+		path.Add(p[i]);
+}
+
+// Copies the key returned by a->GetKey() to the clipboard and launches a web browser on it.
+void WebCrawler::OpenURL(ArrayCtrl *a)
+{
+	String u = a->GetKey();
+	WriteClipboardText(u);
+	LaunchWebBrowser(u);
+}
+
+// Sets up the layout, the list columns and the cursor/double-click callbacks.
+WebCrawler::WebCrawler()
+{
+	CtrlLayout(*this, "WebCrawler");
+	work.AddColumn("URL");
+	work.AddColumn("Status");
+	finished.AddColumn("Finished");
+	finished.AddColumn("Response");
+	finished.WhenCursor = THISBACK(ShowPath);
+	finished.WhenLeftDouble = THISBACK1(OpenURL, &finished);
+	path.AddColumn("Path");
+	path.WhenLeftDouble = THISBACK1(OpenURL, &path);
+	total = 0;
+	Zoomable().Sizeable();
+}
+
+GUI_APP_MAIN
+{
+	HttpRequest::Trace();
+
+	WebCrawler().Run();
+}
diff --git a/reference/GuiWebCrawler/GuiWebCrawler.lay b/reference/GuiWebCrawler/GuiWebCrawler.lay
new file mode 100644
index 000000000..2e9d4a851
--- /dev/null
+++ b/reference/GuiWebCrawler/GuiWebCrawler.lay
@@ -0,0 +1,6 @@
+LAYOUT(CrawlerLayout, 680, 508)
+	ITEM(ArrayCtrl, work, LeftPosZ(4, 356).TopPosZ(4, 500))
+	ITEM(ArrayCtrl, finished, LeftPosZ(364, 312).TopPosZ(4, 324))
+	ITEM(ArrayCtrl, path, LeftPosZ(364, 312).TopPosZ(332, 172))
+END_LAYOUT
+
diff --git a/reference/GuiWebCrawler/GuiWebCrawler.upp b/reference/GuiWebCrawler/GuiWebCrawler.upp
new file mode 100644
index 000000000..bae076728
--- /dev/null
+++ b/reference/GuiWebCrawler/GuiWebCrawler.upp
@@ -0,0 +1,11 @@
+uses
+	CtrlLib,
+	Core/SSL;
+
+file
+	GuiWebCrawler.cpp,
+	GuiWebCrawler.lay;
+
+mainconfig
+	"" = "GUI SSE2";
+
diff --git a/reference/GuiWebCrawler/init b/reference/GuiWebCrawler/init
new file mode 100644
index 000000000..4284a48ee
--- /dev/null
+++ b/reference/GuiWebCrawler/init
@@ -0,0 +1,5 @@
+#ifndef _GuiWebCrawler_icpp_init_stub
+#define _GuiWebCrawler_icpp_init_stub
+#include "CtrlLib/init"
+#include "Core/SSL/init"
+#endif
diff --git a/reference/SQL_MySql/main.cpp b/reference/SQL_MySql/main.cpp
index 43b5cf978..d451ddef8 100644
--- a/reference/SQL_MySql/main.cpp
+++ b/reference/SQL_MySql/main.cpp
@@ -12,7 +12,7 @@ CONSOLE_APP_MAIN
 {
 	MySqlSession session;
 	// edit the connection parameters if necessary
-	if(session.Connect("root", "koblih", "test")) {
+	if(session.Connect("root", "Passw0rd", "test")) {
 		Cout() << "Connected\n";
 		SQL = session;
 
@@ -43,5 +43,4 @@ CONSOLE_APP_MAIN
 		Cerr() <<"ERROR: Unable to connect to database\n";
 		SetExitCode(1);
 	}
-	SetExitCode(0);
 }