blob: 89a4196e01e081ae4a2047313cf0a60c78d2ea2d [file] [log] [blame]
Torne (Richard Coles)58218062012-11-14 11:43:16 +00001// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/safe_browsing/client_side_detection_host.h"
6
7#include <vector>
8
9#include "base/logging.h"
10#include "base/memory/ref_counted.h"
11#include "base/memory/scoped_ptr.h"
12#include "base/metrics/histogram.h"
Torne (Richard Coles)2a99a7e2013-03-28 15:31:22 +000013#include "base/prefs/pref_service.h"
Torne (Richard Coles)58218062012-11-14 11:43:16 +000014#include "base/sequenced_task_runner_helpers.h"
Torne (Richard Coles)5d1f7b12014-02-21 12:16:55 +000015#include "base/strings/utf_string_conversions.h"
Torne (Richard Coles)58218062012-11-14 11:43:16 +000016#include "chrome/browser/browser_process.h"
Torne (Richard Coles)58218062012-11-14 11:43:16 +000017#include "chrome/browser/profiles/profile.h"
18#include "chrome/browser/safe_browsing/browser_feature_extractor.h"
19#include "chrome/browser/safe_browsing/client_side_detection_service.h"
Torne (Richard Coles)2a99a7e2013-03-28 15:31:22 +000020#include "chrome/browser/safe_browsing/database_manager.h"
Torne (Richard Coles)58218062012-11-14 11:43:16 +000021#include "chrome/browser/safe_browsing/safe_browsing_service.h"
Torne (Richard Coles)58218062012-11-14 11:43:16 +000022#include "chrome/common/pref_names.h"
23#include "chrome/common/safe_browsing/csd.pb.h"
24#include "chrome/common/safe_browsing/safebrowsing_messages.h"
25#include "content/public/browser/browser_thread.h"
26#include "content/public/browser/navigation_controller.h"
27#include "content/public/browser/navigation_details.h"
28#include "content/public/browser/navigation_entry.h"
29#include "content/public/browser/notification_details.h"
30#include "content/public/browser/notification_source.h"
31#include "content/public/browser/notification_types.h"
32#include "content/public/browser/render_process_host.h"
33#include "content/public/browser/render_view_host.h"
34#include "content/public/browser/resource_request_details.h"
35#include "content/public/browser/web_contents.h"
36#include "content/public/common/frame_navigate_params.h"
Ben Murdocheffb81e2014-03-31 11:51:25 +010037#include "content/public/common/url_constants.h"
Ben Murdocheb525c52013-07-10 11:40:50 +010038#include "url/gurl.h"
Torne (Richard Coles)58218062012-11-14 11:43:16 +000039
40using content::BrowserThread;
41using content::NavigationEntry;
42using content::ResourceRequestDetails;
Ben Murdoch116680a2014-07-20 18:25:52 -070043using content::ResourceType;
Torne (Richard Coles)58218062012-11-14 11:43:16 +000044using content::WebContents;
45
46namespace safe_browsing {
47
Torne (Richard Coles)46d4c2b2014-06-09 12:00:27 +010048const size_t ClientSideDetectionHost::kMaxUrlsPerIP = 20;
49const size_t ClientSideDetectionHost::kMaxIPsPerBrowse = 200;
Torne (Richard Coles)2a99a7e2013-03-28 15:31:22 +000050
Torne (Richard Coles)5d1f7b12014-02-21 12:16:55 +000051const char kSafeBrowsingMatchKey[] = "safe_browsing_match";
52
Ben Murdocheffb81e2014-03-31 11:51:25 +010053typedef base::Callback<void(bool)> ShouldClassifyUrlCallback;
54
Torne (Richard Coles)58218062012-11-14 11:43:16 +000055// This class is instantiated each time a new toplevel URL loads, and
Ben Murdocheffb81e2014-03-31 11:51:25 +010056// asynchronously checks whether the malware and phishing classifiers should run
57// for this URL. If so, it notifies the host class by calling the provided
58// callback form the UI thread. Objects of this class are ref-counted and will
59// be destroyed once nobody uses it anymore. If |web_contents|, |csd_service|
60// or |host| go away you need to call Cancel(). We keep the |database_manager|
61// alive in a ref pointer for as long as it takes.
Torne (Richard Coles)58218062012-11-14 11:43:16 +000062class ClientSideDetectionHost::ShouldClassifyUrlRequest
63 : public base::RefCountedThreadSafe<
64 ClientSideDetectionHost::ShouldClassifyUrlRequest> {
65 public:
Ben Murdocheffb81e2014-03-31 11:51:25 +010066 ShouldClassifyUrlRequest(
67 const content::FrameNavigateParams& params,
68 const ShouldClassifyUrlCallback& start_phishing_classification,
69 const ShouldClassifyUrlCallback& start_malware_classification,
70 WebContents* web_contents,
71 ClientSideDetectionService* csd_service,
72 SafeBrowsingDatabaseManager* database_manager,
73 ClientSideDetectionHost* host)
74 : params_(params),
Torne (Richard Coles)58218062012-11-14 11:43:16 +000075 web_contents_(web_contents),
76 csd_service_(csd_service),
Torne (Richard Coles)2a99a7e2013-03-28 15:31:22 +000077 database_manager_(database_manager),
Ben Murdocheffb81e2014-03-31 11:51:25 +010078 host_(host),
79 start_phishing_classification_cb_(start_phishing_classification),
80 start_malware_classification_cb_(start_malware_classification) {
Torne (Richard Coles)58218062012-11-14 11:43:16 +000081 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
82 DCHECK(web_contents_);
83 DCHECK(csd_service_);
Torne (Richard Coles)868fa2f2013-06-11 10:57:03 +010084 DCHECK(database_manager_.get());
Torne (Richard Coles)58218062012-11-14 11:43:16 +000085 DCHECK(host_);
86 }
87
88 void Start() {
89 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
90
91 // We start by doing some simple checks that can run on the UI thread.
Ben Murdocheffb81e2014-03-31 11:51:25 +010092 UMA_HISTOGRAM_BOOLEAN("SBClientPhishing.ClassificationStart", 1);
93 UMA_HISTOGRAM_BOOLEAN("SBClientMalware.ClassificationStart", 1);
Torne (Richard Coles)58218062012-11-14 11:43:16 +000094
95 // Only classify [X]HTML documents.
96 if (params_.contents_mime_type != "text/html" &&
97 params_.contents_mime_type != "application/xhtml+xml") {
98 VLOG(1) << "Skipping phishing classification for URL: " << params_.url
99 << " because it has an unsupported MIME type: "
100 << params_.contents_mime_type;
Ben Murdocheffb81e2014-03-31 11:51:25 +0100101 DontClassifyForPhishing(NO_CLASSIFY_UNSUPPORTED_MIME_TYPE);
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000102 }
103
104 if (csd_service_->IsPrivateIPAddress(params_.socket_address.host())) {
105 VLOG(1) << "Skipping phishing classification for URL: " << params_.url
106 << " because of hosting on private IP: "
107 << params_.socket_address.host();
Ben Murdocheffb81e2014-03-31 11:51:25 +0100108 DontClassifyForPhishing(NO_CLASSIFY_PRIVATE_IP);
109 DontClassifyForMalware(NO_CLASSIFY_PRIVATE_IP);
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000110 }
111
Ben Murdocheffb81e2014-03-31 11:51:25 +0100112 // For phishing we only classify HTTP pages.
Torne (Richard Coles)010d83a2014-05-14 12:12:37 +0100113 if (!params_.url.SchemeIs(url::kHttpScheme)) {
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000114 VLOG(1) << "Skipping phishing classification for URL: " << params_.url
Ben Murdocheffb81e2014-03-31 11:51:25 +0100115 << " because it is not HTTP: "
116 << params_.socket_address.host();
117 DontClassifyForPhishing(NO_CLASSIFY_NOT_HTTP_URL);
118 }
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000119
Ben Murdocheffb81e2014-03-31 11:51:25 +0100120 // Don't run any classifier if the tab is incognito.
121 if (web_contents_->GetBrowserContext()->IsOffTheRecord()) {
122 VLOG(1) << "Skipping phishing and malware classification for URL: "
123 << params_.url << " because we're browsing incognito.";
124 DontClassifyForPhishing(NO_CLASSIFY_OFF_THE_RECORD);
125 DontClassifyForMalware(NO_CLASSIFY_OFF_THE_RECORD);
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000126 }
127
128 // We lookup the csd-whitelist before we lookup the cache because
129 // a URL may have recently been whitelisted. If the URL matches
Ben Murdocheffb81e2014-03-31 11:51:25 +0100130 // the csd-whitelist we won't start phishing classification. The
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000131 // csd-whitelist check has to be done on the IO thread because it
132 // uses the SafeBrowsing service class.
Ben Murdocheffb81e2014-03-31 11:51:25 +0100133 if (ShouldClassifyForPhishing() || ShouldClassifyForMalware()) {
134 BrowserThread::PostTask(
135 BrowserThread::IO,
136 FROM_HERE,
137 base::Bind(&ShouldClassifyUrlRequest::CheckSafeBrowsingDatabase,
138 this, params_.url));
139 }
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000140 }
141
142 void Cancel() {
Ben Murdocheffb81e2014-03-31 11:51:25 +0100143 DontClassifyForPhishing(NO_CLASSIFY_CANCEL);
144 DontClassifyForMalware(NO_CLASSIFY_CANCEL);
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000145 // Just to make sure we don't do anything stupid we reset all these
146 // pointers except for the safebrowsing service class which may be
Ben Murdocheffb81e2014-03-31 11:51:25 +0100147 // accessed by CheckSafeBrowsingDatabase().
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000148 web_contents_ = NULL;
149 csd_service_ = NULL;
150 host_ = NULL;
151 }
152
153 private:
154 friend class base::RefCountedThreadSafe<
155 ClientSideDetectionHost::ShouldClassifyUrlRequest>;
156
157 // Enum used to keep stats about why the pre-classification check failed.
158 enum PreClassificationCheckFailures {
159 OBSOLETE_NO_CLASSIFY_PROXY_FETCH,
160 NO_CLASSIFY_PRIVATE_IP,
161 NO_CLASSIFY_OFF_THE_RECORD,
162 NO_CLASSIFY_MATCH_CSD_WHITELIST,
163 NO_CLASSIFY_TOO_MANY_REPORTS,
164 NO_CLASSIFY_UNSUPPORTED_MIME_TYPE,
Ben Murdocheffb81e2014-03-31 11:51:25 +0100165 NO_CLASSIFY_NO_DATABASE_MANAGER,
166 NO_CLASSIFY_KILLSWITCH,
167 NO_CLASSIFY_CANCEL,
168 NO_CLASSIFY_RESULT_FROM_CACHE,
169 NO_CLASSIFY_NOT_HTTP_URL,
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000170
171 NO_CLASSIFY_MAX // Always add new values before this one.
172 };
173
174 // The destructor can be called either from the UI or the IO thread.
175 virtual ~ShouldClassifyUrlRequest() { }
176
Ben Murdocheffb81e2014-03-31 11:51:25 +0100177 bool ShouldClassifyForPhishing() const {
178 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
179 return !start_phishing_classification_cb_.is_null();
180 }
181
182 bool ShouldClassifyForMalware() const {
183 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
184 return !start_malware_classification_cb_.is_null();
185 }
186
187 void DontClassifyForPhishing(PreClassificationCheckFailures reason) {
188 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
189 if (ShouldClassifyForPhishing()) {
190 // Track the first reason why we stopped classifying for phishing.
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000191 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
Ben Murdocheffb81e2014-03-31 11:51:25 +0100192 reason, NO_CLASSIFY_MAX);
193 DVLOG(2) << "Failed phishing pre-classification checks. Reason: "
194 << reason;
195 start_phishing_classification_cb_.Run(false);
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000196 }
Ben Murdocheffb81e2014-03-31 11:51:25 +0100197 start_phishing_classification_cb_.Reset();
198 }
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000199
Ben Murdocheffb81e2014-03-31 11:51:25 +0100200 void DontClassifyForMalware(PreClassificationCheckFailures reason) {
201 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
202 if (ShouldClassifyForMalware()) {
203 // Track the first reason why we stopped classifying for malware.
204 UMA_HISTOGRAM_ENUMERATION("SBClientMalware.PreClassificationCheckFail",
205 reason, NO_CLASSIFY_MAX);
206 DVLOG(2) << "Failed malware pre-classification checks. Reason: "
207 << reason;
208 start_malware_classification_cb_.Run(false);
209 }
210 start_malware_classification_cb_.Reset();
211 }
Ben Murdoch58e6fbe2013-07-26 10:20:38 +0100212
Ben Murdocheffb81e2014-03-31 11:51:25 +0100213 void CheckSafeBrowsingDatabase(const GURL& url) {
214 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
215 // We don't want to call the classification callbacks from the IO
216 // thread so we simply pass the results of this method to CheckCache()
217 // which is called on the UI thread;
218 PreClassificationCheckFailures phishing_reason = NO_CLASSIFY_MAX;
219 PreClassificationCheckFailures malware_reason = NO_CLASSIFY_MAX;
220 if (!database_manager_.get()) {
221 // We cannot check the Safe Browsing whitelists so we stop here
222 // for safety.
223 malware_reason = phishing_reason = NO_CLASSIFY_NO_DATABASE_MANAGER;
224 } else {
225 if (database_manager_->MatchCsdWhitelistUrl(url)) {
226 VLOG(1) << "Skipping phishing classification for URL: " << url
227 << " because it matches the csd whitelist";
228 phishing_reason = NO_CLASSIFY_MATCH_CSD_WHITELIST;
229 }
230 if (database_manager_->IsMalwareKillSwitchOn()) {
231 malware_reason = NO_CLASSIFY_KILLSWITCH;
232 }
233 }
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000234 BrowserThread::PostTask(
235 BrowserThread::UI,
236 FROM_HERE,
Ben Murdocheffb81e2014-03-31 11:51:25 +0100237 base::Bind(&ShouldClassifyUrlRequest::CheckCache,
238 this,
239 phishing_reason,
240 malware_reason));
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000241 }
242
Ben Murdocheffb81e2014-03-31 11:51:25 +0100243 void CheckCache(PreClassificationCheckFailures phishing_reason,
244 PreClassificationCheckFailures malware_reason) {
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000245 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
Ben Murdocheffb81e2014-03-31 11:51:25 +0100246 if (phishing_reason != NO_CLASSIFY_MAX)
247 DontClassifyForPhishing(phishing_reason);
248 if (malware_reason != NO_CLASSIFY_MAX)
249 DontClassifyForMalware(malware_reason);
250 if (!ShouldClassifyForMalware() && !ShouldClassifyForPhishing()) {
251 return; // No point in doing anything else.
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000252 }
Ben Murdocheffb81e2014-03-31 11:51:25 +0100253 // If result is cached, we don't want to run classification again.
254 // In that case we're just trying to show the warning.
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000255 bool is_phishing;
256 if (csd_service_->GetValidCachedResult(params_.url, &is_phishing)) {
257 VLOG(1) << "Satisfying request for " << params_.url << " from cache";
Ben Murdocheffb81e2014-03-31 11:51:25 +0100258 UMA_HISTOGRAM_BOOLEAN("SBClientPhishing.RequestSatisfiedFromCache", 1);
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000259 // Since we are already on the UI thread, this is safe.
260 host_->MaybeShowPhishingWarning(params_.url, is_phishing);
Ben Murdocheffb81e2014-03-31 11:51:25 +0100261 DontClassifyForPhishing(NO_CLASSIFY_RESULT_FROM_CACHE);
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000262 }
263
264 // We want to limit the number of requests, though we will ignore the
265 // limit for urls in the cache. We don't want to start classifying
266 // too many pages as phishing, but for those that we already think are
Ben Murdocheffb81e2014-03-31 11:51:25 +0100267 // phishing we want to send a request to the server to give ourselves
268 // a chance to fix misclassifications.
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000269 if (csd_service_->IsInCache(params_.url)) {
270 VLOG(1) << "Reporting limit skipped for " << params_.url
271 << " as it was in the cache.";
Ben Murdocheffb81e2014-03-31 11:51:25 +0100272 UMA_HISTOGRAM_BOOLEAN("SBClientPhishing.ReportLimitSkipped", 1);
Torne (Richard Coles)90dce4d2013-05-29 14:40:03 +0100273 } else if (csd_service_->OverPhishingReportLimit()) {
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000274 VLOG(1) << "Too many report phishing requests sent recently, "
275 << "not running classification for " << params_.url;
Ben Murdocheffb81e2014-03-31 11:51:25 +0100276 DontClassifyForPhishing(NO_CLASSIFY_TOO_MANY_REPORTS);
277 }
278 if (csd_service_->OverMalwareReportLimit()) {
279 DontClassifyForMalware(NO_CLASSIFY_TOO_MANY_REPORTS);
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000280 }
281
282 // Everything checks out, so start classification.
283 // |web_contents_| is safe to call as we will be destructed
284 // before it is.
Ben Murdochc5cede92014-04-10 11:22:14 +0100285 if (ShouldClassifyForPhishing()) {
Ben Murdocheffb81e2014-03-31 11:51:25 +0100286 start_phishing_classification_cb_.Run(true);
Ben Murdochc5cede92014-04-10 11:22:14 +0100287 // Reset the callback to make sure ShouldClassifyForPhishing()
288 // returns false.
289 start_phishing_classification_cb_.Reset();
290 }
291 if (ShouldClassifyForMalware()) {
Ben Murdocheffb81e2014-03-31 11:51:25 +0100292 start_malware_classification_cb_.Run(true);
Ben Murdochc5cede92014-04-10 11:22:14 +0100293 // Reset the callback to make sure ShouldClassifyForMalware()
294 // returns false.
295 start_malware_classification_cb_.Reset();
296 }
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000297 }
298
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000299 content::FrameNavigateParams params_;
300 WebContents* web_contents_;
301 ClientSideDetectionService* csd_service_;
Torne (Richard Coles)2a99a7e2013-03-28 15:31:22 +0000302 // We keep a ref pointer here just to make sure the safe browsing
303 // database manager stays alive long enough.
304 scoped_refptr<SafeBrowsingDatabaseManager> database_manager_;
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000305 ClientSideDetectionHost* host_;
306
Ben Murdocheffb81e2014-03-31 11:51:25 +0100307 ShouldClassifyUrlCallback start_phishing_classification_cb_;
308 ShouldClassifyUrlCallback start_malware_classification_cb_;
309
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000310 DISALLOW_COPY_AND_ASSIGN(ShouldClassifyUrlRequest);
311};
312
313// static
314ClientSideDetectionHost* ClientSideDetectionHost::Create(
315 WebContents* tab) {
316 return new ClientSideDetectionHost(tab);
317}
318
319ClientSideDetectionHost::ClientSideDetectionHost(WebContents* tab)
320 : content::WebContentsObserver(tab),
321 csd_service_(NULL),
Ben Murdocheffb81e2014-03-31 11:51:25 +0100322 classification_request_(NULL),
323 should_extract_malware_features_(true),
324 should_classify_for_malware_(false),
Ben Murdoche5d81f52014-04-03 12:29:45 +0100325 pageload_complete_(false),
Torne (Richard Coles)c2e0dbd2013-05-09 18:35:53 +0100326 weak_factory_(this),
Ben Murdocheffb81e2014-03-31 11:51:25 +0100327 unsafe_unique_page_id_(-1) {
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000328 DCHECK(tab);
Torne (Richard Coles)2a99a7e2013-03-28 15:31:22 +0000329 // Note: csd_service_ and sb_service will be NULL here in testing.
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000330 csd_service_ = g_browser_process->safe_browsing_detection_service();
Torne (Richard Coles)f2477e02013-11-28 11:55:43 +0000331 feature_extractor_.reset(new BrowserFeatureExtractor(tab, this));
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000332 registrar_.Add(this, content::NOTIFICATION_RESOURCE_RESPONSE_STARTED,
333 content::Source<WebContents>(tab));
Torne (Richard Coles)2a99a7e2013-03-28 15:31:22 +0000334
335 scoped_refptr<SafeBrowsingService> sb_service =
336 g_browser_process->safe_browsing_service();
Torne (Richard Coles)868fa2f2013-06-11 10:57:03 +0100337 if (sb_service.get()) {
Torne (Richard Coles)2a99a7e2013-03-28 15:31:22 +0000338 ui_manager_ = sb_service->ui_manager();
339 database_manager_ = sb_service->database_manager();
340 ui_manager_->AddObserver(this);
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000341 }
342}
343
344ClientSideDetectionHost::~ClientSideDetectionHost() {
Torne (Richard Coles)868fa2f2013-06-11 10:57:03 +0100345 if (ui_manager_.get())
Torne (Richard Coles)2a99a7e2013-03-28 15:31:22 +0000346 ui_manager_->RemoveObserver(this);
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000347}
348
349bool ClientSideDetectionHost::OnMessageReceived(const IPC::Message& message) {
350 bool handled = true;
351 IPC_BEGIN_MESSAGE_MAP(ClientSideDetectionHost, message)
352 IPC_MESSAGE_HANDLER(SafeBrowsingHostMsg_PhishingDetectionDone,
353 OnPhishingDetectionDone)
354 IPC_MESSAGE_UNHANDLED(handled = false)
355 IPC_END_MESSAGE_MAP()
356 return handled;
357}
358
359void ClientSideDetectionHost::DidNavigateMainFrame(
360 const content::LoadCommittedDetails& details,
361 const content::FrameNavigateParams& params) {
362 // TODO(noelutz): move this DCHECK to WebContents and fix all the unit tests
363 // that don't call this method on the UI thread.
364 // DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
365 if (details.is_in_page) {
366 // If the navigation is within the same page, the user isn't really
367 // navigating away. We don't need to cancel a pending callback or
368 // begin a new classification.
369 return;
370 }
Ben Murdocheffb81e2014-03-31 11:51:25 +0100371 // Cancel any pending classification request.
372 if (classification_request_.get()) {
373 classification_request_->Cancel();
374 }
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000375 // If we navigate away and there currently is a pending phishing
376 // report request we have to cancel it to make sure we don't display
377 // an interstitial for the wrong page. Note that this won't cancel
378 // the server ping back but only cancel the showing of the
379 // interstial.
380 weak_factory_.InvalidateWeakPtrs();
381
382 if (!csd_service_) {
383 return;
384 }
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000385 browse_info_.reset(new BrowseInfo);
386
387 // Store redirect chain information.
388 if (params.url.host() != cur_host_) {
389 cur_host_ = params.url.host();
390 cur_host_redirects_ = params.redirects;
391 }
Ben Murdocheffb81e2014-03-31 11:51:25 +0100392 browse_info_->url = params.url;
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000393 browse_info_->host_redirects = cur_host_redirects_;
394 browse_info_->url_redirects = params.redirects;
Torne (Richard Coles)f2477e02013-11-28 11:55:43 +0000395 browse_info_->referrer = params.referrer.url;
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000396 browse_info_->http_status_code = details.http_status_code;
Ben Murdocheffb81e2014-03-31 11:51:25 +0100397 browse_info_->page_id = params.page_id;
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000398
Ben Murdocheffb81e2014-03-31 11:51:25 +0100399 should_extract_malware_features_ = true;
400 should_classify_for_malware_ = false;
Ben Murdoche5d81f52014-04-03 12:29:45 +0100401 pageload_complete_ = false;
Ben Murdocheffb81e2014-03-31 11:51:25 +0100402
403 // Check whether we can cassify the current URL for phishing or malware.
Torne (Richard Coles)868fa2f2013-06-11 10:57:03 +0100404 classification_request_ = new ShouldClassifyUrlRequest(
Ben Murdocheffb81e2014-03-31 11:51:25 +0100405 params,
406 base::Bind(&ClientSideDetectionHost::OnPhishingPreClassificationDone,
407 weak_factory_.GetWeakPtr()),
408 base::Bind(&ClientSideDetectionHost::OnMalwarePreClassificationDone,
409 weak_factory_.GetWeakPtr()),
410 web_contents(), csd_service_, database_manager_.get(), this);
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000411 classification_request_->Start();
412}
413
414void ClientSideDetectionHost::OnSafeBrowsingHit(
Torne (Richard Coles)2a99a7e2013-03-28 15:31:22 +0000415 const SafeBrowsingUIManager::UnsafeResource& resource) {
Torne (Richard Coles)a3f6a492013-12-18 16:25:09 +0000416 if (!web_contents() || !web_contents()->GetController().GetActiveEntry())
417 return;
418
419 // Check that the hit is either malware or phishing.
420 if (resource.threat_type != SB_THREAT_TYPE_URL_PHISHING &&
421 resource.threat_type != SB_THREAT_TYPE_URL_MALWARE)
422 return;
423
424 // Check that this notification is really for us.
425 content::RenderViewHost* hit_rvh = content::RenderViewHost::FromID(
426 resource.render_process_host_id, resource.render_view_id);
427 if (!hit_rvh ||
428 web_contents() != content::WebContents::FromRenderViewHost(hit_rvh))
429 return;
430
431 // Store the unique page ID for later.
432 unsafe_unique_page_id_ =
433 web_contents()->GetController().GetActiveEntry()->GetUniqueID();
Torne (Richard Coles)5d1f7b12014-02-21 12:16:55 +0000434
Torne (Richard Coles)a3f6a492013-12-18 16:25:09 +0000435 // We also keep the resource around in order to be able to send the
436 // malicious URL to the server.
437 unsafe_resource_.reset(new SafeBrowsingUIManager::UnsafeResource(resource));
438 unsafe_resource_->callback.Reset(); // Don't do anything stupid.
439}
440
441void ClientSideDetectionHost::OnSafeBrowsingMatch(
442 const SafeBrowsingUIManager::UnsafeResource& resource) {
Torne (Richard Coles)5d1f7b12014-02-21 12:16:55 +0000443 if (!web_contents() || !web_contents()->GetController().GetActiveEntry())
444 return;
445
446 // Check that this notification is really for us.
447 content::RenderViewHost* hit_rvh = content::RenderViewHost::FromID(
448 resource.render_process_host_id, resource.render_view_id);
449 if (!hit_rvh ||
450 web_contents() != content::WebContents::FromRenderViewHost(hit_rvh))
451 return;
452
453 web_contents()->GetController().GetActiveEntry()->SetExtraData(
454 kSafeBrowsingMatchKey, base::ASCIIToUTF16("1"));
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000455}
456
Torne (Richard Coles)f2477e02013-11-28 11:55:43 +0000457scoped_refptr<SafeBrowsingDatabaseManager>
458ClientSideDetectionHost::database_manager() {
Torne (Richard Coles)a3f6a492013-12-18 16:25:09 +0000459 return database_manager_;
460}
461
462bool ClientSideDetectionHost::DidPageReceiveSafeBrowsingMatch() const {
Torne (Richard Coles)5d1f7b12014-02-21 12:16:55 +0000463 if (!web_contents() || !web_contents()->GetController().GetVisibleEntry())
464 return false;
465
466 // If an interstitial page is showing, GetVisibleEntry will return the
467 // transient NavigationEntry for the interstitial. The transient entry
468 // will not have the flag set, so use the pending entry instead if there
469 // is one.
470 NavigationEntry* entry = web_contents()->GetController().GetPendingEntry();
471 if (!entry) {
472 entry = web_contents()->GetController().GetVisibleEntry();
473 if (entry->GetPageType() == content::PAGE_TYPE_INTERSTITIAL)
474 entry = web_contents()->GetController().GetLastCommittedEntry();
475 if (!entry)
476 return false;
477 }
478
479 base::string16 value;
480 return entry->GetExtraData(kSafeBrowsingMatchKey, &value);
Torne (Richard Coles)f2477e02013-11-28 11:55:43 +0000481}
482
Torne (Richard Coles)010d83a2014-05-14 12:12:37 +0100483void ClientSideDetectionHost::WebContentsDestroyed() {
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000484 // Tell any pending classification request that it is being canceled.
485 if (classification_request_.get()) {
486 classification_request_->Cancel();
487 }
488 // Cancel all pending feature extractions.
489 feature_extractor_.reset();
490}
491
Ben Murdocheffb81e2014-03-31 11:51:25 +0100492void ClientSideDetectionHost::OnPhishingPreClassificationDone(
493 bool should_classify) {
494 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
495 if (browse_info_.get() && should_classify) {
496 VLOG(1) << "Instruct renderer to start phishing detection for URL: "
497 << browse_info_->url;
498 content::RenderViewHost* rvh = web_contents()->GetRenderViewHost();
499 rvh->Send(new SafeBrowsingMsg_StartPhishingDetection(
500 rvh->GetRoutingID(), browse_info_->url));
501 }
502}
503
504void ClientSideDetectionHost::OnMalwarePreClassificationDone(
505 bool should_classify) {
506 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
507 // If classification checks failed we should stop extracting malware features.
508 DVLOG(2) << "Malware pre-classification checks done. Should classify: "
509 << should_classify;
510 should_extract_malware_features_ = should_classify;
511 should_classify_for_malware_ = should_classify;
512 MaybeStartMalwareFeatureExtraction();
513}
514
Ben Murdoche5d81f52014-04-03 12:29:45 +0100515void ClientSideDetectionHost::DidStopLoading(content::RenderViewHost* rvh) {
Ben Murdocheffb81e2014-03-31 11:51:25 +0100516 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
517 if (!csd_service_ || !browse_info_.get())
518 return;
Ben Murdoche5d81f52014-04-03 12:29:45 +0100519 DVLOG(2) << "Page finished loading.";
520 pageload_complete_ = true;
Ben Murdocheffb81e2014-03-31 11:51:25 +0100521 MaybeStartMalwareFeatureExtraction();
522}
523
524void ClientSideDetectionHost::MaybeStartMalwareFeatureExtraction() {
525 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
526 if (csd_service_ && browse_info_.get() &&
527 should_classify_for_malware_ &&
Ben Murdoche5d81f52014-04-03 12:29:45 +0100528 pageload_complete_) {
Ben Murdocheffb81e2014-03-31 11:51:25 +0100529 scoped_ptr<ClientMalwareRequest> malware_request(
530 new ClientMalwareRequest);
531 // Start browser-side malware feature extraction. Once we're done it will
532 // send the malware client verdict request.
533 malware_request->set_url(browse_info_->url.spec());
534 const GURL& referrer = browse_info_->referrer;
535 if (referrer.SchemeIs("http")) { // Only send http urls.
536 malware_request->set_referrer_url(referrer.spec());
537 }
538 // This function doesn't expect browse_info_ to stay around after this
539 // function returns.
540 feature_extractor_->ExtractMalwareFeatures(
541 browse_info_.get(),
542 malware_request.release(),
543 base::Bind(&ClientSideDetectionHost::MalwareFeatureExtractionDone,
544 weak_factory_.GetWeakPtr()));
545 should_classify_for_malware_ = false;
546 }
547}
548
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000549void ClientSideDetectionHost::OnPhishingDetectionDone(
550 const std::string& verdict_str) {
551 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
552 // There is something seriously wrong if there is no service class but
553 // this method is called. The renderer should not start phishing detection
554 // if there isn't any service class in the browser.
555 DCHECK(csd_service_);
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000556 DCHECK(browse_info_.get());
557
558 // We parse the protocol buffer here. If we're unable to parse it we won't
559 // send the verdict further.
560 scoped_ptr<ClientPhishingRequest> verdict(new ClientPhishingRequest);
561 if (csd_service_ &&
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000562 browse_info_.get() &&
563 verdict->ParseFromString(verdict_str) &&
Torne (Richard Coles)2a99a7e2013-03-28 15:31:22 +0000564 verdict->IsInitialized()) {
Torne (Richard Coles)2a99a7e2013-03-28 15:31:22 +0000565 // We only send phishing verdict to the server if the verdict is phishing or
566 // if a SafeBrowsing interstitial was already shown for this site. E.g., a
567 // malware or phishing interstitial was shown but the user clicked
568 // through.
569 if (verdict->is_phishing() || DidShowSBInterstitial()) {
570 if (DidShowSBInterstitial()) {
571 browse_info_->unsafe_resource.reset(unsafe_resource_.release());
572 }
573 // Start browser-side feature extraction. Once we're done it will send
574 // the client verdict request.
575 feature_extractor_->ExtractFeatures(
576 browse_info_.get(),
577 verdict.release(),
578 base::Bind(&ClientSideDetectionHost::FeatureExtractionDone,
579 weak_factory_.GetWeakPtr()));
580 }
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000581 }
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000582}
583
584void ClientSideDetectionHost::MaybeShowPhishingWarning(GURL phishing_url,
585 bool is_phishing) {
586 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
Ben Murdocheffb81e2014-03-31 11:51:25 +0100587 DVLOG(2) << "Received server phishing verdict for URL:" << phishing_url
588 << " is_phishing:" << is_phishing;
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000589 if (is_phishing) {
590 DCHECK(web_contents());
Torne (Richard Coles)868fa2f2013-06-11 10:57:03 +0100591 if (ui_manager_.get()) {
Torne (Richard Coles)2a99a7e2013-03-28 15:31:22 +0000592 SafeBrowsingUIManager::UnsafeResource resource;
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000593 resource.url = phishing_url;
594 resource.original_url = phishing_url;
595 resource.is_subresource = false;
596 resource.threat_type = SB_THREAT_TYPE_CLIENT_SIDE_PHISHING_URL;
597 resource.render_process_host_id =
598 web_contents()->GetRenderProcessHost()->GetID();
599 resource.render_view_id =
600 web_contents()->GetRenderViewHost()->GetRoutingID();
Torne (Richard Coles)2a99a7e2013-03-28 15:31:22 +0000601 if (!ui_manager_->IsWhitelisted(resource)) {
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000602 // We need to stop any pending navigations, otherwise the interstital
603 // might not get created properly.
604 web_contents()->GetController().DiscardNonCommittedEntries();
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000605 }
Torne (Richard Coles)5d1f7b12014-02-21 12:16:55 +0000606 ui_manager_->DisplayBlockingPage(resource);
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000607 }
Torne (Richard Coles)424c4d72013-08-30 15:14:49 +0100608 // If there is true phishing verdict, invalidate weakptr so that no longer
609 // consider the malware vedict.
610 weak_factory_.InvalidateWeakPtrs();
611 }
612}
613
614void ClientSideDetectionHost::MaybeShowMalwareWarning(GURL original_url,
615 GURL malware_url,
616 bool is_malware) {
617 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
Ben Murdocheffb81e2014-03-31 11:51:25 +0100618 DVLOG(2) << "Received server malawre IP verdict for URL:" << malware_url
619 << " is_malware:" << is_malware;
Torne (Richard Coles)424c4d72013-08-30 15:14:49 +0100620 if (is_malware && malware_url.is_valid() && original_url.is_valid()) {
621 DCHECK(web_contents());
622 if (ui_manager_.get()) {
623 SafeBrowsingUIManager::UnsafeResource resource;
624 resource.url = malware_url;
625 resource.original_url = original_url;
626 resource.is_subresource = (malware_url.host() != original_url.host());
627 resource.threat_type = SB_THREAT_TYPE_CLIENT_SIDE_MALWARE_URL;
628 resource.render_process_host_id =
629 web_contents()->GetRenderProcessHost()->GetID();
630 resource.render_view_id =
631 web_contents()->GetRenderViewHost()->GetRoutingID();
632 if (!ui_manager_->IsWhitelisted(resource)) {
633 // We need to stop any pending navigations, otherwise the interstital
634 // might not get created properly.
635 web_contents()->GetController().DiscardNonCommittedEntries();
Torne (Richard Coles)424c4d72013-08-30 15:14:49 +0100636 }
Torne (Richard Coles)5d1f7b12014-02-21 12:16:55 +0000637 ui_manager_->DisplayBlockingPage(resource);
Torne (Richard Coles)424c4d72013-08-30 15:14:49 +0100638 }
639 // If there is true malware verdict, invalidate weakptr so that no longer
640 // consider the phishing vedict.
641 weak_factory_.InvalidateWeakPtrs();
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000642 }
643}
644
645void ClientSideDetectionHost::FeatureExtractionDone(
646 bool success,
Ben Murdoch116680a2014-07-20 18:25:52 -0700647 scoped_ptr<ClientPhishingRequest> request) {
Torne (Richard Coles)f2477e02013-11-28 11:55:43 +0000648 DCHECK(request);
Ben Murdocheffb81e2014-03-31 11:51:25 +0100649 DVLOG(2) << "Feature extraction done (success:" << success << ") for URL: "
650 << request->url() << ". Start sending client phishing request.";
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000651 ClientSideDetectionService::ClientReportPhishingRequestCallback callback;
652 // If the client-side verdict isn't phishing we don't care about the server
653 // response because we aren't going to display a warning.
654 if (request->is_phishing()) {
655 callback = base::Bind(&ClientSideDetectionHost::MaybeShowPhishingWarning,
656 weak_factory_.GetWeakPtr());
657 }
658 // Send ping even if the browser feature extraction failed.
659 csd_service_->SendClientReportPhishingRequest(
Ben Murdoch116680a2014-07-20 18:25:52 -0700660 request.release(), // The service takes ownership of the request object.
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000661 callback);
662}
663
Torne (Richard Coles)2a99a7e2013-03-28 15:31:22 +0000664void ClientSideDetectionHost::MalwareFeatureExtractionDone(
Torne (Richard Coles)f2477e02013-11-28 11:55:43 +0000665 bool feature_extraction_success,
Torne (Richard Coles)2a99a7e2013-03-28 15:31:22 +0000666 scoped_ptr<ClientMalwareRequest> request) {
Torne (Richard Coles)f2477e02013-11-28 11:55:43 +0000667 DCHECK(request.get());
Ben Murdocheffb81e2014-03-31 11:51:25 +0100668 DVLOG(2) << "Malware Feature extraction done for URL: " << request->url()
669 << ", with badip url count:" << request->bad_ip_url_info_size();
Torne (Richard Coles)2a99a7e2013-03-28 15:31:22 +0000670
671 // Send ping if there is matching features.
Torne (Richard Coles)a3f6a492013-12-18 16:25:09 +0000672 if (feature_extraction_success && request->bad_ip_url_info_size() > 0) {
Torne (Richard Coles)2a99a7e2013-03-28 15:31:22 +0000673 VLOG(1) << "Start sending client malware request.";
674 ClientSideDetectionService::ClientReportMalwareRequestCallback callback;
Torne (Richard Coles)424c4d72013-08-30 15:14:49 +0100675 callback = base::Bind(&ClientSideDetectionHost::MaybeShowMalwareWarning,
676 weak_factory_.GetWeakPtr());
Torne (Richard Coles)f2477e02013-11-28 11:55:43 +0000677 csd_service_->SendClientReportMalwareRequest(request.release(), callback);
Torne (Richard Coles)2a99a7e2013-03-28 15:31:22 +0000678 }
679}
680
Torne (Richard Coles)5f1c9432014-08-12 13:47:38 +0100681void ClientSideDetectionHost::UpdateIPUrlMap(const std::string& ip,
682 const std::string& url,
683 const std::string& method,
684 const std::string& referrer,
685 const ResourceType resource_type) {
Torne (Richard Coles)a36e5922013-08-05 13:57:33 +0100686 if (ip.empty() || url.empty())
Torne (Richard Coles)2a99a7e2013-03-28 15:31:22 +0000687 return;
688
Torne (Richard Coles)a36e5922013-08-05 13:57:33 +0100689 IPUrlMap::iterator it = browse_info_->ips.find(ip);
Torne (Richard Coles)2a99a7e2013-03-28 15:31:22 +0000690 if (it == browse_info_->ips.end()) {
Torne (Richard Coles)46d4c2b2014-06-09 12:00:27 +0100691 if (browse_info_->ips.size() < kMaxIPsPerBrowse) {
Torne (Richard Coles)a3f6a492013-12-18 16:25:09 +0000692 std::vector<IPUrlInfo> url_infos;
693 url_infos.push_back(IPUrlInfo(url, method, referrer, resource_type));
694 browse_info_->ips.insert(make_pair(ip, url_infos));
Torne (Richard Coles)2a99a7e2013-03-28 15:31:22 +0000695 }
Torne (Richard Coles)46d4c2b2014-06-09 12:00:27 +0100696 } else if (it->second.size() < kMaxUrlsPerIP) {
Torne (Richard Coles)a3f6a492013-12-18 16:25:09 +0000697 it->second.push_back(IPUrlInfo(url, method, referrer, resource_type));
Torne (Richard Coles)2a99a7e2013-03-28 15:31:22 +0000698 }
699}
700
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000701void ClientSideDetectionHost::Observe(
702 int type,
703 const content::NotificationSource& source,
704 const content::NotificationDetails& details) {
705 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
706 DCHECK_EQ(type, content::NOTIFICATION_RESOURCE_RESPONSE_STARTED);
707 const ResourceRequestDetails* req = content::Details<ResourceRequestDetails>(
708 details).ptr();
Ben Murdocheffb81e2014-03-31 11:51:25 +0100709 if (req && browse_info_.get() &&
710 should_extract_malware_features_ && req->url.is_valid()) {
711 UpdateIPUrlMap(req->socket_address.host() /* ip */,
712 req->url.spec() /* url */,
713 req->method,
714 req->referrer,
715 req->resource_type);
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000716 }
717}
718
Torne (Richard Coles)a3f6a492013-12-18 16:25:09 +0000719bool ClientSideDetectionHost::DidShowSBInterstitial() const {
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000720 if (unsafe_unique_page_id_ <= 0 || !web_contents()) {
721 return false;
722 }
723 const NavigationEntry* nav_entry =
724 web_contents()->GetController().GetActiveEntry();
725 return (nav_entry && nav_entry->GetUniqueID() == unsafe_unique_page_id_);
726}
727
728void ClientSideDetectionHost::set_client_side_detection_service(
729 ClientSideDetectionService* service) {
730 csd_service_ = service;
731}
732
Torne (Richard Coles)2a99a7e2013-03-28 15:31:22 +0000733void ClientSideDetectionHost::set_safe_browsing_managers(
734 SafeBrowsingUIManager* ui_manager,
735 SafeBrowsingDatabaseManager* database_manager) {
Torne (Richard Coles)868fa2f2013-06-11 10:57:03 +0100736 if (ui_manager_.get())
Torne (Richard Coles)2a99a7e2013-03-28 15:31:22 +0000737 ui_manager_->RemoveObserver(this);
738
739 ui_manager_ = ui_manager;
740 if (ui_manager)
741 ui_manager_->AddObserver(this);
742
743 database_manager_ = database_manager;
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000744}
745
Torne (Richard Coles)58218062012-11-14 11:43:16 +0000746} // namespace safe_browsing