@@ -59,7 +59,7 @@ class SelScrape(SearchEngineScrape, threading.Thread):
59
59
60
60
next_page_selectors = {
61
61
'google' : '#pnnext' ,
62
- 'yandex' : '.pager__button_kind_next ' ,
62
+ 'yandex' : '.pager__item_kind_next ' ,
63
63
'bing' : '.sb_pagN' ,
64
64
'yahoo' : '#pg-next' ,
65
65
'baidu' : '.n' ,
@@ -474,7 +474,7 @@ def wait_until_serp_loaded(self):
474
474
if self .search_engine_name == 'google' :
475
475
selector = '#navcnt td.cur'
476
476
elif self .search_engine_name == 'yandex' :
477
- selector = '.pager__item_current_yes font font '
477
+ selector = '.pager__item_current_yes'
478
478
elif self .search_engine_name == 'bing' :
479
479
selector = 'nav li a.sb_pagS'
480
480
elif self .search_engine_name == 'yahoo' :
@@ -497,7 +497,7 @@ def wait_until_serp_loaded(self):
497
497
except TimeoutException as e :
498
498
self ._save_debug_screenshot ()
499
499
content = self .webdriver .find_element_by_css_selector (selector ).text
500
- raise Exception ('Pagenumber={} did not appear in navigation. Got "{}" instead' .format (self .page_number , content ))
500
+ logger . error ('Pagenumber={} did not appear in navigation. Got "{}" instead' .format (self .page_number , content ))
501
501
502
502
elif self .search_type == 'image' :
503
503
self .wait_until_title_contains_keyword ()
@@ -700,63 +700,68 @@ def build_search(self):
700
700
This is highly sensitive.
701
701
"""
702
702
super ().build_search ()
703
- # assume we are on the normal google search page right now
704
- self .webdriver .get ('https://www.google.com/preferences?hl=en' )
705
703
706
- time .sleep (1 )
704
+ if self .config .get ('google_selenium_search_settings' , False ):
705
+ # assume we are on the normal google search page right now
706
+ self .webdriver .get ('https://www.google.com/preferences?hl=en' )
707
707
708
- # wait until we see the settings
709
- element = WebDriverWait (self .webdriver , 7 ).until (EC .presence_of_element_located ((By .NAME , 'safeui' )))
708
+ time .sleep (1 )
710
709
711
- try :
712
- if self .config .get ('google_selenium_safe_search' , False ):
713
- if self .webdriver .find_element_by_name ('safeui' ).get_attribute ('value' ) != 'on' :
714
- self .webdriver .find_element_by_name ('safeui' ).click ()
710
+ if self .config .get ('google_selenium_manual_settings' , False ):
711
+ return input ('Press any Key after search settings completed...' )
712
+
713
+ # wait until we see the settings
714
+ element = WebDriverWait (self .webdriver , 7 ).until (EC .presence_of_element_located ((By .NAME , 'safeui' )))
715
715
716
716
try :
717
- if self .config .get ('google_selenium_personalization' , False ):
718
- self .webdriver .find_element_by_css_selector ('#pson-radio > div:first-child' ).click ()
719
- else :
720
- self .webdriver .find_element_by_css_selector ('#pson-radio > div:nth-child(2)' ).click ()
721
- except WebDriverException as e :
722
- logger .warning ('Cannot set personalization settings.' )
717
+ if self .config .get ('google_selenium_safe_search' , False ):
718
+ if self .webdriver .find_element_by_name ('safeui' ).get_attribute ('value' ) != 'on' :
719
+ self .webdriver .find_element_by_name ('safeui' ).click ()
723
720
724
- time .sleep (1 )
721
+ try :
722
+ if self .config .get ('google_selenium_personalization' , False ):
723
+ self .webdriver .find_element_by_css_selector ('#pson-radio > div:first-child' ).click ()
724
+ else :
725
+ self .webdriver .find_element_by_css_selector ('#pson-radio > div:nth-child(2)' ).click ()
726
+ except WebDriverException as e :
727
+ logger .warning ('Cannot set personalization settings.' )
725
728
726
- # set the region
727
- try :
728
- self .webdriver .find_element_by_id ('regionanchormore' ).click ()
729
- except WebDriverException as e :
730
- logger .warning ('Regions probably already expanded.' )
731
-
732
- region = self .config .get ('google_selenium_region' , 'US' )
733
- self .webdriver .find_element_by_css_selector ('div[data-value="{}"]' .format (region )).click ()
734
-
735
- # set the number of results
736
- num_results = self .config .get ('google_selenium_num_results' , 10 )
737
- self .webdriver .find_element_by_id ('result_slider' ).click ()
738
- # reset
739
- for i in range (5 ):
740
- self .webdriver .find_element_by_id ('result_slider' ).send_keys (Keys .LEFT )
741
- # move to desicred result
742
- for i in range ((num_results // 10 )- 1 ):
743
- time .sleep (.25 )
744
- self .webdriver .find_element_by_id ('result_slider' ).send_keys (Keys .RIGHT )
729
+ time .sleep (1 )
745
730
746
- time .sleep (1 )
731
+ # set the region
732
+ try :
733
+ self .webdriver .find_element_by_id ('regionanchormore' ).click ()
734
+ except WebDriverException as e :
735
+ logger .warning ('Regions probably already expanded.' )
747
736
748
- # save settings
749
- self .webdriver .find_element_by_css_selector ('#form-buttons div:first-child' ).click ()
750
- # accept alert
751
- self .webdriver .switch_to .alert .accept ()
737
+ region = self .config .get ('google_selenium_region' , 'US' )
738
+ self .webdriver .find_element_by_css_selector ('div[data-value="{}"]' .format (region )).click ()
752
739
753
- time .sleep (2 )
740
+ # set the number of results
741
+ num_results = self .config .get ('google_selenium_num_results' , 10 )
742
+ self .webdriver .find_element_by_id ('result_slider' ).click ()
743
+ # reset
744
+ for i in range (5 ):
745
+ self .webdriver .find_element_by_id ('result_slider' ).send_keys (Keys .LEFT )
746
+ # move to desired result
747
+ for i in range ((num_results // 10 )- 1 ):
748
+ time .sleep (.25 )
749
+ self .webdriver .find_element_by_id ('result_slider' ).send_keys (Keys .RIGHT )
754
750
755
- self . handle_request_denied ( )
751
+ time . sleep ( 1 )
756
752
757
- except WebDriverException as e :
758
- logger .error (e )
759
- raise e
753
+ # save settings
754
+ self .webdriver .find_element_by_css_selector ('#form-buttons div:first-child' ).click ()
755
+ # accept alert
756
+ self .webdriver .switch_to .alert .accept ()
757
+
758
+ time .sleep (2 )
759
+
760
+ self .handle_request_denied ()
761
+
762
+ except WebDriverException as e :
763
+ logger .error (e )
764
+ raise e
760
765
761
766
762
767
class DuckduckgoSelScrape (SelScrape ):
0 commit comments