Coverage for aisdb/webdata/_scraper.py: 91%
22 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-30 04:22 +0000
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-30 04:22 +0000
''' web scraper using selenium, Firefox, and Mozilla geckodriver '''
3import os
4import shutil
def _scraper():
    ''' create a selenium web scraper: a configured ``selenium.webdriver``
        Firefox driver

        The geckodriver executable is resolved with
        ``webdriver_manager.firefox.GeckoDriverManager().install()``.

        The browser is started headless unless the ``DEBUG`` environment
        variable is set to a non-empty value, or ``HEADLESS`` is set to
        ``'0'``. To open a browser window while debugging, export DEBUG=1

        When ``DEBUG`` is set the window is maximized, otherwise the window
        size is set to 9999x9999.

        returns:
            the started ``webdriver.Firefox`` instance. The caller is
            responsible for calling ``quit()`` on it when finished.

        raises:
            any exception raised while starting or resizing the browser.
            If resizing fails, the already-started browser is shut down
            before the exception is re-raised.
    '''
    # imported lazily so that selenium is only required when a scraper
    # is actually requested
    from selenium import webdriver
    from selenium.webdriver.firefox.options import Options
    from selenium.webdriver.firefox.service import Service
    from webdriver_manager.firefox import GeckoDriverManager

    debug = bool(os.environ.get('DEBUG'))

    # configs: Firefox profile preferences, applied in this order
    # NOTE(review): these presumably disable image loading and media
    # autoplay to keep page loads light -- confirm against Firefox docs
    preferences = {
        'permissions.default.image': 2,
        'extensions.contentblocker.enabled': True,
        'media.autoplay.default': 2,
        'media.autoplay.allow-muted': False,
        'media.autoplay.block-event.enabled': True,
        'media.autoplay.block-webaudio': True,
        'services.sync.prefs.sync.media.autoplay.default': False,
        'ui.context_menus.after_mouseup': False,
        'privacy.sanitize.sanitizeOnShutdown': True,
        'dom.disable_beforeunload': True,
    }
    opt = Options()
    for pref_name, pref_value in preferences.items():
        opt.set_preference(pref_name, pref_value)

    # headless by default; DEBUG=<anything> or HEADLESS=0 shows the window
    if not debug and os.environ.get('HEADLESS') != '0':
        opt.add_argument('-headless')

    driver = webdriver.Firefox(
        service=Service(executable_path=GeckoDriverManager().install()),
        options=opt)

    try:
        if debug:
            driver.maximize_window()
        else:
            driver.set_window_size(9999, 9999)
    except BaseException:
        # the caller never receives the driver on failure, so shut the
        # browser session down here instead of leaking it
        driver.quit()
        raise

    return driver