
Commit feca3cb

Merge pull request #67 from RyouMon/fix-load-cookie
Fix load_cookie
2 parents d5fe54c + 19f3462 commit feca3cb

File tree

7 files changed: +61 -21 lines changed

requirements.txt

Lines changed: 2 additions & 1 deletion
@@ -6,4 +6,5 @@ unidecode==1.3.8
 langdetect==1.0.9
 pykakasi==2.2.1
 gppt==4.1.0
-typer>=0.14.0
+typer>=0.14.0
+loguru>=0.7.2

src/favorites_crawler/commands/crawl.py

Lines changed: 11 additions & 3 deletions
@@ -8,9 +8,11 @@
 from scrapy.utils.project import get_project_settings
 from scrapy.spiderloader import SpiderLoader
 
+from favorites_crawler.constants.domains import LMMPIC_DOMAIN, NHENTAI_DOMAIN, TWITTER_DOMAIN
 from favorites_crawler.utils.config import load_config, overwrite_spider_settings
 from favorites_crawler.constants.path import DEFAULT_FAVORS_HOME
 from favorites_crawler.utils.auth import refresh_pixiv_token
+from favorites_crawler.utils.cookies import load_cookie
 
 app = typer.Typer(help='Crawl your favorites from websites.', no_args_is_help=True)
 
@@ -36,20 +38,26 @@ def crawl_pixiv():
 @app.command('nhentai')
 def crawl_nhentai():
     """Crawl your favorite comics from nhentai."""
-    crawl('nhentai')
+    favors_home = os.path.expanduser(os.getenv('FAVORS_HOME', DEFAULT_FAVORS_HOME))
+    cookies = load_cookie(NHENTAI_DOMAIN, favors_home)
+    crawl('nhentai', cookies=cookies)
 
 
 @app.command('x')
 @app.command('twitter')
 def crawl_twitter():
     """Crawl your favorite pictures from twitter."""
-    crawl('twitter')
+    favors_home = os.path.expanduser(os.getenv('FAVORS_HOME', DEFAULT_FAVORS_HOME))
+    cookies = load_cookie(TWITTER_DOMAIN, favors_home)
+    crawl('twitter', cookies=cookies)
 
 
 @app.command('lemon')
 def crawl_lemon(id_list: list[str] = typer.Option([], '--id', '-i')):
     """Crawl your favorite photo albums from lemon."""
-    crawl('lemon', id_list=id_list)
+    favors_home = os.path.expanduser(os.getenv('FAVORS_HOME', DEFAULT_FAVORS_HOME))
+    cookies = load_cookie(LMMPIC_DOMAIN, favors_home)
+    crawl('lemon', id_list=id_list, cookies=cookies)
 
 
 def spider_closed(spider):
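For context on how the cookies keyword reaches the spiders: the spider hunks below still read self.cookies in start_requests() (see nhentai.py), so crawl() presumably forwards its keyword arguments on to Scrapy, whose stock Spider.__init__ copies unrecognized kwargs onto the spider instance. A minimal standalone sketch of that Scrapy mechanism; DemoSpider, the URL, and the cookie values are illustrative, not project code:

# Standalone sketch: how a crawl-time kwarg becomes a spider attribute.
# DemoSpider and the target URL are made up; only the mechanism is the point.
import scrapy
from scrapy.crawler import CrawlerProcess


class DemoSpider(scrapy.Spider):
    name = 'demo'

    def start_requests(self):
        # self.cookies exists because Spider.__init__ does
        # self.__dict__.update(kwargs) with the crawl kwargs.
        yield scrapy.Request('https://example.com', cookies=self.cookies)

    def parse(self, response):
        self.logger.info('fetched %d bytes', len(response.body))


if __name__ == '__main__':
    process = CrawlerProcess(settings={'LOG_LEVEL': 'INFO'})
    process.crawl(DemoSpider, cookies={'sessionid': 'example'})
    process.start()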

src/favorites_crawler/spiders/lemon.py

Lines changed: 0 additions & 5 deletions
@@ -5,7 +5,6 @@
 from favorites_crawler.itemloaders import LemonPicPostItemLoader
 from favorites_crawler.constants.endpoints import LEMON_PIC_USER_CENTER_URL, LEMON_PIC_POST_URL_PATTERN
 from favorites_crawler.constants.domains import LMMPIC_DOMAIN
-from favorites_crawler.utils.cookies import load_cookie
 
 
 class LemonSpider(BaseSpider):
@@ -25,10 +24,6 @@ class LemonSpider(BaseSpider):
         'ITEM_PIPELINES': {'favorites_crawler.pipelines.ComicPipeline': 0},
     }
 
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.cookies = load_cookie(LMMPIC_DOMAIN)
-
     def start_requests(self):
         if hasattr(self, 'id_list') and self.id_list:
             self.logger.debug('GET id_list: %s', self.id_list)

src/favorites_crawler/spiders/nhentai.py

Lines changed: 0 additions & 5 deletions
@@ -5,7 +5,6 @@
 from favorites_crawler.itemloaders import NHentaiGalleryItemLoader
 from favorites_crawler.constants.endpoints import NHENTAI_USER_FAVORITES_URL
 from favorites_crawler.constants.domains import NHENTAI_DOMAIN
-from favorites_crawler.utils.cookies import load_cookie
 
 
 class NHentaiSpider(BaseSpider):
@@ -27,10 +26,6 @@ class NHentaiSpider(BaseSpider):
         'ITEM_PIPELINES': {'favorites_crawler.pipelines.ComicPipeline': 0},
     }
 
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.cookies = load_cookie(NHENTAI_DOMAIN)
-
     def start_requests(self):
         yield Request(NHENTAI_USER_FAVORITES_URL, cookies=self.cookies)
 
src/favorites_crawler/spiders/twitter.py

Lines changed: 0 additions & 2 deletions
@@ -7,7 +7,6 @@
 from favorites_crawler.itemloaders import TwitterTweetItemLoader
 from favorites_crawler.constants.domains import TWITTER_DOMAIN
 from favorites_crawler.constants.endpoints import TWITTER_LIKES_URL
-from favorites_crawler.utils.cookies import load_cookie
 from favorites_crawler.utils.common import DictRouter
 
 
@@ -31,7 +30,6 @@ def current_url(self):
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        self.cookies = load_cookie(TWITTER_DOMAIN)
         self.base_url = TWITTER_LIKES_URL.format(id=self.custom_settings.get('LIKES_ID'))
         self.variables = {
             "userId": str(self.custom_settings.get('USER_ID')),
src/favorites_crawler/utils/cookies.py

Lines changed: 12 additions & 5 deletions

@@ -1,12 +1,19 @@
+from __future__ import annotations
+
 import os
+from pathlib import Path
 from http.cookiejar import MozillaCookieJar
 
-
-cookie_home = os.path.expanduser('~/.favorites_crawler')
+from loguru import logger
 
 
-def load_cookie(domain):
+def load_cookie(domain: str, home: str | Path) -> dict:
     """Load 'Netscape HTTP Cookie File' as dict"""
-    cookiejar = MozillaCookieJar()
-    cookiejar.load(os.path.join(cookie_home, f'{domain}_cookies.txt'))
+    try:
+        cookiejar = MozillaCookieJar()
+        cookie_file = os.path.join(home, f'{domain}_cookies.txt')
+        cookiejar.load(cookie_file)
+    except Exception as e:
+        logger.error('Failed to load cookie {}, {!r}', cookie_file, e)
+        return {}
     return {getattr(c, 'name'): getattr(c, 'value') for c in cookiejar}
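A minimal usage sketch of the revised helper, mirroring the CLI commands in the crawl.py hunk above; the behavior noted in the comments (empty dict plus a logged error on a missing or malformed file) is what this hunk implements, and no new project API is assumed:

import os

from favorites_crawler.constants.domains import NHENTAI_DOMAIN
from favorites_crawler.constants.path import DEFAULT_FAVORS_HOME
from favorites_crawler.utils.cookies import load_cookie

# Resolve the favorites home the same way the CLI commands do.
favors_home = os.path.expanduser(os.getenv('FAVORS_HOME', DEFAULT_FAVORS_HOME))

cookies = load_cookie(NHENTAI_DOMAIN, favors_home)
# Success: {'cookie_name': 'cookie_value', ...} parsed from <domain>_cookies.txt.
# Missing or malformed file: {} is returned and the error is logged via loguru.
print(cookies)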

tests/test_utils/test_cookies.py

Lines changed: 36 additions & 0 deletions
@@ -0,0 +1,36 @@
+from favorites_crawler.utils.cookies import load_cookie
+
+
+class TestLoadCookie:
+    def test_load_cookie_when_file_exists(self, tmp_path):
+        domain = 'localhost'
+        cookie_file = tmp_path / f'{domain}_cookies.txt'
+        cookie_file.touch()
+        cookie_file.write_text(
+            """# Netscape HTTP Cookie File
+# http://curl.haxx.se/rfc/cookie_spec.html
+# This is a generated file! Do not edit.
+
+localhost	FALSE	/	TRUE	9933144989	User-Agent	Test
+"""
+        )
+
+        cookie = load_cookie(domain, tmp_path)
+
+        assert cookie == {'User-Agent': 'Test'}
+
+    def test_load_cookie_when_file_not_exists(self, tmp_path):
+        domain = 'localhost'
+
+        cookie = load_cookie(domain, tmp_path)
+
+        assert cookie == {}
+
+    def test_load_cookie_when_file_invalid(self, tmp_path):
+        domain = 'localhost'
+        cookie_file = tmp_path / f'{domain}_cookies.txt'
+        cookie_file.touch()
+        cookie_file.write_text('')
+        cookie = load_cookie(domain, tmp_path)
+
+        assert cookie == {}
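For reference, the fixture above is the Netscape cookie format that MozillaCookieJar parses: one cookie per line with seven tab-separated fields (domain, include-subdomains flag, path, secure flag, expiry as epoch seconds, name, value), preceded by the "# Netscape HTTP Cookie File" header. A small stdlib-only sketch, independent of the project code, that round-trips the same entry; the output file name is arbitrary:

# Stdlib-only sketch of the cookie file format exercised by the test above.
from http.cookiejar import MozillaCookieJar

with open('localhost_cookies.txt', 'w') as f:
    f.write('# Netscape HTTP Cookie File\n')
    # domain, include-subdomains, path, secure, expiry, name, value
    f.write('localhost\tFALSE\t/\tTRUE\t9933144989\tUser-Agent\tTest\n')

jar = MozillaCookieJar()
jar.load('localhost_cookies.txt')
print({c.name: c.value for c in jar})  # expected: {'User-Agent': 'Test'}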
