Skip to content

Commit 6e828df

Browse files
authored
Merge pull request #62 from RyouMon/feature-global-config
Support global config
2 parents b422694 + e421690 commit 6e828df

File tree

5 files changed

+196
-36
lines changed

5 files changed

+196
-36
lines changed

README.md

Lines changed: 9 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,8 @@ You can set any [scrapy built-in settings](https://docs.scrapy.org/en/latest/top
126126
127127
By default, file content likes this:
128128
```yaml
129+
global:
130+
ENABLE_ORGANIZE_BY_ARTIST: true
129131
pixiv:
130132
ACCESS_TOKEN: xxxxxxxxxxxxxxxxxxxxxxxxxxxx
131133
REFRESH_TOKEN: xxxxxxxxxxxxxxxxxxxxxxxxxxxx
@@ -150,27 +152,18 @@ yandere:
150152
```
151153
152154
## Organize file by artist
153-
if you want to organize pixiv illust by user, add this line to your config:
155+
If you want to organize pixiv and yandere files by artist, add these lines to your config:
154156
```yaml
155-
pixiv:
156-
# FAVORS_PIXIV_ENABLE_ORGANIZE_BY_USER: true # (Deprecation)
157-
ENABLE_ORGANIZE_BY_ARTIST: true # add this line to your yandere config
158-
```
159-
if you want to organize yandere post by artist, add this line to your config:
160-
```yaml
161-
yandere:
162-
ENABLE_ORGANIZE_BY_ARTIST: true # add this line to your yandere config
157+
global:
158+
ENABLE_ORGANIZE_BY_ARTIST: true
163159
```
164160
165161
## Store tags to IPTC/Keywords
166-
only support pixiv and yandere.
162+
Only pixiv, yandere and twitter are supported.
167163
```yaml
168-
yandere:
169-
ENABLE_WRITE_IPTC_KEYWORDS: true # default: true
170-
EXIF_TOOL_EXECUTABLE: '<Path to your exiftool executable>' # default None
171-
pixiv:
172-
ENABLE_WRITE_IPTC_KEYWORDS: true # default: true
173-
EXIF_TOOL_EXECUTABLE: '<Path to your exiftool executable>' # default None
164+
global:
165+
ENABLE_WRITE_IPTC_KEYWORDS: true
166+
EXIF_TOOL_EXECUTABLE: '<Path to your exiftool executable>' # default None, If the executable is not in the path, set it manually
174167
```
175168
176169
# Restore your favorites

src/favorites_crawler/commands/crawl.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,13 @@
88
from scrapy.utils.project import get_project_settings
99
from scrapy.spiderloader import SpiderLoader
1010

11-
from favorites_crawler.utils.config import load_config, overwrite_settings
11+
from favorites_crawler.utils.config import load_config, overwrite_spider_settings
1212

1313
app = typer.Typer(help='Crawl your favorites from websites.', no_args_is_help=True)
1414

1515
os.environ.setdefault('SCRAPY_SETTINGS_MODULE', 'favorites_crawler.settings')
1616
scrapy_settings = get_project_settings()
1717
spider_loader = SpiderLoader(scrapy_settings)
18-
overwrite_settings(spider_loader, scrapy_settings, load_config())
1918

2019

2120
@app.command('yandere')
@@ -70,8 +69,14 @@ def crawl(name, **kwargs):
7069
:param name: spider name
7170
:param kwargs: kwargs passed to spider's __init__ method
7271
"""
72+
spider = spider_loader.load(name)
73+
overwrite_spider_settings(spider, scrapy_settings, load_config())
7374
process = CrawlerProcess(scrapy_settings)
74-
process.crawl(name, **kwargs)
75+
process.crawl(spider, **kwargs)
7576
for crawler in process.crawlers:
7677
crawler.signals.connect(spider_closed, signal=signals.spider_closed)
7778
process.start()
79+
80+
81+
if __name__ == '__main__':
82+
crawl('pixiv')

src/favorites_crawler/commands/login.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ def login_pixiv(
5252
print(f'Failed to login. {e!r}')
5353
else:
5454
dump_config(config)
55+
print("Login successful.")
5556

5657

5758
@app.command('yandere')
@@ -68,6 +69,7 @@ def login_yandere(
6869
yandere_config = config.setdefault('yandere', {})
6970
yandere_config['USERNAME'] = username
7071
dump_config(config)
72+
print("Login successful.")
7173

7274

7375
@app.command('x')
@@ -101,6 +103,7 @@ def login_twitter(
101103
"Failed to login."
102104
return
103105
dump_config(config)
106+
print("Login successful.")
104107

105108

106109
def parse_twitter_likes_url(url):

src/favorites_crawler/utils/config.py

Lines changed: 63 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,80 @@
import copy
import os

import yaml

5-
config_path = os.path.expanduser('~/.favorites_crawler')
6-
config_file = os.path.join(config_path, 'config.yml')
7-
if not os.path.exists(config_path):
8-
os.mkdir(config_path)
# Default location of the favorites-crawler home directory.
DEFAULT_FAVORS_HOME = os.path.expanduser('~/.favorites_crawler')

# Configuration written to config.yml on first run when none exists yet.
# Keys under 'global' apply to every spider; the per-spider sections
# (pixiv / yandere / twitter / lemon / nhentai) override them.
DEFAULT_CONFIG = {
    'global': {
        'ENABLE_ORGANIZE_BY_ARTIST': True,
        'ENABLE_WRITE_IPTC_KEYWORDS': True,
        'EXIF_TOOL_EXECUTABLE': None,  # None: resolve exiftool from PATH
    },
    'pixiv': {
        'FILES_STORE': 'favorites_crawler_files/pixiv',
        'USER_ID': '',
        'ACCESS_TOKEN': '',
        'REFRESH_TOKEN': '',
    },
    'yandere': {
        'FILES_STORE': 'favorites_crawler_files/yandere',
        'USERNAME': '',
    },
    'twitter': {
        'FILES_STORE': 'favorites_crawler_files/twitter',
        'USER_ID': '',
        'AUTHORIZATION': '',
        'LIKES_ID': '',
        'X_CSRF_TOKEN': '',
    },
    'lemon': {
        'FILES_STORE': 'favorites_crawler_files/lemon',
    },
    'nhentai': {
        'FILES_STORE': 'favorites_crawler_files/nhentai',
    }
}
935

1036

def load_config(home: str = DEFAULT_FAVORS_HOME) -> dict:
    """Load config from the favors home directory.

    Creates the home directory and a default ``config.yml`` on first use.

    :param home: favors home directory, defaults to ``~/.favorites_crawler``.
    :return: the configuration mapping; a fresh copy of ``DEFAULT_CONFIG``
        when no config file existed, an empty dict when the file is blank.
    """
    create_favors_home(home)
    config_file = os.path.join(home, 'config.yml')
    if not os.path.exists(config_file):
        dump_config(DEFAULT_CONFIG, home)
        # Return a copy so callers mutating the result cannot corrupt
        # the shared module-level DEFAULT_CONFIG.
        return copy.deepcopy(DEFAULT_CONFIG)
    with open(config_file, encoding='utf8') as f:
        # A blank config file parses to None; normalize to an empty dict
        # so callers can always treat the result as a mapping.
        return yaml.safe_load(f) or {}

1847

def dump_config(data: dict, home: str = DEFAULT_FAVORS_HOME):
    """Write *data* as YAML to ``config.yml`` under the favors home.

    The home directory is created first when it does not exist yet.

    :param data: configuration mapping to persist.
    :param home: favors home directory, defaults to ``~/.favorites_crawler``.
    """
    create_favors_home(home)
    target = os.path.join(home, 'config.yml')
    with open(target, 'w', encoding='utf8') as stream:
        yaml.safe_dump(data, stream, allow_unicode=True)

2455

25-
def overwrite_settings(spider_loader, settings, user_config):
26-
spider_names = spider_loader.list()
27-
for name in spider_names:
28-
cls = spider_loader.load(name)
29-
spider_config = user_config.get(cls.name, {})
30-
if spider_config:
31-
cls.custom_settings.update(spider_config)
def create_favors_home(path: str):
    """Create the favors home directory if it does not exist.

    :param path: directory to create; intermediate directories are
        created as needed. Existing directories are left untouched.
    """
    # makedirs(exist_ok=True) is already a no-op for existing directories,
    # so the former os.path.exists() pre-check was redundant and racy (TOCTOU).
    os.makedirs(path, exist_ok=True)
61+
def overwrite_spider_settings(spider, default_settings, user_config):
    """
    Overwrite spider settings by user config.
    Priority: favors spider config > favors global config > spider custom settings > scrapy settings

    :param spider: Spider class
    :param default_settings: :class:`scrapy.settings.Settings`
    :param user_config: favorites crawler config
    """
    # Scrapy spiders default custom_settings to None; normalize to a dict
    # before merging, otherwise .update() raises AttributeError.
    if spider.custom_settings is None:
        spider.custom_settings = {}

    global_config = user_config.get('global')
    if global_config:
        spider.custom_settings.update(global_config)

    # Spider-specific config is applied second so it wins over global config.
    spider_config = user_config.get(spider.name)
    if spider_config:
        spider.custom_settings.update(spider_config)

    # Fall back to <scrapy FILES_STORE>/<spider name> when the user did not set it.
    default_files_store = os.path.join(default_settings.get('FILES_STORE', ''), spider.name)
    spider.custom_settings.setdefault('FILES_STORE', default_files_store)

tests/test_utils/test_config.py

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
import os
2+
import yaml
3+
4+
from scrapy.utils.project import get_project_settings
5+
from scrapy.spiderloader import SpiderLoader
6+
7+
from favorites_crawler.utils.config import (
8+
create_favors_home, load_config, dump_config, DEFAULT_CONFIG, overwrite_spider_settings)
9+
10+
scrapy_settings = get_project_settings()
11+
spider_loader = SpiderLoader(scrapy_settings)
12+
13+
14+
class TestCreateFavorsHome:
15+
def test_should_create_path_when_path_exists(self, tmp_path):
16+
test_path = tmp_path / "existing_dir"
17+
test_path.mkdir()
18+
19+
create_favors_home(str(test_path))
20+
21+
assert test_path.exists()
22+
assert test_path.is_dir()
23+
24+
def test_skip_create_path_when_path_not_exists(self, tmp_path):
25+
test_path = tmp_path / "non_existing_dir"
26+
27+
create_favors_home(str(test_path))
28+
29+
assert test_path.exists()
30+
assert test_path.is_dir()
31+
32+
33+
class TestLoadConfig:
34+
def test_load_config_when_config_not_exists(self, tmp_path):
35+
favors_home = str(tmp_path)
36+
37+
config = load_config(favors_home)
38+
39+
assert config == DEFAULT_CONFIG
40+
config_file = os.path.join(favors_home, 'config.yml')
41+
assert os.path.exists(config_file)
42+
43+
with open(config_file, encoding='utf8') as f:
44+
written_config = yaml.safe_load(f)
45+
assert written_config == DEFAULT_CONFIG
46+
47+
def test_load_config_when_config_exists(self, tmp_path):
48+
favors_home = str(tmp_path)
49+
config_file = os.path.join(favors_home, 'config.yml')
50+
existing_config = {'global': {'ENABLE_ORGANIZE_BY_ARTIST': False}}
51+
52+
with open(config_file, 'w', encoding='utf8') as f:
53+
yaml.safe_dump(existing_config, f)
54+
55+
config = load_config(favors_home)
56+
57+
assert config == existing_config
58+
59+
60+
class TestDumpConfig:
61+
def test_dump_config_to_favors_home(self, tmp_path):
62+
favors_home = str(tmp_path)
63+
new_config = {'global': {'ENABLE_ORGANIZE_BY_ARTIST': False}}
64+
65+
dump_config(new_config, favors_home)
66+
67+
config_file = os.path.join(favors_home, 'config.yml')
68+
assert os.path.exists(config_file)
69+
70+
with open(config_file, encoding='utf8') as f:
71+
written_config = yaml.safe_load(f)
72+
assert written_config == new_config
73+
74+
75+
class TestOverwriteSpiderSettings:
76+
def test_overwrite_spider_settings(self):
77+
user_config = {
78+
'global': {
79+
'ENABLE_ORGANIZE_BY_ARTIST': True,
80+
},
81+
'pixiv': {
82+
'FILES_STORE': '/pixiv',
83+
}
84+
}
85+
spider = spider_loader.load('pixiv')
86+
87+
overwrite_spider_settings(spider, scrapy_settings, user_config)
88+
89+
assert spider.custom_settings['FILES_STORE'] == user_config['pixiv']['FILES_STORE']
90+
assert spider.custom_settings['ENABLE_ORGANIZE_BY_ARTIST'] == user_config['global']['ENABLE_ORGANIZE_BY_ARTIST']
91+
92+
def test_spider_config_priority_should_gt_global_config(self):
93+
user_config = {
94+
'global': {
95+
'ENABLE_ORGANIZE_BY_ARTIST': True,
96+
},
97+
'yandere': {
98+
'ENABLE_ORGANIZE_BY_ARTIST': False,
99+
}
100+
}
101+
spider = spider_loader.load('yandere')
102+
103+
overwrite_spider_settings(spider, scrapy_settings, user_config)
104+
105+
assert spider.custom_settings['ENABLE_ORGANIZE_BY_ARTIST'] == user_config['yandere']['ENABLE_ORGANIZE_BY_ARTIST']
106+
107+
def test_should_set_default_file_store_when_user_doesnt_config_it(self):
108+
user_config = {}
109+
spider = spider_loader.load('nhentai')
110+
111+
overwrite_spider_settings(spider, scrapy_settings, user_config)
112+
113+
assert spider.custom_settings['FILES_STORE'] == os.path.join(scrapy_settings.get('FILES_STORE', ''), 'nhentai')

0 commit comments

Comments
 (0)