Skip to content

Commit 99f63ad

Browse files
committed
google isn't taking the hint
Specifically Google, but also some other crawlers, have started ignoring rel="nofollow" while also understanding just enough JavaScript to try viewing binary files as text.
1 parent de2c978 commit 99f63ad

File tree

5 files changed

+16
-2
lines changed

5 files changed

+16
-2
lines changed

copyparty/__main__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
from .svchub import SvcHub
4141
from .util import (
4242
APPLESAN_TXT,
43+
BAD_BOTS,
4344
DEF_EXP,
4445
DEF_MTE,
4546
DEF_MTH,
@@ -1244,6 +1245,7 @@ def add_optouts(ap):
12441245
ap2.add_argument("--zipmaxt", metavar="TXT", type=u, default="", help="custom errormessage when download size exceeds max (volflag=zipmaxt)")
12451246
ap2.add_argument("--zipmaxu", action="store_true", help="authenticated users bypass the zip size limit (volflag=zipmaxu)")
12461247
ap2.add_argument("--zip-who", metavar="LVL", type=int, default=3, help="who can download as zip/tar? [\033[32m0\033[0m]=nobody, [\033[32m1\033[0m]=admins, [\033[32m2\033[0m]=authenticated-with-read-access, [\033[32m3\033[0m]=everyone-with-read-access (volflag=zip_who)\n\033[1;31mWARNING:\033[0m if a nested volume has a more restrictive value than a parent volume, then this will be \033[33mignored\033[0m if the download is initiated from the parent, more lenient volume")
1248+
ap2.add_argument("--ua-nozip", metavar="PTN", type=u, default=BAD_BOTS, help="regex of user-agents to reject from download-as-zip/tar; disable with [\033[32mno\033[0m] or blank")
12471249
ap2.add_argument("--no-zip", action="store_true", help="disable download as zip/tar; same as \033[33m--zip-who=0\033[0m")
12481250
ap2.add_argument("--no-tarcmp", action="store_true", help="disable download as compressed tar (?tar=gz, ?tar=bz2, ?tar=xz, ?tar=gz:9, ...)")
12491251
ap2.add_argument("--no-lifetime", action="store_true", help="do not allow clients (or server config) to schedule an upload to be deleted after a given time")
@@ -1434,6 +1436,7 @@ def add_txt(ap):
14341436
ap2.add_argument("--exp", action="store_true", help="enable textfile expansion -- replace {{self.ip}} and such; see \033[33m--help-exp\033[0m (volflag=exp)")
14351437
ap2.add_argument("--exp-md", metavar="V,V,V", type=u, default=DEF_EXP, help="comma/space-separated list of placeholders to expand in markdown files; add/remove stuff on the default list with +hdr_foo or /vf.scan (volflag=exp_md)")
14361438
ap2.add_argument("--exp-lg", metavar="V,V,V", type=u, default=DEF_EXP, help="comma/space-separated list of placeholders to expand in prologue/epilogue files (volflag=exp_lg)")
1439+
ap2.add_argument("--ua-nodoc", metavar="PTN", type=u, default=BAD_BOTS, help="regex of user-agents to reject from viewing documents through ?doc=[...]; disable with [\033[32mno\033[0m] or blank")
14371440

14381441

14391442
def add_og(ap):

copyparty/httpcli.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3807,6 +3807,9 @@ def _can_zip(self, volflags: dict[str, Any]) -> str:
38073807
return "download-as-zip/tar is admin-only on this server"
38083808
elif lvl <= 2 and self.uname in ("", "*"):
38093809
return "you must be authenticated to download-as-zip/tar on this server"
3810+
elif self.args.ua_nozip and self.args.ua_nozip.search(self.ua):
3811+
t = "this URL contains no valuable information for bots/crawlers"
3812+
raise Pebkac(403, t)
38103813
return ""
38113814

38123815
def tx_res(self, req_path: str) -> bool:
@@ -6291,6 +6294,10 @@ def tx_browser(self) -> bool:
62916294

62926295
doc = self.uparam.get("doc") if self.can_read else None
62936296
if doc:
6297+
zp = self.args.ua_nodoc
6298+
if zp and zp.search(self.ua):
6299+
t = "this URL contains no valuable information for bots/crawlers"
6300+
raise Pebkac(403, t)
62946301
j2a["docname"] = doc
62956302
doctxt = None
62966303
dfn = lnames.get(doc.lower())

copyparty/svchub.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -769,7 +769,8 @@ def _process_config(self) -> bool:
769769
vs = os.path.expandvars(os.path.expanduser(vs))
770770
setattr(al, k, vs)
771771

772-
for k in "dav_ua1 sus_urls nonsus_urls".split(" "):
772+
zs = "dav_ua1 sus_urls nonsus_urls ua_nodoc ua_nozip"
773+
for k in zs.split(" "):
773774
vs = getattr(al, k)
774775
if not vs or vs == "no":
775776
setattr(al, k, None)

copyparty/util.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,9 @@ def sunpack(fmt: bytes, a: bytes) -> tuple[Any, ...]:
245245

246246
META_NOBOTS = '<meta name="robots" content="noindex, nofollow">\n'
247247

248+
# user-agents of crawlers that are smart enough to understand javascript while also ignoring rel="nofollow"
249+
BAD_BOTS = r"Barkrowler|bingbot|BLEXBot|Googlebot|GPTBot|PetalBot|SeekportBot|SemrushBot|YandexBot"
250+
248251
FFMPEG_URL = "https://www.gyan.dev/ffmpeg/builds/ffmpeg-git-full.7z"
249252

250253
URL_PRJ = "https://github.com/9001/copyparty"

tests/util.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ def __init__(self, a=None, v=None, c=None, **ka0):
135135
ex = "dav_inf dedup dotpart dotsrch hook_v no_dhash no_fastboot no_fpool no_htp no_rescan no_sendfile no_ses no_snap no_up_list no_voldump re_dhash plain_ip"
136136
ka.update(**{k: True for k in ex.split()})
137137

138-
ex = "ah_cli ah_gen css_browser hist ipu js_browser js_other mime mimes no_forget no_hash no_idx nonsus_urls og_tpl og_ua"
138+
ex = "ah_cli ah_gen css_browser hist ipu js_browser js_other mime mimes no_forget no_hash no_idx nonsus_urls og_tpl og_ua ua_nodoc ua_nozip"
139139
ka.update(**{k: None for k in ex.split()})
140140

141141
ex = "hash_mt hsortn safe_dedup srch_time u2abort u2j u2sz"

0 commit comments

Comments
 (0)