Skip to content

Commit 127d903

Browse files
committed
Fixed Vimeo, activation/probing mechanisms improvements
1 parent 81a817a commit 127d903

File tree

8 files changed

+79
-35
lines changed

8 files changed

+79
-35
lines changed

.githooks/pre-commit

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
#!/bin/sh
2-
python3 ./utils/update_site_data.py
2+
echo 'Activating update_sitesmd hook script...'
3+
poetry run update_sitesmd

maigret/activation.py

+2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import json
12
from http.cookiejar import MozillaCookieJar
23
from http.cookies import Morsel
34

@@ -25,6 +26,7 @@ def vimeo(site, logger, cookies={}):
2526
import requests
2627

2728
r = requests.get(site.activation["url"], headers=headers)
29+
logger.debug(f"Vimeo viewer activation: {json.dumps(r.json(), indent=4)}")
2830
jwt_token = r.json()["jwt"]
2931
site.headers["Authorization"] = "jwt " + jwt_token
3032

maigret/resources/data.json

+7-8
Original file line numberDiff line numberDiff line change
@@ -7037,7 +7037,7 @@
70377037
"alexaRank": 1,
70387038
"urlMain": "https://play.google.com/store",
70397039
"url": "https://play.google.com/store/apps/developer?id={username}",
7040-
"usernameClaimed": "OpenAI",
7040+
"usernameClaimed": "KONAMI",
70417041
"usernameUnclaimed": "noonewouldeverusethis7"
70427042
},
70437043
"Gorod.dp.ua": {
@@ -17412,26 +17412,25 @@
1741217412
},
1741317413
"Vimeo": {
1741417414
"tags": [
17415-
"us",
1741617415
"video"
1741717416
],
17418-
"headers": {
17419-
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE3MzM4MzkwODAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbCwianRpIjoiOWNjMjk0ZjktZGZhOS00NDI0LWE0OGEtN2JjYzkwYjM2NTMyIn0.wG0kC7fWtrdKI9ccS-LE81lVgQRfYobrqCAPWxr1wzc"
17420-
},
1742117417
"activation": {
1742217418
"url": "https://vimeo.com/_rv/viewer",
1742317419
"marks": [
17424-
"Something strange occurred. Please get in touch with the app's creator."
17420+
"Something strange occurred. Please get in touch"
1742517421
],
1742617422
"method": "vimeo"
1742717423
},
17424+
"headers": {
17425+
"Authorization": "jwt eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE3MzM4NzUyMDAsInVzZXJfaWQiOm51bGwsImFwcF9pZCI6NTg0NzksInNjb3BlcyI6InB1YmxpYyIsInRlYW1fdXNlcl9pZCI6bnVsbCwianRpIjoiM2MxNWE0NDUtMjVlYy00NzJhLTg5NzgtMjIzMWJiMmQ1Y2Q0In0.-hmhKFIcM0SyYtDadKAU2eqQhcYvfFGPR8vvuzLNbWM"
17426+
},
1742817427
"urlProbe": "https://api.vimeo.com/users/{username}?fields=name%2Cgender%2Cbio%2Curi%2Clink%2Cbackground_video%2Clocation_details%2Cpictures%2Cverified%2Cmetadata.public_videos.total%2Cavailable_for_hire%2Ccan_work_remotely%2Cmetadata.connections.videos.total%2Cmetadata.connections.albums.total%2Cmetadata.connections.followers.total%2Cmetadata.connections.following.total%2Cmetadata.public_videos.total%2Cmetadata.connections.vimeo_experts.is_enrolled%2Ctotal_collection_count%2Ccreated_time%2Cprofile_preferences%2Cmembership%2Cclients%2Cskills%2Cproject_types%2Crates%2Ccategories%2Cis_expert%2Cprofile_discovery%2Cwebsites%2Ccontact_emails&fetch_user_profile=1",
1742917428
"checkType": "status_code",
1743017429
"alexaRank": 148,
17431-
"urlMain": "https://vimeo.com/",
17430+
"urlMain": "https://vimeo.com",
1743217431
"url": "https://vimeo.com/{username}",
1743317432
"usernameClaimed": "blue",
17434-
"usernameUnclaimed": "noonewouldeverusethis7"
17433+
"usernameUnclaimed": "smbepezbrg"
1743517434
},
1743617435
"Virgool": {
1743717436
"disabled": true,

maigret/sites.py

+13
Original file line numberDiff line numberDiff line change
@@ -560,13 +560,26 @@ def get_db_stats(self, is_markdown=False):
560560
checks_perc = round(100 * message_checks_one_factor / enabled_count, 2)
561561
status_checks_perc = round(100 * status_checks / enabled_count, 2)
562562

563+
# Sites with probing and activation (kinda special cases, let's watch them)
564+
site_with_probing = []
565+
site_with_activation = []
566+
for site in sites_dict.values():
567+
def get_site_label(site):
568+
return f"{site.name}{' (disabled)' if site.disabled else ''}"
569+
if site.url_probe:
570+
site_with_probing.append(get_site_label(site))
571+
if site.activation:
572+
site_with_activation.append(get_site_label(site))
573+
563574
# Format output
564575
separator = "\n\n"
565576
output = [
566577
f"Enabled/total sites: {enabled_count}/{total_count} = {enabled_perc}%",
567578
f"Incomplete message checks: {message_checks_one_factor}/{enabled_count} = {checks_perc}% (false positive risks)",
568579
f"Status code checks: {status_checks}/{enabled_count} = {status_checks_perc}% (false positive risks)",
569580
f"False positive risk (total): {checks_perc + status_checks_perc:.2f}%",
581+
f"Sites with probing: {', '.join(sorted(site_with_probing))}",
582+
f"Sites with activation: {', '.join(sorted(site_with_activation))}",
570583
self._format_top_items("profile URLs", urls, 20, is_markdown),
571584
self._format_top_items("tags", tags, 20, is_markdown, self._tags),
572585
]

maigret/submit.py

+18-1
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ def extract_username_dialog(url):
184184
url_parts = url.rstrip("/").split("/")
185185
supposed_username = url_parts[-1].strip('@')
186186
entered_username = input(
187-
f'Is "{supposed_username}" a valid username? If not, write it manually: '
187+
f"{Fore.GREEN}[?] Is \"{supposed_username}\" a valid username? If not, write it manually: {Style.RESET_ALL}"
188188
)
189189
return entered_username if entered_username else supposed_username
190190

@@ -390,6 +390,13 @@ async def add_site(self, site):
390390
}
391391

392392
async def dialog(self, url_exists, cookie_file):
393+
"""
394+
An implementation of the submit mode:
395+
- User provides a URL of an existing social media account
396+
- Maigret tries to detect the site engine and understand how to check
397+
for account presence with HTTP responses analysis
398+
- If detection succeeds, Maigret generates a new site entry or replaces the old one in the database
399+
"""
393400
old_site = None
394401
additional_options_enabled = self.logger.level in (
395402
logging.DEBUG,
@@ -444,6 +451,15 @@ async def dialog(self, url_exists, cookie_file):
444451
f'{Fore.GREEN}[+] We will update site "{old_site.name}" in case of success.{Style.RESET_ALL}'
445452
)
446453

454+
# Check if the site check is ordinary or not
455+
if old_site and (old_site.url_probe or old_site.activation):
456+
skip = input(f"{Fore.RED}[!] The site check depends on activation / probing mechanism! Consider to update it manually. Continue? [yN]{Style.RESET_ALL}")
457+
if skip.lower() in ['n', '']:
458+
return False
459+
460+
# TODO: urlProbe support
461+
# TODO: activation support
462+
447463
url_mainpage = self.extract_mainpage_url(url_exists)
448464

449465
# headers update
@@ -511,6 +527,7 @@ async def dialog(self, url_exists, cookie_file):
511527
"urlMain": url_mainpage,
512528
"usernameClaimed": supposed_username,
513529
"usernameUnclaimed": non_exist_username,
530+
"headers": custom_headers,
514531
"checkType": "message",
515532
}
516533
self.logger.info(json.dumps(site_data, indent=4))

pyproject.toml

+1
Original file line numberDiff line numberDiff line change
@@ -90,3 +90,4 @@ coverage = "^7.6.9"
9090
[tool.poetry.scripts]
9191
# Run with: poetry run maigret <username>
9292
maigret = "maigret.maigret:run"
93+
update_sitesmd = "utils.update_site_data:main"

sites.md

+28-23
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ Rank data fetched from Alexa by domains.
8888
1. ![](https://www.google.com/s2/favicons?domain=https://www.blogger.com) [Blogger (by GAIA id) (https://www.blogger.com)](https://www.blogger.com)*: top 500, blog*
8989
1. ![](https://www.google.com/s2/favicons?domain=https://www.researchgate.net/) [ResearchGate (https://www.researchgate.net/)](https://www.researchgate.net/)*: top 500, in, us*
9090
1. ![](https://www.google.com/s2/favicons?domain=https://www.freepik.com) [Freepik (https://www.freepik.com)](https://www.freepik.com)*: top 500, art, photo, stock*
91-
1. ![](https://www.google.com/s2/favicons?domain=https://vimeo.com/) [Vimeo (https://vimeo.com/)](https://vimeo.com/)*: top 500, us, video*
91+
1. ![](https://www.google.com/s2/favicons?domain=https://vimeo.com) [Vimeo (https://vimeo.com)](https://vimeo.com)*: top 500, video*
9292
1. ![](https://www.google.com/s2/favicons?domain=https://www.pinterest.com/) [Pinterest (https://www.pinterest.com/)](https://www.pinterest.com/)*: top 500, art, photo, sharing*
9393
1. ![](https://www.google.com/s2/favicons?domain=https://www.fiverr.com/) [Fiverr (https://www.fiverr.com/)](https://www.fiverr.com/)*: top 500, shopping, us*
9494
1. ![](https://www.google.com/s2/favicons?domain=https://t.me/) [Telegram (https://t.me/)](https://t.me/)*: top 500, messaging*
@@ -3148,9 +3148,13 @@ Enabled/total sites: 2693/3137 = 85.85%
31483148

31493149
Incomplete message checks: 397/2693 = 14.74% (false positive risks)
31503150

3151-
Status code checks: 719/2693 = 26.7% (false positive risks)
3151+
Status code checks: 616/2693 = 22.87% (false positive risks)
31523152

3153-
False positive risk (total): 41.44%
3153+
False positive risk (total): 37.61%
3154+
3155+
Sites with probing: 500px, Aparat, BinarySearch (disabled), BongaCams, BuyMeACoffee, Cent, Disqus, Docker Hub, Duolingo, Gab, GitHub, GitLab, Google Plus (archived), Gravatar, Imgur, Issuu, Keybase, Livejasmin, LocalCryptos (disabled), MixCloud, Niftygateway, Reddit Search (Pushshift) (disabled), SportsTracker, Spotify (disabled), TAP'D, Trello, Twitch, Twitter, Twitter Shadowban (disabled), UnstoppableDomains, Vimeo, Weibo, Yapisal (disabled), YouNow, nightbot, notabug.org, polarsteps, qiwi.me (disabled)
3156+
3157+
Sites with activation: Spotify (disabled), Twitter, Vimeo, Weibo
31543158

31553159
Top 20 profile URLs:
31563160
- (796) `{urlMain}/index/8-0-{username} (uCoz)`
@@ -3174,24 +3178,25 @@ Top 20 profile URLs:
31743178
- (17) `/forum/members/?username={username}`
31753179
- (17) `/search.php?keywords=&terms=all&author={username}`
31763180

3181+
31773182
Top 20 tags:
3178-
- (328) `NO_TAGS` (non-standard)
3179-
- (307) `forum`
3180-
- (50) `gaming`
3181-
- (26) `coding`
3182-
- (21) `photo`
3183-
- (20) `blog`
3184-
- (19) `news`
3185-
- (15) `music`
3186-
- (14) `tech`
3187-
- (12) `freelance`
3188-
- (12) `finance`
3189-
- (11) `sharing`
3190-
- (10) `dating`
3191-
- (10) `art`
3192-
- (10) `shopping`
3193-
- (10) `movies`
3194-
- (8) `crypto`
3195-
- (7) `sport`
3196-
- (7) `hobby`
3197-
- (7) `hacking`
3183+
- (1105) `NO_TAGS` (non-standard)
3184+
- (735) `forum`
3185+
- (92) `gaming`
3186+
- (48) `photo`
3187+
- (41) `coding`
3188+
- (30) `tech`
3189+
- (29) `news`
3190+
- (28) `blog`
3191+
- (23) `music`
3192+
- (19) `finance`
3193+
- (18) `crypto`
3194+
- (16) `sharing`
3195+
- (16) `freelance`
3196+
- (15) `art`
3197+
- (15) `shopping`
3198+
- (13) `sport`
3199+
- (13) `business`
3200+
- (12) `movies`
3201+
- (11) `hobby`
3202+
- (11) `education`

utils/update_site_data.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ def get_readable_rank(r):
6767
return get_readable_rank(list(filter(lambda x: x >= rank, valid_step_ranks))[0])
6868

6969

70-
if __name__ == '__main__':
70+
def main():
7171
parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter
7272
)
7373
parser.add_argument("--base","-b", metavar="BASE_FILE",
@@ -86,6 +86,8 @@ def get_readable_rank(r):
8686
db = MaigretDatabase()
8787
sites_subset = db.load_from_file(args.base_file).sites
8888

89+
print(f"\nUpdating supported sites list (don't worry, it's needed)...")
90+
8991
with open("sites.md", "w") as site_file:
9092
site_file.write(f"""
9193
## List of supported sites (search methods): total {len(sites_subset)}\n
@@ -144,4 +146,8 @@ def get_readable_rank(r):
144146
site_file.write('## Statistics\n\n')
145147
site_file.write(statistics_text)
146148

147-
print("\nFinished updating supported site listing!")
149+
print("Finished updating supported site listing!")
150+
151+
152+
if __name__ == '__main__':
153+
main()

0 commit comments

Comments
 (0)