From 2e7d4aa263fbc38172d84baa33bb158a013821f4 Mon Sep 17 00:00:00 2001
From: profitroll
Date: Wed, 5 Apr 2023 22:31:07 +0200
Subject: [PATCH] WIP: chat language recognition

---
 holochecker.py                    |  1 +
 modules/handlers/group_message.py | 59 +++++++++++++++++++++++++++++++
 requirements.txt                  |  3 +++
 3 files changed, 63 insertions(+)
 create mode 100644 modules/handlers/group_message.py

diff --git a/holochecker.py b/holochecker.py
index e941471..c6ae8f1 100644
--- a/holochecker.py
+++ b/holochecker.py
@@ -42,6 +42,7 @@ from modules.callbacks.warnings import *
 from modules.handlers.confirmation import *
 from modules.handlers.contact import *
 from modules.handlers.group_member_update import *
+from modules.handlers.group_message import *
 from modules.handlers.voice import *
 from modules.handlers.welcome import *
 from modules.handlers.everything import *
diff --git a/modules/handlers/group_message.py b/modules/handlers/group_message.py
new file mode 100644
index 0000000..a59f5db
--- /dev/null
+++ b/modules/handlers/group_message.py
@@ -0,0 +1,59 @@
+from datetime import datetime
+from app import app
+from pyrogram import filters
+from pyrogram.types import Message
+from pyrogram.client import Client
+from modules.logging import logWrite
+from modules.utils import configGet, locale
+from modules.database import col_warnings
+from modules import custom_filters
+from polyglot.detect import Detector
+
+
+def _describe_sender(msg: Message) -> str:
+    """Return a log-friendly label for the sender of *msg*.
+
+    ``from_user`` is empty for anonymous group admins and for posts made on
+    behalf of a channel, so fall back to ``sender_chat`` instead of letting
+    the handler die with ``AttributeError``.
+    """
+    if msg.from_user is not None:
+        return f"{msg.from_user.first_name} ({msg.from_user.id})"
+    if msg.sender_chat is not None:
+        return f"{msg.sender_chat.title} ({msg.sender_chat.id})"
+    return "unknown sender"
+
+
+@app.on_message(
+    custom_filters.enabled_general
+    & ~filters.scheduled
+    & filters.chat(configGet("users", "groups"))
+)
+async def msg_destination_group(app: Client, msg: Message):
+    """Log the detected language of a message sent to a configured group.
+
+    WIP: the result is only written to the log; nothing else is done with it.
+
+    Args:
+        app: Client that received the update.
+        msg: Incoming message; its text is analysed, or its caption when the
+            message has no text.
+    """
+    sender = _describe_sender(msg)
+
+    # Media messages carry their text in ``caption`` rather than ``text``.
+    content = msg.text if msg.text is not None else msg.caption
+    if content is None:
+        logWrite(f"Message from {sender} has no text in it.")
+        return
+
+    # quiet=True: polyglot documents this flag as silencing detection errors.
+    lang = Detector(content, quiet=True).language
+    if lang.code == "ru":
+        logWrite(
+            f"Message '{content}' from {sender} is fucking russian!!! [confidence {lang.confidence}]"
+        )
+    else:
+        logWrite(
+            f"Message '{content}' from {sender} is written {lang.code} [confidence {lang.confidence}]"
+        )
diff --git a/requirements.txt b/requirements.txt
index b10a4c4..593f835 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,6 +5,9 @@ convopyro==0.5
 fastapi~=0.95.0
 ftfy~=6.1.1
 psutil==5.9.4
+polyglot~=16.7.4
+PyICU==2.10.2
+pycld2==0.41
 pymongo==4.3.3
 Pyrogram~=2.0.102
 python_dateutil==2.8.2