From 89300e74e50d7b94d816423f93a54b11f9d2e8fb Mon Sep 17 00:00:00 2001
From: T-BENZIN
Date: Sat, 28 Jun 2025 11:25:55 +0500
Subject: [PATCH] Added proof of concept for discord bot with OpenAI image recognition.

---
Review note: the file contents below were revised during review; the
`index` blob ids still refer to the originally committed contents.

 discord_bot.py                               | 70 +++++++++++++++++++++
 openai_interaction/image_recognition_test.py | 66 ++++++++++++++++++++
 2 files changed, 136 insertions(+)
 create mode 100644 openai_interaction/image_recognition_test.py

diff --git a/discord_bot.py b/discord_bot.py
index e69de29..2752572 100644
--- a/discord_bot.py
+++ b/discord_bot.py
@@ -0,0 +1,70 @@
+"""Proof-of-concept Discord bot that answers mentions with OpenAI image recognition."""
+
+import os
+
+import discord
+from discord.ext import commands
+
+from openai_interaction.image_recognition_test import image_recognition
+
+# Load the token from an environment variable
+TOKEN = os.getenv('SCREAMING_OPOSSUM')
+
+# Discord rejects messages whose content exceeds this many characters
+DISCORD_MESSAGE_LIMIT = 2000
+
+# Create the client with a prefix (not required for slash commands)
+intents = discord.Intents.default()
+intents.message_content = True
+bot = commands.Bot(command_prefix='!', intents=intents)
+
+
+def _split_message(text: str, limit: int = DISCORD_MESSAGE_LIMIT) -> list[str]:
+    """Split *text* into consecutive chunks of at most *limit* characters."""
+    return [text[start:start + limit] for start in range(0, len(text), limit)]
+
+
+# region Bot Stuff
+@bot.event
+async def on_ready():
+    """Print the login banner and sync the application (slash) commands."""
+    print(f'Logged in as {bot.user} (ID: {bot.user.id})')
+    print('------')
+    try:
+        synced = await bot.tree.sync()
+        print(f'Synced {len(synced)} command(s).')
+    except Exception as e:
+        print(f'Failed to sync commands: {e}')
+
+
+@bot.tree.command(name='hello', description='Replies with Hello World!')
+async def hello_command(interaction: discord.Interaction):
+    """Reply to the /hello slash command with a fixed message."""
+    await interaction.response.send_message('AAAAAAAAAAAAAAAAAAAAA!')
+
+
+@bot.event
+async def on_message(message: discord.Message):
+    """Answer mentions that carry attachments, then process prefix commands."""
+    if message.author == bot.user:
+        return
+
+    if message.attachments and bot.user in message.mentions:
+        message_attachments = [
+            {'filename': i_attachment.filename, 'url': i_attachment.url}
+            for i_attachment in message.attachments
+        ]
+        reply = await image_recognition(message.content, message_attachments)
+        # Discord rejects empty and over-long messages, so send the reply in
+        # chunks; an empty reply produces no chunks and nothing is sent.
+        for chunk in _split_message(reply):
+            await message.channel.send(chunk)
+
+    await bot.process_commands(message)
+# endregion
+
+
+if __name__ == '__main__':
+    if TOKEN is None:
+        raise RuntimeError('Environment variable SCREAMING_OPOSSUM is not set')
+    bot.run(TOKEN)
diff --git a/openai_interaction/image_recognition_test.py b/openai_interaction/image_recognition_test.py
new file mode 100644
index 0000000..1d05556
--- /dev/null
+++ b/openai_interaction/image_recognition_test.py
@@ -0,0 +1,66 @@
+"""Proof of concept: describe image attachments with the OpenAI Responses API."""
+
+import asyncio
+
+from openai import OpenAI
+
+client = OpenAI()
+SUPPORTED_IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.webp', '.gif')
+
+
+async def image_recognition(content: str, files: list[dict[str, str]]) -> str:
+    """Send *content* and the supported images from *files* to the model.
+
+    Args:
+        content: Text prompt sent alongside the images.
+        files: Attachments as dicts with 'filename' and 'url' keys. Only
+            entries that have a URL and whose filename ends with one of
+            SUPPORTED_IMAGE_EXTENSIONS (case-insensitive) are forwarded.
+
+    Returns:
+        The text output of the model's response.
+    """
+    input_data = [{'type': 'input_text', 'text': content}]
+    input_data.extend(
+        {
+            'type': 'input_image',
+            'image_url': i_file['url'],
+        }
+        for i_file in files
+        if i_file.get('url')
+        and (i_file.get('filename') or '').lower().endswith(SUPPORTED_IMAGE_EXTENSIONS)
+    )
+    print(input_data)
+    # The client is synchronous: run the request in a worker thread so the
+    # caller's event loop is not blocked while waiting for the API.
+    response = await asyncio.to_thread(
+        client.responses.create,
+        model='gpt-4.1-mini',
+        input=[{
+            'role': 'user',
+            'content': input_data,
+        }],
+    )
+    return response.output_text
+
+
+if __name__ == '__main__':
+    data = {
+        'content': 'What\'s on these images?',
+        'files': [
+            {
+                'filename': '2a264d9a62e7420586ee7873f53552d7.webp',
+                'url': 'https://cdn.discordapp.com/attachments/1381924995994882150/1388385230108360855/2a264d9a62e7420586ee7873f53552d7.webp?ex=6860ca01&is=685f7881&hm=e8e21d0829cc9be3a21f63cb8f0c5065bf76c1de3952dbb524640694dd5037be&',
+            },
+            {
+                'filename': 'u_312c5f5306d0913c58dd0639fe493bdd_800.png',
+                'url': 'https://cdn.discordapp.com/attachments/1381924995994882150/1388385230582321253/u_312c5f5306d0913c58dd0639fe493bdd_800.png?ex=6860ca01&is=685f7881&hm=d0ebcfaecc2e69551da0cdefdf88c3833f8cbd59d04b035b787ea3150382bdd2&',
+            },
+            {
+                'filename': 'shitty_pdf_file.pdf',
+                'url': 'https://example.com/shitty_pdf_file.pdf'
+            }
+        ],
+    }
+    # image_recognition is a coroutine function, so it needs an event loop to run
+    print(asyncio.run(image_recognition(**data)))