From 89300e74e50d7b94d816423f93a54b11f9d2e8fb Mon Sep 17 00:00:00 2001
From: T-BENZIN <tbenzin1337@gmail.com>
Date: Sat, 28 Jun 2025 11:25:55 +0500
Subject: [PATCH] Added proof of concept for discord bot with OpenAI image
 recognition.

---
 discord_bot.py                               | 50 +++++++++++++++
 openai_interaction/image_recognition_test.py | 67 ++++++++++++++++++++
 2 files changed, 117 insertions(+)
 create mode 100644 openai_interaction/image_recognition_test.py

diff --git a/discord_bot.py b/discord_bot.py
index e69de29..2752572 100644
--- a/discord_bot.py
+++ b/discord_bot.py
@@ -0,0 +1,50 @@
+import os
+import discord
+from discord.ext import commands
+from openai_interaction.image_recognition_test import image_recognition
+
+# Load the bot token from an environment variable
+TOKEN = os.getenv('SCREAMING_OPOSSUM')
+
+# Create the client with a command prefix (not required for slash commands)
+intents = discord.Intents.default()
+intents.message_content = True  # on_message forwards message.content to the model
+bot = commands.Bot(command_prefix='!', intents=intents)
+
+
+# region Bot Stuff
+@bot.event
+async def on_ready():
+    """Print the logged-in account, then sync the command tree and report the outcome."""
+    print(f'Logged in as {bot.user} (ID: {bot.user.id})', '------', sep='\n')
+    try:
+        synced_commands = await bot.tree.sync()
+        print(f'Synced {len(synced_commands)} command(s).')
+    except Exception as sync_error:  # report the failure on stdout instead of raising
+        print(f'Failed to sync commands: {sync_error}')
+
+
+@bot.tree.command(name='hello', description='Replies with Hello World!')
+async def hello_command(interaction: discord.Interaction):  # answers the interaction with a fixed string
+    await interaction.response.send_message('AAAAAAAAAAAAAAAAAAAAA!')  # NOTE(review): differs from the description - confirm
+
+
+@bot.event
+async def on_message(message: discord.Message):
+    """Answer mentions that carry attachments via the model, then dispatch prefix commands."""
+    if message.author == bot.user:
+        return  # never react to the bot's own messages
+    if message.attachments and bot.user in message.mentions:
+        attachments = [{'filename': a.filename, 'url': a.url} for a in message.attachments]
+        reply = await image_recognition(message.content, attachments)
+        # Discord rejects empty messages and content over 2000 characters: send in chunks.
+        for start in range(0, len(reply), 2000):
+            await message.channel.send(reply[start:start + 2000])
+    await bot.process_commands(message)
+# endregion
+
+
+if __name__ == '__main__':
+    if not TOKEN:  # unset or empty: fail here with a clear message instead of inside bot.run
+        raise RuntimeError('Environment variable SCREAMING_OPOSSUM is not set')
+    bot.run(TOKEN)
diff --git a/openai_interaction/image_recognition_test.py b/openai_interaction/image_recognition_test.py
new file mode 100644
index 0000000..1d05556
--- /dev/null
+++ b/openai_interaction/image_recognition_test.py
@@ -0,0 +1,67 @@
+from openai import OpenAI
+
+client = OpenAI()  # created at import time; NOTE(review): presumably reads its API key from the environment - confirm
+SUPPORTED_IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.webp', '.gif')  # matched against the lowercased filename
+
+# response = client.responses.create(
+#     model="gpt-4.1-mini",
+#     input=[{
+#         "role": "user",
+#         "content": [
+#             {"type": "input_text", "text": "Что на этих картинках и в этом PDF файле?"},
+#             {
+#                 "type": "input_image",
+#                 "image_url": "https://cdn.discordapp.com/attachments/1381924995994882150/1388385230108360855/2a264d9a62e7420586ee7873f53552d7.webp?ex=6860ca01&is=685f7881&hm=e8e21d0829cc9be3a21f63cb8f0c5065bf76c1de3952dbb524640694dd5037be&",
+#             },
+#             {
+#                 "type": "input_image",
+#                 "image_url": "https://cdn.discordapp.com/attachments/1381924995994882150/1388385230582321253/u_312c5f5306d0913c58dd0639fe493bdd_800.png?ex=6860ca01&is=685f7881&hm=d0ebcfaecc2e69551da0cdefdf88c3833f8cbd59d04b035b787ea3150382bdd2&",
+#             },
+#         ],
+#     }],
+# )
+#
+# print(response.output_text)
+
+
+async def image_recognition(content: str, files: list[dict[str, str]]) -> str:
+    """Ask the model about ``content`` and the image attachments listed in ``files``.
+
+    Each entry of ``files`` is a dict with ``'filename'`` and ``'url'`` keys; entries whose
+    filename is missing or lacks a supported image extension are skipped.
+    Returns the ``output_text`` of the model response.
+    """
+    import asyncio  # function-scope import: the module's import block stays untouched
+    input_data = [{'type': 'input_text', 'text': content}]
+    for i_file in files:
+        if (i_file.get('filename') or '').lower().endswith(SUPPORTED_IMAGE_EXTENSIONS):
+            input_data.append({'type': 'input_image', 'image_url': i_file.get('url')})
+    print(input_data)
+    # ``client`` is synchronous: a worker thread keeps the caller's event loop running.
+    response = await asyncio.to_thread(
+        client.responses.create,
+        model='gpt-4.1-mini',
+        input=[{'role': 'user', 'content': input_data}],
+    )
+    return response.output_text
+
+
+if __name__ == '__main__':
+    import asyncio  # only this manual smoke run needs to start an event loop
+    data = {
+        'content': 'What\'s on these images?',
+        'files': [
+            {
+                'filename': '2a264d9a62e7420586ee7873f53552d7.webp',
+                'url': 'https://cdn.discordapp.com/attachments/1381924995994882150/1388385230108360855/2a264d9a62e7420586ee7873f53552d7.webp?ex=6860ca01&is=685f7881&hm=e8e21d0829cc9be3a21f63cb8f0c5065bf76c1de3952dbb524640694dd5037be&',
+            },
+            {
+                'filename': 'u_312c5f5306d0913c58dd0639fe493bdd_800.png',
+                'url': 'https://cdn.discordapp.com/attachments/1381924995994882150/1388385230582321253/u_312c5f5306d0913c58dd0639fe493bdd_800.png?ex=6860ca01&is=685f7881&hm=d0ebcfaecc2e69551da0cdefdf88c3833f8cbd59d04b035b787ea3150382bdd2&',
+            },
+            {'filename': 'shitty_pdf_file.pdf', 'url': 'https://example.com/shitty_pdf_file.pdf'},  # non-image entry
+        ],
+    }
+    # image_recognition is a coroutine function: it has to be driven by an event loop,
+    # otherwise only the un-awaited coroutine object would be printed.
+    print(asyncio.run(image_recognition(**data)))