Spaces:
Running
Running
File size: 7,540 Bytes
b79fff8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 |
import base64
import math
import os
import re
from io import BytesIO
from time import time, sleep
from typing import List, Optional, Tuple

import dotenv
import matplotlib.pyplot as plt
import pyautogui
from langchain_anthropic import ChatAnthropic
from langchain_core.messages import HumanMessage, BaseMessage
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_openai import ChatOpenAI
from PIL import Image
# Load variables from a .env file into the process environment at import time.
# NOTE(review): presumably this is where the chat-model API keys come from — confirm.
dotenv.load_dotenv()
# Text part of the message sent to the chat model together with the image(s).
# The last line of the reply is requested in the form "Lat: XX.XXXX, Lon: XX.XXXX";
# GeoBot.extract_location_from_response parses that line, so the format requested
# here and the parser must stay in sync.
PROMPT_INSTRUCTIONS = """
Try to predict where the image was taken.
First describe the relevant details in the image to do it.
List some regions and places where it could be.
Chose the most likely Country and City or Specific Location.
At the end, in the last line a part from the previous reasoning, write the Latitude and Longitude from that guessed location
using the following format, making sure that the coords are valid floats, without anything else and making sure to be consistent with the format:
Lat: XX.XXXX, Lon: XX.XXXX
"""
class GeoBot:
    """Turn a chat model's location guess into a click on an on-screen minimap.

    The bot sends images plus ``prompt_instructions`` to a chat model, parses
    the ``Lat: .., Lon: ..`` answer, projects it onto a Mercator minimap using
    two calibrated reference points (Kodiak and Hobart) and drives the mouse
    with ``pyautogui`` to click that spot and then the confirm button.
    """

    prompt_instructions: str = PROMPT_INSTRUCTIONS

    # Decimal number with optional sign, e.g. "57.7916", "-152", ".5".
    _NUMBER = r"[-+]?(?:\d+(?:\.\d*)?|\.\d+)"
    # Final-answer format requested by the prompt. Whitespace and markdown
    # emphasis ("**Lat:** ...") are tolerated between the tokens, and an
    # optional degree sign may follow each number.
    _LAT_LON_PATTERN = re.compile(
        r"\blat(?:itude)?[\s*]*:[\s*]*(" + _NUMBER + r")°?[\s*]*,"
        r"[\s*]*lon(?:gitude)?[\s*]*:[\s*]*(" + _NUMBER + r")",
        re.IGNORECASE,
    )

    def __init__(self, screen_regions, player=1, model=ChatOpenAI, model_name="gpt-4o"):
        """Read the calibrated screen regions and instantiate the chat model.

        Args:
            screen_regions: Mapping of region names to ``(x, y)`` screen
                coordinates. Keys read here: ``screen_top_left``,
                ``screen_bot_right``, ``map_top_left_{player}``,
                ``map_bot_right_{player}``, ``confirm_button_{player}``,
                ``kodiak_{player}``, ``hobart_{player}`` and, for player 1
                only, ``next_round_button``.
            player: Player number used as the suffix of the per-player keys.
            model: Chat-model class; it is called as ``model(model=model_name)``.
            model_name: Model identifier forwarded to ``model``.

        Raises:
            KeyError: If a required key is missing from ``screen_regions``.
        """
        self.player = player
        self.screen_regions = screen_regions

        # Whole-screen rectangle as (x, y, width, height).
        self.screen_x, self.screen_y = screen_regions["screen_top_left"]
        screen_right, screen_bottom = screen_regions["screen_bot_right"][:2]
        self.screen_w = screen_right - self.screen_x
        self.screen_h = screen_bottom - self.screen_y
        self.screen_xywh = (self.screen_x, self.screen_y, self.screen_w, self.screen_h)

        # Minimap rectangle of this player as (x, y, width, height).
        self.map_x, self.map_y = screen_regions[f"map_top_left_{player}"]
        map_right, map_bottom = screen_regions[f"map_bot_right_{player}"][:2]
        self.map_w = map_right - self.map_x
        self.map_h = map_bottom - self.map_y
        self.minimap_xywh = (self.map_x, self.map_y, self.map_w, self.map_h)

        # Only player 1 gets a next-round button; other players store None.
        self.next_round_button = (
            screen_regions["next_round_button"] if player == 1 else None
        )
        self.confirm_button = screen_regions[f"confirm_button_{player}"]

        # Screen pixels of the two reference points used to calibrate the
        # minimap, and their known geographic coordinates.
        self.kodiak_x, self.kodiak_y = screen_regions[f"kodiak_{player}"]
        self.hobart_x, self.hobart_y = screen_regions[f"hobart_{player}"]
        self.kodiak_lat, self.kodiak_lon = (57.7916, -152.4083)
        self.hobart_lat, self.hobart_lon = (-42.8833, 147.3355)

        self.model = model(model=model_name)

    @staticmethod
    def pil_to_base64(image: Image.Image) -> str:
        """Encode a PIL image as PNG and return it as a base64 ASCII string."""
        buffered = BytesIO()
        image.save(buffered, format="PNG")
        return base64.b64encode(buffered.getvalue()).decode("utf-8")

    @classmethod
    def create_message(cls, images_data: List[str]) -> HumanMessage:
        """Build the multimodal prompt message.

        Args:
            images_data: Base64-encoded PNG images (see ``pil_to_base64``).

        Returns:
            A ``HumanMessage`` whose content is the text instructions followed
            by one ``image_url`` part (PNG data URL) per image.
        """
        text_part = {"type": "text", "text": cls.prompt_instructions}
        image_parts = [
            {
                "type": "image_url",
                "image_url": {"url": f"data:image/png;base64,{img_data}"},
            }
            for img_data in images_data
        ]
        return HumanMessage(content=[text_part] + image_parts)

    @staticmethod
    def _message_text(response: BaseMessage) -> str:
        """Return the textual content of a model reply as a single string."""
        content = response.content
        if isinstance(content, str):
            return content
        if not isinstance(content, (list, tuple)):
            return ""
        # NOTE(review): assumes list content holds plain strings and/or dicts
        # carrying their text under a "text" key — confirm against the chat
        # model integrations in use.
        parts = []
        for part in content:
            if isinstance(part, str):
                parts.append(part)
            elif isinstance(part, dict) and isinstance(part.get("text"), str):
                parts.append(part["text"])
        return "\n".join(parts)

    @classmethod
    def _parse_lat_lon(cls, text: str) -> Optional[Tuple[float, float, str]]:
        """Return ``(lat, lon, matched_text)`` for the last coordinate pair in ``text``."""
        last_match = None
        for last_match in cls._LAT_LON_PATTERN.finditer(text):
            pass  # keep only the final occurrence: the answer comes last
        if last_match is None:
            return None
        return (
            float(last_match.group(1)),
            float(last_match.group(2)),
            last_match.group(0),
        )

    @staticmethod
    def _clamp(value: int, low: int, high: int, axis: str) -> int:
        """Clamp ``value`` into ``[low, high]``, reporting when it was outside."""
        if value < low:
            print(f"{axis} out of bounds")
            return low
        if value > high:
            print(f"{axis} out of bounds")
            return high
        return value

    def extract_location_from_response(
        self, response: BaseMessage
    ) -> Optional[Tuple[int, int]]:
        """Parse the model reply and convert it to minimap pixel coordinates.

        The last ``Lat: <float>, Lon: <float>`` pair found anywhere in the
        reply is used, so trailing blank lines, closing remarks or markdown
        emphasis around the answer do not break parsing.

        Args:
            response: Chat-model reply; its ``content`` is read.

        Returns:
            ``(x, y)`` screen pixels clamped to the minimap rectangle, or
            ``None`` when no coordinates can be parsed or projected.
        """
        parsed = self._parse_lat_lon(self._message_text(response))
        if parsed is None:
            print("Error: no 'Lat: <float>, Lon: <float>' line found in the response")
            return None
        lat, lon, prediction = parsed
        print(f"\n-------\n{self.model} Prediction:\n", prediction)

        try:
            x, y = self.lat_lon_to_mercator_map_pixels(lat, lon)
        except (ValueError, ZeroDivisionError, OverflowError) as e:
            # Latitudes at or beyond the poles have no Mercator projection,
            # and identical calibration points would divide by zero.
            print("Error:", e)
            return None
        print(f"Normalized pixel coordinates: ({x}, {y})")

        # Keep the click inside the minimap rectangle.
        x = self._clamp(x, self.map_x, self.map_x + self.map_w, "x")
        y = self._clamp(y, self.map_y, self.map_y + self.map_h, "y")
        return x, y

    @staticmethod
    def lat_to_mercator_y(lat: float) -> float:
        """Return the Mercator projection ordinate for ``lat`` in decimal degrees.

        Raises:
            ValueError: If the logarithm's argument is not positive, which
                happens for latitudes at or beyond the south pole and beyond
                the north pole.
        """
        return math.log(math.tan(math.pi / 4 + math.radians(lat) / 2))

    def lat_lon_to_mercator_map_pixels(self, lat: float, lon: float) -> Tuple[int, int]:
        """
        Convert latitude and longitude to pixel coordinates on the mercator
        projection minimap, interpolating linearly between the two calibrated
        reference points (Kodiak and Hobart).

        The pixel distance between the references is taken as an absolute
        value, so the result is only meaningful when Hobart lies to the right
        of and below Kodiak on screen.

        Args:
            lat (float): Latitude (Decimal Degrees) of the point to convert.
            lon (float): Longitude (Decimal Degrees) of the point to convert.

        Returns:
            tuple: x, y pixel coordinates of the point, rounded to integers.
        """
        # x is linear in longitude: fraction of the Kodiak -> Hobart span.
        lon_fraction = (self.kodiak_lon - lon) / (self.kodiak_lon - self.hobart_lon)
        x = abs(self.kodiak_x - self.hobart_x) * lon_fraction + self.kodiak_x

        # y is linear in the Mercator ordinate, not in the latitude itself.
        kodiak_merc = self.lat_to_mercator_y(self.kodiak_lat)
        hobart_merc = self.lat_to_mercator_y(self.hobart_lat)
        point_merc = self.lat_to_mercator_y(lat)
        lat_fraction = (kodiak_merc - point_merc) / (kodiak_merc - hobart_merc)
        y = abs(self.kodiak_y - self.hobart_y) * lat_fraction + self.kodiak_y

        return round(x), round(y)

    def select_map_location(self, x: int, y: int, plot: bool = False) -> None:
        """Click ``(x, y)`` on the minimap and then the confirm button.

        Args:
            x: Screen x pixel to click.
            y: Screen y pixel to click.
            plot: When true, save a debug plot of the minimap (see
                ``plot_minimap``) after clicking and before confirming.
        """
        # Hover near the bottom-right corner of the minimap to expand it.
        pyautogui.moveTo(
            self.map_x + self.map_w - 15, self.map_y + self.map_h - 15, duration=0.5
        )
        print("finish moving")
        sleep(0.5)

        # Click on the predicted location.
        pyautogui.click(x, y, duration=0.5)
        print("finish clicking")
        sleep(0.5)

        if plot:
            self.plot_minimap(x, y)

        # Confirm the guessed location.
        pyautogui.click(self.confirm_button, duration=0.2)
        sleep(2)

    def plot_minimap(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
        """Save a screenshot of the minimap with the reference points marked.

        The two calibration points are drawn in red and, when both ``x`` and
        ``y`` are given, the guess is drawn in blue. The image is written to
        ``plots/minimap.png``, creating the directory if needed.

        Args:
            x: Screen x pixel of the guess, or ``None`` to omit the marker.
            y: Screen y pixel of the guess, or ``None`` to omit the marker.
        """
        minimap = pyautogui.screenshot(region=self.minimap_xywh)

        # Start from an empty figure so repeated calls do not pile up
        # screenshots and markers on pyplot's implicit current figure.
        plt.clf()
        plt.imshow(minimap)
        # Markers are positioned relative to the screenshot's top-left corner.
        plt.plot(self.hobart_x - self.map_x, self.hobart_y - self.map_y, "ro")
        plt.plot(self.kodiak_x - self.map_x, self.kodiak_y - self.map_y, "ro")
        # Compare against None: pixel coordinate 0 is a valid position.
        if x is not None and y is not None:
            plt.plot(x - self.map_x, y - self.map_y, "bo")

        os.makedirs("plots", exist_ok=True)
        plt.savefig("plots/minimap.png")
|