Improved health check and monitoring
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
from datetime import datetime, timedelta
|
||||
from logging import Logger
|
||||
from typing import Optional
|
||||
from zoneinfo import ZoneInfo
|
||||
|
||||
from starlette.responses import JSONResponse
|
||||
@@ -18,12 +19,12 @@ async def get_health_v1() -> JSONResponse:
|
||||
|
||||
|
||||
@app.get("/v1/health", response_class=JSONResponse)
|
||||
async def get_health_v1() -> JSONResponse:
|
||||
if app.status is None or app.status._last_update < (
|
||||
async def get_health_v1(detailed: Optional[bool] = False) -> JSONResponse:
|
||||
if app.status is None or app.status.get_last_update() < (
|
||||
datetime.now(tz=ZoneInfo("UTC")) - timedelta(seconds=30)
|
||||
):
|
||||
app.update_status(await ApplicationHealth.from_data(app, db))
|
||||
|
||||
health: ApplicationHealth = app.status
|
||||
|
||||
return JSONResponse(health.to_json())
|
||||
return JSONResponse(health.to_json(detailed=detailed if detailed is not None else False))
|
||||
|
||||
@@ -1,15 +1,21 @@
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from typing import Dict, Any
|
||||
from logging import Logger
|
||||
from typing import Dict, Any, Optional
|
||||
from zoneinfo import ZoneInfo
|
||||
|
||||
from libbot.cache.classes import Cache
|
||||
from libbot.pycord.classes import PycordBot
|
||||
from pymongo.asynchronous.database import AsyncDatabase
|
||||
from pymongo.errors import ConnectionFailure
|
||||
|
||||
from classes.enums import HealthStatus
|
||||
from classes.fastapi import FastAPI
|
||||
from classes.service_status import ServiceStatus
|
||||
from modules.database import db_client
|
||||
from modules.utils import get_logger
|
||||
|
||||
logger: Logger = get_logger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -23,14 +29,17 @@ class ApplicationHealth:
|
||||
|
||||
@classmethod
|
||||
async def from_data(cls, app: FastAPI, database: AsyncDatabase) -> "ApplicationHealth":
|
||||
database_health: ServiceStatus = await ApplicationHealth.get_database_health(database)
|
||||
cache_health: ServiceStatus = ApplicationHealth.get_cache_health(app.bot.cache)
|
||||
|
||||
data: Dict[str, Any] = {
|
||||
"bot": ApplicationHealth.get_bot_health(app.bot),
|
||||
"cache": ApplicationHealth.get_cache_health(app.bot.cache),
|
||||
"database": await ApplicationHealth.get_database_health(database),
|
||||
"bot": ApplicationHealth.get_bot_health(app.bot, cache_health, database_health),
|
||||
"cache": cache_health,
|
||||
"database": database_health,
|
||||
}
|
||||
|
||||
data["api"] = ApplicationHealth.get_api_health(
|
||||
data["bot"], data["cache"], data["database"]
|
||||
data["bot"], data["cache"], database_health
|
||||
)
|
||||
|
||||
data["_last_update"] = datetime.now(tz=ZoneInfo("UTC"))
|
||||
@@ -42,10 +51,29 @@ class ApplicationHealth:
|
||||
|
||||
# TODO Fix the message
|
||||
@staticmethod
|
||||
def get_bot_health(bot: PycordBot) -> ServiceStatus:
|
||||
return ServiceStatus(
|
||||
HealthStatus.OPERATIONAL if bot.is_ready() else HealthStatus.FAILED, None
|
||||
)
|
||||
def get_bot_health(
|
||||
bot: PycordBot, cache_status: ServiceStatus, database_status: ServiceStatus
|
||||
) -> ServiceStatus:
|
||||
if not bot.is_ready():
|
||||
return ServiceStatus(HealthStatus.FAILED, "discord connection has failed")
|
||||
|
||||
if database_status.status != HealthStatus.OPERATIONAL:
|
||||
match database_status.status:
|
||||
case HealthStatus.FAILED, HealthStatus.UNKNOWN:
|
||||
return ServiceStatus(HealthStatus.FAILED, "database connection has failed")
|
||||
case HealthStatus.DEGRADED:
|
||||
return ServiceStatus(
|
||||
HealthStatus.DEGRADED, "database connection is degraded"
|
||||
)
|
||||
|
||||
if cache_status.status not in [HealthStatus.UNKNOWN, HealthStatus.OPERATIONAL]:
|
||||
match cache_status.status:
|
||||
case HealthStatus.FAILED:
|
||||
return ServiceStatus(HealthStatus.DEGRADED, "cache connection has failed")
|
||||
case HealthStatus.DEGRADED:
|
||||
return ServiceStatus(HealthStatus.DEGRADED, "cache is degraded")
|
||||
|
||||
return ServiceStatus(HealthStatus.OPERATIONAL, None)
|
||||
|
||||
# TODO Fix the message
|
||||
# TODO Implement this method
|
||||
@@ -60,44 +88,68 @@ class ApplicationHealth:
|
||||
@staticmethod
|
||||
async def get_database_health(database: AsyncDatabase) -> ServiceStatus:
|
||||
try:
|
||||
return ServiceStatus(
|
||||
(
|
||||
HealthStatus.OPERATIONAL
|
||||
if (await database.client.server_info()) is not None
|
||||
else HealthStatus.FAILED
|
||||
),
|
||||
None,
|
||||
)
|
||||
except Exception as exc:
|
||||
await db_client.admin.command("ping")
|
||||
except ConnectionFailure as exc:
|
||||
return ServiceStatus(HealthStatus.FAILED, str(exc))
|
||||
|
||||
@staticmethod
|
||||
def get_api_health(
|
||||
bot_status: ServiceStatus, cache_status: ServiceStatus, database_status: ServiceStatus
|
||||
) -> ServiceStatus:
|
||||
if database_status.status != HealthStatus.OPERATIONAL:
|
||||
ServiceStatus(
|
||||
HealthStatus.FAILED,
|
||||
"database connection has failed",
|
||||
)
|
||||
elif (
|
||||
bot_status.status != HealthStatus.OPERATIONAL
|
||||
or cache_status.status != HealthStatus.OPERATIONAL
|
||||
):
|
||||
return ServiceStatus(
|
||||
HealthStatus.DEGRADED,
|
||||
None,
|
||||
)
|
||||
|
||||
return ServiceStatus(
|
||||
HealthStatus.OPERATIONAL,
|
||||
None,
|
||||
)
|
||||
|
||||
def to_json(self) -> Dict[str, Dict[str, str | None]]:
|
||||
return {
|
||||
"api": self.api.to_json(),
|
||||
"bot": self.bot.to_json(),
|
||||
"cache": self.cache.to_json(),
|
||||
"database": self.database.to_json(),
|
||||
@staticmethod
|
||||
def get_api_health(
|
||||
bot_status: ServiceStatus, cache_status: ServiceStatus, database_status: ServiceStatus
|
||||
) -> ServiceStatus:
|
||||
if database_status.status != HealthStatus.OPERATIONAL:
|
||||
match database_status.status:
|
||||
case HealthStatus.FAILED, HealthStatus.UNKNOWN:
|
||||
return ServiceStatus(
|
||||
HealthStatus.FAILED,
|
||||
"database connection has failed",
|
||||
)
|
||||
case HealthStatus.DEGRADED:
|
||||
return ServiceStatus(
|
||||
HealthStatus.DEGRADED,
|
||||
"database connection is degraded",
|
||||
)
|
||||
|
||||
if bot_status.status != HealthStatus.OPERATIONAL:
|
||||
match bot_status.status:
|
||||
case HealthStatus.FAILED, HealthStatus.UNKNOWN:
|
||||
return ServiceStatus(
|
||||
HealthStatus.DEGRADED,
|
||||
"bot integration has failed",
|
||||
)
|
||||
case HealthStatus.DEGRADED:
|
||||
return ServiceStatus(
|
||||
HealthStatus.DEGRADED,
|
||||
"bot integration is degraded",
|
||||
)
|
||||
|
||||
if cache_status.status not in [HealthStatus.OPERATIONAL, HealthStatus.UNKNOWN]:
|
||||
match cache_status.status:
|
||||
case HealthStatus.FAILED:
|
||||
return ServiceStatus(HealthStatus.DEGRADED, "cache connection has failed")
|
||||
case HealthStatus.DEGRADED:
|
||||
return ServiceStatus(HealthStatus.DEGRADED, "cache is degraded")
|
||||
|
||||
return ServiceStatus(
|
||||
HealthStatus.OPERATIONAL,
|
||||
None,
|
||||
)
|
||||
|
||||
def get_last_update(self) -> datetime:
    """Return the value stored in this object's ``_last_update`` attribute."""
    last_update: datetime = self._last_update
    return last_update
|
||||
|
||||
def to_json(self, detailed: Optional[bool] = False) -> Dict[str, Dict[str, str | None]]:
|
||||
output: Dict[str, Any] = {
|
||||
"api": self.api.to_json(detailed),
|
||||
"bot": self.bot.to_json(detailed),
|
||||
}
|
||||
|
||||
if detailed:
|
||||
output["cache"] = self.cache.to_json(detailed)
|
||||
output["database"] = self.database.to_json(detailed)
|
||||
|
||||
return output
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from dataclasses import dataclass
|
||||
from typing import Literal, Dict
|
||||
from typing import Literal, Dict, Optional
|
||||
|
||||
from classes.enums import HealthStatus
|
||||
|
||||
@@ -14,8 +14,10 @@ class ServiceStatus:
|
||||
]
|
||||
message: str | None
|
||||
|
||||
def to_json(self) -> Dict[str, str | None]:
|
||||
return {
|
||||
"status": self.status.value,
|
||||
"message": self.message,
|
||||
}
|
||||
def to_json(self, detailed: Optional[bool] = False) -> Dict[str, str | None]:
|
||||
output: Dict[str, str | None] = {"status": self.status.value}
|
||||
|
||||
if detailed:
|
||||
output["message"] = self.message
|
||||
|
||||
return output
|
||||
|
||||
+1
-1
@@ -23,7 +23,7 @@ else:
|
||||
)
|
||||
|
||||
# Async declarations
|
||||
db_client = AsyncMongoClient(con_string, timeoutms=5000)
|
||||
db_client = AsyncMongoClient(con_string, connectTimeoutMS=3000)
|
||||
db: AsyncDatabase = db_client.get_database(name=db_config["name"])
|
||||
|
||||
col_users: AsyncCollection = db.get_collection("users")
|
||||
|
||||
Reference in New Issue
Block a user