From 405e43f6727d2550a7c4b0018083179540d5b574 Mon Sep 17 00:00:00 2001 From: Emagi Date: Mon, 2 Dec 2024 20:01:21 -0500 Subject: [PATCH] Fixed polling issues when a server is erroring out and also peering the right id --- source/WorldServer/Web/HTTPSClientPool.cpp | 301 ++++++++++----------- source/WorldServer/net.cpp | 9 +- 2 files changed, 156 insertions(+), 154 deletions(-) diff --git a/source/WorldServer/Web/HTTPSClientPool.cpp b/source/WorldServer/Web/HTTPSClientPool.cpp index 27ac509..53fb94d 100644 --- a/source/WorldServer/Web/HTTPSClientPool.cpp +++ b/source/WorldServer/Web/HTTPSClientPool.cpp @@ -511,157 +511,150 @@ void HTTPSClientPool::pollPeerHealth(const std::string& server, const std::strin LogWrite(PEERING__ERROR, 0, "Peering", "%s: Error finding peer %s:%s.", __FUNCTION__, server.c_str(), port.c_str()); } else { - auto client = getOrCreateClient(id, port, server + ":" + port); + auto client = getOrCreateClient(id, server, port); int16 interval = pollingInterval; - while (running.load()) { - interval++; - if (interval > pollingInterval) { - HealthStatus curStatus = peer_manager.getPeerStatus(server, web_worldport); - id = peer_manager.isPeer(server, web_worldport); - try { - auto response = client->sendRequest(server, port, "/status"); // Assumes HTTPSClient has a get method - //std::cout << "Health check response from " << server << ":" << port << " - " << response << std::endl; + HealthStatus curStatus = peer_manager.getPeerStatus(server, web_worldport); + id = peer_manager.isPeer(server, web_worldport); + try { + auto response = client->sendRequest(server, port, "/status"); // Assumes HTTPSClient has a get method + //std::cout << "Health check response from " << server << ":" << port << " - " << response << std::endl; - boost::property_tree::ptree json_tree; - std::istringstream json_stream(response); - boost::property_tree::read_json(json_stream, json_tree); - std::string online_status; - int16 peer_priority = 65535; - bool peer_primary = false; - if (auto status = json_tree.get_optional("world_status")) { - online_status = status.get(); - } - if (auto priority = json_tree.get_optional("peer_priority")) { - peer_priority = priority.get(); - } - if (auto isprimary = json_tree.get_optional("peer_primary")) { - peer_primary = isprimary.get(); - } - peer_manager.updatePriority(id, peer_priority); - - if (peer_primary && net.is_primary) { - peer_manager.handlePrimaryConflict(id); - std::shared_ptr hasPrimary = peer_manager.getHealthyPrimaryPeerPtr(); - if (hasPrimary) { // demote self - LogWrite(PEERING__INFO, 0, "Peering", "%s: Peer %s at %s:%s - HAS PRIMARY status, demoting self.", __FUNCTION__, id.c_str(), server.c_str(), port.c_str()); - net.SetPrimary(false); - } - } - else if(!peer_manager.hasPrimary() && peer_primary) { - peer_manager.setPrimary(id); - } - - switch (curStatus) { - case HealthStatus::STARTUP: { - pollPeerHealthData(client, id, server, port); - if (online_status == "offline") { - std::shared_ptr peer = peer_manager.getPeerById(id); - if (peer) { - peer->wasOffline = true; - if (peer->sentInitialPeerData) { - peer->sentInitialPeerData = false; - } - } - } - if (online_status == "online") { - peer_manager.updateHealth(id, HealthStatus::OK); - std::shared_ptr peer = peer_manager.getPeerById(id); - if (net.is_primary) { - if (peer) { - if (peer->wasOffline && !peer->sentInitialPeerData) { - world.GetGroupManager()->SendPeerGroupData(id); - } - peer->sentInitialPeerData = true; - } - } - else if (peer) { // set as if we already sent the data since if we take over we don't want the peer trying to resubmit all groups - peer->wasOffline = false; - peer->sentInitialPeerData = true; - } - LogWrite(PEERING__INFO, 0, "Peering", "%s: Peer %s at %s:%s - HAS OK/UP state.", __FUNCTION__, id.c_str(), server.c_str(), port.c_str()); - } - if (!net.is_primary && !peer_manager.hasPrimary() && peer_priority < net.GetPeerPriority()) { - LogWrite(PEERING__INFO, 0, "Peering", "%s: Peer %s at %s:%s - HAS PRIMARY.", __FUNCTION__, id.c_str(), server.c_str(), port.c_str()); - peer_manager.setPrimary(id); - net.SetPrimary(false); - } - else if (!peer_manager.hasPrimary() && world.world_loaded && !net.is_primary && net.GetPeerPriority() <= peer_priority) { - LogWrite(PEERING__INFO, 0, "Peering", "%s: I AM PRIMARY!", __FUNCTION__); - net.SetPrimary(); - } - break; - } - case HealthStatus::OK: { - pollPeerHealthData(client, id, server, port); - if (!net.is_primary && !peer_manager.hasPrimary() && peer_priority < net.GetPeerPriority()) { - LogWrite(PEERING__INFO, 0, "Peering", "%s: Peer %s at %s:%s - HAS PRIMARY.", __FUNCTION__, id.c_str(), server.c_str(), port.c_str()); - peer_manager.setPrimary(id); - net.SetPrimary(false); - } - else if (!peer_manager.hasPrimary() && !net.is_primary && net.GetPeerPriority() <= peer_priority) { - LogWrite(PEERING__INFO, 0, "Peering", "%s: I AM PRIMARY!!", __FUNCTION__); - net.SetPrimary(); - } - break; - } - case HealthStatus::WARN: - case HealthStatus::ERROR: - case HealthStatus::SHUTDOWN: { - peer_manager.updateHealth(id, HealthStatus::STARTUP); - LogWrite(PEERING__INFO, 0, "Peering", "%s: Peer %s at %s:%s - HAS ENTERED STARTUP state.", __FUNCTION__, id.c_str(), server.c_str(), port.c_str()); - - if (net.is_primary) { - std::shared_ptr peer = peer_manager.getPeerById(id); - if (peer && peer->sentInitialPeerData == true) { - peer->sentInitialPeerData = false; - } - } - break; - } - } - } - catch (const std::exception& e) { - HealthStatus curStatus = peer_manager.getPeerStatus(server, web_worldport); - switch (curStatus) { - case HealthStatus::WARN: { - peer_manager.updateHealth(id, HealthStatus::ERROR); - break; - } - case HealthStatus::ERROR: { - - LogWrite(PEERING__ERROR, 0, "Peering", "%s: Peer %s at %s:%s - HAS ERROR->SHUTDOWN state.", __FUNCTION__, id.c_str(), server.c_str(), port.c_str()); - peer_manager.updateHealth(id, HealthStatus::SHUTDOWN); - if (peer_manager.getHealthyPeer() == std::nullopt) { - if (!net.is_primary && world.world_loaded) { - LogWrite(PEERING__INFO, 0, "Peering", "%s: TAKING OVER AS PRIMARY, NO PEERS AVAILABLE TO CHECK", __FUNCTION__); - net.SetPrimary(); - } - } - else if (!peer_manager.hasPrimary()) { - std::string newPrimary = peer_manager.getPriorityPeer(); - if (newPrimary.size() > 0) { - LogWrite(PEERING__INFO, 0, "Peering", "%s: NEW PRIMARY %s", __FUNCTION__, newPrimary); - peer_manager.setPrimary(newPrimary); - net.SetPrimary(false); - } - else { - LogWrite(PEERING__ERROR, 0, "Peering", "%s: NEW PRIMARY CANNOT BE ESTABLISHED!", __FUNCTION__); - } - } - break; - } - default: { - peer_manager.updateHealth(id, HealthStatus::WARN); - break; - } - } - LogWrite(PEERING__ERROR, 0, "Peering", "%s: ERROR POLLING %s:%s reason: %s", __FUNCTION__, server.c_str(), port.c_str(), e.what() ? e.what() : "??"); - std::this_thread::sleep_for(std::chrono::milliseconds(400)); - } - interval = 0; + boost::property_tree::ptree json_tree; + std::istringstream json_stream(response); + boost::property_tree::read_json(json_stream, json_tree); + std::string online_status; + int16 peer_priority = 65535; + bool peer_primary = false; + if (auto status = json_tree.get_optional("world_status")) { + online_status = status.get(); + } + if (auto priority = json_tree.get_optional("peer_priority")) { + peer_priority = priority.get(); + } + if (auto isprimary = json_tree.get_optional("peer_primary")) { + peer_primary = isprimary.get(); + } + peer_manager.updatePriority(id, peer_priority); + + if (peer_primary && net.is_primary) { + peer_manager.handlePrimaryConflict(id); + std::shared_ptr hasPrimary = peer_manager.getHealthyPrimaryPeerPtr(); + if (hasPrimary) { // demote self + LogWrite(PEERING__INFO, 0, "Peering", "%s: Peer %s at %s:%s - HAS PRIMARY status, demoting self.", __FUNCTION__, id.c_str(), server.c_str(), port.c_str()); + net.SetPrimary(false); + } + } + else if (!peer_manager.hasPrimary() && peer_primary) { + peer_manager.setPrimary(id); + } + + switch (curStatus) { + case HealthStatus::STARTUP: { + pollPeerHealthData(client, id, server, port); + if (online_status == "offline") { + std::shared_ptr peer = peer_manager.getPeerById(id); + if (peer) { + peer->wasOffline = true; + if (peer->sentInitialPeerData) { + peer->sentInitialPeerData = false; + } + } + } + if (online_status == "online") { + peer_manager.updateHealth(id, HealthStatus::OK); + std::shared_ptr peer = peer_manager.getPeerById(id); + if (net.is_primary) { + if (peer) { + if (peer->wasOffline && !peer->sentInitialPeerData) { + world.GetGroupManager()->SendPeerGroupData(id); + } + peer->sentInitialPeerData = true; + } + } + else if (peer) { // set as if we already sent the data since if we take over we don't want the peer trying to resubmit all groups + peer->wasOffline = false; + peer->sentInitialPeerData = true; + } + LogWrite(PEERING__INFO, 0, "Peering", "%s: Peer %s at %s:%s - HAS OK/UP state.", __FUNCTION__, id.c_str(), server.c_str(), port.c_str()); + } + if (!net.is_primary && !peer_manager.hasPrimary() && peer_priority < net.GetPeerPriority()) { + LogWrite(PEERING__INFO, 0, "Peering", "%s: Peer %s at %s:%s - HAS PRIMARY.", __FUNCTION__, id.c_str(), server.c_str(), port.c_str()); + peer_manager.setPrimary(id); + net.SetPrimary(false); + } + else if (!peer_manager.hasPrimary() && world.world_loaded && !net.is_primary && net.GetPeerPriority() <= peer_priority) { + LogWrite(PEERING__INFO, 0, "Peering", "%s: I AM PRIMARY!", __FUNCTION__); + net.SetPrimary(); + } + break; + } + case HealthStatus::OK: { + pollPeerHealthData(client, id, server, port); + if (!net.is_primary && !peer_manager.hasPrimary() && peer_priority < net.GetPeerPriority()) { + LogWrite(PEERING__INFO, 0, "Peering", "%s: Peer %s at %s:%s - HAS PRIMARY.", __FUNCTION__, id.c_str(), server.c_str(), port.c_str()); + peer_manager.setPrimary(id); + net.SetPrimary(false); + } + else if (!peer_manager.hasPrimary() && !net.is_primary && net.GetPeerPriority() <= peer_priority) { + LogWrite(PEERING__INFO, 0, "Peering", "%s: I AM PRIMARY!!", __FUNCTION__); + net.SetPrimary(); + } + break; + } + case HealthStatus::WARN: + case HealthStatus::ERROR: + case HealthStatus::SHUTDOWN: { + peer_manager.updateHealth(id, HealthStatus::STARTUP); + LogWrite(PEERING__INFO, 0, "Peering", "%s: Peer %s at %s:%s - HAS ENTERED STARTUP state.", __FUNCTION__, id.c_str(), server.c_str(), port.c_str()); + + if (net.is_primary) { + std::shared_ptr peer = peer_manager.getPeerById(id); + if (peer && peer->sentInitialPeerData == true) { + peer->sentInitialPeerData = false; + } + } + break; + } } - std::this_thread::sleep_for(std::chrono::milliseconds(50)); } + catch (const std::exception& e) { + HealthStatus curStatus = peer_manager.getPeerStatus(server, web_worldport); + switch (curStatus) { + case HealthStatus::WARN: { + peer_manager.updateHealth(id, HealthStatus::ERROR); + break; + } + case HealthStatus::ERROR: { + + LogWrite(PEERING__ERROR, 0, "Peering", "%s: Peer %s at %s:%s - HAS ERROR->SHUTDOWN state.", __FUNCTION__, id.c_str(), server.c_str(), port.c_str()); + peer_manager.updateHealth(id, HealthStatus::SHUTDOWN); + if (peer_manager.getHealthyPeer() == std::nullopt) { + if (!net.is_primary && world.world_loaded) { + LogWrite(PEERING__INFO, 0, "Peering", "%s: TAKING OVER AS PRIMARY, NO PEERS AVAILABLE TO CHECK", __FUNCTION__); + net.SetPrimary(); + } + } + else if (!peer_manager.hasPrimary()) { + std::string newPrimary = peer_manager.getPriorityPeer(); + if (newPrimary.size() > 0) { + LogWrite(PEERING__INFO, 0, "Peering", "%s: NEW PRIMARY %s", __FUNCTION__, newPrimary); + peer_manager.setPrimary(newPrimary); + net.SetPrimary(false); + } + else { + LogWrite(PEERING__ERROR, 0, "Peering", "%s: NEW PRIMARY CANNOT BE ESTABLISHED!", __FUNCTION__); + } + } + break; + } + default: { + peer_manager.updateHealth(id, HealthStatus::WARN); + break; + } + } + LogWrite(PEERING__ERROR, 0, "Peering", "%s: ERROR POLLING %s:%s reason: %s", __FUNCTION__, server.c_str(), port.c_str(), e.what() ? e.what() : "??"); + } + interval = 0; } } @@ -724,15 +717,19 @@ void HTTPSClientPool::startPolling() { std::async(std::launch::async, [this, server, port]() { try { pollPeerHealth(server, port); - } catch (const std::exception& e) { + } + catch (const std::exception& e) { LogWrite(PEERING__DEBUG, 1, "Peering", "Exception in pollPeerHealth for %s:%s: %s", server.c_str(), port.c_str(), e.what()); - } catch (...) { + } + catch (...) { LogWrite(PEERING__DEBUG, 1, "Peering", "Unknown exception in pollPeerHealth for %s:%s.", server.c_str(), port.c_str()); } - }); - } catch (const std::exception& e) { + }); + } + catch (const std::exception& e) { LogWrite(PEERING__DEBUG, 1, "Peering", "Failed to start async task for %s:%s: %s", server.c_str(), port.c_str(), e.what()); - } catch (...) { + } + catch (...) { LogWrite(PEERING__DEBUG, 1, "Peering", "Unknown exception when starting async task for %s:%s.", server.c_str(), port.c_str()); } } diff --git a/source/WorldServer/net.cpp b/source/WorldServer/net.cpp index 6684319..3b94457 100644 --- a/source/WorldServer/net.cpp +++ b/source/WorldServer/net.cpp @@ -634,8 +634,13 @@ ThreadReturnType AchievmentLoad (void* tmp) ThreadReturnType StartPeerPoll (void* tmp) { - LogWrite(WORLD__WARNING, 0, "Thread", "Start Polling..."); - peer_https_pool.startPolling(); + + while( RunLoops ) + { + LogWrite(WORLD__WARNING, 0, "Thread", "Start Polling..."); + peer_https_pool.startPolling(); + std::this_thread::sleep_for(std::chrono::milliseconds(500)); + } THREAD_RETURN(NULL); }