From f7f70ccfe50f4cfa2ab340d0055dc8ddd5b10154 Mon Sep 17 00:00:00 2001 From: Sky Johnson Date: Fri, 13 Jun 2025 15:22:51 -0500 Subject: [PATCH] optimizations --- http_common.hpp => common.hpp | 9 ++ cookie.hpp | 102 +++++++++++----- http_parser.hpp | 145 ---------------------- http_request.hpp | 2 +- parser.hpp | 218 ++++++++++++++++++++++++++++++++++ router.hpp | 4 +- server.hpp | 4 +- static_file_handler.hpp | 4 +- 8 files changed, 307 insertions(+), 181 deletions(-) rename http_common.hpp => common.hpp (88%) delete mode 100644 http_parser.hpp create mode 100644 parser.hpp diff --git a/http_common.hpp b/common.hpp similarity index 88% rename from http_common.hpp rename to common.hpp index 88f74cc..526506c 100644 --- a/http_common.hpp +++ b/common.hpp @@ -4,6 +4,15 @@ #include #include +// Branch prediction hints +#if defined(__GNUC__) || defined(__clang__) +#define likely(x) __builtin_expect(!!(x), 1) +#define unlikely(x) __builtin_expect(!!(x), 0) +#else +#define likely(x) (x) +#define unlikely(x) (x) +#endif + using std::string_view; enum class HttpMethod : uint8_t { diff --git a/cookie.hpp b/cookie.hpp index e502aa6..5afe643 100644 --- a/cookie.hpp +++ b/cookie.hpp @@ -1,5 +1,6 @@ #pragma once +#include "common.hpp" #include #include #include @@ -17,34 +18,37 @@ class CookieParser { public: static std::vector parse(string_view cookie_header) { std::vector cookies; + cookies.reserve(8); // Most requests have few cookies const char* ptr = cookie_header.data(); const char* end = ptr + cookie_header.size(); - while (ptr < end) { + while (likely(ptr < end)) { // Skip whitespace and semicolons - while (ptr < end && (*ptr == ' ' || *ptr == ';')) ptr++; - if (ptr >= end) break; + ptr = skip_separators(ptr, end); + if (unlikely(ptr >= end)) break; // Find name end (=) const char* name_start = ptr; - while (ptr < end && *ptr != '=' && *ptr != ';') ptr++; - if (ptr >= end || *ptr != '=') break; + ptr = find_char(ptr, end, '='); + if (unlikely(!ptr)) break; string_view name(name_start, ptr - name_start); ptr++; // Skip '=' // Find value end (; or end) const char* value_start = ptr; - while (ptr < end && *ptr != ';') ptr++; + const char* value_end = find_char(ptr, end, ';'); + if (!value_end) value_end = end; - string_view value(value_start, ptr - value_start); + string_view value(value_start, value_end - value_start); + ptr = value_end; - // Trim whitespace from name and value - name = trim(name); - value = trim(value); + // Trim whitespace efficiently + name = trim_fast(name); + value = trim_fast(value); - if (!name.empty()) { + if (likely(!name.empty())) { cookies.emplace_back(name, value); } } @@ -53,80 +57,120 @@ public: } private: - static string_view trim(string_view str) { + static const char* skip_separators(const char* ptr, const char* end) { + // Unrolled loop for common case + while (likely(ptr < end - 3)) { + if (*ptr != ' ' && *ptr != ';') break; ++ptr; + if (*ptr != ' ' && *ptr != ';') break; ++ptr; + if (*ptr != ' ' && *ptr != ';') break; ++ptr; + if (*ptr != ' ' && *ptr != ';') break; ++ptr; + } + while (ptr < end && (*ptr == ' ' || *ptr == ';')) ++ptr; + return ptr; + } + + static const char* find_char(const char* start, const char* end, char target) { + // Optimized character search + while (likely(start < end - 7)) { + if (start[0] == target) return start; + if (start[1] == target) return start + 1; + if (start[2] == target) return start + 2; + if (start[3] == target) return start + 3; + if (start[4] == target) return start + 4; + if (start[5] == target) return start + 5; + if (start[6] == target) return start + 6; + if (start[7] == target) return start + 7; + start += 8; + } + while (start < end && *start != target) ++start; + return start < end ? start : nullptr; + } + + static string_view trim_fast(string_view str) { + if (unlikely(str.empty())) return str; + const char* start = str.data(); const char* end = start + str.size(); - // Trim leading whitespace - while (start < end && *start == ' ') start++; + // Trim leading - unrolled + while (likely(start < end - 3) && *start == ' ') { + if (start[1] != ' ') { start += 1; break; } + if (start[2] != ' ') { start += 2; break; } + if (start[3] != ' ') { start += 3; break; } + start += 4; + } + while (start < end && *start == ' ') ++start; - // Trim trailing whitespace - while (end > start && *(end - 1) == ' ') end--; + // Trim trailing - unrolled + while (likely(end > start + 3) && *(end - 1) == ' ') { + if (*(end - 2) != ' ') { end -= 1; break; } + if (*(end - 3) != ' ') { end -= 2; break; } + if (*(end - 4) != ' ') { end -= 3; break; } + end -= 4; + } + while (end > start && *(end - 1) == ' ') --end; return string_view(start, end - start); } }; -// Cookie helpers for request/response handling class CookieHelpers { public: - // Get cookie value from request, returns empty string_view if not found static string_view get_cookie(const std::vector& cookies, string_view name) { + // Optimized linear search with early termination for (const auto& cookie : cookies) { - if (cookie.name == name) { + if (likely(cookie.name.size() == name.size()) && cookie.name == name) { return cookie.value; } } return {}; } - // Build Set-Cookie header value static std::string build_set_cookie(string_view name, string_view value, int max_age = -1, string_view path = "", string_view domain = "", bool secure = false, bool http_only = false) { std::string result; - result.reserve(256); + result.reserve(name.size() + value.size() + 128); // Estimate size result += name; result += "="; result += value; - if (max_age >= 0) { + if (unlikely(max_age >= 0)) { result += "; Max-Age="; result += std::to_string(max_age); } - if (!path.empty()) { + if (unlikely(!path.empty())) { result += "; Path="; result += path; } - if (!domain.empty()) { + if (unlikely(!domain.empty())) { result += "; Domain="; result += domain; } - if (secure) { + if (unlikely(secure)) { result += "; Secure"; } - if (http_only) { + if (unlikely(http_only)) { result += "; HttpOnly"; } return result; } - // Build delete cookie header (expires immediately) static std::string build_delete_cookie(string_view name, string_view path = "") { std::string result; - result.reserve(128); + result.reserve(name.size() + 64); result += name; result += "=; Max-Age=0"; - if (!path.empty()) { + if (unlikely(!path.empty())) { result += "; Path="; result += path; } diff --git a/http_parser.hpp b/http_parser.hpp deleted file mode 100644 index bbc9704..0000000 --- a/http_parser.hpp +++ /dev/null @@ -1,145 +0,0 @@ -#pragma once - -#include "http_common.hpp" -#include "http_request.hpp" -#include "router.hpp" -#include "cookie.hpp" -#include -#include -#include -#include - -using std::string_view; - -class HttpParser { -public: - static HttpRequest parse(string_view data) { - HttpRequest req; - const char* ptr = data.data(); - const char* end = ptr + data.size(); - - // Parse method - const char* method_end = find_char(ptr, end, ' '); - if (!method_end) return req; - - req.method = parse_method(string_view(ptr, method_end - ptr)); - ptr = method_end + 1; - - // Parse path and query - const char* path_end = find_char(ptr, end, ' '); - if (!path_end) return req; - - const char* query_start = find_char(ptr, path_end, '?'); - if (query_start) { - req.path = string_view(ptr, query_start - ptr); - req.query = string_view(query_start + 1, path_end - query_start - 1); - } else { - req.path = string_view(ptr, path_end - ptr); - } - ptr = path_end + 1; - - // Parse version - const char* version_end = find_char(ptr, end, '\r'); - if (!version_end || version_end + 1 >= end || *(version_end + 1) != '\n') return req; - - req.version = string_view(ptr, version_end - ptr); - ptr = version_end + 2; - - // Parse headers - while (ptr < end - 1) { - if (*ptr == '\r' && *(ptr + 1) == '\n') { - // End of headers - ptr += 2; - break; - } - - const char* header_end = find_char(ptr, end, '\r'); - if (!header_end || header_end + 1 >= end || *(header_end + 1) != '\n') break; - - const char* colon = find_char(ptr, header_end, ':'); - if (!colon) { - ptr = header_end + 2; - continue; - } - - string_view name(ptr, colon - ptr); - const char* value_start = colon + 1; - while (value_start < header_end && *value_start == ' ') value_start++; - - string_view value(value_start, header_end - value_start); - req.headers[name] = value; - - // Check for Content-Length - if (name.size() == 14 && strncasecmp(name.data(), "content-length", 14) == 0) { - req.content_length = parse_int(value); - } - // Check for Cookie header - else if (name.size() == 6 && strncasecmp(name.data(), "cookie", 6) == 0) { - req.cookies = CookieParser::parse(value); - } - - ptr = header_end + 2; - } - - // Body - if (ptr < end) { - req.body = string_view(ptr, end - ptr); - } - - req.valid = true; - return req; - } - -private: - static const char* find_char(const char* start, const char* end, char c) { - for (const char* p = start; p < end; ++p) { - if (*p == c) return p; - } - return nullptr; - } - - static HttpMethod parse_method(string_view method) { - switch (method.size()) { - case 3: - if (method == "GET") return HttpMethod::GET; - if (method == "PUT") return HttpMethod::PUT; - break; - case 4: - if (method == "POST") return HttpMethod::POST; - if (method == "HEAD") return HttpMethod::HEAD; - break; - case 5: - if (method == "PATCH") return HttpMethod::PATCH; - break; - case 6: - if (method == "DELETE") return HttpMethod::DELETE; - break; - case 7: - if (method == "OPTIONS") return HttpMethod::OPTIONS; - break; - } - return HttpMethod::UNKNOWN; - } - - static size_t parse_int(string_view str) { - size_t result = 0; - for (char c : str) { - if (c >= '0' && c <= '9') { - result = result * 10 + (c - '0'); - } else { - break; - } - } - return result; - } - - static int strncasecmp(const char* s1, const char* s2, size_t n) { - for (size_t i = 0; i < n; ++i) { - char c1 = s1[i] >= 'A' && s1[i] <= 'Z' ? s1[i] + 32 : s1[i]; - char c2 = s2[i] >= 'A' && s2[i] <= 'Z' ? s2[i] + 32 : s2[i]; - if (c1 != c2) return c1 - c2; - if (c1 == 0) break; - } - return 0; - } -}; diff --git a/http_request.hpp b/http_request.hpp index c5d9f9c..f02b4c7 100644 --- a/http_request.hpp +++ b/http_request.hpp @@ -1,6 +1,6 @@ #pragma once -#include "http_common.hpp" +#include "common.hpp" #include "cookie.hpp" #include #include diff --git a/parser.hpp b/parser.hpp new file mode 100644 index 0000000..ac063a6 --- /dev/null +++ b/parser.hpp @@ -0,0 +1,218 @@ +#pragma once + +#include "common.hpp" +#include "http_request.hpp" +#include "router.hpp" +#include "cookie.hpp" +#include +#include +#include +#include +#include + +using std::string_view; + +class Parser { +public: + static HttpRequest parse(string_view data) { + HttpRequest req; + const char* ptr = data.data(); + const char* end = ptr + data.size(); + + // Parse method + const char* method_end = find_space(ptr, end); + if (!method_end) return req; + + req.method = parse_method(ptr, method_end - ptr); + ptr = method_end + 1; + + // Parse path and query + const char* path_end = find_space(ptr, end); + if (!path_end) return req; + + const char* query_start = find_char(ptr, path_end, '?'); + if (query_start) { + req.path = string_view(ptr, query_start - ptr); + req.query = string_view(query_start + 1, path_end - query_start - 1); + } else { + req.path = string_view(ptr, path_end - ptr); + } + ptr = path_end + 1; + + // Parse version + const char* version_end = find_crlf(ptr, end); + if (!version_end) return req; + + req.version = string_view(ptr, version_end - ptr); + ptr = version_end + 2; + + // Parse headers + while (ptr < end - 1) { + if (*ptr == '\r' && *(ptr + 1) == '\n') { + ptr += 2; + break; + } + + const char* header_end = find_crlf(ptr, end); + if (!header_end) break; + + const char* colon = find_char(ptr, header_end, ':'); + if (!colon) { + ptr = header_end + 2; + continue; + } + + string_view name(ptr, colon - ptr); + const char* value_start = skip_whitespace(colon + 1, header_end); + string_view value(value_start, header_end - value_start); + + req.headers[name] = value; + + // Fast header checks using first character + length + if (likely(name.size() >= 6)) { + char first = name[0] | 0x20; // to lowercase + if (first == 'c') { + if (name.size() == 14 && equals_case_insensitive(name, "content-length")) { + req.content_length = parse_int(value); + } else if (name.size() == 6 && equals_case_insensitive(name, "cookie")) { + req.cookies = CookieParser::parse(value); + } + } + } + + ptr = header_end + 2; + } + + // Body + if (ptr < end) { + req.body = string_view(ptr, end - ptr); + } + + req.valid = true; + return req; + } + +private: + // Optimized character finding + static const char* find_char(const char* start, const char* end, char target) { + // Fallback scalar implementation + while (start < end && *start != target) ++start; + return start < end ? start : nullptr; + } + + static const char* find_space(const char* start, const char* end) { + // Optimized scalar implementation + while (start < end && *start != ' ') ++start; + return start < end ? start : nullptr; + } + + static const char* find_crlf(const char* start, const char* end) { + // Optimized scalar implementation + while (start < end - 1) { + if (*start == '\r' && *(start + 1) == '\n') return start; + ++start; + } + return nullptr; + } + + static const char* skip_whitespace(const char* start, const char* end) { + while (likely(start < end - 7)) { + if (*start != ' ' && *start != '\t') break; + ++start; + if (*start != ' ' && *start != '\t') break; + ++start; + if (*start != ' ' && *start != '\t') break; + ++start; + if (*start != ' ' && *start != '\t') break; + ++start; + if (*start != ' ' && *start != '\t') break; + ++start; + if (*start != ' ' && *start != '\t') break; + ++start; + if (*start != ' ' && *start != '\t') break; + ++start; + if (*start != ' ' && *start != '\t') break; + ++start; + } + while (start < end && (*start == ' ' || *start == '\t')) ++start; + return start; + } + + // Optimized method parsing using lookup table + static HttpMethod parse_method(const char* data, size_t len) { + // Use first char + length for fast dispatch + if (unlikely(len == 0)) return HttpMethod::UNKNOWN; + + char first = data[0]; + switch (first) { + case 'G': + if (len == 3 && data[1] == 'E' && data[2] == 'T') + return HttpMethod::GET; + break; + case 'P': + if (len == 4) { + uint32_t word = *reinterpret_cast(data); + if (word == 0x54534f50) return HttpMethod::POST; // "POST" + } else if (len == 3) { + uint32_t word = *reinterpret_cast(data) & 0x00ffffff; + if (word == 0x545550) return HttpMethod::PUT; // "PUT" + } else if (len == 5) { + if (memcmp(data, "PATCH", 5) == 0) return HttpMethod::PATCH; + } + break; + case 'H': + if (len == 4 && memcmp(data, "HEAD", 4) == 0) + return HttpMethod::HEAD; + break; + case 'D': + if (len == 6 && memcmp(data, "DELETE", 6) == 0) + return HttpMethod::DELETE; + break; + case 'O': + if (len == 7 && memcmp(data, "OPTIONS", 7) == 0) + return HttpMethod::OPTIONS; + break; + } + return HttpMethod::UNKNOWN; + } + + static bool equals_case_insensitive(string_view a, const char* b) { + size_t len = strlen(b); + if (a.size() != len) return false; + + // Optimized case-insensitive comparison + for (size_t i = 0; i < len; ++i) { + char ca = a[i] | 0x20; // to lowercase + char cb = b[i] | 0x20; + if (ca != cb) return false; + } + return true; + } + + static size_t parse_int(string_view str) { + if (unlikely(str.empty())) return 0; + + size_t result = 0; + const char* ptr = str.data(); + const char* end = ptr + str.size(); + + // Unrolled parsing for common small numbers + while (likely(ptr < end - 3)) { + char c1 = ptr[0], c2 = ptr[1], c3 = ptr[2], c4 = ptr[3]; + if (c1 < '0' || c1 > '9') break; + if (c2 < '0' || c2 > '9') { result = result * 10 + (c1 - '0'); ptr += 1; break; } + if (c3 < '0' || c3 > '9') { result = result * 100 + (c1 - '0') * 10 + (c2 - '0'); ptr += 2; break; } + if (c4 < '0' || c4 > '9') { result = result * 1000 + (c1 - '0') * 100 + (c2 - '0') * 10 + (c3 - '0'); ptr += 3; break; } + result = result * 10000 + (c1 - '0') * 1000 + (c2 - '0') * 100 + (c3 - '0') * 10 + (c4 - '0'); + ptr += 4; + } + + while (ptr < end) { + char c = *ptr; + if (c < '0' || c > '9') break; + result = result * 10 + (c - '0'); + ++ptr; + } + return result; + } +}; diff --git a/router.hpp b/router.hpp index adf69b0..078814d 100644 --- a/router.hpp +++ b/router.hpp @@ -1,8 +1,8 @@ #pragma once -#include "http_common.hpp" -#include "http_parser.hpp" +#include "common.hpp" #include "http_response.hpp" +#include "http_request.hpp" #include #include #include diff --git a/server.hpp b/server.hpp index f80d185..67a1839 100644 --- a/server.hpp +++ b/server.hpp @@ -2,7 +2,7 @@ #include "epoll_socket.hpp" #include "router.hpp" -#include "http_parser.hpp" +#include "parser.hpp" #include "http_response.hpp" #include "static_file_handler.hpp" #include "kv_store.hpp" @@ -143,7 +143,7 @@ private: } void process_request(int client_fd, std::string_view request_data) { - HttpRequest req = HttpParser::parse(request_data); + HttpRequest req = Parser::parse(request_data); if (!req.valid) { send_error_response(client_fd, "Bad Request", 400, req.version); diff --git a/static_file_handler.hpp b/static_file_handler.hpp index d9021d6..9ccd8a9 100644 --- a/static_file_handler.hpp +++ b/static_file_handler.hpp @@ -1,8 +1,8 @@ #pragma once -#include "http_parser.hpp" +#include "parser.hpp" #include "http_response.hpp" -#include "http_common.hpp" +#include "common.hpp" #include #include #include