cpp_server/parser.hpp

219 lines
5.9 KiB
C++

#pragma once
#include "common.hpp"
#include "request.hpp"
#include "router.hpp"
#include "cookie.hpp"
#include <string_view>
#include <unordered_map>
#include <string>
#include <vector>
#include <cstring>
using std::string_view;
class Parser {
public:
static Request parse(string_view data) {
Request req;
const char* ptr = data.data();
const char* end = ptr + data.size();
// Parse method
const char* method_end = find_space(ptr, end);
if (!method_end) return req;
req.method = parse_method(ptr, method_end - ptr);
ptr = method_end + 1;
// Parse path and query
const char* path_end = find_space(ptr, end);
if (!path_end) return req;
const char* query_start = find_char(ptr, path_end, '?');
if (query_start) {
req.path = string_view(ptr, query_start - ptr);
req.query = string_view(query_start + 1, path_end - query_start - 1);
} else {
req.path = string_view(ptr, path_end - ptr);
}
ptr = path_end + 1;
// Parse version
const char* version_end = find_crlf(ptr, end);
if (!version_end) return req;
req.version = string_view(ptr, version_end - ptr);
ptr = version_end + 2;
// Parse headers
while (ptr < end - 1) {
if (*ptr == '\r' && *(ptr + 1) == '\n') {
ptr += 2;
break;
}
const char* header_end = find_crlf(ptr, end);
if (!header_end) break;
const char* colon = find_char(ptr, header_end, ':');
if (!colon) {
ptr = header_end + 2;
continue;
}
string_view name(ptr, colon - ptr);
const char* value_start = skip_whitespace(colon + 1, header_end);
string_view value(value_start, header_end - value_start);
req.headers[name] = value;
// Fast header checks using first character + length
if (likely(name.size() >= 6)) {
char first = name[0] | 0x20; // to lowercase
if (first == 'c') {
if (name.size() == 14 && equals_case_insensitive(name, "content-length")) {
req.content_length = parse_int(value);
} else if (name.size() == 6 && equals_case_insensitive(name, "cookie")) {
req.cookies = CookieParser::parse(value);
}
}
}
ptr = header_end + 2;
}
// Body
if (ptr < end) {
req.body = string_view(ptr, end - ptr);
}
req.valid = true;
return req;
}
private:
// Optimized character finding
static const char* find_char(const char* start, const char* end, char target) {
// Fallback scalar implementation
while (start < end && *start != target) ++start;
return start < end ? start : nullptr;
}
static const char* find_space(const char* start, const char* end) {
// Optimized scalar implementation
while (start < end && *start != ' ') ++start;
return start < end ? start : nullptr;
}
static const char* find_crlf(const char* start, const char* end) {
// Optimized scalar implementation
while (start < end - 1) {
if (*start == '\r' && *(start + 1) == '\n') return start;
++start;
}
return nullptr;
}
static const char* skip_whitespace(const char* start, const char* end) {
while (likely(start < end - 7)) {
if (*start != ' ' && *start != '\t') break;
++start;
if (*start != ' ' && *start != '\t') break;
++start;
if (*start != ' ' && *start != '\t') break;
++start;
if (*start != ' ' && *start != '\t') break;
++start;
if (*start != ' ' && *start != '\t') break;
++start;
if (*start != ' ' && *start != '\t') break;
++start;
if (*start != ' ' && *start != '\t') break;
++start;
if (*start != ' ' && *start != '\t') break;
++start;
}
while (start < end && (*start == ' ' || *start == '\t')) ++start;
return start;
}
// Optimized method parsing using lookup table
static HttpMethod parse_method(const char* data, size_t len) {
// Use first char + length for fast dispatch
if (unlikely(len == 0)) return HttpMethod::UNKNOWN;
char first = data[0];
switch (first) {
case 'G':
if (len == 3 && data[1] == 'E' && data[2] == 'T')
return HttpMethod::GET;
break;
case 'P':
if (len == 4) {
uint32_t word = *reinterpret_cast<const uint32_t*>(data);
if (word == 0x54534f50) return HttpMethod::POST; // "POST"
} else if (len == 3) {
uint32_t word = *reinterpret_cast<const uint32_t*>(data) & 0x00ffffff;
if (word == 0x545550) return HttpMethod::PUT; // "PUT"
} else if (len == 5) {
if (memcmp(data, "PATCH", 5) == 0) return HttpMethod::PATCH;
}
break;
case 'H':
if (len == 4 && memcmp(data, "HEAD", 4) == 0)
return HttpMethod::HEAD;
break;
case 'D':
if (len == 6 && memcmp(data, "DELETE", 6) == 0)
return HttpMethod::DELETE;
break;
case 'O':
if (len == 7 && memcmp(data, "OPTIONS", 7) == 0)
return HttpMethod::OPTIONS;
break;
}
return HttpMethod::UNKNOWN;
}
static bool equals_case_insensitive(string_view a, const char* b) {
size_t len = strlen(b);
if (a.size() != len) return false;
// Optimized case-insensitive comparison
for (size_t i = 0; i < len; ++i) {
char ca = a[i] | 0x20; // to lowercase
char cb = b[i] | 0x20;
if (ca != cb) return false;
}
return true;
}
static size_t parse_int(string_view str) {
if (unlikely(str.empty())) return 0;
size_t result = 0;
const char* ptr = str.data();
const char* end = ptr + str.size();
// Unrolled parsing for common small numbers
while (likely(ptr < end - 3)) {
char c1 = ptr[0], c2 = ptr[1], c3 = ptr[2], c4 = ptr[3];
if (c1 < '0' || c1 > '9') break;
if (c2 < '0' || c2 > '9') { result = result * 10 + (c1 - '0'); ptr += 1; break; }
if (c3 < '0' || c3 > '9') { result = result * 100 + (c1 - '0') * 10 + (c2 - '0'); ptr += 2; break; }
if (c4 < '0' || c4 > '9') { result = result * 1000 + (c1 - '0') * 100 + (c2 - '0') * 10 + (c3 - '0'); ptr += 3; break; }
result = result * 10000 + (c1 - '0') * 1000 + (c2 - '0') * 100 + (c3 - '0') * 10 + (c4 - '0');
ptr += 4;
}
while (ptr < end) {
char c = *ptr;
if (c < '0' || c > '9') break;
result = result * 10 + (c - '0');
++ptr;
}
return result;
}
};