219 lines
5.9 KiB
C++
219 lines
5.9 KiB
C++
#pragma once
|
|
|
|
#include "common.hpp"
|
|
#include "request.hpp"
|
|
#include "router.hpp"
|
|
#include "cookie.hpp"
|
|
#include <string_view>
|
|
#include <unordered_map>
|
|
#include <string>
|
|
#include <vector>
|
|
#include <cstring>
|
|
|
|
using std::string_view;
|
|
|
|
class Parser {
|
|
public:
|
|
static Request parse(string_view data) {
|
|
Request req;
|
|
const char* ptr = data.data();
|
|
const char* end = ptr + data.size();
|
|
|
|
// Parse method
|
|
const char* method_end = find_space(ptr, end);
|
|
if (!method_end) return req;
|
|
|
|
req.method = parse_method(ptr, method_end - ptr);
|
|
ptr = method_end + 1;
|
|
|
|
// Parse path and query
|
|
const char* path_end = find_space(ptr, end);
|
|
if (!path_end) return req;
|
|
|
|
const char* query_start = find_char(ptr, path_end, '?');
|
|
if (query_start) {
|
|
req.path = string_view(ptr, query_start - ptr);
|
|
req.query = string_view(query_start + 1, path_end - query_start - 1);
|
|
} else {
|
|
req.path = string_view(ptr, path_end - ptr);
|
|
}
|
|
ptr = path_end + 1;
|
|
|
|
// Parse version
|
|
const char* version_end = find_crlf(ptr, end);
|
|
if (!version_end) return req;
|
|
|
|
req.version = string_view(ptr, version_end - ptr);
|
|
ptr = version_end + 2;
|
|
|
|
// Parse headers
|
|
while (ptr < end - 1) {
|
|
if (*ptr == '\r' && *(ptr + 1) == '\n') {
|
|
ptr += 2;
|
|
break;
|
|
}
|
|
|
|
const char* header_end = find_crlf(ptr, end);
|
|
if (!header_end) break;
|
|
|
|
const char* colon = find_char(ptr, header_end, ':');
|
|
if (!colon) {
|
|
ptr = header_end + 2;
|
|
continue;
|
|
}
|
|
|
|
string_view name(ptr, colon - ptr);
|
|
const char* value_start = skip_whitespace(colon + 1, header_end);
|
|
string_view value(value_start, header_end - value_start);
|
|
|
|
req.headers[name] = value;
|
|
|
|
// Fast header checks using first character + length
|
|
if (likely(name.size() >= 6)) {
|
|
char first = name[0] | 0x20; // to lowercase
|
|
if (first == 'c') {
|
|
if (name.size() == 14 && equals_case_insensitive(name, "content-length")) {
|
|
req.content_length = parse_int(value);
|
|
} else if (name.size() == 6 && equals_case_insensitive(name, "cookie")) {
|
|
req.cookies = CookieParser::parse(value);
|
|
}
|
|
}
|
|
}
|
|
|
|
ptr = header_end + 2;
|
|
}
|
|
|
|
// Body
|
|
if (ptr < end) {
|
|
req.body = string_view(ptr, end - ptr);
|
|
}
|
|
|
|
req.valid = true;
|
|
return req;
|
|
}
|
|
|
|
private:
|
|
// Optimized character finding
|
|
static const char* find_char(const char* start, const char* end, char target) {
|
|
// Fallback scalar implementation
|
|
while (start < end && *start != target) ++start;
|
|
return start < end ? start : nullptr;
|
|
}
|
|
|
|
static const char* find_space(const char* start, const char* end) {
|
|
// Optimized scalar implementation
|
|
while (start < end && *start != ' ') ++start;
|
|
return start < end ? start : nullptr;
|
|
}
|
|
|
|
static const char* find_crlf(const char* start, const char* end) {
|
|
// Optimized scalar implementation
|
|
while (start < end - 1) {
|
|
if (*start == '\r' && *(start + 1) == '\n') return start;
|
|
++start;
|
|
}
|
|
return nullptr;
|
|
}
|
|
|
|
static const char* skip_whitespace(const char* start, const char* end) {
|
|
while (likely(start < end - 7)) {
|
|
if (*start != ' ' && *start != '\t') break;
|
|
++start;
|
|
if (*start != ' ' && *start != '\t') break;
|
|
++start;
|
|
if (*start != ' ' && *start != '\t') break;
|
|
++start;
|
|
if (*start != ' ' && *start != '\t') break;
|
|
++start;
|
|
if (*start != ' ' && *start != '\t') break;
|
|
++start;
|
|
if (*start != ' ' && *start != '\t') break;
|
|
++start;
|
|
if (*start != ' ' && *start != '\t') break;
|
|
++start;
|
|
if (*start != ' ' && *start != '\t') break;
|
|
++start;
|
|
}
|
|
while (start < end && (*start == ' ' || *start == '\t')) ++start;
|
|
return start;
|
|
}
|
|
|
|
// Optimized method parsing using lookup table
|
|
static HttpMethod parse_method(const char* data, size_t len) {
|
|
// Use first char + length for fast dispatch
|
|
if (unlikely(len == 0)) return HttpMethod::UNKNOWN;
|
|
|
|
char first = data[0];
|
|
switch (first) {
|
|
case 'G':
|
|
if (len == 3 && data[1] == 'E' && data[2] == 'T')
|
|
return HttpMethod::GET;
|
|
break;
|
|
case 'P':
|
|
if (len == 4) {
|
|
uint32_t word = *reinterpret_cast<const uint32_t*>(data);
|
|
if (word == 0x54534f50) return HttpMethod::POST; // "POST"
|
|
} else if (len == 3) {
|
|
uint32_t word = *reinterpret_cast<const uint32_t*>(data) & 0x00ffffff;
|
|
if (word == 0x545550) return HttpMethod::PUT; // "PUT"
|
|
} else if (len == 5) {
|
|
if (memcmp(data, "PATCH", 5) == 0) return HttpMethod::PATCH;
|
|
}
|
|
break;
|
|
case 'H':
|
|
if (len == 4 && memcmp(data, "HEAD", 4) == 0)
|
|
return HttpMethod::HEAD;
|
|
break;
|
|
case 'D':
|
|
if (len == 6 && memcmp(data, "DELETE", 6) == 0)
|
|
return HttpMethod::DELETE;
|
|
break;
|
|
case 'O':
|
|
if (len == 7 && memcmp(data, "OPTIONS", 7) == 0)
|
|
return HttpMethod::OPTIONS;
|
|
break;
|
|
}
|
|
return HttpMethod::UNKNOWN;
|
|
}
|
|
|
|
static bool equals_case_insensitive(string_view a, const char* b) {
|
|
size_t len = strlen(b);
|
|
if (a.size() != len) return false;
|
|
|
|
// Optimized case-insensitive comparison
|
|
for (size_t i = 0; i < len; ++i) {
|
|
char ca = a[i] | 0x20; // to lowercase
|
|
char cb = b[i] | 0x20;
|
|
if (ca != cb) return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
static size_t parse_int(string_view str) {
|
|
if (unlikely(str.empty())) return 0;
|
|
|
|
size_t result = 0;
|
|
const char* ptr = str.data();
|
|
const char* end = ptr + str.size();
|
|
|
|
// Unrolled parsing for common small numbers
|
|
while (likely(ptr < end - 3)) {
|
|
char c1 = ptr[0], c2 = ptr[1], c3 = ptr[2], c4 = ptr[3];
|
|
if (c1 < '0' || c1 > '9') break;
|
|
if (c2 < '0' || c2 > '9') { result = result * 10 + (c1 - '0'); ptr += 1; break; }
|
|
if (c3 < '0' || c3 > '9') { result = result * 100 + (c1 - '0') * 10 + (c2 - '0'); ptr += 2; break; }
|
|
if (c4 < '0' || c4 > '9') { result = result * 1000 + (c1 - '0') * 100 + (c2 - '0') * 10 + (c3 - '0'); ptr += 3; break; }
|
|
result = result * 10000 + (c1 - '0') * 1000 + (c2 - '0') * 100 + (c3 - '0') * 10 + (c4 - '0');
|
|
ptr += 4;
|
|
}
|
|
|
|
while (ptr < end) {
|
|
char c = *ptr;
|
|
if (c < '0' || c > '9') break;
|
|
result = result * 10 + (c - '0');
|
|
++ptr;
|
|
}
|
|
return result;
|
|
}
|
|
};
|