From 551112a5c038d8cc167f247721f1ff51ec62447c Mon Sep 17 00:00:00 2001
From: Sky Johnson
Date: Thu, 31 Jul 2025 22:24:41 -0500
Subject: [PATCH] first commit

---
 .gitignore |   2 +
 json.hpp   | 381 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 test.cpp   | 191 +++++++++++++++++++++++++++
 test.json  |  74 +++++++++++
 4 files changed, 648 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 json.hpp
 create mode 100644 test.cpp
 create mode 100644 test.json

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..c969e29
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+json
+test
\ No newline at end of file
diff --git a/json.hpp b/json.hpp
new file mode 100644
index 0000000..d1b2c80
--- /dev/null
+++ b/json.hpp
@@ -0,0 +1,381 @@
+#pragma once
+
+#include <charconv>
+#include <cstddef>
+#include <fstream>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <string_view>
+#include <unordered_map>
+#include <utility>
+#include <variant>
+#include <vector>
+
+/// Minimal recursive-descent JSON parser.
+///
+/// parse() and parse_file() return a JsonValue tree that owns all of its data.
+/// Every syntax error is reported by throwing std::runtime_error. The parser
+/// keeps its cursor in member variables, so do not share one instance between
+/// threads that parse concurrently.
+class JsonParser {
+public:
+    struct JsonValue;
+    using JsonNull = std::monostate;
+    using JsonBool = bool;
+    using JsonNumber = double;
+    using JsonString = std::string;
+    using JsonArray = std::vector<JsonValue>;
+    using JsonObject = std::unordered_map<std::string, JsonValue>;
+
+    /// One JSON value; `value` holds exactly one of the alternatives above.
+    struct JsonValue {
+        std::variant<JsonNull, JsonBool, JsonNumber, JsonString, JsonArray, JsonObject> value;
+
+        /// True when the value currently holds alternative T.
+        template<typename T>
+        bool is() const { return std::holds_alternative<T>(value); }
+
+        /// Returns the held T; throws std::bad_variant_access on a mismatch.
+        template<typename T>
+        const T& as() const { return std::get<T>(value); }
+
+        template<typename T>
+        T& as() { return std::get<T>(value); }
+    };
+
+private:
+    /// Arrays/objects nested deeper than this are rejected, which bounds the
+    /// recursion depth of the parser on hostile input.
+    static constexpr size_t max_depth = 512;
+
+    std::string_view input;  // text being parsed
+    size_t pos = 0;          // index of the next unread byte of `input`
+    size_t depth = 0;        // current array/object nesting level
+
+    /// Counts one nesting level for the lifetime of the guard.
+    struct DepthGuard {
+        size_t& depth;
+
+        explicit DepthGuard(size_t& d) : depth(d) {
+            if (depth >= max_depth) {
+                throw std::runtime_error("Maximum nesting depth exceeded");
+            }
+            ++depth;
+        }
+        ~DepthGuard() { --depth; }
+        DepthGuard(const DepthGuard&) = delete;
+        DepthGuard& operator=(const DepthGuard&) = delete;
+    };
+
+    void skip_whitespace() {
+        while (pos < input.size() && (input[pos] == ' ' || input[pos] == '\t' ||
+                                      input[pos] == '\n' || input[pos] == '\r')) {
+            ++pos;
+        }
+    }
+
+    /// Next character without consuming it; '\0' at end of input.
+    char peek() const {
+        return pos < input.size() ? input[pos] : '\0';
+    }
+
+    /// Consumes and returns the next character; '\0' at end of input.
+    char consume() {
+        return pos < input.size() ? input[pos++] : '\0';
+    }
+
+    void expect(char expected) {
+        if (consume() != expected) {
+            throw std::runtime_error("Expected '" + std::string(1, expected) + "'");
+        }
+    }
+
+    JsonValue parse_null() {
+        if (input.substr(pos, 4) == "null") {
+            pos += 4;
+            return JsonValue{JsonNull{}};
+        }
+        throw std::runtime_error("Invalid null");
+    }
+
+    JsonValue parse_bool() {
+        if (input.substr(pos, 4) == "true") {
+            pos += 4;
+            return JsonValue{true};
+        }
+        if (input.substr(pos, 5) == "false") {
+            pos += 5;
+            return JsonValue{false};
+        }
+        throw std::runtime_error("Invalid boolean");
+    }
+
+    /// Validates the JSON number grammar by hand, then converts the matched
+    /// range with std::from_chars.
+    JsonValue parse_number() {
+        size_t start = pos;
+
+        if (peek() == '-') consume();
+
+        if (peek() == '0') {
+            consume();
+        } else if (peek() >= '1' && peek() <= '9') {
+            consume();
+            while (peek() >= '0' && peek() <= '9') consume();
+        } else {
+            throw std::runtime_error("Invalid number");
+        }
+
+        if (peek() == '.') {
+            consume();
+            if (!(peek() >= '0' && peek() <= '9')) {
+                throw std::runtime_error("Invalid decimal");
+            }
+            while (peek() >= '0' && peek() <= '9') consume();
+        }
+
+        if (peek() == 'e' || peek() == 'E') {
+            consume();
+            if (peek() == '+' || peek() == '-') consume();
+            if (!(peek() >= '0' && peek() <= '9')) {
+                throw std::runtime_error("Invalid exponent");
+            }
+            while (peek() >= '0' && peek() <= '9') consume();
+        }
+
+        double result = 0.0;
+        auto [ptr, ec] = std::from_chars(input.data() + start, input.data() + pos, result);
+        if (ec != std::errc{}) {
+            throw std::runtime_error("Number parsing failed");
+        }
+
+        return JsonValue{result};
+    }
+
+    /// Reads the four hex digits of a unicode escape and returns their value.
+    unsigned parse_hex4() {
+        if (pos + 4 > input.size()) throw std::runtime_error("Invalid unicode escape");
+        auto hex = input.substr(pos, 4);
+        unsigned value = 0;
+        auto [ptr, ec] = std::from_chars(hex.data(), hex.data() + 4, value, 16);
+        // from_chars stops silently at the first non-hex character, so a
+        // partial match such as "12G4" must be rejected explicitly.
+        if (ec != std::errc{} || ptr != hex.data() + 4) {
+            throw std::runtime_error("Invalid unicode hex");
+        }
+        pos += 4;
+        return value;
+    }
+
+    /// Decodes a unicode escape (its backslash-u prefix is already consumed)
+    /// into a code point, joining a UTF-16 surrogate pair into one value.
+    unsigned parse_unicode_escape() {
+        unsigned cp = parse_hex4();
+        if (cp >= 0xDC00 && cp <= 0xDFFF) {
+            throw std::runtime_error("Unpaired low surrogate");
+        }
+        if (cp >= 0xD800 && cp <= 0xDBFF) {
+            if (input.substr(pos, 2) != "\\u") {
+                throw std::runtime_error("Unpaired high surrogate");
+            }
+            pos += 2;
+            unsigned low = parse_hex4();
+            if (low < 0xDC00 || low > 0xDFFF) {
+                throw std::runtime_error("Invalid low surrogate");
+            }
+            cp = 0x10000 + ((cp - 0xD800) << 10) + (low - 0xDC00);
+        }
+        return cp;
+    }
+
+    /// Appends the UTF-8 encoding of code point `cp` to `out`.
+    static void append_utf8(std::string& out, unsigned cp) {
+        if (cp < 0x80) {
+            out += static_cast<char>(cp);
+        } else if (cp < 0x800) {
+            out += static_cast<char>(0xC0 | (cp >> 6));
+            out += static_cast<char>(0x80 | (cp & 0x3F));
+        } else if (cp < 0x10000) {
+            out += static_cast<char>(0xE0 | (cp >> 12));
+            out += static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
+            out += static_cast<char>(0x80 | (cp & 0x3F));
+        } else {
+            out += static_cast<char>(0xF0 | (cp >> 18));
+            out += static_cast<char>(0x80 | ((cp >> 12) & 0x3F));
+            out += static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
+            out += static_cast<char>(0x80 | (cp & 0x3F));
+        }
+    }
+
+    /// Parses the body of a string literal up to, but not including, the
+    /// closing quote, decoding escape sequences into UTF-8.
+    std::string parse_string_content() {
+        std::string result;
+        result.reserve(32); // optimization for typical strings
+
+        while (pos < input.size() && peek() != '"') {
+            char c = consume();
+            if (c == '\\') {
+                if (pos >= input.size()) throw std::runtime_error("Unterminated escape");
+                char escaped = consume();
+                switch (escaped) {
+                    case '"': result += '"'; break;
+                    case '\\': result += '\\'; break;
+                    case '/': result += '/'; break;
+                    case 'b': result += '\b'; break;
+                    case 'f': result += '\f'; break;
+                    case 'n': result += '\n'; break;
+                    case 'r': result += '\r'; break;
+                    case 't': result += '\t'; break;
+                    case 'u': append_utf8(result, parse_unicode_escape()); break;
+                    default: throw std::runtime_error("Invalid escape sequence");
+                }
+            } else if (static_cast<unsigned char>(c) < 0x20) {
+                throw std::runtime_error("Unescaped control character");
+            } else {
+                result += c;
+            }
+        }
+
+        if (peek() != '"') throw std::runtime_error("Unterminated string");
+        return result;
+    }
+
+    JsonValue parse_string() {
+        expect('"');
+        auto content = parse_string_content();
+        expect('"');
+        return JsonValue{std::move(content)};
+    }
+
+    JsonValue parse_array() {
+        expect('[');
+        DepthGuard guard(depth);
+        skip_whitespace();
+
+        JsonArray array;
+
+        if (peek() == ']') {
+            consume();
+            return JsonValue{std::move(array)};
+        }
+
+        while (true) {
+            array.push_back(parse_value());
+            skip_whitespace();
+
+            char next = peek();
+            if (next == ']') {
+                consume();
+                break;
+            } else if (next == ',') {
+                consume();
+                skip_whitespace();
+            } else {
+                throw std::runtime_error("Expected ',' or ']' in array");
+            }
+        }
+
+        return JsonValue{std::move(array)};
+    }
+
+    JsonValue parse_object() {
+        expect('{');
+        DepthGuard guard(depth);
+        skip_whitespace();
+
+        JsonObject object;
+
+        if (peek() == '}') {
+            consume();
+            return JsonValue{std::move(object)};
+        }
+
+        while (true) {
+            if (peek() != '"') throw std::runtime_error("Expected string key");
+
+            expect('"');
+            auto key = parse_string_content();
+            expect('"');
+
+            skip_whitespace();
+            expect(':');
+            skip_whitespace();
+
+            auto value = parse_value();
+            // emplace keeps the first value when a key is repeated.
+            object.emplace(std::move(key), std::move(value));
+
+            skip_whitespace();
+
+            char next = peek();
+            if (next == '}') {
+                consume();
+                break;
+            } else if (next == ',') {
+                consume();
+                skip_whitespace();
+            } else {
+                throw std::runtime_error("Expected ',' or '}' in object");
+            }
+        }
+
+        return JsonValue{std::move(object)};
+    }
+
+    /// Dispatches on the first non-whitespace character.
+    JsonValue parse_value() {
+        skip_whitespace();
+
+        char c = peek();
+        switch (c) {
+            case 'n': return parse_null();
+            case 't':
+            case 'f': return parse_bool();
+            case '"': return parse_string();
+            case '[': return parse_array();
+            case '{': return parse_object();
+            case '-':
+            case '0': case '1': case '2': case '3': case '4':
+            case '5': case '6': case '7': case '8': case '9':
+                return parse_number();
+            default:
+                throw std::runtime_error("Unexpected character");
+        }
+    }
+
+public:
+    /// Parses one complete JSON document from `json`.
+    /// @throws std::runtime_error on malformed input or trailing characters.
+    JsonValue parse(std::string_view json) {
+        input = json;
+        pos = 0;
+        depth = 0;
+        auto result = parse_value();
+        skip_whitespace();
+        if (pos < input.size()) {
+            throw std::runtime_error("Extra characters after JSON");
+        }
+        return result;
+    }
+
+    /// Reads the whole file at `filepath` and parses it as JSON.
+    /// @throws std::runtime_error if the file cannot be opened or read, or
+    ///         if its content is not valid JSON.
+    JsonValue parse_file(const std::string& filepath) {
+        std::ifstream file(filepath);
+        if (!file.is_open()) {
+            throw std::runtime_error("Cannot open file: " + filepath);
+        }
+
+        std::ostringstream buffer;
+        buffer << file.rdbuf();
+
+        if (file.bad()) {
+            throw std::runtime_error("Error reading file: " + filepath);
+        }
+
+        return parse(buffer.str());
+    }
+};
\ No newline at end of file
diff --git a/test.cpp b/test.cpp
new file mode 100644
index 0000000..eeb64fa
--- /dev/null
+++ b/test.cpp
@@ -0,0 +1,191 @@
+#include "json.hpp"
+#include <cassert>
+#include <cmath>
+#include <iostream>
+#include <string>
+#include <vector>
+
+void test_basic_types() {
+    JsonParser parser;
+
+    // Test null
+    auto null_val = parser.parse("null");
+    assert(null_val.is<JsonParser::JsonNull>());
+
+    // Test boolean
+    auto true_val = parser.parse("true");
+    assert(true_val.is<bool>() && true_val.as<bool>() == true);
+
+    auto false_val = parser.parse("false");
+    assert(false_val.is<bool>() && false_val.as<bool>() == false);
+
+    // Test numbers
+    auto int_val = parser.parse("42");
+    assert(int_val.is<double>() && int_val.as<double>() == 42.0);
+
+    auto float_val = parser.parse("3.14159");
+    // std::abs, not ::abs: the C function may truncate a double to int.
+    assert(float_val.is<double>() && std::abs(float_val.as<double>() - 3.14159) < 0.00001);
+
+    auto neg_val = parser.parse("-123.45");
+    assert(neg_val.is<double>() && neg_val.as<double>() == -123.45);
+
+    auto exp_val = parser.parse("1.5e3");
+    assert(exp_val.is<double>() && exp_val.as<double>() == 1500.0);
+
+    // Test string
+    auto str_val = parser.parse("\"hello world\"");
+    assert(str_val.is<std::string>() && str_val.as<std::string>() == "hello world");
+
+    std::cout << "✓ Basic types test passed\n";
+}
+
+void test_string_escapes() {
+    JsonParser parser;
+
+    auto escaped = parser.parse("\"line1\\nline2\\ttab\\\"quote\\\\backslash\"");
+    assert(escaped.is<std::string>());
+    assert(escaped.as<std::string>() == "line1\nline2\ttab\"quote\\backslash");
+
+    auto unicode = parser.parse("\"\\u0048\\u0065\\u006c\\u006c\\u006f\"");
+    assert(unicode.is<std::string>());
+    assert(unicode.as<std::string>() == "Hello");
+
+    // A surrogate pair must decode to a single 4-byte UTF-8 sequence (U+1F30D).
+    auto astral = parser.parse("\"\\ud83c\\udf0d\"");
+    assert(astral.is<std::string>());
+    assert(astral.as<std::string>() == "\xF0\x9F\x8C\x8D");
+
+    std::cout << "✓ String escapes test passed\n";
+}
+
+void test_arrays() {
+    JsonParser parser;
+
+    // Empty array
+    auto empty_arr = parser.parse("[]");
+    assert(empty_arr.is<JsonParser::JsonArray>());
+    assert(empty_arr.as<JsonParser::JsonArray>().empty());
+
+    // Mixed array
+    auto mixed_arr = parser.parse("[1, \"hello\", true, null, [2, 3]]");
+    assert(mixed_arr.is<JsonParser::JsonArray>());
+    const auto& arr = mixed_arr.as<JsonParser::JsonArray>();
+    assert(arr.size() == 5);
+    assert(arr[0].is<double>() && arr[0].as<double>() == 1.0);
+    assert(arr[1].is<std::string>() && arr[1].as<std::string>() == "hello");
+    assert(arr[2].is<bool>() && arr[2].as<bool>() == true);
+    assert(arr[3].is<JsonParser::JsonNull>());
+    assert(arr[4].is<JsonParser::JsonArray>());
+
+    std::cout << "✓ Arrays test passed\n";
+}
+
+void test_objects() {
+    JsonParser parser;
+
+    // Empty object
+    auto empty_obj = parser.parse("{}");
+    assert(empty_obj.is<JsonParser::JsonObject>());
+    assert(empty_obj.as<JsonParser::JsonObject>().empty());
+
+    // Complex object
+    auto complex_obj = parser.parse(R"({
+        "name": "John Doe",
+        "age": 30,
+        "is_active": true,
+        "scores": [85, 90, 78],
+        "address": {
+            "street": "123 Main St",
+            "city": "Anytown"
+        },
+        "spouse": null
+    })");
+
+    assert(complex_obj.is<JsonParser::JsonObject>());
+    const auto& obj = complex_obj.as<JsonParser::JsonObject>();
+
+    assert(obj.at("name").as<std::string>() == "John Doe");
+    assert(obj.at("age").as<double>() == 30.0);
+    assert(obj.at("is_active").as<bool>() == true);
+    assert(obj.at("spouse").is<JsonParser::JsonNull>());
+
+    const auto& scores = obj.at("scores").as<JsonParser::JsonArray>();
+    assert(scores.size() == 3);
+    assert(scores[0].as<double>() == 85.0);
+
+    const auto& address = obj.at("address").as<JsonParser::JsonObject>();
+    assert(address.at("street").as<std::string>() == "123 Main St");
+
+    std::cout << "✓ Objects test passed\n";
+}
+
+void test_file_parsing() {
+    JsonParser parser;
+
+    try {
+        auto file_json = parser.parse_file("test.json");
+        assert(file_json.is<JsonParser::JsonObject>());
+        const auto& obj = file_json.as<JsonParser::JsonObject>();
+
+        // Verify some expected content
+        assert(obj.at("library").as<std::string>() == "fast-json-parser");
+        assert(obj.at("version").as<std::string>() == "1.0.0");
+
+        const auto& features = obj.at("features").as<JsonParser::JsonArray>();
+        assert(features.size() > 0);
+
+        std::cout << "✓ File parsing test passed\n";
+    } catch (const std::exception& e) {
+        std::cerr << "File parsing test failed: " << e.what() << "\n";
+        throw; // a failed test must fail the run instead of being swallowed
+    }
+}
+
+void test_error_cases() {
+    JsonParser parser;
+
+    // Test various malformed JSON
+    std::vector<std::string> invalid_json = {
+        "",
+        "{",
+        "}",
+        "[1,]",
+        "{\"key\":}",
+        "\"unterminated string",
+        "123.45.67",
+        "truee",
+        "nul",
+        "{\"key\" \"value\"}", // missing colon
+        "[1 2 3]", // missing commas
+        "\"\\u12G4\"", // non-hex digit in unicode escape
+        "\"\\ud83c\"" // high surrogate without its low half
+    };
+
+    size_t error_count = 0;
+    for (const auto& json : invalid_json) {
+        try {
+            parser.parse(json);
+            std::cerr << "Expected error for: " << json << "\n";
+        } catch (const std::exception&) {
+            error_count++;
+        }
+    }
+
+    assert(error_count == invalid_json.size());
+    std::cout << "✓ Error handling test passed (" << error_count << " errors caught)\n";
+}
+
+int main() {
+    std::cout << "Running JSON parser tests...\n\n";
+
+    test_basic_types();
+    test_string_escapes();
+    test_arrays();
+    test_objects();
+    test_file_parsing();
+    test_error_cases();
+
+    std::cout << "\n";
+    return 0;
+}
\ No newline at end of file
diff --git a/test.json b/test.json
new file mode 100644
index 0000000..e7c5ad5
--- /dev/null
+++ b/test.json
@@ -0,0 +1,74 @@
+{
+  "library": "fast-json-parser",
+  "version": "1.0.0",
+  "description": "A minimal but fast JSON parser for C++20",
+  "active": true,
+  "deprecated": false,
+  "rating": null,
+  "downloads": 1250000,
+  "size_mb": 15.7,
+  "pi": 3.14159265359,
+  "scientific": 6.022e23,
+  "negative": -273.15,
+  "zero": 0,
+  "features": [
+    "fast parsing",
+    "C++20 compatible",
+    "minimal dependencies",
+    "file I/O support",
+    "unicode handling"
+  ],
+  "benchmarks": {
+    "small_files": {
+      "time_ms": 0.45,
+      "memory_kb": 128
+    },
+    "large_files": {
+      "time_ms": 15.2,
+      "memory_kb": 2048
+    }
+  },
+  "supported_types": [
+    "null",
+    "boolean",
+    "number",
+    "string",
+    "array",
+    "object"
+  ],
+  "unicode_test": "Hello 世界 🌍 \u0048\u0065\u006C\u006C\u006F",
+  "escape_test": "Line 1\\nLine 2\\tTabbed\\r\\nWindows Line\\b\\f\"Quoted\"\\/Slash\\\\Backslash",
+  "empty_containers": {
+    "empty_array": [],
+    "empty_object": {}
+  },
+  "nested_structure": {
+    "level1": {
+      "level2": {
+        "level3": {
+          "deep_value": "found me!",
+          "deep_array": [1, 2, [3, 4, [5]]]
+        }
+      }
+    }
+  },
+  "mixed_array": [
+    42,
+    "string",
+    true,
+    null,
+    {
+      "nested_key": "nested_value"
+    },
+    [1, 2, 3]
+  ],
+  "author": {
+    "name": "Developer",
+    "contact": {
+      "email": "dev@example.com",
+      "social": ["github", "twitter"]
+    }
+  },
+  "license": "MIT",
+  "tags": ["json", "parser", "cpp", "fast", "minimal"]
+}
\ No newline at end of file