From 49ed1fef42c5ee804fd8daaeb6661adbb8fd6bda Mon Sep 17 00:00:00 2001 From: Sky Johnson Date: Sat, 7 Sep 2024 16:39:57 -0500 Subject: [PATCH] huge big ol' updates; benches, tools, simplerouter, etc --- README.md | 104 +- Router.php | 6 + SegmentRouter.php | 31 +- SimpleRouter.php | 254 + TrieRouter.php | 32 +- other.php | 0 tests/segment.php | 51 +- tests/simple.php | 67 + tests/storage/segment/big.txt | 5049 ++++++++++ tests/storage/segment/blog.txt | 10 + tests/storage/segment/github.txt | 376 + tests/storage/trie/big.txt | 14189 +++++++++++++++++++++++++++++ tests/storage/trie/blog.txt | 21 + tests/storage/trie/github.txt | 1141 +++ tests/tools.php | 49 +- tests/trie.php | 62 +- 16 files changed, 21344 insertions(+), 98 deletions(-) create mode 100644 Router.php create mode 100644 SimpleRouter.php create mode 100644 other.php create mode 100644 tests/simple.php create mode 100644 tests/storage/segment/big.txt create mode 100644 tests/storage/segment/blog.txt create mode 100644 tests/storage/segment/github.txt create mode 100644 tests/storage/trie/big.txt create mode 100644 tests/storage/trie/blog.txt create mode 100644 tests/storage/trie/github.txt diff --git a/README.md b/README.md index 96cf50a..066807f 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,105 @@ # Router -A tree-based router using string manipulation. \ No newline at end of file +Hey there! This repo is an experiment to create a well-tuned and speedy URI router. There are only two main goals: +- It's fast +- It's simple + +Prior to this project, I built the [SimpleRouter](https://github.com/splashsky/simplerouter) as a fork from a very cool project called the simplePHPRouter. That router was based on regex for parameterization, and the regex made it pretty painful to maintain if I ever left it alone for too long. + +Since working on SimpleRouter, I've played with other languages (primarily Go) and found a few new ways of doing things. 
+ +## Methodology + +Radix trees (tries, but I prefer the normal spelling) are wonderful mathematical constructs; the basic concept is that you have the root of a tree and branches (nodes) that have leaves (nodes). When you add a branch, this branch gets merged with existing branches if they match, and the leaves are still at the ends to be separated. + +Take for example these routes: +``` +/api/v1/hello +/api/v1/hi +/api/v1/hello/:param +/api/v2/no +/foo +``` + +A radix (and more specifically, a PATRICIA) trie takes the commonalities in these routes and makes them into nodes, or branches. `/` exists as the root node. `api/` is turned into a node, from which `v1/` and `v2/no` branch. `hello` is taken as another branch with the `/` and `:param` child nodes. `/foo` is naturally its own branch from the root. + +By splitting these routes up into a trie based on their segments, you're able to iterate far more quickly through the tree to find what you're looking for. If a user then requests `/api/v1/hello/sky` the router can jump from the root, to `api/`, to `v1/`, to `hello/`, then to the final node much faster than if we had to chop up, say, an associative array and compare for every registered route. + +The nodes can contain any arbitrary information, such as HTTP methods or handlers. From my experience, this method of lookup prefers specificity, and so it will always prefer the edges over the inner structures. + +## Parameters + +One flaw(-ish) of the SimpleRouter implementation (and many other implementations) is the use of regex as a way of identifying and extracting route parameters. As everyone knows, regex adds time, overhead, and complexity to any system. + +In order to circumvent this, we can rely on our node structure; if a node begins with our delimiter `:` then we can take the related segment from the URI and use that as a parameter, regardless of the value. This means we have extremely low overhead in the logic required to pull parameters from URIs. 
+ + +## Performance + +Of course, what good is a router that's slow? We need to be able to look up routes and get the handler as quickly as possible. Now, you may note there are multiple routers here; these are implementations in their experimental phase to find the most memory- and time-efficient lookup operations possible. + +For our benchmarks, which you can find in their respective files in [tests](tests/), we create a single instance of a router, load routes from the `.txt` files, write their respective arrays to `.txt` files in [storage](tests/storage/), then perform three iterations each: 10k, 100k, and 1m requests. In these iterations, we pick a random URI from the full list, and have the router perform the lookup on that randomly selected URI. The test fails only if a `404` or `405` is returned. + +### SimpleRouter + +This is an old project of mine and the first router I ever tried to write. Foundationally it relies on tokenizing each registered route into a regex and matching the incoming URI against it while looping through the internal routes array. + +``` +// big routes +Running 1000000 iterations +(100000 lookups) M: 1846.2 kb - T: 32.6156370640 s +(200000 lookups) M: 1846.2 kb - T: 63.9784071445 s +(300000 lookups) M: 1846.2 kb - T: 96.9934570789 s +(400000 lookups) M: 1846.2 kb - T: 130.2443051338 s +(500000 lookups) M: 1846.2 kb - T: 161.8348190784 s +(600000 lookups) M: 1846.3 kb - T: 197.4232161045 s +(700000 lookups) M: 1846.1 kb - T: 231.8421580791 s +(800000 lookups) M: 1846 kb - T: 262.8337080479 s +(900000 lookups) M: 1846.2 kb - T: 296.1434569359 s +Time: 330.9394941330 s +Avg/lookup: 0.0003309396 s +``` + +Interestingly, it has the lowest memory cost of the current iterations, but the absolute highest total time and time per request. The time issue is likely due to hugely unoptimized tokenization. + +### TrieRouter + +This is my first iteration of a PATRICIA trie router in PHP. 
I don't think it's currently perfect, as we could probably work on storing nodes as bytes rather than strings, but it's a good proof of concept for a tree-based mechanism. + +``` +Running 1000000 iterations +(100000 lookups) M: 4718.3 kb - T: 0.0581219196 s +(200000 lookups) M: 4718.3 kb - T: 0.1310830116 s +(300000 lookups) M: 4718.3 kb - T: 0.1909840107 s +(400000 lookups) M: 4718.3 kb - T: 0.2500770092 s +(500000 lookups) M: 4718.3 kb - T: 0.3067679405 s +(600000 lookups) M: 4718.3 kb - T: 0.3660039902 s +(700000 lookups) M: 4718.3 kb - T: 0.4237358570 s +(800000 lookups) M: 4718.3 kb - T: 0.4837160110 s +(900000 lookups) M: 4718.3 kb - T: 0.5422408581 s +Time: 0.6060788631 s +Avg/lookup: 0.0000006061 s +``` + +You can immediately see a ***huge*** time difference from SimpleRouter. Responses are in microseconds rather than milliseconds, but we're using roughly 2.5x as much memory. From experimentation (and you can see this in the [visualization](tests/storage/trie/big.txt)), I found that the trie method creates a gigantic number of child elements to store the handler for every endpoint. + +### SegmentRouter + +This second iteration is the first to achieve the best of both worlds: lower memory usage and lower time per request! In order to achieve this, we simply split routes into segments and store each segment as a node. This means that there are no extraneous child elements and navigating to an endpoint requires less effort. The [visualization](tests/storage/segment/big.txt) also shows how much simpler the tree is compared to TrieRouter. 
+ +``` +Running 1000000 iterations +(100000 lookups) M: 2891.8 kb - T: 0.0500328541 s +(200000 lookups) M: 2891.8 kb - T: 0.0995390415 s +(300000 lookups) M: 2891.8 kb - T: 0.1491589546 s +(400000 lookups) M: 2891.8 kb - T: 0.1987509727 s +(500000 lookups) M: 2891.8 kb - T: 0.2471258640 s +(600000 lookups) M: 2891.8 kb - T: 0.2962870598 s +(700000 lookups) M: 2891.8 kb - T: 0.3496289253 s +(800000 lookups) M: 2891.8 kb - T: 0.3990900517 s +(900000 lookups) M: 2891.8 kb - T: 0.4483740330 s +Time: 0.4971950054 s +Avg/lookup: 0.0000004973 s +``` + +Truly our most impressive show yet. diff --git a/Router.php b/Router.php new file mode 100644 index 0000000..584b6d4 --- /dev/null +++ b/Router.php @@ -0,0 +1,6 @@ + $value) { + foreach (array_keys($node) as $key) { + // If the key starts with a :, it's a dynamic segment if (strpos($key, ':') === 0) return $key; } - return null; + + return false; + } + + public function clear(): Router + { + $this->routes = []; + return $this; } } diff --git a/SimpleRouter.php b/SimpleRouter.php new file mode 100644 index 0000000..1be6b65 --- /dev/null +++ b/SimpleRouter.php @@ -0,0 +1,254 @@ + $trimmed, + 'action' => $action, + 'methods' => $methods, + 'constraints' => [] + ]; + + self::$lastInsertedRoute = $trimmed; + + return new self; + } + + /** + * Shorthand function to define a GET route + * + * @param string $route + * @param callable $action + * @return Router + */ + public static function get(string $route, callable $action) + { + return self::add($route, $action, 'GET'); + } + + /** + * Default function to define a POST route + * + * @param string $route + * @param callable $action + * @return Router + */ + public static function post(string $route, callable $action) + { + return self::add($route, $action, 'POST'); + } + + /** + * Return all routes currently registered + * + * @return array + */ + public static function getAllRoutes() + { + return self::$routes; + } + + /** + * Defines an action to be called when a path isn't 
found - i.e. a 404 + * + * @param callable $action + * @return void + */ + public static function pathNotFound(callable $action) + { + self::$pathNotFound = $action; + } + + /** + * Defines an action to be called with a method isn't allowed on a route - i.e. a 405 + * + * @param callable $action + * @return void + */ + public static function methodNotAllowed(callable $action) + { + self::$methodNotAllowed = $action; + } + + /** + * Redefine the default constraint for route parameters. Default is '([\w\-]+)' + * + * @param string $constraint The RegEx you want parameters to adhere to by default. Defaults to '([\w\-]+)' + * @return void + */ + public static function setDefaultConstraint(string $constraint = '([\w\-]+)') + { + self::$defaultConstraint = $constraint; + } + + private static function trimRoute(string $route): string + { + $route = trim(trim($route), '/'); + return "/$route"; + } + + /** + * Accepts a callable that defines routes, and adds a prefix to them. + * + * @param string $prefix The prefix you want added to the routes. + * @param callable $routes A function that defines routes. + * @return void + */ + public static function prefix(string $prefix, callable $routes) + { + self::$currentPrefix = $prefix; + + $routes(); + + self::$currentPrefix = ''; + } + + /** + * Define a constraint for a route parameter. If only passing one parameter, + * provide the parameter name as first argument and constraint as second. If + * adding constraints for multiple parameters, pass an array of 'parameter' => 'constraint' + * pairs. 
+ * + * @param string|array $parameter + * @param string $constraint + * @return Router + */ + public static function with(string|array $parameter, string $constraint = '') + { + $last = self::$lastInsertedRoute; + + if (is_array($parameter)) { + foreach ($parameter as $param => $constraint) { + self::$routes[$last]['constraints'][$param] = $constraint; + } + + return new self; + } + + self::$routes[$last]['constraints'][$parameter] = $constraint; + + return new self; + } + + /** + * Tokenizes the given URI using our constraint rules and returns the tokenized URI + * + * @param string $uri + * @return string + */ + private static function tokenize(string $uri, array $constraints) + { + $constraintKeys = array_keys($constraints); + + preg_match_all('/(?:{([\w\-]+)})+/', $uri, $matches); + $matches = $matches[1]; + + foreach ($matches as $match) { + $pattern = '{'.$match.'}'; + + if (in_array($match, $constraintKeys)) { + // Do some voodoo to allow users to use parentheses in their constraints if they want + $constraint = '('.rtrim(ltrim(trim($constraints[$match]), '('), ')').')'; + + $uri = str_replace($pattern, $constraint, $uri); + } else { + $uri = str_replace($pattern, self::$defaultConstraint, $uri); + } + } + + return $uri; + } + + /** + * Runs the router. Accepts a base path from which to serve the routes, and optionally whether you want to try + * and match multiple routes. 
+ * + * @param string $basePath + * @param boolean $multimatch + * @return void + */ + public static function run(string $uri, string $basePath = '', bool $multimatch = false, string $method = ''): int|array + { + $basePath = self::trimRoute($basePath); + $path = urldecode(self::trimRoute($uri)); + + $pathMatchFound = false; + $routeMatchFound = false; + + // Begin looking through routes + foreach (self::$routes as $route) { + // If the basePath isn't just "root" + if ($basePath != '/') { + $route['route'] = self::trimRoute($basePath.$route['route']); + } + + // Prepare route by tokenizing. + $tokenized = '#^'.self::tokenize($route['route'], $route['constraints']).'$#u'; + + // If the tokenized route matches the current path... + if (preg_match($tokenized, $path, $matches)) { + $pathMatchFound = true; + + // Run through the route's accepted method(s) + foreach ((array) $route['methods'] as $allowedMethod) { + // See if the current request method matches + if (strtolower($method) == strtolower($allowedMethod)) { + array_shift($matches); // Remove the first match - always contains the full url + + // If we're successful at calling the route's action, echo the result + return [$route['action'], $matches]; + + $routeMatchFound = true; + + // Do not check other routes. + break; + } + } + } + + // Break the loop if the first found route is a match. 
+ if($routeMatchFound && !$multimatch) { + break; + } + } + + // No matching route was found + if (!$routeMatchFound) { + // But a matching path exists + if ($pathMatchFound) { + return 405; + } else { + return 404; + } + } + } + + public static function clearRoutes() + { + self::$routes = []; + } +} diff --git a/TrieRouter.php b/TrieRouter.php index b9d4ede..f2d1590 100644 --- a/TrieRouter.php +++ b/TrieRouter.php @@ -1,16 +1,13 @@ {strtoupper($method)}; + $node = &$this->root[$method]; $segments = explode('/', trim($route, '/')); foreach ($segments as $segment) { @@ -21,12 +18,14 @@ class TrieRouter } $node['_handler'] = $handler; + + return $this; } // Find and handle the route - public function handleRequest(string $method, string $uri) + public function lookup(string $method, string $uri): int|array { - $node = &$this->{strtoupper($method)}; + $node = &$this->root[$method]; $segments = explode('/', trim($uri, '/')); $params = []; @@ -40,17 +39,15 @@ class TrieRouter $params[] = $segment; $node = &$node[$dynamicSegment]['_children']; } else { - return $this->notFound(); + return 404; } } } // Check if a handler exists for the current node - if (isset($node['_handler'])) { - return call_user_func_array($node['_handler'], $params); - } + if (isset($node['_handler'])) return [$node['_handler'], $params]; - return $this->notFound(); + return 404; } // Match dynamic route segments like ':id' @@ -62,10 +59,9 @@ class TrieRouter return null; } - // Default 404 handler - private function notFound() + public function clear(): Router { - echo "404 Not Found"; - return false; + $this->root = []; + return $this; } } diff --git a/other.php b/other.php new file mode 100644 index 0000000..e69de29 diff --git a/tests/segment.php b/tests/segment.php index cfed30c..a2dd520 100644 --- a/tests/segment.php +++ b/tests/segment.php @@ -8,35 +8,40 @@ The requests are randomly picked from the array of routes. */ -require_once __DIR__ . 
'/../SegmentRouter.php'; require_once 'tools.php'; -// Blog router -$b = new SegmentRouter(); -$blog = readAndAddRoutes('blog.txt', $b); - -// Github router -$g = new SegmentRouter(); -$github = readAndAddRoutes('github.txt', $g); - -// Big router -$big = new SegmentRouter(); -$bigRoutes = readAndAddRoutes('big.txt', $big); +$r = new SegmentRouter(); +// Blog lookups +$blog = readAndAddRoutes('blog.txt', $r); +writeRoutesToFile($r->routes, 'storage/segment/blog.txt'); echoTitle("Starting github lookups"); -runIterations(100000, $b, $blog); -runIterations(1000000, $b, $blog); +runIterations(100000, $r, $blog); +runIterations(1000000, $r, $blog); +unset($blog); +// Github lookups +$r->clear(); +$github = readAndAddRoutes('github.txt', $r); +writeRoutesToFile($r->routes, 'storage/segment/github.txt'); echoTitle("Starting github lookups"); -runIterations(10000, $g, $github); -runIterations(100000, $g, $github); -runIterations(1000000, $g, $github); +runIterations(10000, $r, $github); +runIterations(100000, $r, $github); +runIterations(1000000, $r, $github); +unset($github); +// Big lookups +$r->clear(); +$big = readAndAddRoutes('big.txt', $r); +writeRoutesToFile($r->routes, 'storage/segment/big.txt'); echoTitle("Starting big lookups"); -runIterations(10000, $big, $bigRoutes); -runIterations(100000, $big, $bigRoutes); -runIterations(1000000, $big, $bigRoutes); +runIterations(10000, $r, $big); +runIterations(100000, $r, $big); +runIterations(1000000, $r, $big); +unset($big); +// Parameter testing +$r->clear(); echoTitle("Testing parameters"); $routes = [ @@ -54,7 +59,6 @@ $routes = [ }], ]; -$r = new SegmentRouter(); foreach ($routes as $route) { [$method, $path, $handler] = $route; $r->add($method, $path, $handler); @@ -70,10 +74,9 @@ for ($i = 0; $i < 10; $i++) { $uri = str_replace(':extra', 'extra-' . 
rand(1, 100), $uri); $res = $r->lookup($method, $uri); - if ($res !== 200) { + if ($res === 404 || $res === 405) { echo "Failed to handle request for $uri - $res\n"; exit(1); } + $res[0](...$res[1]); } - -exit(0); diff --git a/tests/simple.php b/tests/simple.php new file mode 100644 index 0000000..6e66916 --- /dev/null +++ b/tests/simple.php @@ -0,0 +1,67 @@ +lookup($method, $uri); - if ($res !== 200) { + if ($res === 404 || $res === 405) { echo Color::red("Failed to handle request for $uri - $res\n"); exit(1); } @@ -115,3 +119,44 @@ function runIterations(int $iterations, $r, array $routes) { echo "Avg/lookup: " . Color::yellow(number_format((microtime(true) - $start) / $iterations, 10) . " s\n"); echo "\n"; } + +// take a route tree (array) and store it in a file to be read +function writeRoutesToFile(array $routes, string $file) { + // Clear the file before writing + file_put_contents($file, ''); + + // Open the file for writing + $fp = fopen($file, 'w'); + + // write a / to the first line of the file + fwrite($fp, "/\n"); + + // Start writing from the root level with an indentation of 0 and no prefix + writeNode($routes, 0, '', $fp); + + // Close the file + fclose($fp); +} + +function writeNode($node, $indent, $prefix, $fp) { + $totalItems = count($node); + $currentItem = 0; + + foreach ($node as $key => $value) { + $currentItem++; + $isLastChild = ($currentItem === $totalItems); + $connector = $isLastChild ? '└── ' : '├── '; + + $key = empty($key) ? '/' : $key; + + // Write the current node's key with the tree symbol + fwrite($fp, $prefix . $connector . $key . "\n"); + + // If the value is an array, it represents a child node, so recurse + if (is_array($value)) { + $newPrefix = $prefix . ($isLastChild ? 
' ' : '│ '); + writeNode($value, $indent + 1, $newPrefix, $fp); + } + } +} + diff --git a/tests/trie.php b/tests/trie.php index 49b6a35..a7b30c3 100644 --- a/tests/trie.php +++ b/tests/trie.php @@ -8,53 +8,31 @@ The requests are randomly picked from the array of routes. */ -require_once __DIR__ . '/../TrieRouter.php'; require_once 'tools.php'; -// Blog router -$b = new TrieRouter(); -$blog = readAndAddRoutes('blog.txt', $b); - -// Github router -$g = new TrieRouter(); -$github = readAndAddRoutes('github.txt', $g); - -// Big router -$big = new TrieRouter(); -$bigRoutes = readAndAddRoutes('big.txt', $big); - -function runIterations(int $iterations, TrieRouter $r, array $routes) { - echo "Running $iterations iterations\n"; - $start = microtime(true); - $interval = $iterations / 10; - for ($i = 0; $i < $iterations; $i++) { - // pick a random route from the array - [$method, $uri] = $routes[array_rand($routes)]; - $res = $r->handleRequest($method, $uri); - if ($res !== true) { - echo "Failed to handle request for $uri\n"; - exit(1); - } - if ($i !== 0 && $i % ($interval) === 0) echoMemoryAndTime($i, $start); - } - echo "Time: " . Color::cyan(number_format(microtime(true) - $start, 10) . " s\n"); - // echo the average time per request - echo "Avg/lookup: " . Color::yellow(number_format((microtime(true) - $start) / $iterations, 10) . 
" s\n"); - echo "\n"; -} +$r = new TrieRouter(); +// Blog test +$blog = readAndAddRoutes('blog.txt', $r); +writeRoutesToFile($r->root, 'storage/trie/blog.txt'); echoTitle("Starting blog lookups"); -runIterations(100000, $b, $blog); -runIterations(1000000, $b, $blog); +runIterations(100000, $r, $blog); +runIterations(1000000, $r, $blog); +// Github test +$r->clear(); +$github = readAndAddRoutes('github.txt', $r); +writeRoutesToFile($r->root, 'storage/trie/github.txt'); echoTitle("Starting github lookups"); -runIterations(10000, $g, $github); -runIterations(100000, $g, $github); -runIterations(1000000, $g, $github); +runIterations(10000, $r, $github); +runIterations(100000, $r, $github); +runIterations(1000000, $r, $github); +// Big test +$r->clear(); +$big = readAndAddRoutes('big.txt', $r); +writeRoutesToFile($r->root, 'storage/trie/big.txt'); echoTitle("Starting big lookups"); -runIterations(10000, $big, $bigRoutes); -runIterations(100000, $big, $bigRoutes); -runIterations(1000000, $big, $bigRoutes); - -exit(0); +runIterations(10000, $r, $big); +runIterations(100000, $r, $big); +runIterations(1000000, $r, $big);