Line | Branch | Exec | Source |
---|---|---|---|
1 | // | ||
2 | // Copyright (c) 2016-2019 Vinnie Falco (vinnie dot falco at gmail dot com) | ||
3 | // Copyright (c) 2022 Alan de Freitas (alandefreitas@gmail.com) | ||
4 | // | ||
5 | // Distributed under the Boost Software License, Version 1.0. (See accompanying | ||
6 | // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | ||
7 | // | ||
8 | // Official repository: https://github.com/boostorg/url | ||
9 | // | ||
10 | |||
11 | |||
12 | #include <boost/url/detail/config.hpp> | ||
13 | #include <boost/url/decode_view.hpp> | ||
14 | #include "decode.hpp" | ||
15 | #include <boost/url/segments_encoded_view.hpp> | ||
16 | #include <boost/url/grammar/ci_string.hpp> | ||
17 | #include <boost/assert.hpp> | ||
18 | #include <boost/core/ignore_unused.hpp> | ||
19 | #include <cstring> | ||
20 | #include "normalize.hpp" | ||
21 | |||
22 | namespace boost { | ||
23 | namespace urls { | ||
24 | namespace detail { | ||
25 | |||
26 | void | ||
27 | 5604 | pop_encoded_front( | |
28 | core::string_view& s, | ||
29 | char& c, | ||
30 | std::size_t& n) noexcept | ||
31 | { | ||
32 |
2/2✓ Branch 1 taken 5514 times.
✓ Branch 2 taken 90 times.
|
5604 | if(s.front() != '%') |
33 | { | ||
34 | 5514 | c = s.front(); | |
35 | 5514 | s.remove_prefix(1); | |
36 | } | ||
37 | else | ||
38 | { | ||
39 | 90 | detail::decode_unsafe( | |
40 | &c, | ||
41 | &c + 1, | ||
42 | s.substr(0, 3)); | ||
43 | 90 | s.remove_prefix(3); | |
44 | } | ||
45 | 5604 | ++n; | |
46 | 5604 | } | |
47 | |||
48 | int | ||
49 | 77 | compare_encoded( | |
50 | core::string_view lhs, | ||
51 | core::string_view rhs) noexcept | ||
52 | { | ||
53 | 77 | std::size_t n0 = 0; | |
54 | 77 | std::size_t n1 = 0; | |
55 | 77 | char c0 = 0; | |
56 | 77 | char c1 = 0; | |
57 | 205 | while( | |
58 |
4/4✓ Branch 1 taken 253 times.
✓ Branch 2 taken 29 times.
✓ Branch 3 taken 240 times.
✓ Branch 4 taken 42 times.
|
535 | !lhs.empty() && |
59 |
2/2✓ Branch 1 taken 240 times.
✓ Branch 2 taken 13 times.
|
253 | !rhs.empty()) |
60 | { | ||
61 | 240 | pop_encoded_front(lhs, c0, n0); | |
62 | 240 | pop_encoded_front(rhs, c1, n1); | |
63 |
2/2✓ Branch 0 taken 20 times.
✓ Branch 1 taken 220 times.
|
240 | if (c0 < c1) |
64 | 20 | return -1; | |
65 |
2/2✓ Branch 0 taken 15 times.
✓ Branch 1 taken 205 times.
|
220 | if (c1 < c0) |
66 | 15 | return 1; | |
67 | } | ||
68 | 42 | n0 += detail::decode_bytes_unsafe(lhs); | |
69 | 42 | n1 += detail::decode_bytes_unsafe(rhs); | |
70 |
2/2✓ Branch 0 taken 21 times.
✓ Branch 1 taken 21 times.
|
42 | if (n0 == n1) |
71 | 21 | return 0; | |
72 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 13 times.
|
21 | if (n0 < n1) |
73 | 8 | return -1; | |
74 | 13 | return 1; | |
75 | } | ||
76 | |||
77 | void | ||
78 | 1104 | digest_encoded( | |
79 | core::string_view s, | ||
80 | fnv_1a& hasher) noexcept | ||
81 | { | ||
82 | 1104 | char c = 0; | |
83 | 1104 | std::size_t n = 0; | |
84 |
2/2✓ Branch 1 taken 452 times.
✓ Branch 2 taken 1104 times.
|
1556 | while(!s.empty()) |
85 | { | ||
86 | 452 | pop_encoded_front(s, c, n); | |
87 | 452 | hasher.put(c); | |
88 | } | ||
89 | 1104 | } | |
90 | |||
91 | int | ||
92 | 119 | ci_compare_encoded( | |
93 | core::string_view lhs, | ||
94 | core::string_view rhs) noexcept | ||
95 | { | ||
96 | 119 | std::size_t n0 = 0; | |
97 | 119 | std::size_t n1 = 0; | |
98 | 119 | char c0 = 0; | |
99 | 119 | char c1 = 0; | |
100 | 1505 | while ( | |
101 |
4/4✓ Branch 1 taken 1521 times.
✓ Branch 2 taken 103 times.
✓ Branch 3 taken 1515 times.
✓ Branch 4 taken 109 times.
|
3145 | !lhs.empty() && |
102 |
2/2✓ Branch 1 taken 1515 times.
✓ Branch 2 taken 6 times.
|
1521 | !rhs.empty()) |
103 | { | ||
104 | 1515 | pop_encoded_front(lhs, c0, n0); | |
105 | 1515 | pop_encoded_front(rhs, c1, n1); | |
106 | 1515 | c0 = grammar::to_lower(c0); | |
107 | 1515 | c1 = grammar::to_lower(c1); | |
108 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 1507 times.
|
1515 | if (c0 < c1) |
109 | 8 | return -1; | |
110 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1505 times.
|
1507 | if (c1 < c0) |
111 | 2 | return 1; | |
112 | } | ||
113 | 109 | n0 += detail::decode_bytes_unsafe(lhs); | |
114 | 109 | n1 += detail::decode_bytes_unsafe(rhs); | |
115 |
2/2✓ Branch 0 taken 102 times.
✓ Branch 1 taken 7 times.
|
109 | if (n0 == n1) |
116 | 102 | return 0; | |
117 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 6 times.
|
7 | if (n0 < n1) |
118 | 1 | return -1; | |
119 | 6 | return 1; | |
120 | } | ||
121 | |||
122 | void | ||
123 | 276 | ci_digest_encoded( | |
124 | core::string_view s, | ||
125 | fnv_1a& hasher) noexcept | ||
126 | { | ||
127 | 276 | char c = 0; | |
128 | 276 | std::size_t n = 0; | |
129 |
2/2✓ Branch 1 taken 1642 times.
✓ Branch 2 taken 276 times.
|
1918 | while(!s.empty()) |
130 | { | ||
131 | 1642 | pop_encoded_front(s, c, n); | |
132 | 1642 | c = grammar::to_lower(c); | |
133 | 1642 | hasher.put(c); | |
134 | } | ||
135 | 276 | } | |
136 | |||
137 | int | ||
138 | 8 | compare( | |
139 | core::string_view lhs, | ||
140 | core::string_view rhs) noexcept | ||
141 | { | ||
142 | 8 | auto rlen = (std::min)(lhs.size(), rhs.size()); | |
143 |
2/2✓ Branch 0 taken 17 times.
✓ Branch 1 taken 1 times.
|
18 | for (std::size_t i = 0; i < rlen; ++i) |
144 | { | ||
145 | 17 | char c0 = lhs[i]; | |
146 | 17 | char c1 = rhs[i]; | |
147 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 11 times.
|
17 | if (c0 < c1) |
148 | 6 | return -1; | |
149 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 10 times.
|
11 | if (c1 < c0) |
150 | 1 | return 1; | |
151 | } | ||
152 |
1/2✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
|
1 | if ( lhs.size() == rhs.size() ) |
153 | 1 | return 0; | |
154 | ✗ | if ( lhs.size() < rhs.size() ) | |
155 | ✗ | return -1; | |
156 | ✗ | return 1; | |
157 | } | ||
158 | |||
159 | int | ||
160 | 156 | ci_compare( | |
161 | core::string_view lhs, | ||
162 | core::string_view rhs) noexcept | ||
163 | { | ||
164 | 156 | auto rlen = (std::min)(lhs.size(), rhs.size()); | |
165 |
2/2✓ Branch 0 taken 640 times.
✓ Branch 1 taken 149 times.
|
789 | for (std::size_t i = 0; i < rlen; ++i) |
166 | { | ||
167 | 640 | char c0 = grammar::to_lower(lhs[i]); | |
168 | 640 | char c1 = grammar::to_lower(rhs[i]); | |
169 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 634 times.
|
640 | if (c0 < c1) |
170 | 6 | return -1; | |
171 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 633 times.
|
634 | if (c1 < c0) |
172 | 1 | return 1; | |
173 | } | ||
174 |
2/2✓ Branch 2 taken 142 times.
✓ Branch 3 taken 7 times.
|
149 | if ( lhs.size() == rhs.size() ) |
175 | 142 | return 0; | |
176 |
2/2✓ Branch 2 taken 6 times.
✓ Branch 3 taken 1 times.
|
7 | if ( lhs.size() < rhs.size() ) |
177 | 6 | return -1; | |
178 | 1 | return 1; | |
179 | } | ||
180 | |||
181 | void | ||
182 | 276 | ci_digest( | |
183 | core::string_view s, | ||
184 | fnv_1a& hasher) noexcept | ||
185 | { | ||
186 |
2/2✓ Branch 2 taken 590 times.
✓ Branch 3 taken 276 times.
|
866 | for (char c: s) |
187 | { | ||
188 | 590 | c = grammar::to_lower(c); | |
189 | 590 | hasher.put(c); | |
190 | } | ||
191 | 276 | } | |
192 | |||
193 | std::size_t | ||
194 | ✗ | path_starts_with( | |
195 | core::string_view lhs, | ||
196 | core::string_view rhs) noexcept | ||
197 | { | ||
198 | ✗ | auto consume_one = []( | |
199 | core::string_view::iterator& it, | ||
200 | char &c) | ||
201 | { | ||
202 | ✗ | if(*it != '%') | |
203 | { | ||
204 | ✗ | c = *it; | |
205 | ✗ | ++it; | |
206 | ✗ | return; | |
207 | } | ||
208 | ✗ | detail::decode_unsafe( | |
209 | &c, | ||
210 | &c + 1, | ||
211 | core::string_view(it, 3)); | ||
212 | ✗ | if (c != '/') | |
213 | { | ||
214 | ✗ | it += 3; | |
215 | ✗ | return; | |
216 | } | ||
217 | ✗ | c = *it; | |
218 | ✗ | ++it; | |
219 | }; | ||
220 | |||
221 | ✗ | auto it0 = lhs.begin(); | |
222 | ✗ | auto it1 = rhs.begin(); | |
223 | ✗ | auto end0 = lhs.end(); | |
224 | ✗ | auto end1 = rhs.end(); | |
225 | ✗ | char c0 = 0; | |
226 | ✗ | char c1 = 0; | |
227 | ✗ | while ( | |
228 | ✗ | it0 < end0 && | |
229 | ✗ | it1 < end1) | |
230 | { | ||
231 | ✗ | consume_one(it0, c0); | |
232 | ✗ | consume_one(it1, c1); | |
233 | ✗ | if (c0 != c1) | |
234 | ✗ | return 0; | |
235 | } | ||
236 | ✗ | if (it1 == end1) | |
237 | ✗ | return it0 - lhs.begin(); | |
238 | ✗ | return 0; | |
239 | } | ||
240 | |||
241 | std::size_t | ||
242 | 2104 | path_ends_with( | |
243 | core::string_view lhs, | ||
244 | core::string_view rhs) noexcept | ||
245 | { | ||
246 | 5800 | auto consume_last = []( | |
247 | core::string_view::iterator& it, | ||
248 | core::string_view::iterator& end, | ||
249 | char& c) | ||
250 | { | ||
251 |
4/4✓ Branch 0 taken 3932 times.
✓ Branch 1 taken 1868 times.
✓ Branch 2 taken 5768 times.
✓ Branch 3 taken 32 times.
|
9732 | if ((end - it) < 3 || |
252 |
2/2✓ Branch 1 taken 3900 times.
✓ Branch 2 taken 32 times.
|
3932 | *(std::prev(end, 3)) != '%') |
253 | { | ||
254 | 5768 | c = *--end; | |
255 | 5768 | return; | |
256 | } | ||
257 |
1/2✓ Branch 2 taken 32 times.
✗ Branch 3 not taken.
|
32 | detail::decode_unsafe( |
258 | &c, | ||
259 | &c + 1, | ||
260 | core::string_view(std::prev( | ||
261 | end, 3), 3)); | ||
262 |
1/2✓ Branch 0 taken 32 times.
✗ Branch 1 not taken.
|
32 | if (c != '/') |
263 | { | ||
264 | 32 | end -= 3; | |
265 | 32 | return; | |
266 | } | ||
267 | ✗ | c = *--end; | |
268 | }; | ||
269 | |||
270 | 2104 | auto it0 = lhs.begin(); | |
271 | 2104 | auto it1 = rhs.begin(); | |
272 | 2104 | auto end0 = lhs.end(); | |
273 | 2104 | auto end1 = rhs.end(); | |
274 | 2104 | char c0 = 0; | |
275 | 2104 | char c1 = 0; | |
276 | 1104 | while( | |
277 |
2/2✓ Branch 0 taken 2974 times.
✓ Branch 1 taken 234 times.
|
3208 | it0 < end0 && |
278 |
2/2✓ Branch 0 taken 2900 times.
✓ Branch 1 taken 74 times.
|
2974 | it1 < end1) |
279 | { | ||
280 | 2900 | consume_last(it0, end0, c0); | |
281 | 2900 | consume_last(it1, end1, c1); | |
282 |
2/2✓ Branch 0 taken 1796 times.
✓ Branch 1 taken 1104 times.
|
2900 | if (c0 != c1) |
283 | 1796 | return 0; | |
284 | } | ||
285 |
2/2✓ Branch 0 taken 110 times.
✓ Branch 1 taken 198 times.
|
308 | if (it1 == end1) |
286 | 110 | return lhs.end() - end0; | |
287 | 198 | return 0; | |
288 | } | ||
289 | |||
290 | std::size_t | ||
291 | 831 | remove_dot_segments( | |
292 | char* dest0, | ||
293 | char const* end, | ||
294 | core::string_view input) noexcept | ||
295 | { | ||
296 | // 1. The input buffer `s` is initialized with | ||
297 | // the now-appended path components and the | ||
298 | // output buffer `dest0` is initialized to | ||
299 | // the empty string. | ||
300 | 831 | char* dest = dest0; | |
301 | 831 | bool const is_absolute = input.starts_with('/'); | |
302 | |||
303 | // Step 2 is a loop through 5 production rules: | ||
304 | // https://www.rfc-editor.org/rfc/rfc3986#section-5.2.4 | ||
305 | // | ||
306 | // There are no transitions between all rules, | ||
307 | // which enables some optimizations. | ||
308 | // | ||
309 | // Initial: | ||
310 | // - Rule A: handle initial dots | ||
311 | // If the input buffer begins with a | ||
312 | // prefix of "../" or "./", then remove | ||
313 | // that prefix from the input buffer. | ||
314 | // Rule A can only happen at the beginning. | ||
315 | // Errata 4547: Keep "../" in the beginning | ||
316 | // https://www.rfc-editor.org/errata/eid4547 | ||
317 | // | ||
318 | // Then: | ||
319 | // - Rule D: ignore a final ".." or "." | ||
320 | // if the input buffer consists only of "." | ||
321 | // or "..", then remove that from the input | ||
322 | // buffer. | ||
323 | // Rule D can only happen after Rule A because: | ||
324 | // - B and C write "/" to the input | ||
325 | // - E writes "/" to input or returns | ||
326 | // | ||
327 | // Then: | ||
328 | // - Rule B: ignore ".": write "/" to the input | ||
329 | // - Rule C: apply "..": remove seg and write "/" | ||
330 | // - Rule E: copy complete segment | ||
331 | auto append = | ||
332 | 1491 | [](char*& first, char const* last, core::string_view in) | |
333 | { | ||
334 | // append `in` to `dest` | ||
335 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 1491 times.
|
1491 | BOOST_ASSERT(in.size() <= std::size_t(last - first)); |
336 | 1491 | std::memmove(first, in.data(), in.size()); | |
337 | 1491 | first += in.size(); | |
338 | ignore_unused(last); | ||
339 | 1491 | }; | |
340 | |||
341 | 9555 | auto dot_starts_with = []( | |
342 | core::string_view str, core::string_view dots, std::size_t& n) | ||
343 | { | ||
344 | // starts_with for encoded/decoded dots | ||
345 | // or decoded otherwise. return how many | ||
346 | // chars in str match the dots | ||
347 | 9555 | n = 0; | |
348 |
2/2✓ Branch 2 taken 16356 times.
✓ Branch 3 taken 550 times.
|
16906 | for (char c: dots) |
349 | { | ||
350 |
2/2✓ Branch 1 taken 7351 times.
✓ Branch 2 taken 9005 times.
|
16356 | if (str.starts_with(c)) |
351 | { | ||
352 | 7351 | str.remove_prefix(1); | |
353 | 7351 | ++n; | |
354 | } | ||
355 | 9005 | else if (str.size() > 2 && | |
356 |
2/2✓ Branch 1 taken 24 times.
✓ Branch 2 taken 6219 times.
|
6243 | str[0] == '%' && |
357 |
5/6✓ Branch 0 taken 6243 times.
✓ Branch 1 taken 2762 times.
✓ Branch 3 taken 16 times.
✓ Branch 4 taken 8 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 9005 times.
|
15264 | str[1] == '2' && |
358 |
1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
|
16 | (str[2] == 'e' || |
359 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 16 times.
|
16 | str[2] == 'E')) |
360 | { | ||
361 | ✗ | str.remove_prefix(3); | |
362 | ✗ | n += 3; | |
363 | } | ||
364 | else | ||
365 | { | ||
366 | 9005 | n = 0; | |
367 | 9005 | return false; | |
368 | } | ||
369 | } | ||
370 | 550 | return true; | |
371 | }; | ||
372 | |||
373 | 4773 | auto dot_equal = [&dot_starts_with]( | |
374 | 4773 | core::string_view str, core::string_view dots) | |
375 | { | ||
376 | 4773 | std::size_t n = 0; | |
377 | 4773 | dot_starts_with(str, dots, n); | |
378 | 4773 | return n == str.size(); | |
379 | 831 | }; | |
380 | |||
381 | // Rule A | ||
382 | std::size_t n; | ||
383 |
2/2✓ Branch 1 taken 766 times.
✓ Branch 2 taken 81 times.
|
847 | while (!input.empty()) |
384 | { | ||
385 |
2/2✓ Branch 2 taken 4 times.
✓ Branch 3 taken 762 times.
|
766 | if (dot_starts_with(input, "../", n)) |
386 | { | ||
387 | // Errata 4547 | ||
388 | 4 | append(dest, end, "../"); | |
389 | 4 | input.remove_prefix(n); | |
390 | 4 | continue; | |
391 | } | ||
392 |
2/2✓ Branch 2 taken 750 times.
✓ Branch 3 taken 12 times.
|
762 | else if (!dot_starts_with(input, "./", n)) |
393 | { | ||
394 | 750 | break; | |
395 | } | ||
396 | 12 | input.remove_prefix(n); | |
397 | } | ||
398 | |||
399 | // Rule D | ||
400 |
2/2✓ Branch 2 taken 82 times.
✓ Branch 3 taken 749 times.
|
831 | if( dot_equal(input, ".")) |
401 | { | ||
402 | 82 | input = {}; | |
403 | } | ||
404 |
2/2✓ Branch 2 taken 3 times.
✓ Branch 3 taken 746 times.
|
749 | else if( dot_equal(input, "..") ) |
405 | { | ||
406 | // Errata 4547 | ||
407 | 3 | append(dest, end, ".."); | |
408 | 3 | input = {}; | |
409 | } | ||
410 | |||
411 | // 2. While the input buffer is not empty, | ||
412 | // loop as follows: | ||
413 |
2/2✓ Branch 1 taken 1647 times.
✓ Branch 2 taken 792 times.
|
2439 | while (!input.empty()) |
414 | { | ||
415 | // Rule B | ||
416 | 1647 | bool const is_dot_seg = dot_starts_with(input, "/./", n); | |
417 |
2/2✓ Branch 0 taken 32 times.
✓ Branch 1 taken 1615 times.
|
1647 | if (is_dot_seg) |
418 | { | ||
419 | 32 | input.remove_prefix(n - 1); | |
420 | 32 | continue; | |
421 | } | ||
422 | |||
423 | 1615 | bool const is_final_dot_seg = dot_equal(input, "/."); | |
424 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 1607 times.
|
1615 | if (is_final_dot_seg) |
425 | { | ||
426 | // We can't remove "." from a core::string_view | ||
427 | // So what we do here is equivalent to | ||
428 | // replacing s with '/' as required | ||
429 | // in Rule B and executing the next | ||
430 | // iteration, which would append this | ||
431 | // '/' to the output, as required by | ||
432 | // Rule E | ||
433 | 8 | append(dest, end, input.substr(0, 1)); | |
434 | 8 | input = {}; | |
435 | 8 | break; | |
436 | } | ||
437 | |||
438 | // Rule C | ||
439 | 1607 | bool const is_dotdot_seg = dot_starts_with(input, "/../", n); | |
440 |
2/2✓ Branch 0 taken 193 times.
✓ Branch 1 taken 1414 times.
|
1607 | if (is_dotdot_seg) |
441 | { | ||
442 | 193 | core::string_view cur_out(dest0, dest - dest0); | |
443 | 193 | std::size_t p = cur_out.find_last_of('/'); | |
444 | 193 | bool const has_multiple_segs = p != core::string_view::npos; | |
445 |
2/2✓ Branch 0 taken 132 times.
✓ Branch 1 taken 61 times.
|
193 | if (has_multiple_segs) |
446 | { | ||
447 | // output has multiple segments | ||
448 | // "erase" [p, end] if not "/.." | ||
449 | 132 | core::string_view last_seg(dest0 + p, dest - (dest0 + p)); | |
450 | 132 | bool const prev_is_dotdot_seg = dot_equal(last_seg, "/.."); | |
451 |
2/2✓ Branch 0 taken 121 times.
✓ Branch 1 taken 11 times.
|
132 | if (!prev_is_dotdot_seg) |
452 | { | ||
453 | 121 | dest = dest0 + p; | |
454 | } | ||
455 | else | ||
456 | { | ||
457 | 11 | append(dest, end, "/.."); | |
458 | } | ||
459 | } | ||
460 |
2/2✓ Branch 0 taken 11 times.
✓ Branch 1 taken 50 times.
|
61 | else if (dest0 != dest) |
461 | { | ||
462 | // Only one segment in the output: remove it | ||
463 | 11 | core::string_view last_seg(dest0, dest - dest0); | |
464 | 11 | bool const prev_is_dotdot_seg = dot_equal(last_seg, ".."); | |
465 |
2/2✓ Branch 0 taken 9 times.
✓ Branch 1 taken 2 times.
|
11 | if (!prev_is_dotdot_seg) |
466 | { | ||
467 | 9 | dest = dest0; | |
468 |
1/2✓ Branch 0 taken 9 times.
✗ Branch 1 not taken.
|
9 | if (!is_absolute) |
469 | { | ||
470 | 9 | input.remove_prefix(1); | |
471 | } | ||
472 | } | ||
473 | else | ||
474 | { | ||
475 | 2 | append(dest, end, "/.."); | |
476 | } | ||
477 | } | ||
478 | else | ||
479 | { | ||
480 | // Output is empty | ||
481 |
1/2✓ Branch 0 taken 50 times.
✗ Branch 1 not taken.
|
50 | if (is_absolute) |
482 | { | ||
483 | 50 | append(dest, end, "/.."); | |
484 | } | ||
485 | else | ||
486 | { | ||
487 | ✗ | append(dest, end, ".."); | |
488 | } | ||
489 | } | ||
490 | 193 | input.remove_prefix(n - 1); | |
491 | 193 | continue; | |
492 | } | ||
493 | |||
494 | 1414 | bool const is_final_dotdot_seg = dot_equal(input, "/.."); | |
495 |
2/2✓ Branch 0 taken 31 times.
✓ Branch 1 taken 1383 times.
|
1414 | if (is_final_dotdot_seg) |
496 | { | ||
497 | 31 | core::string_view cur_out(dest0, dest - dest0); | |
498 | 31 | std::size_t p = cur_out.find_last_of('/'); | |
499 | 31 | bool const has_multiple_segs = p != core::string_view::npos; | |
500 |
2/2✓ Branch 0 taken 18 times.
✓ Branch 1 taken 13 times.
|
31 | if (has_multiple_segs) |
501 | { | ||
502 | // output has multiple segments | ||
503 | // "erase" [p, end] if not "/.." | ||
504 | 18 | core::string_view last_seg(dest0 + p, dest - (dest0 + p)); | |
505 | 18 | bool const prev_is_dotdot_seg = dot_equal(last_seg, "/.."); | |
506 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 4 times.
|
18 | if (!prev_is_dotdot_seg) |
507 | { | ||
508 | 14 | dest = dest0 + p; | |
509 | 14 | append(dest, end, "/"); | |
510 | } | ||
511 | else | ||
512 | { | ||
513 | 4 | append(dest, end, "/.."); | |
514 | } | ||
515 | } | ||
516 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 10 times.
|
13 | else if (dest0 != dest) |
517 | { | ||
518 | // Only one segment in the output: remove it | ||
519 | 3 | core::string_view last_seg(dest0, dest - dest0); | |
520 | 3 | bool const prev_is_dotdot_seg = dot_equal(last_seg, ".."); | |
521 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 2 times.
|
3 | if (!prev_is_dotdot_seg) { |
522 | 1 | dest = dest0; | |
523 | } | ||
524 | else | ||
525 | { | ||
526 | 2 | append(dest, end, "/.."); | |
527 | } | ||
528 | } | ||
529 | else | ||
530 | { | ||
531 | // Output is empty: append dotdot | ||
532 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
10 | if (is_absolute) |
533 | { | ||
534 | 10 | append(dest, end, "/.."); | |
535 | } | ||
536 | else | ||
537 | { | ||
538 | ✗ | append(dest, end, ".."); | |
539 | } | ||
540 | } | ||
541 | 31 | input = {}; | |
542 | 31 | break; | |
543 | } | ||
544 | |||
545 | // Rule E | ||
546 | 1383 | std::size_t p = input.find_first_of('/', 1); | |
547 |
2/2✓ Branch 0 taken 676 times.
✓ Branch 1 taken 707 times.
|
1383 | if (p != core::string_view::npos) |
548 | { | ||
549 | 676 | append(dest, end, input.substr(0, p)); | |
550 | 676 | input.remove_prefix(p); | |
551 | } | ||
552 | else | ||
553 | { | ||
554 | 707 | append(dest, end, input); | |
555 | 707 | input = {}; | |
556 | } | ||
557 | } | ||
558 | |||
559 | // 3. Finally, the output buffer is set | ||
560 | // as the result of remove_dot_segments, | ||
561 | // and we return its size | ||
562 | 831 | return dest - dest0; | |
563 | } | ||
564 | |||
565 | char | ||
566 | 1138 | path_pop_back( core::string_view& s ) | |
567 | { | ||
568 |
4/4✓ Branch 1 taken 518 times.
✓ Branch 2 taken 620 times.
✓ Branch 3 taken 1090 times.
✓ Branch 4 taken 48 times.
|
1656 | if (s.size() < 3 || |
569 |
3/4✓ Branch 2 taken 518 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 470 times.
✓ Branch 5 taken 48 times.
|
518 | *std::prev(s.end(), 3) != '%') |
570 | { | ||
571 | 1090 | char c = s.back(); | |
572 | 1090 | s.remove_suffix(1); | |
573 | 1090 | return c; | |
574 | } | ||
575 | 48 | char c = 0; | |
576 |
1/2✓ Branch 2 taken 48 times.
✗ Branch 3 not taken.
|
96 | detail::decode_unsafe( |
577 | 96 | &c, &c + 1, s.substr(s.size() - 3)); | |
578 |
2/2✓ Branch 0 taken 44 times.
✓ Branch 1 taken 4 times.
|
48 | if (c != '/') |
579 | { | ||
580 | 44 | s.remove_suffix(3); | |
581 | 44 | return c; | |
582 | } | ||
583 | 4 | c = s.back(); | |
584 | 4 | s.remove_suffix(1); | |
585 | 4 | return c; | |
586 | }; | ||
587 | |||
588 | void | ||
589 | 506 | pop_last_segment( | |
590 | core::string_view& s, | ||
591 | core::string_view& c, | ||
592 | std::size_t& level, | ||
593 | bool r) noexcept | ||
594 | { | ||
595 | 506 | c = {}; | |
596 | 506 | std::size_t n = 0; | |
597 |
2/2✓ Branch 1 taken 550 times.
✓ Branch 2 taken 118 times.
|
668 | while (!s.empty()) |
598 | { | ||
599 | // B. if the input buffer begins with a | ||
600 | // prefix of "/./" or "/.", where "." is | ||
601 | // a complete path segment, then replace | ||
602 | // that prefix with "/" in the input | ||
603 | // buffer; otherwise, | ||
604 | 550 | n = detail::path_ends_with(s, "/./"); | |
605 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 540 times.
|
550 | if (n) |
606 | { | ||
607 | 10 | c = s.substr(s.size() - n); | |
608 | 10 | s.remove_suffix(n); | |
609 | 10 | continue; | |
610 | } | ||
611 | 540 | n = detail::path_ends_with(s, "/."); | |
612 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 528 times.
|
540 | if (n) |
613 | { | ||
614 | 12 | c = s.substr(s.size() - n, 1); | |
615 | 12 | s.remove_suffix(n); | |
616 | 12 | continue; | |
617 | } | ||
618 | |||
619 | // C. if the input buffer begins with a | ||
620 | // prefix of "/../" or "/..", where ".." | ||
621 | // is a complete path segment, then | ||
622 | // replace that prefix with "/" in the | ||
623 | // input buffer and remove the last | ||
624 | // segment and its preceding "/" | ||
625 | // (if any) from the output buffer | ||
626 | // otherwise, | ||
627 | 528 | n = detail::path_ends_with(s, "/../"); | |
628 |
2/2✓ Branch 0 taken 42 times.
✓ Branch 1 taken 486 times.
|
528 | if (n) |
629 | { | ||
630 | 42 | c = s.substr(s.size() - n); | |
631 | 42 | s.remove_suffix(n); | |
632 | 42 | ++level; | |
633 | 42 | continue; | |
634 | } | ||
635 | 486 | n = detail::path_ends_with(s, "/.."); | |
636 |
2/2✓ Branch 0 taken 46 times.
✓ Branch 1 taken 440 times.
|
486 | if (n) |
637 | { | ||
638 | 46 | c = s.substr(s.size() - n); | |
639 | 46 | s.remove_suffix(n); | |
640 | 46 | ++level; | |
641 | 46 | continue; | |
642 | } | ||
643 | |||
644 | // E. move the first path segment in the | ||
645 | // input buffer to the end of the output | ||
646 | // buffer, including the initial "/" | ||
647 | // character (if any) and any subsequent | ||
648 | // characters up to, but not including, | ||
649 | // the next "/" character or the end of | ||
650 | // the input buffer. | ||
651 | 440 | std::size_t p = s.size() > 1 | |
652 |
2/2✓ Branch 0 taken 370 times.
✓ Branch 1 taken 70 times.
|
440 | ? s.find_last_of('/', s.size() - 2) |
653 | 440 | : core::string_view::npos; | |
654 |
2/2✓ Branch 0 taken 272 times.
✓ Branch 1 taken 168 times.
|
440 | if (p != core::string_view::npos) |
655 | { | ||
656 | 272 | c = s.substr(p + 1); | |
657 | 272 | s.remove_suffix(c.size()); | |
658 | } | ||
659 | else | ||
660 | { | ||
661 | 168 | c = s; | |
662 | 168 | s = {}; | |
663 | } | ||
664 | |||
665 |
2/2✓ Branch 0 taken 388 times.
✓ Branch 1 taken 52 times.
|
440 | if (level == 0) |
666 | 388 | return; | |
667 |
2/2✓ Branch 1 taken 42 times.
✓ Branch 2 taken 10 times.
|
52 | if (!s.empty()) |
668 | 42 | --level; | |
669 | } | ||
670 | // we still need to skip n_skip + 1 | ||
671 | // but the string is empty | ||
672 |
4/4✓ Branch 0 taken 42 times.
✓ Branch 1 taken 76 times.
✓ Branch 2 taken 34 times.
✓ Branch 3 taken 8 times.
|
118 | if (r && level) |
673 | { | ||
674 | 34 | c = "/"; | |
675 | 34 | level = 0; | |
676 | 34 | return; | |
677 | } | ||
678 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 80 times.
|
84 | else if (level) |
679 | { | ||
680 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 4 times.
|
4 | if (c.empty()) |
681 | ✗ | c = "/.."; | |
682 | else | ||
683 | 4 | c = "/../"; | |
684 | 4 | --level; | |
685 | 4 | return; | |
686 | } | ||
687 | 80 | c = {}; | |
688 | } | ||
689 | |||
690 | void | ||
691 | 276 | normalized_path_digest( | |
692 | core::string_view s, | ||
693 | bool remove_unmatched, | ||
694 | fnv_1a& hasher) noexcept | ||
695 | { | ||
696 | 276 | core::string_view child; | |
697 | 276 | std::size_t level = 0; | |
698 | 230 | do | |
699 | { | ||
700 | 506 | pop_last_segment( | |
701 | s, child, level, remove_unmatched); | ||
702 |
2/2✓ Branch 1 taken 1138 times.
✓ Branch 2 taken 506 times.
|
1644 | while (!child.empty()) |
703 | { | ||
704 | 1138 | char c = path_pop_back(child); | |
705 | 1138 | hasher.put(c); | |
706 | } | ||
707 | } | ||
708 |
2/2✓ Branch 1 taken 230 times.
✓ Branch 2 taken 276 times.
|
506 | while (!s.empty()); |
709 | 276 | } | |
710 | |||
711 | // compare segments as if there were a normalized | ||
712 | int | ||
713 | 168 | segments_compare( | |
714 | segments_encoded_view seg0, | ||
715 | segments_encoded_view seg1) noexcept | ||
716 | { | ||
717 | // calculate path size as if it were normalized | ||
718 | auto normalized_size = | ||
719 | 336 | [](segments_encoded_view seg) -> std::size_t | |
720 | { | ||
721 |
2/2✓ Branch 1 taken 102 times.
✓ Branch 2 taken 234 times.
|
336 | if (seg.empty()) |
722 | 102 | return seg.is_absolute(); | |
723 | |||
724 | 234 | std::size_t n = 0; | |
725 | 234 | std::size_t skip = 0; | |
726 | 234 | auto begin = seg.begin(); | |
727 | 234 | auto it = seg.end(); | |
728 |
2/2✓ Branch 1 taken 658 times.
✓ Branch 2 taken 234 times.
|
892 | while (it != begin) |
729 | { | ||
730 | 658 | --it; | |
731 | 658 | decode_view dseg = **it; | |
732 |
2/2✓ Branch 1 taken 167 times.
✓ Branch 2 taken 491 times.
|
658 | if (dseg == "..") |
733 | 167 | ++skip; | |
734 |
2/2✓ Branch 1 taken 453 times.
✓ Branch 2 taken 38 times.
|
491 | else if (dseg != ".") |
735 | { | ||
736 |
2/2✓ Branch 0 taken 85 times.
✓ Branch 1 taken 368 times.
|
453 | if (skip) |
737 | 85 | --skip; | |
738 | else | ||
739 | 368 | n += dseg.size() + 1; | |
740 | } | ||
741 | } | ||
742 | 234 | n += skip * 3; | |
743 | 234 | n -= !seg.is_absolute(); | |
744 | 234 | return n; | |
745 | }; | ||
746 | |||
747 | // find the normalized size for the comparison | ||
748 | 168 | std::size_t n0 = normalized_size(seg0); | |
749 | 168 | std::size_t n1 = normalized_size(seg1); | |
750 | 168 | std::size_t n00 = n0; | |
751 | 168 | std::size_t n10 = n1; | |
752 | |||
753 | // consume the last char from a segment range | ||
754 | auto consume_last = | ||
755 | 1632 | []( | |
756 | std::size_t& n, | ||
757 | decode_view& dseg, | ||
758 | segments_encoded_view::iterator& begin, | ||
759 | segments_encoded_view::iterator& it, | ||
760 | decode_view::iterator& cit, | ||
761 | std::size_t& skip, | ||
762 | bool& at_slash) -> char | ||
763 | { | ||
764 |
2/2✓ Branch 2 taken 1005 times.
✓ Branch 3 taken 627 times.
|
1632 | if (cit != dseg.begin()) |
765 | { | ||
766 | // return last char from current segment | ||
767 | 1005 | at_slash = false; | |
768 | 1005 | --cit; | |
769 | 1005 | --n; | |
770 | 1005 | return *cit; | |
771 | } | ||
772 | |||
773 |
2/2✓ Branch 0 taken 367 times.
✓ Branch 1 taken 260 times.
|
627 | if (!at_slash) |
774 | { | ||
775 | // current segment dseg is over and | ||
776 | // previous char was not a slash | ||
777 | // so we output one | ||
778 | 367 | at_slash = true; | |
779 | 367 | --n; | |
780 | 367 | return '/'; | |
781 | } | ||
782 | |||
783 | // current segment dseg is over and | ||
784 | // last char was already the slash | ||
785 | // between segments, so take the | ||
786 | // next final segment to consume | ||
787 | 260 | at_slash = false; | |
788 |
1/2✓ Branch 2 taken 498 times.
✗ Branch 3 not taken.
|
498 | while (cit == dseg.begin()) |
789 | { | ||
790 | // take next segment | ||
791 |
2/2✓ Branch 1 taken 376 times.
✓ Branch 2 taken 122 times.
|
498 | if (it != begin) |
792 | 376 | --it; | |
793 | else | ||
794 | 122 | break; | |
795 |
2/2✓ Branch 3 taken 140 times.
✓ Branch 4 taken 236 times.
|
376 | if (**it == "..") |
796 | { | ||
797 | // skip next if this is ".." | ||
798 | 140 | ++skip; | |
799 | } | ||
800 |
2/2✓ Branch 3 taken 208 times.
✓ Branch 4 taken 28 times.
|
236 | else if (**it != ".") |
801 | { | ||
802 |
2/2✓ Branch 0 taken 70 times.
✓ Branch 1 taken 138 times.
|
208 | if (skip) |
803 | { | ||
804 | // discount skips | ||
805 | 70 | --skip; | |
806 | } | ||
807 | else | ||
808 | { | ||
809 | // or update current seg | ||
810 | 138 | dseg = **it; | |
811 | 138 | cit = dseg.end(); | |
812 | 138 | break; | |
813 | } | ||
814 | } | ||
815 | } | ||
816 | // consume from the new current | ||
817 | // segment | ||
818 | 260 | --n; | |
819 |
2/2✓ Branch 2 taken 123 times.
✓ Branch 3 taken 137 times.
|
260 | if (cit != dseg.begin()) |
820 | { | ||
821 | // in the general case, we consume | ||
822 | // one more character from the end | ||
823 | 123 | --cit; | |
824 | 123 | return *cit; | |
825 | } | ||
826 | |||
827 | // nothing left to consume in the | ||
828 | // current and new segment | ||
829 |
2/2✓ Branch 1 taken 128 times.
✓ Branch 2 taken 9 times.
|
137 | if (it == begin) |
830 | { | ||
831 | // if this is the first | ||
832 | // segment, the segments are | ||
833 | // over and there can only | ||
834 | // be repetitions of "../" to | ||
835 | // output | ||
836 | 128 | return "/.."[n % 3]; | |
837 | } | ||
838 | // at other segments, we need | ||
839 | // a slash to transition to the | ||
840 | // next segment | ||
841 | 9 | at_slash = true; | |
842 | 9 | return '/'; | |
843 | }; | ||
844 | |||
845 | // consume final segments from seg0 that | ||
846 | // should not influence the comparison | ||
847 | 168 | auto begin0 = seg0.begin(); | |
848 | 168 | auto it0 = seg0.end(); | |
849 | 168 | decode_view dseg0; | |
850 |
2/2✓ Branch 2 taken 117 times.
✓ Branch 3 taken 51 times.
|
168 | if (it0 != seg0.begin()) |
851 | { | ||
852 | 117 | --it0; | |
853 | 117 | dseg0 = **it0; | |
854 | } | ||
855 | 168 | decode_view::iterator cit0 = dseg0.end(); | |
856 | 168 | std::size_t skip0 = 0; | |
857 | 168 | bool at_slash0 = true; | |
858 |
2/2✓ Branch 0 taken 134 times.
✓ Branch 1 taken 168 times.
|
302 | while (n0 > n1) |
859 | { | ||
860 | 134 | consume_last(n0, dseg0, begin0, it0, cit0, skip0, at_slash0); | |
861 | } | ||
862 | |||
863 | // consume final segments from seg1 that | ||
864 | // should not influence the comparison | ||
865 | 168 | auto begin1 = seg1.begin(); | |
866 | 168 | auto it1 = seg1.end(); | |
867 | 168 | decode_view dseg1; | |
868 |
2/2✓ Branch 2 taken 117 times.
✓ Branch 3 taken 51 times.
|
168 | if (it1 != seg1.begin()) |
869 | { | ||
870 | 117 | --it1; | |
871 | 117 | dseg1 = **it1; | |
872 | } | ||
873 | 168 | decode_view::iterator cit1 = dseg1.end(); | |
874 | 168 | std::size_t skip1 = 0; | |
875 | 168 | bool at_slash1 = true; | |
876 |
2/2✓ Branch 0 taken 34 times.
✓ Branch 1 taken 168 times.
|
202 | while (n1 > n0) |
877 | { | ||
878 | 34 | consume_last(n1, dseg1, begin1, it1, cit1, skip1, at_slash1); | |
879 | } | ||
880 | |||
881 | 168 | int cmp = 0; | |
882 |
2/2✓ Branch 0 taken 732 times.
✓ Branch 1 taken 168 times.
|
900 | while (n0) |
883 | { | ||
884 | 732 | char c0 = consume_last( | |
885 | n0, dseg0, begin0, it0, cit0, skip0, at_slash0); | ||
886 | 732 | char c1 = consume_last( | |
887 | n1, dseg1, begin1, it1, cit1, skip1, at_slash1); | ||
888 |
2/2✓ Branch 0 taken 36 times.
✓ Branch 1 taken 696 times.
|
732 | if (c0 < c1) |
889 | 36 | cmp = -1; | |
890 |
2/2✓ Branch 0 taken 41 times.
✓ Branch 1 taken 655 times.
|
696 | else if (c1 < c0) |
891 | 41 | cmp = +1; | |
892 | } | ||
893 | |||
894 |
2/2✓ Branch 0 taken 41 times.
✓ Branch 1 taken 127 times.
|
168 | if (cmp != 0) |
895 | 41 | return cmp; | |
896 |
2/2✓ Branch 0 taken 125 times.
✓ Branch 1 taken 2 times.
|
127 | if ( n00 == n10 ) |
897 | 125 | return 0; | |
898 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | if ( n00 < n10 ) |
899 | 1 | return -1; | |
900 | 1 | return 1; | |
901 | } | ||
902 | |||
903 | } // detail | ||
904 | } // urls | ||
905 | } // boost | ||
906 | |||
907 | |||
908 |