some helpers for parsing query strings

feature/UrlUtils
Kenneth Barbour 2018-10-11 18:22:59 -04:00 committed by Kenneth Barbour
parent 6f3fcc7932
commit c0e91aff7e
3 changed files with 167 additions and 0 deletions

62
src/urlenc.cpp 100644
View File

@ -0,0 +1,62 @@
#include "urlenc.h"
namespace urlenc
{
/**
 * Number of characters `str` occupies once its percent-escapes are
 * collapsed, not counting the terminating NUL.
 *
 * A '%' is treated as the start of an escape only when two further
 * characters follow it before the end of the string; this is the same
 * rule decode() applies, so the result is the text length decode()
 * produces when its buffer is large enough. A '%' too close to the end
 * is counted as an ordinary character.
 *
 * @param str NUL-terminated, percent-encoded string (may be empty).
 * @return decoded length in characters; 0 for an empty or null string.
 */
int decoded_length(const char* str)
{
  if (!str) return 0;

  int length = 0;
  for (int i = 0; str[i] != '\0'; i++) {
    // The short-circuit keeps us from ever reading past the terminator.
    if (str[i] == '%' && str[i+1] != '\0' && str[i+2] != '\0')
      i += 2; // the two hex digits belong to this one decoded character
    length++;
  }
  return length;
}
}
/**
 * Percent-decode `encoded` into `buff`.
 *
 * Every "%XY" triplet is replaced by the single character returned by
 * enctochar(); all other characters are copied unchanged ('+' is not
 * turned into a space). A '%' with fewer than two characters after it
 * is copied literally. Output is truncated so that at most `len` bytes,
 * including the terminating NUL, are written.
 *
 * @param encoded NUL-terminated source string; null is treated as empty.
 * @param buff    destination buffer with room for at least `len` bytes.
 * @param len     capacity of `buff` in bytes.
 * @return number of bytes written to `buff`, terminator included;
 *         0 when nothing could be written (`buff` null or `len` 0).
 */
int urlenc::decode(const char * encoded, char* buff, unsigned int len)
{
  // With no room for even the terminator there is nothing safe to write.
  // This also keeps `len - 1` below from wrapping around to UINT_MAX.
  if (buff == 0 || len == 0) return 0;

  unsigned int j = 0;
  if (encoded != 0) {
    for (unsigned int i = 0; encoded[i] != '\0' && j < len - 1; i++) {
      if (encoded[i] == '%' && encoded[i+1] != '\0' && encoded[i+2] != '\0') {
        buff[j++] = urlenc::enctochar(encoded + i);
        i += 2; // skip the two hex digits just consumed
      } else {
        buff[j++] = encoded[i];
      }
    }
  }
  buff[j++] = '\0';
  return static_cast<int>(j);
}
/**
 * Convert the "%XY" triplet at the start of `encoded` into the character
 * it stands for. Only the first three characters are examined.
 *
 * @param encoded NUL-terminated string expected to begin with '%'.
 * @return the decoded character; the first character itself when it is
 *         not '%'; '\0' when fewer than two characters follow the '%'.
 */
char urlenc::enctochar(const char * encoded)
{
  const char lead = encoded[0];
  if (lead != '%') {
    return lead; // not an escape: hand back the character as it is
  }

  // Refuse to read beyond the terminator of a truncated escape.
  const bool truncated = (encoded[1] == '\0') || (encoded[2] == '\0');
  if (truncated) {
    return '\0';
  }

  const char high = hexval(encoded[1]);
  const char low = hexval(encoded[2]);
  return 16 * high + low;
}
/**
 * Numeric value of a single hexadecimal digit character.
 *
 * Works on character codes directly and assumes ASCII ordering.
 * The result for characters that are not hex digits is not specified.
 *
 * @param val one of '0'-'9', 'A'-'F' or 'a'-'f'.
 * @return the digit's value, 0 through 15, for valid input.
 */
char urlenc::hexval(char val)
{
  char digit = val - 48;  // 48 is '0': decimal digits land on 0..9
  if (digit > 9) {
    digit -= 7;           // closes the gap between '9' and 'A', so 'A' -> 10
    if (digit > 15) {
      digit -= 32;        // lower-case letters sit 32 above upper-case
    }
  }
  return digit % 16;
}
namespace urlenc
{
/**
 * Locate the "key=value" pair for `key` inside a query string.
 *
 * The key is only recognised at the start of a pair, i.e. at the very
 * beginning of `query` or directly after a '&', and it must be followed
 * immediately by '='. Keys are compared byte for byte; no percent
 * decoding is applied to either string.
 *
 * @param query NUL-terminated query component, e.g. "foo=1&bar=2".
 * @param key   NUL-terminated key to look for.
 * @return index in `query` of the first character of the matching key,
 *         or -1 when the key is absent, empty, or an argument is null.
 */
int query_key_pos(const char * query, const char * key)
{
  if (!query || !key || key[0] == '\0') return -1;

  int start = 0; // index where the current pair begins
  while (query[start] != '\0') {
    // Compare the key against the beginning of this pair. The loop stops
    // at the first difference, so it never reads past query's terminator.
    int j = 0;
    while (key[j] != '\0' && query[start + j] == key[j]) j++;
    if (key[j] == '\0' && query[start + j] == '=') return start;

    // No match here: move on to the character after the next '&'.
    while (query[start] != '\0' && query[start] != '&') start++;
    if (query[start] == '&') start++;
  }
  return -1;
}
}

10
src/urlenc.h 100644
View File

@ -0,0 +1,10 @@
#ifndef URLENC_H
#define URLENC_H

/**
 * Helpers for percent-decoding strings and for finding keys in the
 * query component of a URL. All strings are NUL-terminated C strings.
 */
namespace urlenc
{
/** Length of `str` after its %XY escapes are collapsed, NUL excluded. */
int decoded_length(const char* str);

/**
 * Percent-decode `encoded` into `buff`, writing no more than `len` bytes
 * including the terminating NUL.
 * @return number of bytes written, terminator included.
 */
int decode(const char * encoded, char* buff, unsigned int len);

/**
 * Character represented by the "%XY" triplet at the start of `encoded`.
 * Returns the first character unchanged when it is not '%', and '\0'
 * when the triplet is cut short by the end of the string.
 */
char enctochar(const char * encoded);

/** Value (0-15) of one hex digit character, upper or lower case. */
char hexval(char val);

/**
 * Index in `query` at which `key`, followed by '=', is found.
 * @return the index, or -1 when there is no match or `key` is empty.
 */
int query_key_pos(const char * query, const char * key);
}

#endif // URLENC_H

View File

@ -0,0 +1,95 @@
#include "catch.hpp"
#include "urlenc.h"
#include <iostream>
using namespace urlenc;
using Catch::Matchers::Equals;
// enctochar() turns a complete "%XY" triplet into the character it encodes.
TEST_CASE("Convert some encodings to characters","[urlenc][enctochar]")
{
  const char space = enctochar("%20");
  const char bang = enctochar("%21");
  const char plus = enctochar("%2B");

  CHECK(space == ' ');
  CHECK(bang == '!');
  CHECK(plus == '+');
}
// Only the leading triplet is decoded; whatever follows it is ignored.
TEST_CASE("Stop decoding after 3 characters","[urlenc][enctochar]")
{
  const char * const with_trailing_text = "%20foobarbaz";
  CHECK(enctochar(with_trailing_text) == ' ');
}
// decode() returns the number of bytes written including the terminating
// NUL, and never writes more than the length it is given.
TEST_CASE("Replace all the %__ encodings with the proper character","[urlenc][decode]")
{
  char buff[20] = {};
  char lil[4] = {};

  CHECK(decode("foo=bar%20baz", buff, 12) == 12);
  CHECK_THAT(buff, Equals("foo=bar baz"));

  // Long string: output is cut off at 11 characters plus the terminator
  CHECK(decode("AAAABBBBCCCCDDDDEEEEFFFF", buff, 12) == 12);
  CHECK_THAT(buff, Equals("AAAABBBBCCC"));

  // Long string with encodings
  CHECK(decode("AAAA%20BBBB%20CCCC", buff, 12) == 12);
  CHECK_THAT(buff, Equals("AAAA BBBB C"));

  // This would be a malformed string, but this is my expected behavior
  CHECK(decode("hack%", buff, 12) == 6);
  CHECK_THAT(buff, Equals("hack%"));
  CHECK(decode("hack%r", buff, 12) == 7);
  CHECK_THAT(buff, Equals("hack%r"));

  // Another overrun test: the destination really is only 4 bytes long,
  // so a write past the stated length would land outside the array.
  CHECK(decode("AA%", lil, 4) == 4);
  CHECK_THAT(lil, Equals("AA%"));
}
// hexval() must map every hex digit, in either case, to its value 0-15.
TEST_CASE("Test the hexval helper function","[urlenc][hexval]")
{
  // '0'..'9' -> 0..9
  for (char c = '0'; c <= '9'; ++c) {
    CAPTURE(c);
    CHECK(hexval(c) == c - '0');
  }
  // 'A'..'F' -> 10..15
  for (char c = 'A'; c <= 'F'; ++c) {
    CAPTURE(c);
    CHECK(hexval(c) == 10 + (c - 'A'));
  }
  // 'a'..'f' -> 10..15
  for (char c = 'a'; c <= 'f'; ++c) {
    CAPTURE(c);
    CHECK(hexval(c) == 10 + (c - 'a'));
  }
  // undocumented behavior if char is not a hex
}
// decoded_length() counts each complete %XY escape as one character and
// leaves an incomplete escape at the end of the string uncollapsed.
TEST_CASE("Check the length of a decoded string","[urlenc][decoded_length]")
{
  struct Sample { const char * encoded; int expected; };
  const Sample samples[] = {
    { "foo%20bar%21baz%22", 12 },
    { "%20bar%21baz%22",     9 },
    { "bla?bla=bla&blabla", 18 },
    { "foo%",                4 },
    { "AAA%A",               5 },
  };
  const unsigned int count = sizeof(samples) / sizeof(samples[0]);

  for (unsigned int s = 0; s < count; ++s) {
    CAPTURE(samples[s].encoded);
    CHECK(decoded_length(samples[s].encoded) == samples[s].expected);
  }
}
// query_key_pos() reports the index at which a key starts in the query
// string, or -1 when the key is missing or empty.
TEST_CASE("Locate position of a key/value pair in the query component","[urlenc][query_key_pos]")
{
  const char * const three_pairs = "foo=1&bar=2&baz=3";
  const char * const nothing = "";

  // keys that are present, at the front and in the middle
  CHECK(query_key_pos(three_pairs,"bar") == 6);
  CHECK(query_key_pos(three_pairs,"foo") == 0);

  // key that is absent
  CHECK(query_key_pos(three_pairs,"qux") == -1);

  // pairs with an empty value do not disturb the search
  CHECK(query_key_pos("foo=1&bar=&baz=3","baz") == 11);
  CHECK(query_key_pos("j=10&bar=&baz=3","bar") == 5);

  // empty query and/or empty key
  CHECK(query_key_pos(nothing,nothing) == -1);
  CHECK(query_key_pos(nothing,"bar") == -1);
  CHECK(query_key_pos("foo=bar&baz=qux",nothing) == -1);
}