#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include <string>
#include <vector>
// A class to incrementally parse an HTTP header as it comes in. It
// lets you know when it has received all required bytes, as specified
// by the content-length header (if present). If there is no content-length,
// it will stop reading after the blank line ("\r\n\r\n") that ends the header.
//
// Example usage:
//
// HttpParser parser;
// HttpParser::status_t status;
//
// for( ;; ) {
// // read bytes from socket into buffer, break on error
// status = parser.addBytes( buffer, length );
// if ( status != HttpParser::Incomplete ) break;
// }
//
// if ( status == HttpParser::Done ) {
// // parse fully formed http message.
// }
// Incremental HTTP request parser. Bytes are accumulated in a single
// std::string; the request line and the headers are split up in place by
// overwriting separators with NUL, so every accessor simply returns a
// pointer into that buffer.
class HttpParser
{
public:
    HttpParser();
    ~HttpParser();

    // Result of feeding data to the parser.
    enum status_t {
        Done,       // header (and body, if a content length was given) received
        Error,      // the input could not be parsed
        Incomplete  // more bytes are needed
    };

    // Appends len bytes to the internal buffer and returns the parser status.
    // Once the status is no longer Incomplete, further bytes are ignored.
    status_t addBytes( const char* bytes, unsigned len );

    // The accessors below return pointers into the internal buffer. The
    // request line is only split into method / URI once addBytes() has
    // returned Done.
    const char* getMethod();
    const char* getUri();
    // Text following the first '?' of the URI, or an empty string if none.
    const char* getQueryString();
    // Start of the body, or NULL when the content length is 0.
    const char* getBody();
    // key should be in lower case when looking up.
    // Returns NULL when no header with that name was stored.
    const char* getValue( const char* key );
    unsigned getContentLength();

private:
    // Offsets into _data.
    typedef std::vector<unsigned> IntArray;

    // States of the header state machine driven by parseHeader().
    enum State {
        p_request_line = 0,
        p_request_line_cr = 1,
        p_request_line_crlf = 2,
        p_request_line_crlfcr = 3,
        p_key = 4,
        p_key_colon = 5,
        p_key_colon_sp = 6,
        p_value = 7,
        p_value_cr = 8,
        p_value_crlf = 9,
        p_value_crlfcr = 10,
        p_content = 11, // here we are done parsing the header.
        p_error = 12    // here an error has occurred and the parse failed.
    };

    // Runs the state machine over the bytes that have not been examined yet.
    void parseHeader();
    // Splits the request line at its two spaces; false if they are missing.
    bool parseRequestLine();

    std::string _data;        // everything received so far
    unsigned _headerStart;    // offset of the first header byte
    unsigned _bodyStart;      // initialised to 0; not otherwise used here
    unsigned _parsedTo;       // offset up to which _data has been examined
    int _state;               // current State value
    unsigned _keyIndex;       // offset of the header key being parsed
    unsigned _valueIndex;     // offset of the header value being parsed
    unsigned _contentLength;  // value of the content-length header, else 0
    unsigned _contentStart;   // offset just past the end of the header
    unsigned _uriIndex;       // offset of the URI inside the request line
    IntArray _keys;           // start offset of every stored header key
    status_t _status;         // value last returned by addBytes()
};
// Creates a parser that has received no input: every offset is 0, the
// state machine sits at the start of the request line and the status is
// Incomplete.
//
// The initializers are listed in member declaration order, which is the
// order in which they are actually executed regardless of how they are
// written (and avoids the compiler's reorder warning).
HttpParser::HttpParser() :
    _headerStart( 0 ),
    _bodyStart( 0 ),
    _parsedTo( 0 ),
    _state( p_request_line ),
    _keyIndex( 0 ),
    _valueIndex( 0 ),
    _contentLength( 0 ),
    _contentStart( 0 ),
    _uriIndex( 0 ),
    _status( Incomplete )
{
}
// Nothing to release by hand: the buffer and the key list are standard
// containers that clean up after themselves.
HttpParser::~HttpParser() {}
// Feeds the not-yet-examined part of _data (from _parsedTo onward) through
// a table-driven state machine that recognises
//
//     request-line CRLF *( key ":" SP value CRLF ) CRLF
//
// While doing so it lower-cases header keys, overwrites the ':' after each
// key and the CR after the request line and after each value with NUL (so
// that they become C strings inside _data), and records the start offset of
// every key in _keys.
//
// On reaching the end of the header, _state becomes p_content,
// _contentStart points just past the final LF and _contentLength is taken
// from the content-length header if one is present. Input that matches no
// transition, or a content-length that is not a non-negative number fitting
// in an unsigned, leaves _state at p_error.
void
HttpParser::parseHeader()
{
    // run the fsm.
    const int CR = 13;
    const int LF = 10;
    const int ANY = 256; // wildcard; outside the range of any char value
    enum Action {
        // make lower case
        LOWER = 0x1,
        // convert current character to null.
        NULLIFY = 0x2,
        // set the header index to the current position
        SET_HEADER_START = 0x4,
        // set the key index to the current position
        SET_KEY = 0x8,
        // set value index to the current position.
        SET_VALUE = 0x10,
        // store current key/value pair.
        STORE_KEY_VALUE = 0x20,
        // sets content start to current position + 1
        SET_CONTENT_START = 0x40
    };
    // Transitions are tried top to bottom, so a specific character must be
    // listed before the ANY entry of the same state.
    static const struct FSM {
        State curState;
        int c;
        State nextState;
        unsigned actions;
    } fsm[] = {
        { p_request_line,        CR,  p_request_line_cr,     NULLIFY },
        { p_request_line,        ANY, p_request_line,        0 },
        { p_request_line_cr,     LF,  p_request_line_crlf,   0 },
        { p_request_line_crlf,   CR,  p_request_line_crlfcr, 0 },
        { p_request_line_crlf,   ANY, p_key,                 SET_HEADER_START | SET_KEY | LOWER },
        { p_request_line_crlfcr, LF,  p_content,             SET_CONTENT_START },
        { p_key,                 ':', p_key_colon,           NULLIFY },
        { p_key,                 ANY, p_key,                 LOWER },
        { p_key_colon,           ' ', p_key_colon_sp,        0 },
        // Empty value ("Key: " directly followed by CRLF): finish the pair
        // here. Without this entry the CR would be taken as the first value
        // character and the following header lines would be swallowed into
        // this value.
        { p_key_colon_sp,        CR,  p_value_cr,            SET_VALUE | NULLIFY | STORE_KEY_VALUE },
        { p_key_colon_sp,        ANY, p_value,               SET_VALUE },
        { p_value,               CR,  p_value_cr,            NULLIFY | STORE_KEY_VALUE },
        { p_value,               ANY, p_value,               0 },
        { p_value_cr,            LF,  p_value_crlf,          0 },
        { p_value_crlf,          CR,  p_value_crlfcr,        0 },
        { p_value_crlf,          ANY, p_key,                 SET_KEY | LOWER },
        { p_value_crlfcr,        LF,  p_content,             SET_CONTENT_START },
        { p_error,               ANY, p_error,               0 }
    };
    const unsigned ruleCount = sizeof(fsm) / sizeof(fsm[0]);

    for ( unsigned i = _parsedTo; i < _data.length(); ++i ) {
        const char c = _data[i];
        // no matching transition means the input is malformed.
        State nextState = p_error;
        for ( unsigned d = 0; d < ruleCount; ++d ) {
            const FSM& rule = fsm[d];
            if ( rule.curState != _state ) {
                continue;
            }
            if ( rule.c != ANY && rule.c != c ) {
                continue;
            }
            nextState = rule.nextState;
            if ( rule.actions & LOWER ) {
                // tolower() is only defined for unsigned char values and
                // EOF, so a plain (possibly negative) char must not be
                // passed directly.
                _data[i] = static_cast<char>(
                    tolower( static_cast<unsigned char>( _data[i] ) ) );
            }
            if ( rule.actions & NULLIFY ) {
                _data[i] = 0;
            }
            if ( rule.actions & SET_HEADER_START ) {
                _headerStart = i;
            }
            if ( rule.actions & SET_KEY ) {
                _keyIndex = i;
            }
            if ( rule.actions & SET_VALUE ) {
                _valueIndex = i;
            }
            if ( rule.actions & SET_CONTENT_START ) {
                _contentStart = i + 1;
            }
            if ( rule.actions & STORE_KEY_VALUE ) {
                // store position of first character of key.
                _keys.push_back( _keyIndex );
            }
            break;
        }
        _state = nextState;
        if ( _state == p_content ) {
            // The header is complete; find out how long the body is.
            const char* str = getValue( "content-length" );
            if ( str ) {
                while ( *str == ' ' || *str == '\t' ) {
                    ++str;
                }
                if ( !isdigit( static_cast<unsigned char>( *str ) ) ) {
                    // Negative or non-numeric length. Accepting it (as
                    // atoi would) could make the parser wait for a body
                    // that never arrives.
                    _state = p_error;
                } else {
                    const unsigned long length = strtoul( str, NULL, 10 );
                    if ( length > static_cast<unsigned>( -1 ) ) {
                        // does not fit into _contentLength.
                        _state = p_error;
                    } else {
                        _contentLength = static_cast<unsigned>( length );
                    }
                }
            }
            break;
        }
    }
    // Everything currently buffered has been dealt with.
    _parsedTo = _data.length();
}
// Splits the request line ("METHOD SP URI SP VERSION") in place by
// replacing its first two spaces with NUL and remembers where the URI
// starts.
//
// Returns false when the request line does not contain two spaces.
//
// parseHeader() has already overwritten the CR that ends the request line
// with NUL, so the first NUL in _data marks the end of that line. The
// search for the spaces is limited to this range; otherwise a malformed
// request line would be "completed" by a space found in a header or in the
// body, which would then be overwritten.
bool
HttpParser::parseRequestLine()
{
    const size_t lineEnd = _data.find( '\0' );
    if ( lineEnd == std::string::npos ) {
        return false;
    }

    const size_t sp1 = _data.find( ' ' );
    if ( sp1 == std::string::npos || sp1 >= lineEnd ) {
        return false;
    }

    const size_t sp2 = _data.find( ' ', sp1 + 1 );
    if ( sp2 == std::string::npos || sp2 >= lineEnd ) {
        return false;
    }

    _data[sp1] = 0;
    _data[sp2] = 0;
    _uriIndex = static_cast<unsigned>( sp1 + 1 );
    return true;
}
// Hands the next len bytes read from the connection to the parser and
// reports how far it has got:
//   Incomplete - keep reading,
//   Done       - header and (if a content length was given) body are in,
//   Error      - the request could not be parsed.
// Once Done or Error has been returned the parser ignores further input
// and keeps returning the same status.
HttpParser::status_t
HttpParser::addBytes( const char* bytes, unsigned len )
{
    if ( _status == Incomplete ) {
        // append the bytes to data.
        _data.append( bytes, len );

        // the header only needs parsing until its end has been found.
        if ( _state < p_content ) {
            parseHeader();
        }

        switch ( _state ) {
        case p_error:
            _status = Error;
            break;

        case p_content: {
            const bool bodyComplete =
                _contentLength == 0 ||
                _data.length() - _contentStart >= _contentLength;
            if ( bodyComplete ) {
                _status = parseRequestLine() ? Done : Error;
            }
            break;
        }

        default:
            // still inside the header.
            break;
        }
    }
    return _status;
}
// Returns a pointer to the start of the buffer, where the request method
// is located. The method is NUL-terminated once the request line has been
// split by parseRequestLine().
const char*
HttpParser::getMethod()
{
    return _data.c_str();
}
// Returns a pointer to the request URI inside the buffer, using the offset
// recorded by parseRequestLine().
const char*
HttpParser::getUri()
{
    return _data.c_str() + _uriIndex;
}
// Returns the part of the URI that follows its first '?'. When the URI has
// no '?', the result points at the URI's terminating NUL, i.e. it is an
// empty string rather than NULL.
const char*
HttpParser::getQueryString()
{
    const char* uri = getUri();
    const char* mark = strchr( uri, '?' );
    if ( mark ) {
        return mark + 1;
    }
    return uri + strlen( uri );
}
// Returns a pointer to the first byte after the header, or NULL when the
// content length is 0.
const char*
HttpParser::getBody()
{
    if ( _contentLength == 0 ) {
        return NULL;
    }
    return &_data[_contentStart];
}
// Looks up a header by name and returns a pointer to its value inside the
// buffer, or NULL if no stored key matches. key should be in lower case,
// because keys are lower-cased while the header is parsed. If several
// headers share the name, the first one stored wins.
const char*
HttpParser::getValue( const char* key )
{
    for ( size_t n = 0; n < _keys.size(); ++n ) {
        const unsigned start = _keys[n];
        if ( strcmp( &_data[start], key ) != 0 ) {
            continue;
        }
        // skip the key, the NUL that replaced ':' and the space after it.
        return &_data[start + strlen( key ) + 2];
    }
    return NULL;
}
unsigned
HttpParser::getContentLength()
{
return _contentLength;
}
Refactorings
No refactoring yet !
Michael Lucas-Smith
March 12, 2009, March 12, 2009 06:38, permalink
The basic premise of the problem is incorrect. In HTTP/1.0 the connection will close once the communique is complete. You can assume the entire content body is of the content-type specified in the HTTP headers.
In HTTP/1.1, they added the ability to have persistent connections. There are only a few combinations: Content-Length, which tells you exactly how many bytes there are; Chunked, which tells you how many bytes are to be read next until it eventually says 0; or you can still have HTTP/1.0-style communication, which requires the connection to close. There is no such thing as an ambiguous body length in HTTP, because it follows the MIME specification.
Alex Chacha
September 11, 2009, September 11, 2009 22:32, permalink
You forgot that HTTP header is a multimap structure and you may have one name point to a list of values. For example (Cookie lines tend to not get split but can be if they are too long and up to the browser):
GET / HTTP/1.1
Host: 127.0.0.1
Cookie: foo=1; bar=2;
Cookie: baz=3;
User-Agent: keyboardandmouse
See http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4 Section 4.2
mammad
November 30, 2011, November 30, 2011 23:55, permalink
Hello,
I need your code to design a web server.
Can you help me?
mammad
November 30, 2011, November 30, 2011 23:55, permalink
Help
Hello, I need your code to design a web server. Can you help me?
I'm writing a web server, and I'm particularly proud of this HTTP parser. One difficulty with HTTP is the web server doesn't know when to stop reading the HTTP request from the socket until it receives the content-length value. This class solves that problem. You keep feeding it bytes until it returns the HttpParser::Done or HttpParser::Error status.
It also breaks the header up into key-values pairs in-place, so the whole header and data is kept in one string. When you request a value, it just returns a pointer to the value.