D41d8cd98f00b204e9800998ecf8427e

I'm writing a web server, and I'm particularly proud of this HTTP parser. One difficulty with HTTP is that the web server doesn't know when to stop reading the HTTP request from the socket until it has received the Content-Length value. This class solves that problem: you keep feeding it bytes until it returns the HttpParser::Done or HttpParser::Error status.

It also breaks the header up into key-value pairs in place, so the whole header and the data are kept in one string. When you request a value, it just returns a pointer to the value.

#include <ctype.h>
#include <stdlib.h>
#include <string.h>

#include <string>
#include <vector>

// A class to incrementally parse an HTTP header as it comes in. It 
// lets you know when it has received all required bytes, as specified 
// by the content-length header (if present). If there is no content-length,
// it will stop reading after the final "\r\n" (the blank line that ends the header).
//
// Example usage:
// 
//    HttpParser parser;
//    HttpParser::status_t status;
//
//    for( ;; ) {
//        // read bytes from socket into buffer, break on error
//        status = parser.addBytes( buffer, length );
//        if ( status != HttpParser::Incomplete ) break;
//    }
//
//    if ( status == HttpParser::Done ) {
//        // parse fully formed http message.
//    }


// Incremental HTTP request parser.
//
// Bytes are appended to a single internal buffer through addBytes(). The
// header is tokenised inside that buffer (separators are overwritten with
// NUL characters), so every const char* returned by the accessors points
// into the parser's own storage and is only usable while the parser exists.
class HttpParser
{
public:
    HttpParser();
    ~HttpParser();

    // Overall result reported by addBytes().
    enum status_t {
        Done,       // header parsed, body bytes received, request line split
        Error,      // the header state machine failed or the request line
                    // could not be split into its parts
        Incomplete  // initial status; more bytes are required
    };

    // Appends len bytes to the buffer, continues parsing and returns the
    // current status. Once the status is no longer Incomplete the call
    // returns that status without consuming the bytes.
    status_t addBytes( const char* bytes, unsigned len );

    // Pointer to the start of the buffer, i.e. the request method.
    const char* getMethod();
    // Pointer to the character following the first space of the request line.
    const char* getUri();
    // Pointer just past the first '?' of the URI, or to the URI's
    // terminating NUL when the URI contains no '?'.
    const char* getQueryString();
    // Pointer to the first body byte, or NULL when the content length is 0.
    const char* getBody();
    // key should be in lower case when looking up.
    // Returns the value of the first stored header whose name equals key,
    // or NULL when there is none.
    const char* getValue( const char* key );
    // Value taken from the content-length header (0 if it was absent).
    unsigned getContentLength();

private:
    // Runs the header state machine over the bytes not yet examined.
    void parseHeader();
    // Splits the request line at its first two spaces; false if not found.
    bool parseRequestLine();

    std::string _data;          // every byte received so far
    unsigned _headerStart;      // index of the first character of the first header key
    unsigned _bodyStart;        // initialised to 0; not otherwise used in the visible code
    unsigned _parsedTo;         // header parsing resumes from this index
    int _state;                 // current State of the header state machine
    unsigned _keyIndex;         // index of the first character of the current key
    unsigned _valueIndex;       // index of the first character of the current value
    unsigned _contentLength;    // parsed content-length value
    unsigned _contentStart;     // index of the first byte after the header
    unsigned _uriIndex;         // index of the first character of the URI
    
    // Start index in _data of every stored header key.
    typedef std::vector<unsigned> IntArray;
    IntArray _keys;

    // States of the header state machine. The numeric order matters:
    // addBytes() keeps calling parseHeader() only while _state < p_content.
    enum State {
        p_request_line=0,
        p_request_line_cr=1,
        p_request_line_crlf=2,
        p_request_line_crlfcr=3,
        p_key=4,
        p_key_colon=5,
        p_key_colon_sp=6,
        p_value=7,
        p_value_cr=8,
        p_value_crlf=9,
        p_value_crlfcr=10,
        p_content=11, // here we are done parsing the header.
        p_error=12 // here an error has occurred and the parse failed.
    };

    // Status last returned by addBytes().
    status_t _status;
};

// Creates an empty parser: nothing buffered, the state machine waiting for
// the request line, and the status set to Incomplete.
//
// The initialisers are listed in the order the members are declared in the
// class. Members are always initialised in declaration order, so listing
// them differently is misleading and triggers -Wreorder.
HttpParser::HttpParser() :
    _headerStart( 0 ),
    _bodyStart( 0 ),
    _parsedTo( 0 ),
    _state( p_request_line ),
    _keyIndex( 0 ),
    _valueIndex( 0 ),
    _contentLength( 0 ),
    _contentStart( 0 ),
    _uriIndex( 0 ),
    _status( Incomplete )
{
}

// Nothing to release explicitly: the body is empty and the members clean
// up after themselves.
HttpParser::~HttpParser()
{

}

// Advances the header state machine over the bytes of _data that have not
// been examined yet (index _parsedTo up to the end of the buffer).
//
// The header is tokenised in place: the CR ending the request line, the ':'
// ending each key and the CR ending each value are overwritten with NUL, and
// keys are converted to lower case. The start index of every key is recorded
// in _keys once its value is complete.
//
// On reaching the blank line that ends the header, _state becomes p_content,
// _contentStart is set to the first body byte and _contentLength is read
// from the "content-length" header if there is one. Any byte for which the
// current state has no transition moves the machine to p_error. In
// particular the ':' of a header must be followed by a space, and every CR
// must be followed by LF.
void
HttpParser::parseHeader()
{
    // run the fsm.
    const int  CR = 13;
    const int  LF = 10;
    const int  ANY = 256; // wildcard; cannot collide with any char value

    enum Action {
        // make lower case
        LOWER = 0x1,

        // convert current character to null.
        NULLIFY = 0x2,

        // set the header index to the current position
        SET_HEADER_START = 0x4,

        // set the key index to the current position
        SET_KEY = 0x8,

        // set value index to the current position.
        SET_VALUE = 0x10,

        // store current key/value pair.
        STORE_KEY_VALUE = 0x20,

        // sets content start to current position + 1
        SET_CONTENT_START = 0x40
    };

    // Transition table. Rows are tried in order and the first row matching
    // the current state and character wins, so specific characters must be
    // listed before the ANY row of the same state.
    static const struct FSM {
        State curState;
        int c;
        State nextState;
        unsigned actions;
    } fsm[] = {
        { p_request_line,         CR, p_request_line_cr,     NULLIFY                               },
        { p_request_line,        ANY, p_request_line,        0                                     },
        { p_request_line_cr,      LF, p_request_line_crlf,   0                                     },
        { p_request_line_crlf,    CR, p_request_line_crlfcr, 0                                     },
        { p_request_line_crlf,   ANY, p_key,                 SET_HEADER_START | SET_KEY | LOWER    },
        { p_request_line_crlfcr,  LF, p_content,             SET_CONTENT_START                     },
        { p_key,                 ':', p_key_colon,           NULLIFY                               },
        { p_key,                 ANY, p_key,                 LOWER                                 },
        { p_key_colon,           ' ', p_key_colon_sp,        0                                     },
        // Empty value ("Key: \r\n"): finish the pair right away instead of
        // swallowing the CR LF and the following header line as value text.
        { p_key_colon_sp,         CR, p_value_cr,            SET_VALUE | NULLIFY | STORE_KEY_VALUE },
        { p_key_colon_sp,        ANY, p_value,               SET_VALUE                             },
        { p_value,                CR, p_value_cr,            NULLIFY | STORE_KEY_VALUE             },
        { p_value,               ANY, p_value,               0                                     },
        { p_value_cr,             LF, p_value_crlf,          0                                     },
        { p_value_crlf,           CR, p_value_crlfcr,        0                                     },
        { p_value_crlf,          ANY, p_key,                 SET_KEY | LOWER                       },
        { p_value_crlfcr,         LF, p_content,             SET_CONTENT_START                     },
        { p_error,               ANY, p_error,               0                                     }
    };

    for( unsigned i = _parsedTo; i < _data.length(); ++i) {
        char c = _data[i];
        // Stays p_error when no row matches the current state/character.
        State nextState = p_error;

        for ( unsigned d = 0; d < sizeof(fsm) / sizeof(FSM); ++d ) {
            if ( fsm[d].curState == _state && 
                    ( c == fsm[d].c || fsm[d].c == ANY ) ) {

                nextState = fsm[d].nextState;

                if ( fsm[d].actions & LOWER ) {
                    // tolower() is only defined for values representable as
                    // unsigned char, so bytes >= 0x80 must not be passed as
                    // a (possibly negative) plain char.
                    _data[i] = static_cast<char>(
                        tolower( static_cast<unsigned char>( _data[i] ) ) );
                }

                if ( fsm[d].actions & NULLIFY ) {
                    _data[i] = 0;
                }

                if ( fsm[d].actions & SET_HEADER_START ) {
                    _headerStart = i;
                }

                if ( fsm[d].actions & SET_KEY ) {
                    _keyIndex = i;
                }

                if ( fsm[d].actions & SET_VALUE ) {
                    _valueIndex = i;
                }

                if ( fsm[d].actions & SET_CONTENT_START ) {
                    _contentStart = i + 1;
                }

                if ( fsm[d].actions & STORE_KEY_VALUE ) {
                    // store position of first character of key.
                    _keys.push_back( _keyIndex );
                }

                break;
            }
        }

        _state = nextState;

        if ( _state == p_content ) {
            // Header complete: everything after this byte is body data.
            const char* str = getValue("content-length");
            if ( str ) {
                // atoi() would turn "-1" into a huge unsigned length and
                // leave the caller waiting for bytes that never arrive, so
                // negative or out-of-range lengths are a parse error.
                // Non-numeric text still yields 0, as it did with atoi().
                const long parsed = strtol( str, NULL, 10 );
                const unsigned long maxLength = static_cast<unsigned>( -1 );
                if ( parsed < 0 ||
                        static_cast<unsigned long>( parsed ) > maxLength ) {
                    _state = p_error;
                } else {
                    _contentLength = static_cast<unsigned>( parsed );
                }
            }
            break;
        }
    }

    // Everything currently buffered has been looked at (or belongs to the
    // body, which this function never inspects).
    _parsedTo = _data.length();

}

// Splits the request line "METHOD URI VERSION" in place by overwriting its
// first two spaces with NUL, and records where the URI starts.
//
// Returns false, leaving the buffer untouched, when the request line does
// not contain two spaces. The search is limited to the request line itself,
// whose terminating CR parseHeader() has already replaced with NUL. Without
// that limit a malformed request line would pick up spaces from the header
// values, corrupt them and be reported as valid.
bool
HttpParser::parseRequestLine()
{
    // First NUL in the buffer == end of the request line. If there is none,
    // npos compares greater than every index and no limit applies.
    const size_t lineEnd = _data.find( '\0' );

    const size_t sp1 = _data.find( ' ', 0 );
    if ( sp1 == std::string::npos || sp1 >= lineEnd ) return false;

    const size_t sp2 = _data.find( ' ', sp1 + 1 );
    if ( sp2 == std::string::npos || sp2 >= lineEnd ) return false;

    _data[sp1] = 0;
    _data[sp2] = 0;
    _uriIndex = static_cast<unsigned>( sp1 + 1 );
    return true;
}

// Feeds len more bytes to the parser and reports the resulting status.
//
// While the status is Incomplete the bytes are appended to the buffer and
// header parsing continues. When the header is complete and either no body
// is expected or enough body bytes have arrived, the request line is split
// and the status becomes Done (or Error if the split fails). A finished
// parser ignores further input and keeps returning its final status.
HttpParser::status_t
HttpParser::addBytes( const char* bytes, unsigned len )
{
    if ( _status == Incomplete ) {
        _data.append( bytes, len );

        // The header state machine only runs until the header is complete.
        if ( _state < p_content ) {
            parseHeader();
        }

        switch ( _state ) {
        case p_error:
            _status = Error;
            break;

        case p_content: {
            const bool noBodyExpected = ( _contentLength == 0 );
            if ( noBodyExpected ||
                    _data.length() - _contentStart >= _contentLength ) {
                _status = parseRequestLine() ? Done : Error;
            }
            break;
        }

        default:
            // still inside the header; wait for more bytes.
            break;
        }
    }

    return _status;
}

// Returns a pointer to the start of the buffer, which is where the request
// method begins.
const char*
HttpParser::getMethod()
{
    return _data.c_str();
}

// Returns a pointer to the URI, located at offset _uriIndex in the buffer.
const char*
HttpParser::getUri()
{
    return _data.c_str() + _uriIndex;
}

// Returns a pointer to the text following the first '?' of the URI. When the
// URI has no '?', the result points at the URI's terminating NUL, i.e. an
// empty string.
const char*
HttpParser::getQueryString()
{
    const char* uri = getUri();
    const char* questionMark = strchr( uri, '?' );

    if ( questionMark == NULL ) {
        return uri + strlen( uri );
    }
    return questionMark + 1;
}

// Returns a pointer to the first body byte, or NULL when the content length
// is zero.
const char*
HttpParser::getBody()
{
    if ( _contentLength == 0 ) {
        return NULL;
    }
    return &_data[_contentStart];
}

// Looks up a header by name and returns a pointer to its value, or NULL when
// no stored header has that name. Header names are stored in lower case, so
// key should be in lower case too. If a name occurs more than once, the
// value of the first occurrence is returned.
const char*
HttpParser::getValue( const char* key )
{
    for ( size_t k = 0; k < _keys.size(); ++k ) {
        const char* storedKey = &_data[ _keys[k] ];
        if ( strcmp( storedKey, key ) != 0 ) {
            continue;
        }
        // Skip the key, the NUL that replaced ':' and the following space.
        return storedKey + strlen( key ) + 2;
    }

    return NULL;
}

// Returns the content length recorded while parsing the header; it is 0
// until a content-length header has been parsed.
unsigned
HttpParser::getContentLength()
{
    return _contentLength;
}

Refactorings

No refactoring yet !

3cee020cead3f7a92fcaeef7be8cd99b

Michael Lucas-Smith

March 12, 2009, March 12, 2009 06:38, permalink

1 rating. Login to rate!

The basic premise of the problem is incorrect. In HTTP/1.0 the connection will close once the communique is complete. You can assume the entire content body is of the content-type specified in the HTTP headers.
In HTTP/1.1, they added the ability to have persistent connections. There are only a few combinations: Content-Length, which tells you exactly how many bytes there are; Chunked, which tells you how many bytes are to be read next until it eventually says 0; or HTTP/1.0-style communication, which requires the connection to close. There is no such thing as an ambiguous body length in HTTP, because it follows the MIME specification.

0280f87374f967d2da5d456408d6be52

Alex Chacha

September 11, 2009, September 11, 2009 22:32, permalink

2 ratings. Login to rate!

You forgot that the HTTP header is a multimap structure, so one name may map to a list of values. For example (Cookie lines tend not to be split, but they can be if they are too long; it is up to the browser):

GET / HTTP/1.1
Host: 127.0.0.1
Cookie: foo=1; bar=2;
Cookie: baz=3;
User-Agent: keyboardandmouse

See http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4 Section 4.2

598c2d80efbe336bffa7019f34e76bf1

mammad

November 30, 2011, November 30, 2011 23:55, permalink

No rating. Login to rate!

hello
I need your code to design a web server.
can you help me?

598c2d80efbe336bffa7019f34e76bf1

mammad

November 30, 2011, November 30, 2011 23:55, permalink

No rating. Login to rate!

help

hello
I need your code to design a web server.
can you help me?

Your refactoring





Format Copy from initial code

or Cancel