前面讲解的httpd_got_request函数是判断请求的格式是否正确,httpd_parse_request函数则是对请求的数据进行分析,并初始化相关的数据作为返回请求时处理的依据,主要是获取用户请求的方式、文件的路径、请求的协议版本,并据此判断相关的数据是否正确并做对应的处理。
httpd_parse_request函数
(1)httpd_parse_request函数首先调用函数bufgets获取以回车符或者是间隔符间隔的一组数据的字符串,在这里的作用是获取请求行。
(2)经过处理获取用户请求的路径信息存储在url中。
(3)经过处理获取用户请求使用的协议信息存储在protocol中,如果请求行中没有协议信息则认为是HTTP0.9(同时将hc->mime_flag设置为0),协议信息不为空且不是HTTP/1.0的则认为是HTTP1.1及以后的版本(设置hc->one_one为1)。
(4)判断url是否以http://开头。如果以http://开头,但请求不是HTTP1.1及以后的版本(hc->one_one为0),由于低版本不支持这种写法,返回400错误网页并退出函数;如果http://之后的数据中不存在'/'字符,返回400错误网页并退出函数;如果http://之后的主机名的第一个字符为'.',返回400错误网页并退出函数;对于通过检查的,设置hc->reqhost的值为url去掉http://头之后、第一个'/'字符之前的主机名,并设置url指向主机名之后的第一个'/'字符(包含该字符)。
(5)判断url的字符串的第一个字符是否为'/'字符,对于不是此字符的返回400错误网页,并退出函数。
(6)根据method_str判断用户的请求方式(GET、HEAD、POST、PUT、DELETE、TRACE),存储在hc->method中,对于不支持的请求方式返回501错误网页,并退出函数。
(7)将原始的未经过转换的url字符串url存储在hc->encodedurl中。
(8)调用strdecode函数将url字符串解码转换为ASCII字符串后存储在hc->decodedurl中。
(9)设置原始的文件名hc->origfilename为hc->decodedurl的第一个字符之后的数据。
(10)如果hc->origfilename的第一个字符为空字符设置hc->origfilename的值为"."。
(11)如果原始的url字符串hc->encodedurl中存在‘?’字符,将此字符之后的数据存储在hc->query中,去除hc->origfilename字符串后面的‘?’后面的数据。
(12)调用de_dotdot函数分析hc->origfilename中的内容,将hc->origfilename中的"//"转换为"/",去除"./"和"/./",循环去除此后hc->origfilename中开头的"../",将"/../"转换为"/"等操作,去除hc->origfilename中无效的字符。
(13)判断hc->origfilename是否有效(首字符为'/'或者首字符为'.'且二字符为'.'三字符为空或者为'/'的认定为无效),对于无效的返回400错误并退出函数。
(14)对于不是HTTP0.9的请求(hc->mime_flag不为0)处理请求首部相关信息,获取相关信息并初始化对应的连接状态的相关信息。
(15)对于是HTTP1.1或者之后的HTTP版本,如果hc->reqhost第一个字符为空字符且hc->hdrhost第一个字符为空字符则返回400错误并退出函数;如果设置了hc->keep_alive,则设置延迟关闭连接标志hc->should_linger。
(16)设置hc->expnfilename的值为hc->origfilename的值。
(17)如果hc->expnfilename的第一个字符为'~':对于定义了TILDE_MAP_1的,调用函数tilde_map_1,返回值为0则返回404错误退出函数(tilde_map_1函数返回值一直都是1,其作用是将hc->expnfilename的值转换为TILDE_MAP_1/hc->expnfilename);对于定义了TILDE_MAP_2的,调用函数tilde_map_2,在获取用户登录信息失败或者调用expand_symlinks函数得到的rest的第一个字符不为0时返回404错误退出函数。
(18)如果hc->hs->vhost的值不为0,调用vhost_map函数,如果其返回值为0(例如其中调用getsockname失败)将会返回500错误退出函数。
(19)调用expand_symlinks函数如果返回值为0返回500错误退出函数。
(20)设置hc->pathinfo为经过expand_symlinks函数处理过的数据。
(21)如果hc->pathinfo的第一个字符不为空字符,hc->pathinfo字符串的长度小于hc->origfilename字符串的长度,且hc->origfilename中从两者长度差值处开始的内容与hc->pathinfo的值相等,则将hc->origfilename中该差值位置的前一个字符设置为空字符,即从hc->origfilename中去除pathinfo部分。
(22)如果hc->expnfilename字符串的第一个字符为'/'(绝对路径),判断其是否以当前目录hc->hs->cwd开头,是则去除当前目录前缀后重新设置hc->expnfilename字符串的值,否则(定义了TILDE_MAP_2且位于hc->altdir之下的除外)返回403错误退出函数;最后返回0表示成功并退出函数。
流程图
源程序
/*
** Parse the request held in the connection's read buffer and fill in the
** request-related fields of *hc.
**
** Steps, in order:
**   1. Split the request line into method, URL and protocol.  A request
**      line with no protocol token is treated as HTTP/0.9 (mime_flag is
**      cleared); any protocol token other than "HTTP/1.0" sets one_one.
**   2. Accept an absolute "http://host/path" URL (only when one_one is set),
**      storing the host part in hc->reqhost.
**   3. Map the method string onto hc->method; unknown methods get a 501.
**   4. Store the encoded URL, decode it, derive origfilename, split off the
**      query string and normalise the path with de_dotdot().
**   5. When mime_flag is set, read the header lines and record the ones
**      that are recognised.
**   6. For one_one requests, require a host (from the URL or a Host:
**      header) and request a lingering close when keep_alive is set.
**   7. Apply tilde mapping and virtual-host mapping, expand symlinks, split
**      off pathinfo, and check that an absolute expanded path is still
**      inside hc->hs->cwd (or hc->altdir when TILDE_MAP_2 is defined).
**
** Returns 0 on success.  On any failure an error response has already been
** sent through httpd_send_err() and -1 is returned.
*/
int
httpd_parse_request( httpd_conn* hc )
{
    char* buf;
    char* method_str;
    char* url;
    char* protocol;
    char* reqhost;
    char* eol;
    char* cp;
    char* pi;

    hc->checked_idx = 0;	/* reset */
    method_str = bufgets( hc );
    /* Defensive: never hand a null request line to strpbrk(). */
    if ( method_str == (char*) 0 )
    {
        httpd_send_err( hc, 400, httpd_err400title, "", httpd_err400form, "" );
        return -1;
    }

    /* The URL starts after the first run of whitespace. */
    url = strpbrk( method_str, " \t\012\015" );
    if ( url == (char*) 0 )
    {
        httpd_send_err( hc, 400, httpd_err400title, "", httpd_err400form, "" );
        return -1;
    }
    *url++ = '\0';
    url += strspn( url, " \t\012\015" );

    /* The protocol, if any, follows the URL. */
    protocol = strpbrk( url, " \t\012\015" );
    if ( protocol == (char*) 0 )
    {
        protocol = "HTTP/0.9";
        hc->mime_flag = 0;
    }
    else
    {
        *protocol++ = '\0';
        protocol += strspn( protocol, " \t\012\015" );
        if ( *protocol != '\0' )
        {
            eol = strpbrk( protocol, " \t\012\015" );
            if ( eol != (char*) 0 )
            {
                *eol = '\0';
            }
            if ( strcasecmp( protocol, "HTTP/1.0" ) != 0 )
            {
                hc->one_one = 1;
            }
        }
    }
    hc->protocol = protocol;

    /* Check for HTTP/1.1 absolute URL. */
    if ( strncasecmp( url, "http://", 7 ) == 0 )
    {
        if ( ! hc->one_one )
        {
            httpd_send_err( hc, 400, httpd_err400title, "", httpd_err400form, "" );
            return -1;
        }
        reqhost = url + 7;
        url = strchr( reqhost, '/' );
        if ( url == (char*) 0 )
        {
            httpd_send_err( hc, 400, httpd_err400title, "", httpd_err400form, "" );
            return -1;
        }
        /* Temporarily terminate the host part so it can be checked/copied. */
        *url = '\0';
        if ( strchr( reqhost, '/' ) != (char*) 0 || reqhost[0] == '.' )
        {
            httpd_send_err( hc, 400, httpd_err400title, "", httpd_err400form, "" );
            return -1;
        }
        httpd_realloc_str( &hc->reqhost, &hc->maxreqhost, strlen( reqhost ) );
        (void) strcpy( hc->reqhost, reqhost );
        *url = '/';
    }

    if ( *url != '/' )
    {
        httpd_send_err( hc, 400, httpd_err400title, "", httpd_err400form, "" );
        return -1;
    }

    /* Map the method string onto the method code. */
    if ( strcasecmp( method_str, httpd_method_str( METHOD_GET ) ) == 0 )
    {
        hc->method = METHOD_GET;
    }
    else if ( strcasecmp( method_str, httpd_method_str( METHOD_HEAD ) ) == 0 )
    {
        hc->method = METHOD_HEAD;
    }
    else if ( strcasecmp( method_str, httpd_method_str( METHOD_POST ) ) == 0 )
    {
        hc->method = METHOD_POST;
    }
    else if ( strcasecmp( method_str, httpd_method_str( METHOD_PUT ) ) == 0 )
    {
        hc->method = METHOD_PUT;
    }
    else if ( strcasecmp( method_str, httpd_method_str( METHOD_DELETE ) ) == 0 )
    {
        hc->method = METHOD_DELETE;
    }
    else if ( strcasecmp( method_str, httpd_method_str( METHOD_TRACE ) ) == 0 )
    {
        hc->method = METHOD_TRACE;
    }
    else
    {
        httpd_send_err( hc, 501, err501title, "", err501form, method_str );
        return -1;
    }

    hc->encodedurl = url;
    httpd_realloc_str(
        &hc->decodedurl, &hc->maxdecodedurl, strlen( hc->encodedurl ) );
    strdecode( hc->decodedurl, hc->encodedurl );

    /* origfilename is the decoded URL without its leading '/'. */
    httpd_realloc_str(
        &hc->origfilename, &hc->maxorigfilename, strlen( hc->decodedurl ) );
    (void) strcpy( hc->origfilename, &hc->decodedurl[1] );
    /* Special case for top-level URL. */
    if ( hc->origfilename[0] == '\0' )
    {
        (void) strcpy( hc->origfilename, "." );
    }

    /* Extract query string from encoded URL. */
    cp = strchr( hc->encodedurl, '?' );
    if ( cp != (char*) 0 )
    {
        ++cp;
        httpd_realloc_str( &hc->query, &hc->maxquery, strlen( cp ) );
        (void) strcpy( hc->query, cp );
        /* Remove query from (decoded) origfilename. */
        cp = strchr( hc->origfilename, '?' );
        if ( cp != (char*) 0 )
        {
            *cp = '\0';
        }
    }

    de_dotdot( hc->origfilename );
    /* Reject names that are still absolute or still start with "..". */
    if ( hc->origfilename[0] == '/' ||
         ( hc->origfilename[0] == '.' && hc->origfilename[1] == '.' &&
           ( hc->origfilename[2] == '\0' || hc->origfilename[2] == '/' ) ) )
    {
        httpd_send_err( hc, 400, httpd_err400title, "", httpd_err400form, "" );
        return -1;
    }

    if ( hc->mime_flag )
    {
        /* Read the MIME headers. */
        while ( ( buf = bufgets( hc ) ) != (char*) 0 )
        {
            if ( buf[0] == '\0' )
            {
                break;
            }
            if ( strncasecmp( buf, "Referer:", 8 ) == 0 )
            {
                cp = &buf[8];
                cp += strspn( cp, " \t" );
                hc->referrer = cp;
            }
            else if ( strncasecmp( buf, "Referrer:", 9 ) == 0 )
            {
                cp = &buf[9];
                cp += strspn( cp, " \t" );
                hc->referrer = cp;
            }
            else if ( strncasecmp( buf, "User-Agent:", 11 ) == 0 )
            {
                cp = &buf[11];
                cp += strspn( cp, " \t" );
                hc->useragent = cp;
            }
            else if ( strncasecmp( buf, "Host:", 5 ) == 0 )
            {
                cp = &buf[5];
                cp += strspn( cp, " \t" );
                hc->hdrhost = cp;
                /* Drop anything from the first ':' on (the port part). */
                cp = strchr( hc->hdrhost, ':' );
                if ( cp != (char*) 0 )
                {
                    *cp = '\0';
                }
                if ( strchr( hc->hdrhost, '/' ) != (char*) 0 ||
                     hc->hdrhost[0] == '.' )
                {
                    httpd_send_err(
                        hc, 400, httpd_err400title, "", httpd_err400form, "" );
                    return -1;
                }
            }
            else if ( strncasecmp( buf, "Accept:", 7 ) == 0 )
            {
                cp = &buf[7];
                cp += strspn( cp, " \t" );
                if ( hc->accept[0] != '\0' )
                {
                    if ( strlen( hc->accept ) > 5000 )
                    {
                        syslog(LOG_ERR, "%.80s way too much Accept: data",httpd_ntoa( &hc->client_addr ) );
                        continue;
                    }
                    httpd_realloc_str(
                        &hc->accept, &hc->maxaccept,
                        strlen( hc->accept ) + 2 + strlen( cp ) );
                    (void) strcat( hc->accept, ", " );
                }
                else
                {
                    httpd_realloc_str(
                        &hc->accept, &hc->maxaccept, strlen( cp ) );
                }
                (void) strcat( hc->accept, cp );
            }
            else if ( strncasecmp( buf, "Accept-Encoding:", 16 ) == 0 )
            {
                cp = &buf[16];
                cp += strspn( cp, " \t" );
                if ( hc->accepte[0] != '\0' )
                {
                    if ( strlen( hc->accepte ) > 5000 )
                    {
                        syslog(LOG_ERR, "%.80s way too much Accept-Encoding: data",httpd_ntoa( &hc->client_addr ) );
                        continue;
                    }
                    httpd_realloc_str(
                        &hc->accepte, &hc->maxaccepte,
                        strlen( hc->accepte ) + 2 + strlen( cp ) );
                    (void) strcat( hc->accepte, ", " );
                }
                else
                {
                    httpd_realloc_str(
                        &hc->accepte, &hc->maxaccepte, strlen( cp ) );
                }
                /* Append rather than overwrite, so that values from repeated
                ** Accept-Encoding: headers are joined with ", " exactly like
                ** the Accept: handling above.  When accepte is still empty
                ** this is equivalent to a plain copy.
                */
                (void) strcat( hc->accepte, cp );
            }
            else if ( strncasecmp( buf, "Accept-Language:", 16 ) == 0 )
            {
                cp = &buf[16];
                cp += strspn( cp, " \t" );
                hc->acceptl = cp;
            }
            else if ( strncasecmp( buf, "If-Modified-Since:", 18 ) == 0 )
            {
                cp = &buf[18];
                hc->if_modified_since = tdate_parse( cp );
                if ( hc->if_modified_since == (time_t) -1 )
                {
                    syslog( LOG_DEBUG, "unparsable time: %.80s", cp );
                }
            }
            else if ( strncasecmp( buf, "Cookie:", 7 ) == 0 )
            {
                cp = &buf[7];
                cp += strspn( cp, " \t" );
                hc->cookie = cp;
            }
            else if ( strncasecmp( buf, "Range:", 6 ) == 0 )
            {
                /* Only support %d- and %d-%d, not %d-%d,%d-%d or -%d. */
                if ( strchr( buf, ',' ) == (char*) 0 )
                {
                    char* cp_dash;
                    cp = strpbrk( buf, "=" );
                    if ( cp != (char*) 0 )
                    {
                        cp_dash = strchr( cp + 1, '-' );
                        if ( cp_dash != (char*) 0 && cp_dash != cp + 1 )
                        {
                            *cp_dash = '\0';
                            hc->got_range = 1;
                            hc->first_byte_index = atoll( cp + 1 );
                            if ( hc->first_byte_index < 0 )
                            {
                                hc->first_byte_index = 0;
                            }
                            /* <ctype.h> functions require an unsigned char
                            ** value (or EOF); a plain char may be negative.
                            */
                            if ( isdigit( (unsigned char) cp_dash[1] ) )
                            {
                                hc->last_byte_index = atoll( cp_dash + 1 );
                                if ( hc->last_byte_index < 0 )
                                {
                                    hc->last_byte_index = -1;
                                }
                            }
                        }
                    }
                }
            }
            else if ( strncasecmp( buf, "Range-If:", 9 ) == 0 ||
                      strncasecmp( buf, "If-Range:", 9 ) == 0 )
            {
                cp = &buf[9];
                hc->range_if = tdate_parse( cp );
                if ( hc->range_if == (time_t) -1 )
                {
                    syslog( LOG_DEBUG, "unparsable time: %.80s", cp );
                }
            }
            else if ( strncasecmp( buf, "Content-Type:", 13 ) == 0 )
            {
                cp = &buf[13];
                cp += strspn( cp, " \t" );
                hc->contenttype = cp;
            }
            else if ( strncasecmp( buf, "Content-Length:", 15 ) == 0 )
            {
                cp = &buf[15];
                hc->contentlength = atol( cp );
            }
            else if ( strncasecmp( buf, "Authorization:", 14 ) == 0 )
            {
                cp = &buf[14];
                cp += strspn( cp, " \t" );
                hc->authorization = cp;
            }
            else if ( strncasecmp( buf, "Connection:", 11 ) == 0 )
            {
                cp = &buf[11];
                cp += strspn( cp, " \t" );
                if ( strcasecmp( cp, "keep-alive" ) == 0 )
                {
                    hc->keep_alive = 1;
                }
            }
#ifdef LOG_UNKNOWN_HEADERS
            else if ( strncasecmp( buf, "Accept-Charset:", 15 ) == 0 ||
                      strncasecmp( buf, "Accept-Language:", 16 ) == 0 ||
                      strncasecmp( buf, "Agent:", 6 ) == 0 ||
                      strncasecmp( buf, "Cache-Control:", 14 ) == 0 ||
                      strncasecmp( buf, "Cache-Info:", 11 ) == 0 ||
                      strncasecmp( buf, "Charge-To:", 10 ) == 0 ||
                      strncasecmp( buf, "Client-IP:", 10 ) == 0 ||
                      strncasecmp( buf, "Date:", 5 ) == 0 ||
                      strncasecmp( buf, "Extension:", 10 ) == 0 ||
                      strncasecmp( buf, "Forwarded:", 10 ) == 0 ||
                      strncasecmp( buf, "From:", 5 ) == 0 ||
                      strncasecmp( buf, "HTTP-Version:", 13 ) == 0 ||
                      strncasecmp( buf, "Max-Forwards:", 13 ) == 0 ||
                      strncasecmp( buf, "Message-Id:", 11 ) == 0 ||
                      strncasecmp( buf, "MIME-Version:", 13 ) == 0 ||
                      strncasecmp( buf, "Negotiate:", 10 ) == 0 ||
                      strncasecmp( buf, "Pragma:", 7 ) == 0 ||
                      strncasecmp( buf, "Proxy-Agent:", 12 ) == 0 ||
                      strncasecmp( buf, "Proxy-Connection:", 17 ) == 0 ||
                      strncasecmp( buf, "Security-Scheme:", 16 ) == 0 ||
                      strncasecmp( buf, "Session-Id:", 11 ) == 0 ||
                      strncasecmp( buf, "UA-Color:", 9 ) == 0 ||
                      strncasecmp( buf, "UA-CPU:", 7 ) == 0 ||
                      strncasecmp( buf, "UA-Disp:", 8 ) == 0 ||
                      strncasecmp( buf, "UA-OS:", 6 ) == 0 ||
                      strncasecmp( buf, "UA-Pixels:", 10 ) == 0 ||
                      strncasecmp( buf, "User:", 5 ) == 0 ||
                      strncasecmp( buf, "Via:", 4 ) == 0 ||
                      strncasecmp( buf, "X-", 2 ) == 0 )
            {
                ; /* ignore */
            }
            else
            {
                syslog( LOG_DEBUG, "unknown request header: %.80s", buf );
            }
#endif /* LOG_UNKNOWN_HEADERS */
        }
    }

    if ( hc->one_one )
    {
        /* Check that HTTP/1.1 requests specify a host, as required. */
        if ( hc->reqhost[0] == '\0' && hc->hdrhost[0] == '\0' )
        {
            httpd_send_err( hc, 400, httpd_err400title, "", httpd_err400form, "" );
            return -1;
        }

        /* If the client wants to do keep-alives, it might also be doing
        ** pipelining.  There's no way for us to tell.  Since we don't
        ** implement keep-alives yet, if we close such a connection there
        ** might be unread pipelined requests waiting.  So, we have to
        ** do a lingering close.
        */
        if ( hc->keep_alive )
        {
            hc->should_linger = 1;
        }
    }

    /* Ok, the request has been parsed.  Now we resolve stuff that
    ** may require the entire request.
    */

    /* Copy original filename to expanded filename. */
    httpd_realloc_str(
        &hc->expnfilename, &hc->maxexpnfilename, strlen( hc->origfilename ) );
    (void) strcpy( hc->expnfilename, hc->origfilename );

    /* Tilde mapping. */
    if ( hc->expnfilename[0] == '~' )
    {
#ifdef TILDE_MAP_1
        if ( ! tilde_map_1( hc ) )
        {
            httpd_send_err( hc, 404, err404title, "", err404form, hc->encodedurl );
            return -1;
        }
#endif /* TILDE_MAP_1 */
#ifdef TILDE_MAP_2
        if ( ! tilde_map_2( hc ) )
        {
            httpd_send_err( hc, 404, err404title, "", err404form, hc->encodedurl );
            return -1;
        }
#endif /* TILDE_MAP_2 */
    }

    /* Virtual host mapping. */
    if ( hc->hs->vhost )
    {
        if ( ! vhost_map( hc ) )
        {
            httpd_send_err( hc, 500, err500title, "", err500form, hc->encodedurl );
            return -1;
        }
    }

    /* Expand all symbolic links in the filename.  This also gives us
    ** any trailing non-existing components, for pathinfo.
    */
    cp = expand_symlinks(
        hc->expnfilename, &pi, hc->hs->no_symlink_check, hc->tildemapped );
    if ( cp == (char*) 0 )
    {
        httpd_send_err( hc, 500, err500title, "", err500form, hc->encodedurl );
        return -1;
    }
    httpd_realloc_str( &hc->expnfilename, &hc->maxexpnfilename, strlen( cp ) );
    (void) strcpy( hc->expnfilename, cp );
    httpd_realloc_str( &hc->pathinfo, &hc->maxpathinfo, strlen( pi ) );
    (void) strcpy( hc->pathinfo, pi );

    /* Remove pathinfo stuff from the original filename too. */
    if ( hc->pathinfo[0] != '\0' )
    {
        int i;
        i = strlen( hc->origfilename ) - strlen( hc->pathinfo );
        /* Cut at i - 1, the character just before the pathinfo suffix. */
        if ( i > 0 && strcmp( &hc->origfilename[i], hc->pathinfo ) == 0 )
        {
            hc->origfilename[i - 1] = '\0';
        }
    }

    /* If the expanded filename is an absolute path, check that it's still
    ** within the current directory or the alternate directory.
    */
    if ( hc->expnfilename[0] == '/' )
    {
        if ( strncmp(
                 hc->expnfilename, hc->hs->cwd, strlen( hc->hs->cwd ) ) == 0 )
        {
            /* Elide the current directory. */
            (void) ol_strcpy(
                hc->expnfilename, &hc->expnfilename[strlen( hc->hs->cwd )] );
        }
#ifdef TILDE_MAP_2
        else if ( hc->altdir[0] != '\0' &&
                  ( strncmp(
                        hc->expnfilename, hc->altdir,
                        strlen( hc->altdir ) ) == 0 &&
                    ( hc->expnfilename[strlen( hc->altdir )] == '\0' ||
                      hc->expnfilename[strlen( hc->altdir )] == '/' ) ) )
        {
            /* Inside the alternate directory: allowed, nothing to change. */
        }
#endif /* TILDE_MAP_2 */
        else
        {
            syslog(
                LOG_NOTICE, "%.80s URL \"%.80s\" goes outside the web tree",
                httpd_ntoa( &hc->client_addr ), hc->encodedurl );
            httpd_send_err(
                hc, 403, err403title, "",
                ERROR_FORM( err403form, "The requested URL '%.80s' resolves to a file outside the permitted web server directory tree.\n" ),
                hc->encodedurl );
            return -1;
        }
    }

    return 0;
}