diff options
| -rw-r--r-- | CHANGES | 2 | ||||
| -rw-r--r-- | Makefile.am | 2 | ||||
| -rw-r--r-- | NOTICE | 6 | ||||
| -rw-r--r-- | THANKS | 2 | ||||
| -rw-r--r-- | bin/Makefile.am | 5 | ||||
| -rw-r--r-- | bin/couchdb.tpl.in | 5 | ||||
| -rw-r--r-- | configure.ac | 1 | ||||
| -rw-r--r-- | share/www/script/couch_tests.js | 13 | ||||
| -rw-r--r-- | src/couchdb/couch.app.tpl.in | 2 | ||||
| -rw-r--r-- | src/couchdb/couch_httpd.erl | 11 | ||||
| -rw-r--r-- | src/couchdb/couch_httpd_db.erl | 3 | ||||
| -rw-r--r-- | src/couchdb/couch_httpd_misc_handlers.erl | 3 | ||||
| -rw-r--r-- | src/couchdb/couch_rep.erl | 96 | ||||
| -rw-r--r-- | src/couchdb/couch_server_sup.erl | 2 | ||||
| -rw-r--r-- | src/ibrowse/Makefile.am | 47 | ||||
| -rw-r--r-- | src/ibrowse/ibrowse.app | 13 | ||||
| -rw-r--r-- | src/ibrowse/ibrowse.erl | 628 | ||||
| -rw-r--r-- | src/ibrowse/ibrowse.hrl | 12 | ||||
| -rw-r--r-- | src/ibrowse/ibrowse_app.erl | 64 | ||||
| -rw-r--r-- | src/ibrowse/ibrowse_http_client.erl | 1312 | ||||
| -rw-r--r-- | src/ibrowse/ibrowse_lb.erl | 195 | ||||
| -rw-r--r-- | src/ibrowse/ibrowse_lib.erl | 399 | ||||
| -rw-r--r-- | src/ibrowse/ibrowse_sup.erl | 65 | ||||
| -rw-r--r-- | src/ibrowse/ibrowse_test.erl | 226 | ||||
| -rw-r--r-- | utils/Makefile.am | 1 | 
25 files changed, 3073 insertions, 42 deletions
| @@ -28,6 +28,8 @@ Database Core:     dramatically. The fix keeps only one document in the write queue at a time.   * Fix for databases sometimes incorrectly reporting that they contain 0     documents after compaction. + * CouchDB now uses ibrowse instead of inets for its internal HTTP client +   implementation. This means better replication stability.  HTTP Interface: diff --git a/Makefile.am b/Makefile.am index 67bf82b8..41ca5cf5 100644 --- a/Makefile.am +++ b/Makefile.am @@ -10,7 +10,7 @@  ## License for the specific language governing permissions and limitations  ## under the License. -SUBDIRS = bin etc src/couchdb src/mochiweb share test var utils +SUBDIRS = bin etc src/couchdb src/ibrowse src/mochiweb share test var utils  localdoc_DATA = AUTHORS.gz BUGS.gz CHANGES.gz NEWS.gz README.gz THANKS.gz @@ -21,3 +21,9 @@ This product also includes the following third-party components:   * MochiWeb (http://code.google.com/p/mochiweb/)     Copyright 2007, Mochi Media Coporation + + * ibrowse + (http://jungerl.cvs.sourceforge.net/viewvc/jungerl/jungerl/lib/ibrowse/) + +   Copyright 2008, Chandrashekhar Mullaparthi +   This ASF redistribution is consistent with the terms of the BSD License.
\ No newline at end of file @@ -12,7 +12,9 @@ Some of these people are:   * Yoan Blanc <yoan.blanc@gmail.com>   * Paul Carey <paul.p.carey@gmail.com>   * Benoit Chesneau <bchesneau@gmail.com> + * Jason Davies <jason@jasondavies.com>   * Paul Joseph Davis <paul.joseph.davis@gmail.com> + * Maximillian Dornseif <md@hudora.de>   * Michael Gottesman <gottesmm@reed.edu>   * Michael Hendricks <michael@ndrix.org>   * Till Klampaeckel <till@klampaeckel.de> diff --git a/bin/Makefile.am b/bin/Makefile.am index e03dfa0d..decc4ef7 100644 --- a/bin/Makefile.am +++ b/bin/Makefile.am @@ -28,8 +28,9 @@ couchdb: couchdb.tpl  	    -e "s|%ICU_CONFIG%|$(ICU_CONFIG)|g" \  	    -e "s|%bindir%|@bindir@|g" \  	    -e "s|%localerlanglibdir%|@localerlanglibdir@|g" \ -	    -e "s|%mochiwebebindir%|couch-@version@/ebin|g" \ -	    -e "s|%couchdbebindir%|mochiweb-r82/ebin|g" \ +	    -e "s|%couchdbebindir%|couch-@version@/ebin|g" \ +	    -e "s|%mochiwebebindir%|mochiweb-r82/ebin|g" \ +	    -e "s|%ibrowseebindir%|ibrowse-1.4.1/ebin|g" \  	    -e "s|%defaultini%|default.ini|g" \  	    -e "s|%localini%|local.ini|g" \  	    -e "s|%localconfdir%|@localconfdir@|g" \ diff --git a/bin/couchdb.tpl.in b/bin/couchdb.tpl.in index c769bd33..4959654a 100644 --- a/bin/couchdb.tpl.in +++ b/bin/couchdb.tpl.in @@ -183,11 +183,12 @@ start_couchdb () {          %ERL% $interactive_option -smp auto -sasl errlog_type error +K true \          -pa %localerlanglibdir%/%couchdbebindir% \              %localerlanglibdir%/%mochiwebebindir% \ -        -eval \"application:load(inets)\" \ +            %localerlanglibdir%/%ibrowseebindir% \ +        -eval \"application:load(ibrowse)\" \          -eval \"application:load(crypto)\" \          -eval \"application:load(couch)\" \          -eval \"crypto:start()\" \ -        -eval \"inets:start()\" \ +        -eval \"ibrowse:start()\" \          -eval \"couch_server:start([$start_arguments]), receive done -> done end.\" "      if test "$BACKGROUND_BOOLEAN" = "true" \          -a "$RECURSED_BOOLEAN" = "false"; then diff --git a/configure.ac b/configure.ac index b30aa8fe..25ccebfe 100644 --- a/configure.ac +++ b/configure.ac @@ -267,6 +267,7 @@ AC_CONFIG_FILES([etc/Makefile])  AC_CONFIG_FILES([share/Makefile])  AC_CONFIG_FILES([src/couchdb/couch.app.tpl])  AC_CONFIG_FILES([src/couchdb/Makefile]) +AC_CONFIG_FILES([src/ibrowse/Makefile])  AC_CONFIG_FILES([src/mochiweb/Makefile])  AC_CONFIG_FILES([test/Makefile])  AC_CONFIG_FILES([utils/Makefile]) diff --git a/share/www/script/couch_tests.js b/share/www/script/couch_tests.js index 8f76ff06..a6c47a7d 100644 --- a/share/www/script/couch_tests.js +++ b/share/www/script/couch_tests.js @@ -2134,6 +2134,19 @@ var tests = {              T(docA._rev == docB._rev);            };          }, + +        design_docs_test: new function() { +          // make sure design docs replicate properly +          this.init = function(dbA, dbB) { +            dbA.save({ _id:"_design/test" }); +          }; + +          this.afterAB1 = function() { +            var docA = dbA.open("_design/test"); +            var docB = dbB.open("_design/test"); +            T(docA._rev == docB._rev); +          }; +        },          attachments_test: new function () {            // Test attachments diff --git a/src/couchdb/couch.app.tpl.in b/src/couchdb/couch.app.tpl.in index 3b1ea02f..e0100cb4 100644 --- a/src/couchdb/couch.app.tpl.in +++ b/src/couchdb/couch.app.tpl.in @@ -24,4 +24,4 @@                             couch_view,                             couch_query_servers,                             couch_db_update_notifier_sup]}, -              {applications,[kernel,stdlib,crypto,inets,mochiweb]}]}. +              {applications,[kernel,stdlib,crypto,ibrowse,mochiweb]}]}. diff --git a/src/couchdb/couch_httpd.erl b/src/couchdb/couch_httpd.erl index acd6af40..7c371326 100644 --- a/src/couchdb/couch_httpd.erl +++ b/src/couchdb/couch_httpd.erl @@ -15,7 +15,7 @@  -export([start_link/0, stop/0, handle_request/3]). --export([header_value/2,header_value/3,qs_value/2,qs_value/3,qs/1,path/1]). +-export([header_value/2,header_value/3,qs_value/2,qs_value/3,qs/1,path/1,absolute_uri/2]).  -export([verify_is_server_admin/1,unquote/1,quote/1,recv/2]).  -export([parse_form/1,json_body/1,body/1,doc_etag/1, make_etag/1, etag_respond/3]).  -export([primary_header_value/2,partition/1,serve_file/3]). @@ -242,6 +242,15 @@ qs(#httpd{mochi_req=MochiReq}) ->  path(#httpd{mochi_req=MochiReq}) ->      MochiReq:get(path). +absolute_uri(#httpd{mochi_req=MochiReq}, Path) -> +    Host = case MochiReq:get_header_value("Host") of +        undefined -> +            {ok, {Address, Port}} = inet:sockname(MochiReq:get(socket)), +            inet_parse:ntoa(Address) ++ ":" ++ integer_to_list(Port); +        Value -> Value +    end, +    "http://" ++ Host ++ Path. +  unquote(UrlEncodedString) ->      mochiweb_util:unquote(UrlEncodedString). diff --git a/src/couchdb/couch_httpd_db.erl b/src/couchdb/couch_httpd_db.erl index 73d90fe6..2cb4c403 100644 --- a/src/couchdb/couch_httpd_db.erl +++ b/src/couchdb/couch_httpd_db.erl @@ -261,7 +261,8 @@ db_req(#httpd{method='GET',mochi_req=MochiReq, path_parts=[DbName,<<"_design/",_      PathFront = "/" ++ couch_httpd:quote(binary_to_list(DbName)) ++ "/",      RawSplit = regexp:split(MochiReq:get(raw_path),"_design%2F"),      {ok, [PathFront|PathTail]} = RawSplit, -    RedirectTo = PathFront ++ "_design/" ++ mochiweb_util:join(PathTail, "%2F"), +    RedirectTo = couch_httpd:absolute_uri(Req, PathFront ++ "_design/" ++  +        mochiweb_util:join(PathTail, "%2F")),      couch_httpd:send_response(Req, 301, [{"Location", RedirectTo}], <<>>);  db_req(#httpd{path_parts=[_DbName,<<"_design">>,Name]}=Req, Db) -> diff --git a/src/couchdb/couch_httpd_misc_handlers.erl b/src/couchdb/couch_httpd_misc_handlers.erl index e30f7594..2ba8ef64 100644 --- a/src/couchdb/couch_httpd_misc_handlers.erl +++ b/src/couchdb/couch_httpd_misc_handlers.erl @@ -50,7 +50,8 @@ handle_utils_dir_req(#httpd{method='GET'}=Req, DocumentRoot) ->          couch_httpd:serve_file(Req, RelativePath, DocumentRoot);      {_ActionKey, "", _RelativePath} ->          % GET /_utils -        couch_httpd:send_response(Req, 301, [{"Location", "/_utils/"}], <<>>) +        Headers = [{"Location", couch_httpd:absolute_uri(Req, "/_utils/")}], +        couch_httpd:send_response(Req, 301, Headers, <<>>)      end;  handle_utils_dir_req(Req, _) ->      send_method_not_allowed(Req, "GET,HEAD"). diff --git a/src/couchdb/couch_rep.erl b/src/couchdb/couch_rep.erl index 29f1fc80..ff926584 100644 --- a/src/couchdb/couch_rep.erl +++ b/src/couchdb/couch_rep.erl @@ -157,30 +157,54 @@ replicate2(Source, DbSrc, Target, DbTgt, Options) ->      end.  pull_rep(DbTarget, DbSource, SourceSeqNum) -> -    http:set_options([{max_pipeline_length, 101}, {pipeline_timeout, 5000}]),      {ok, {NewSeq, Stats}} =           enum_docs_since(DbSource, DbTarget, SourceSeqNum, {SourceSeqNum, []}), -    http:set_options([{max_pipeline_length, 2}, {pipeline_timeout, 0}]),      {NewSeq, Stats}.  do_http_request(Url, Action, Headers) ->      do_http_request(Url, Action, Headers, []).  do_http_request(Url, Action, Headers, JsonBody) -> -    ?LOG_DEBUG("couch_rep HTTP client request:", []), -    ?LOG_DEBUG("\tAction: ~p", [Action]), -    ?LOG_DEBUG("\tUrl: ~p", [Url]), -    Request = +    do_http_request(Url, Action, Headers, JsonBody, 10). + +do_http_request(Url, Action, _Headers, _JsonBody, 0) -> +    ?LOG_ERROR("couch_rep HTTP ~p request failed after 10 retries: ~p",  +        [Action, Url]); +do_http_request(Url, Action, Headers, JsonBody, Retries) -> +    ?LOG_DEBUG("couch_rep HTTP ~p request: ~p", [Action, Url]), +    Body =      case JsonBody of      [] -> -        {Url, Headers}; +        <<>>;      _ -> -        {Url, Headers, "application/json; charset=utf-8", iolist_to_binary(?JSON_ENCODE(JsonBody))} +        iolist_to_binary(?JSON_ENCODE(JsonBody))      end, -    {ok, {{_, ResponseCode,_},_Headers, ResponseBody}} = http:request(Action, Request, [], []), -    if -    ResponseCode >= 200, ResponseCode < 500 -> -        ?JSON_DECODE(ResponseBody) +    Options = [ +        {content_type, "application/json; charset=utf-8"}, +        {max_pipeline_size, 101}, +        {transfer_encoding, {chunked, 65535}} +    ], +    case ibrowse:send_req(Url, Headers, Action, Body, Options) of +    {ok, Status, ResponseHeaders, ResponseBody} -> +        ResponseCode = list_to_integer(Status), +        if +        ResponseCode >= 200, ResponseCode < 300 -> +            ?JSON_DECODE(ResponseBody); +        ResponseCode >= 300, ResponseCode < 400 -> +            RedirectUrl = mochiweb_headers:get_value("Location",  +                mochiweb_headers:make(ResponseHeaders)), +            do_http_request(RedirectUrl, Action, Headers, JsonBody, Retries-1); +        ResponseCode >= 400, ResponseCode < 500 ->  +            ?JSON_DECODE(ResponseBody);         +        ResponseCode == 500 -> +            ?LOG_INFO("retrying couch_rep HTTP ~p request due to 500 error: ~p", +                [Action, Url]), +            do_http_request(Url, Action, Headers, JsonBody, Retries - 1) +        end; +    {error, Reason} -> +        ?LOG_INFO("retrying couch_rep HTTP ~p request due to {error, ~p}: ~p",  +            [Action, Reason, Url]), +        do_http_request(Url, Action, Headers, JsonBody, Retries - 1)      end.  save_docs_buffer(DbTarget, DocsBuffer, []) -> @@ -223,20 +247,17 @@ wait_result({Pid,Ref}) ->      {'DOWN', Ref, _, _, Reason} -> exit(Reason)  end. -enum_docs_parallel(DbS, DbT, DocInfoList) -> -    UpdateSeqs = [D#doc_info.update_seq || D <- DocInfoList], +enum_docs_parallel(DbS, DbT, InfoList) -> +    UpdateSeqs = [Seq || {_, Seq, _, _} <- InfoList],      SaveDocsPid = spawn_link(fun() -> save_docs_buffer(DbT,[],UpdateSeqs) end), -    Stats = pmap(fun(SrcDocInfo) -> -        #doc_info{id=Id, -            rev=Rev, -            conflict_revs=Conflicts, -            deleted_conflict_revs=DelConflicts, -            update_seq=Seq} = SrcDocInfo, -        SrcRevs = [Rev | Conflicts] ++ DelConflicts, -         -        case get_missing_revs(DbT, [{Id, SrcRevs}]) of -        {ok, [{Id, MissingRevs}]} -> +    Stats = pmap(fun({Id, Seq, SrcRevs, MissingRevs}) -> +        case MissingRevs of +        [] -> +            SaveDocsPid ! {self(), skip, Seq}, +            receive got_it -> ok end, +            [{missing_checked, length(SrcRevs)}]; +        _ ->              {ok, DocResults} = open_doc_revs(DbS, Id, MissingRevs, [latest]),              % only save successful reads @@ -247,13 +268,9 @@ enum_docs_parallel(DbS, DbT, DocInfoList) ->              receive got_it -> ok end,              [{missing_checked, length(SrcRevs)},               {missing_found, length(MissingRevs)}, -             {docs_read, length(Docs)}]; -        {ok, []} -> -            SaveDocsPid ! {self(), skip, Seq}, -            receive got_it -> ok end, -            [{missing_checked, length(SrcRevs)}] -        end     -    end, DocInfoList), +             {docs_read, length(Docs)}] +        end +    end, InfoList),      SaveDocsPid ! {self(), shutdown}, @@ -345,7 +362,22 @@ enum_docs_since(DbSource, DbTarget, StartSeq, InAcc) ->      [] ->          {ok, InAcc};      _ -> -        Stats = enum_docs_parallel(DbSource, DbTarget, DocInfoList), +        UpdateSeqs = [D#doc_info.update_seq || D <- DocInfoList], +        SrcRevsList = lists:map(fun(SrcDocInfo) -> +            #doc_info{id=Id, +                rev=Rev, +                conflict_revs=Conflicts, +                deleted_conflict_revs=DelConflicts +            } = SrcDocInfo, +            SrcRevs = [Rev | Conflicts] ++ DelConflicts, +            {Id, SrcRevs} +        end, DocInfoList),         +        {ok, MissingRevsList} = get_missing_revs(DbTarget, SrcRevsList), +        InfoList = lists:map(fun({{Id, SrcRevs}, Seq}) -> +            MissingRevs = proplists:get_value(Id, MissingRevsList, []), +            {Id, Seq, SrcRevs, MissingRevs} +        end, lists:zip(SrcRevsList, UpdateSeqs)), +        Stats = enum_docs_parallel(DbSource, DbTarget, InfoList),          OldStats = element(2, InAcc),          TotalStats = [              {<<"missing_checked">>,  diff --git a/src/couchdb/couch_server_sup.erl b/src/couchdb/couch_server_sup.erl index 27efc9e7..627c34a9 100644 --- a/src/couchdb/couch_server_sup.erl +++ b/src/couchdb/couch_server_sup.erl @@ -102,7 +102,7 @@ start_server(IniFiles) ->      % ensure these applications are running -    application:start(inets), +    application:start(ibrowse),      application:start(crypto),      {ok, Pid} = supervisor:start_link( diff --git a/src/ibrowse/Makefile.am b/src/ibrowse/Makefile.am new file mode 100644 index 00000000..e4feb3d7 --- /dev/null +++ b/src/ibrowse/Makefile.am @@ -0,0 +1,47 @@ +## Licensed under the Apache License, Version 2.0 (the "License"); you may not +## use this file except in compliance with the License.  You may obtain a copy +## of the License at +## +##   http://www.apache.org/licenses/LICENSE-2.0 +## +## Unless required by applicable law or agreed to in writing, software +## distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +## WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the +## License for the specific language governing permissions and limitations under +## the License. + +ibrowseebindir = $(localerlanglibdir)/ibrowse-1.4.1/ebin + +ibrowse_file_collection = \ +    ibrowse.erl \ +    ibrowse_app.erl \ +    ibrowse_http_client.erl \ +    ibrowse_lb.erl \ +    ibrowse_lib.erl \ +    ibrowse_sup.erl \ +    ibrowse_test.erl + +ibrowseebin_static_file = ibrowse.app + +ibrowseebin_make_generated_file_list = \ +    ibrowse.beam \ +    ibrowse_app.beam \ +    ibrowse_http_client.beam \ +    ibrowse_lb.beam \ +    ibrowse_lib.beam \ +    ibrowse_sup.beam \ +    ibrowse_test.beam + +ibrowseebin_DATA = \ +    $(ibrowseebin_static_file) \ +    $(ibrowseebin_make_generated_file_list) + +EXTRA_DIST =  \ +    $(ibrowse_file_collection) \ +    $(ibrowseebin_static_file) + +CLEANFILES = \ +    $(ibrowseebin_make_generated_file_list) + +%.beam: %.erl +	$(ERLC) $< diff --git a/src/ibrowse/ibrowse.app b/src/ibrowse/ibrowse.app new file mode 100644 index 00000000..960c0794 --- /dev/null +++ b/src/ibrowse/ibrowse.app @@ -0,0 +1,13 @@ +{application, ibrowse, +        [{description, "HTTP client application"}, +         {vsn, "1.4.1"}, +         {modules, [ ibrowse,  +		     ibrowse_http_client,  +		     ibrowse_app,  +		     ibrowse_sup,  +		     ibrowse_lib, +		     ibrowse_lb ]}, +         {registered, []}, +         {applications, [kernel,stdlib,sasl]}, +	 {env, []}, +	 {mod, {ibrowse_app, []}}]}. diff --git a/src/ibrowse/ibrowse.erl b/src/ibrowse/ibrowse.erl new file mode 100644 index 00000000..4e6404ad --- /dev/null +++ b/src/ibrowse/ibrowse.erl @@ -0,0 +1,628 @@ +%%%------------------------------------------------------------------- +%%% File    : ibrowse.erl +%%% Author  : Chandrashekhar Mullaparthi <chandrashekhar.mullaparthi@t-mobile.co.uk> +%%% Description : Load balancer process for HTTP client connections. +%%% +%%% Created : 11 Oct 2003 by Chandrashekhar Mullaparthi <chandrashekhar.mullaparthi@t-mobile.co.uk> +%%%------------------------------------------------------------------- +%% @author Chandrashekhar Mullaparthi <chandrashekhar dot mullaparthi at gmail dot com> +%% @copyright 2005-2008 Chandrashekhar Mullaparthi +%% @version 1.4 +%% @doc The ibrowse application implements an HTTP 1.1 client. This +%% module implements the API of the HTTP client. There is one named +%% process called 'ibrowse' which assists in load balancing and maintaining configuration. There is one load balancing process per unique webserver. There is +%% one process to handle one TCP connection to a webserver +%% (implemented in the module ibrowse_http_client). Multiple connections to a +%% webserver are setup based on the settings for each webserver. The +%% ibrowse process also determines which connection to pipeline a +%% certain request on.  The functions to call are send_req/3, +%% send_req/4, send_req/5, send_req/6. +%% +%% <p>Here are a few sample invocations.</p> +%% +%% <code> +%% ibrowse:send_req("http://intranet/messenger/", [], get).  +%% <br/><br/> +%%  +%% ibrowse:send_req("http://www.google.com/", [], get, [],  +%% 		 [{proxy_user, "XXXXX"}, +%% 		  {proxy_password, "XXXXX"}, +%% 		  {proxy_host, "proxy"}, +%% 		  {proxy_port, 8080}], 1000).  +%% <br/><br/> +%% +%%ibrowse:send_req("http://www.erlang.org/download/otp_src_R10B-3.tar.gz", [], get, [], +%% 		 [{proxy_user, "XXXXX"}, +%% 		  {proxy_password, "XXXXX"}, +%% 		  {proxy_host, "proxy"}, +%% 		  {proxy_port, 8080}, +%% 		  {save_response_to_file, true}], 1000). +%% <br/><br/> +%% +%% ibrowse:send_req("http://www.erlang.org", [], head). +%% +%% <br/><br/> +%% ibrowse:send_req("http://www.sun.com", [], options). +%% +%% <br/><br/> +%% ibrowse:send_req("http://www.bbc.co.uk", [], trace). +%% +%% <br/><br/> +%% ibrowse:send_req("http://www.google.com", [], get, [],  +%%                   [{stream_to, self()}]). +%% </code> +%% +%% <p>A driver exists which implements URL encoding in C, but the +%% speed achieved using only erlang has been good enough, so the +%% driver isn't actually used.</p> + +-module(ibrowse). +-vsn('$Id: ibrowse.erl,v 1.7 2008/05/21 15:28:11 chandrusf Exp $ '). + +-behaviour(gen_server). +%%-------------------------------------------------------------------- +%% Include files +%%-------------------------------------------------------------------- + +%%-------------------------------------------------------------------- +%% External exports +-export([start_link/0, start/0, stop/0]). + +%% gen_server callbacks +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, +	 terminate/2, code_change/3]). + +%% API interface +-export([ +	 rescan_config/0, +	 rescan_config/1, +	 get_config_value/1, +	 get_config_value/2, +	 spawn_worker_process/2, +	 spawn_link_worker_process/2, +	 stop_worker_process/1, +	 send_req/3, +	 send_req/4, +	 send_req/5, +	 send_req/6, +	 send_req_direct/4, +	 send_req_direct/5, +	 send_req_direct/6, +	 send_req_direct/7, +	 set_max_sessions/3, +	 set_max_pipeline_size/3, +	 set_dest/3, +	 trace_on/0, +	 trace_off/0, +	 trace_on/2, +	 trace_off/2, +	 show_dest_status/2 +	]). + +-ifdef(debug). +-compile(export_all). +-endif. + +-import(ibrowse_lib, [ +		      parse_url/1, +		      printable_date/0, +		      get_value/2, +		      get_value/3, +		      do_trace/2 +		     ]). +		       +-record(state, {trace = false}). + +-include("ibrowse.hrl"). + +-define(DEF_MAX_SESSIONS,10). +-define(DEF_MAX_PIPELINE_SIZE,10). + +%%==================================================================== +%% External functions +%%==================================================================== +%%-------------------------------------------------------------------- +%% Function: start_link/0 +%% Description: Starts the server +%%-------------------------------------------------------------------- +%% @doc Starts the ibrowse process linked to the calling process. Usually invoked by the supervisor ibrowse_sup +%% @spec start_link() -> {ok, pid()} +start_link() -> +    gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + +%% @doc Starts the ibrowse process without linking. Useful when testing using the shell +start() -> +    gen_server:start({local, ?MODULE}, ?MODULE, [], [{debug, []}]). + +%% @doc Stop the ibrowse process. Useful when testing using the shell. +stop() -> +    catch gen_server:call(ibrowse, stop). + +%% @doc This is the basic function to send a HTTP request. +%% The Status return value indicates the HTTP status code returned by the webserver +%% @spec send_req(Url::string(), Headers::headerList(), Method::method()) -> response() +%% headerList() = [{header(), value()}] +%% header() = atom() | string() +%% value() = term() +%% method() = get | post | head | options | put | delete | trace | mkcol | propfind | proppatch | lock | unlock | move | copy +%% Status = string() +%% ResponseHeaders = [respHeader()] +%% respHeader() = {headerName(), headerValue()} +%% headerName() = string() +%% headerValue() = string() +%% response() = {ok, Status, ResponseHeaders, ResponseBody} | {error, Reason} +%% ResponseBody = string() | {file, Filename} +%% Reason = term() +send_req(Url, Headers, Method) -> +    send_req(Url, Headers, Method, [], []). + +%% @doc Same as send_req/3.  +%% If a list is specified for the body it has to be a flat list. The body can also be a fun/0 or a fun/1. <br/> +%% If fun/0, the connection handling process will repeatdely call the fun until it returns an error or eof. <pre>Fun() = {ok, Data} | eof</pre><br/> +%% If fun/1, the connection handling process will repeatedly call the fun with the supplied state until it returns an error or eof. <pre>Fun(State) = {ok, Data} | {ok, Data, NewState} | eof</pre> +%% @spec send_req(Url, Headers, Method::method(), Body::body()) -> response() +%% body() = [] | string() | binary() | fun_arity_0() | {fun_arity_1(), initial_state()} +%% initial_state() = term() +send_req(Url, Headers, Method, Body) -> +    send_req(Url, Headers, Method, Body, []). + +%% @doc Same as send_req/4.  +%% For a description of SSL Options, look in the ssl manpage. If the +%% HTTP Version to use is not specified, the default is 1.1. +%% <br/> +%% <p>The <code>host_header</code> is useful in the case where ibrowse is +%% connecting to a component such as <a +%% href="http://www.stunnel.org">stunnel</a> which then sets up a +%% secure connection to a webserver. In this case, the URL supplied to +%% ibrowse must have the stunnel host/port details, but that won't +%% make sense to the destination webserver. This option can then be +%% used to specify what should go in the <code>Host</code> header in +%% the request.</p> +%% <ul> +%% <li>When both the options <code>save_response_to_file</code> and <code>stream_to</code>  +%% are specified, the former takes precedence.</li> +%% +%% <li>For the <code>save_response_to_file</code> option, the response body is saved to +%% file only if the status code is in the 200-299 range. If not, the response body is returned +%% as a string.</li> +%% <li>Whenever an error occurs in the processing of a request, ibrowse will return as much +%% information as it has, such as HTTP Status Code and HTTP Headers. When this happens, the response +%% is of the form <code>{error, {Reason, {stat_code, StatusCode}, HTTP_headers}}</code></li> +%% </ul> +%% @spec send_req(Url::string(), Headers::headerList(), Method::method(), Body::body(), Options::optionList()) -> response() +%% optionList() = [option()] +%% option() = {max_sessions, integer()}        | +%%          {max_pipeline_size, integer()}     | +%%          {trace, boolean()}                 |  +%%          {is_ssl, boolean()}                | +%%          {ssl_options, [SSLOpt]}            | +%%          {pool_name, atom()}                | +%%          {proxy_host, string()}             | +%%          {proxy_port, integer()}            | +%%          {proxy_user, string()}             | +%%          {proxy_password, string()}         | +%%          {use_absolute_uri, boolean()}      | +%%          {basic_auth, {username(), password()}} | +%%          {cookie, string()}                 | +%%          {content_length, integer()}        | +%%          {content_type, string()}           | +%%          {save_response_to_file, srtf()}    | +%%          {stream_to, process()}             | +%%          {http_vsn, {MajorVsn, MinorVsn}}   | +%%          {host_header, string()}            | +%%          {transfer_encoding, {chunked, ChunkSize}} +%%  +%% process() = pid() | atom() +%% username() = string() +%% password() = string() +%% SSLOpt = term() +%% ChunkSize = integer() +%% srtf() = boolean() | filename() +%% filename() = string() +%%  +send_req(Url, Headers, Method, Body, Options) -> +    send_req(Url, Headers, Method, Body, Options, 30000). + +%% @doc Same as send_req/5.  +%% All timeout values are in milliseconds. +%% @spec send_req(Url, Headers::headerList(), Method::method(), Body::body(), Options::optionList(), Timeout) -> response() +%% Timeout = integer() | infinity +send_req(Url, Headers, Method, Body, Options, Timeout) -> +    case catch parse_url(Url) of +	#url{host = Host, +	     port = Port} = Parsed_url -> +	    Lb_pid = case ets:lookup(ibrowse_lb, {Host, Port}) of +			 [] -> +			     get_lb_pid(Parsed_url); +			 [#lb_pid{pid = Lb_pid_1}] -> +			     Lb_pid_1 +		     end, +	    Max_sessions = get_max_sessions(Host, Port, Options), +	    Max_pipeline_size = get_max_pipeline_size(Host, Port, Options), +	    Options_1 = merge_options(Host, Port, Options), +	    {SSLOptions, IsSSL} = +		case get_value(is_ssl, Options_1, false) of +		    false -> {[], false}; +		    true -> {get_value(ssl_options, Options_1), true} +		end, +	    case ibrowse_lb:spawn_connection(Lb_pid, Parsed_url, +					     Max_sessions,  +					     Max_pipeline_size, +					     {SSLOptions, IsSSL}) of +		{ok, Conn_Pid} -> +		    do_send_req(Conn_Pid, Parsed_url, Headers, +				Method, Body, Options_1, Timeout); +		Err -> +		    Err +	    end; +	Err -> +	    {error, {url_parsing_failed, Err}} +    end. + +merge_options(Host, Port, Options) -> +    Config_options = get_config_value({options, Host, Port}, []), +    lists:foldl( +      fun({Key, Val}, Acc) -> +			case lists:keysearch(Key, 1, Options) of +			    false -> +				[{Key, Val} | Acc]; +			    _ -> +				Acc +			end +      end, Options, Config_options). + +get_lb_pid(Url) -> +    gen_server:call(?MODULE, {get_lb_pid, Url}). + +get_max_sessions(Host, Port, Options) -> +    get_value(max_sessions, Options, +	      get_config_value({max_sessions, Host, Port}, ?DEF_MAX_SESSIONS)). + +get_max_pipeline_size(Host, Port, Options) -> +    get_value(max_pipeline_size, Options, +	      get_config_value({max_pipeline_size, Host, Port}, ?DEF_MAX_PIPELINE_SIZE)). + +%% @doc Deprecated. Use set_max_sessions/3 and set_max_pipeline_size/3 +%% for achieving the same effect. +set_dest(Host, Port, [{max_sessions, Max} | T]) -> +    set_max_sessions(Host, Port, Max), +    set_dest(Host, Port, T); +set_dest(Host, Port, [{max_pipeline_size, Max} | T]) -> +    set_max_pipeline_size(Host, Port, Max), +    set_dest(Host, Port, T); +set_dest(Host, Port, [{trace, Bool} | T]) when Bool == true; Bool == false -> +    ibrowse ! {trace, true, Host, Port}, +    set_dest(Host, Port, T); +set_dest(_Host, _Port, [H | _]) -> +    exit({invalid_option, H}); +set_dest(_, _, []) -> +    ok. +     +%% @doc Set the maximum number of connections allowed to a specific Host:Port. +%% @spec set_max_sessions(Host::string(), Port::integer(), Max::integer()) -> ok +set_max_sessions(Host, Port, Max) when is_integer(Max), Max > 0 -> +    gen_server:call(?MODULE, {set_config_value, {max_sessions, Host, Port}, Max}). + +%% @doc Set the maximum pipeline size for each connection to a specific Host:Port. +%% @spec set_max_pipeline_size(Host::string(), Port::integer(), Max::integer()) -> ok +set_max_pipeline_size(Host, Port, Max) when is_integer(Max), Max > 0 -> +    gen_server:call(?MODULE, {set_config_value, {max_pipeline_size, Host, Port}, Max}). + +do_send_req(Conn_Pid, Parsed_url, Headers, Method, Body, Options, Timeout) -> +    case catch ibrowse_http_client:send_req(Conn_Pid, Parsed_url, +					    Headers, Method, Body, +					    Options, Timeout) of +	{'EXIT', {timeout, _}} -> +	    {error, req_timedout}; +	{'EXIT', Reason} -> +	    {error, {'EXIT', Reason}}; +	Ret -> +	    Ret +    end. + +%% @doc Creates a HTTP client process to the specified Host:Port which +%% is not part of the load balancing pool. This is useful in cases +%% where some requests to a webserver might take a long time whereas +%% some might take a very short time. To avoid getting these quick +%% requests stuck in the pipeline behind time consuming requests, use +%% this function to get a handle to a connection process. <br/> +%% <b>Note:</b> Calling this function only creates a worker process. No connection +%% is setup. The connection attempt is made only when the first +%% request is sent via any of the send_req_direct/4,5,6,7 functions.<br/> +%% <b>Note:</b> It is the responsibility of the calling process to control +%% pipeline size on such connections. +%% +%% @spec spawn_worker_process(Host::string(), Port::integer()) -> {ok, pid()} +spawn_worker_process(Host, Port) -> +    ibrowse_http_client:start({Host, Port}). + +%% @doc Same as spawn_worker_process/2 except the the calling process +%% is linked to the worker process which is spawned. +spawn_link_worker_process(Host, Port) -> +    ibrowse_http_client:start_link({Host, Port}). + +%% @doc Terminate a worker process spawned using +%% spawn_worker_process/2 or spawn_link_worker_process/2. Requests in +%% progress will get the error response <pre>{error, closing_on_request}</pre> +%% @spec stop_worker_process(Conn_pid::pid()) -> ok +stop_worker_process(Conn_pid) -> +    ibrowse_http_client:stop(Conn_pid). + +%% @doc Same as send_req/3 except that the first argument is the PID +%% returned by spawn_worker_process/2 or spawn_link_worker_process/2 +send_req_direct(Conn_pid, Url, Headers, Method) -> +    send_req_direct(Conn_pid, Url, Headers, Method, [], []). + +%% @doc Same as send_req/4 except that the first argument is the PID +%% returned by spawn_worker_process/2 or spawn_link_worker_process/2 +send_req_direct(Conn_pid, Url, Headers, Method, Body) -> +    send_req_direct(Conn_pid, Url, Headers, Method, Body, []). + +%% @doc Same as send_req/5 except that the first argument is the PID +%% returned by spawn_worker_process/2 or spawn_link_worker_process/2 +send_req_direct(Conn_pid, Url, Headers, Method, Body, Options) -> +    send_req_direct(Conn_pid, Url, Headers, Method, Body, Options, 30000). + +%% @doc Same as send_req/6 except that the first argument is the PID +%% returned by spawn_worker_process/2 or spawn_link_worker_process/2 +send_req_direct(Conn_pid, Url, Headers, Method, Body, Options, Timeout) -> +    case catch parse_url(Url) of +	#url{host = Host, +	     port = Port} = Parsed_url -> +	    Options_1 = merge_options(Host, Port, Options), +	    case do_send_req(Conn_pid, Parsed_url, Headers, Method, Body, Options_1, Timeout) of +		{error, {'EXIT', {noproc, _}}} -> +		    {error, worker_is_dead}; +		Ret -> +		    Ret +	    end; +	Err -> +	    {error, {url_parsing_failed, Err}} +    end. +     +%% @doc Turn tracing on for the ibrowse process +trace_on() -> +    ibrowse ! {trace, true}. +%% @doc Turn tracing off for the ibrowse process +trace_off() -> +    ibrowse ! {trace, false}. + +%% @doc Turn tracing on for all connections to the specified HTTP +%% server. Host is whatever is specified as the domain name in the URL +%% @spec trace_on(Host, Port) -> term()  +%% Host = string()  +%% Port = integer() +trace_on(Host, Port) -> +    ibrowse ! {trace, true, Host, Port}. + +%% @doc Turn tracing OFF for all connections to the specified HTTP +%% server. +%% @spec trace_off(Host, Port) -> term() +trace_off(Host, Port) -> +    ibrowse ! {trace, false, Host, Port}. + +%% @doc Shows some internal information about load balancing to a +%% specified Host:Port. Info about workers spawned using +%% spawn_worker_process/2 or spawn_link_worker_process/2 is not +%% included. +show_dest_status(Host, Port) -> +    case ets:lookup(ibrowse_lb, {Host, Port}) of +	[] -> +	    no_active_processes; +	[#lb_pid{pid = Lb_pid}] -> +	    io:format("Load Balancer Pid     : ~p~n", [Lb_pid]), +	    io:format("LB process msg q size : ~p~n", [(catch process_info(Lb_pid, message_queue_len))]), +	    case lists:dropwhile( +		   fun(Tid) -> +			   ets:info(Tid, owner) /= Lb_pid +		   end, ets:all()) of +		[] -> +		    io:format("Couldn't locate ETS table for ~p~n", [Lb_pid]); +		[Tid | _] -> +		    First = ets:first(Tid), +		    Last = ets:last(Tid), +		    Size = ets:info(Tid, size), +		    io:format("LB ETS table id       : ~p~n", [Tid]), +		    io:format("Num Connections       : ~p~n", [Size]), +		    case Size of +			0 -> +			    ok; +			_ -> +			    {First_p_sz, _} = First, +			    {Last_p_sz, _} = Last, +			    io:format("Smallest pipeline     : ~1000.p~n", [First_p_sz]), +			    io:format("Largest pipeline      : ~1000.p~n", [Last_p_sz]) +		    end +	    end +    end. + +%% @doc Clear current configuration for ibrowse and load from the file +%% ibrowse.conf in the IBROWSE_EBIN/../priv directory. Current +%% configuration is cleared only if the ibrowse.conf file is readable +%% using file:consult/1 +rescan_config() -> +    gen_server:call(?MODULE, rescan_config). + +%% Clear current configuration for ibrowse and load from the specified +%% file. Current configuration is cleared only if the specified +%% file is readable using file:consult/1 +rescan_config(File) when is_list(File) -> +    gen_server:call(?MODULE, {rescan_config, File}). + +%%==================================================================== +%% Server functions +%%==================================================================== + +%%-------------------------------------------------------------------- +%% Function: init/1 +%% Description: Initiates the server +%% Returns: {ok, State}          | +%%          {ok, State, Timeout} | +%%          ignore               | +%%          {stop, Reason} +%%-------------------------------------------------------------------- +init(_) -> +    process_flag(trap_exit, true), +    State = #state{}, +    put(my_trace_flag, State#state.trace), +    put(ibrowse_trace_token, "ibrowse"), +    ets:new(ibrowse_lb, [named_table, public, {keypos, 2}]), +    ets:new(ibrowse_conf, [named_table, protected, {keypos, 2}]), +    import_config(), +    {ok, #state{}}. + +import_config() -> +    case code:priv_dir(ibrowse) of +	{error, _} = Err -> +	    Err; +	PrivDir -> +	    Filename = filename:join(PrivDir, "ibrowse.conf"), +	    import_config(Filename) +    end. + +import_config(Filename) -> +    case file:consult(Filename) of +	{ok, Terms} -> +	    ets:delete_all_objects(ibrowse_conf), +	    Fun = fun({dest, Host, Port, MaxSess, MaxPipe, Options})  +		     when list(Host), integer(Port), +		     integer(MaxSess), MaxSess > 0, +		     integer(MaxPipe), MaxPipe > 0, list(Options) -> +			  I = [{{max_sessions, Host, Port}, MaxSess}, +			       {{max_pipeline_size, Host, Port}, MaxPipe}, +			       {{options, Host, Port}, Options}], +			  lists:foreach( +			    fun({X, Y}) -> +				    ets:insert(ibrowse_conf, +					       #ibrowse_conf{key = X,  +							     value = Y}) +			    end, I); +		     ({K, V}) -> +			  ets:insert(ibrowse_conf, +				     #ibrowse_conf{key = K, +						   value = V}); +		     (X) -> +			  io:format("Skipping unrecognised term: ~p~n", [X]) +		  end, +	    lists:foreach(Fun, Terms); +	Err -> +	    Err +    end. + +%% @doc Internal export +get_config_value(Key) -> +    [#ibrowse_conf{value = V}] = ets:lookup(ibrowse_conf, Key), +    V. + +%% @doc Internal export +get_config_value(Key, DefVal) -> +    case ets:lookup(ibrowse_conf, Key) of +	[] -> +	    DefVal; +	[#ibrowse_conf{value = V}] -> +	    V +    end. + +set_config_value(Key, Val) -> +    ets:insert(ibrowse_conf, #ibrowse_conf{key = Key, value = Val}). +%%-------------------------------------------------------------------- +%% Function: handle_call/3 +%% Description: Handling call messages +%% Returns: {reply, Reply, State}          | +%%          {reply, Reply, State, Timeout} | +%%          {noreply, State}               | +%%          {noreply, State, Timeout}      | +%%          {stop, Reason, Reply, State}   | (terminate/2 is called) +%%          {stop, Reason, State}            (terminate/2 is called) +%%-------------------------------------------------------------------- +handle_call({get_lb_pid, #url{host = Host, port = Port} = Url}, _From, State) -> +    Pid = do_get_connection(Url, ets:lookup(ibrowse_lb, {Host, Port})), +    {reply, Pid, State}; + +handle_call(stop, _From, State) -> +    do_trace("IBROWSE shutting down~n", []), +    {stop, normal, ok, State}; + +handle_call({set_config_value, Key, Val}, _From, State) -> +    set_config_value(Key, Val), +    {reply, ok, State}; + +handle_call(rescan_config, _From, State) -> +    Ret = (catch import_config()), +    {reply, Ret, State}; + +handle_call({rescan_config, File}, _From, State) -> +    Ret = (catch import_config(File)), +    {reply, Ret, State}; + +handle_call(Request, _From, State) -> +    Reply = {unknown_request, Request}, +    {reply, Reply, State}. + +%%-------------------------------------------------------------------- +%% Function: handle_cast/2 +%% Description: Handling cast messages +%% Returns: {noreply, State}          | +%%          {noreply, State, Timeout} | +%%          {stop, Reason, State}            (terminate/2 is called) +%%-------------------------------------------------------------------- + +handle_cast(_Msg, State) -> +    {noreply, State}. + +%%-------------------------------------------------------------------- +%% Function: handle_info/2 +%% Description: Handling all non call/cast messages +%% Returns: {noreply, State}          | +%%          {noreply, State, Timeout} | +%%          {stop, Reason, State}            (terminate/2 is called) +%%-------------------------------------------------------------------- +handle_info({trace, Bool}, State) -> +    put(my_trace_flag, Bool), +    {noreply, State}; + +handle_info({trace, Bool, Host, Port}, State) -> +    Fun = fun(#lb_pid{host_port = {H, P}, pid = Pid}, _) +	     when H == Host, +		  P == Port -> +		  catch Pid ! {trace, Bool}; +	     (#client_conn{key = {H, P, Pid}}, _) +	     when H == Host, +		  P == Port -> +		  catch Pid ! {trace, Bool}; +	     (_, Acc) -> +		  Acc +	  end, +    ets:foldl(Fun, undefined, ibrowse_lb), +    ets:insert(ibrowse_conf, #ibrowse_conf{key = {trace, Host, Port}, +					   value = Bool}), +    {noreply, State}; +		      +handle_info(_Info, State) -> +    {noreply, State}. + +%%-------------------------------------------------------------------- +%% Function: terminate/2 +%% Description: Shutdown the server +%% Returns: any (ignored by gen_server) +%%-------------------------------------------------------------------- +terminate(_Reason, _State) -> +    ok. + +%%-------------------------------------------------------------------- +%% Func: code_change/3 +%% Purpose: Convert process state when code is changed +%% Returns: {ok, NewState} +%%-------------------------------------------------------------------- +code_change(_OldVsn, State, _Extra) -> +    {ok, State}. + +%%-------------------------------------------------------------------- +%%% Internal functions +%%-------------------------------------------------------------------- +do_get_connection(#url{host = Host, port = Port}, []) -> +    {ok, Pid} = ibrowse_lb:start_link([Host, Port]), +    ets:insert(ibrowse_lb, #lb_pid{host_port = {Host, Port}, pid = Pid}), +    Pid; +do_get_connection(_Url, [#lb_pid{pid = Pid}]) -> +    Pid. diff --git a/src/ibrowse/ibrowse.hrl b/src/ibrowse/ibrowse.hrl new file mode 100644 index 00000000..ebf3bb33 --- /dev/null +++ b/src/ibrowse/ibrowse.hrl @@ -0,0 +1,12 @@ +-ifndef(IBROWSE_HRL). +-define(IBROWSE_HRL, "ibrowse.hrl"). + +-record(url, {abspath, host, port, username, password, path, protocol}). + +-record(lb_pid, {host_port, pid}). + +-record(client_conn, {key, cur_pipeline_size = 0, reqs_served = 0}). + +-record(ibrowse_conf, {key, value}). + +-endif. diff --git a/src/ibrowse/ibrowse_app.erl b/src/ibrowse/ibrowse_app.erl new file mode 100644 index 00000000..f5e523c2 --- /dev/null +++ b/src/ibrowse/ibrowse_app.erl @@ -0,0 +1,64 @@ +%%%------------------------------------------------------------------- +%%% File    : ibrowse_app.erl +%%% Author  : Chandrashekhar Mullaparthi <chandrashekhar.mullaparthi@t-mobile.co.uk> +%%% Description :  +%%% +%%% Created : 15 Oct 2003 by Chandrashekhar Mullaparthi <chandrashekhar.mullaparthi@t-mobile.co.uk> +%%%------------------------------------------------------------------- +-module(ibrowse_app). +-vsn('$Id: ibrowse_app.erl,v 1.1 2005/05/05 22:28:28 chandrusf Exp $ '). + +-behaviour(application). +%%-------------------------------------------------------------------- +%% Include files +%%-------------------------------------------------------------------- + +%%-------------------------------------------------------------------- +%% External exports +%%-------------------------------------------------------------------- +-export([ +	 start/2, +	 stop/1 +        ]). + +%%-------------------------------------------------------------------- +%% Internal exports +%%-------------------------------------------------------------------- +-export([ +        ]). + +%%-------------------------------------------------------------------- +%% Macros +%%-------------------------------------------------------------------- + +%%-------------------------------------------------------------------- +%% Records +%%-------------------------------------------------------------------- + +%%==================================================================== +%% External functions +%%==================================================================== +%%-------------------------------------------------------------------- +%% Func: start/2 +%% Returns: {ok, Pid}        | +%%          {ok, Pid, State} | +%%          {error, Reason}    +%%-------------------------------------------------------------------- +start(_Type, _StartArgs) -> +    case ibrowse_sup:start_link() of +	{ok, Pid} ->  +	    {ok, Pid}; +	Error -> +	    Error +    end. + +%%-------------------------------------------------------------------- +%% Func: stop/1 +%% Returns: any  +%%-------------------------------------------------------------------- +stop(_State) -> +    ok. + +%%==================================================================== +%% Internal functions +%%==================================================================== diff --git a/src/ibrowse/ibrowse_http_client.erl b/src/ibrowse/ibrowse_http_client.erl new file mode 100644 index 00000000..9a0e4d3b --- /dev/null +++ b/src/ibrowse/ibrowse_http_client.erl @@ -0,0 +1,1312 @@ +%%%------------------------------------------------------------------- +%%% File    : ibrowse_http_client.erl +%%% Author  : Chandrashekhar Mullaparthi <chandrashekhar.mullaparthi@t-mobile.co.uk> +%%% Description : The name says it all +%%% +%%% Created : 11 Oct 2003 by Chandrashekhar Mullaparthi <chandrashekhar.mullaparthi@t-mobile.co.uk> +%%%------------------------------------------------------------------- +-module(ibrowse_http_client). +-vsn('$Id: ibrowse_http_client.erl,v 1.18 2008/05/21 15:28:11 chandrusf Exp $ '). + +-behaviour(gen_server). +%%-------------------------------------------------------------------- +%% Include files +%%-------------------------------------------------------------------- + +%%-------------------------------------------------------------------- +%% External exports +-export([ +	 start_link/1, +	 start/1, +	 stop/1, +	 send_req/7 +	]). + +-ifdef(debug). +-compile(export_all). +-endif. + +%% gen_server callbacks +-export([ +	 init/1, +	 handle_call/3, +	 handle_cast/2, +	 handle_info/2, +	 terminate/2, +	 code_change/3 +	]). + +-include("ibrowse.hrl"). + +-record(state, {host, port,  +		use_proxy = false, proxy_auth_digest, +		ssl_options = [], is_ssl = false, socket,  +		reqs=queue:new(), cur_req, status=idle, http_status_code,  +		reply_buffer=[], rep_buf_size=0, recvd_headers=[], +		is_closing, send_timer, content_length, +		deleted_crlf = false, transfer_encoding, chunk_size,  +		chunks=[], lb_ets_tid, cur_pipeline_size = 0}). + +-record(request, {url, method, options, from, +		  stream_to, req_id, +		  save_response_to_file = false, +		  tmp_file_name, tmp_file_fd}). + +-import(ibrowse_lib, [ +		      parse_url/1, +		      printable_date/0, +		      get_value/2, +		      get_value/3, +		      do_trace/2 +		     ]). + +%%==================================================================== +%% External functions +%%==================================================================== +%%-------------------------------------------------------------------- +%% Function: start_link/0 +%% Description: Starts the server +%%-------------------------------------------------------------------- +start(Args) -> +    gen_server:start(?MODULE, Args, []). + +start_link(Args) -> +    gen_server:start_link(?MODULE, Args, []). + +stop(Conn_pid) -> +    gen_server:call(Conn_pid, stop). + +send_req(Conn_Pid, Url, Headers, Method, Body, Options, Timeout) -> +    Timeout_1 = case Timeout of +		    infinity -> +			infinity; +		    _ when is_integer(Timeout) -> +			Timeout + 100 +		end, +    gen_server:call( +      Conn_Pid, +      {send_req, {Url, Headers, Method, Body, Options, Timeout}}, Timeout_1). + +%%==================================================================== +%% Server functions +%%==================================================================== + +%%-------------------------------------------------------------------- +%% Function: init/1 +%% Description: Initiates the server +%% Returns: {ok, State}          | +%%          {ok, State, Timeout} | +%%          ignore               | +%%          {stop, Reason} +%%-------------------------------------------------------------------- +init({Lb_Tid, #url{host = Host, port = Port}, {SSLOptions, Is_ssl}}) -> +    State = #state{host = Host, +		   port = Port, +		   ssl_options = SSLOptions, +		   is_ssl = Is_ssl, +		   lb_ets_tid = Lb_Tid}, +    put(ibrowse_trace_token, [Host, $:, integer_to_list(Port)]), +    put(my_trace_flag, ibrowse_lib:get_trace_status(Host, Port)), +    {ok, State}; +init({Host, Port}) -> +    State = #state{host = Host, +		   port = Port}, +    put(ibrowse_trace_token, [Host, $:, integer_to_list(Port)]), +    put(my_trace_flag, ibrowse_lib:get_trace_status(Host, Port)), +    {ok, State}. + +%%-------------------------------------------------------------------- +%% Function: handle_call/3 +%% Description: Handling call messages +%% Returns: {reply, Reply, State}          | +%%          {reply, Reply, State, Timeout} | +%%          {noreply, State}               | +%%          {noreply, State, Timeout}      | +%%          {stop, Reason, Reply, State}   | (terminate/2 is called) +%%          {stop, Reason, State}            (terminate/2 is called) +%%-------------------------------------------------------------------- +%% Received a request when the remote server has already sent us a +%% Connection: Close header +handle_call({send_req, _},  +	    _From, +	    #state{is_closing=true}=State) -> +    {reply, {error, connection_closing}, State}; + +handle_call({send_req, {Url, Headers, Method, Body, Options, Timeout}},  +	    From, +	    #state{socket=undefined, +		   host=Host, port=Port}=State) -> +    {Host_1, Port_1, State_1} = +	case get_value(proxy_host, Options, false) of +	    false -> +		{Host, Port, State}; +	    PHost -> +		ProxyUser = get_value(proxy_user, Options, []), +		ProxyPassword = get_value(proxy_password, Options, []), +		Digest = http_auth_digest(ProxyUser, ProxyPassword), +		{PHost, get_value(proxy_port, Options, 80), +		 State#state{use_proxy = true, +			     proxy_auth_digest = Digest}} +	end, +    StreamTo = get_value(stream_to, Options, undefined), +    ReqId = make_req_id(), +    SaveResponseToFile = get_value(save_response_to_file, Options, false), +    NewReq = #request{url=Url,  +		      method=Method, +		      stream_to=StreamTo, +		      options=Options,  +		      req_id=ReqId, +		      save_response_to_file = SaveResponseToFile, +		      from=From}, +    Reqs = queue:in(NewReq, State#state.reqs), +    State_2 = check_ssl_options(Options, State_1#state{reqs = Reqs}), +    do_trace("Connecting...~n", []), +    Timeout_1 = case Timeout of +		    infinity -> +			infinity; +		    _ -> +			round(Timeout*0.9) +		end, +    case do_connect(Host_1, Port_1, Options, State_2, Timeout_1) of +	{ok, Sock} -> +	    Ref = case Timeout of +		      infinity -> +			  undefined; +		      _ -> +			  erlang:send_after(Timeout, self(), {req_timedout, From}) +		  end, +	    do_trace("Connected!~n", []), +	    case send_req_1(Url, Headers, Method, Body, Options, Sock, State_2) of +		ok -> +		    case StreamTo of +			undefined -> +			    ok; +			_ -> +			    gen_server:reply(From, {ibrowse_req_id, ReqId}) +		    end, +		    State_3 = inc_pipeline_counter(State_2#state{socket = Sock, +								 send_timer = Ref, +								 cur_req = NewReq, +								 status = get_header}), +		    {noreply, State_3}; +		Err -> +		    shutting_down(State_2), +		    do_trace("Send failed... Reason: ~p~n", [Err]), +		    gen_server:reply(From, {error, send_failed}), +		    {stop, normal, State_2} +	    end; +	Err -> +	    shutting_down(State_2), +	    do_trace("Error connecting. Reason: ~1000.p~n", [Err]), +	    gen_server:reply(From, {error, conn_failed}), +	    {stop, normal, State_2} +    end; + +%% Request which is to be pipelined +handle_call({send_req, {Url, Headers, Method, +			 Body, Options, Timeout}}, +	    From, +	    #state{socket=Sock, status=Status, reqs=Reqs}=State) -> +    do_trace("Recvd request in connected state. Status -> ~p NumPending: ~p~n", [Status, length(queue:to_list(Reqs))]), +    StreamTo = get_value(stream_to, Options, undefined), +    SaveResponseToFile = get_value(save_response_to_file, Options, false), +    ReqId = make_req_id(), +    NewReq = #request{url=Url,  +		      stream_to=StreamTo, +		      method=Method, +		      options=Options,  +		      req_id=ReqId, +		      save_response_to_file = SaveResponseToFile, +		      from=From}, +    State_1 = State#state{reqs=queue:in(NewReq, State#state.reqs)}, +    case send_req_1(Url, Headers, Method, Body, Options, Sock, State_1) of +	ok -> +	    State_2 = inc_pipeline_counter(State_1), +	    do_setopts(Sock, [{active, true}], State#state.is_ssl), +	    case Timeout of +		infinity -> +		    ok; +		_ -> +		    erlang:send_after(Timeout, self(), {req_timedout, From}) +	    end, +	    State_3 = case Status of +			  idle -> +			      State_2#state{status = get_header, +					    cur_req = NewReq}; +			  _ -> +			      State_2 +		      end, +	    case StreamTo of +		undefined -> +		    ok; +		_ -> +		    gen_server:reply(From, {ibrowse_req_id, ReqId}) +	    end, +	    {noreply, State_3}; +	Err -> +	    shutting_down(State_1), +	    do_trace("Send request failed: Reason: ~p~n", [Err]), +	    gen_server:reply(From, {error, send_failed}), +	    do_error_reply(State, send_failed), +	    {stop, normal, State_1} +    end; + +handle_call(stop, _From, #state{socket = Socket, is_ssl = Is_ssl} = State) -> +    do_close(Socket, Is_ssl), +    do_error_reply(State, closing_on_request), +    {stop, normal, State}; + +handle_call(Request, _From, State) -> +    Reply = {unknown_request, Request}, +    {reply, Reply, State}. + +%%-------------------------------------------------------------------- +%% Function: handle_cast/2 +%% Description: Handling cast messages +%% Returns: {noreply, State}          | +%%          {noreply, State, Timeout} | +%%          {stop, Reason, State}            (terminate/2 is called) +%%-------------------------------------------------------------------- +handle_cast(_Msg, State) -> +    {noreply, State}. + +%%-------------------------------------------------------------------- +%% Function: handle_info/2 +%% Description: Handling all non call/cast messages +%% Returns: {noreply, State}          | +%%          {noreply, State, Timeout} | +%%          {stop, Reason, State}            (terminate/2 is called) +%%-------------------------------------------------------------------- +handle_info({tcp, _Sock, Data}, State) -> +    handle_sock_data(Data, State); +handle_info({ssl, _Sock, Data}, State) -> +    handle_sock_data(Data, State); + +handle_info({tcp_closed, _Sock}, State) -> +    do_trace("TCP connection closed by peer!~n", []), +    handle_sock_closed(State), +    {stop, normal, State}; +handle_info({ssl_closed, _Sock}, State) -> +    do_trace("SSL connection closed by peer!~n", []), +    handle_sock_closed(State), +    {stop, normal, State}; + +handle_info({req_timedout, From}, State) -> +    case lists:keysearch(From, #request.from, queue:to_list(State#state.reqs)) of +       false -> +          {noreply, State}; +       {value, _} -> +          shutting_down(State), +          do_error_reply(State, req_timedout), +          {stop, normal, State} +    end; + +handle_info({trace, Bool}, State) -> +    put(my_trace_flag, Bool), +    {noreply, State}; + +handle_info(Info, State) -> +    io:format("Recvd unknown message ~p when in state: ~p~n", [Info, State]), +    {noreply, State}. + +%%-------------------------------------------------------------------- +%% Function: terminate/2 +%% Description: Shutdown the server +%% Returns: any (ignored by gen_server) +%%-------------------------------------------------------------------- +terminate(_Reason, State) -> +    case State#state.socket of +	undefined -> +	    ok; +	Sock -> +	    do_close(Sock, State#state.is_ssl) +    end. + +%%-------------------------------------------------------------------- +%% Func: code_change/3 +%% Purpose: Convert process state when code is changed +%% Returns: {ok, NewState} +%%-------------------------------------------------------------------- +code_change(_OldVsn, State, _Extra) -> +    {ok, State}. + +%%-------------------------------------------------------------------- +%%% Internal functions +%%-------------------------------------------------------------------- + +%%-------------------------------------------------------------------- +%% Handles data recvd on the socket +%%-------------------------------------------------------------------- +handle_sock_data(Data, #state{status=idle}=State) -> +    do_trace("Data recvd on socket in state idle!. ~1000.p~n", [Data]), +    shutting_down(State), +    do_error_reply(State, data_in_status_idle), +    do_close(State#state.socket, State#state.is_ssl), +    {stop, normal, State}; + +handle_sock_data(Data, #state{status=get_header, socket=Sock}=State) -> +    case parse_response(Data, State) of +	{error, _Reason} -> +	    shutting_down(State), +	    {stop, normal, State}; +	stop -> +	    shutting_down(State), +	    {stop, normal, State}; +	State_1 -> +	    do_setopts(Sock, [{active, true}], State#state.is_ssl), +	    {noreply, State_1} +    end; + +handle_sock_data(Data, #state{status=get_body, content_length=CL, +			      http_status_code = StatCode, +			      recvd_headers=Headers,  +			      chunk_size=CSz, socket=Sock}=State) -> +    case (CL == undefined) and (CSz == undefined) of +	true -> +	    case accumulate_response(Data, State) of +		{error, Reason} -> +		    shutting_down(State), +		    fail_pipelined_requests(State,  +					    {error, {Reason, {stat_code, StatCode}, Headers}}), +		    {stop, normal, State}; +		State_1 -> +		    do_setopts(Sock, [{active, true}], State#state.is_ssl), +		    {noreply, State_1} +	    end; +	_ -> +	    case parse_11_response(Data, State) of +		{error, Reason} -> +		    shutting_down(State), +		    fail_pipelined_requests(State,  +					    {error, {Reason, {stat_code, StatCode}, Headers}}), +		    {stop, normal, State}; +		stop -> +		    shutting_down(State), +		    {stop, normal, State}; +		State_1 -> +		    do_setopts(Sock, [{active, true}], State#state.is_ssl), +		    {noreply, State_1} +	    end +    end. + +accumulate_response(Data, +		    #state{ +		      cur_req = #request{save_response_to_file = SaveResponseToFile, +					 tmp_file_fd = undefined} = CurReq, +		      http_status_code=[$2 | _]}=State) when SaveResponseToFile /= false -> +    TmpFilename = case SaveResponseToFile of +		      true -> make_tmp_filename(); +		      F -> F +		  end, +    case file:open(TmpFilename, [write, delayed_write, raw]) of +	{ok, Fd} -> +	    accumulate_response(Data, State#state{ +					cur_req = CurReq#request{ +						    tmp_file_fd = Fd, +						    tmp_file_name = TmpFilename}}); +	{error, Reason} -> +	    {error, {file_open_error, Reason}} +    end; +accumulate_response(Data, #state{cur_req = #request{save_response_to_file = SaveResponseToFile, +						    tmp_file_fd = Fd}, +				 transfer_encoding=chunked, +				 chunks = Chunks, +				 http_status_code=[$2 | _] +				} = State) when SaveResponseToFile /= false -> +    case file:write(Fd, [Chunks | Data]) of +	ok -> +	    State#state{chunks = []}; +	{error, Reason} -> +	    {error, {file_write_error, Reason}} +    end; +accumulate_response(Data, #state{cur_req = #request{save_response_to_file = SaveResponseToFile, +						    tmp_file_fd = Fd}, +				 reply_buffer = RepBuf, +				 http_status_code=[$2 | _] +				} = State) when SaveResponseToFile /= false -> +    case file:write(Fd, [RepBuf | Data]) of +	ok -> +	    State#state{reply_buffer = []}; +	{error, Reason} -> +	    {error, {file_write_error, Reason}} +    end; +accumulate_response([], State) -> +    State; +accumulate_response(Data, #state{reply_buffer = RepBuf, +				 cur_req = CurReq}=State) -> +    #request{stream_to=StreamTo, req_id=ReqId} = CurReq, +    case StreamTo of +	undefined -> +	    State#state{reply_buffer = [Data | RepBuf]}; +	_ -> +	    do_interim_reply(StreamTo, ReqId, Data), +	    State +    end. + +make_tmp_filename() -> +    DownloadDir = ibrowse:get_config_value(download_dir, filename:absname("./")), +    {A,B,C} = now(), +    filename:join([DownloadDir, +		   "ibrowse_tmp_file_"++ +		   integer_to_list(A) ++ +		   integer_to_list(B) ++ +		   integer_to_list(C)]). + + +%%-------------------------------------------------------------------- +%% Handles the case when the server closes the socket +%%-------------------------------------------------------------------- +handle_sock_closed(#state{status=get_header}=State) -> +    shutting_down(State), +    do_error_reply(State, connection_closed); + +handle_sock_closed(#state{cur_req=undefined} = State) -> +    shutting_down(State); + +%% We check for IsClosing because this the server could have sent a  +%% Connection-Close header and has closed the socket to indicate end +%% of response. There maybe requests pipelined which need a response. +handle_sock_closed(#state{reply_buffer=Buf, reqs=Reqs, http_status_code=SC, +			  is_closing=IsClosing, +			  cur_req=#request{tmp_file_name=TmpFilename, +					   tmp_file_fd=Fd} = CurReq, +			  status=get_body, recvd_headers=Headers}=State) -> +    #request{from=From, stream_to=StreamTo, req_id=ReqId} = CurReq, +    case IsClosing of +	true -> +	    {_, Reqs_1} = queue:out(Reqs), +	    case TmpFilename of +		undefined -> +		    do_reply(State, From, StreamTo, ReqId, +			     {ok, SC, Headers, +			      lists:flatten(lists:reverse(Buf))}); +		_ -> +		    file:close(Fd), +		    do_reply(State, From, StreamTo, ReqId, +			     {ok, SC, Headers, {file, TmpFilename}}) +	    end, +	    do_error_reply(State#state{reqs = Reqs_1}, connection_closed), +	    State; +	_ -> +	    do_error_reply(State, connection_closed), +	    State +    end. + +do_connect(Host, Port, _Options, #state{is_ssl=true, ssl_options=SSLOptions}, Timeout) -> +    ssl:connect(Host, Port, [{nodelay, true}, {active, false} | SSLOptions], Timeout); +do_connect(Host, Port, _Options, _State, Timeout) -> +    gen_tcp:connect(Host, Port, [{nodelay, true}, {active, false}], Timeout). + +do_send(Sock, Req, true)  ->  ssl:send(Sock, Req); +do_send(Sock, Req, false) ->  gen_tcp:send(Sock, Req). + +%% @spec do_send_body(Sock::socket_descriptor(), Source::source_descriptor(), IsSSL::boolean()) -> ok | error() +%% source_descriptor() = fun_arity_0           | +%%                       {fun_arity_0}         | +%%                       {fun_arity_1, term()} +%% error() = term() +do_send_body(Sock, Source, IsSSL) when is_function(Source) -> +    do_send_body(Sock, {Source}, IsSSL); +do_send_body(Sock, {Source}, IsSSL) when is_function(Source) -> +    do_send_body1(Sock, Source, IsSSL, Source()); +do_send_body(Sock, {Source, State}, IsSSL) when is_function(Source) -> +    do_send_body1(Sock, Source, IsSSL, Source(State)); +do_send_body(Sock, Body, IsSSL) -> +    do_send(Sock, Body, IsSSL). + +do_send_body1(Sock, Source, IsSSL, Resp) -> +    case Resp of +	{ok, Data} -> +	    do_send(Sock, Data, IsSSL), +	    do_send_body(Sock, {Source}, IsSSL); +	{ok, Data, NewState} -> +	    do_send(Sock, Data, IsSSL), +	    do_send_body(Sock, {Source, NewState}, IsSSL); +	eof -> ok; +	Err -> Err +    end. + +do_close(Sock, true)  ->  ssl:close(Sock); +do_close(Sock, false) ->  gen_tcp:close(Sock). + +do_setopts(Sock, Opts, true)  ->  ssl:setopts(Sock, Opts); +do_setopts(Sock, Opts, false) ->  inet:setopts(Sock, Opts). + +check_ssl_options(Options, State) -> +    case get_value(is_ssl, Options, false) of +	false -> +	    State; +	true -> +	    State#state{is_ssl=true, ssl_options=get_value(ssl_options, Options)} +    end. + +send_req_1(#url{abspath = AbsPath, +		host = Host, +		port = Port,  +		path = RelPath} = Url, +	   Headers, Method, Body, Options, Sock, State) -> +    Headers_1 = add_auth_headers(Url, Options, Headers, State), +    HostHeaderValue = case lists:keysearch(host_header, 1, Options) of +			  false -> +			      case Port of +				  80 -> Host; +				  _ -> [Host, ":", integer_to_list(Port)] +			      end; +			  {value, {_, Host_h_val}} -> +			      Host_h_val +		      end, +    {Req, Body_1} = make_request(Method,  +				 [{"Host", HostHeaderValue} | Headers_1], +				 AbsPath, RelPath, Body, Options, State#state.use_proxy), +    case get(my_trace_flag) of  +	true -> +	    %%Avoid the binary operations if trace is not on... +	    NReq = binary_to_list(list_to_binary(Req)), +	    do_trace("Sending request: ~n" +		     "--- Request Begin ---~n~s~n" +		     "--- Request End ---~n", [NReq]); +	_ -> ok +    end, +    SndRes = case do_send(Sock, Req, State#state.is_ssl) of +		 ok -> do_send_body(Sock, Body_1, State#state.is_ssl); +		 Err ->  +		     io:format("Err: ~p~n", [Err]), +		     Err +	     end, +    do_setopts(Sock, [{active, true}], State#state.is_ssl), +    SndRes. + +add_auth_headers(#url{username = User, +		      password = UPw},  +		 Options, +		 Headers,  +		 #state{use_proxy = UseProxy, +		        proxy_auth_digest = ProxyAuthDigest}) -> +    Headers_1 = case User of +		    undefined -> +			case get_value(basic_auth, Options, undefined) of +			    undefined -> +				Headers; +			    {U,P} -> +				[{"Authorization", ["Basic ", http_auth_digest(U, P)]} | Headers] +			end; +		    _ -> +			[{"Authorization", ["Basic ", http_auth_digest(User, UPw)]} | Headers] +		end, +    case UseProxy of +	false -> +	    Headers_1; +	true when ProxyAuthDigest == [] -> +	    Headers_1; +	true -> +	    [{"Proxy-Authorization", ["Basic ", ProxyAuthDigest]} | Headers_1] +    end. +			 +http_auth_digest([], []) -> +    []; +http_auth_digest(Username, Password) -> +    encode_base64(Username ++ [$: | Password]). + +encode_base64([]) -> +    []; +encode_base64([A]) -> +    [e(A bsr 2), e((A band 3) bsl 4), $=, $=]; +encode_base64([A,B]) -> +    [e(A bsr 2), e(((A band 3) bsl 4) bor (B bsr 4)), e((B band 15) bsl 2), $=]; +encode_base64([A,B,C|Ls]) -> +    encode_base64_do(A,B,C, Ls). +encode_base64_do(A,B,C, Rest) -> +    BB = (A bsl 16) bor (B bsl 8) bor C, +    [e(BB bsr 18), e((BB bsr 12) band 63),  +     e((BB bsr 6) band 63), e(BB band 63)|encode_base64(Rest)]. + +e(X) when X >= 0, X < 26 -> X+65; +e(X) when X>25, X<52     -> X+71; +e(X) when X>51, X<62     -> X-4; +e(62)                    -> $+; +e(63)                    -> $/; +e(X)                     -> exit({bad_encode_base64_token, X}). + +make_request(Method, Headers, AbsPath, RelPath, Body, Options, UseProxy) -> +    HttpVsn = http_vsn_string(get_value(http_vsn, Options, {1,1})), +    Headers_1 = +	case get_value(content_length, Headers, false) of +	    false when (Body == []) or +	               (Body == <<>>) or +	               is_tuple(Body) or +	               is_function(Body) -> +		Headers; +	    false when is_binary(Body) -> +		[{"content-length", integer_to_list(size(Body))} | Headers]; +	    false -> +		[{"content-length", integer_to_list(length(Body))} | Headers]; +	    _ -> +		Headers +	end, +    {Headers_2, Body_1} =  +	case get_value(transfer_encoding, Options, false) of +	    false -> +		{Headers_1, Body}; +	    {chunked, ChunkSize} -> +		{[{X, Y} || {X, Y} <- Headers_1,  +			    X /= "Content-Length", +			    X /= "content-length", +			    X /= content_length] ++ +		 [{"Transfer-Encoding", "chunked"}], +		 chunk_request_body(Body, ChunkSize)} +	end, +    Headers_3 = cons_headers(Headers_2), +    Uri = case get_value(use_absolute_uri, Options, false) or UseProxy of +	      true -> +		  AbsPath; +	      false ->  +		  RelPath +	  end, +    {[method(Method), " ", Uri, " ", HttpVsn, crnl(), Headers_3, crnl()], Body_1}. + +http_vsn_string({0,9}) -> "HTTP/0.9"; +http_vsn_string({1,0}) -> "HTTP/1.0"; +http_vsn_string({1,1}) -> "HTTP/1.1". + +cons_headers(Headers) -> +    cons_headers(Headers, []). +cons_headers([], Acc) -> +    encode_headers(Acc); +cons_headers([{basic_auth, {U,P}} | T], Acc) -> +    cons_headers(T, [{"Authorization", +		      ["Basic ", ibrowse_lib:encode_base64(U++":"++P)]} | Acc]); +cons_headers([{cookie, Cookie} | T], Acc) -> +    cons_headers(T, [{"Cookie", Cookie} | Acc]); +cons_headers([{content_length, L} | T], Acc) -> +    cons_headers(T, [{"Content-Length", L} | Acc]); +cons_headers([{content_type, L} | T], Acc) -> +    cons_headers(T, [{"Content-Type", L} | Acc]); +cons_headers([H | T], Acc) -> +    cons_headers(T, [H | Acc]). + +encode_headers(L) -> +    encode_headers(L, []). +encode_headers([{http_vsn, _Val} | T], Acc) -> +    encode_headers(T, Acc); +encode_headers([{Name,Val} | T], Acc) when list(Name) -> +    encode_headers(T, [[Name, ": ", fmt_val(Val), crnl()] | Acc]); +encode_headers([{Name,Val} | T], Acc) when atom(Name) -> +    encode_headers(T, [[atom_to_list(Name), ": ", fmt_val(Val), crnl()] | Acc]); +encode_headers([], Acc) -> +    lists:reverse(Acc). + +chunk_request_body(Body, ChunkSize) -> +    chunk_request_body(Body, ChunkSize, []). + +chunk_request_body(Body, _ChunkSize, Acc) when Body == <<>>; Body == [] -> +    LastChunk = "0\r\n", +    lists:reverse(["\r\n", LastChunk | Acc]); +chunk_request_body(Body, ChunkSize, Acc) when binary(Body), +                                              size(Body) >= ChunkSize -> +    <<ChunkBody:ChunkSize/binary, Rest/binary>> = Body, +    Chunk = [ibrowse_lib:dec2hex(4, ChunkSize),"\r\n", +	     ChunkBody, "\r\n"], +    chunk_request_body(Rest, ChunkSize, [Chunk | Acc]); +chunk_request_body(Body, _ChunkSize, Acc) when binary(Body) -> +    BodySize = size(Body), +    Chunk = [ibrowse_lib:dec2hex(4, BodySize),"\r\n", +	     Body, "\r\n"], +    LastChunk = "0\r\n", +    lists:reverse(["\r\n", LastChunk, Chunk | Acc]); +chunk_request_body(Body, ChunkSize, Acc) when list(Body), +                                              length(Body) >= ChunkSize -> +    {ChunkBody, Rest} = split_list_at(Body, ChunkSize), +    Chunk = [ibrowse_lib:dec2hex(4, ChunkSize),"\r\n", +	     ChunkBody, "\r\n"], +    chunk_request_body(Rest, ChunkSize, [Chunk | Acc]); +chunk_request_body(Body, _ChunkSize, Acc) when list(Body) -> +    BodySize = length(Body), +    Chunk = [ibrowse_lib:dec2hex(4, BodySize),"\r\n", +	     Body, "\r\n"], +    LastChunk = "0\r\n", +    lists:reverse(["\r\n", LastChunk, Chunk | Acc]). + + +parse_response(_Data, #state{cur_req = undefined}=State) -> +    State#state{status = idle}; +parse_response(Data, #state{reply_buffer=Acc, reqs=Reqs, +			    cur_req=CurReq}=State) -> +    #request{from=From, stream_to=StreamTo, req_id=ReqId, +	     method=Method} = CurReq, +    MaxHeaderSize = ibrowse:get_config_value(max_headers_size, infinity), +    case scan_header(Data, Acc) of +	{yes, Headers, Data_1}  -> +	    do_trace("Recvd Header Data -> ~s~n----~n", [Headers]), +	    do_trace("Recvd headers~n--- Headers Begin ---~n~s~n--- Headers End ---~n~n", [Headers]), +	    {HttpVsn, StatCode, Headers_1} = parse_headers(Headers), +	    do_trace("HttpVsn: ~p StatusCode: ~p Headers_1 -> ~1000.p~n", [HttpVsn, StatCode, Headers_1]), +	    LCHeaders = [{to_lower(X), Y} || {X,Y} <- Headers_1], +	    ConnClose = to_lower(get_value("connection", LCHeaders, "false")), +	    IsClosing = is_connection_closing(HttpVsn, ConnClose), +	    case IsClosing of +		true -> +                    shutting_down(State); +		false -> +		    ok +	    end, +	    State_1 = State#state{recvd_headers=Headers_1, status=get_body,  +				  http_status_code=StatCode, is_closing=IsClosing}, +	    put(conn_close, ConnClose), +	    TransferEncoding = to_lower(get_value("transfer-encoding", LCHeaders, "false")), +	    case get_value("content-length", LCHeaders, undefined) of +		_ when Method == head -> +		    {_, Reqs_1} = queue:out(Reqs), +		    send_async_headers(ReqId, StreamTo, StatCode, Headers_1), +		    State_1_1 = do_reply(State_1, From, StreamTo, ReqId, {ok, StatCode, Headers_1, []}), +		    cancel_timer(State_1_1#state.send_timer, {eat_message, {req_timedout, From}}), +		    State_2 = reset_state(State_1_1), +		    State_3 = set_cur_request(State_2#state{reqs = Reqs_1}), +		    parse_response(Data_1, State_3); +		_ when hd(StatCode) == $1 -> +		    %% No message body is expected. Server may send +		    %% one or more 1XX responses before a proper +		    %% response. +		    send_async_headers(ReqId, StreamTo, StatCode, Headers_1), +		    do_trace("Recvd a status code of ~p. Ignoring and waiting for a proper response~n", [StatCode]), +		    parse_response(Data_1, State_1#state{recvd_headers = [], +							 status = get_header}); +		_ when StatCode == "204"; +		       StatCode == "304" -> +		    %% No message body is expected for these Status Codes. +		    %% RFC2616 - Sec 4.4 +		    {_, Reqs_1} = queue:out(Reqs), +		    send_async_headers(ReqId, StreamTo, StatCode, Headers_1), +		    State_1_1 = do_reply(State_1, From, StreamTo, ReqId, {ok, StatCode, Headers_1, []}), +		    cancel_timer(State_1_1#state.send_timer, {eat_message, {req_timedout, From}}), +		    State_2 = reset_state(State_1_1), +		    State_3 = set_cur_request(State_2#state{reqs = Reqs_1}), +		    parse_response(Data_1, State_3); +		_ when TransferEncoding == "chunked" -> +		    do_trace("Chunked encoding detected...~n",[]), +		    send_async_headers(ReqId, StreamTo, StatCode, Headers_1), +		    case parse_11_response(Data_1, State_1#state{transfer_encoding=chunked, +								 chunk_size=chunk_start, +								 reply_buffer=[], chunks=[]}) of +			{error, Reason} -> +			    fail_pipelined_requests(State_1,  +						    {error, {Reason, +							     {stat_code, StatCode}, Headers_1}}), +			    {error, Reason}; +			State_2 -> +			    State_2 +		    end; +		undefined when HttpVsn == "HTTP/1.0"; +			       ConnClose == "close" -> +		    send_async_headers(ReqId, StreamTo, StatCode, Headers_1), +		    State_1#state{reply_buffer=[Data_1]}; +		undefined -> +		    fail_pipelined_requests(State_1,  +					    {error, {content_length_undefined, +						     {stat_code, StatCode}, Headers}}), +		    {error, content_length_undefined}; +		V -> +		    case catch list_to_integer(V) of +			V_1 when integer(V_1), V_1 >= 0 -> +			    send_async_headers(ReqId, StreamTo, StatCode, Headers_1), +			    do_trace("Recvd Content-Length of ~p~n", [V_1]), +			    State_2 = State_1#state{rep_buf_size=0, +						    reply_buffer=[], +						    content_length=V_1}, +			    case parse_11_response(Data_1, State_2) of +				{error, Reason} -> +				    fail_pipelined_requests(State_1,  +							    {error, {Reason, +								     {stat_code, StatCode}, Headers_1}}), +				    {error, Reason}; +				State_3 -> +				    State_3 +			    end; +			_ -> +			    fail_pipelined_requests(State_1,  +					    {error, {content_length_undefined, +						     {stat_code, StatCode}, Headers}}), +			    {error, content_length_undefined} +		    end +	    end; +	{no, Acc_1} when MaxHeaderSize == infinity -> +	    State#state{reply_buffer=Acc_1}; +	{no, Acc_1} when length(Acc_1) < MaxHeaderSize -> +	    State#state{reply_buffer=Acc_1}; +	{no, _Acc_1} -> +	    fail_pipelined_requests(State, {error, max_headers_size_exceeded}), +	    {error, max_headers_size_exceeded} +    end. + +is_connection_closing("HTTP/0.9", _)       -> true; +is_connection_closing(_, "close")          -> true; +is_connection_closing("HTTP/1.0", "false") -> true; +is_connection_closing(_, _)                -> false. + +%% This clause determines the chunk size when given data from the beginning of the chunk +parse_11_response(DataRecvd,  +		  #state{transfer_encoding=chunked, +			 chunk_size=chunk_start, +			 cur_req=CurReq, +			 reply_buffer=Buf}=State) -> +    case scan_crlf(DataRecvd, Buf) of +	{yes, ChunkHeader, Data_1} -> +	    case parse_chunk_header(ChunkHeader) of +		{error, Reason} -> +		    {error, Reason}; +		ChunkSize -> +		    #request{stream_to=StreamTo, req_id=ReqId} = CurReq, +		    %% +		    %% Do we have to preserve the chunk encoding when streaming? +		    %% +		    do_interim_reply(StreamTo, ReqId, {chunk_start, ChunkSize}), +		    RemLen = length(Data_1), +		    do_trace("Determined chunk size: ~p. Already recvd: ~p~n", [ChunkSize, RemLen]), +		    parse_11_response(Data_1, State#state{rep_buf_size=0,  +							  reply_buffer=[], +							  deleted_crlf=true, +							  chunk_size=ChunkSize}) +	    end; +	{no, Data_1} -> +	    State#state{reply_buffer=Data_1, rep_buf_size=length(Data_1)} +    end; + +%% This clause is there to remove the CRLF between two chunks +%%  +parse_11_response(DataRecvd,  +		  #state{transfer_encoding=chunked, +			 chunk_size=tbd, +			 chunks = Chunks, +			 cur_req=CurReq, +			 reply_buffer=Buf}=State) -> +    case scan_crlf(DataRecvd, Buf) of +	{yes, _, NextChunk} -> +	    #request{stream_to=StreamTo, req_id=ReqId} = CurReq, +	    %% +	    %% Do we have to preserve the chunk encoding when streaming? +	    %% +	    State_1 = State#state{chunk_size=chunk_start, +				  rep_buf_size=0,  +				  reply_buffer=[], +				  deleted_crlf=true}, +	    State_2 = case StreamTo of +			  undefined -> +			      State_1#state{chunks = [Buf | Chunks]}; +		_ -> +			      do_interim_reply(StreamTo, ReqId, chunk_end), +			      State_1 +		      end, +	    parse_11_response(NextChunk, State_2); +	{no, Data_1} -> +	    State#state{reply_buffer=Data_1, rep_buf_size=length(Data_1)} +    end; + +%% This clause deals with the end of a chunked transfer +parse_11_response(DataRecvd,  +		  #state{transfer_encoding=chunked, chunk_size=0, +			 cur_req=CurReq, +			 deleted_crlf = DelCrlf, +			 reply_buffer=Trailer, reqs=Reqs}=State) -> +    do_trace("Detected end of chunked transfer...~n", []), +    DataRecvd_1 = case DelCrlf of +		      false ->  +			  DataRecvd; +		      true -> +			  [$\r, $\n | DataRecvd] +		  end, +    #request{stream_to=StreamTo, req_id=ReqId} = CurReq, +    case scan_header(DataRecvd_1, Trailer) of +	{yes, _TEHeaders, Rem} -> +	    {_, Reqs_1} = queue:out(Reqs), +	    %% +	    %% Do we have to preserve the chunk encoding when streaming? +	    %% +	    do_interim_reply(StreamTo, ReqId, chunk_end), +	    State_1 = handle_response(CurReq, State#state{reqs=Reqs_1}), +	    parse_response(Rem, reset_state(State_1)); +	{no, Rem} -> +	    State#state{reply_buffer=Rem, rep_buf_size=length(Rem), deleted_crlf=false} +    end; + +%% This clause extracts a chunk, given the size. +parse_11_response(DataRecvd,  +		  #state{transfer_encoding=chunked, chunk_size=CSz, +			 rep_buf_size=RepBufSz}=State) -> +    NeedBytes = CSz - RepBufSz, +    DataLen = length(DataRecvd), +    do_trace("Recvd more data: size: ~p. NeedBytes: ~p~n", [DataLen, NeedBytes]), +    case DataLen >= NeedBytes of +	true -> +	    {RemChunk, RemData} = split_list_at(DataRecvd, NeedBytes), +	    do_trace("Recvd another chunk...~n", []), +	    do_trace("RemData -> ~p~n", [RemData]), +	    case accumulate_response(RemChunk, State) of +		{error, Reason} -> +		    do_trace("Error accumulating response --> ~p~n", [Reason]), +		    {error, Reason}; +		#state{reply_buffer = NewRepBuf, +		       chunks = NewChunks} = State_1 -> +		    State_2 = State_1#state{reply_buffer=[], +					    chunks = [lists:reverse(NewRepBuf) | NewChunks], +					    rep_buf_size=0, +					    chunk_size=tbd}, +		    parse_11_response(RemData, State_2) +	    end; +	false -> +	    accumulate_response(DataRecvd, State#state{rep_buf_size=RepBufSz + DataLen}) +    end; + +%% This clause to extract the body when Content-Length is specified +parse_11_response(DataRecvd,  +		  #state{content_length=CL, rep_buf_size=RepBufSz,  +			 reqs=Reqs}=State) -> +    NeedBytes = CL - RepBufSz, +    DataLen = length(DataRecvd), +    case DataLen >= NeedBytes of +	true -> +	    {RemBody, Rem} = split_list_at(DataRecvd, NeedBytes), +	    {_, Reqs_1} = queue:out(Reqs), +	    State_1 = accumulate_response(RemBody, State), +	    State_2 = handle_response(State_1#state.cur_req, State_1#state{reqs=Reqs_1}), +	    State_3 = reset_state(State_2), +	    parse_response(Rem, State_3); +	false -> +	    accumulate_response(DataRecvd, State#state{rep_buf_size=RepBufSz+DataLen}) +    end. + +handle_response(#request{from=From, stream_to=StreamTo, req_id=ReqId, +			 save_response_to_file = SaveResponseToFile,  +			 tmp_file_name = TmpFilename, +			 tmp_file_fd = Fd +			}, +		#state{http_status_code = SCode, +		       send_timer = ReqTimer, +		       reply_buffer = RepBuf, +		       transfer_encoding = TEnc, +		       chunks = Chunks, +		       recvd_headers = RespHeaders}=State) when SaveResponseToFile /= false -> +    Body = case TEnc of +	       chunked -> +		   lists:flatten(lists:reverse(Chunks)); +	       _ -> +		   lists:flatten(lists:reverse(RepBuf)) +	   end, +    State_1 = set_cur_request(State), +    file:close(Fd), +    ResponseBody = case TmpFilename of +		       undefined -> +			   Body; +		       _ -> +			   {file, TmpFilename} +		   end, +    State_2 = do_reply(State_1, From, StreamTo, ReqId, {ok, SCode, RespHeaders, ResponseBody}), +    cancel_timer(ReqTimer, {eat_message, {req_timedout, From}}), +    State_2; +handle_response(#request{from=From, stream_to=StreamTo, req_id=ReqId}, +		#state{http_status_code=SCode, recvd_headers=RespHeaders, +		       reply_buffer=RepBuf, transfer_encoding=TEnc, +		       chunks=Chunks, send_timer=ReqTimer}=State) -> +    Body = case TEnc of +	       chunked -> +		   lists:flatten(lists:reverse(Chunks)); +	       _ -> +		   lists:flatten(lists:reverse(RepBuf)) +	   end, +    State_1 = set_cur_request(State), +    case get(conn_close) of +	"close" -> +	    do_reply(State_1, From, StreamTo, ReqId, {ok, SCode, RespHeaders, Body}), +	    exit(normal); +	_ -> +	    State_2 = do_reply(State_1, From, StreamTo, ReqId, {ok, SCode, RespHeaders, Body}), +	    cancel_timer(ReqTimer, {eat_message, {req_timedout, From}}), +	    State_2 +    end. + +reset_state(State) -> +    State#state{status=get_header, rep_buf_size=0,content_length=undefined, +		reply_buffer=[], chunks=[], recvd_headers=[], deleted_crlf=false, +		http_status_code=undefined, chunk_size=undefined, transfer_encoding=undefined}. + +set_cur_request(#state{reqs = Reqs} = State) -> +    case queue:to_list(Reqs) of +	[] -> +	    State#state{cur_req = undefined}; +	[NextReq | _] -> +	    State#state{cur_req = NextReq} +    end. + +parse_headers(Headers) -> +    case scan_crlf(Headers, []) of +	{yes, StatusLine, T} -> +	    Headers_1 = parse_headers_1(T), +	    case parse_status_line(StatusLine) of +		{ok, HttpVsn, StatCode, _Msg} -> +		    put(http_prot_vsn, HttpVsn), +		    {HttpVsn, StatCode, Headers_1}; +		_ -> %% A HTTP 0.9 response? +		    put(http_prot_vsn, "HTTP/0.9"), +		    {"HTTP/0.9", undefined, Headers} +	    end; +	_ -> +	    {error, no_status_line} +    end. + +% From RFC 2616 +% +%    HTTP/1.1 header field values can be folded onto multiple lines if +%    the continuation line begins with a space or horizontal tab. All +%    linear white space, including folding, has the same semantics as +%    SP. A recipient MAY replace any linear white space with a single +%    SP before interpreting the field value or forwarding the message +%    downstream. +parse_headers_1(String) -> +    parse_headers_1(String, [], []). + +parse_headers_1([$\n, H |T], [$\r | L], Acc) when H == 32; +						  H == $\t ->  +    parse_headers_1(lists:dropwhile(fun(X) -> +					    is_whitespace(X) +				    end, T), [32 | L], Acc); +parse_headers_1([$\n|T], [$\r | L], Acc) ->  +    case parse_header(lists:reverse(L)) of +	invalid -> +	    parse_headers_1(T, [], Acc); +	NewHeader -> +	    parse_headers_1(T, [], [NewHeader | Acc]) +    end; +parse_headers_1([H|T],  L, Acc) ->  +    parse_headers_1(T, [H|L], Acc); +parse_headers_1([], [], Acc) -> +    lists:reverse(Acc); +parse_headers_1([], L, Acc) -> +    Acc_1 = case parse_header(lists:reverse(L)) of +		invalid -> +		    Acc; +		NewHeader -> +		    [NewHeader | Acc] +	    end, +    lists:reverse(Acc_1). + +parse_status_line(Line) -> +    parse_status_line(Line, get_prot_vsn, [], []). +parse_status_line([32 | T], get_prot_vsn, ProtVsn, StatCode) -> +    parse_status_line(T, get_status_code, ProtVsn, StatCode); +parse_status_line([32 | T], get_status_code, ProtVsn, StatCode) -> +    {ok, lists:reverse(ProtVsn), lists:reverse(StatCode), T}; +parse_status_line([H | T], get_prot_vsn, ProtVsn, StatCode) -> +    parse_status_line(T, get_prot_vsn, [H|ProtVsn], StatCode); +parse_status_line([H | T], get_status_code, ProtVsn, StatCode) -> +    parse_status_line(T, get_status_code, ProtVsn, [H | StatCode]); +parse_status_line([], _, _, _) -> +    http_09. + +parse_header(L) -> +    parse_header(L, []). +parse_header([$: | V], Acc) -> +    {lists:reverse(Acc), string:strip(V)}; +parse_header([H | T], Acc) -> +    parse_header(T, [H | Acc]); +parse_header([], _) -> +    invalid. + +scan_header([$\n|T], [$\r,$\n,$\r|L]) -> {yes, lists:reverse([$\n,$\r| L]), T}; +scan_header([H|T],  L)                -> scan_header(T, [H|L]); +scan_header([], L)                    -> {no, L}. + +scan_crlf([$\n|T], [$\r | L]) -> {yes, lists:reverse(L), T}; +scan_crlf([H|T],  L)          -> scan_crlf(T, [H|L]); +scan_crlf([], L)              -> {no, L}. + +fmt_val(L) when list(L)    -> L; +fmt_val(I) when integer(I) -> integer_to_list(I); +fmt_val(A) when atom(A)    -> atom_to_list(A); +fmt_val(Term)              -> io_lib:format("~p", [Term]). + +crnl() -> "\r\n". + +method(get)       -> "GET"; +method(post)      -> "POST"; +method(head)      -> "HEAD"; +method(options)   -> "OPTIONS"; +method(put)       -> "PUT"; +method(delete)    -> "DELETE"; +method(trace)     -> "TRACE"; +method(mkcol)     -> "MKCOL"; +method(propfind)  -> "PROPFIND"; +method(proppatch) -> "PROPPATCH"; +method(lock)      -> "LOCK"; +method(unlock)    -> "UNLOCK"; +method(move)      -> "MOVE"; +method(copy)      -> "COPY". + +%% From RFC 2616 +%% +% The chunked encoding modifies the body of a message in order to +% transfer it as a series of chunks, each with its own size indicator, +% followed by an OPTIONAL trailer containing entity-header +% fields. This allows dynamically produced content to be transferred +% along with the information necessary for the recipient to verify +% that it has received the full message. +% 	Chunked-Body = 	*chunk +% 			last-chunk +% 			trailer +% 			CRLF +% 	chunk = chunk-size [ chunk-extension ] CRLF +% 		chunk-data CRLF +% 	chunk-size = 1*HEX +% 	last-chunk = 1*("0") [ chunk-extension ] CRLF +% 	chunk-extension= *( ";" chunk-ext-name [ "=" chunk-ext-val ] ) +% 	chunk-ext-name = token +% 	chunk-ext-val = token | quoted-string +% 	chunk-data = chunk-size(OCTET) +% 	trailer = *(entity-header CRLF) +% The chunk-size field is a string of hex digits indicating the size +% of the chunk. The chunked encoding is ended by any chunk whose size +% is zero, followed by the trailer, which is terminated by an empty +% line. +%% +%% The parsing implemented here discards all chunk extensions. It also +%% strips trailing spaces from the chunk size fields as Apache 1.3.27 was +%% sending them. +parse_chunk_header([]) -> +    throw({error, invalid_chunk_size}); +parse_chunk_header(ChunkHeader) -> +    parse_chunk_header(ChunkHeader, []). + +parse_chunk_header([$; | _], Acc) -> +    hexlist_to_integer(lists:reverse(Acc)); +parse_chunk_header([H | T], Acc) -> +    case is_whitespace(H) of +	true -> +	    parse_chunk_header(T, Acc); +	false -> +	    parse_chunk_header(T, [H | Acc]) +    end; +parse_chunk_header([], Acc) -> +    hexlist_to_integer(lists:reverse(Acc)). + +is_whitespace(32)  -> true; +is_whitespace($\r) -> true; +is_whitespace($\n) -> true; +is_whitespace($\t) -> true; +is_whitespace(_)   -> false. + + +send_async_headers(_ReqId, undefined, _StatCode, _Headers) -> +    ok; +send_async_headers(ReqId, StreamTo, StatCode, Headers) -> +    catch StreamTo ! {ibrowse_async_headers, ReqId, StatCode, Headers}. + +do_reply(State, From, undefined, _, Msg) -> +    gen_server:reply(From, Msg), +    dec_pipeline_counter(State); +do_reply(State, _From, StreamTo, ReqId, {ok, _, _, _}) -> +    State_1 = dec_pipeline_counter(State), +    catch StreamTo ! {ibrowse_async_response_end, ReqId}, +    State_1; +do_reply(State, _From, StreamTo, ReqId, Msg) -> +    State_1 = dec_pipeline_counter(State), +    catch StreamTo ! {ibrowse_async_response, ReqId, Msg}, +    State_1. + +do_interim_reply(undefined, _ReqId, _Msg) -> +    ok; +do_interim_reply(StreamTo, ReqId, Msg) -> +    catch StreamTo ! {ibrowse_async_response, ReqId, Msg}. + +do_error_reply(#state{reqs = Reqs} = State, Err) -> +    ReqList = queue:to_list(Reqs), +    lists:foreach(fun(#request{from=From, stream_to=StreamTo, req_id=ReqId}) -> +                          do_reply(State, From, StreamTo, ReqId, {error, Err}) +		  end, ReqList). + +fail_pipelined_requests(#state{reqs = Reqs, cur_req = CurReq} = State, Reply) -> +    {_, Reqs_1} = queue:out(Reqs), +    #request{from=From, stream_to=StreamTo, req_id=ReqId} = CurReq, +    do_reply(State, From, StreamTo, ReqId, Reply), +    do_error_reply(State#state{reqs = Reqs_1}, previous_request_failed). + + +split_list_at(List, N) -> +    split_list_at(List, N, []). +split_list_at([], _, Acc) -> +    {lists:reverse(Acc), []}; +split_list_at(List2, 0, List1) -> +    {lists:reverse(List1), List2}; +split_list_at([H | List2], N, List1) -> +    split_list_at(List2, N-1, [H | List1]). + +hexlist_to_integer(List) -> +    hexlist_to_integer(lists:reverse(List), 1, 0). +hexlist_to_integer([H | T], Multiplier, Acc) -> +    hexlist_to_integer(T, Multiplier*16, Multiplier*to_ascii(H) + Acc); +hexlist_to_integer([], _, Acc) -> +    Acc. + +to_ascii($A) -> 10; +to_ascii($a) -> 10; +to_ascii($B) -> 11; +to_ascii($b) -> 11; +to_ascii($C) -> 12; +to_ascii($c) -> 12; +to_ascii($D) -> 13; +to_ascii($d) -> 13; +to_ascii($E) -> 14; +to_ascii($e) -> 14; +to_ascii($F) -> 15; +to_ascii($f) -> 15; +to_ascii($1) -> 1; +to_ascii($2) -> 2; +to_ascii($3) -> 3; +to_ascii($4) -> 4; +to_ascii($5) -> 5; +to_ascii($6) -> 6; +to_ascii($7) -> 7; +to_ascii($8) -> 8; +to_ascii($9) -> 9; +to_ascii($0) -> 0. + +cancel_timer(undefined) -> ok; +cancel_timer(Ref)       -> erlang:cancel_timer(Ref). + +cancel_timer(Ref, {eat_message, Msg}) -> +    cancel_timer(Ref), +    receive  +	Msg -> +	    ok +    after 0 -> +	    ok +    end. + +make_req_id() -> +    now(). + +to_lower(Str) -> +    to_lower(Str, []). +to_lower([H|T], Acc) when H >= $A, H =< $Z -> +    to_lower(T, [H+32|Acc]); +to_lower([H|T], Acc) -> +    to_lower(T, [H|Acc]); +to_lower([], Acc) -> +    lists:reverse(Acc). + +shutting_down(#state{lb_ets_tid = undefined}) -> +    ok; +shutting_down(#state{lb_ets_tid = Tid, +		     cur_pipeline_size = Sz}) -> +    catch ets:delete(Tid, {Sz, self()}). + +inc_pipeline_counter(#state{is_closing = true} = State) -> +    State; +inc_pipeline_counter(#state{cur_pipeline_size = Pipe_sz} = State) -> +    State#state{cur_pipeline_size = Pipe_sz + 1}. + +dec_pipeline_counter(#state{is_closing = true} = State) -> +    State; +dec_pipeline_counter(#state{lb_ets_tid = undefined} = State) -> +    State; +dec_pipeline_counter(#state{cur_pipeline_size = Pipe_sz, +			    lb_ets_tid = Tid} = State) -> +    ets:delete(Tid, {Pipe_sz, self()}), +    ets:insert(Tid, {{Pipe_sz - 1, self()}, []}), +    State#state{cur_pipeline_size = Pipe_sz - 1}. diff --git a/src/ibrowse/ibrowse_lb.erl b/src/ibrowse/ibrowse_lb.erl new file mode 100644 index 00000000..03dc4e02 --- /dev/null +++ b/src/ibrowse/ibrowse_lb.erl @@ -0,0 +1,195 @@ +%%%------------------------------------------------------------------- +%%% File    : ibrowse_lb.erl +%%% Author  : chandru <chandrashekhar.mullaparthi@t-mobile.co.uk> +%%% Description :  +%%% +%%% Created :  6 Mar 2008 by chandru <chandrashekhar.mullaparthi@t-mobile.co.uk> +%%%------------------------------------------------------------------- +-module(ibrowse_lb). + +-vsn('$Id: ibrowse_lb.erl,v 1.1 2008/03/27 01:36:21 chandrusf Exp $ '). +-author(chandru). +-behaviour(gen_server). +%%-------------------------------------------------------------------- +%% Include files +%%-------------------------------------------------------------------- + +%%-------------------------------------------------------------------- +%% External exports +-export([ +	 start_link/1, +	 spawn_connection/5 +	]). + +%% gen_server callbacks +-export([ +	 init/1, +	 handle_call/3, +	 handle_cast/2, +	 handle_info/2, +	 terminate/2, +	 code_change/3 +	]). + +-record(state, {parent_pid, +		ets_tid, +		host, +		port, +		max_sessions, +		max_pipeline_size, +		num_cur_sessions = 0}). + +-import(ibrowse_lib, [ +		      parse_url/1, +		      printable_date/0, +		      get_value/3 +		     ]). +		       + +-include("ibrowse.hrl"). + +%%==================================================================== +%% External functions +%%==================================================================== +%%-------------------------------------------------------------------- +%% Function: start_link/0 +%% Description: Starts the server +%%-------------------------------------------------------------------- +start_link(Args) -> +    gen_server:start_link(?MODULE, Args, []). + +%%==================================================================== +%% Server functions +%%==================================================================== + +%%-------------------------------------------------------------------- +%% Function: init/1 +%% Description: Initiates the server +%% Returns: {ok, State}          | +%%          {ok, State, Timeout} | +%%          ignore               | +%%          {stop, Reason} +%%-------------------------------------------------------------------- +init([Host, Port]) -> +    process_flag(trap_exit, true), +    Max_sessions = ibrowse:get_config_value({max_sessions, Host, Port}, 10), +    Max_pipe_sz = ibrowse:get_config_value({max_pipeline_size, Host, Port}, 10), +    put(my_trace_flag, ibrowse_lib:get_trace_status(Host, Port)), +    put(ibrowse_trace_token, ["LB: ", Host, $:, integer_to_list(Port)]), +    Tid = ets:new(ibrowse_lb, [public, ordered_set]), +    {ok, #state{parent_pid = whereis(ibrowse), +		host = Host, +		port = Port, +		ets_tid = Tid, +		max_pipeline_size = Max_pipe_sz, +	        max_sessions = Max_sessions}}. + +spawn_connection(Lb_pid, Url, +		 Max_sessions, +		 Max_pipeline_size, +		 SSL_options) +  when is_pid(Lb_pid), +       is_record(Url, url), +       is_integer(Max_pipeline_size), +       is_integer(Max_sessions) -> +    gen_server:call(Lb_pid, +		    {spawn_connection, Url, Max_sessions, Max_pipeline_size, SSL_options}). +%%-------------------------------------------------------------------- +%% Function: handle_call/3 +%% Description: Handling call messages +%% Returns: {reply, Reply, State}          | +%%          {reply, Reply, State, Timeout} | +%%          {noreply, State}               | +%%          {noreply, State, Timeout}      | +%%          {stop, Reason, Reply, State}   | (terminate/2 is called) +%%          {stop, Reason, State}            (terminate/2 is called) +%%-------------------------------------------------------------------- +% handle_call({spawn_connection, _Url, Max_sess, Max_pipe, _}, _From, +% 	    #state{max_sessions = Max_sess, +% 		   ets_tid = Tid, +% 		   max_pipeline_size = Max_pipe_sz, +% 		   num_cur_sessions = Num} = State)  +%     when Num >= Max -> +%     Reply = find_best_connection(Tid), +%     {reply, sorry_dude_reuse, State}; + +%% Update max_sessions in #state with supplied value +handle_call({spawn_connection, _Url, Max_sess, Max_pipe, _}, _From, +	    #state{ets_tid = Tid, +		   num_cur_sessions = Num} = State)  +    when Num >= Max_sess -> +    Reply = find_best_connection(Tid, Max_pipe), +    {reply, Reply, State#state{max_sessions = Max_sess}}; + +handle_call({spawn_connection, Url, _Max_sess, _Max_pipe, SSL_options}, _From, +	    #state{num_cur_sessions = Cur, +		   ets_tid = Tid} = State) -> +    {ok, Pid} = ibrowse_http_client:start_link({Tid, Url, SSL_options}), +    ets:insert(Tid, {{1, Pid}, []}), +    {reply, {ok, Pid}, State#state{num_cur_sessions = Cur + 1}}; + +handle_call(Request, _From, State) -> +    Reply = {unknown_request, Request}, +    {reply, Reply, State}. + +%%-------------------------------------------------------------------- +%% Function: handle_cast/2 +%% Description: Handling cast messages +%% Returns: {noreply, State}          | +%%          {noreply, State, Timeout} | +%%          {stop, Reason, State}            (terminate/2 is called) +%%-------------------------------------------------------------------- +handle_cast(_Msg, State) -> +    {noreply, State}. + +%%-------------------------------------------------------------------- +%% Function: handle_info/2 +%% Description: Handling all non call/cast messages +%% Returns: {noreply, State}          | +%%          {noreply, State, Timeout} | +%%          {stop, Reason, State}            (terminate/2 is called) +%%-------------------------------------------------------------------- +handle_info({'EXIT', Parent, _Reason}, #state{parent_pid = Parent} = State) -> +    {stop, normal, State}; + +handle_info({'EXIT', Pid, _Reason}, +	    #state{num_cur_sessions = Cur, +		   ets_tid = Tid} = State) -> +    ets:match_delete(Tid, {{'_', Pid}, '_'}), +    {noreply, State#state{num_cur_sessions = Cur - 1}}; + +handle_info({trace, Bool}, State) -> +    put(my_trace_flag, Bool), +    {noreply, State}; + +handle_info(_Info, State) -> +    {noreply, State}. + +%%-------------------------------------------------------------------- +%% Function: terminate/2 +%% Description: Shutdown the server +%% Returns: any (ignored by gen_server) +%%-------------------------------------------------------------------- +terminate(_Reason, _State) -> +    ok. + +%%-------------------------------------------------------------------- +%% Func: code_change/3 +%% Purpose: Convert process state when code is changed +%% Returns: {ok, NewState} +%%-------------------------------------------------------------------- +code_change(_OldVsn, State, _Extra) -> +    {ok, State}. + +%%-------------------------------------------------------------------- +%%% Internal functions +%%-------------------------------------------------------------------- +find_best_connection(Tid, Max_pipe) -> +    case ets:first(Tid) of +	{Cur_sz, Pid} when Cur_sz < Max_pipe -> +	    ets:delete(Tid, {Cur_sz, Pid}), +	    ets:insert(Tid, {{Cur_sz + 1, Pid}, []}), +	    {ok, Pid}; +	_ -> +	    {error, retry_later} +    end. diff --git a/src/ibrowse/ibrowse_lib.erl b/src/ibrowse/ibrowse_lib.erl new file mode 100644 index 00000000..67c5eee2 --- /dev/null +++ b/src/ibrowse/ibrowse_lib.erl @@ -0,0 +1,399 @@ +%%% File    : ibrowse_lib.erl +%%% Author  : Chandrashekhar Mullaparthi <chandrashekhar.mullaparthi@t-mobile.co.uk> +%%% Description :  +%%% Created : 27 Feb 2004 by Chandrashekhar Mullaparthi <chandrashekhar.mullaparthi@t-mobile.co.uk> +%% @doc Module with a few useful functions + +-module(ibrowse_lib). +-vsn('$Id: ibrowse_lib.erl,v 1.6 2008/03/27 01:35:50 chandrusf Exp $ '). +-author('chandru'). +-ifdef(debug). +-compile(export_all). +-endif. + +-include("ibrowse.hrl"). + +-export([ +	 get_trace_status/2, +	 do_trace/2, +	 do_trace/3, +	 url_encode/1, +	 decode_rfc822_date/1, +	 status_code/1, +	 dec2hex/2, +	 drv_ue/1, +	 drv_ue/2, +	 encode_base64/1, +	 decode_base64/1, +	 get_value/2, +	 get_value/3, +	 parse_url/1, +	 printable_date/0 +	]). + +get_trace_status(Host, Port) -> +    ibrowse:get_config_value({trace, Host, Port}, false). + +drv_ue(Str) -> +    [{port, Port}| _] = ets:lookup(ibrowse_table, port), +    drv_ue(Str, Port). +drv_ue(Str, Port) -> +    case erlang:port_control(Port, 1, Str) of +	[] -> +	    Str; +	Res -> +	    Res +    end. + +%% @doc URL-encodes a string based on RFC 1738. Returns a flat list. +%% @spec url_encode(Str) -> UrlEncodedStr +%% Str = string() +%% UrlEncodedStr = string() +url_encode(Str) when list(Str) -> +    url_encode_char(lists:reverse(Str), []). + +url_encode_char([X | T], Acc) when X >= $0, X =< $9 -> +    url_encode_char(T, [X | Acc]); +url_encode_char([X | T], Acc) when X >= $a, X =< $z -> +    url_encode_char(T, [X | Acc]); +url_encode_char([X | T], Acc) when X >= $A, X =< $Z -> +    url_encode_char(T, [X | Acc]); +url_encode_char([X | T], Acc) when X == $-; X == $_; X == $. -> +    url_encode_char(T, [X | Acc]); +url_encode_char([32 | T], Acc) -> +    url_encode_char(T, [$+ | Acc]); +url_encode_char([X | T], Acc) -> +    url_encode_char(T, [$%, d2h(X bsr 4), d2h(X band 16#0f) | Acc]); +url_encode_char([], Acc) -> +    Acc. + +d2h(N) when N<10 -> N+$0; +d2h(N) -> N+$a-10. + +decode_rfc822_date(String) when list(String) -> +    case catch decode_rfc822_date_1(string:tokens(String, ", \t\r\n")) of +	{'EXIT', _} -> +	    {error, invalid_date}; +	Res -> +	    Res +    end. + +% TODO: Have to handle the Zone +decode_rfc822_date_1([_,DayInt,Month,Year, Time,Zone]) -> +    decode_rfc822_date_1([DayInt,Month,Year, Time,Zone]); +decode_rfc822_date_1([Day,Month,Year, Time,_Zone]) -> +    DayI = list_to_integer(Day), +    MonthI = month_int(Month), +    YearI = list_to_integer(Year), +    TimeTup = case string:tokens(Time, ":") of +		  [H,M] -> +		      {list_to_integer(H), +		       list_to_integer(M), +		       0}; +		  [H,M,S] -> +		      {list_to_integer(H), +		       list_to_integer(M), +		       list_to_integer(S)} +	      end, +    {{YearI,MonthI,DayI}, TimeTup}. + +month_int("Jan") -> 1; +month_int("Feb") -> 2; +month_int("Mar") -> 3; +month_int("Apr") -> 4; +month_int("May") -> 5; +month_int("Jun") -> 6; +month_int("Jul") -> 7; +month_int("Aug") -> 8; +month_int("Sep") -> 9; +month_int("Oct") -> 10; +month_int("Nov") -> 11; +month_int("Dec") -> 12. + +%% @doc Given a status code, returns an atom describing the status code.  +%% @spec status_code(StatusCode::status_code()) -> StatusDescription +%% status_code() = string() | integer() +%% StatusDescription = atom() +status_code(100) -> continue; +status_code(101) -> switching_protocols; +status_code(102) -> processing; +status_code(200) -> ok; +status_code(201) -> created; +status_code(202) -> accepted; +status_code(203) -> non_authoritative_information; +status_code(204) -> no_content; +status_code(205) -> reset_content; +status_code(206) -> partial_content; +status_code(207) -> multi_status; +status_code(300) -> multiple_choices; +status_code(301) -> moved_permanently; +status_code(302) -> found; +status_code(303) -> see_other; +status_code(304) -> not_modified; +status_code(305) -> use_proxy; +status_code(306) -> unused; +status_code(307) -> temporary_redirect; +status_code(400) -> bad_request; +status_code(401) -> unauthorized; +status_code(402) -> payment_required; +status_code(403) -> forbidden; +status_code(404) -> not_found; +status_code(405) -> method_not_allowed; +status_code(406) -> not_acceptable; +status_code(407) -> proxy_authentication_required; +status_code(408) -> request_timeout; +status_code(409) -> conflict; +status_code(410) -> gone; +status_code(411) -> length_required; +status_code(412) -> precondition_failed; +status_code(413) -> request_entity_too_large; +status_code(414) -> request_uri_too_long; +status_code(415) -> unsupported_media_type; +status_code(416) -> requested_range_not_satisfiable; +status_code(417) -> expectation_failed; +status_code(422) -> unprocessable_entity; +status_code(423) -> locked; +status_code(424) -> failed_dependency; +status_code(500) -> internal_server_error; +status_code(501) -> not_implemented; +status_code(502) -> bad_gateway; +status_code(503) -> service_unavailable; +status_code(504) -> gateway_timeout; +status_code(505) -> http_version_not_supported; +status_code(507) -> insufficient_storage; +status_code(X) when is_list(X) -> status_code(list_to_integer(X)); +status_code(_)   -> unknown_status_code. + +%% @doc dec2hex taken from gtk.erl in std dist +%% M = integer() -- number of hex digits required +%% N = integer() -- the number to represent as hex +%% @spec dec2hex(M::integer(), N::integer()) -> string() +dec2hex(M,N) -> dec2hex(M,N,[]). + +dec2hex(0,_N,Ack) -> Ack; +dec2hex(M,N,Ack) -> dec2hex(M-1,N bsr 4,[d2h(N band 15)|Ack]). + +%% @doc Implements the base64 encoding algorithm. The output data type matches in the input data type. +%% @spec encode_base64(In) -> Out +%% In = string() | binary() +%% Out = string() | binary() +encode_base64(List) when list(List) -> +    encode_base64_1(list_to_binary(List)); +encode_base64(Bin) when binary(Bin) -> +    List = encode_base64_1(Bin), +    list_to_binary(List). + +encode_base64_1(<<A:6, B:6, C:6, D:6, Rest/binary>>) -> +    [int_to_b64(A), int_to_b64(B), +     int_to_b64(C), int_to_b64(D) | encode_base64_1(Rest)]; +encode_base64_1(<<A:6, B:6, C:4>>) -> +    [int_to_b64(A), int_to_b64(B), int_to_b64(C bsl 2), $=]; +encode_base64_1(<<A:6, B:2>>) -> +    [int_to_b64(A), int_to_b64(B bsl 4), $=, $=]; +encode_base64_1(<<>>) -> +    []. + +%% @doc Implements the base64 decoding algorithm. The output data type matches in the input data type. +%% @spec decode_base64(In) -> Out | exit({error, invalid_input}) +%% In = string() | binary() +%% Out = string() | binary() +decode_base64(List) when list(List) -> +    decode_base64_1(List, []); +decode_base64(Bin) when binary(Bin) -> +    List = decode_base64_1(binary_to_list(Bin), []), +    list_to_binary(List). + +decode_base64_1([H | T], Acc) when ((H == $\t) or +				    (H == 32) or +				    (H == $\r) or +				    (H == $\n)) -> +    decode_base64_1(T, Acc); + +decode_base64_1([$=, $=], Acc) -> +    lists:reverse(Acc); +decode_base64_1([$=, _ | _], _Acc) -> +    exit({error, invalid_input}); + +decode_base64_1([A1, B1, $=, $=], Acc) -> +    A = b64_to_int(A1), +    B = b64_to_int(B1), +    Oct1 = (A bsl 2) bor (B bsr 4), +    decode_base64_1([], [Oct1 | Acc]); +decode_base64_1([A1, B1, C1, $=], Acc) -> +    A = b64_to_int(A1), +    B = b64_to_int(B1), +    C = b64_to_int(C1), +    Oct1 = (A bsl 2) bor (B bsr 4), +    Oct2 = ((B band 16#f) bsl 6) bor (C bsr 2), +    decode_base64_1([], [Oct2, Oct1 | Acc]); +decode_base64_1([A1, B1, C1, D1 | T], Acc) -> +    A = b64_to_int(A1), +    B = b64_to_int(B1), +    C = b64_to_int(C1), +    D = b64_to_int(D1), +    Oct1 = (A bsl 2) bor (B bsr 4), +    Oct2 = ((B band 16#f) bsl 4) bor (C bsr 2), +    Oct3 = ((C band 2#11) bsl 6) bor D, +    decode_base64_1(T, [Oct3, Oct2, Oct1 | Acc]); +decode_base64_1([], Acc) -> +    lists:reverse(Acc). + +%% Taken from httpd_util.erl +int_to_b64(X) when X >= 0, X =< 25 -> X + $A; +int_to_b64(X) when X >= 26, X =< 51 -> X - 26 + $a; +int_to_b64(X) when X >= 52, X =< 61 -> X - 52 + $0; +int_to_b64(62) -> $+; +int_to_b64(63) -> $/. + +%% Taken from httpd_util.erl +b64_to_int(X) when X >= $A, X =< $Z -> X - $A; +b64_to_int(X) when X >= $a, X =< $z -> X - $a + 26; +b64_to_int(X) when X >= $0, X =< $9 -> X - $0 + 52; +b64_to_int($+) -> 62; +b64_to_int($/) -> 63. + +get_value(Tag, TVL, DefVal) -> +    case lists:keysearch(Tag, 1, TVL) of +	false -> +	    DefVal; +	{value, {_, Val}} -> +	    Val +    end. + +get_value(Tag, TVL) -> +    {value, {_, V}} = lists:keysearch(Tag,1,TVL), +    V. + +parse_url(Url) -> +    parse_url(Url, get_protocol, #url{abspath=Url}, []). + +parse_url([$:, $/, $/ | _], get_protocol, Url, []) -> +    {invalid_uri_1, Url}; +parse_url([$:, $/, $/ | T], get_protocol, Url, TmpAcc) -> +    Prot = list_to_atom(lists:reverse(TmpAcc)), +    parse_url(T, get_username,  +	      Url#url{protocol = Prot}, +	      []); +parse_url([$/ | T], get_username, Url, TmpAcc) -> +    %% No username/password. No  port number +    Url#url{host = lists:reverse(TmpAcc), +	    port = default_port(Url#url.protocol), +	    path = [$/ | T]}; +parse_url([$: | T], get_username, Url, TmpAcc) -> +    %% It is possible that no username/password has been +    %% specified. But we'll continue with the assumption that there is +    %% a username/password. If we encounter a '@' later on, there is a +    %% username/password indeed. If we encounter a '/', it was +    %% actually the hostname +    parse_url(T, get_password,  +	      Url#url{username = lists:reverse(TmpAcc)}, +	      []); +parse_url([$@ | T], get_username, Url, TmpAcc) -> +    parse_url(T, get_host,  +	      Url#url{username = lists:reverse(TmpAcc), +		      password = ""}, +	      []); +parse_url([$@ | T], get_password, Url, TmpAcc) -> +    parse_url(T, get_host,  +	      Url#url{password = lists:reverse(TmpAcc)}, +	      []); +parse_url([$/ | T], get_password, Url, TmpAcc) -> +    %% Ok, what we thought was the username/password was the hostname +    %% and portnumber +    #url{username=User} = Url, +    Port = list_to_integer(lists:reverse(TmpAcc)), +    Url#url{host = User, +	    port = Port, +	    username = undefined, +	    password = undefined, +	    path = [$/ | T]}; +parse_url([$: | T], get_host, #url{} = Url, TmpAcc) -> +    parse_url(T, get_port,  +	      Url#url{host = lists:reverse(TmpAcc)}, +	      []); +parse_url([$/ | T], get_host, #url{protocol=Prot} = Url, TmpAcc) -> +    Url#url{host = lists:reverse(TmpAcc), +	    port = default_port(Prot), +	    path = [$/ | T]}; +parse_url([$/ | T], get_port, #url{protocol=Prot} = Url, TmpAcc) -> +    Port = case TmpAcc of +	       [] -> +		   default_port(Prot); +	       _ -> +		   list_to_integer(lists:reverse(TmpAcc)) +	   end, +    Url#url{port = Port, path = [$/ | T]}; +parse_url([H | T], State, Url, TmpAcc) -> +    parse_url(T, State, Url, [H | TmpAcc]); +parse_url([], get_host, Url, TmpAcc) when TmpAcc /= [] -> +    Url#url{host = lists:reverse(TmpAcc), +	    port = default_port(Url#url.protocol), +	    path = "/"}; +parse_url([], get_username, Url, TmpAcc) when TmpAcc /= [] -> +    Url#url{host = lists:reverse(TmpAcc), +	    port = default_port(Url#url.protocol), +	    path = "/"}; +parse_url([], get_port, #url{protocol=Prot} = Url, TmpAcc) -> +    Port = case TmpAcc of +	       [] -> +		   default_port(Prot); +	       _ -> +		   list_to_integer(lists:reverse(TmpAcc)) +	   end, +    Url#url{port = Port,  +	    path = "/"}; +parse_url([], get_password, Url, TmpAcc) -> +    %% Ok, what we thought was the username/password was the hostname +    %% and portnumber +    #url{username=User} = Url, +    Port = case TmpAcc of +	       [] -> +		   default_port(Url#url.protocol); +	       _ -> +		   list_to_integer(lists:reverse(TmpAcc)) +	   end, +    Url#url{host = User, +	    port = Port, +	    username = undefined, +	    password = undefined, +	    path = "/"}; +parse_url([], State, Url, TmpAcc) -> +    {invalid_uri_2, State, Url, TmpAcc}. + +default_port(http)  -> 80; +default_port(https) -> 443; +default_port(ftp)   -> 21. + +printable_date() -> +    {{Y,Mo,D},{H, M, S}} = calendar:local_time(), +    {_,_,MicroSecs} = now(), +    [integer_to_list(Y), +     $-, +     integer_to_list(Mo), +     $-, +     integer_to_list(D), +     $_, +     integer_to_list(H), +     $:, +     integer_to_list(M), +     $:, +     integer_to_list(S), +     $:, +     integer_to_list(MicroSecs div 1000)]. + +do_trace(Fmt, Args) -> +    do_trace(get(my_trace_flag), Fmt, Args). + +-ifdef(DEBUG). +do_trace(_, Fmt, Args) -> +    io:format("~s -- (~s) - "++Fmt, +	      [printable_date(),  +	       get(ibrowse_trace_token) | Args]). +-else. +do_trace(true, Fmt, Args) -> +    io:format("~s -- (~s) - "++Fmt, +	      [printable_date(),  +	       get(ibrowse_trace_token) | Args]); +do_trace(_, _, _) -> +    ok. +-endif. diff --git a/src/ibrowse/ibrowse_sup.erl b/src/ibrowse/ibrowse_sup.erl new file mode 100644 index 00000000..300435d4 --- /dev/null +++ b/src/ibrowse/ibrowse_sup.erl @@ -0,0 +1,65 @@ +%%%------------------------------------------------------------------- +%%% File    : ibrowse_sup.erl +%%% Author  : Chandrashekhar Mullaparthi <chandrashekhar.mullaparthi@t-mobile.co.uk> +%%% Description :  +%%% +%%% Created : 15 Oct 2003 by Chandrashekhar Mullaparthi <chandrashekhar.mullaparthi@t-mobile.co.uk> +%%%------------------------------------------------------------------- +-module(ibrowse_sup). +-vsn('$Id: ibrowse_sup.erl,v 1.1 2005/05/05 22:28:28 chandrusf Exp $ '). + +-behaviour(supervisor). +%%-------------------------------------------------------------------- +%% Include files +%%-------------------------------------------------------------------- + +%%-------------------------------------------------------------------- +%% External exports +%%-------------------------------------------------------------------- +-export([ +	 start_link/0 +        ]). + +%%-------------------------------------------------------------------- +%% Internal exports +%%-------------------------------------------------------------------- +-export([ +	 init/1 +        ]). + +%%-------------------------------------------------------------------- +%% Macros +%%-------------------------------------------------------------------- +-define(SERVER, ?MODULE). + +%%-------------------------------------------------------------------- +%% Records +%%-------------------------------------------------------------------- + +%%==================================================================== +%% External functions +%%==================================================================== +%%-------------------------------------------------------------------- +%% Function: start_link/0 +%% Description: Starts the supervisor +%%-------------------------------------------------------------------- +start_link() -> +    supervisor:start_link({local, ?SERVER}, ?MODULE, []). + +%%==================================================================== +%% Server functions +%%==================================================================== +%%-------------------------------------------------------------------- +%% Func: init/1 +%% Returns: {ok,  {SupFlags,  [ChildSpec]}} | +%%          ignore                          | +%%          {error, Reason}    +%%-------------------------------------------------------------------- +init([]) -> +    AChild = {ibrowse,{ibrowse,start_link,[]}, +	      permanent,2000,worker,[ibrowse, ibrowse_http_client]}, +    {ok,{{one_for_all,10,1}, [AChild]}}. + +%%==================================================================== +%% Internal functions +%%==================================================================== diff --git a/src/ibrowse/ibrowse_test.erl b/src/ibrowse/ibrowse_test.erl new file mode 100644 index 00000000..b4429c9b --- /dev/null +++ b/src/ibrowse/ibrowse_test.erl @@ -0,0 +1,226 @@ +%%% File    : ibrowse_test.erl +%%% Author  : Chandrashekhar Mullaparthi <chandrashekhar.mullaparthi@t-mobile.co.uk> +%%% Description : Test ibrowse +%%% Created : 14 Oct 2003 by Chandrashekhar Mullaparthi <chandrashekhar.mullaparthi@t-mobile.co.uk> + +-module(ibrowse_test). +-vsn('$Id: ibrowse_test.erl,v 1.3 2008/05/21 15:28:11 chandrusf Exp $ '). +-export([ +	 load_test/3, +	 send_reqs_1/3, +	 do_send_req/2, +	 unit_tests/0, +	 unit_tests/1, +	 drv_ue_test/0, +	 drv_ue_test/1, +	 ue_test/0, +	 ue_test/1 +	]). + +-import(ibrowse_lib, [printable_date/0]). + +%% Use ibrowse:set_max_sessions/3 and ibrowse:set_max_pipeline_size/3 to +%% tweak settings before running the load test. The defaults are 10 and 10. +load_test(Url, NumWorkers, NumReqsPerWorker) when is_list(Url), +                                                  is_integer(NumWorkers), +                                                  is_integer(NumReqsPerWorker), +                                                  NumWorkers > 0, +                                                  NumReqsPerWorker > 0 -> +    proc_lib:spawn(?MODULE, send_reqs_1, [Url, NumWorkers, NumReqsPerWorker]). + +send_reqs_1(Url, NumWorkers, NumReqsPerWorker) -> +    Start_time = now(), +    ets:new(pid_table, [named_table, public]), +    ets:new(ibrowse_test_results, [named_table, public]), +    ets:new(ibrowse_errors, [named_table, public, ordered_set]), +    init_results(), +    process_flag(trap_exit, true), +    log_msg("Starting spawning of workers...~n", []), +    spawn_workers(Url, NumWorkers, NumReqsPerWorker), +    log_msg("Finished spawning workers...~n", []), +    do_wait(), +    End_time = now(), +    log_msg("All workers are done...~n", []), +    log_msg("ibrowse_test_results table: ~n~p~n", [ets:tab2list(ibrowse_test_results)]), +    log_msg("Start time: ~1000.p~n", [calendar:now_to_local_time(Start_time)]), +    log_msg("End time  : ~1000.p~n", [calendar:now_to_local_time(End_time)]), +    Elapsed_time_secs = trunc(timer:now_diff(End_time, Start_time) / 1000000), +    log_msg("Elapsed   : ~p~n", [Elapsed_time_secs]), +    log_msg("Reqs/sec  : ~p~n", [(NumWorkers*NumReqsPerWorker) / Elapsed_time_secs]), +    dump_errors(). + +init_results() -> +    ets:insert(ibrowse_test_results, {crash, 0}), +    ets:insert(ibrowse_test_results, {send_failed, 0}), +    ets:insert(ibrowse_test_results, {other_error, 0}), +    ets:insert(ibrowse_test_results, {success, 0}), +    ets:insert(ibrowse_test_results, {retry_later, 0}), +    ets:insert(ibrowse_test_results, {trid_mismatch, 0}), +    ets:insert(ibrowse_test_results, {success_no_trid, 0}), +    ets:insert(ibrowse_test_results, {failed, 0}), +    ets:insert(ibrowse_test_results, {timeout, 0}), +    ets:insert(ibrowse_test_results, {req_id, 0}). + +spawn_workers(_Url, 0, _) -> +    ok; +spawn_workers(Url, NumWorkers, NumReqsPerWorker) -> +    Pid = proc_lib:spawn_link(?MODULE, do_send_req, [Url, NumReqsPerWorker]), +    ets:insert(pid_table, {Pid, []}), +    spawn_workers(Url, NumWorkers - 1, NumReqsPerWorker). + +do_wait() -> +    receive +	{'EXIT', _, normal} -> +	    do_wait(); +	{'EXIT', Pid, Reason} -> +	    ets:delete(pid_table, Pid), +	    ets:insert(ibrowse_errors, {Pid, Reason}), +	    ets:update_counter(ibrowse_test_results, crash, 1), +	    do_wait(); +	Msg -> +	    io:format("Recvd unknown message...~p~n", [Msg]), +	    do_wait() +    after 1000 -> +	    case ets:info(pid_table, size) of +		0 -> +		    done; +		_ -> +		    do_wait() +	    end +    end. +		      +do_send_req(Url, NumReqs) -> +    do_send_req_1(Url, NumReqs). + +do_send_req_1(_Url, 0) -> +    ets:delete(pid_table, self()); +do_send_req_1(Url, NumReqs) -> +    Counter = integer_to_list(ets:update_counter(ibrowse_test_results, req_id, 1)), +    case ibrowse:send_req(Url, [{"ib_req_id", Counter}], get, [], [], 10000) of +	{ok, _Status, Headers, _Body} -> +	    case lists:keysearch("ib_req_id", 1, Headers) of +		{value, {_, Counter}} -> +		    ets:update_counter(ibrowse_test_results, success, 1); +		{value, _} -> +		    ets:update_counter(ibrowse_test_results, trid_mismatch, 1); +		false -> +		    ets:update_counter(ibrowse_test_results, success_no_trid, 1) +	    end; +	{error, req_timedout} -> +	    ets:update_counter(ibrowse_test_results, timeout, 1); +	{error, send_failed} -> +	    ets:update_counter(ibrowse_test_results, send_failed, 1); +	{error, retry_later} -> +	    ets:update_counter(ibrowse_test_results, retry_later, 1); +	Err -> +	    ets:insert(ibrowse_errors, {now(), Err}), +	    ets:update_counter(ibrowse_test_results, other_error, 1), +	    ok +    end, +    do_send_req_1(Url, NumReqs-1). + +dump_errors() -> +    case ets:info(ibrowse_errors, size) of +	0 -> +	    ok; +	_ -> +	    {A, B, C} = now(), +	    Filename = lists:flatten( +			 io_lib:format("ibrowse_errors_~p_~p_~p.txt" , [A, B, C])), +	    case file:open(Filename, [write, delayed_write, raw]) of +		{ok, Iod} -> +		    dump_errors(ets:first(ibrowse_errors), Iod); +		Err -> +		    io:format("failed to create file ~s. Reason: ~p~n", [Filename, Err]), +		    ok +	    end +    end. + +dump_errors('$end_of_table', Iod) -> +    file:close(Iod); +dump_errors(Key, Iod) -> +    [{_, Term}] = ets:lookup(ibrowse_errors, Key), +    file:write(Iod, io_lib:format("~p~n", [Term])), +    dump_errors(ets:next(ibrowse_errors, Key), Iod). + +%%------------------------------------------------------------------------------ +%% Unit Tests +%%------------------------------------------------------------------------------ +-define(TEST_LIST, [{"http://intranet/messenger", get}, +		    {"http://www.google.co.uk", get}, +		    {"http://www.google.com", get}, +		    {"http://www.google.com", options},  +		    {"http://www.sun.com", get}, +		    {"http://www.oracle.com", get}, +		    {"http://www.bbc.co.uk", get}, +		    {"http://www.bbc.co.uk", trace}, +		    {"http://www.bbc.co.uk", options}, +		    {"http://yaws.hyber.org", get}, +		    {"http://jigsaw.w3.org/HTTP/ChunkedScript", get}, +		    {"http://jigsaw.w3.org/HTTP/TE/foo.txt", get}, +		    {"http://jigsaw.w3.org/HTTP/TE/bar.txt", get}, +		    {"http://jigsaw.w3.org/HTTP/connection.html", get}, +		    {"http://jigsaw.w3.org/HTTP/cc.html", get}, +		    {"http://jigsaw.w3.org/HTTP/cc-private.html", get}, +		    {"http://jigsaw.w3.org/HTTP/cc-proxy-revalidate.html", get}, +		    {"http://jigsaw.w3.org/HTTP/cc-nocache.html", get}, +		    {"http://jigsaw.w3.org/HTTP/h-content-md5.html", get}, +		    {"http://jigsaw.w3.org/HTTP/h-retry-after.html", get}, +		    {"http://jigsaw.w3.org/HTTP/h-retry-after-date.html", get}, +		    {"http://jigsaw.w3.org/HTTP/neg", get}, +		    {"http://jigsaw.w3.org/HTTP/negbad", get}, +		    {"http://jigsaw.w3.org/HTTP/400/toolong/", get}, +		    {"http://jigsaw.w3.org/HTTP/300/", get}, +		    {"http://jigsaw.w3.org/HTTP/Basic/", get, [{basic_auth, {"guest", "guest"}}]}, +		    {"http://jigsaw.w3.org/HTTP/CL/", get} +		   ]). + +unit_tests() -> +    unit_tests([]). + +unit_tests(Options) -> +    lists:foreach(fun({Url, Method}) -> +			  execute_req(Url, Method, Options); +		     ({Url, Method, X_Opts}) -> +			  execute_req(Url, Method, X_Opts ++ Options) +		  end, ?TEST_LIST). + +execute_req(Url, Method) -> +    execute_req(Url, Method, []). + +execute_req(Url, Method, Options) -> +    io:format("~s, ~p: ", [Url, Method]), +    Result = (catch ibrowse:send_req(Url, [], Method, [], Options)), +    case Result of  +	{ok, SCode, _H, _B} -> +	    io:format("Status code: ~p~n", [SCode]); +	Err -> +	    io:format("Err -> ~p~n", [Err]) +    end. + +drv_ue_test() -> +    drv_ue_test(lists:duplicate(1024, 127)). +drv_ue_test(Data) -> +    [{port, Port}| _] = ets:lookup(ibrowse_table, port), +%     erl_ddll:unload_driver("ibrowse_drv"), +%     timer:sleep(1000), +%     erl_ddll:load_driver("../priv", "ibrowse_drv"), +%     Port = open_port({spawn, "ibrowse_drv"}, []), +    {Time, Res} = timer:tc(ibrowse_lib, drv_ue, [Data, Port]), +    io:format("Time -> ~p~n", [Time]), +    io:format("Data Length -> ~p~n", [length(Data)]), +    io:format("Res Length -> ~p~n", [length(Res)]). +%    io:format("Result -> ~s~n", [Res]). + +ue_test() -> +    ue_test(lists:duplicate(1024, $?)). +ue_test(Data) -> +    {Time, Res} = timer:tc(ibrowse_lib, url_encode, [Data]), +    io:format("Time -> ~p~n", [Time]), +    io:format("Data Length -> ~p~n", [length(Data)]), +    io:format("Res Length -> ~p~n", [length(Res)]). +%    io:format("Result -> ~s~n", [Res]). + +log_msg(Fmt, Args) -> +    io:format("~s -- " ++ Fmt, +	      [ibrowse_lib:printable_date() | Args]). diff --git a/utils/Makefile.am b/utils/Makefile.am index 8536b481..608942ef 100644 --- a/utils/Makefile.am +++ b/utils/Makefile.am @@ -24,6 +24,7 @@ run: ../bin/couchdb.tpl  	    -e "s|%localerlanglibdir%|$(abs_top_srcdir)/src|g" \  	    -e "s|%mochiwebebindir%|mochiweb|g" \  	    -e "s|%couchdbebindir%|couchdb|g" \ +	    -e "s|%ibrowseebindir%|ibrowse|g" \  	    -e "s|%defaultini%|default_dev.ini|g" \  	    -e "s|%localini%|local_dev.ini|g" \  	    -e "s|%localerlanglibdir%|$(abs_top_srcdir)/src|g" \ | 
