From 1f12baed15dc7502365afb54161827405ff24732 Mon Sep 17 00:00:00 2001 From: Thomas Sanders Date: Tue, 14 Mar 2017 12:15:52 +0000 Subject: [PATCH 04/15] oxenstored: handling of domain conflict-credit This commit gives each domain a conflict-credit variable, which will later be used for limiting how often a domain can cause other domain's transaction-commits to fail. This commit also provides functions and data for manipulating domains and their conflict-credit, and checking whether they have credit. Reported-by: Juergen Gross Signed-off-by: Thomas Sanders Reviewed-by: Jonathan Davies Reviewed-by: Christian Lindig --- tools/ocaml/xenstored/connection.ml | 5 ++ tools/ocaml/xenstored/define.ml | 3 + tools/ocaml/xenstored/domain.ml | 11 +++- tools/ocaml/xenstored/domains.ml | 103 ++++++++++++++++++++++++++++++- tools/ocaml/xenstored/oxenstored.conf.in | 32 ++++++++++ tools/ocaml/xenstored/transaction.ml | 2 + tools/ocaml/xenstored/xenstored.ml | 2 + 7 files changed, 154 insertions(+), 4 deletions(-) diff --git a/tools/ocaml/xenstored/connection.ml b/tools/ocaml/xenstored/connection.ml index 3ffd35b..a66d2f7 100644 --- a/tools/ocaml/xenstored/connection.ml +++ b/tools/ocaml/xenstored/connection.ml @@ -296,3 +296,8 @@ let debug con = let domid = get_domstr con in let watches = List.map (fun (path, token) -> Printf.sprintf "watch %s: %s %s\n" domid path token) (list_watches con) in String.concat "" watches + +let decr_conflict_credit doms con = + match con.dom with + | None -> () (* It's a socket connection. We don't know which domain we're in, so treat it as if it's free to conflict *) + | Some dom -> Domains.decr_conflict_credit doms dom diff --git a/tools/ocaml/xenstored/define.ml b/tools/ocaml/xenstored/define.ml index e9d957f..816b493 100644 --- a/tools/ocaml/xenstored/define.ml +++ b/tools/ocaml/xenstored/define.ml @@ -29,6 +29,9 @@ let maxwatch = ref (50) let maxtransaction = ref (20) let maxrequests = ref (-1) (* maximum requests per transaction *) +let conflict_burst_limit = ref 5.0 +let conflict_rate_limit_is_aggregate = ref true + let domid_self = 0x7FF0 exception Not_a_directory of string diff --git a/tools/ocaml/xenstored/domain.ml b/tools/ocaml/xenstored/domain.ml index ab34314..e677aa3 100644 --- a/tools/ocaml/xenstored/domain.ml +++ b/tools/ocaml/xenstored/domain.ml @@ -31,8 +31,12 @@ type t = mutable io_credit: int; (* the rounds of ring process left to do, default is 0, usually set to 1 when there is work detected, could also set to n to give "lazy" clients extra credit *) + mutable conflict_credit: float; (* Must be positive to perform writes; a commit + that later causes conflict with another + domain's transaction costs credit. *) } +let is_dom0 d = d.id = 0 let get_path dom = "/local/domain/" ^ (sprintf "%u" dom.id) let get_id domain = domain.id let get_interface d = d.interface @@ -48,6 +52,10 @@ let set_io_credit ?(n=1) domain = domain.io_credit <- max 0 n let incr_io_credit domain = domain.io_credit <- domain.io_credit + 1 let decr_io_credit domain = domain.io_credit <- max 0 (domain.io_credit - 1) +let is_paused_for_conflict dom = dom.conflict_credit <= 0.0 + +let is_free_to_conflict = is_dom0 + let string_of_port = function | None -> "None" | Some x -> string_of_int (Xeneventchn.to_int x) @@ -84,6 +92,5 @@ let make id mfn remote_port interface eventchn = { port = None; bad_client = false; io_credit = 0; + conflict_credit = !Define.conflict_burst_limit; } - -let is_dom0 d = d.id = 0 diff --git a/tools/ocaml/xenstored/domains.ml b/tools/ocaml/xenstored/domains.ml index 395f3a9..3d29cc8 100644 --- a/tools/ocaml/xenstored/domains.ml +++ b/tools/ocaml/xenstored/domains.ml @@ -15,20 +15,58 @@ *) let debug fmt = Logging.debug "domains" fmt +let error fmt = Logging.error "domains" fmt +let warn fmt = Logging.warn "domains" fmt type domains = { eventchn: Event.t; table: (Xenctrl.domid, Domain.t) Hashtbl.t; + + (* N.B. the Queue module is not thread-safe but oxenstored is single-threaded. *) + (* Domains queue up to regain conflict-credit; we have a queue for + domains that are carrying some penalty and so are below the + maximum credit, and another queue for domains that have run out of + credit and so have had their access paused. *) + doms_conflict_paused: (Domain.t option ref) Queue.t; + doms_with_conflict_penalty: (Domain.t option ref) Queue.t; + + (* A callback function to be called when we go from zero to one paused domain. + This will be to reset the countdown until the next unit of credit is issued. *) + on_first_conflict_pause: unit -> unit; + + (* If config is set to use individual instead of aggregate conflict-rate-limiting, + we use this instead of the queues. *) + mutable n_paused: int; } -let init eventchn = - { eventchn = eventchn; table = Hashtbl.create 10 } +let init eventchn = { + eventchn = eventchn; + table = Hashtbl.create 10; + doms_conflict_paused = Queue.create (); + doms_with_conflict_penalty = Queue.create (); + on_first_conflict_pause = (fun () -> ()); (* Dummy value for now, pending subsequent commit. *) + n_paused = 0; +} let del doms id = Hashtbl.remove doms.table id let exist doms id = Hashtbl.mem doms.table id let find doms id = Hashtbl.find doms.table id let number doms = Hashtbl.length doms.table let iter doms fct = Hashtbl.iter (fun _ b -> fct b) doms.table +(* Functions to handle queues of domains given that the domain might be deleted while in a queue. *) +let push dom queue = + Queue.push (ref (Some dom)) queue + +let rec pop queue = + match !(Queue.pop queue) with + | None -> pop queue + | Some x -> x + +let remove_from_queue dom queue = + Queue.iter (fun d -> match !d with + | None -> () + | Some x -> if x=dom then d := None) queue + let cleanup xc doms = let notify = ref false in let dead_dom = ref [] in @@ -52,6 +90,11 @@ let cleanup xc doms = let dom = Hashtbl.find doms.table id in Domain.close dom; Hashtbl.remove doms.table id; + if dom.Domain.conflict_credit <= !Define.conflict_burst_limit + then ( + remove_from_queue dom doms.doms_with_conflict_penalty; + if (dom.Domain.conflict_credit <= 0.) then remove_from_queue dom doms.doms_conflict_paused + ) ) !dead_dom; !notify, !dead_dom @@ -82,3 +125,59 @@ let create0 doms = Domain.bind_interdomain dom; Domain.notify dom; dom + +let decr_conflict_credit doms dom = + let before = dom.Domain.conflict_credit in + let after = max (-1.0) (before -. 1.0) in + dom.Domain.conflict_credit <- after; + if !Define.conflict_rate_limit_is_aggregate then ( + if before >= !Define.conflict_burst_limit + && after < !Define.conflict_burst_limit + && after > 0.0 + then ( + push dom doms.doms_with_conflict_penalty + ) else if before > 0.0 && after <= 0.0 + then ( + let first_pause = Queue.is_empty doms.doms_conflict_paused in + push dom doms.doms_conflict_paused; + if first_pause then doms.on_first_conflict_pause () + ) else ( + (* The queues are correct already: no further action needed. *) + ) + ) else if before > 0.0 && after <= 0.0 then ( + doms.n_paused <- doms.n_paused + 1; + if doms.n_paused = 1 then doms.on_first_conflict_pause () + ) + +(* Give one point of credit to one domain, and update the queues appropriately. *) +let incr_conflict_credit_from_queue doms = + let process_queue q requeue_test = + let d = pop q in + d.Domain.conflict_credit <- min (d.Domain.conflict_credit +. 1.0) !Define.conflict_burst_limit; + if requeue_test d.Domain.conflict_credit then ( + push d q (* Make it queue up again for its next point of credit. *) + ) + in + let paused_queue_test cred = cred <= 0.0 in + let penalty_queue_test cred = cred < !Define.conflict_burst_limit in + try process_queue doms.doms_conflict_paused paused_queue_test + with Queue.Empty -> ( + try process_queue doms.doms_with_conflict_penalty penalty_queue_test + with Queue.Empty -> () (* Both queues are empty: nothing to do here. *) + ) + +let incr_conflict_credit doms = + if !Define.conflict_rate_limit_is_aggregate + then incr_conflict_credit_from_queue doms + else ( + (* Give a point of credit to every domain, subject only to the cap. *) + let inc dom = + let before = dom.Domain.conflict_credit in + let after = min (before +. 1.0) !Define.conflict_burst_limit in + dom.Domain.conflict_credit <- after; + if before <= 0.0 && after > 0.0 + then doms.n_paused <- doms.n_paused - 1 + in + (* Scope for optimisation (probably tiny): avoid iteration if all domains are at max credit *) + iter doms inc + ) diff --git a/tools/ocaml/xenstored/oxenstored.conf.in b/tools/ocaml/xenstored/oxenstored.conf.in index 82117a9..edd4335 100644 --- a/tools/ocaml/xenstored/oxenstored.conf.in +++ b/tools/ocaml/xenstored/oxenstored.conf.in @@ -9,6 +9,38 @@ test-eagain = false # Activate transaction merge support merge-activate = true +# Limits applied to domains whose writes cause other domains' transaction +# commits to fail. Must include decimal point. + +# The burst limit is the number of conflicts a domain can cause to +# fail in a short period; this value is used for both the initial and +# the maximum value of each domain's conflict-credit, which falls by +# one point for each conflict caused, and when it reaches zero the +# domain's requests are ignored. +conflict-burst-limit = 5.0 + +# The conflict-credit is replenished over time: +# one point is issued after each conflict-max-history-seconds, so this +# is the minimum pause-time during which a domain will be ignored. +# conflict-max-history-seconds = 0.05 + +# If the conflict-rate-limit-is-aggregate flag is true then after each +# tick one point of conflict-credit is given to just one domain: the +# one at the front of the queue. If false, then after each tick each +# domain gets a point of conflict-credit. +# +# In environments where it is known that every transaction will +# involve a set of nodes that is writable by at most one other domain, +# then it is safe to set this aggregate-limit flag to false for better +# performance. (This can be determined by considering the layout of +# the xenstore tree and permissions, together with the content of the +# transactions that require protection.) +# +# A transaction which involves a set of nodes which can be modified by +# multiple other domains can suffer conflicts caused by any of those +# domains, so the flag must be set to true. +conflict-rate-limit-is-aggregate = true + # Activate node permission system perms-activate = true diff --git a/tools/ocaml/xenstored/transaction.ml b/tools/ocaml/xenstored/transaction.ml index 51d5d6a..6f758ff 100644 --- a/tools/ocaml/xenstored/transaction.ml +++ b/tools/ocaml/xenstored/transaction.ml @@ -14,6 +14,8 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. *) +let error fmt = Logging.error "transaction" fmt + open Stdext let none = 0 diff --git a/tools/ocaml/xenstored/xenstored.ml b/tools/ocaml/xenstored/xenstored.ml index 2efcce6..20473d5 100644 --- a/tools/ocaml/xenstored/xenstored.ml +++ b/tools/ocaml/xenstored/xenstored.ml @@ -89,6 +89,8 @@ let parse_config filename = let pidfile = ref default_pidfile in let options = [ ("merge-activate", Config.Set_bool Transaction.do_coalesce); + ("conflict-burst-limit", Config.Set_float Define.conflict_burst_limit); + ("conflict-rate-limit-is-aggregate", Config.Set_bool Define.conflict_rate_limit_is_aggregate); ("perms-activate", Config.Set_bool Perms.activate); ("quota-activate", Config.Set_bool Quota.activate); ("quota-maxwatch", Config.Set_int Define.maxwatch); -- 2.1.4