summaryrefslogtreecommitdiffstats
path: root/system/xen/xsa/xsa206-4.8-0011-oxenstored-blame-the-connection-that-caused-a-transa.patch
blob: b0a8e011ee7da06d73f2306d7ba2d84ef34a7a79 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
From 0972f3e46001e9b3192786033663ef4ee423f8be Mon Sep 17 00:00:00 2001
From: Jonathan Davies <jonathan.davies@citrix.com>
Date: Thu, 23 Mar 2017 14:28:16 +0000
Subject: [PATCH 11/15] oxenstored: blame the connection that caused a
 transaction conflict

Blame each connection found to have made a commit that would cause this
transaction to fail. Each blamed connection is penalised by having its
conflict-credit decremented.

Note the change in semantics for the replay function: we no longer stop after
finding the first operation that can't be replayed. This allows us to identify
all operations that conflicted with this transaction, not just the one that
conflicted first.

Signed-off-by: Jonathan Davies <jonathan.davies@citrix.com>
Signed-off-by: Thomas Sanders <thomas.sanders@citrix.com>
v1 Reviewed-by: Christian Lindig <christian.lindig@citrix.com>

Changes since v1:
 * use correct log levels for informational messages
Changes since v2:
 * fix the blame algorithm and improve logging
   (fix was reviewed by Jonathan Davies)

Reported-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Thomas Sanders <thomas.sanders@citrix.com>
---
 tools/ocaml/xenstored/history.ml | 12 ++++++++++
 tools/ocaml/xenstored/process.ml | 50 ++++++++++++++++++++++++++++++++--------
 2 files changed, 52 insertions(+), 10 deletions(-)

diff --git a/tools/ocaml/xenstored/history.ml b/tools/ocaml/xenstored/history.ml
index 6f7a282..e941e2b 100644
--- a/tools/ocaml/xenstored/history.ml
+++ b/tools/ocaml/xenstored/history.ml
@@ -58,3 +58,15 @@ let push (x: history_record) =
 	match dom with
 	| None -> () (* treat socket connections as always free to conflict *)
 	| Some d -> if not (Domain.is_free_to_conflict d) then history := x :: !history
+
+(* Find the connections from records since commit-count [since] for which [f record] returns [true] *)
+let filter_connections ~since ~f =
+	(* The "mem" call is an optimisation, to avoid calling f if we have picked con already. *)
+	(* Using a hash table rather than a list is to optimise the "mem" call. *)
+	List.fold_left (fun acc hist_rec ->
+		if hist_rec.finish_count > since
+		&& not (Hashtbl.mem acc hist_rec.con)
+		&& f hist_rec
+		then Hashtbl.replace acc hist_rec.con ();
+		acc
+	) (Hashtbl.create 1023) !history
diff --git a/tools/ocaml/xenstored/process.ml b/tools/ocaml/xenstored/process.ml
index 1ed1a8f..5e5a1ab 100644
--- a/tools/ocaml/xenstored/process.ml
+++ b/tools/ocaml/xenstored/process.ml
@@ -16,6 +16,7 @@
 
 let error fmt = Logging.error "process" fmt
 let info fmt = Logging.info "process" fmt
+let debug fmt = Logging.debug "process" fmt
 
 open Printf
 open Stdext
@@ -25,6 +26,7 @@ exception Transaction_nested
 exception Domain_not_match
 exception Invalid_Cmd_Args
 
+(* This controls the do_debug fn in this module, not the debug logging-function. *)
 let allow_debug = ref false
 
 let c_int_of_string s =
@@ -308,23 +310,51 @@ let transaction_replay c t doms cons =
 		false
 	| Transaction.Full(id, oldstore, cstore) ->
 		let tid = Connection.start_transaction c cstore in
-		let new_t = Transaction.make ~internal:true tid cstore in
+		let replay_t = Transaction.make ~internal:true tid cstore in
 		let con = sprintf "r(%d):%s" id (Connection.get_domstr c) in
-		let perform_exn (request, response) =
-			write_access_log ~ty:request.Packet.ty ~tid ~con ~data:request.Packet.data;
+
+		let perform_exn ~wlog txn (request, response) =
+			if wlog then write_access_log ~ty:request.Packet.ty ~tid ~con ~data:request.Packet.data;
 			let fct = function_of_type_simple_op request.Packet.ty in
-			let response' = input_handle_error ~cons ~doms ~fct ~con:c ~t:new_t ~req:request in
-			write_response_log ~ty:request.Packet.ty ~tid ~con ~response:response';
-			if not(Packet.response_equal response response') then raise Transaction_again in
+			let response' = input_handle_error ~cons ~doms ~fct ~con:c ~t:txn ~req:request in
+			if wlog then write_response_log ~ty:request.Packet.ty ~tid ~con ~response:response';
+			if not(Packet.response_equal response response') then raise Transaction_again
+		in
 		finally
 		(fun () ->
 			try
 				Logging.start_transaction ~con ~tid;
-				List.iter perform_exn (Transaction.get_operations t);
-				Logging.end_transaction ~con ~tid;
+				List.iter (perform_exn ~wlog:true replay_t) (Transaction.get_operations t); (* May throw EAGAIN *)
 
-				Transaction.commit ~con new_t
-			with e ->
+				Logging.end_transaction ~con ~tid;
+				Transaction.commit ~con replay_t
+			with
+			| Transaction_again -> (
+				let victim_domstr = Connection.get_domstr c in
+				debug "Apportioning blame for EAGAIN in txn %d, domain=%s" id victim_domstr;
+				let punish guilty_con =
+					debug "Blaming domain %s for conflict with domain %s txn %d"
+						(Connection.get_domstr guilty_con) victim_domstr id;
+					Connection.decr_conflict_credit doms guilty_con
+				in
+				let judge_and_sentence hist_rec = (
+					let can_apply_on store = (
+						let store = Store.copy store in
+						let trial_t = Transaction.make ~internal:true Transaction.none store in
+						try List.iter (perform_exn ~wlog:false trial_t) (Transaction.get_operations t);
+							true
+						with Transaction_again -> false
+					) in
+					if can_apply_on hist_rec.History.before
+					&& not (can_apply_on hist_rec.History.after)
+					then (punish hist_rec.History.con; true)
+					else false
+				) in
+				let guilty_cons = History.filter_connections ~since:t.Transaction.start_count ~f:judge_and_sentence in
+				if Hashtbl.length guilty_cons = 0 then debug "Found no culprit for conflict in %s: must be self or not in history." con;
+				false
+			)
+			| e ->
 				info "transaction_replay %d caught: %s" tid (Printexc.to_string e);
 				false
 			)
-- 
2.1.4