0103-tools-ocaml-GC-parameter-tuning.patch


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126

From 9f89883fabd53cb7873cc31778887ba2a1228dd8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= <edvin.torok@citrix.com>
Date: Wed, 12 Oct 2022 19:13:07 +0100
Subject: [PATCH 103/126] tools/ocaml: GC parameter tuning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

By default the OCaml garbage collector would return memory to the OS only
after unused memory is 5x live memory.  Tweak this to 120% instead, which
would match the major GC speed.

This is part of XSA-326.

Signed-off-by: Edwin Török <edvin.torok@citrix.com>
Acked-by: Christian Lindig <christian.lindig@citrix.com>
(cherry picked from commit 4a8bacff20b857ca0d628ef5525877ade11f2a42)
---
 tools/ocaml/xenstored/define.ml    |  1 +
 tools/ocaml/xenstored/xenstored.ml | 64 ++++++++++++++++++++++++++++++
 2 files changed, 65 insertions(+)

diff --git a/tools/ocaml/xenstored/define.ml b/tools/ocaml/xenstored/define.ml
index 6b06f808595b..ba63a8147e09 100644
--- a/tools/ocaml/xenstored/define.ml
+++ b/tools/ocaml/xenstored/define.ml
@@ -25,6 +25,7 @@ let maxwatch = ref (100)
 let maxtransaction = ref (10)
 let maxrequests = ref (1024)   (* maximum requests per transaction *)
 
+let gc_max_overhead = ref 120 (* 120% see comment in xenstored.ml *)
 let conflict_burst_limit = ref 5.0
 let conflict_max_history_seconds = ref 0.05
 let conflict_rate_limit_is_aggregate = ref true
diff --git a/tools/ocaml/xenstored/xenstored.ml b/tools/ocaml/xenstored/xenstored.ml
index d44ae673c42a..3b57ad016dfb 100644
--- a/tools/ocaml/xenstored/xenstored.ml
+++ b/tools/ocaml/xenstored/xenstored.ml
@@ -104,6 +104,7 @@ let parse_config filename =
 		("quota-maxsize", Config.Set_int Quota.maxsize);
 		("quota-maxrequests", Config.Set_int Define.maxrequests);
 		("quota-path-max", Config.Set_int Define.path_max);
+		("gc-max-overhead", Config.Set_int Define.gc_max_overhead);
 		("test-eagain", Config.Set_bool Transaction.test_eagain);
 		("persistent", Config.Set_bool Disk.enable);
 		("xenstored-log-file", Config.String Logging.set_xenstored_log_destination);
@@ -265,6 +266,67 @@ let to_file store cons fds file =
 	        (fun () -> close_out channel)
 end
 
+(*
+	By default OCaml's GC only returns memory to the OS when it exceeds a
+	configurable 'max overhead' setting.
+	The default is 500%, that is 5/6th of the OCaml heap needs to be free
+	and only 1/6th live for a compaction to be triggerred that would
+	release memory back to the OS.
+	If the limit is not hit then the OCaml process can reuse that memory
+	for its own purposes, but other processes won't be able to use it.
+
+	There is also a 'space overhead' setting that controls how much work
+	each major GC slice does, and by default aims at having no more than
+	80% or 120% (depending on version) garbage values compared to live
+	values.
+	This doesn't have as much relevance to memory returned to the OS as
+	long as space_overhead <= max_overhead, because compaction is only
+	triggerred at the end of major GC cycles.
+
+	The defaults are too large once the program starts using ~100MiB of
+	memory, at which point ~500MiB would be unavailable to other processes
+	(which would be fine if this was the main process in this VM, but it is
+	not).
+
+	Max overhead can also be set to 0, however this is for testing purposes
+	only (setting it lower than 'space overhead' wouldn't help because the
+	major GC wouldn't run fast enough, and compaction does have a
+	performance cost: we can only compact contiguous regions, so memory has
+	to be moved around).
+
+	Max overhead controls how often the heap is compacted, which is useful
+	if there are burst of activity followed by long periods of idle state,
+	or if a domain quits, etc. Compaction returns memory to the OS.
+
+	wasted = live * space_overhead / 100
+
+	For globally overriding the GC settings one can use OCAMLRUNPARAM,
+	however we provide a config file override to be consistent with other
+	oxenstored settings.
+
+	One might want to dynamically adjust the overhead setting based on used
+	memory, i.e. to use a fixed upper bound in bytes, not percentage. However
+	measurements show that such adjustments increase GC overhead massively,
+	while still not guaranteeing that memory is returned any more quickly
+	than with a percentage based setting.
+
+	The allocation policy could also be tweaked, e.g. first fit would reduce
+	fragmentation and thus memory usage, but the documentation warns that it
+	can be sensibly slower, and indeed one of our own testcases can trigger
+	such a corner case where it is multiple times slower, so it is best to keep
+	the default allocation policy (next-fit/best-fit depending on version).
+
+	There are other tweaks that can be attempted in the future, e.g. setting
+	'ulimit -v' to 75% of RAM, however getting the kernel to actually return
+	NULL from allocations is difficult even with that setting, and without a
+	NULL the emergency GC won't be triggerred.
+	Perhaps cgroup limits could help, but for now tweak the safest only.
+*)
+
+let tweak_gc () =
+	Gc.set { (Gc.get ()) with Gc.max_overhead = !Define.gc_max_overhead }
+
+
 let _ =
 	let cf = do_argv in
 	let pidfile =
@@ -274,6 +336,8 @@ let _ =
 			default_pidfile
 		in
 
+	tweak_gc ();
+
 	(try
 		Unixext.mkdir_rec (Filename.dirname pidfile) 0o755
 	with _ ->
-- 
2.37.4