diff -Naurp gcc-3.4.6.orig/gcc/config/mips/mips.c gcc-3.4.6/gcc/config/mips/mips.c
--- gcc-3.4.6.orig/gcc/config/mips/mips.c	2005-07-31 04:35:15.000000000 -0400
+++ gcc-3.4.6/gcc/config/mips/mips.c	2006-04-08 17:41:44.000000000 -0400
@@ -8801,6 +8801,11 @@ mips_reorg (void)
 	dbr_schedule (get_insns (), rtl_dump_file);
       mips_avoid_hazards ();
     }
+  if (mips_r10k_cache_barrier)
+    {
+      static int r10k_insert_cache_barriers (void);
+      r10k_insert_cache_barriers ();
+    }
 }
 
 /* We need to use a special set of functions to handle hard floating
@@ -9661,5 +9666,5 @@ irix_section_type_flags (tree decl, cons
 }
 
 #endif /* TARGET_IRIX */
-
+#include "r10k-cacheb.c"
 #include "gt-mips.h"
diff -Naurp gcc-3.4.6.orig/gcc/config/mips/mips.h gcc-3.4.6/gcc/config/mips/mips.h
--- gcc-3.4.6.orig/gcc/config/mips/mips.h	2004-07-14 20:42:49.000000000 -0400
+++ gcc-3.4.6/gcc/config/mips/mips.h	2006-04-08 17:41:01.000000000 -0400
@@ -122,6 +122,7 @@ extern const char *mips_tune_string;    
 extern const char *mips_isa_string;	/* for -mips{1,2,3,4} */
 extern const char *mips_abi_string;	/* for -mabi={32,n32,64} */
 extern const char *mips_cache_flush_func;/* for -mflush-func= and -mno-flush-func */
+extern const char *mips_r10k_cache_barrier;/* for -mr10k-cache-barrier[={1,2}] */
 extern int mips_string_length;		/* length of strings for mips16 */
 extern const struct mips_cpu_info mips_cpu_info_table[];
 extern const struct mips_cpu_info *mips_arch_info;
@@ -752,6 +753,10 @@ extern const struct mips_cpu_info *mips_
       N_("Don't call any cache flush functions"), 0},			\
   { "flush-func=", &mips_cache_flush_func,				\
       N_("Specify cache flush function"), 0},				\
+  { "r10k-cache-barrier", &mips_r10k_cache_barrier,			\
+      N_("[=1|2]\tGenerate cache barriers for SGI Indigo2/O2 R10k"), 0},	\
+  { "ip28-cache-barrier", &mips_r10k_cache_barrier,			\
+      N_(""), 0},	\
 }
 
 /* This is meant to be redefined in the host dependent files.  */
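[ Both entries store whatever follows "-mr10k-cache-barrier" on the command
  line (i.e. "" or "=1"/"=2") in `mips_r10k_cache_barrier';
  r10k_insert_cache_barriers below decodes that string with strtol.  A
  self-contained sketch of just that decoding; parse_specex and main are
  hypothetical names added for illustration, not part of the patch:

    #include <stdio.h>
    #include <stdlib.h>

    /* Mirror of the TARGET_R10K_SPECEX computation: bit 0 (protect
       stores) is always set; "=2" also sets bit 1 (protect loads).  */
    static int
    parse_specex (const char *s)
    {
      return 1 | (int) strtol (*s != '=' ? s : s + 1, (char **) 0, 0);
    }

    int
    main (void)
    {
      printf ("%d\n", parse_specex (""));    /* 1: stores only      */
      printf ("%d\n", parse_specex ("=2"));  /* 3: stores and loads */
      return 0;
    }
  ]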
diff -Naurp gcc-3.4.6.orig/gcc/config/mips/r10k-cacheb.c gcc-3.4.6/gcc/config/mips/r10k-cacheb.c
--- gcc-3.4.6.orig/gcc/config/mips/r10k-cacheb.c	1969-12-31 19:00:00.000000000 -0500
+++ gcc-3.4.6/gcc/config/mips/r10k-cacheb.c	2006-04-08 17:41:22.000000000 -0400
@@ -0,0 +1,318 @@
+/* Subroutines used for MIPS code generation: generate cache barriers
+   for Silicon Graphics IP28 and IP32/R10000 kernel code.
+   Copyright (C) 2005,2006 peter fuerst, pf@net.alphadv.de.
+
+This file is intended to become part of GCC.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published
+by the Free Software Foundation; either version 2, or (at your
+option) any later version.
+
+This file is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING.  If not, write to the
+Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+MA  02110-1301  USA.  */
+
+
+#define ASM_R10K_CACHE_BARRIER	"cache 0x14,0($sp)"
+
+/* Some macros ported back from GCC 4.x.  */
+
+#define CALL_P(X)  (GET_CODE (X) == CALL_INSN)
+#define MEM_P(X)   (GET_CODE (X) == MEM)
+#define NONJUMP_INSN_P(X)  (GET_CODE (X) == INSN)
+
+#define SEQ_BEGIN(insn)                        \
+  (INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE  \
+   ? XVECEXP (PATTERN (insn), 0, 0)                        \
+   : (insn))
+
+#define SEQ_END(insn)                          \
+  (INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE      \
+   ? XVECEXP (PATTERN (insn), 0, XVECLEN (PATTERN (insn), 0) - 1)  \
+   : (insn))
+
+#define FOR_EACH_SUBINSN(subinsn, insn)        \
+  for ((subinsn) = SEQ_BEGIN (insn);       \
+       (subinsn) != NEXT_INSN (SEQ_END (insn));    \
+       (subinsn) = NEXT_INSN (subinsn))
+
+
+/* Nonzero means generate special cache barriers to inhibit speculative
+   stores which might endanger cache coherency or reference invalid
+   addresses (especially on SGI's Indigo2 R10k (IP28)).  */
+const char *mips_r10k_cache_barrier;
+static int TARGET_R10K_SPECEX;
+
+/* Check whether an instruction is a possibly harmful store instruction,
+   i.e. a store which might cause damage if speculatively executed.  */
+
+/* Return nonzero iff the expression `*memx' matches
+   (mem:M (not (stackpointer_address or constant))). */
+
+static int
+is_stack_pointer (rtx *x, void *data)
+{
+  return (*x == stack_pointer_rtx);
+}
+
+static int
+check_p_mem_expr (rtx *memx, void *data)
+{
+  if (!MEM_P (*memx) || for_each_rtx (memx, is_stack_pointer, 0))
+    return 0;
+
+  /* Stores/loads to/from constant addresses can be considered
+     harmless, since:
+     1)  the address is always valid, even when taken speculatively.
+     2a) the location is (hopefully) never used as a DMA target, so
+         there is no danger of cache inconsistency.
+     2b) uncached loads/stores are guaranteed to be non-speculative. */
+  if (CONSTANT_P (XEXP (*memx, 0)))
+    return 0;
+
+  return 1;
+}
+
+/* Return nonzero iff we find (set (mem:M (non_stackpointer_address)
+   ...)) in instruction pattern `body'.
+   Here we assume that addressing via the stack pointer accesses neither
+   uncached-aliased nor invalid memory.
+   (Maybe this applies to the global pointer and frame pointer as well,
+   but it's safer not to assume it, and probably not worthwhile to
+   consider these registers.)
+
+   Speculative loads from invalid addresses also cause bus errors...
+   So check for (set (reg:M ...) (mem:M (non_stackpointer_address)))
+   too, unless there is an enhanced bus-error handler. */
+
+static int
+check_p_pattern_for_store (rtx *body, void *data)
+{
+  if (*body && GET_CODE (*body) == SET)
+    {
+      /* Cache-barriers for SET_SRC may be requested as well. */
+      if (!(TARGET_R10K_SPECEX & 2))
+        body = &SET_DEST(*body);
+
+      if (for_each_rtx (body, check_p_mem_expr, 0))
+        return 1;
+
+      /* Don't traverse sub-expressions again. */
+      return -1;
+    }
+  return 0;
+}
+
+static int
+strmatch (const char *txt, const char *match)
+{
+  return !strncmp(txt, match, strlen (match));
+}
+
+/* Check for (insn (set (mem:M (dangerous_address)) ...)) or the end of
+   the current basic block in instruction `insn'.
+   `state': (internal) recursion counter and delay-slot flag.
+   The criteria to recognize the end of / the next basic block are
+   duplicated here from final_scan_insn.
+   Return >0: `insn' is critical.
+   Return <0: `insn' is at the end of the current basic block.
+   Return 0:  `insn' can be ignored. */
+
+static int
+check_insn_for_store (int state, rtx insn)
+{
+  rtx body;
+
+  if (INSN_DELETED_P (insn))
+    return 0;
+
+  if (LABEL_P (insn))
+    return -1;
+
+  if (CALL_P (insn) || JUMP_P (insn) || NONJUMP_INSN_P (insn))
+    {
+      body = PATTERN (insn);
+      if (GET_CODE (body) == SEQUENCE)
+        {
+          /* A delayed-branch sequence. */
+          rtx insq;
+          FOR_EACH_SUBINSN(insq, insn)
+            if (! INSN_DELETED_P (insq))
+              {
+                /* |1: delay-slot completely contained in sequence. */
+                if (check_insn_for_store (8+state|1, insq) > 0)
+                  return 1;
+              }
+          /* Following a (conditional) branch sequence, we have a new
+             basic block.  */
+          if (JUMP_P (SEQ_BEGIN(insn)))
+            return -1;
+          /* Handle a call sequence like a conditional branch sequence. */
+          if (CALL_P (SEQ_BEGIN(insn)))
+            return -1;
+        }
+      if (GET_CODE (body) == PARALLEL)
+        if (for_each_rtx (&body, check_p_pattern_for_store, 0))
+           return 1;
+
+      /* Now, only a `simple' INSN or JUMP_INSN remains to be checked. */
+      if (NONJUMP_INSN_P (insn))
+        {
+          /* Since we don't know what's inside, we must treat inline
+             assembly as dangerous. */
+          if (GET_CODE (body) == ASM_INPUT)
+            {
+              const char *t = XSTR (body, 0);
+              if (t && !strmatch(t, ASM_R10K_CACHE_BARRIER))
+                return 1;
+            }
+
+          if (check_p_pattern_for_store (&body, 0) > 0)
+            return 1;
+        }
+      /* Handle a CALL_INSN instruction like a conditional branch. */
+      if (JUMP_P (insn) || CALL_P (insn))
+        {
+          /* Following a (conditional) branch, we have a new basic block. */
+          /* But check the insn(s) in the delay slot first.  If we could
+             know in advance that this jump is in `.reorder' mode, where
+             gas will insert a `nop' into the delay slot, we could skip
+             this test.  Since we don't know, always assume `.noreorder',
+             sometimes emitting a cache barrier that isn't needed.  */
+          /* But if we are here recursively, already checking a (pseudo-)
+             delay-slot, we are done.  */
+          if ( !(state & 1) )
+            for (insn = NEXT_INSN (insn); insn; insn = NEXT_INSN (insn))
+              {
+                if (LABEL_P (insn) || CALL_P (insn) || JUMP_P (insn))
+                  /* Not in delay-slot at all. */
+                  break;
+
+                if (NONJUMP_INSN_P (insn))
+                  {
+                    if (GET_CODE (PATTERN (insn)) == SEQUENCE)
+                      /* Not in delay-slot at all. */
+                      break;
+
+                    if (check_insn_for_store (8+state|1, insn) > 0)
+                      return 1;
+                    /* We're done anyway. */
+                    break;
+                  }
+                /* skip NOTE,... */;
+              }
+          return -1;
+        }
+    }
+  return 0;
+}
+
+
+/* Scan a basic block, starting with `head', for a possibly harmful store
+   instruction.  If one is found, emit a cache barrier at the start of
+   this block.  */
+
+static int
+bb_insert_store_cache_barrier (rtx head, rtx nxtb)
+{
+  rtx insn = head;
+
+  if (!insn || insn == nxtb)
+     return 0;
+
+  while ((insn = NEXT_INSN (insn)) && insn != nxtb)
+    {
+      int found;
+
+      if (NOTE_INSN_BASIC_BLOCK_P(insn)) /* See scan_1_bb_for_store() */
+        break;
+
+      found = check_insn_for_store (0, insn);
+      if (found < 0)
+        break;
+      if (found > 0)
+        {
+          /* Found a critical store instruction.  */
+          insn = gen_rtx_ASM_INPUT (VOIDmode,
+                                    ASM_R10K_CACHE_BARRIER "\t"
+                                    ASM_COMMENT_START " Cache Barrier");
+          /* Here we rely on the assumption that an explicit delay slot,
+             if any, is already embedded (in a SEQUENCE) in `head'!  */
+          insn = emit_insn_after (insn, head);
+          return 1;
+        }
+    }
+  return 0;
+}
+
+
+/* Scan one basic block for a possibly harmful store instruction.
+   If one is found, insert a cache barrier at the start of this block;
+   return the number of inserted cache barriers.  */
+
+static int
+scan_1_bb_for_store (rtx head, rtx end)
+{
+  rtx nxtb;
+  int count;
+
+  /* Note: `end' is not necessarily reachable from `head' (it may be
+     hidden inside a SEQUENCE or PARALLEL), but `nxtb' is.  */
+  nxtb = NEXT_INSN (end);
+
+  /* Each basic block starts with zero or more CODE_LABELs, followed
+     by one NOTE_INSN_BASIC_BLOCK.
+     Note: `head' may already equal NEXT_INSN (`end')!  */
+  while (head && head != nxtb && LABEL_P (head))
+    head = NEXT_INSN (head);
+
+  if (!head || head == nxtb)
+    return 0;
+
+  /* Handle the basic block itself, at most up to next CALL_INSN. */
+  count = bb_insert_store_cache_barrier (head, nxtb);
+
+  /* 1) Handle any CALL_INSN instruction like a conditional branch.
+     2) There may be "basic blocks" in the list which are not basic blocks
+        at all: they contain CODE_LABELs in the body or gather several
+        other basic blocks (e.g. bb5 containing bb6, bb7, bb8).  */
+
+  while ((head = NEXT_INSN (head)) && head != nxtb)
+    {
+      if (INSN_DELETED_P (head))
+        continue;
+
+      /* Later we'll be called again for this bb on its own. */
+      if (NOTE_INSN_BASIC_BLOCK_P(head))
+        break;
+
+      if (CALL_P (SEQ_BEGIN (head)) || LABEL_P (head))
+        count += bb_insert_store_cache_barrier (head, nxtb);
+    }
+  return count;
+}
+
+static int
+r10k_insert_cache_barriers (void)
+{
+  if (mips_r10k_cache_barrier)
+    {
+      basic_block bb;
+
+      const char *s = mips_r10k_cache_barrier;
+      /* Bit 0 (protect stores) is always set; "=2" adds bit 1 (loads). */
+      TARGET_R10K_SPECEX = 1 | strtol (*s != '=' ? s : s + 1, (char **) 0, 0);
+
+      FOR_EACH_BB (bb)
+        if (0 <= bb->index)
+          scan_1_bb_for_store (BB_HEAD (bb), BB_END (bb));
+    }
+  return 0;
+}
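
[ For readers without GCC's rtl.h at hand, the core test above can be
  modelled in a few lines of plain C.  Everything below is a hypothetical
  toy (toy_rtx, uses_stack_pointer and mem_needs_barrier are invented
  names, not GCC code); it only illustrates the for_each_rtx-style walk
  that check_p_mem_expr performs: a MEM is harmless when its address
  involves the stack pointer or is constant, and needs a barrier otherwise.

    #include <stdio.h>

    enum toy_code { TOY_REG_SP, TOY_REG_OTHER, TOY_CONST, TOY_PLUS, TOY_MEM };

    struct toy_rtx {
      enum toy_code code;
      const struct toy_rtx *op0, *op1;  /* sub-expressions, may be NULL */
    };

    /* Depth-first walk, like for_each_rtx with is_stack_pointer:
       return 1 as soon as any sub-expression is the stack pointer.  */
    static int
    uses_stack_pointer (const struct toy_rtx *x)
    {
      if (!x)
        return 0;
      if (x->code == TOY_REG_SP)
        return 1;
      return uses_stack_pointer (x->op0) || uses_stack_pointer (x->op1);
    }

    /* Mirror of check_p_mem_expr's decision: harmless iff the address
       uses the stack pointer or is constant.  */
    static int
    mem_needs_barrier (const struct toy_rtx *mem)
    {
      const struct toy_rtx *addr = mem->op0;
      return !uses_stack_pointer (addr) && addr->code != TOY_CONST;
    }

    int
    main (void)
    {
      const struct toy_rtx sp    = { TOY_REG_SP,    0, 0 };
      const struct toy_rtx reg   = { TOY_REG_OTHER, 0, 0 };
      const struct toy_rtx off   = { TOY_CONST,     0, 0 };
      const struct toy_rtx a_sp  = { TOY_PLUS, &sp,  &off };  /* 8($sp) */
      const struct toy_rtx a_reg = { TOY_PLUS, &reg, &off };  /* 8($t0) */
      const struct toy_rtx m_sp  = { TOY_MEM, &a_sp,  0 };
      const struct toy_rtx m_reg = { TOY_MEM, &a_reg, 0 };

      printf ("store to 8($sp): %s\n",
              mem_needs_barrier (&m_sp) ? "barrier" : "safe");
      printf ("store to 8($t0): %s\n",
              mem_needs_barrier (&m_reg) ? "barrier" : "safe");
      return 0;
    }

  Expected output: "safe" for the $sp-based store and "barrier" for the
  other one, matching the patch's assumption that stack addressing never
  touches uncached-aliased or invalid memory. ]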