summaryrefslogtreecommitdiff
blob: 8fde50376e798f2ba0f2239f6a13e7b6fcd3b940 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
<?php
/**
 * Normalizes throttle parameters as part of the overhaul described in T203587
 *
 * Tasks performed by this script:
 * - Remove duplicated throttle groups (T203584)
 * - Remove unrecognized stuff from throttle groups (T203584)
 * - Checks if throttle count or period have extra commas inside. If this leads to the filter acting
 *   like it would with throttle disabled, we just disable it. Otherwise, since we don't know what
 *   the filter is meant to do, we just ask users to evaluate and fix every case by hand. This is
 *   highly unlikely to happen anyway. (T203585)
 * - If throttle groups are empty (or only contain unknown keywords), ask users to fix every case
 *   by hand. (T203584)
 * - Change some edge cases of throttle parameters saved in abuse_filter_history (T215787):
 *     - parameters = null ==> parameters = [ filterID, "0,0", 'none' ]
 *     - at least a number missing from parameters[1] ==> insert 0 in place of the missing param
 *     - empty groups ==> 'none' (special case, uses the message abusefilter-throttle-none)
 *
 * @ingroup Maintenance
 */
// Locate the MediaWiki installation: the MW_INSTALL_PATH environment variable wins when it is
// set to a non-empty value, otherwise fall back to the directory three levels above this file.
$IP = getenv( 'MW_INSTALL_PATH' ) ?: __DIR__ . '/../../..';
require_once "$IP/maintenance/Maintenance.php";

/**
 * Normalizes throttle parameters, see T203587
 * @codeCoverageIgnore
 * No need to cover: old, single-use script.
 */
class NormalizeThrottleParameters extends LoggedUpdateMaintenance {
	/** @var \Wikimedia\Rdbms\Database $dbw The master database */
	private $dbw;

	public function __construct() {
		parent::__construct();

		$this->addDescription( 'Normalize AbuseFilter throttle parameters - T203587' );
		$this->addOption( 'dry-run', 'Perform a dry run' );
		$this->requireExtension( 'Abuse Filter' );
	}

	/**
	 * @see Maintenance::getUpdateKey()
	 * @return string
	 */
	public function getUpdateKey() {
		return __CLASS__;
	}

	/**
	 * Rollback the current transaction and emit a fatal error
	 *
	 * @param string $msg The message of the error
	 */
	protected function fail( $msg ) {
		$this->rollbackTransaction( $this->dbw, __METHOD__ );
		$this->fatalError( $msg );
	}

	/**
	 * Get normalized throttle groups
	 *
	 * Every group (and every comma-separated subgroup) is lowercased and must be one of the
	 * known identifiers, otherwise the whole group is dropped. Duplicates are detected on a
	 * canonical form (trimmed, de-duplicated, sorted subgroups), so that e.g. 'ip,user',
	 * 'user, ip' and 'ip,user,ip' all count as the same group. The canonical form is used
	 * for comparison only: the returned groups keep the original order and spacing, to avoid
	 * rewriting a filter just to reorder subgroups or strip spaces.
	 *
	 * @param array $params Throttle parameters
	 * @return array[] The first element is the array of old throttle groups, the second
	 * is an array of formatted throttle groups
	 */
	private function getNewGroups( $params ) {
		$validGroups = [
			'ip',
			'user',
			'range',
			'creationdate',
			'editcount',
			'site',
			'page'
		];
		$rawGroups = array_slice( $params, 2 );
		$newGroups = [];
		// Canonical keys of the groups already kept, used to spot duplicates.
		$seenGroups = [];

		foreach ( $rawGroups as $group ) {
			// Groups must be lowercase.
			$group = strtolower( $group );

			// A group without commas is handled as a group with a single subgroup.
			$keptSubGroups = [];
			$subGroupNames = [];
			$valid = true;
			foreach ( explode( ',', $group ) as $subGroup ) {
				$name = trim( $subGroup );
				if ( !in_array( $name, $validGroups, true ) ) {
					// A single unknown identifier invalidates the whole group
					$valid = false;
					break;
				}
				if ( !isset( $subGroupNames[ $name ] ) ) {
					// No duplicates in subgroups: keep the first spelling only
					$subGroupNames[ $name ] = true;
					$keptSubGroups[] = $subGroup;
				}
			}
			if ( !$valid ) {
				continue;
			}

			$canonical = array_keys( $subGroupNames );
			sort( $canonical );
			$key = implode( ',', $canonical );
			if ( isset( $seenGroups[ $key ] ) ) {
				// Same group as one we already have, possibly ordered or spaced differently
				continue;
			}
			$seenGroups[ $key ] = true;
			$newGroups[] = implode( ',', $keptSubGroups );
		}

		return [ $rawGroups, $newGroups ];
	}

	/**
	 * Check if throttle rate is malformed, i.e. if it has extra commas or a part of it is empty
	 *
	 * @param string $rate The throttle rate as saved in the DB ("count,period")
	 * @return string|null String with error type ('disable' or 'hand') or null if the rate is valid
	 */
	private function checkThrottleRate( $rate ) {
		if ( preg_match( '/^,/', $rate ) === 1 ) {
			// The comma was inserted at least in throttle count. This behaves like if
			// throttling isn't enabled, so just disable it
			return 'disable';
		}
		if ( preg_match( '/^\d+,\d+$/', $rate ) !== 1 ) {
			// This covers a comma only inside throttle period ("count,"), where the behaviour is
			// unclear, as well as every other malformed value. Since it's unpredictable what the
			// problem is, we just ask to fix it by hand.
			return 'hand';
		}
		return null;
	}

	/**
	 * Main logic of parameters normalization
	 *
	 * Reads every throttle row in abuse_filter_action, then either aborts listing the filters
	 * which must be fixed by hand, or (unless in dry-run mode) rewrites/deletes the broken rows,
	 * touches the related abuse_filter rows and inserts a new abuse_filter_history row for
	 * every touched filter.
	 *
	 * @return int Amount of normalized rows
	 */
	protected function normalizeParameters() {
		$user = AbuseFilter::getFilterUser();
		$dryRun = $this->hasOption( 'dry-run' );

		// IDs of filters with invalid rate (count or period)
		$invalidRate = [];
		// IDs of filters with invalid groups
		$invalidGroups = [];
		// IDs of filters where throttle parameters are completely empty, and even the filter ID is
		// missing. This happened for filters containing a throttle group with a comma inside which
		// were modified between the OOUI switch (gerrit/421487) and throttle repair (gerrit/459368):
		// a bug caused all existing throttle parameters to be wiped away, so that afa_consequence
		// holds an empty string and (unserialize(afh_actions))['throttle'] is null.
		$totallyEmpty = [];

		// Only select throttle actions
		$actionRows = $this->dbw->select(
			'abuse_filter_action',
			[ 'afa_filter', 'afa_parameters' ],
			[ 'afa_consequence' => 'throttle' ],
			__METHOD__,
			[ 'LOCK IN SHARE MODE' ]
		);

		$newActionRows = [];
		// Save new, sanitized throttle parameters to be copied in abuse_filter_history.
		// The structure is [ filterID => val ] where "val" is either an array with new params
		// or null if throttle must be removed.
		$historyThrottleParams = [];
		$deleteActionIDs = [];
		$changeActionIDs = [];
		foreach ( $actionRows as $actRow ) {
			$filter = $actRow->afa_filter;

			if ( $actRow->afa_parameters === '' ) {
				// All parameters are empty. See comment above the declaration of $totallyEmpty for
				// why this happens. Definitely to be fixed by hand, without further checks.
				$totallyEmpty[] = $filter;
				continue;
			}

			$params = explode( "\n", $actRow->afa_parameters );
			// The rate line may be missing altogether: treat it like an empty (hence invalid) rate
			$rateCheck = $this->checkThrottleRate( $params[1] ?? '' );
			list( $oldGroups, $newGroups ) = $this->getNewGroups( $params );

			// If the rate is invalid or the groups are empty (or only contain invalid identifiers),
			// it means that the throttle limit is never reached. Since we cannot guess what the
			// filter should do, nor we want to impose a default, we ask to manually fix the problem.
			$needsHand = false;
			if ( $rateCheck === 'hand' ) {
				$invalidRate[] = $filter;
				$needsHand = true;
			}
			if ( count( $newGroups ) === 0 ) {
				$invalidGroups[] = $filter;
				$needsHand = true;
			}
			if ( $needsHand ) {
				continue;
			}

			if ( $rateCheck === 'disable' ) {
				// Invalid rate, disable throttle for the filter
				$deleteActionIDs[] = $filter;
				$historyThrottleParams[ $filter ] = null;
			} elseif ( $oldGroups !== $newGroups ) {
				$newParams = array_merge( array_slice( $params, 0, 2 ), $newGroups );
				$newActionRows[] = [
					'afa_filter' => $filter,
					'afa_consequence' => 'throttle',
					'afa_parameters' => implode( "\n", $newParams )
				];
				$changeActionIDs[] = $filter;
				$historyThrottleParams[ $filter ] = $newParams;
			}
			// Otherwise the filter is not broken!
		}

		if ( $invalidRate || $invalidGroups || $totallyEmpty ) {
			$invalidMsg = '';
			if ( $invalidRate ) {
				$invalidMsg .= 'Throttle count and period are malformed or empty for the following filters: ' .
					implode( ', ', $invalidRate ) . '. ' .
					"Please fix them by hand in the way they're meant to be, then launch the script again.\n";
			}
			if ( $invalidGroups ) {
				$invalidMsg .= 'Throttle groups are empty for the following filters: ' .
					implode( ', ', $invalidGroups ) . '. ' .
					"Please add some groups or disable throttling, then launch the script again.\n";
			}
			if ( $totallyEmpty ) {
				$invalidMsg .= 'Throttle parameters are empty for the following filters: ' .
					implode( ', ', $totallyEmpty ) . '. ' .
					'This was probably caused by a temporary bug and you should be able to find valid ' .
					"parameters in each filter's history. Please restore them, then launch the script again.\n";
			}

			$this->fail( $invalidMsg );
		}

		// Use the same timestamps in abuse_filter and abuse_filter_history, since this is
		// what we do in the actual code.
		$timestamps = [];
		$changeActionCount = count( $changeActionIDs );
		if ( $changeActionCount ) {
			if ( $dryRun ) {
				$this->output(
					"normalizeThrottleParameter has found $changeActionCount rows to change in " .
					"abuse_filter_action for the following IDs: " . implode( ', ', $changeActionIDs ) . "\n"
				);
			} else {
				$this->dbw->replace(
					'abuse_filter_action',
					[ [ 'afa_filter', 'afa_consequence' ] ],
					$newActionRows,
					__METHOD__
				);
				// Touch the abuse_filter table to update the "filter last modified" field
				foreach ( $changeActionIDs as $id ) {
					$timestamps[ $id ] = $this->dbw->timestamp();

					$this->dbw->update(
						'abuse_filter',
						[
							'af_user' => $user->getId(),
							'af_user_text' => $user->getName(),
							'af_timestamp' => $timestamps[ $id ]
						],
						[ 'af_id' => $id ],
						__METHOD__
					);
				}
			}
		}

		$deleteActionCount = count( $deleteActionIDs );
		if ( $deleteActionCount ) {
			if ( $dryRun ) {
				$this->output(
					"normalizeThrottleParameter has found $deleteActionCount rows to delete in " .
					"abuse_filter_action and update in abuse_filter for the following IDs: " .
					implode( ', ', $deleteActionIDs ) . "\n"
				);
			} else {
				// Delete rows in abuse_filter_action
				$this->dbw->delete(
					'abuse_filter_action',
					[
						'afa_consequence' => 'throttle',
						'afa_filter' => $deleteActionIDs
					],
					__METHOD__
				);

				// Use string replacement so that we can avoid an extra query to retrieve the
				// value and then explode, remove throttle and implode again. The three passes
				// cover throttle in the middle or at the end (',throttle'), at the beginning
				// ('throttle,') and as the only action ('throttle'), so that no stray comma
				// is left behind. The expression is the same for every row.
				$actionsWithoutThrottle = $this->dbw->strreplace(
					$this->dbw->strreplace(
						$this->dbw->strreplace( 'af_actions', "',throttle'", "''" ),
						"'throttle,'",
						"''"
					),
					"'throttle'",
					"''"
				);

				// Update abuse_filter. abuse_filter_history done later
				foreach ( $deleteActionIDs as $id ) {
					$timestamps[ $id ] = $this->dbw->timestamp();

					$this->dbw->update(
						'abuse_filter',
						[
							'af_user' => $user->getId(),
							'af_user_text' => $user->getName(),
							'af_timestamp' => $timestamps[ $id ],
							'af_actions = ' . $actionsWithoutThrottle
						],
						[ 'af_id' => $id ],
						__METHOD__
					);
				}
			}
		}
		$affectedActionRows = $changeActionCount + $deleteActionCount;

		$touchedIDs = array_merge( $changeActionIDs, $deleteActionIDs );
		if ( count( $touchedIDs ) === 0 ) {
			$this->output( "No throttle parameters to normalize.\n" );
			return 0;
		}

		// Create new history rows for every changed filter

		$newHistoryRows = [];
		$changeHistoryFilters = [];
		foreach ( $touchedIDs as $filter ) {
			// Sanity. array_key_exists and not isset, because null is a legit value here: it
			// means that throttle must be removed from the filter.
			if ( !array_key_exists( $filter, $historyThrottleParams ) ) {
				$this->fail( "Throttle parameters weren't saved for filter $filter" );
			}

			$timestamp = $timestamps[ $filter ] ?? null;
			if ( !$timestamp && !$dryRun ) {
				// Sanity check
				$this->fail( "The timestamp wasn't saved for filter $filter" );
			}

			$histRow = $this->dbw->selectRow(
				'abuse_filter_history',
				[
					// All columns in the table, aside from afh_id that we don't need, and the
					// ones where we're going to put something new, plus afh_actions.
					'afh_filter',
					'afh_pattern',
					'afh_comments',
					'afh_flags',
					'afh_public_comments',
					'afh_deleted',
					'afh_group',
					'afh_actions'
				],
				[ 'afh_filter' => $filter ],
				__METHOD__,
				[ 'ORDER BY' => 'afh_id DESC', 'LOCK IN SHARE MODE' ]
			);
			if ( !$histRow ) {
				// Without a previous version there's nothing to build the new row upon
				$this->fail( "No abuse_filter_history row found for filter $filter" );
			}

			$actions = unserialize( $histRow->afh_actions );
			if ( !is_array( $actions ) ) {
				$this->fail( "Cannot unserialize the actions in the last history row of filter $filter" );
			}
			if ( $historyThrottleParams[ $filter ] === null ) {
				// Invalid rate, disable throttle for the filter
				unset( $actions['throttle'] );
			} else {
				$actions['throttle'] = $historyThrottleParams[ $filter ];
			}

			// The union keeps the new values on the left and copies any other column from
			// the last history row.
			$newHistoryRows[] = [
				'afh_user' => $user->getId(),
				'afh_user_text' => $user->getName(),
				'afh_timestamp' => $timestamp,
				'afh_changed_fields' => 'actions',
				'afh_actions' => serialize( $actions )
			] + get_object_vars( $histRow );
			$changeHistoryFilters[] = $filter;
		}

		$historyCount = count( $changeHistoryFilters );
		if ( $historyCount !== $affectedActionRows ) {
			// Sanity: prevent unexpected errors.
			$this->fail(
				"The amount of affected rows isn't equal for abuse_filter_action and abuse_filter history. " .
				"Found $affectedActionRows for the former and $historyCount for the latter."
			);
		}
		if ( count( $newHistoryRows ) ) {
			if ( $dryRun ) {
				$this->output(
					"normalizeThrottleParameter would insert $historyCount rows in abuse_filter_history" .
					" for the following filters: " . implode( ', ', $changeHistoryFilters ) . "\n"
				);
			} else {
				$this->dbw->insert(
					'abuse_filter_history',
					$newHistoryRows,
					__METHOD__
				);
			}
		}
		return $affectedActionRows + $historyCount;
	}

	/**
	 * Beautify empty/missing/corrupted parameters in abuse_filter_history
	 *
	 * For every history row with a throttle action:
	 * - null parameters are rebuilt as [ filterID, '0,0', 'none' ]
	 * - a count or period missing from the rate is replaced with an explicit 0
	 * - missing groups are replaced with the special group 'none'
	 * The last two fixes are independent, and rows which would be left unchanged are
	 * neither updated nor counted.
	 *
	 * @return int Amount of beautified rows
	 */
	protected function beautifyHistory() {
		$dryRun = $this->hasOption( 'dry-run' );

		// We need any row containing throttle, but there's no
		// need to lock as these rows aren't changed by the actual code.
		$likeClause = $this->dbw->buildLike(
			$this->dbw->anyString(),
			'throttle',
			$this->dbw->anyString()
		);
		$histRows = $this->dbw->select(
			'abuse_filter_history',
			[ 'afh_id', 'afh_actions', 'afh_filter' ],
			[ 'afh_actions ' . $likeClause ],
			__METHOD__
		);

		$beautyIDs = [];
		foreach ( $histRows as $row ) {
			$acts = unserialize( $row->afh_actions );
			if ( !is_array( $acts ) || !array_key_exists( 'throttle', $acts ) ) {
				// The LIKE clause is very raw, so this could happen. Also skip rows whose
				// actions cannot be unserialized, as there's nothing we can beautify there.
				continue;
			}

			$oldThrottle = $acts['throttle'];
			if ( $oldThrottle === null ) {
				// Corrupted row, rebuild it (T215787)
				$newThrottle = [ $row->afh_filter, '0,0', 'none' ];
			} elseif ( is_array( $oldThrottle ) ) {
				$newThrottle = $oldThrottle;
				if (
					isset( $newThrottle[1] ) &&
					$this->checkThrottleRate( $newThrottle[1] ) !== null
				) {
					// Missing count, make it explicitly 0
					$newThrottle[1] = preg_replace( '/^,/', '0,', $newThrottle[1] );
					// Missing period, make it explicitly 0
					$newThrottle[1] = preg_replace( '/,$/', ',0', $newThrottle[1] );
				}
				if ( count( $newThrottle ) === 2 ) {
					// Missing groups, make them explicitly "none" (special group)
					$newThrottle[] = 'none';
				}
			} else {
				// Unknown format, don't try to guess
				continue;
			}

			if ( $newThrottle === $oldThrottle ) {
				// Everything's fine, or at least there's nothing we can fix automatically
				continue;
			}
			$acts['throttle'] = $newThrottle;

			$beautyIDs[] = $row->afh_id;
			if ( !$dryRun ) {
				$this->dbw->update(
					'abuse_filter_history',
					[ 'afh_actions' => serialize( $acts ) ],
					[ 'afh_id' => $row->afh_id ],
					__METHOD__
				);
			}
		}

		$changed = count( $beautyIDs );
		if ( $changed ) {
			$verb = $dryRun ? 'would beautify' : 'beautified';
			$this->output(
				"normalizeThrottleParameter $verb $changed rows in abuse_filter_history" .
				" for the following history IDs: " . implode( ', ', $beautyIDs ) . "\n"
			);
		}
		return $changed;
	}

	/**
	 * Runs both normalization steps inside a single transaction and prints a summary.
	 * Returns false for a dry run and true otherwise.
	 *
	 * @inheritDoc
	 */
	public function doDBUpdates() {
		$dryRun = $this->hasOption( 'dry-run' );
		$this->dbw = wfGetDB( DB_MASTER );
		$this->beginTransaction( $this->dbw, __METHOD__ );

		$normalized = $this->normalizeParameters();
		$beautified = $this->beautifyHistory();

		$this->commitTransaction( $this->dbw, __METHOD__ );

		$changed = $normalized + $beautified;

		$resultMsg = $dryRun ?
			"Throttle parameter normalization would change a total of $changed rows.\n" :
			"Throttle parameters successfully normalized. Changed $changed rows.\n";
		$this->output( $resultMsg );

		return !$dryRun;
	}
}

// Store the name of the class defined in this file in $maintClass, then load the file named
// by the RUN_MAINTENANCE_IF_MAIN constant.
// NOTE(review): presumably that file reads $maintClass and runs the script when this file is
// the entry point - confirm against maintenance/Maintenance.php.
$maintClass = NormalizeThrottleParameters::class;
require_once RUN_MAINTENANCE_IF_MAIN;