aboutsummaryrefslogtreecommitdiff
blob: 50c6dbc26845b065e92ee057122080e82e684fd2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
#!/bin/bash
# Copyright 2005-2015 Gentoo Foundation; Distributed under the GPL v2
# might be earlier copyright, no history available

# 0) Make a backup in case there is something wrong with server side
# 1) Update rsync checkouts or exports from cvs.g.o::mastermirror-staging
#    - cvs.g.o::mastermirror-staging is populated by script mastermirror-staging.sh
# 1b) rsync EXPORTS/gentoo-x86 to STAGEDIR
# 1c) source (bash -n) eclasses to check for syntax errors
# 2) generate metadata (egencache)
# 3) place dtd info & xml schemas in STAGEDIR
# 4) place glsa's in STAGEDIR
# 5) place news in STAGEDIR
# 6) place projects.xml in STAGEDIR
# 7) rsync from STAGEDIR to FINALDIR
# 8) cache generated data
# 9) rsync from STAGEDIR to REPODIR ### testing

################################################################################
# NOTE: You MUST run git-restore-mtime -m on the git checkouts hosted in
# git.gentoo.org::mastermirror-staging/ or this WILL NOT WORK.
# You can get git-restore-mtime here:
# https://github.com/MestreLion/git-tools/blob/master/git-restore-mtime
################################################################################

umask 022
# None of the path settings used below (EXPORTS, STAGEDIR, FINALDIR, ...) are
# assigned in this script, so they presumably come from this vars file.  If it
# cannot be loaded they would be empty and the later "rsync --delete" steps
# would operate on the wrong paths, so stop right here.
source /usr/local/bin/mastermirror/rsync-gen.vars || {
	echo "$0: cannot load /usr/local/bin/mastermirror/rsync-gen.vars" >&2
	exit 1
}

# timelog_start and timelog___end have names of equal length, presumably to
# keep paired call sites aligned in the source.
# Print one fixed-width timing record to stdout.
#   $1 - event kind (e.g. START, END); left-aligned in 5 columns
#   $2 - name of the step being timed; left-aligned in 22 columns
# The last column is "@<epoch seconds>/<UTC date>", right-aligned in 40
# columns.
function _log() {
	local logfmt_what_width=22
	local logfmt_time_width=40
	# Read the clock once so both renderings describe the same instant, and
	# keep everything local so a caller's own "ts" is never overwritten.
	local epoch ts
	epoch=$(date +%s)
	ts="@${epoch}/$(date -u -d "@${epoch}")"
	printf "%-5s %-${logfmt_what_width}s %${logfmt_time_width}s\n" "$1" "$2" "$ts"
}
# Emit a START record for the step named in $1.
function timelog_start() {
	local step=$1
	_log START "${step}"
}
# Emit an END record for the step named in $1.
function timelog___end() {
	local step=$1
	_log END "${step}"
}
# Print a separator line of 70 dashes to stdout.
# Built with shell builtins only, so no external interpreter is started just
# to draw a rule.
function timelog_MARKER() {
	local rule
	# 70 spaces, then every space turned into a dash.
	printf -v rule '%70s' ''
	printf '%s\n' "${rule// /-}"
}

# We want to exclude many RCS files, but we have some exceptions to the 'cvs-exclude' list:
# - 'core' is actually a valid package name that we do use for dev-ml/core.
# - '*.old' is used by 'net-p2p/freenet/files/freenet.old'
# This string is expanded UNQUOTED where it is used, so it is only word-split:
# quote characters inside it would reach rsync literally (the pattern would be
# '*.old' including the quotes, which never matches).  The --include=PATTERN
# spelling keeps each pattern a single quote-free word; as the word starts with
# "--include=" it will in practice not glob-match anything in the current
# directory either.
RSYNC_GIT_EXCLUDE="--include=core/ --include=*.old --cvs-exclude --exclude=.gitignore"
#--filter='dir-merge /.cvsignore' --filter='exclude .cvsignore'
#--filter='dir-merge /.gitignore' --filter='exclude .gitignore'

# sanity checking
# These settings are not assigned in this script.  An empty value would turn
# later "rsync --delete ... ${VAR}/" calls into operations on "/", so abort
# before touching anything.
for _required in EXPORTS STAGEDIR FINALDIR REPODIR LOGDIR BASE; do
	if [[ -z ${!_required} ]]; then
		echo "$0: required setting ${_required} is empty or unset" >&2
		exit 1
	fi
done
unset _required

# Create the working directories that do not exist yet (parents must exist).
for _dir in "${EXPORTS}" "${STAGEDIR}" "${FINALDIR}" "${REPODIR}" "${LOGDIR}" "${BASE}/tmp"; do
	[[ -d ${_dir} ]] || mkdir -- "${_dir}"
done
unset _dir
TIMESLOG="${LOGDIR}/rsync-gen-times.log"

# Locate the atomic-rsync helper: first whatever PATH resolves, then the copy
# under /usr/share/rsync.  atomic_rsync stays "MISSING" if neither is an
# executable file.
atomic_rsync=MISSING
for _candidate in "$(command -v atomic-rsync 2>/dev/null)" /usr/share/rsync/atomic-rsync; do
	if [[ -n ${_candidate} && -x ${_candidate} ]]; then
		atomic_rsync=${_candidate}
		break
	fi
done
unset _candidate
if [[ ${atomic_rsync} == "MISSING" ]]; then
	echo "$0: Cannot do final atomic rsync into place, atomic-rsync tool is missing" >&2
	exit 1
fi

# Filter: append everything read on stdin to $TIMESLOG and hand the same lines
# to logger(1) with tag "rsync-gen-times" and priority INFO.
function timelogger() {
	tee -a "${TIMESLOG}" \
		| logger -p INFO -t rsync-gen-times
}

# Remember when the run began (epoch seconds; reused in the commit message at
# the end of the script) and open this run's section of the times log with a
# separator line followed by the START record.
TIME_ENTIRE_START="$(date -u +%s)"
{
	timelog_MARKER
	timelog_start "ENTIRE SCRIPT"
} | timelogger

# 0) Make a backup in case there is something wrong with server side
#echo "START	BACKUP		$(date -u)" | timelogger
#cp -al ${FINALDIR}/ ${FINALDIR}.bak/
#echo "END	BACKUP		$(date -u)" | timelogger

# 1) Update rsync checkouts or exports from cvs.g.o::mastermirror-staging
#    (the module actually pulled below is git.gentoo.org::mastermirror-staging/)
RSYNC="/usr/bin/rsync"
PASSWD_FILE="/etc/mastermirror-fetch/gcvsd-rsync.rsync.passwd"
# Transfer options, assembled piece by piece.  These are flat strings that are
# deliberately word-split when the command is run.
RSYNC_ARGS="--no-motd --recursive --times --links"
RSYNC_ARGS+=" --port=60024 --password-file ${PASSWD_FILE}"
RSYNC_ARGS+=" --ignore-errors --timeout=300 --checksum"
RSYNC_ARGS+=" --quiet --chmod=a+r,go-w"
RSYNC_ARGS_DELETE="--delete --delete-after --delete-excluded"

timelog_start "STAGING RSYNC" | timelogger
"${RSYNC}" ${RSYNC_ARGS} ${RSYNC_ARGS_DELETE} \
	git.gentoo.org::mastermirror-staging/ \
	${EXPORTS}/
timelog___end "STAGING RSYNC" | timelogger
# end 1)

# start 1a)
# Temporary: import old CVS changelogs
# Changelogs of packages that no longer exist get copied too and are removed
# again by the next rsync, which causes some churn.  Culling dead packages from
# the repo now and then keeps that churn down.
timelog_start "CHANGELOG-IMPORT" | timelogger
rsync --whole-file --quiet --archive \
	$RSYNC_GIT_EXCLUDE \
	${EXPORTS}/changelogs/ \
	${STAGEDIR}
timelog___end "CHANGELOG-IMPORT" | timelogger
# end 1a)

# 1b) rsync EXPORTS/gentoo-x86 to STAGEDIR

# timestamp.chk is generated every 5 mins in datestamp.sh script in the FINALDIR
# timestamp.x is generated every 5 mins in datestamp.sh script in the STAGEDIR
# We want to track the progress of STAGEDIR->FINALDIR, so the .x file must not
# be deleted here.
# /metadata/cache is kept as well so the --rsync switch of egencache will work
# as designed.
# Filter rules (see rsync(1), FILTER RULES):
#   'P /metadata/***'   protect everything under /metadata from deletion
#   'Pp Manifest'       protect generated Manifests; the 'p' (perishable)
#   'Pp ChangeLog*'     modifier lets them go when their directory is deleted
# A failing rsync ends the script with rsync's own exit status.
timelog_start "STAGEDIR RSYNC" | timelogger
rsync --whole-file --quiet --archive --update \
	$RSYNC_GIT_EXCLUDE \
	--filter='P /metadata/***' \
	--filter='Pp Manifest' \
	--filter='Pp ChangeLog*' \
	--delete --delete-excluded \
	${EXPORTS}/gentoo-x86/ \
	${STAGEDIR}/ \
	|| exit
timelog___end "STAGEDIR RSYNC" | timelogger
# end 1b)

# 1c) source (bash -n) eclasses to check for syntax errors
# "bash -n" only parses each eclass, it does not execute it.  The first file
# that fails to parse ends the run with status 1, before the later steps that
# copy STAGEDIR onwards.  Paths are quoted so that whitespace or glob
# characters in STAGEDIR or a file name cannot split the argument.
timelog_start "ECLASS CHECK" | timelogger
for i in "${STAGEDIR}"/eclass/*.eclass; do
	bash -n "$i" || { echo "failed to source $i, exiting"; exit 1; }
done
timelog___end "ECLASS CHECK" | timelogger
# end 1c)

# 2) generate metadata (egencache)
# ${STAGEDIR}/metadata/cache is created automatically
PORTAGE_USERNAME=gmirror
PORTAGE_GRPNAME=gmirror
export PORTAGE_USERNAME PORTAGE_GRPNAME
# One log file per run, named after the UTC start minute, under LOGDIR/regen.
REGEN_LOG_DIR=${LOGDIR}/regen
LOG_TIMESTAMP=$(date -u '+%Y%m%d-%H%M')
REGEN_LOG_FILE=regen-run-${LOG_TIMESTAMP}.log
if [[ ! -d ${REGEN_LOG_DIR} ]]; then
	mkdir ${REGEN_LOG_DIR}
fi

# Run the thicken-manifests.py helper on STAGEDIR.
# PARALLEL_PARAMS_noloadavg is a flat option string and is word-split on
# purpose.  The helper's exit status is the function's return status.
function parallel_repoman_manifest_THICKEN() {
	local thicken=/usr/local/bin/mastermirror/thicken-manifests.py
	"${thicken}" ${PARALLEL_PARAMS_noloadavg} "${STAGEDIR}"
}

# Run "repoman manifest" in every first-level directory of STAGEDIR except
# eclass, scripts, licenses, metadata and profiles, fanned out via parallel.
# Whatever parallel prints on stdout is appended to the regen log.
# Globals read: STAGEDIR, PARALLEL_PARAMS, REGEN_LOG_DIR, REGEN_LOG_FILE
# Returns: exit status of parallel (last stage of the pipeline).
function parallel_repoman_manifest_REPOMAN() {
	local skip
	local -a find_args=( -maxdepth 1 -mindepth 1 -type d )
	for skip in eclass scripts licenses metadata profiles; do
		find_args+=( '!' -name "${skip}" )
	done
	# PARALLEL_PARAMS is a flat option string and must be word-split, so it
	# stays unquoted; its "--load-average" spelling is rewritten to "--load".
	# Paths are quoted so whitespace in them cannot split arguments or make
	# the redirection ambiguous.
	find "${STAGEDIR}" "${find_args[@]}" \
		| parallel --no-notice \
		${PARALLEL_PARAMS/--load-average/--load} \
		'cd {} && repoman manifest' \
		>>"${REGEN_LOG_DIR}/${REGEN_LOG_FILE}"
}

# TODO: Implement a monitoring system that guarantees low-latency human
# intervention when necessary, and stop using --tolerant (bug #239266).

timelog_start "REGEN" | timelogger

# Ensure layout.conf is set to thin-manifests so egencache doesn't error
# Edits the staged layout.conf in place: on the matching lines thin-manifests
# goes false->true, sign-manifests true->false and sign-commits false->true.
sed -i \
	-e '/^thin-manifests/s,false,true,g' \
	-e '/^sign-manifests/s,true,false,g' \
	-e '/^sign-commits/s,false,true,g' \
	${STAGEDIR}/metadata/layout.conf

# Only update the changelogs every 6 hours
# because right now it takes a very long time to do it (exceeding 1 hour at times).
# NOTE: the hour is taken in the machine's local timezone (no -u here), unlike
# most other date calls in this script.
HOURS=$(date +%H)
# Stays empty (adds no arguments to egencache) outside the four hours below.
EGENCACHE_CHANGELOG=""
case $HOURS in
	03|09|15|21) EGENCACHE_CHANGELOG="--update-changelogs --changelog-reversed --changelog-output ChangeLog" ;;
esac

# for egencache, set user/group or make sure the user is in the portage group
# GIT_DIR is set for this one command only and points at the git checkout in
# EXPORTS.  PARALLEL_PARAMS and EGENCACHE_CHANGELOG are flat option strings
# and are word-split on purpose.  The repository configuration is passed
# inline as a multi-line string.  stdout and stderr are both appended to the
# regen log.
	#--update-changelogs \
GIT_DIR=${EXPORTS}/gentoo-x86/.git/ \
egencache --update --rsync $PARALLEL_PARAMS \
	--tolerant --cache-dir=${BASE}/tmp/ \
	--repositories-configuration="
[gentoo]
location = ${STAGEDIR}
" \
	--update-use-local-desc \
	--repo=gentoo \
	$EGENCACHE_CHANGELOG \
	>> ${REGEN_LOG_DIR}/${REGEN_LOG_FILE} 2>&1
# Must be captured immediately: the timelog pipeline below overwrites $?.
rval=$?
timelog___end "REGEN" | timelogger

# A failed egencache run stops everything: print its log and exit with 5.
if ! [[ ${rval} == 0 ]]; then
	printf '%s\n' \
		"$0: something happened with egencache, cowardly refusing to continue" \
		"${REGEN_LOG_DIR}/${REGEN_LOG_FILE}:"
	cat "${REGEN_LOG_DIR}/${REGEN_LOG_FILE}"
	exit 5
fi

# don't save empty files
[[ -s ${REGEN_LOG_DIR}/${REGEN_LOG_FILE} ]] || rm ${REGEN_LOG_DIR}/${REGEN_LOG_FILE}

# Keep 30 days of logs
find ${REGEN_LOG_DIR} -type f -mtime +30 -exec rm {} +

# Force Manifests to thick, unsigned
# also disable commit signing for now.
# TODO: add infra signing of Manifests
# (each expression addresses a different layout.conf key, so their order does
# not matter)
sed -i \
	-e '/^sign-commits/s,true,false,g' \
	-e '/^sign-manifests/s,true,false,g' \
	-e '/^thin-manifests/s,true,false,g' \
	${STAGEDIR}/metadata/layout.conf

# Thicken manifests
timelog_start "THICKEN" | timelogger
parallel_repoman_manifest_THICKEN
timelog___end "THICKEN" | timelogger

# Mark that metadata is done
date -u >${STAGEDIR}/metadata/timestamp

# Validate the Manifests of the staged tree with "repoman manifest-check";
# any failure ends the run with status 5 after printing the validation log.
timelog_start "MANIFEST-VALIDATE" | timelogger
# repoman is run from inside the tree; do not run it from some other
# directory if the cd fails.
cd "${STAGEDIR}" || { echo "$0: cannot cd to ${STAGEDIR}" >&2; exit 5; }
# Redirect stdout to the log FIRST and then point stderr at it; the reverse
# order ("2>&1 >file") leaves stderr on the old stdout, so error messages
# would be missing from the log that is printed on failure.
PORTDIR=${STAGEDIR} repoman manifest-check >"${REGEN_LOG_DIR}/${REGEN_LOG_FILE}.validate" 2>&1
rval=$?
if [[ ${rval} != 0 ]]; then
	echo "$0: A Manifest has a failure!"
	echo "${REGEN_LOG_DIR}/${REGEN_LOG_FILE}.validate:"
	cat "${REGEN_LOG_DIR}/${REGEN_LOG_FILE}.validate"
	exit 5
fi
timelog___end "MANIFEST-VALIDATE" | timelogger
# end 2)

# Steps 3-5 copy one export each into STAGEDIR/metadata (checksum compare,
# CVS/.git leftovers excluded, stale files deleted) and then stamp the copied
# directory with an RFC-format UTC date in timestamp.chk.

# 3) place dtd info in STAGEDIR
timelog_start "DTD" | timelogger
rsync --whole-file --quiet --archive --checksum \
	--exclude=CVS --exclude=.git --delete \
	${EXPORTS}/dtd ${STAGEDIR}/metadata/
date -u -R >${STAGEDIR}/metadata/dtd/timestamp.chk
timelog___end "DTD" | timelogger
# end 3)

# 3b) place xml schemas in STAGEDIR
timelog_start "XML-SCHEMA" | timelogger
rsync --whole-file --quiet --archive --checksum \
	--exclude=CVS --exclude=.git --delete \
	${EXPORTS}/xml-schema ${STAGEDIR}/metadata/
date -u -R >${STAGEDIR}/metadata/xml-schema/timestamp.chk
timelog___end "XML-SCHEMA" | timelogger
# end 3b)

# 4) place glsa's in STAGEDIR
timelog_start "GLSA" | timelogger
rsync --whole-file --quiet --archive --checksum \
	--exclude=CVS --exclude=.git --delete \
	${EXPORTS}/glsa ${STAGEDIR}/metadata/
date -u -R >${STAGEDIR}/metadata/glsa/timestamp.chk
timelog___end "GLSA" | timelogger
# end 4)

# 5) place news in STAGEDIR
# (the contents of gentoo-news go into metadata/news, hence the "/." source)
timelog_start "NEWS" | timelogger
rsync --whole-file --quiet --archive --checksum \
	--exclude=CVS --exclude=.git --delete \
	${EXPORTS}/gentoo-news/. ${STAGEDIR}/metadata/news
date -u -R >${STAGEDIR}/metadata/news/timestamp.chk
timelog___end "NEWS" | timelogger
# end 5)

# 6) place projects.xml in STAGEDIR
timelog_start "HERDS-AND-PROJ" | timelogger
rsync --whole-file --quiet --archive --checksum \
	${EXPORTS}/projects/projects.xml \
	${STAGEDIR}/metadata/projects.xml
timelog___end "HERDS-AND-PROJ" | timelogger
# end 6)

# 7) rsync from STAGEDIR to FINALDIR
# note, call exit above if it is not desired to update the FINALDIR. This is
# where all the rsync nodes pull from.

# timestamp.chk is generated every 5 mins in datestamp.sh script in the FINALDIR
# timestamp.x is generated every 5 mins in datestamp.sh script in the STAGEDIR
# We want to track the progress of STAGEDIR->FINALDIR so exclude .chk file here
# Checksum is needed because we explicitly set the timestamp of Manifests/ChangeLogs.
# exclude of .git/CVS is just in case we still accidentally created them.
timelog_start "FINAL RSYNC" | timelogger
# FINALDIR without a trailing slash, used to name the two sibling directories
# "<FINALDIR>-1" and "<FINALDIR>-2" that are created if missing.
# NOTE(review): presumably these are the two trees atomic-rsync switches
# between - confirm against the atomic-rsync documentation.
FINALDIR_tmp=${FINALDIR%/}
mkdir -p "${FINALDIR_tmp}-1" "${FINALDIR_tmp}-2"
# $atomic_rsync is the helper path found during the sanity checks.  Its exit
# status is not checked here; the script carries on regardless.
$atomic_rsync -Wqa --exclude=/metadata/timestamp.chk --delete --checksum \
	--exclude=.git --exclude=CVS --exclude=.gitignore \
	--chmod=u-s,g-s \
	${STAGEDIR}/ \
	${FINALDIR}/

# idea: "max-delete" - require infra approval if wc -l of bak - finaldir is >
# 500?
# The previous method of "detecting" some failure was --max-delete=100, this
# exposed breakage to users though.
#if [[ XXXX ]]; then
#	echo "Something went wrong, putting backup in place from last sync"
#	mv ${FINALDIR} ${FINALDIR}.broken
#	mv ${FINALDIR}.bak ${FINALDIR}
#fi

# Testcase for mirrors without --checksum:
# If a mirror is running WITHOUT --checksum
# They will get the file the first time around, but not any later versions.
# The content changes on every run, while the mtime is pinned to a fixed value
# from just before this experiment started, at a very round timestamp, which
# works out to 'Sun Jan 17 03:06:40 UTC 2016'.
# Dedicated, quoted variables are used so that whitespace in FINALDIR cannot
# break the redirection and no short global like "ts" is overwritten.
checksum_marker=${FINALDIR}/metadata/.checksum-test-marker
checksum_marker_mtime=1453000000
cat >"${checksum_marker}" <<EOF
$(date +%s) $(date -u)
# Testcase for mirror network:
# Watch the value over a day, if unchanging then
# one or more mirrors is missing --checksum
# Please check bug #572168 for status (review for duplicates).
EOF
touch -d "@${checksum_marker_mtime}" "${checksum_marker}"
# End of mirror --checksum testcase

timelog___end "FINAL RSYNC" | timelogger
# end 7)

# 8) cache generated data:
# md5-cache
# ChangeLog
timelog_start "CACHE RSYNC" | timelogger
CACHEDIR_changelogs=${CACHEDIR}/changelogs
CACHEDIR_md5cache=${CACHEDIR}/metadata_md5-cache
mkdir -p $CACHEDIR_md5cache $CACHEDIR_changelogs
# MD5 cache: exact mirror of the staged md5-cache
rsync --whole-file --quiet --archive --times --prune-empty-dirs \
	--delete \
	${STAGEDIR}/metadata/md5-cache/ $CACHEDIR_md5cache/
# ChangeLogs: we do NOT use --delete, as we want to keep old ChangeLogs.
# Only files named ChangeLog are copied; directories are traversed and the
# ones left empty are pruned.
rsync --whole-file --quiet --archive --times --prune-empty-dirs \
	--include='ChangeLog' --include='*/' --exclude='*' \
	${STAGEDIR}/ $CACHEDIR_changelogs/
timelog___end "CACHE RSYNC" | timelogger

# 9) rsync from STAGEDIR to REPODIR ### testing
# Mirror the staged tree (without ChangeLogs and VCS leftovers) into REPODIR
# and, only if that rsync succeeded, stage and commit everything there with
# git.  The commit message records when this script started and finished.
timelog_start "GIT RSYNC" | timelogger
# The ChangeLog pattern is quoted so the shell hands it to rsync verbatim
# instead of expanding it against files in the current directory.
# The commit message is fed to git on file descriptor 300 via the here-doc.
rsync -Wqa --exclude=/metadata/timestamp.chk --delete --checksum \
	--exclude=.git --exclude=CVS --exclude=.gitignore \
	--chmod=u-s,g-s \
	--exclude 'ChangeLog*' \
	"${STAGEDIR}/" \
	"${REPODIR}/" && \
( cd "${REPODIR}" && \
git add -A && \
GIT_AUTHOR_NAME='Gentoo Infrastructure' \
GIT_AUTHOR_EMAIL='infra@gentoo.org' \
GIT_COMMITTER_NAME='Gentoo Infrastructure' \
GIT_COMMITTER_EMAIL='infra@gentoo.org' \
git commit -a -q -F /dev/fd/300 300<<EOF
rsync-gen.sh done!

X-Started: @${TIME_ENTIRE_START}
X-Finished: @$(date -u +%s)
EOF
)
timelog___end "GIT RSYNC" | timelogger
# end 9)

# Close the "ENTIRE SCRIPT" timing record that was opened at the start of the run.
timelog___end "ENTIRE SCRIPT" | timelogger