Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : #include <linux/fs.h>
3 : #include <linux/random.h>
4 : #include <linux/buffer_head.h>
5 : #include <linux/utsname.h>
6 : #include <linux/kthread.h>
7 :
8 : #include "ext4.h"
9 :
10 : /* Checksumming functions */
11 0 : static __le32 ext4_mmp_csum(struct super_block *sb, struct mmp_struct *mmp)
12 : {
13 0 : struct ext4_sb_info *sbi = EXT4_SB(sb);
14 0 : int offset = offsetof(struct mmp_struct, mmp_checksum);
15 0 : __u32 csum;
16 :
17 0 : csum = ext4_chksum(sbi, sbi->s_csum_seed, (char *)mmp, offset);
18 :
19 0 : return cpu_to_le32(csum);
20 : }
21 :
22 0 : static int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp)
23 : {
24 0 : if (!ext4_has_metadata_csum(sb))
25 : return 1;
26 :
27 0 : return mmp->mmp_checksum == ext4_mmp_csum(sb, mmp);
28 : }
29 :
30 0 : static void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp)
31 : {
32 0 : if (!ext4_has_metadata_csum(sb))
33 : return;
34 :
35 0 : mmp->mmp_checksum = ext4_mmp_csum(sb, mmp);
36 : }
37 :
38 : /*
39 : * Write the MMP block using REQ_SYNC to try to get the block on-disk
40 : * faster.
41 : */
42 0 : static int write_mmp_block(struct super_block *sb, struct buffer_head *bh)
43 : {
44 0 : struct mmp_struct *mmp = (struct mmp_struct *)(bh->b_data);
45 :
46 : /*
47 : * We protect against freezing so that we don't create dirty buffers
48 : * on frozen filesystem.
49 : */
50 0 : sb_start_write(sb);
51 0 : ext4_mmp_csum_set(sb, mmp);
52 0 : lock_buffer(bh);
53 0 : bh->b_end_io = end_buffer_write_sync;
54 0 : get_bh(bh);
55 0 : submit_bh(REQ_OP_WRITE, REQ_SYNC | REQ_META | REQ_PRIO, bh);
56 0 : wait_on_buffer(bh);
57 0 : sb_end_write(sb);
58 0 : if (unlikely(!buffer_uptodate(bh)))
59 0 : return 1;
60 :
61 : return 0;
62 : }
63 :
/*
 * Read the MMP block. It _must_ be read from disk and hence we clear the
 * uptodate flag on the buffer.
 *
 * @bh is in/out: if *bh is non-NULL it is re-read in place; if NULL a new
 * buffer is allocated for @mmp_block.  On success *bh holds the validated
 * block; on any failure the buffer is released, *bh is set to NULL, and a
 * negative errno is returned (-ENOMEM, -EFSCORRUPTED, -EFSBADCRC, or the
 * I/O error from ext4_read_bh()).
 */
static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
			  ext4_fsblk_t mmp_block)
{
	struct mmp_struct *mmp;
	int ret;

	/* Force a fresh read; a cached copy would defeat MMP detection. */
	if (*bh)
		clear_buffer_uptodate(*bh);

	/* This would be sb_bread(sb, mmp_block), except we need to be sure
	 * that the MD RAID device cache has been bypassed, and that the read
	 * is not blocked in the elevator. */
	if (!*bh) {
		*bh = sb_getblk(sb, mmp_block);
		if (!*bh) {
			ret = -ENOMEM;
			goto warn_exit;
		}
	}

	lock_buffer(*bh);
	ret = ext4_read_bh(*bh, REQ_META | REQ_PRIO, NULL);
	if (ret)
		goto warn_exit;

	/* Validate what came off the disk: magic first, then checksum. */
	mmp = (struct mmp_struct *)((*bh)->b_data);
	if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC) {
		ret = -EFSCORRUPTED;
		goto warn_exit;
	}
	if (!ext4_mmp_csum_verify(sb, mmp)) {
		ret = -EFSBADCRC;
		goto warn_exit;
	}
	return 0;
warn_exit:
	/* Common failure path: drop the buffer so callers never see a
	 * stale/invalid *bh. */
	brelse(*bh);
	*bh = NULL;
	ext4_warning(sb, "Error %d while reading MMP block %llu",
		     ret, mmp_block);
	return ret;
}
110 :
/*
 * Dump as much information as possible to help the admin.
 *
 * Emits @msg followed by the identity recorded in the on-disk MMP block
 * (last update time, nodename, bdev name) so the admin can tell which
 * node/device last touched the filesystem.  @function/@line identify the
 * call site (normally filled in by the dump_mmp_msg() wrapper macro).
 */
void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp,
		    const char *function, unsigned int line, const char *msg)
{
	__ext4_warning(sb, function, line, "%s", msg);
	__ext4_warning(sb, function, line,
		       "MMP failure info: last update time: %llu, last update node: %.*s, last update device: %.*s",
		       (unsigned long long)le64_to_cpu(mmp->mmp_time),
		       /* %.*s with the field size: the on-disk strings are
			* fixed-width and not guaranteed NUL-terminated. */
		       (int)sizeof(mmp->mmp_nodename), mmp->mmp_nodename,
		       (int)sizeof(mmp->mmp_bdevname), mmp->mmp_bdevname);
}
124 :
/*
 * kmmpd will update the MMP sequence every s_mmp_update_interval seconds
 *
 * @data is a struct mmpd_data * whose ownership (and that of data->bh)
 * transfers to this thread; both are freed on every exit path.  The loop
 * runs until kthread_should_stop(), the MMP feature is cleared, or the
 * filesystem goes read-only, and writes a CLEAN sequence on normal exit.
 */
static int kmmpd(void *data)
{
	struct super_block *sb = ((struct mmpd_data *) data)->sb;
	struct buffer_head *bh = ((struct mmpd_data *) data)->bh;
	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
	struct mmp_struct *mmp;
	ext4_fsblk_t mmp_block;
	u32 seq = 0;
	unsigned long failed_writes = 0;
	int mmp_update_interval = le16_to_cpu(es->s_mmp_update_interval);
	unsigned mmp_check_interval;
	unsigned long last_update_time;
	unsigned long diff;
	int retval;

	mmp_block = le64_to_cpu(es->s_mmp_block);
	mmp = (struct mmp_struct *)(bh->b_data);
	mmp->mmp_time = cpu_to_le64(ktime_get_real_seconds());
	/*
	 * Start with the higher mmp_check_interval and reduce it if
	 * the MMP block is being updated on time.
	 */
	mmp_check_interval = max(EXT4_MMP_CHECK_MULT * mmp_update_interval,
				 EXT4_MMP_MIN_CHECK_INTERVAL);
	mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
	BUILD_BUG_ON(sizeof(mmp->mmp_bdevname) < BDEVNAME_SIZE);
	bdevname(bh->b_bdev, mmp->mmp_bdevname);

	/* Stamp our identity so other nodes (and dump_mmp_msg) can name
	 * who holds the filesystem. */
	memcpy(mmp->mmp_nodename, init_utsname()->nodename,
	       sizeof(mmp->mmp_nodename));

	while (!kthread_should_stop()) {
		/* seq wraps within [1, EXT4_MMP_SEQ_MAX]; 0 and the special
		 * CLEAN/FSCK values are never used as live sequences. */
		if (++seq > EXT4_MMP_SEQ_MAX)
			seq = 1;

		mmp->mmp_seq = cpu_to_le32(seq);
		mmp->mmp_time = cpu_to_le64(ktime_get_real_seconds());
		last_update_time = jiffies;

		retval = write_mmp_block(sb, bh);
		/*
		 * Don't spew too many error messages. Print one every
		 * (s_mmp_update_interval * 60) seconds.
		 */
		if (retval) {
			if ((failed_writes % 60) == 0) {
				ext4_error_err(sb, -retval,
					       "Error writing to MMP block");
			}
			failed_writes++;
		}

		/* Stop entirely if the MMP feature was cleared (e.g. by
		 * tune2fs) while we were running. */
		if (!(le32_to_cpu(es->s_feature_incompat) &
		    EXT4_FEATURE_INCOMPAT_MMP)) {
			ext4_warning(sb, "kmmpd being stopped since MMP feature"
				     " has been disabled.");
			goto exit_thread;
		}

		/* Read-only filesystems need no liveness updates; fall
		 * through to the clean-unmount sequence below. */
		if (sb_rdonly(sb))
			break;

		/* Sleep out the remainder of the update interval. */
		diff = jiffies - last_update_time;
		if (diff < mmp_update_interval * HZ)
			schedule_timeout_interruptible(mmp_update_interval *
						       HZ - diff);

		/*
		 * We need to make sure that more than mmp_check_interval
		 * seconds have not passed since writing. If that has happened
		 * we need to check if the MMP block is as we left it.
		 */
		diff = jiffies - last_update_time;
		if (diff > mmp_check_interval * HZ) {
			struct buffer_head *bh_check = NULL;
			struct mmp_struct *mmp_check;

			retval = read_mmp_block(sb, &bh_check, mmp_block);
			if (retval) {
				ext4_error_err(sb, -retval,
					       "error reading MMP data: %d",
					       retval);
				goto exit_thread;
			}

			/* Another node overwriting our seq/nodename means the
			 * device is multiply mounted: abort this filesystem. */
			mmp_check = (struct mmp_struct *)(bh_check->b_data);
			if (mmp->mmp_seq != mmp_check->mmp_seq ||
			    memcmp(mmp->mmp_nodename, mmp_check->mmp_nodename,
				   sizeof(mmp->mmp_nodename))) {
				dump_mmp_msg(sb, mmp_check,
					     "Error while updating MMP info. "
					     "The filesystem seems to have been"
					     " multiply mounted.");
				ext4_error_err(sb, EBUSY, "abort");
				put_bh(bh_check);
				retval = -EBUSY;
				goto exit_thread;
			}
			put_bh(bh_check);
		}

		 /*
		 * Adjust the mmp_check_interval depending on how much time
		 * it took for the MMP block to be written.
		 */
		mmp_check_interval = max(min(EXT4_MMP_CHECK_MULT * diff / HZ,
					     EXT4_MMP_MAX_CHECK_INTERVAL),
					 EXT4_MMP_MIN_CHECK_INTERVAL);
		mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
	}

	/*
	 * Unmount seems to be clean.
	 */
	mmp->mmp_seq = cpu_to_le32(EXT4_MMP_SEQ_CLEAN);
	mmp->mmp_time = cpu_to_le64(ktime_get_real_seconds());

	retval = write_mmp_block(sb, bh);

exit_thread:
	/* Thread owns data and bh (handed over by ext4_multi_mount_protect);
	 * release both before exiting. */
	EXT4_SB(sb)->s_mmp_tsk = NULL;
	kfree(data);
	brelse(bh);
	return retval;
}
253 :
254 : /*
255 : * Get a random new sequence number but make sure it is not greater than
256 : * EXT4_MMP_SEQ_MAX.
257 : */
258 0 : static unsigned int mmp_new_seq(void)
259 : {
260 0 : u32 new_seq;
261 :
262 0 : do {
263 0 : new_seq = prandom_u32();
264 0 : } while (new_seq > EXT4_MMP_SEQ_MAX);
265 :
266 0 : return new_seq;
267 : }
268 :
/*
 * Protect the filesystem from being mounted more than once.
 *
 * Mount-time half of the MMP protocol: read the MMP block, bail out if
 * another node or fsck clearly owns the device, otherwise wait out the
 * check interval twice (before and after writing our own random sequence)
 * to make sure nobody else is updating the block, then hand the buffer
 * off to a freshly started kmmpd thread.
 *
 * Returns 0 on success; 1 on any failure (caller treats nonzero as
 * "refuse the mount" — the specific errno is not propagated).
 */
int ext4_multi_mount_protect(struct super_block *sb,
				       ext4_fsblk_t mmp_block)
{
	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
	struct buffer_head *bh = NULL;
	struct mmp_struct *mmp = NULL;
	struct mmpd_data *mmpd_data;
	u32 seq;
	unsigned int mmp_check_interval = le16_to_cpu(es->s_mmp_update_interval);
	unsigned int wait_time = 0;
	int retval;

	/* MMP block must fall inside the filesystem proper. */
	if (mmp_block < le32_to_cpu(es->s_first_data_block) ||
	    mmp_block >= ext4_blocks_count(es)) {
		ext4_warning(sb, "Invalid MMP block in superblock");
		goto failed;
	}

	retval = read_mmp_block(sb, &bh, mmp_block);
	if (retval)
		goto failed;

	mmp = (struct mmp_struct *)(bh->b_data);

	if (mmp_check_interval < EXT4_MMP_MIN_CHECK_INTERVAL)
		mmp_check_interval = EXT4_MMP_MIN_CHECK_INTERVAL;

	/*
	 * If check_interval in MMP block is larger, use that instead of
	 * update_interval from the superblock.
	 */
	if (le16_to_cpu(mmp->mmp_check_interval) > mmp_check_interval)
		mmp_check_interval = le16_to_cpu(mmp->mmp_check_interval);

	/* A CLEAN sequence means the last user unmounted cleanly: no need
	 * to wait, go straight to claiming the block. */
	seq = le32_to_cpu(mmp->mmp_seq);
	if (seq == EXT4_MMP_SEQ_CLEAN)
		goto skip;

	if (seq == EXT4_MMP_SEQ_FSCK) {
		dump_mmp_msg(sb, mmp, "fsck is running on the filesystem");
		goto failed;
	}

	/* Wait a bit longer than one check interval, but cap the slack at
	 * 60 seconds for very large intervals. */
	wait_time = min(mmp_check_interval * 2 + 1,
			mmp_check_interval + 60);

	/* Print MMP interval if more than 20 secs. */
	if (wait_time > EXT4_MMP_MIN_CHECK_INTERVAL * 4)
		ext4_warning(sb, "MMP interval %u higher than expected, please"
			     " wait.\n", wait_time * 2);

	if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
		ext4_warning(sb, "MMP startup interrupted, failing mount\n");
		goto failed;
	}

	/* First round: if the sequence moved while we slept, someone else
	 * is alive on this device. */
	retval = read_mmp_block(sb, &bh, mmp_block);
	if (retval)
		goto failed;
	mmp = (struct mmp_struct *)(bh->b_data);
	if (seq != le32_to_cpu(mmp->mmp_seq)) {
		dump_mmp_msg(sb, mmp,
			     "Device is already active on another node.");
		goto failed;
	}

skip:
	/*
	 * write a new random sequence number.
	 */
	seq = mmp_new_seq();
	mmp->mmp_seq = cpu_to_le32(seq);

	retval = write_mmp_block(sb, bh);
	if (retval)
		goto failed;

	/*
	 * wait for MMP interval and check mmp_seq.
	 */
	if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
		ext4_warning(sb, "MMP startup interrupted, failing mount");
		goto failed;
	}

	/* Second round: our own sequence must have survived untouched,
	 * otherwise another node overwrote it concurrently. */
	retval = read_mmp_block(sb, &bh, mmp_block);
	if (retval)
		goto failed;
	mmp = (struct mmp_struct *)(bh->b_data);
	if (seq != le32_to_cpu(mmp->mmp_seq)) {
		dump_mmp_msg(sb, mmp,
			     "Device is already active on another node.");
		goto failed;
	}

	mmpd_data = kmalloc(sizeof(*mmpd_data), GFP_KERNEL);
	if (!mmpd_data) {
		ext4_warning(sb, "not enough memory for mmpd_data");
		goto failed;
	}
	mmpd_data->sb = sb;
	mmpd_data->bh = bh;

	/*
	 * Start a kernel thread to update the MMP block periodically.
	 * On success, ownership of mmpd_data and bh passes to kmmpd,
	 * which frees both when it exits.
	 */
	EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, mmpd_data, "kmmpd-%.*s",
					     (int)sizeof(mmp->mmp_bdevname),
					     bdevname(bh->b_bdev,
						      mmp->mmp_bdevname));
	if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) {
		EXT4_SB(sb)->s_mmp_tsk = NULL;
		kfree(mmpd_data);
		ext4_warning(sb, "Unable to create kmmpd thread for %s.",
			     sb->s_id);
		goto failed;
	}

	return 0;

failed:
	brelse(bh);
	return 1;
}
396 :
397 :
|